mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-05-08 19:32:45 +02:00
108 lines
3.4 KiB
ObjectPascal
108 lines
3.4 KiB
ObjectPascal
{
|
||
This is a test program for LazUnicode unit.
|
||
Works with UTF-8 and UTF-16 encodings by switching UseUTF16 define.
|
||
Works also in Delphi where String=UnicodeString. Just rename this file as *.dpr.
|
||
For Delphi you must copy units LazUnicode and LazUTF16, both part of LazUtils package.
|
||
}
|
||
program LazUnicodeConsole;
|
||
|
||
{$IFDEF FPC}
|
||
{$mode objfpc}{$H+}
|
||
{$ENDIF}
|
||
|
||
{$APPTYPE CONSOLE} // Mostly for Delphi
|
||
|
||
// For testing the UTF16 version.
|
||
{$IF DEFINED(FPC) and DEFINED(UseUTF16)}
|
||
{$ModeSwitch UnicodeStrings} // Sets also FPC_UNICODESTRINGS.
|
||
{$ENDIF}
|
||
|
||
{$IF DEFINED(FPC_UNICODESTRINGS) or not DEFINED(FPC)}
|
||
{$DEFINE ReallyUseUTF16} // FPC with UTF-16 or Delphi
|
||
{$ENDIF}
|
||
|
||
uses
|
||
LazUnicode;
|
||
|
||
const
|
||
Eyes = '👀';
|
||
Thai = 'ฃ'; // No idea what it means.
|
||
WineGlass = '🍷';
|
||
Heart = '💓'; // or '♡';
|
||
// Accents in combining codepoints. Last one has 2 consecutive combining marks.
|
||
Combining = 'ÓÓỐỐỚỚÒÒỒỒỎỎỔỔỞỞỌỌBあC'#$CC#$81#$CC#$B2;
|
||
//ArEnStr1 = 'مAرBحCبDاE';
|
||
|
||
var
|
||
s_UTF8: {$IFDEF FPC}AnsiString{$ELSE}String{$ENDIF};
|
||
s, ch: String;
|
||
//CodeUnit: Char;
|
||
i: NativeInt;
|
||
cpIter: TCodePointEnumerator;
|
||
ucIter: TUnicodeCharacterEnumerator;
|
||
begin
|
||
s_UTF8 := 'Pöö ' + Eyes + Thai + '. Have ' + WineGlass + ' for FPC' + Heart + 'Lazarus';
|
||
s := s_UTF8; // Converts encoding when needed.
|
||
{$IFDEF ReallyUseUTF16}
|
||
writeln('Using UnicodeString + UTF-16');
|
||
{$ELSE}
|
||
writeln('Using AnsiString + UTF-8');
|
||
{$ENDIF}
|
||
writeln('Sizeof(Char) = ', Sizeof(Char));
|
||
writeln('Testing with string: ', s);
|
||
|
||
writeln('');
|
||
ch := CodePointCopy(s, 14, 1); // Should return the wine glass.
|
||
writeln('Testing CodePointCopy. SubString = "', ch + '"');
|
||
|
||
i := CodePointLength(s); // Should return 30.
|
||
writeln('Testing CodePointLength. Result = ', i);
|
||
|
||
// Constant must be assigned to AnsiString when using the UTF-8 system.
|
||
s_UTF8 := WineGlass;
|
||
i := CodePointPos(s_UTF8, s); // Should return 14.
|
||
writeln('Testing CodePointPos. Result = ', i);
|
||
s_UTF8 := '☐';
|
||
i := CodePointPos(s_UTF8, s); // Should return 0.
|
||
writeln('Testing CodePointPos for non-existent char. Result = ', i);
|
||
|
||
// Use CodePoint enumerator explicitly
|
||
writeln('');
|
||
writeln('*** Using CodePoint enumerator explicitly: ***');
|
||
cpIter := TCodePointEnumerator.Create(s);
|
||
while cpIter.MoveNext do
|
||
writeln('ch=', cpIter.Current, ' has ', cpIter.CurrentCodeUnitCount, ' codeunits.');
|
||
cpIter.Free;
|
||
|
||
s_UTF8 := Combining;
|
||
s := s_UTF8; // Converts encoding when needed.
|
||
writeln('');
|
||
writeln('Testing with string: ', s);
|
||
|
||
// Use UnicodeCharacter enumerator explicitly
|
||
writeln('');
|
||
writeln('*** Using UnicodeCharacter enumerator explicitly: ***');
|
||
ucIter := TUnicodeCharacterEnumerator.Create(s);
|
||
while ucIter.MoveNext do
|
||
writeln('ch=', ucIter.Current, ' has ', ucIter.CurrentCodePointCount, ' codepoints and ', ucIter.CurrentCodeUnitCount, ' codeunits.');
|
||
ucIter.Free;
|
||
|
||
{$IFDEF FPC}
|
||
// Use for-in loop for Unicode Characters.
|
||
writeln('');
|
||
writeln('*** Using for-in loop for Unicode Characters : ***');
|
||
for ch in s do
|
||
writeln('ch=',ch);
|
||
{$ENDIF}
|
||
|
||
// for-in loop for codeunits using a Char variable still works.
|
||
{ Uncomment to test.
|
||
writeln('');
|
||
writeln('*** Using for-in loop for codeunits: ***');
|
||
for CodeUnit in s do
|
||
writeln('CodeUnit=',CodeUnit); // The output makes no sense obviously.
|
||
}
|
||
|
||
end.
|
||
|