LazUtils: Use TUnicodeCharacterEnumerator for the "for .. in" iterator syntax. Move demo to examples dir and add a GUI demo.

git-svn-id: trunk@56260 -
This commit is contained in:
juha 2017-11-03 16:13:46 +00:00
parent 7772e8646b
commit 18570daf7b
8 changed files with 300 additions and 32 deletions

8
.gitattributes vendored
View File

@ -3126,6 +3126,12 @@ components/lazutils/dynamicarray.pas svneol=native#text/pascal
components/lazutils/dynhasharray.pp svneol=native#text/pascal
components/lazutils/dynqueue.pas svneol=native#text/pascal
components/lazutils/easylazfreetype.pas svneol=native#text/pascal
components/lazutils/examples/LazUnicode/LazUnicodeConsole.lpi svneol=native#text/plain
components/lazutils/examples/LazUnicode/LazUnicodeConsole.lpr svneol=native#text/pascal
components/lazutils/examples/LazUnicode/LazUnicodeGUI.lpi svneol=native#text/plain
components/lazutils/examples/LazUnicode/LazUnicodeGUI.lpr svneol=native#text/pascal
components/lazutils/examples/LazUnicode/lazunicodeunit.dfm svneol=native#text/plain
components/lazutils/examples/LazUnicode/lazunicodeunit.pas svneol=native#text/pascal
components/lazutils/examples/LookupStringList/ReadMe.txt svneol=native#text/plain
components/lazutils/examples/LookupStringList/TDedupeDemo.lpi svneol=native#text/plain
components/lazutils/examples/LookupStringList/TDedupeDemo.lpr svneol=native#text/pascal
@ -3208,8 +3214,6 @@ components/lazutils/maps.pp svneol=native#text/pascal
components/lazutils/masks.pas svneol=native#text/pascal
components/lazutils/paswstring.pas svneol=native#text/pascal
components/lazutils/stringhashlist.pas svneol=native#text/pascal
components/lazutils/test/LazUnicodeTest.lpi svneol=native#text/plain
components/lazutils/test/LazUnicodeTest.lpr svneol=native#text/pascal
components/lazutils/test/TestLazStorageMem.lpi svneol=native#text/plain
components/lazutils/test/TestLazStorageMem.lpr svneol=native#text/plain
components/lazutils/test/testlazstoragememcase1.pas svneol=native#text/plain

View File

@ -9,7 +9,7 @@
</Flags>
<SessionStorage Value="InProjectDir"/>
<MainUnit Value="0"/>
<Title Value="LazUnicodeTest"/>
<Title Value="LazUnicodeConsole"/>
<UseAppBundle Value="False"/>
<ResourceType Value="res"/>
</General>
@ -19,7 +19,7 @@
<CompilerOptions>
<Version Value="11"/>
<Target>
<Filename Value="LazUnicodeTest"/>
<Filename Value="LazUnicodeConsole"/>
</Target>
<SearchPaths>
<IncludeFiles Value="$(ProjOutDir)"/>
@ -51,7 +51,7 @@
</RequiredPackages>
<Units Count="1">
<Unit0>
<Filename Value="LazUnicodeTest.lpr"/>
<Filename Value="LazUnicodeConsole.lpr"/>
<IsPartOfProject Value="True"/>
</Unit0>
</Units>
@ -59,7 +59,7 @@
<CompilerOptions>
<Version Value="11"/>
<Target>
<Filename Value="LazUnicodeTest"/>
<Filename Value="LazUnicodeConsole"/>
</Target>
<SearchPaths>
<IncludeFiles Value="$(ProjOutDir)"/>

View File

@ -4,7 +4,7 @@
Works also in Delphi where String=UnicodeString. Just rename this file as *.dpr.
For Delphi you must copy units LazUnicode and LazUTF16, both part of LazUtils package.
}
program LazUnicodeTest;
program LazUnicodeConsole;
{$IFDEF FPC}
{$mode objfpc}{$H+}
@ -68,28 +68,12 @@ begin
// Use CodePoint enumerator explicitly
writeln('');
writeln('*** Using CodePoint iterator explicitly: ***');
writeln('*** Using CodePoint enumerator explicitly: ***');
cpIter := TCodePointEnumerator.Create(s);
while cpIter.MoveNext do
writeln('ch=', cpIter.Current, ' has ', cpIter.CurrentCodeUnitCount, ' codeunits.');
cpIter.Free;
{$IFDEF FPC}
// Use for-in loop for CodePoints.
writeln('');
writeln('*** Using for-in loop for CodePoints : ***');
for ch in s do
writeln('ch=',ch);
{$ENDIF}
// Use for-in loop for codeunits using a Char variable still works.
{ Uncomment to test.
writeln('');
writeln('*** Using for-in loop for codeunits: ***');
for CodeUnit in s do
writeln('CodeUnit=',CodeUnit); // The output makes no sense obviously.
}
s_UTF8 := Combining;
s := s_UTF8; // Converts encoding when needed.
writeln('');
@ -97,11 +81,27 @@ begin
// Use UnicodeCharacter enumerator explicitly
writeln('');
writeln('*** Using UnicodeCharacter iterator explicitly: ***');
writeln('*** Using UnicodeCharacter enumerator explicitly: ***');
ucIter := TUnicodeCharacterEnumerator.Create(s);
while ucIter.MoveNext do
writeln('ch=', ucIter.Current, ' has ', ucIter.CurrentCodePointCount, ' codepoints and ', ucIter.CurrentCodeUnitCount, ' codeunits.');
ucIter.Free;
{$IFDEF FPC}
// Use for-in loop for Unicode Characters.
writeln('');
writeln('*** Using for-in loop for Unicode Characters : ***');
for ch in s do
writeln('ch=',ch);
{$ENDIF}
// for-in loop for codeunits using a Char variable still works.
{ Uncomment to test.
writeln('');
writeln('*** Using for-in loop for codeunits: ***');
for CodeUnit in s do
writeln('CodeUnit=',CodeUnit); // The output makes no sense obviously.
}
end.

View File

@ -0,0 +1,82 @@
<?xml version="1.0" encoding="UTF-8"?>
<CONFIG>
<ProjectOptions>
<Version Value="10"/>
<General>
<SessionStorage Value="InProjectDir"/>
<MainUnit Value="0"/>
<Title Value="LazUnicodeGUI"/>
<UseAppBundle Value="False"/>
<ResourceType Value="res"/>
<Icon Value="0"/>
</General>
<BuildModes Count="1">
<Item1 Name="Default" Default="True"/>
</BuildModes>
<PublishOptions>
<Version Value="2"/>
</PublishOptions>
<RunParams>
<local>
<FormatVersion Value="1"/>
</local>
</RunParams>
<RequiredPackages Count="1">
<Item1>
<PackageName Value="LCL"/>
</Item1>
</RequiredPackages>
<Units Count="2">
<Unit0>
<Filename Value="LazUnicodeGUI.lpr"/>
<IsPartOfProject Value="True"/>
</Unit0>
<Unit1>
<Filename Value="lazunicodeunit.pas"/>
<IsPartOfProject Value="True"/>
<ComponentName Value="Form1"/>
<HasResources Value="True"/>
<ResourceBaseClass Value="Form"/>
<UnitName Value="LazUnicodeUnit"/>
</Unit1>
</Units>
</ProjectOptions>
<CompilerOptions>
<Version Value="11"/>
<Target>
<Filename Value="LazUnicodeGUI"/>
</Target>
<SearchPaths>
<IncludeFiles Value="$(ProjOutDir)"/>
<UnitOutputDirectory Value="lib/$(TargetCPU)-$(TargetOS)"/>
</SearchPaths>
<Parsing>
<SyntaxOptions>
<SyntaxMode Value="Delphi"/>
</SyntaxOptions>
</Parsing>
<Linking>
<Options>
<Win32>
<GraphicApplication Value="True"/>
</Win32>
</Options>
</Linking>
<Other>
<CustomOptions Value="-dBorland -dVer150 -dDelphi7 -dCompiler6_Up -dPUREPASCAL"/>
</Other>
</CompilerOptions>
<Debugging>
<Exceptions Count="3">
<Item1>
<Name Value="EAbort"/>
</Item1>
<Item2>
<Name Value="ECodetoolError"/>
</Item2>
<Item3>
<Name Value="EFOpenError"/>
</Item3>
</Exceptions>
</Debugging>
</CONFIG>

View File

@ -0,0 +1,21 @@
// GUI demo program for the LazUnicode unit: it creates the single form
// declared in LazUnicodeUnit and runs the LCL message loop.
program LazUnicodeGUI;
{$mode objfpc}{$H+}
uses
// cthreads is pulled in only on UNIX builds that define UseCThreads.
{$IFDEF UNIX}{$IFDEF UseCThreads}
cthreads,
{$ENDIF}{$ENDIF}
Interfaces, // this includes the LCL widgetset
Forms, LazUnicodeUnit
{ you can add units after this };
// Link the project's resource file (same base name as this program).
{$R *.res}
begin
// NOTE(review): presumably makes the LCL report an error when a form class
// has no resource stream of its own - confirm against the LCL documentation.
RequireDerivedFormResource:=True;
Application.Initialize;
// Instantiate the demo form and store the instance in the global Form1.
Application.CreateForm(TForm1, Form1);
Application.Run;
end.

View File

@ -0,0 +1,30 @@
object Form1: TForm1
Left = 153
Height = 528
Top = 77
Width = 608
Caption = 'LazUnicode Demo'
ClientHeight = 528
ClientWidth = 608
LCLVersion = '1.9.0.0'
object Button1: TButton
Left = 0
Height = 29
Top = 0
Width = 608
Align = alTop
AutoSize = True
Caption = 'Test LazUnicode functions and enumerators'
OnClick = Button1Click
TabOrder = 0
end
object Memo1: TMemo
Left = 0
Height = 499
Top = 29
Width = 608
Align = alClient
ScrollBars = ssAutoBoth
TabOrder = 1
end
end

View File

@ -0,0 +1,130 @@
unit LazUnicodeUnit;
{$IFDEF FPC}
{$mode objfpc}{$H+}
{$ENDIF}
// For testing the UTF16 version.
{$IF DEFINED(FPC) and DEFINED(UseUTF16)}
{$ModeSwitch UnicodeStrings} // Sets also FPC_UNICODESTRINGS.
{$ENDIF}
{$IF DEFINED(FPC_UNICODESTRINGS) or not DEFINED(FPC)}
{$DEFINE ReallyUseUTF16} // FPC with UTF-16 or Delphi
{$ENDIF}
interface
uses
Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs, StdCtrls,
LazUnicode;
type
{ TForm1 }
// Main (and only) form of the LazUnicode GUI demo.
// The component fields and the event handler below are bound to the objects
// of the same name in the form resource, so their names must stay in sync.
TForm1 = class(TForm)
// Button that starts the demo; its OnClick is wired to Button1Click.
Button1: TButton;
// Memo that receives all demo output.
Memo1: TMemo;
// Runs the LazUnicode function and enumerator tests and prints the results
// into Memo1.
procedure Button1Click(Sender: TObject);
private
public
end;
var
Form1: TForm1;
const
Eyes = '👀';
Thai = 'ฃ'; // No idea what it means.
WineGlass = '🍷';
Heart = '💓'; // or '♡';
// Accents in combining codepoints. Last one has 2 consecutive combining marks.
Combining = 'ÓÓỐỐỚỚÒÒỒỒỎỎỔỔỞỞỌỌBあC'#$CC#$81#$CC#$B2;
//ArEnStr1 = 'مAرBحCبDاE';
implementation
{$R *.dfm}
{ TForm1 }
// Runs the LazUnicode demo and writes every result into Memo1.
//
// The handler
//   1. builds a test string from the unit's constants and reports which
//      string type / encoding the program was compiled for,
//   2. exercises CodePointCopy, CodePointLength and CodePointPos,
//   3. walks the string with an explicitly created TCodePointEnumerator,
//   4. walks the "Combining" string with an explicitly created
//      TUnicodeCharacterEnumerator,
//   5. (FPC only) walks the same string with a for-in loop, which uses the
//      enumerator operator exported by LazUnicode.
//
// Sender is not used.
procedure TForm1.Button1Click(Sender: TObject);
var
  s_UTF8: {$IFDEF FPC}AnsiString{$ELSE}String{$ENDIF};
  s, ch: String;
  //CodeUnit: Char;   // Needed only by the commented-out codeunit loop below.
  i: NativeInt;
  cpIter: TCodePointEnumerator;
  ucIter: TUnicodeCharacterEnumerator;
begin
  Memo1.Lines.Clear;
  s_UTF8 := 'Pöö ' + Eyes + Thai + '. Have ' + WineGlass + ' for FPC' + Heart + 'Lazarus';
  s := s_UTF8;             // Converts encoding when needed.
  {$IFDEF ReallyUseUTF16}
  Memo1.Lines.Add('Using UnicodeString + UTF-16');
  {$ELSE}
  Memo1.Lines.Add('Using AnsiString + UTF-8');
  {$ENDIF}
  Memo1.Lines.Add(Format('Sizeof(Char) = %d.', [Sizeof(Char)]));
  Memo1.Lines.Add('Testing with string: ' + s);

  Memo1.Lines.Add('');
  ch := CodePointCopy(s, 14, 1);   // Should return the wine glass.
  Memo1.Lines.Add('Testing CodePointCopy. SubString = "' + ch + '"');
  i := CodePointLength(s);         // Should return 30.
  Memo1.Lines.Add(Format('Testing CodePointLength. Result = %d.', [i]));
  // Constant must be assigned to AnsiString when using the UTF-8 system.
  s_UTF8 := WineGlass;
  i := CodePointPos(s_UTF8, s);    // Should return 14.
  Memo1.Lines.Add(Format('Testing CodePointPos. Result = %d.', [i]));
  s_UTF8 := '☐';
  i := CodePointPos(s_UTF8, s);    // Should return 0.
  Memo1.Lines.Add(Format('Testing CodePointPos for non-existent char. Result = %d.', [i]));

  // Use CodePoint enumerator explicitly
  Memo1.Lines.Add('');
  Memo1.Lines.Add('*** Using CodePoint enumerator explicitly: ***');
  cpIter := TCodePointEnumerator.Create(s);
  try
    while cpIter.MoveNext do
      Memo1.Lines.Add(Format('ch=%s has %d codeunits.',
                             [cpIter.Current, cpIter.CurrentCodeUnitCount]));
  finally
    // Release the enumerator even if Format or Lines.Add raises.
    cpIter.Free;
  end;

  s_UTF8 := Combining;
  s := s_UTF8;             // Converts encoding when needed.
  Memo1.Lines.Add('');
  Memo1.Lines.Add('Testing with string: ' + s);

  // Use UnicodeCharacter enumerator explicitly
  Memo1.Lines.Add('');
  Memo1.Lines.Add('*** Using UnicodeCharacter enumerator explicitly: ***');
  ucIter := TUnicodeCharacterEnumerator.Create(s);
  try
    while ucIter.MoveNext do
      Memo1.Lines.Add(Format('ch=%s has %d codepoints and %d codeunits.',
        [ucIter.Current, ucIter.CurrentCodePointCount, ucIter.CurrentCodeUnitCount]));
  finally
    // Release the enumerator even if Format or Lines.Add raises.
    ucIter.Free;
  end;

  {$IFDEF FPC}
  // Use for-in loop for Unicode Characters.
  Memo1.Lines.Add('');
  Memo1.Lines.Add('*** Using for-in loop for Unicode Characters : ***');
  for ch in s do
    Memo1.Lines.Add('ch=' + ch);
  {$ENDIF}

  // for-in loop for codeunits using a Char variable still works.
  { Uncomment to test (also uncomment the CodeUnit declaration above).
  Memo1.Lines.Add('');
  Memo1.Lines.Add('*** Using for-in loop for codeunits: ***');
  for CodeUnit in s do
    Memo1.Lines.Add('CodeUnit=' + CodeUnit);  // The output makes no sense obviously.
  }
end;
end.

View File

@ -93,12 +93,13 @@ type
end;
{$IFDEF FPC}
// Enumerator for CodePoints is used for for-in loop now.
operator Enumerator(A: String): TCodePointEnumerator;
// Enumerator for CodePoints could be used for the for-in loop.
//operator Enumerator(A: String): TCodePointEnumerator;
// This enumerator combines diacritical marks. Not enabled by default because
// there are many more rules for combining codepoints.
//operator Enumerator(A: String): TUnicodeCharacterEnumerator;
// This enumerator combines diacritical marks.
// It is used by default although there are more rules for combining codepoints.
// Diacritical marks cover rules for most western languages.
operator Enumerator(A: String): TUnicodeCharacterEnumerator;
{$ENDIF}
implementation
@ -311,9 +312,9 @@ end;
// Enumerator
//---
{$IFDEF FPC}
operator Enumerator(A: String): TCodePointEnumerator;
operator Enumerator(A: String): TUnicodeCharacterEnumerator;
begin
Result := TCodePointEnumerator.Create(A);
Result := TUnicodeCharacterEnumerator.Create(A);
end;
{$ENDIF}