* synchronized with trunk

git-svn-id: branches/unicodekvm@48756 -
This commit is contained in:
nickysn 2021-02-21 17:48:14 +00:00
commit cd8b0036d4
11 changed files with 843 additions and 8 deletions

5
.gitattributes vendored
View File

@ -9097,6 +9097,8 @@ packages/rtl-unicode/src/inc/cp936.pas svneol=native#text/pascal
packages/rtl-unicode/src/inc/cp949.pas svneol=native#text/pascal
packages/rtl-unicode/src/inc/cp950.pas svneol=native#text/pascal
packages/rtl-unicode/src/inc/cpbuildu.pp svneol=native#text/plain
packages/rtl-unicode/src/inc/eastasianwidth.pp svneol=native#text/plain
packages/rtl-unicode/src/inc/eastasianwidth_code.inc svneol=native#text/plain
packages/rtl-unicode/src/inc/freebidi.pp svneol=native#text/plain
packages/rtl-unicode/src/inc/graphemebreakproperty.pp svneol=native#text/plain
packages/rtl-unicode/src/inc/graphemebreakproperty_code.inc svneol=native#text/plain
@ -16212,6 +16214,7 @@ tests/test/units/strings/tstrings1.pp svneol=native#text/plain
tests/test/units/strutils/taddchar.pp svneol=native#text/plain
tests/test/units/strutils/taddcharr.pp svneol=native#text/plain
tests/test/units/strutils/tbintohex.pp svneol=native#text/plain
tests/test/units/strutils/tboyer.pp svneol=native#text/pascal
tests/test/units/strutils/tdec2numb.pp svneol=native#text/plain
tests/test/units/strutils/thex2dec.pp svneol=native#text/plain
tests/test/units/strutils/thextobin.pp svneol=native#text/plain
@ -19836,6 +19839,8 @@ utils/unicode/cldrtest.pas svneol=native#text/pascal
utils/unicode/cldrtxt.pas svneol=native#text/plain
utils/unicode/cldrxml.pas svneol=native#text/pascal
utils/unicode/data/readme.txt svneol=native#text/plain
utils/unicode/eawparser.lpi svneol=native#text/plain
utils/unicode/eawparser.lpr svneol=native#text/pascal
utils/unicode/fpmake.pp svneol=native#text/plain
utils/unicode/gbpparser.lpi svneol=native#text/plain
utils/unicode/gbpparser.lpr svneol=native#text/pascal

View File

@ -429,8 +429,7 @@ begin
AddMatch(i+1);
//Only first match ?
if not aMatchAll then break;
inc(i,OldPatternSize);
inc(i,OldPatternSize);
inc(i,DeltaJumpTable2[0]);
end else begin
i:=i + Max(DeltaJumpTable1[ord(s[i])],DeltaJumpTable2[j]);
end;
@ -582,8 +581,7 @@ begin
AddMatch(i+1);
//Only first match ?
if not aMatchAll then break;
inc(i,OldPatternSize);
inc(i,OldPatternSize);
inc(i,DeltaJumpTable2[0]);
end else begin
i:=i + Max(DeltaJumpTable1[Ord(lCaseArray[Ord(s[i])])],DeltaJumpTable2[j]);
end;

View File

@ -16,12 +16,13 @@ Const
CPUnits = [aix,amiga,aros,android,beos,darwin,iphonesim,ios,emx,gba,nds,freebsd,go32v2,haiku,linux,morphos,netbsd,netware,netwlibc,openbsd,os2,solaris,watcom,wii,win32,win64,wince,dragonfly,freertos];
utf8bidiOSes = [netware,netwlibc];
freebidiOSes = [netware,netwlibc];
GraphemeBreakPropertyOSes = AllOSes;
GraphemeBreakPropertyOSes = AllOSes-[embedded,zxspectrum,msxdos,amstradcpc];
EastAsianWidthOSes = AllOSes-[embedded,zxspectrum,msxdos,amstradcpc];
// Character not movable because fpwidestring depends on it.
// CharacterOSes = [android,darwin,freebsd,linux,netbsd,openbsd,solaris,win32,win64,dragonfly];
UnicodeAllOSes = CollationOSes + utf8bidiOSes + freebidiOSes + CPUnits + GraphemeBreakPropertyOSes;
UnicodeAllOSes = CollationOSes + utf8bidiOSes + freebidiOSes + CPUnits + GraphemeBreakPropertyOSes + EastAsianWidthOSes;
// Amiga has a crt in its RTL dir, but it is commented in the makefile
@ -145,6 +146,12 @@ begin
begin
AddInclude('graphemebreakproperty_code.inc');
end;
T:=P.Targets.AddUnit('eastasianwidth.pp',EastAsianWidthOSes);
with T.Dependencies do
begin
AddInclude('eastasianwidth_code.inc');
end;
end
end;

View File

@ -0,0 +1,55 @@
{ EastAsianWidth Unicode data unit.
Copyright (C) 2021 Nikolay Nikolov <nickysn@users.sourceforge.net>
This library is free software; you can redistribute it and/or modify it
under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version with the following modification:
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent modules,and
to copy and distribute the resulting executable under terms of your choice,
provided that you also meet, for each linked independent module, the terms
and conditions of the license of that module. An independent module is a
module which is not derived from or based on this library. If you modify
this library, you may extend this exception to your version of the library,
but you are not obligated to do so. If you do not wish to do so, delete this
exception statement from your version.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
for more details.
You should have received a copy of the GNU Library General Public License
along with this library; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1335, USA.
}
unit eastasianwidth;
{$MODE objfpc}
interface
type
TEastAsianWidth = (
eawN,
eawA,
eawF,
eawH,
eawNa,
eawW);
function GetEastAsianWidth(Ch: UCS4Char): TEastAsianWidth;
implementation
function GetEastAsianWidth(Ch: UCS4Char): TEastAsianWidth;
begin
{$I eastasianwidth_code.inc}
end;
end.

View File

@ -0,0 +1,300 @@
{ do not edit, this file is autogenerated by the eawparser tool }
if(Ch=12288)or
((Ch>=65281)and(Ch<=65376))or
((Ch>=65504)and(Ch<=65510))then result:=eawF else
if(Ch=8361)or
((Ch>=65377)and(Ch<=65470))or
((Ch>=65474)and(Ch<=65479))or
((Ch>=65482)and(Ch<=65487))or
((Ch>=65490)and(Ch<=65495))or
((Ch>=65498)and(Ch<=65500))or
((Ch>=65512)and(Ch<=65518))then result:=eawH else
if((Ch>=32)and(Ch<=126))or
((Ch>=162)and(Ch<=163))or
((Ch>=165)and(Ch<=166))or
(Ch=172)or
(Ch=175)or
((Ch>=10214)and(Ch<=10221))or
((Ch>=10629)and(Ch<=10630))then result:=eawNa else
case Ch of
161..168,
170,
173..180,
182..186,
188..191,
198,
208,
215..216,
222..225,
230,
232..234,
236..237,
240,
242..243,
247..250,
252,
254,
257,
273,
275,
283,
294..295,
299,
305..307,
312,
319..322,
324,
328..331,
333,
338..339,
358..359,
363,
462,
464,
466,
468,
470,
472,
474,
476,
593,
609,
708,
711,
713..715,
717,
720,
728..731,
733,
735,
768..879,
913..929,
931..937,
945..961,
963..969,
1025,
1040..1103,
1105,
8208,
8211..8214,
8216..8217,
8220..8221,
8224..8226,
8228..8231,
8240,
8242..8243,
8245,
8251,
8254,
8308,
8319,
8321..8324,
8364,
8451,
8453,
8457,
8467,
8470,
8481..8482,
8486,
8491,
8531..8532,
8539..8542,
8544..8555,
8560..8569,
8585,
8592..8601,
8632..8633,
8658,
8660,
8679,
8704,
8706..8707,
8711..8712,
8715,
8719,
8721,
8725,
8730,
8733..8736,
8739,
8741,
8743..8748,
8750,
8756..8759,
8764..8765,
8776,
8780,
8786,
8800..8801,
8804..8807,
8810..8811,
8814..8815,
8834..8835,
8838..8839,
8853,
8857,
8869,
8895,
8978,
9312..9449,
9451..9547,
9552..9587,
9600..9615,
9618..9621,
9632..9633,
9635..9641,
9650..9651,
9654..9655,
9660..9661,
9664..9665,
9670..9672,
9675,
9678..9681,
9698..9701,
9711,
9733..9734,
9737,
9742..9743,
9756,
9758,
9792,
9794,
9824..9825,
9827..9829,
9831..9834,
9836..9837,
9839,
9886..9887,
9919,
9926..9933,
9935..9939,
9941..9953,
9955,
9960..9961,
9963..9969,
9972,
9974..9977,
9979..9980,
9982..9983,
10045,
10102..10111,
11094..11097,
12872..12879,
57344..63743,
65024..65039,
65533,
127232..127242,
127248..127277,
127280..127337,
127344..127373,
127375..127376,
127387..127404,
917760..917999,
983040..1048573,
1048576..1114109:result:=eawA;
4352..4447,
8986..8987,
9001..9002,
9193..9196,
9200,
9203,
9725..9726,
9748..9749,
9800..9811,
9855,
9875,
9889,
9898..9899,
9917..9918,
9924..9925,
9934,
9940,
9962,
9970..9971,
9973,
9978,
9981,
9989,
9994..9995,
10024,
10060,
10062,
10067..10069,
10071,
10133..10135,
10160,
10175,
11035..11036,
11088,
11093,
11904..11929,
11931..12019,
12032..12245,
12272..12283,
12289..12350,
12353..12438,
12441..12543,
12549..12589,
12593..12686,
12688..12730,
12736..12771,
12784..12830,
12832..12871,
12880..13054,
13056..19903,
19968..42124,
42128..42182,
43360..43388,
44032..55203,
63744..64255,
65040..65049,
65072..65106,
65108..65126,
65128..65131,
94176,
94208..100332,
100352..101106,
110592..110593,
126980,
127183,
127374,
127377..127386,
127488..127490,
127504..127547,
127552..127560,
127568..127569,
127744..127776,
127789..127797,
127799..127868,
127870..127891,
127904..127946,
127951..127955,
127968..127984,
127988,
127992..128062,
128064,
128066..128252,
128255..128317,
128331..128334,
128336..128359,
128378,
128405..128406,
128420,
128507..128591,
128640..128709,
128716,
128720..128722,
128747..128748,
128756..128758,
129296..129310,
129312..129319,
129328,
129331..129342,
129344..129355,
129360..129374,
129408..129425,
129472,
131072..196605,
196608..262141:result:=eawW;
else result:=eawN end

View File

@ -374,7 +374,7 @@ CPU_SPECIFIC_COMMON_UNITS=
ifeq ($(ARCH),arm)
CPU_SPECIFIC_COMMON_UNITS=sysutils math classes fgl macpas typinfo types rtlconsts getopts lineinfo
ifeq ($(SUBARCH),armv7m)
CPU_UNITS=lm3fury lm3tempest stm32f10x_ld stm32f10x_md stm32f10x_hd stm32f10x_xl stm32f10x_conn stm32f10x_cl lpc13xx lpc1768 lm4f120 sam3x8e xmc4500 cortexm3 cortexm4 # thumb2_bare
CPU_UNITS=lm3fury lm3tempest stm32f10x_ld stm32f10x_md stm32f10x_hd stm32f10x_xl stm32f10x_conn stm32f10x_cl lpc13xx lpc1768 sam3x8e xmc4500 cortexm3 cortexm4 # thumb2_bare
CPU_UNITS_DEFINED=1
endif
ifeq ($(SUBARCH),armv7em)

View File

@ -71,7 +71,7 @@ CPU_SPECIFIC_COMMON_UNITS=
ifeq ($(ARCH),arm)
CPU_SPECIFIC_COMMON_UNITS=sysutils math classes fgl macpas typinfo types rtlconsts getopts lineinfo
ifeq ($(SUBARCH),armv7m)
CPU_UNITS=lm3fury lm3tempest stm32f10x_ld stm32f10x_md stm32f10x_hd stm32f10x_xl stm32f10x_conn stm32f10x_cl lpc13xx lpc1768 lm4f120 sam3x8e xmc4500 cortexm3 cortexm4 # thumb2_bare
CPU_UNITS=lm3fury lm3tempest stm32f10x_ld stm32f10x_md stm32f10x_hd stm32f10x_xl stm32f10x_conn stm32f10x_cl lpc13xx lpc1768 sam3x8e xmc4500 cortexm3 cortexm4 # thumb2_bare
CPU_UNITS_DEFINED=1
endif
ifeq ($(SUBARCH),armv7em)

View File

@ -0,0 +1,79 @@
{$mode objfpc}
uses
StrUtils;
const
result1 : array of SizeInt = (1, 4, 7, 10, 13, 16);
var
a : array of SizeInt;
i : LongInt;
begin
if FindMatchesBoyerMooreCaseSensitive('abcabcabcabcabcabcab','abcab',a,false) then
begin
if Length(a)<>1 then
halt(2);
if a[0]<>result1[0] then
halt(3);
end
else
halt(1);
if FindMatchesBoyerMooreCaseSensitive('abcabcabcabcabcabcab','abcab',a,true) then
begin
if Length(a)<>Length(result1) then
halt(12);
for i:=Low(a) to High(a) do
if a[i]<>result1[i] then
halt(13);
end
else
halt(11);
if FindMatchesBoyerMooreCaseInSensitive('abcabcabcabcabcabcab','abcab',a,false) then
begin
if Length(a)<>1 then
halt(22);
if a[0]<>result1[0] then
halt(23);
end
else
halt(21);
{
apparently not working yet:
if FindMatchesBoyerMooreCaseInSensitive('abcabcabcabcabcabcab','abcab',a,true) then
begin
if Length(a)<>Length(result1) then
halt(32);
for i:=Low(a) to High(a) do
if a[i]<>result1[i] then
halt(33);
end
else
halt(31);
if FindMatchesBoyerMooreCaseInSensitive('abcabcabcAbcabcAbcab','abcaB',a,false) then
begin
if Length(a)<>1 then
halt(42);
if a[0]<>result1[0] then
halt(43);
end
else
halt(41);
if FindMatchesBoyerMooreCaseInSensitive('abcabCabcAbcabcABcab','abcaB',a,true) then
begin
if Length(a)<>Length(result1) then
halt(52);
for i:=Low(a) to High(a) do
if a[i]<>result1[i] then
halt(53);
end
else
halt(51);
}
writeln('ok');
end.

View File

@ -0,0 +1,58 @@
<?xml version="1.0" encoding="UTF-8"?>
<CONFIG>
<ProjectOptions>
<Version Value="11"/>
<General>
<Flags>
<MainUnitHasCreateFormStatements Value="False"/>
<MainUnitHasTitleStatement Value="False"/>
<MainUnitHasScaledStatement Value="False"/>
</Flags>
<SessionStorage Value="InProjectDir"/>
<MainUnit Value="0"/>
<Title Value="eawparser"/>
<UseAppBundle Value="False"/>
<ResourceType Value="res"/>
</General>
<BuildModes Count="1">
<Item1 Name="Default" Default="True"/>
</BuildModes>
<PublishOptions>
<Version Value="2"/>
<UseFileFilters Value="True"/>
</PublishOptions>
<RunParams>
<FormatVersion Value="2"/>
<Modes Count="0"/>
</RunParams>
<Units Count="1">
<Unit0>
<Filename Value="eawparser.lpr"/>
<IsPartOfProject Value="True"/>
</Unit0>
</Units>
</ProjectOptions>
<CompilerOptions>
<Version Value="11"/>
<Target>
<Filename Value="eawparser"/>
</Target>
<SearchPaths>
<IncludeFiles Value="$(ProjOutDir)"/>
<UnitOutputDirectory Value="lib/$(TargetCPU)-$(TargetOS)"/>
</SearchPaths>
</CompilerOptions>
<Debugging>
<Exceptions Count="3">
<Item1>
<Name Value="EAbort"/>
</Item1>
<Item2>
<Name Value="ECodetoolError"/>
</Item2>
<Item3>
<Name Value="EFOpenError"/>
</Item3>
</Exceptions>
</Debugging>
</CONFIG>

332
utils/unicode/eawparser.lpr Normal file
View File

@ -0,0 +1,332 @@
{ Parser and code generator for the EastAsianWidth.
Copyright (C) 2021 Nikolay Nikolov <nickysn@users.sourceforge.net>
This source is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.
This code is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.
A copy of the GNU General Public License is available on the World Wide Web
at <http://www.gnu.org/copyleft/gpl.html>. You can also obtain it by writing
to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
Boston, MA 02110-1335, USA.
}
program eawparser;
{$mode objfpc}{$H+}
uses
SysUtils, StrUtils;
type
TEastAsianWidth = (
eawN,
eawA,
eawF,
eawH,
eawNa,
eawW);
TRange = record
RangeLo, RangeHi: UCS4Char;
end;
TRanges = array of TRange;
var
EastAsianWidths: array [UCS4Char] of TEastAsianWidth;
EAWStats: array [TEastAsianWidth] of record
Exists: Boolean;
Handled: Boolean;
MinValue: UCS4Char;
MaxValue: UCS4Char;
Count: LongInt;
Ranges: TRanges;
end;
function ParseEastAsianWidth(S: string): TEastAsianWidth;
begin
S := Trim(S);
case S of
'N':
Result := eawN;
'A':
Result := eawA;
'F':
Result := eawF;
'H':
Result := eawH;
'Na':
Result := eawNa;
'W':
Result := eawW;
else
raise EArgumentException('Unknown east asian width: ''' + S + '''');
end;
end;
procedure ParseRange(S: string; out RangeLo, RangeHi: UCS4Char);
var
dp: SizeInt;
begin
S := Trim(S);
dp := Pos('..', S);
if dp > 0 then
begin
RangeLo := StrToInt('$' + LeftStr(S, dp - 1));
RangeHi := StrToInt('$' + Copy(S, dp + 2, Length(S) - dp + 3));
end
else
begin
RangeLo := StrToInt('$' + S);
RangeHi := RangeLo;
end;
end;
procedure ParseEastAsianWidths(const FileName: string);
var
InF: TextFile;
S: string;
SplitS: TStringArray;
LineNr: Integer = 0;
eaw: TEastAsianWidth;
RangeLo, RangeHi, R: UCS4Char;
begin
{ - All code points, assigned or unassigned, that are not listed
explicitly are given the value "N". }
for R in UCS4Char do
EastAsianWidths[R] := eawN;
{ - The unassigned code points in the following blocks default to "W":
CJK Unified Ideographs Extension A: U+3400..U+4DBF
CJK Unified Ideographs: U+4E00..U+9FFF
CJK Compatibility Ideographs: U+F900..U+FAFF }
for R := $3400 to $4DBF do
EastAsianWidths[R] := eawW;
for R := $4E00 to $9FFF do
EastAsianWidths[R] := eawW;
for R := $F900 to $FAFF do
EastAsianWidths[R] := eawW;
{ - All undesignated code points in Planes 2 and 3, whether inside or
outside of allocated blocks, default to "W":
Plane 2: U+20000..U+2FFFD
Plane 3: U+30000..U+3FFFD }
for R := $20000 to $2FFFD do
EastAsianWidths[R] := eawW;
for R := $30000 to $3FFFD do
EastAsianWidths[R] := eawW;
if not FileExists(FileName) then
begin
Writeln('File doesn''t exist: ', FileName);
Halt(1);
end;
AssignFile(InF, FileName);
Reset(InF);
while not EoF(InF) do
begin
Inc(LineNr);
Readln(InF, S);
S := Trim(S);
if Pos('#', S) > 0 then
S := LeftStr(S, Pos('#', S) - 1);
if S <> '' then
begin
SplitS := S.Split([';']);
if Length(SplitS) <> 2 then
raise Exception.Create('Invalid number of ; separators on line ' + IntToStr(LineNr));
ParseRange(SplitS[0], RangeLo, RangeHi);
eaw := ParseEastAsianWidth(SplitS[1]);
for R := RangeLo to RangeHi do
EastAsianWidths[R] := eaw;
end;
end;
CloseFile(InF);
end;
procedure CalcStatsAndRanges;
var
Ch: UCS4Char;
eaw, prev_eaw: TEastAsianWidth;
begin
FillChar(EAWStats, SizeOf(EAWStats), 0);
eaw := Low(TEastAsianWidth);
for Ch := Low(UCS4Char) to High(UCS4Char) do
begin
prev_eaw := eaw;
eaw := EastAsianWidths[Ch];
with EAWStats[eaw] do
begin
if not Exists then
begin
Exists := True;
MinValue := Ch;
MaxValue := Ch;
Count := 1;
SetLength(Ranges, 1);
Ranges[0].RangeLo := Ch;
Ranges[0].RangeHi := Ch;
end
else
begin
MaxValue := Ch;
Inc(Count);
if prev_eaw <> eaw then
begin
SetLength(Ranges, Length(Ranges) + 1);
with Ranges[High(Ranges)] do
begin
RangeLo := Ch;
RangeHi := Ch;
end;
end
else
Ranges[High(Ranges)].RangeHi := Ch;
end;
end;
end;
end;
procedure MaybeCoalesceRanges(RLo, RHi: UCS4Char);
var
eaw: TEastAsianWidth;
RI: Integer;
begin
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
if EAWStats[eaw].Exists and (not EAWStats[eaw].Handled) then
begin
for RI := 0 to High(EAWStats[eaw].Ranges) - 1 do
if (EAWStats[eaw].Ranges[RI].RangeHi = (RLo - 1)) and
(EAWStats[eaw].Ranges[RI + 1].RangeLo = (RHi + 1)) then
begin
EAWStats[eaw].Ranges[RI].RangeHi := EAWStats[eaw].Ranges[RI + 1].RangeHi;
Delete(EAWStats[eaw].Ranges, RI + 1, 1);
exit;
end;
end;
end;
function FindMinRangeCount: Integer;
var
eaw: TEastAsianWidth;
begin
Result := High(Integer);
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
if EAWStats[eaw].Exists and (not EAWStats[eaw].Handled) and (Length(EAWStats[eaw].Ranges) < Result) then
Result := Length(EAWStats[eaw].Ranges);
end;
procedure GenCode(const OutFileName: string);
const
RangeCountThreshold = 30{400};
var
eaw: TEastAsianWidth;
RI, NextRangeCount: Integer;
OutFile: TextFile;
begin
Writeln('Generating file: ', OutFileName);
AssignFile(OutFile, OutFileName);
Rewrite(OutFile);
Writeln(OutFile, '{ do not edit, this file is autogenerated by the eawparser tool }');
{ unused properties are already handled }
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
if not EAWStats[eaw].Exists then
EAWStats[eaw].Handled := True;
{ handle single codepoints first }
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
if (not EAWStats[eaw].Handled) and (EAWStats[eaw].Count = 1) then
begin
if EAWStats[eaw].MinValue <> EAWStats[eaw].MaxValue then
raise Exception.Create('Internal error');
Writeln(OutFile, 'if Ch=', EAWStats[eaw].MinValue, 'then result:=',eaw,' else');
EAWStats[eaw].Handled := True;
MaybeCoalesceRanges(EAWStats[eaw].MinValue, EAWStats[eaw].MaxValue);
end;
{ handle single range codepoints next }
while FindMinRangeCount = 1 do
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
if (not EAWStats[eaw].Handled) and (Length(EAWStats[eaw].Ranges) = 1) then
begin
Writeln(OutFile, 'if(Ch>=', EAWStats[eaw].MinValue, ')and(Ch<=', EAWStats[eaw].MaxValue, ')then result:=',eaw,' else');
EAWStats[eaw].Handled := True;
MaybeCoalesceRanges(EAWStats[eaw].MinValue, EAWStats[eaw].MaxValue);
end;
repeat
NextRangeCount := FindMinRangeCount;
if NextRangeCount <= RangeCountThreshold then
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
begin
if not EAWStats[eaw].Handled and (Length(EAWStats[eaw].Ranges) <= NextRangeCount) then
begin
EAWStats[eaw].Handled := True;
Write(OutFile, 'if');
for RI := 0 to High(EAWStats[eaw].Ranges) do
begin
if RI <> 0 then
Writeln(OutFile, 'or');
with EAWStats[eaw].Ranges[RI] do
begin
if RangeLo = RangeHi then
Write(OutFile, '(Ch=', RangeLo, ')')
else
Write(OutFile, '((Ch>=', RangeLo, ')and(Ch<=', RangeHi, '))');
MaybeCoalesceRanges(RangeLo, RangeHi);
end;
end;
Writeln(OutFile, 'then result:=',eaw,' else');
end;
end;
until NextRangeCount > RangeCountThreshold;
if NextRangeCount <> High(Integer) then
begin
//for eaw := Succ(Low(TGraphemeBreakProperty)) to High(TGraphemeBreakProperty) do
// if not EAWStats[eaw].Handled then
// Writeln(eaw, ' ', EAWStats[eaw].MinValue, '..', EAWStats[eaw].MaxValue, ' ', EAWStats[eaw].Count, ' ', Length(EAWStats[eaw].Ranges), ' ', (EAWStats[eaw].MaxValue - EAWStats[eaw].MinValue + 7) div 8);
Writeln(OutFile, 'case Ch of');
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
begin
if not EAWStats[eaw].Handled then
begin
EAWStats[eaw].Handled := True;
for RI := 0 to High(EAWStats[eaw].Ranges) do
begin
if RI <> 0 then
Writeln(OutFile, ',');
with EAWStats[eaw].Ranges[RI] do
begin
if RangeLo = RangeHi then
Write(OutFile, RangeLo)
else
Write(OutFile, RangeLo, '..', RangeHi);
end;
end;
Writeln(OutFile, ':result:=', eaw, ';');
end;
end;
Writeln(OutFile, 'else result:=eawN end');
end
else
Writeln(OutFile, 'result:=eawN');
CloseFile(OutFile);
end;
begin
ParseEastAsianWidths('data/UCD/EastAsianWidth.txt');
CalcStatsAndRanges;
GenCode('eastasianwidth_code.inc');
Writeln('Done');
end.

View File

@ -61,6 +61,7 @@ begin
T:=P.Targets.AddProgram('cldrparser.lpr');
T:=P.Targets.AddProgram('unihelper.lpr');
T:=P.Targets.AddProgram('gbpparser.lpr');
T:=P.Targets.AddProgram('eawparser.lpr');
end;
end;