mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-09 02:48:14 +02:00
* synchronized with trunk
git-svn-id: branches/unicodekvm@48756 -
This commit is contained in:
commit
cd8b0036d4
5
.gitattributes
vendored
5
.gitattributes
vendored
@ -9097,6 +9097,8 @@ packages/rtl-unicode/src/inc/cp936.pas svneol=native#text/pascal
|
||||
packages/rtl-unicode/src/inc/cp949.pas svneol=native#text/pascal
|
||||
packages/rtl-unicode/src/inc/cp950.pas svneol=native#text/pascal
|
||||
packages/rtl-unicode/src/inc/cpbuildu.pp svneol=native#text/plain
|
||||
packages/rtl-unicode/src/inc/eastasianwidth.pp svneol=native#text/plain
|
||||
packages/rtl-unicode/src/inc/eastasianwidth_code.inc svneol=native#text/plain
|
||||
packages/rtl-unicode/src/inc/freebidi.pp svneol=native#text/plain
|
||||
packages/rtl-unicode/src/inc/graphemebreakproperty.pp svneol=native#text/plain
|
||||
packages/rtl-unicode/src/inc/graphemebreakproperty_code.inc svneol=native#text/plain
|
||||
@ -16212,6 +16214,7 @@ tests/test/units/strings/tstrings1.pp svneol=native#text/plain
|
||||
tests/test/units/strutils/taddchar.pp svneol=native#text/plain
|
||||
tests/test/units/strutils/taddcharr.pp svneol=native#text/plain
|
||||
tests/test/units/strutils/tbintohex.pp svneol=native#text/plain
|
||||
tests/test/units/strutils/tboyer.pp svneol=native#text/pascal
|
||||
tests/test/units/strutils/tdec2numb.pp svneol=native#text/plain
|
||||
tests/test/units/strutils/thex2dec.pp svneol=native#text/plain
|
||||
tests/test/units/strutils/thextobin.pp svneol=native#text/plain
|
||||
@ -19836,6 +19839,8 @@ utils/unicode/cldrtest.pas svneol=native#text/pascal
|
||||
utils/unicode/cldrtxt.pas svneol=native#text/plain
|
||||
utils/unicode/cldrxml.pas svneol=native#text/pascal
|
||||
utils/unicode/data/readme.txt svneol=native#text/plain
|
||||
utils/unicode/eawparser.lpi svneol=native#text/plain
|
||||
utils/unicode/eawparser.lpr svneol=native#text/pascal
|
||||
utils/unicode/fpmake.pp svneol=native#text/plain
|
||||
utils/unicode/gbpparser.lpi svneol=native#text/plain
|
||||
utils/unicode/gbpparser.lpr svneol=native#text/pascal
|
||||
|
@ -429,8 +429,7 @@ begin
|
||||
AddMatch(i+1);
|
||||
//Only first match ?
|
||||
if not aMatchAll then break;
|
||||
inc(i,OldPatternSize);
|
||||
inc(i,OldPatternSize);
|
||||
inc(i,DeltaJumpTable2[0]);
|
||||
end else begin
|
||||
i:=i + Max(DeltaJumpTable1[ord(s[i])],DeltaJumpTable2[j]);
|
||||
end;
|
||||
@ -582,8 +581,7 @@ begin
|
||||
AddMatch(i+1);
|
||||
//Only first match ?
|
||||
if not aMatchAll then break;
|
||||
inc(i,OldPatternSize);
|
||||
inc(i,OldPatternSize);
|
||||
inc(i,DeltaJumpTable2[0]);
|
||||
end else begin
|
||||
i:=i + Max(DeltaJumpTable1[Ord(lCaseArray[Ord(s[i])])],DeltaJumpTable2[j]);
|
||||
end;
|
||||
|
@ -16,12 +16,13 @@ Const
|
||||
CPUnits = [aix,amiga,aros,android,beos,darwin,iphonesim,ios,emx,gba,nds,freebsd,go32v2,haiku,linux,morphos,netbsd,netware,netwlibc,openbsd,os2,solaris,watcom,wii,win32,win64,wince,dragonfly,freertos];
|
||||
utf8bidiOSes = [netware,netwlibc];
|
||||
freebidiOSes = [netware,netwlibc];
|
||||
GraphemeBreakPropertyOSes = AllOSes;
|
||||
GraphemeBreakPropertyOSes = AllOSes-[embedded,zxspectrum,msxdos,amstradcpc];
|
||||
EastAsianWidthOSes = AllOSes-[embedded,zxspectrum,msxdos,amstradcpc];
|
||||
|
||||
// Character not movable because fpwidestring depends on it.
|
||||
// CharacterOSes = [android,darwin,freebsd,linux,netbsd,openbsd,solaris,win32,win64,dragonfly];
|
||||
|
||||
UnicodeAllOSes = CollationOSes + utf8bidiOSes + freebidiOSes + CPUnits + GraphemeBreakPropertyOSes;
|
||||
UnicodeAllOSes = CollationOSes + utf8bidiOSes + freebidiOSes + CPUnits + GraphemeBreakPropertyOSes + EastAsianWidthOSes;
|
||||
|
||||
// Amiga has a crt in its RTL dir, but it is commented in the makefile
|
||||
|
||||
@ -145,6 +146,12 @@ begin
|
||||
begin
|
||||
AddInclude('graphemebreakproperty_code.inc');
|
||||
end;
|
||||
|
||||
T:=P.Targets.AddUnit('eastasianwidth.pp',EastAsianWidthOSes);
|
||||
with T.Dependencies do
|
||||
begin
|
||||
AddInclude('eastasianwidth_code.inc');
|
||||
end;
|
||||
end
|
||||
end;
|
||||
|
||||
|
55
packages/rtl-unicode/src/inc/eastasianwidth.pp
Normal file
55
packages/rtl-unicode/src/inc/eastasianwidth.pp
Normal file
@ -0,0 +1,55 @@
|
||||
{ EastAsianWidth Unicode data unit.
|
||||
|
||||
Copyright (C) 2021 Nikolay Nikolov <nickysn@users.sourceforge.net>
|
||||
|
||||
This library is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Library General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or (at your
|
||||
option) any later version with the following modification:
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent modules,and
|
||||
to copy and distribute the resulting executable under terms of your choice,
|
||||
provided that you also meet, for each linked independent module, the terms
|
||||
and conditions of the license of that module. An independent module is a
|
||||
module which is not derived from or based on this library. If you modify
|
||||
this library, you may extend this exception to your version of the library,
|
||||
but you are not obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU Library General Public License
|
||||
along with this library; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1335, USA.
|
||||
}
|
||||
|
||||
unit eastasianwidth;
|
||||
|
||||
{$MODE objfpc}
|
||||
|
||||
interface
|
||||
|
||||
type
|
||||
TEastAsianWidth = (
|
||||
eawN,
|
||||
eawA,
|
||||
eawF,
|
||||
eawH,
|
||||
eawNa,
|
||||
eawW);
|
||||
|
||||
function GetEastAsianWidth(Ch: UCS4Char): TEastAsianWidth;
|
||||
|
||||
implementation
|
||||
|
||||
function GetEastAsianWidth(Ch: UCS4Char): TEastAsianWidth;
|
||||
begin
|
||||
{$I eastasianwidth_code.inc}
|
||||
end;
|
||||
|
||||
end.
|
300
packages/rtl-unicode/src/inc/eastasianwidth_code.inc
Normal file
300
packages/rtl-unicode/src/inc/eastasianwidth_code.inc
Normal file
@ -0,0 +1,300 @@
|
||||
{ do not edit, this file is autogenerated by the eawparser tool }
|
||||
if(Ch=12288)or
|
||||
((Ch>=65281)and(Ch<=65376))or
|
||||
((Ch>=65504)and(Ch<=65510))then result:=eawF else
|
||||
if(Ch=8361)or
|
||||
((Ch>=65377)and(Ch<=65470))or
|
||||
((Ch>=65474)and(Ch<=65479))or
|
||||
((Ch>=65482)and(Ch<=65487))or
|
||||
((Ch>=65490)and(Ch<=65495))or
|
||||
((Ch>=65498)and(Ch<=65500))or
|
||||
((Ch>=65512)and(Ch<=65518))then result:=eawH else
|
||||
if((Ch>=32)and(Ch<=126))or
|
||||
((Ch>=162)and(Ch<=163))or
|
||||
((Ch>=165)and(Ch<=166))or
|
||||
(Ch=172)or
|
||||
(Ch=175)or
|
||||
((Ch>=10214)and(Ch<=10221))or
|
||||
((Ch>=10629)and(Ch<=10630))then result:=eawNa else
|
||||
case Ch of
|
||||
161..168,
|
||||
170,
|
||||
173..180,
|
||||
182..186,
|
||||
188..191,
|
||||
198,
|
||||
208,
|
||||
215..216,
|
||||
222..225,
|
||||
230,
|
||||
232..234,
|
||||
236..237,
|
||||
240,
|
||||
242..243,
|
||||
247..250,
|
||||
252,
|
||||
254,
|
||||
257,
|
||||
273,
|
||||
275,
|
||||
283,
|
||||
294..295,
|
||||
299,
|
||||
305..307,
|
||||
312,
|
||||
319..322,
|
||||
324,
|
||||
328..331,
|
||||
333,
|
||||
338..339,
|
||||
358..359,
|
||||
363,
|
||||
462,
|
||||
464,
|
||||
466,
|
||||
468,
|
||||
470,
|
||||
472,
|
||||
474,
|
||||
476,
|
||||
593,
|
||||
609,
|
||||
708,
|
||||
711,
|
||||
713..715,
|
||||
717,
|
||||
720,
|
||||
728..731,
|
||||
733,
|
||||
735,
|
||||
768..879,
|
||||
913..929,
|
||||
931..937,
|
||||
945..961,
|
||||
963..969,
|
||||
1025,
|
||||
1040..1103,
|
||||
1105,
|
||||
8208,
|
||||
8211..8214,
|
||||
8216..8217,
|
||||
8220..8221,
|
||||
8224..8226,
|
||||
8228..8231,
|
||||
8240,
|
||||
8242..8243,
|
||||
8245,
|
||||
8251,
|
||||
8254,
|
||||
8308,
|
||||
8319,
|
||||
8321..8324,
|
||||
8364,
|
||||
8451,
|
||||
8453,
|
||||
8457,
|
||||
8467,
|
||||
8470,
|
||||
8481..8482,
|
||||
8486,
|
||||
8491,
|
||||
8531..8532,
|
||||
8539..8542,
|
||||
8544..8555,
|
||||
8560..8569,
|
||||
8585,
|
||||
8592..8601,
|
||||
8632..8633,
|
||||
8658,
|
||||
8660,
|
||||
8679,
|
||||
8704,
|
||||
8706..8707,
|
||||
8711..8712,
|
||||
8715,
|
||||
8719,
|
||||
8721,
|
||||
8725,
|
||||
8730,
|
||||
8733..8736,
|
||||
8739,
|
||||
8741,
|
||||
8743..8748,
|
||||
8750,
|
||||
8756..8759,
|
||||
8764..8765,
|
||||
8776,
|
||||
8780,
|
||||
8786,
|
||||
8800..8801,
|
||||
8804..8807,
|
||||
8810..8811,
|
||||
8814..8815,
|
||||
8834..8835,
|
||||
8838..8839,
|
||||
8853,
|
||||
8857,
|
||||
8869,
|
||||
8895,
|
||||
8978,
|
||||
9312..9449,
|
||||
9451..9547,
|
||||
9552..9587,
|
||||
9600..9615,
|
||||
9618..9621,
|
||||
9632..9633,
|
||||
9635..9641,
|
||||
9650..9651,
|
||||
9654..9655,
|
||||
9660..9661,
|
||||
9664..9665,
|
||||
9670..9672,
|
||||
9675,
|
||||
9678..9681,
|
||||
9698..9701,
|
||||
9711,
|
||||
9733..9734,
|
||||
9737,
|
||||
9742..9743,
|
||||
9756,
|
||||
9758,
|
||||
9792,
|
||||
9794,
|
||||
9824..9825,
|
||||
9827..9829,
|
||||
9831..9834,
|
||||
9836..9837,
|
||||
9839,
|
||||
9886..9887,
|
||||
9919,
|
||||
9926..9933,
|
||||
9935..9939,
|
||||
9941..9953,
|
||||
9955,
|
||||
9960..9961,
|
||||
9963..9969,
|
||||
9972,
|
||||
9974..9977,
|
||||
9979..9980,
|
||||
9982..9983,
|
||||
10045,
|
||||
10102..10111,
|
||||
11094..11097,
|
||||
12872..12879,
|
||||
57344..63743,
|
||||
65024..65039,
|
||||
65533,
|
||||
127232..127242,
|
||||
127248..127277,
|
||||
127280..127337,
|
||||
127344..127373,
|
||||
127375..127376,
|
||||
127387..127404,
|
||||
917760..917999,
|
||||
983040..1048573,
|
||||
1048576..1114109:result:=eawA;
|
||||
4352..4447,
|
||||
8986..8987,
|
||||
9001..9002,
|
||||
9193..9196,
|
||||
9200,
|
||||
9203,
|
||||
9725..9726,
|
||||
9748..9749,
|
||||
9800..9811,
|
||||
9855,
|
||||
9875,
|
||||
9889,
|
||||
9898..9899,
|
||||
9917..9918,
|
||||
9924..9925,
|
||||
9934,
|
||||
9940,
|
||||
9962,
|
||||
9970..9971,
|
||||
9973,
|
||||
9978,
|
||||
9981,
|
||||
9989,
|
||||
9994..9995,
|
||||
10024,
|
||||
10060,
|
||||
10062,
|
||||
10067..10069,
|
||||
10071,
|
||||
10133..10135,
|
||||
10160,
|
||||
10175,
|
||||
11035..11036,
|
||||
11088,
|
||||
11093,
|
||||
11904..11929,
|
||||
11931..12019,
|
||||
12032..12245,
|
||||
12272..12283,
|
||||
12289..12350,
|
||||
12353..12438,
|
||||
12441..12543,
|
||||
12549..12589,
|
||||
12593..12686,
|
||||
12688..12730,
|
||||
12736..12771,
|
||||
12784..12830,
|
||||
12832..12871,
|
||||
12880..13054,
|
||||
13056..19903,
|
||||
19968..42124,
|
||||
42128..42182,
|
||||
43360..43388,
|
||||
44032..55203,
|
||||
63744..64255,
|
||||
65040..65049,
|
||||
65072..65106,
|
||||
65108..65126,
|
||||
65128..65131,
|
||||
94176,
|
||||
94208..100332,
|
||||
100352..101106,
|
||||
110592..110593,
|
||||
126980,
|
||||
127183,
|
||||
127374,
|
||||
127377..127386,
|
||||
127488..127490,
|
||||
127504..127547,
|
||||
127552..127560,
|
||||
127568..127569,
|
||||
127744..127776,
|
||||
127789..127797,
|
||||
127799..127868,
|
||||
127870..127891,
|
||||
127904..127946,
|
||||
127951..127955,
|
||||
127968..127984,
|
||||
127988,
|
||||
127992..128062,
|
||||
128064,
|
||||
128066..128252,
|
||||
128255..128317,
|
||||
128331..128334,
|
||||
128336..128359,
|
||||
128378,
|
||||
128405..128406,
|
||||
128420,
|
||||
128507..128591,
|
||||
128640..128709,
|
||||
128716,
|
||||
128720..128722,
|
||||
128747..128748,
|
||||
128756..128758,
|
||||
129296..129310,
|
||||
129312..129319,
|
||||
129328,
|
||||
129331..129342,
|
||||
129344..129355,
|
||||
129360..129374,
|
||||
129408..129425,
|
||||
129472,
|
||||
131072..196605,
|
||||
196608..262141:result:=eawW;
|
||||
else result:=eawN end
|
@ -374,7 +374,7 @@ CPU_SPECIFIC_COMMON_UNITS=
|
||||
ifeq ($(ARCH),arm)
|
||||
CPU_SPECIFIC_COMMON_UNITS=sysutils math classes fgl macpas typinfo types rtlconsts getopts lineinfo
|
||||
ifeq ($(SUBARCH),armv7m)
|
||||
CPU_UNITS=lm3fury lm3tempest stm32f10x_ld stm32f10x_md stm32f10x_hd stm32f10x_xl stm32f10x_conn stm32f10x_cl lpc13xx lpc1768 lm4f120 sam3x8e xmc4500 cortexm3 cortexm4 # thumb2_bare
|
||||
CPU_UNITS=lm3fury lm3tempest stm32f10x_ld stm32f10x_md stm32f10x_hd stm32f10x_xl stm32f10x_conn stm32f10x_cl lpc13xx lpc1768 sam3x8e xmc4500 cortexm3 cortexm4 # thumb2_bare
|
||||
CPU_UNITS_DEFINED=1
|
||||
endif
|
||||
ifeq ($(SUBARCH),armv7em)
|
||||
|
@ -71,7 +71,7 @@ CPU_SPECIFIC_COMMON_UNITS=
|
||||
ifeq ($(ARCH),arm)
|
||||
CPU_SPECIFIC_COMMON_UNITS=sysutils math classes fgl macpas typinfo types rtlconsts getopts lineinfo
|
||||
ifeq ($(SUBARCH),armv7m)
|
||||
CPU_UNITS=lm3fury lm3tempest stm32f10x_ld stm32f10x_md stm32f10x_hd stm32f10x_xl stm32f10x_conn stm32f10x_cl lpc13xx lpc1768 lm4f120 sam3x8e xmc4500 cortexm3 cortexm4 # thumb2_bare
|
||||
CPU_UNITS=lm3fury lm3tempest stm32f10x_ld stm32f10x_md stm32f10x_hd stm32f10x_xl stm32f10x_conn stm32f10x_cl lpc13xx lpc1768 sam3x8e xmc4500 cortexm3 cortexm4 # thumb2_bare
|
||||
CPU_UNITS_DEFINED=1
|
||||
endif
|
||||
ifeq ($(SUBARCH),armv7em)
|
||||
|
79
tests/test/units/strutils/tboyer.pp
Normal file
79
tests/test/units/strutils/tboyer.pp
Normal file
@ -0,0 +1,79 @@
|
||||
{$mode objfpc}
|
||||
|
||||
uses
|
||||
StrUtils;
|
||||
const
|
||||
result1 : array of SizeInt = (1, 4, 7, 10, 13, 16);
|
||||
var
|
||||
a : array of SizeInt;
|
||||
i : LongInt;
|
||||
begin
|
||||
if FindMatchesBoyerMooreCaseSensitive('abcabcabcabcabcabcab','abcab',a,false) then
|
||||
begin
|
||||
if Length(a)<>1 then
|
||||
halt(2);
|
||||
if a[0]<>result1[0] then
|
||||
halt(3);
|
||||
end
|
||||
else
|
||||
halt(1);
|
||||
|
||||
if FindMatchesBoyerMooreCaseSensitive('abcabcabcabcabcabcab','abcab',a,true) then
|
||||
begin
|
||||
if Length(a)<>Length(result1) then
|
||||
halt(12);
|
||||
for i:=Low(a) to High(a) do
|
||||
if a[i]<>result1[i] then
|
||||
halt(13);
|
||||
end
|
||||
else
|
||||
halt(11);
|
||||
|
||||
if FindMatchesBoyerMooreCaseInSensitive('abcabcabcabcabcabcab','abcab',a,false) then
|
||||
begin
|
||||
if Length(a)<>1 then
|
||||
halt(22);
|
||||
if a[0]<>result1[0] then
|
||||
halt(23);
|
||||
end
|
||||
else
|
||||
halt(21);
|
||||
|
||||
{
|
||||
apparently not working yet:
|
||||
|
||||
if FindMatchesBoyerMooreCaseInSensitive('abcabcabcabcabcabcab','abcab',a,true) then
|
||||
begin
|
||||
if Length(a)<>Length(result1) then
|
||||
halt(32);
|
||||
for i:=Low(a) to High(a) do
|
||||
if a[i]<>result1[i] then
|
||||
halt(33);
|
||||
end
|
||||
else
|
||||
halt(31);
|
||||
|
||||
if FindMatchesBoyerMooreCaseInSensitive('abcabcabcAbcabcAbcab','abcaB',a,false) then
|
||||
begin
|
||||
if Length(a)<>1 then
|
||||
halt(42);
|
||||
if a[0]<>result1[0] then
|
||||
halt(43);
|
||||
end
|
||||
else
|
||||
halt(41);
|
||||
|
||||
if FindMatchesBoyerMooreCaseInSensitive('abcabCabcAbcabcABcab','abcaB',a,true) then
|
||||
begin
|
||||
if Length(a)<>Length(result1) then
|
||||
halt(52);
|
||||
for i:=Low(a) to High(a) do
|
||||
if a[i]<>result1[i] then
|
||||
halt(53);
|
||||
end
|
||||
else
|
||||
halt(51);
|
||||
}
|
||||
|
||||
writeln('ok');
|
||||
end.
|
58
utils/unicode/eawparser.lpi
Normal file
58
utils/unicode/eawparser.lpi
Normal file
@ -0,0 +1,58 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CONFIG>
|
||||
<ProjectOptions>
|
||||
<Version Value="11"/>
|
||||
<General>
|
||||
<Flags>
|
||||
<MainUnitHasCreateFormStatements Value="False"/>
|
||||
<MainUnitHasTitleStatement Value="False"/>
|
||||
<MainUnitHasScaledStatement Value="False"/>
|
||||
</Flags>
|
||||
<SessionStorage Value="InProjectDir"/>
|
||||
<MainUnit Value="0"/>
|
||||
<Title Value="eawparser"/>
|
||||
<UseAppBundle Value="False"/>
|
||||
<ResourceType Value="res"/>
|
||||
</General>
|
||||
<BuildModes Count="1">
|
||||
<Item1 Name="Default" Default="True"/>
|
||||
</BuildModes>
|
||||
<PublishOptions>
|
||||
<Version Value="2"/>
|
||||
<UseFileFilters Value="True"/>
|
||||
</PublishOptions>
|
||||
<RunParams>
|
||||
<FormatVersion Value="2"/>
|
||||
<Modes Count="0"/>
|
||||
</RunParams>
|
||||
<Units Count="1">
|
||||
<Unit0>
|
||||
<Filename Value="eawparser.lpr"/>
|
||||
<IsPartOfProject Value="True"/>
|
||||
</Unit0>
|
||||
</Units>
|
||||
</ProjectOptions>
|
||||
<CompilerOptions>
|
||||
<Version Value="11"/>
|
||||
<Target>
|
||||
<Filename Value="eawparser"/>
|
||||
</Target>
|
||||
<SearchPaths>
|
||||
<IncludeFiles Value="$(ProjOutDir)"/>
|
||||
<UnitOutputDirectory Value="lib/$(TargetCPU)-$(TargetOS)"/>
|
||||
</SearchPaths>
|
||||
</CompilerOptions>
|
||||
<Debugging>
|
||||
<Exceptions Count="3">
|
||||
<Item1>
|
||||
<Name Value="EAbort"/>
|
||||
</Item1>
|
||||
<Item2>
|
||||
<Name Value="ECodetoolError"/>
|
||||
</Item2>
|
||||
<Item3>
|
||||
<Name Value="EFOpenError"/>
|
||||
</Item3>
|
||||
</Exceptions>
|
||||
</Debugging>
|
||||
</CONFIG>
|
332
utils/unicode/eawparser.lpr
Normal file
332
utils/unicode/eawparser.lpr
Normal file
@ -0,0 +1,332 @@
|
||||
{ Parser and code generator for the EastAsianWidth.
|
||||
|
||||
Copyright (C) 2021 Nikolay Nikolov <nickysn@users.sourceforge.net>
|
||||
|
||||
This source is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 2 of the License, or (at your option)
|
||||
any later version.
|
||||
|
||||
This code is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
details.
|
||||
|
||||
A copy of the GNU General Public License is available on the World Wide Web
|
||||
at <http://www.gnu.org/copyleft/gpl.html>. You can also obtain it by writing
|
||||
to the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor,
|
||||
Boston, MA 02110-1335, USA.
|
||||
}
|
||||
|
||||
program eawparser;
|
||||
|
||||
{$mode objfpc}{$H+}
|
||||
|
||||
uses
|
||||
SysUtils, StrUtils;
|
||||
|
||||
type
|
||||
TEastAsianWidth = (
|
||||
eawN,
|
||||
eawA,
|
||||
eawF,
|
||||
eawH,
|
||||
eawNa,
|
||||
eawW);
|
||||
|
||||
TRange = record
|
||||
RangeLo, RangeHi: UCS4Char;
|
||||
end;
|
||||
TRanges = array of TRange;
|
||||
|
||||
var
|
||||
EastAsianWidths: array [UCS4Char] of TEastAsianWidth;
|
||||
EAWStats: array [TEastAsianWidth] of record
|
||||
Exists: Boolean;
|
||||
Handled: Boolean;
|
||||
MinValue: UCS4Char;
|
||||
MaxValue: UCS4Char;
|
||||
Count: LongInt;
|
||||
Ranges: TRanges;
|
||||
end;
|
||||
|
||||
function ParseEastAsianWidth(S: string): TEastAsianWidth;
|
||||
begin
|
||||
S := Trim(S);
|
||||
case S of
|
||||
'N':
|
||||
Result := eawN;
|
||||
'A':
|
||||
Result := eawA;
|
||||
'F':
|
||||
Result := eawF;
|
||||
'H':
|
||||
Result := eawH;
|
||||
'Na':
|
||||
Result := eawNa;
|
||||
'W':
|
||||
Result := eawW;
|
||||
else
|
||||
raise EArgumentException('Unknown east asian width: ''' + S + '''');
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure ParseRange(S: string; out RangeLo, RangeHi: UCS4Char);
|
||||
var
|
||||
dp: SizeInt;
|
||||
begin
|
||||
S := Trim(S);
|
||||
dp := Pos('..', S);
|
||||
if dp > 0 then
|
||||
begin
|
||||
RangeLo := StrToInt('$' + LeftStr(S, dp - 1));
|
||||
RangeHi := StrToInt('$' + Copy(S, dp + 2, Length(S) - dp + 3));
|
||||
end
|
||||
else
|
||||
begin
|
||||
RangeLo := StrToInt('$' + S);
|
||||
RangeHi := RangeLo;
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure ParseEastAsianWidths(const FileName: string);
|
||||
var
|
||||
InF: TextFile;
|
||||
S: string;
|
||||
SplitS: TStringArray;
|
||||
LineNr: Integer = 0;
|
||||
eaw: TEastAsianWidth;
|
||||
RangeLo, RangeHi, R: UCS4Char;
|
||||
begin
|
||||
{ - All code points, assigned or unassigned, that are not listed
|
||||
explicitly are given the value "N". }
|
||||
for R in UCS4Char do
|
||||
EastAsianWidths[R] := eawN;
|
||||
{ - The unassigned code points in the following blocks default to "W":
|
||||
CJK Unified Ideographs Extension A: U+3400..U+4DBF
|
||||
CJK Unified Ideographs: U+4E00..U+9FFF
|
||||
CJK Compatibility Ideographs: U+F900..U+FAFF }
|
||||
for R := $3400 to $4DBF do
|
||||
EastAsianWidths[R] := eawW;
|
||||
for R := $4E00 to $9FFF do
|
||||
EastAsianWidths[R] := eawW;
|
||||
for R := $F900 to $FAFF do
|
||||
EastAsianWidths[R] := eawW;
|
||||
{ - All undesignated code points in Planes 2 and 3, whether inside or
|
||||
outside of allocated blocks, default to "W":
|
||||
Plane 2: U+20000..U+2FFFD
|
||||
Plane 3: U+30000..U+3FFFD }
|
||||
for R := $20000 to $2FFFD do
|
||||
EastAsianWidths[R] := eawW;
|
||||
for R := $30000 to $3FFFD do
|
||||
EastAsianWidths[R] := eawW;
|
||||
|
||||
if not FileExists(FileName) then
|
||||
begin
|
||||
Writeln('File doesn''t exist: ', FileName);
|
||||
Halt(1);
|
||||
end;
|
||||
AssignFile(InF, FileName);
|
||||
Reset(InF);
|
||||
while not EoF(InF) do
|
||||
begin
|
||||
Inc(LineNr);
|
||||
Readln(InF, S);
|
||||
S := Trim(S);
|
||||
if Pos('#', S) > 0 then
|
||||
S := LeftStr(S, Pos('#', S) - 1);
|
||||
if S <> '' then
|
||||
begin
|
||||
SplitS := S.Split([';']);
|
||||
if Length(SplitS) <> 2 then
|
||||
raise Exception.Create('Invalid number of ; separators on line ' + IntToStr(LineNr));
|
||||
ParseRange(SplitS[0], RangeLo, RangeHi);
|
||||
eaw := ParseEastAsianWidth(SplitS[1]);
|
||||
for R := RangeLo to RangeHi do
|
||||
EastAsianWidths[R] := eaw;
|
||||
end;
|
||||
end;
|
||||
CloseFile(InF);
|
||||
end;
|
||||
|
||||
procedure CalcStatsAndRanges;
|
||||
var
|
||||
Ch: UCS4Char;
|
||||
eaw, prev_eaw: TEastAsianWidth;
|
||||
begin
|
||||
FillChar(EAWStats, SizeOf(EAWStats), 0);
|
||||
eaw := Low(TEastAsianWidth);
|
||||
for Ch := Low(UCS4Char) to High(UCS4Char) do
|
||||
begin
|
||||
prev_eaw := eaw;
|
||||
eaw := EastAsianWidths[Ch];
|
||||
with EAWStats[eaw] do
|
||||
begin
|
||||
if not Exists then
|
||||
begin
|
||||
Exists := True;
|
||||
MinValue := Ch;
|
||||
MaxValue := Ch;
|
||||
Count := 1;
|
||||
SetLength(Ranges, 1);
|
||||
Ranges[0].RangeLo := Ch;
|
||||
Ranges[0].RangeHi := Ch;
|
||||
end
|
||||
else
|
||||
begin
|
||||
MaxValue := Ch;
|
||||
Inc(Count);
|
||||
if prev_eaw <> eaw then
|
||||
begin
|
||||
SetLength(Ranges, Length(Ranges) + 1);
|
||||
with Ranges[High(Ranges)] do
|
||||
begin
|
||||
RangeLo := Ch;
|
||||
RangeHi := Ch;
|
||||
end;
|
||||
end
|
||||
else
|
||||
Ranges[High(Ranges)].RangeHi := Ch;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure MaybeCoalesceRanges(RLo, RHi: UCS4Char);
|
||||
var
|
||||
eaw: TEastAsianWidth;
|
||||
RI: Integer;
|
||||
begin
|
||||
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
|
||||
if EAWStats[eaw].Exists and (not EAWStats[eaw].Handled) then
|
||||
begin
|
||||
for RI := 0 to High(EAWStats[eaw].Ranges) - 1 do
|
||||
if (EAWStats[eaw].Ranges[RI].RangeHi = (RLo - 1)) and
|
||||
(EAWStats[eaw].Ranges[RI + 1].RangeLo = (RHi + 1)) then
|
||||
begin
|
||||
EAWStats[eaw].Ranges[RI].RangeHi := EAWStats[eaw].Ranges[RI + 1].RangeHi;
|
||||
Delete(EAWStats[eaw].Ranges, RI + 1, 1);
|
||||
exit;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
function FindMinRangeCount: Integer;
|
||||
var
|
||||
eaw: TEastAsianWidth;
|
||||
begin
|
||||
Result := High(Integer);
|
||||
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
|
||||
if EAWStats[eaw].Exists and (not EAWStats[eaw].Handled) and (Length(EAWStats[eaw].Ranges) < Result) then
|
||||
Result := Length(EAWStats[eaw].Ranges);
|
||||
end;
|
||||
|
||||
procedure GenCode(const OutFileName: string);
|
||||
const
|
||||
RangeCountThreshold = 30{400};
|
||||
var
|
||||
eaw: TEastAsianWidth;
|
||||
RI, NextRangeCount: Integer;
|
||||
OutFile: TextFile;
|
||||
begin
|
||||
Writeln('Generating file: ', OutFileName);
|
||||
|
||||
AssignFile(OutFile, OutFileName);
|
||||
Rewrite(OutFile);
|
||||
|
||||
Writeln(OutFile, '{ do not edit, this file is autogenerated by the eawparser tool }');
|
||||
|
||||
{ unused properties are already handled }
|
||||
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
|
||||
if not EAWStats[eaw].Exists then
|
||||
EAWStats[eaw].Handled := True;
|
||||
|
||||
{ handle single codepoints first }
|
||||
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
|
||||
if (not EAWStats[eaw].Handled) and (EAWStats[eaw].Count = 1) then
|
||||
begin
|
||||
if EAWStats[eaw].MinValue <> EAWStats[eaw].MaxValue then
|
||||
raise Exception.Create('Internal error');
|
||||
Writeln(OutFile, 'if Ch=', EAWStats[eaw].MinValue, 'then result:=',eaw,' else');
|
||||
EAWStats[eaw].Handled := True;
|
||||
MaybeCoalesceRanges(EAWStats[eaw].MinValue, EAWStats[eaw].MaxValue);
|
||||
end;
|
||||
|
||||
{ handle single range codepoints next }
|
||||
while FindMinRangeCount = 1 do
|
||||
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
|
||||
if (not EAWStats[eaw].Handled) and (Length(EAWStats[eaw].Ranges) = 1) then
|
||||
begin
|
||||
Writeln(OutFile, 'if(Ch>=', EAWStats[eaw].MinValue, ')and(Ch<=', EAWStats[eaw].MaxValue, ')then result:=',eaw,' else');
|
||||
EAWStats[eaw].Handled := True;
|
||||
MaybeCoalesceRanges(EAWStats[eaw].MinValue, EAWStats[eaw].MaxValue);
|
||||
end;
|
||||
|
||||
repeat
|
||||
NextRangeCount := FindMinRangeCount;
|
||||
if NextRangeCount <= RangeCountThreshold then
|
||||
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
|
||||
begin
|
||||
if not EAWStats[eaw].Handled and (Length(EAWStats[eaw].Ranges) <= NextRangeCount) then
|
||||
begin
|
||||
EAWStats[eaw].Handled := True;
|
||||
Write(OutFile, 'if');
|
||||
for RI := 0 to High(EAWStats[eaw].Ranges) do
|
||||
begin
|
||||
if RI <> 0 then
|
||||
Writeln(OutFile, 'or');
|
||||
with EAWStats[eaw].Ranges[RI] do
|
||||
begin
|
||||
if RangeLo = RangeHi then
|
||||
Write(OutFile, '(Ch=', RangeLo, ')')
|
||||
else
|
||||
Write(OutFile, '((Ch>=', RangeLo, ')and(Ch<=', RangeHi, '))');
|
||||
MaybeCoalesceRanges(RangeLo, RangeHi);
|
||||
end;
|
||||
end;
|
||||
Writeln(OutFile, 'then result:=',eaw,' else');
|
||||
end;
|
||||
end;
|
||||
until NextRangeCount > RangeCountThreshold;
|
||||
|
||||
if NextRangeCount <> High(Integer) then
|
||||
begin
|
||||
//for eaw := Succ(Low(TGraphemeBreakProperty)) to High(TGraphemeBreakProperty) do
|
||||
// if not EAWStats[eaw].Handled then
|
||||
// Writeln(eaw, ' ', EAWStats[eaw].MinValue, '..', EAWStats[eaw].MaxValue, ' ', EAWStats[eaw].Count, ' ', Length(EAWStats[eaw].Ranges), ' ', (EAWStats[eaw].MaxValue - EAWStats[eaw].MinValue + 7) div 8);
|
||||
Writeln(OutFile, 'case Ch of');
|
||||
for eaw := Succ(Low(TEastAsianWidth)) to High(TEastAsianWidth) do
|
||||
begin
|
||||
if not EAWStats[eaw].Handled then
|
||||
begin
|
||||
EAWStats[eaw].Handled := True;
|
||||
for RI := 0 to High(EAWStats[eaw].Ranges) do
|
||||
begin
|
||||
if RI <> 0 then
|
||||
Writeln(OutFile, ',');
|
||||
with EAWStats[eaw].Ranges[RI] do
|
||||
begin
|
||||
if RangeLo = RangeHi then
|
||||
Write(OutFile, RangeLo)
|
||||
else
|
||||
Write(OutFile, RangeLo, '..', RangeHi);
|
||||
end;
|
||||
end;
|
||||
Writeln(OutFile, ':result:=', eaw, ';');
|
||||
end;
|
||||
end;
|
||||
Writeln(OutFile, 'else result:=eawN end');
|
||||
end
|
||||
else
|
||||
Writeln(OutFile, 'result:=eawN');
|
||||
|
||||
CloseFile(OutFile);
|
||||
end;
|
||||
|
||||
begin
|
||||
ParseEastAsianWidths('data/UCD/EastAsianWidth.txt');
|
||||
CalcStatsAndRanges;
|
||||
GenCode('eastasianwidth_code.inc');
|
||||
Writeln('Done');
|
||||
end.
|
||||
|
@ -61,6 +61,7 @@ begin
|
||||
T:=P.Targets.AddProgram('cldrparser.lpr');
|
||||
T:=P.Targets.AddProgram('unihelper.lpr');
|
||||
T:=P.Targets.AddProgram('gbpparser.lpr');
|
||||
T:=P.Targets.AddProgram('eawparser.lpr');
|
||||
|
||||
end;
|
||||
end;
|
||||
|
Loading…
Reference in New Issue
Block a user