rtl: apply patch of Inoussa with Incremental Implementation of the Unicode Collation Algorithm (mantis #0024873)

git-svn-id: trunk@25296 -
This commit is contained in:
paul 2013-08-19 16:35:12 +00:00
parent f285948fcb
commit 7ac3647ff2
10 changed files with 342780 additions and 45 deletions

5
.gitattributes vendored
View File

@ -11982,6 +11982,11 @@ tests/test/units/fpcunit/testclasses.lpr svneol=native#text/plain
tests/test/units/fpcunit/testcomps.pp svneol=native#text/plain
tests/test/units/fpcunit/tstrutils.lpi svneol=native#text/plain
tests/test/units/fpcunit/tstrutils.lpr svneol=native#text/plain
tests/test/units/fpwidestring/CollationTest_NON_IGNORABLE_SHORT.txt svneol=native#text/plain
tests/test/units/fpwidestring/CollationTest_SHIFTED_SHORT.txt svneol=native#text/plain
tests/test/units/fpwidestring/tuca1.pp svneol=native#text/pascal
tests/test/units/fpwidestring/tuca2.pp svneol=native#text/pascal
tests/test/units/fpwidestring/tucawsm.pp svneol=native#text/pascal
tests/test/units/lineinfo/tlininfo.pp svneol=native#text/plain
tests/test/units/math/tdivmod.pp svneol=native#text/plain
tests/test/units/math/tmask.inc svneol=native#text/plain

View File

@ -403,16 +403,13 @@ begin
Result:=LowerUnicodeString(u);
end;
function CompareUnicodeStringUCA(p1,p2:PUnicodeChar; l1,l2:PtrInt) : PtrInt;
var
k1, k2 : TUCASortKey;
function CompareUnicodeStringUCA(p1,p2:PUnicodeChar; l1,l2:PtrInt) : PtrInt;inline;
begin
k1 := ComputeSortKey(p1,l1,current_Collation);
k2 := ComputeSortKey(p2,l2,current_Collation);
Result := CompareSortKey(k1,k2);
Result := IncrementalCompareString(p1,l1,p2,l2,current_Collation);
end;
function CompareUnicodeString(p1,p2:PUnicodeChar; l1,l2:PtrInt) : PtrInt;
function CompareUnicodeString(p1,p2:PUnicodeChar; l1,l2:PtrInt) : PtrInt;inline;
begin
if (Pointer(p1)=Pointer(p2)) then
exit(0);

View File

@ -320,6 +320,18 @@ type
) : TUCASortKey;overload;
function CompareSortKey(const A, B : TUCASortKey) : Integer;overload;
function CompareSortKey(const A : TUCASortKey; const B : array of Word) : Integer;overload;
function IncrementalCompareString(
const AStrA : PUnicodeChar;
const ALengthA : SizeInt;
const AStrB : PUnicodeChar;
const ALengthB : SizeInt;
const ACollation : PUCA_DataBook
) : Integer;overload;
function IncrementalCompareString(
const AStrA,
AStrB : UnicodeString;
const ACollation : PUCA_DataBook
) : Integer;inline;overload;
function RegisterCollation(const ACollation : PUCA_DataBook) : Boolean;overload;
function RegisterCollation(
@ -648,11 +660,13 @@ var
i : Integer;
cl : PUCA_DataBook;
begin
for i := Low(CollationTable) to High(CollationTable) do begin
if CollationTable[i].Dynamic then begin
cl := CollationTable[i];
CollationTable[i] := nil;
FreeCollation(cl);
if AFreeDynamicCollations then begin
for i := Low(CollationTable) to High(CollationTable) do begin
if CollationTable[i].Dynamic then begin
cl := CollationTable[i];
CollationTable[i] := nil;
FreeCollation(cl);
end;
end;
end;
SetLength(CollationTable,0);
@ -2379,6 +2393,875 @@ begin
Result := r;
end;
type
TComputeKeyContext = record
Collation : PUCA_DataBook;
r : TUCA_PropWeightsArray;
ral {used length of "r"}: Integer;
rl {capacity of "r"} : Integer;
i : Integer;
s : UnicodeString;
ps : PUnicodeChar;
cp : Cardinal;
cl : PUCA_DataBook;
pp : PUCA_PropItemRec;
ppLevel : Byte;
removedCharIndex : array of DWord;
removedCharIndexLength : DWord;
locHistoryTop : Integer;
locHistory : array[0..24] of record
i : Integer;
cl : PUCA_DataBook;
pp : PUCA_PropItemRec;
ppLevel : Byte;
cp : Cardinal;
removedCharIndexLength : DWord;
end;
suppressState : record
cl : PUCA_DataBook;
CharCount : Integer;
end;
LastKeyOwner : record
Length : Integer;
Chars : array[0..24] of UInt24;
end;
c : Integer;
lastUnblockedNonstarterCCC : Byte;
surrogateState : Boolean;
Finished : Boolean;
end;
PComputeKeyContext = ^TComputeKeyContext;
procedure ClearPP(AContext : PComputeKeyContext; const AClearSuppressInfo : Boolean = True);inline;
begin
AContext^.cl := nil;
AContext^.pp := nil;
AContext^.ppLevel := 0;
if AClearSuppressInfo then begin
AContext^.suppressState.cl := nil;
AContext^.suppressState.CharCount := 0;
end;
end;
procedure InitContext(
AContext : PComputeKeyContext;
const AStr : PUnicodeChar;
const ALength : SizeInt;
const ACollation : PUCA_DataBook
);
begin
AContext^.Collation := ACollation;
AContext^.c := ALength;
AContext^.s := NormalizeNFD(AStr,AContext^.c);
AContext^.c := Length(AContext^.s);
AContext^.rl := 3*AContext^.c;
SetLength(AContext^.r,AContext^.rl);
AContext^.ral := 0;
AContext^.ps := @AContext^.s[1];
ClearPP(AContext);
AContext^.locHistoryTop := -1;
AContext^.removedCharIndexLength := 0;
FillByte(AContext^.suppressState,SizeOf(AContext^.suppressState),0);
AContext^.LastKeyOwner.Length := 0;
AContext^.i := 1;
AContext^.Finished := False;
end;
function FormKey(
const AWeightArray : TUCA_PropWeightsArray;
const ACollation : PUCA_DataBook
) : TUCASortKey;inline;
begin
case ACollation.VariableWeight of
TUCA_VariableKind.ucaShifted : Result := FormKeyShifted(AWeightArray,ACollation);
TUCA_VariableKind.ucaBlanked : Result := FormKeyBlanked(AWeightArray,ACollation);
TUCA_VariableKind.ucaNonIgnorable : Result := FormKeyNonIgnorable(AWeightArray,ACollation);
TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(AWeightArray,ACollation);
else
Result := FormKeyShifted(AWeightArray,ACollation);
end;
end;
function ComputeRawSortKeyNextItem(
const AContext : PComputeKeyContext
) : Boolean;forward;
function IncrementalCompareString_NonIgnorable(
const AStrA : PUnicodeChar;
const ALengthA : SizeInt;
const AStrB : PUnicodeChar;
const ALengthB : SizeInt;
const ACollation : PUCA_DataBook
) : Integer;
var
ctxA, ctxB : TComputeKeyContext;
lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
keyIndexB : Integer;
keyA, keyB : TUCASortKey;
begin
if ( (ALengthA = 0) and (ALengthB = 0) ) or
( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
(ALengthA = ALengthB)
)
then
exit(0);
if (ALengthA = 0) then
exit(-1);
if (ALengthB = 0) then
exit(1);
InitContext(@ctxA,AStrA,ALengthA,ACollation);
InitContext(@ctxB,AStrB,ALengthB,ACollation);
lastKeyIndexA := -1;
keyIndexA := -1;
lengthMaxA := 0;
keyIndexB := -1;
while True do begin
if not ComputeRawSortKeyNextItem(@ctxA) then
Break;
if (ctxA.ral = lengthMaxA) then
Continue;
lengthMaxA := ctxA.ral;
keyIndexA := lastKeyIndexA + 1;
while (keyIndexA < lengthMaxA) and (ctxA.r[keyIndexA].Weights[0] = 0) do begin
Inc(keyIndexA);
end;
if (keyIndexA = lengthMaxA) then begin
lastKeyIndexA := keyIndexA-1;
Continue;
end;
while (keyIndexA < lengthMaxA) do begin
if (ctxA.r[keyIndexA].Weights[0] = 0) then begin
Inc(keyIndexA);
Continue;
end;
Inc(keyIndexB);
while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
if (ctxB.ral <= keyIndexB) then begin
if not ComputeRawSortKeyNextItem(@ctxB) then
Break;
Continue;
end;
Inc(keyIndexB);
end;
if (ctxB.ral <= keyIndexB) then
exit(1);
if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
exit(1);
if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
exit(-1);
Inc(keyIndexA);
end;
lastKeyIndexA := keyIndexA - 1;
end;
//Key(A) is completed !
Inc(keyIndexB);
while (ctxB.ral <= keyIndexB) or (ctxB.r[keyIndexB].Weights[0] = 0) do begin
if (ctxB.ral <= keyIndexB) then begin
if not ComputeRawSortKeyNextItem(@ctxB) then
Break;
Continue;
end;
Inc(keyIndexB);
end;
if (ctxB.ral > keyIndexB) then begin
//B has at least one more primary weight that A
exit(-1);
end;
while ComputeRawSortKeyNextItem(@ctxB) do begin
//
end;
//Key(B) is completed !
keyA := FormKey(ctxA.r,ctxA.Collation);
keyB := FormKey(ctxB.r,ctxB.Collation);
Result := CompareSortKey(keyA,keyB);
end;
function IncrementalCompareString_Shift(
const AStrA : PUnicodeChar;
const ALengthA : SizeInt;
const AStrB : PUnicodeChar;
const ALengthB : SizeInt;
const ACollation : PUCA_DataBook
) : Integer;
var
ctxA, ctxB : TComputeKeyContext;
lastKeyIndexA, keyIndexA, lengthMaxA : Integer;
keyIndexB : Integer;
keyA, keyB : TUCASortKey;
begin
if ( (ALengthA = 0) and (ALengthB = 0) ) or
( (PtrUInt(AStrA) = PtrUInt(AStrB)) and
(ALengthA = ALengthB)
)
then
exit(0);
if (ALengthA = 0) then
exit(-1);
if (ALengthB = 0) then
exit(1);
InitContext(@ctxA,AStrA,ALengthA,ACollation);
InitContext(@ctxB,AStrB,ALengthB,ACollation);
lastKeyIndexA := -1;
keyIndexA := -1;
lengthMaxA := 0;
keyIndexB := -1;
while True do begin
if not ComputeRawSortKeyNextItem(@ctxA) then
Break;
if (ctxA.ral = lengthMaxA) then
Continue;
lengthMaxA := ctxA.ral;
keyIndexA := lastKeyIndexA + 1;
while (keyIndexA < lengthMaxA) and
( (ctxA.r[keyIndexA].Weights[0] = 0) or
ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
)
do begin
Inc(keyIndexA);
end;
if (keyIndexA = lengthMaxA) then begin
lastKeyIndexA := keyIndexA-1;
Continue;
end;
while (keyIndexA < lengthMaxA) do begin
if (ctxA.r[keyIndexA].Weights[0] = 0) or
ctxA.Collation^.IsVariable(@ctxA.r[keyIndexA].Weights)
then begin
Inc(keyIndexA);
Continue;
end;
Inc(keyIndexB);
while (ctxB.ral <= keyIndexB) or
(ctxB.r[keyIndexB].Weights[0] = 0) or
ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
do begin
if (ctxB.ral <= keyIndexB) then begin
if not ComputeRawSortKeyNextItem(@ctxB) then
Break;
Continue;
end;
Inc(keyIndexB);
end;
if (ctxB.ral <= keyIndexB) then
exit(1);
if (ctxA.r[keyIndexA].Weights[0] > ctxB.r[keyIndexB].Weights[0]) then
exit(1);
if (ctxA.r[keyIndexA].Weights[0] < ctxB.r[keyIndexB].Weights[0]) then
exit(-1);
Inc(keyIndexA);
end;
lastKeyIndexA := keyIndexA - 1;
end;
//Key(A) is completed !
Inc(keyIndexB);
while (ctxB.ral <= keyIndexB) or
(ctxB.r[keyIndexB].Weights[0] = 0) or
ctxB.Collation^.IsVariable(@ctxB.r[keyIndexB].Weights)
do begin
if (ctxB.ral <= keyIndexB) then begin
if not ComputeRawSortKeyNextItem(@ctxB) then
Break;
Continue;
end;
Inc(keyIndexB);
end;
if (ctxB.ral > keyIndexB) then begin
//B has at least one more primary weight that A
exit(-1);
end;
while ComputeRawSortKeyNextItem(@ctxB) do begin
//
end;
//Key(B) is completed !
keyA := FormKey(ctxA.r,ctxA.Collation);
keyB := FormKey(ctxB.r,ctxB.Collation);
Result := CompareSortKey(keyA,keyB);
end;
function IncrementalCompareString(
const AStrA : PUnicodeChar;
const ALengthA : SizeInt;
const AStrB : PUnicodeChar;
const ALengthB : SizeInt;
const ACollation : PUCA_DataBook
) : Integer;
begin
case ACollation^.VariableWeight of
TUCA_VariableKind.ucaNonIgnorable :
begin
Result := IncrementalCompareString_NonIgnorable(
AStrA,ALengthA,AStrB,ALengthB,ACollation
);
end;
TUCA_VariableKind.ucaBlanked,
TUCA_VariableKind.ucaShiftedTrimmed,
TUCA_VariableKind.ucaIgnoreSP,
TUCA_VariableKind.ucaShifted:
begin
Result := IncrementalCompareString_Shift(
AStrA,ALengthA,AStrB,ALengthB,ACollation
);
end;
else
begin
Result := IncrementalCompareString_Shift(
AStrA,ALengthA,AStrB,ALengthB,ACollation
);
end;
end;
end;
function IncrementalCompareString(
const AStrA,
AStrB : UnicodeString;
const ACollation : PUCA_DataBook
) : Integer;
begin
Result := IncrementalCompareString(
Pointer(AStrA),Length(AStrA),Pointer(AStrB),Length(AStrB),
ACollation
);
end;
function ComputeRawSortKeyNextItem(
const AContext : PComputeKeyContext
) : Boolean;
var
ctx : PComputeKeyContext;
procedure GrowKey(const AMinGrow : Integer = 0);inline;
begin
if (ctx^.rl < AMinGrow) then
ctx^.rl := ctx^.rl + AMinGrow
else
ctx^.rl := 2 * ctx^.rl;
SetLength(ctx^.r,ctx^.rl);
end;
procedure SaveKeyOwner();
var
k : Integer;
kppLevel : Byte;
begin
k := 0;
kppLevel := High(Byte);
while (k <= ctx^.locHistoryTop) do begin
if (kppLevel <> ctx^.locHistory[k].ppLevel) then begin
ctx^.LastKeyOwner.Chars[k] := ctx^.locHistory[k].cp;
kppLevel := ctx^.locHistory[k].ppLevel;
end;
k := k + 1;
end;
if (k = 0) or (kppLevel <> ctx^.ppLevel) then begin
ctx^.LastKeyOwner.Chars[k] := ctx^.cp;
k := k + 1;
end;
ctx^.LastKeyOwner.Length := k;
end;
procedure AddWeights(AItem : PUCA_PropItemRec);inline;
begin
SaveKeyOwner();
if ((ctx^.ral + AItem^.WeightLength) > ctx^.rl) then
GrowKey(AItem^.WeightLength);
AItem^.GetWeightArray(@ctx^.r[ctx^.ral]);
ctx^.ral := ctx^.ral + AItem^.WeightLength;
end;
procedure AddContextWeights(AItem : PUCA_PropItemContextRec);inline;
begin
if ((ctx^.ral + AItem^.WeightCount) > ctx^.rl) then
GrowKey(AItem^.WeightCount);
Move(AItem^.GetWeights()^,ctx^.r[ctx^.ral],(AItem^.WeightCount*SizeOf(ctx^.r[0])));
ctx^.ral := ctx^.ral + AItem^.WeightCount;
end;
procedure AddComputedWeights(ACodePoint : Cardinal);inline;
begin
SaveKeyOwner();
if ((ctx^.ral + 2) > ctx^.rl) then
GrowKey();
DeriveWeight(ACodePoint,@ctx^.r[ctx^.ral]);
ctx^.ral := ctx^.ral + 2;
end;
procedure RecordDeletion();inline;
begin
if ctx^.pp^.IsValid() and ctx^.pp^.IsDeleted() (*pp^.GetWeightLength() = 0*) then begin
if (ctx^.suppressState.cl = nil) or
(ctx^.suppressState.CharCount > ctx^.ppLevel)
then begin
ctx^.suppressState.cl := ctx^.cl;
ctx^.suppressState.CharCount := ctx^.ppLevel;
end;
end;
end;
procedure RecordStep();inline;
begin
Inc(ctx^.locHistoryTop);
ctx^.locHistory[ctx^.locHistoryTop].i := ctx^.i;
ctx^.locHistory[ctx^.locHistoryTop].cl := ctx^.cl;
ctx^.locHistory[ctx^.locHistoryTop].pp := ctx^.pp;
ctx^.locHistory[ctx^.locHistoryTop].ppLevel := ctx^.ppLevel;
ctx^.locHistory[ctx^.locHistoryTop].cp := ctx^.cp;
ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength := ctx^.removedCharIndexLength;
RecordDeletion();
end;
procedure ClearHistory();inline;
begin
ctx^.locHistoryTop := -1;
end;
function HasHistory() : Boolean;inline;
begin
Result := (ctx^.locHistoryTop >= 0);
end;
function GetHistoryLength() : Integer;inline;
begin
Result := (ctx^.locHistoryTop + 1);
end;
procedure GoBack();inline;
begin
Assert(ctx^.locHistoryTop >= 0);
ctx^.i := ctx^.locHistory[ctx^.locHistoryTop].i;
ctx^.cp := ctx^.locHistory[ctx^.locHistoryTop].cp;
ctx^.cl := ctx^.locHistory[ctx^.locHistoryTop].cl;
ctx^.pp := ctx^.locHistory[ctx^.locHistoryTop].pp;
ctx^.ppLevel := ctx^.locHistory[ctx^.locHistoryTop].ppLevel;
ctx^.removedCharIndexLength := ctx^.locHistory[ctx^.locHistoryTop].removedCharIndexLength;
ctx^.ps := @ctx^.s[ctx^.i];
Dec(ctx^.locHistoryTop);
end;
function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
var
k : DWord;
pk : PUnicodeChar;
puk : PUC_Prop;
begin
k := AStartFrom;
if (k > ctx^.c) then
exit(False);
if (ctx^.removedCharIndexLength>0) and
(IndexDWord(ctx^.removedCharIndex[0],ctx^.removedCharIndexLength,k) >= 0)
then begin
exit(False);
end;
{if (k = (i+1)) or
( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
then
lastUnblockedNonstarterCCC := 0;}
pk := @ctx^.s[k];
if UnicodeIsHighSurrogate(pk^) then begin
if (k = ctx^.c) then
exit(False);
if UnicodeIsLowSurrogate(pk[1]) then
puk := GetProps(pk[0],pk[1])
else
puk := GetProps(Word(pk^));
end else begin
puk := GetProps(Word(pk^));
end;
if (puk^.CCC = 0) or (ctx^.lastUnblockedNonstarterCCC >= puk^.CCC) then
exit(False);
ctx^.lastUnblockedNonstarterCCC := puk^.CCC;
Result := True;
end;
procedure RemoveChar(APos : Integer);inline;
begin
if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos;
Inc(ctx^.removedCharIndexLength);
if UnicodeIsHighSurrogate(ctx^.s[APos]) and (APos < ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[APos+1]) then begin
if (ctx^.removedCharIndexLength >= Length(ctx^.removedCharIndex)) then
SetLength(ctx^.removedCharIndex,(2*ctx^.removedCharIndexLength + 2));
ctx^.removedCharIndex[ctx^.removedCharIndexLength] := APos+1;
Inc(ctx^.removedCharIndexLength);
end;
end;
procedure Inc_I();inline;
begin
if (ctx^.removedCharIndexLength = 0) then begin
Inc(ctx^.i);
Inc(ctx^.ps);
exit;
end;
while True do begin
Inc(ctx^.i);
Inc(ctx^.ps);
if (IndexDWord(ctx^.removedCharIndex[0],ctx^.removedCharIndexLength,ctx^.i) = -1) then
Break;
end;
end;
function MoveToNextChar() : Boolean;inline;
begin
Result := True;
if UnicodeIsHighSurrogate(ctx^.ps[0]) then begin
if (ctx^.i = ctx^.c) then
exit(False);
if UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
ctx^.surrogateState := True;
ctx^.cp := ToUCS4(ctx^.ps[0],ctx^.ps[1]);
end else begin
ctx^.surrogateState := False;
ctx^.cp := Word(ctx^.ps[0]);
end;
end else begin
ctx^.surrogateState := False;
ctx^.cp := Word(ctx^.ps[0]);
end;
end;
function FindPropUCA() : Boolean;
var
candidateCL : PUCA_DataBook;
begin
ctx^.pp := nil;
if (ctx^.cl = nil) then
candidateCL := ctx^.Collation
else
candidateCL := ctx^.cl;
if ctx^.surrogateState then begin
while (candidateCL <> nil) do begin
ctx^.pp := GetPropUCA(ctx^.ps[0],ctx^.ps[1],candidateCL);
if (ctx^.pp <> nil) then
break;
candidateCL := candidateCL^.Base;
end;
end else begin
while (candidateCL <> nil) do begin
ctx^.pp := GetPropUCA(ctx^.ps[0],candidateCL);
if (ctx^.pp <> nil) then
break;
candidateCL := candidateCL^.Base;
end;
end;
ctx^.cl := candidateCL;
Result := (ctx^.pp <> nil);
end;
procedure AddWeightsAndClear();inline;
var
ctxNode : PUCA_PropItemContextTreeNodeRec;
begin
if (ctx^.pp^.WeightLength > 0) then begin
AddWeights(ctx^.pp);
end else
if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
(ctxNode^.Data.WeightCount > 0)
then begin
AddContextWeights(@ctxNode^.Data);
end;
//AddWeights(pp);
ClearHistory();
ClearPP(ctx);
end;
function StartMatch() : Boolean;
procedure HandleLastChar();
var
ctxNode : PUCA_PropItemContextTreeNodeRec;
begin
while True do begin
if ctx^.pp^.IsValid() then begin
if (ctx^.pp^.WeightLength > 0) then
AddWeights(ctx^.pp)
else
if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
(ctxNode^.Data.WeightCount > 0)
then
AddContextWeights(@ctxNode^.Data)
else
AddComputedWeights(ctx^.cp){handle deletion of code point};
break;
end;
if (ctx^.cl^.Base = nil) then begin
AddComputedWeights(ctx^.cp);
break;
end;
ctx^.cl := ctx^.cl^.Base;
if not FindPropUCA() then begin
AddComputedWeights(ctx^.cp);
break;
end;
end;
end;
var
tmpCtxNode : PUCA_PropItemContextTreeNodeRec;
begin
Result := False;
ctx^.ppLevel := 0;
if not FindPropUCA() then begin
AddComputedWeights(ctx^.cp);
ClearHistory();
ClearPP(ctx);
Result := True;
end else begin
if (ctx^.i = ctx^.c) then begin
HandleLastChar();
Result := True;
end else begin
if ctx^.pp^.IsValid()then begin
if (ctx^.pp^.ChildCount = 0) then begin
if (ctx^.pp^.WeightLength > 0) then
AddWeights(ctx^.pp)
else
if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,tmpCtxNode) and
(tmpCtxNode^.Data.WeightCount > 0)
then
AddContextWeights(@tmpCtxNode^.Data)
else
AddComputedWeights(ctx^.cp){handle deletion of code point};
ClearPP(ctx);
ClearHistory();
Result := True;
end else begin
RecordStep();
end
end else begin
if (ctx^.pp^.ChildCount = 0) then begin
AddComputedWeights(ctx^.cp);
ClearPP(ctx);
ClearHistory();
Result := True;
end else begin
RecordStep();
end;
end;
end;
end;
end;
function TryPermutation() : Boolean;
var
kk : Integer;
b : Boolean;
puk : PUC_Prop;
ppk : PUCA_PropItemRec;
begin
Result := False;
puk := GetProps(ctx^.cp);
if (puk^.CCC = 0) then
exit;
ctx^.lastUnblockedNonstarterCCC := puk^.CCC;
if ctx^.surrogateState then
kk := ctx^.i + 2
else
kk := ctx^.i + 1;
while IsUnblockedNonstarter(kk) do begin
b := UnicodeIsHighSurrogate(ctx^.s[kk]) and (kk<ctx^.c) and UnicodeIsLowSurrogate(ctx^.s[kk+1]);
if b then
ppk := FindChild(ToUCS4(ctx^.s[kk],ctx^.s[kk+1]),ctx^.pp)
else
ppk := FindChild(Word(ctx^.s[kk]),ctx^.pp);
if (ppk <> nil) then begin
ctx^.pp := ppk;
RemoveChar(kk);
Inc(ctx^.ppLevel);
RecordStep();
Result := True;
if (ctx^.pp^.ChildCount = 0 ) then
Break;
end;
if b then
Inc(kk);
Inc(kk);
end;
end;
procedure AdvanceCharPos();inline;
begin
if UnicodeIsHighSurrogate(ctx^.ps[0]) and (ctx^.i<ctx^.c) and UnicodeIsLowSurrogate(ctx^.ps[1]) then begin
Inc(ctx^.i);
Inc(ctx^.ps);
end;
Inc_I();
end;
var
ok : Boolean;
pp1 : PUCA_PropItemRec;
cltemp : PUCA_DataBook;
ctxNode : PUCA_PropItemContextTreeNodeRec;
begin
if AContext^.Finished then
exit(False);
ctx := AContext;
while (ctx^.i <= ctx^.c) and MoveToNextChar() do begin
ok := False;
if (ctx^.pp = nil) then begin // Start Matching
ok := StartMatch();
end else begin
pp1 := FindChild(ctx^.cp,ctx^.pp);
if (pp1 <> nil) then begin
Inc(ctx^.ppLevel);
ctx^.pp := pp1;
if (ctx^.pp^.ChildCount = 0) or (ctx^.i = ctx^.c) then begin
ok := False;
if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
if (ctx^.pp^.WeightLength > 0) then begin
AddWeightsAndClear();
ok := True;
end else
if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
(ctxNode^.Data.WeightCount > 0)
then begin
AddContextWeights(@ctxNode^.Data);
ClearHistory();
ClearPP(ctx);
ok := True;
end
end;
if not ok then begin
RecordDeletion();
while HasHistory() do begin
GoBack();
if ctx^.pp^.IsValid() and
( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
)
then begin
AddWeightsAndClear();
ok := True;
Break;
end;
end;
if not ok then begin
cltemp := ctx^.cl^.Base;
if (cltemp <> nil) then begin
ClearPP(ctx,False);
ctx^.cl := cltemp;
Continue;
end;
end;
if not ok then begin
AddComputedWeights(ctx^.cp);
ClearHistory();
ClearPP(ctx);
ok := True;
end;
end;
end else begin
RecordStep();
end;
end else begin
// permutations !
ok := False;
if TryPermutation() and ctx^.pp^.IsValid() then begin
if (ctx^.suppressState.CharCount = 0) then begin
AddWeightsAndClear();
ok := True;
exit(True);// Continue;
end;
while True do begin
if ctx^.pp^.IsValid() and
(ctx^.pp^.WeightLength > 0) and
( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
)
then begin
AddWeightsAndClear();
ok := True;
break;
end;
if not HasHistory() then
break;
GoBack();
if (ctx^.pp = nil) then
break;
end;
end;
if not ok then begin
if ctx^.pp^.IsValid() and (ctx^.suppressState.CharCount = 0) then begin
if (ctx^.pp^.WeightLength > 0) then begin
AddWeightsAndClear();
ok := True;
end else
if (ctx^.LastKeyOwner.Length > 0) and ctx^.pp^.Contextual and
ctx^.pp^.GetContext()^.Find(@ctx^.LastKeyOwner.Chars[0],ctx^.LastKeyOwner.Length,ctxNode) and
(ctxNode^.Data.WeightCount > 0)
then begin
AddContextWeights(@ctxNode^.Data);
ClearHistory();
ClearPP(ctx);
ok := True;
end
end;
if ok then
exit(True);// Continue;
end;
if not ok then begin
if (ctx^.cl^.Base <> nil) then begin
cltemp := ctx^.cl^.Base;
while HasHistory() do
GoBack();
ctx^.pp := nil;
ctx^.ppLevel := 0;
ctx^.cl := cltemp;
Continue;
end;
//walk back
ok := False;
while HasHistory() do begin
GoBack();
if ctx^.pp^.IsValid() and
(ctx^.pp^.WeightLength > 0) and
( (ctx^.suppressState.CharCount = 0) or
( ( (ctx^.cl = ctx^.suppressState.cl) and (ctx^.ppLevel <> ctx^.suppressState.CharCount) ) or
( (ctx^.cl <> ctx^.suppressState.cl) and (ctx^.ppLevel < ctx^.suppressState.CharCount) )
)
)
then begin
AddWeightsAndClear();
ok := True;
Break;
end;
end;
if ok then begin
AdvanceCharPos();
exit(True);// Continue;
end;
if (ctx^.pp <> nil) then begin
AddComputedWeights(ctx^.cp);
ClearHistory();
ClearPP(ctx);
ok := True;
end;
end;
end;
end;
if ctx^.surrogateState then begin
Inc(ctx^.ps);
Inc(ctx^.i);
end;
//
Inc_I();
if ok then
exit(True);
end;
SetLength(ctx^.r,ctx^.ral);
ctx^.Finished := True;
Result := True;
end;
function ComputeSortKey(
const AStr : PUnicodeChar;
const ALength : SizeInt;
@ -2388,14 +3271,7 @@ var
r : TUCA_PropWeightsArray;
begin
r := ComputeRawSortKey(AStr,ALength,ACollation);
case ACollation^.VariableWeight of
TUCA_VariableKind.ucaShifted : Result := FormKeyShifted(r,ACollation);
TUCA_VariableKind.ucaBlanked : Result := FormKeyBlanked(r,ACollation);
TUCA_VariableKind.ucaNonIgnorable : Result := FormKeyNonIgnorable(r,ACollation);
TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(r,ACollation);
else
Result := FormKeyShifted(r,ACollation);
end;
Result := FormKey(r,ACollation);
end;
end.

View File

@ -1,11 +1,11 @@
#
# Don't edit, this file is generated by FPCMake Version 2.0.0 [2013/07/13]
# Don't edit, this file is generated by FPCMake Version 2.0.0 [2013/03/08]
#
default: allexectests
MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux jvm-java jvm-android i8086-msdos
MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux jvm-java jvm-android
BSDs = freebsd netbsd openbsd darwin
UNIXs = linux $(BSDs) solaris qnx haiku aix
LIMIT83fs = go32v2 os2 emx watcom msdos
UNIXs = linux $(BSDs) solaris qnx haiku aix
LIMIT83fs = go32v2 os2 emx watcom
OSNeedsComspecToRunBatch = go32v2 watcom
FORCE:
.PHONY: FORCE
@ -265,7 +265,6 @@ endif
ifndef BINUTILSPREFIX
ifndef CROSSBINDIR
ifdef CROSSCOMPILE
ifneq ($(OS_TARGET),msdos)
ifndef DARWIN2DARWIN
ifneq ($(CPU_TARGET),jvm)
BINUTILSPREFIX=$(CPU_TARGET)-$(OS_TARGET)-
@ -284,9 +283,6 @@ endif
endif
endif
endif
else
BINUTILSPREFIX=$(OS_TARGET)-
endif
endif
endif
endif
@ -535,9 +531,6 @@ endif
ifeq ($(FULL_TARGET),jvm-android)
override TARGET_PROGRAMS+=gparmake
endif
ifeq ($(FULL_TARGET),i8086-msdos)
override TARGET_PROGRAMS+=gparmake
endif
override INSTALL_FPCPACKAGE=y
ifdef REQUIRE_UNITSDIR
override UNITSDIR+=$(REQUIRE_UNITSDIR)
@ -918,11 +911,6 @@ SHAREDLIBEXT=.jar
SHORTSUFFIX=android
endif
endif
ifeq ($(OS_TARGET),msdos)
STATICLIBPREFIX=
STATICLIBEXT=.lib
SHORTSUFFIX=d16
endif
ifneq ($(findstring $(OS_SOURCE),$(LIMIT83fs)),)
FPCMADE=fpcmade.$(SHORTSUFFIX)
ZIPSUFFIX=$(SHORTSUFFIX)
@ -1144,11 +1132,7 @@ AS=$(ASPROG)
LD=$(LDPROG)
RC=$(RCPROG)
AR=$(ARPROG)
ifdef inUnix
PPAS=./ppas$(SRCBATCHEXT)
else
PPAS=ppas$(SRCBATCHEXT)
endif
ifdef inUnix
LDCONFIG=ldconfig
else
@ -1385,9 +1369,6 @@ endif
ifeq ($(FULL_TARGET),jvm-android)
REQUIRE_PACKAGES_RTL=1
endif
ifeq ($(FULL_TARGET),i8086-msdos)
REQUIRE_PACKAGES_RTL=1
endif
ifdef REQUIRE_PACKAGES_RTL
PACKAGEDIR_RTL:=$(firstword $(subst /Makefile.fpc,,$(strip $(wildcard $(addsuffix /rtl/Makefile.fpc,$(PACKAGESDIR))))))
ifneq ($(PACKAGEDIR_RTL),)
@ -2050,7 +2031,7 @@ export LOG:=$(TEST_OUTPUTDIR)/log
endif
LOGFILES=$(TEST_OUTPUTDIR)/log $(TEST_OUTPUTDIR)/longlog $(TEST_OUTPUTDIR)/faillist
LOGEXT=.testlog .tbslog .tbflog .webtbslog .webtbflog
TESTSUBDIRS=cg cg/variants cg/cdecl cpu16 cpu16/i8086 library opt units/system units/dos units/crt units/objects units/strings units/sysutils units/math units/sharemem units/strutils units/matrix units/lineinfo units/ucomplex
TESTSUBDIRS=cg cg/variants cg/cdecl cpu16 cpu16/i8086 library opt units/system units/dos units/crt units/objects units/strings units/sysutils units/math units/sharemem units/strutils units/matrix units/lineinfo units/ucomplex units/fpwidestring
TESTPACKAGESUBDIRS=packages/win-base packages/webtbs packages/hash packages/fcl-registry packages/fcl-process packages/zlib packages/fcl-db packages/fcl-base packages/fcl-xml packages/cocoaint packages/bzip2
ifdef QUICKTEST
export QUICKTEST

View File

@ -154,7 +154,7 @@ LOGFILES=$(TEST_OUTPUTDIR)/log $(TEST_OUTPUTDIR)/longlog $(TEST_OUTPUTDIR)/faill
LOGEXT=.testlog .tbslog .tbflog .webtbslog .webtbflog
# Subdirs available in the test subdir
TESTSUBDIRS=cg cg/variants cg/cdecl cpu16 cpu16/i8086 library opt units/system units/dos units/crt units/objects units/strings units/sysutils units/math units/sharemem units/strutils units/matrix units/lineinfo units/ucomplex
TESTSUBDIRS=cg cg/variants cg/cdecl cpu16 cpu16/i8086 library opt units/system units/dos units/crt units/objects units/strings units/sysutils units/math units/sharemem units/strutils units/matrix units/lineinfo units/ucomplex units/fpwidestring
TESTPACKAGESUBDIRS=packages/win-base packages/webtbs packages/hash packages/fcl-registry packages/fcl-process packages/zlib packages/fcl-db packages/fcl-base packages/fcl-xml packages/cocoaint packages/bzip2
ifdef QUICKTEST

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,243 @@
{ %FILES=CollationTest_NON_IGNORABLE_SHORT.txt CollationTest_SHIFTED_SHORT.txt}
program tuca1;
{ Test the Unicode Collation Algorithm (UCA) implementation
This test uses the UCA test files :
* CollationTest_NON_IGNORABLE_SHORT.txt
* CollationTest_SHIFTED_SHORT.txt
These files are in the zip archive at
http://www.unicode.org/Public/UCA/6.2.0/CollationAuxiliary.zip
}
{$mode objfpc}{$H+}
uses
SysUtils, Classes, Math,{$ifdef WINCE}StreamIO,{$endif}
unicodedata, unicodeducet;
var
TotalErrorCount : Integer = 0;
procedure DumpString(const AValue : UnicodeString);
var
k, c : Integer;
cp : Cardinal;
begin
c := Length(AValue);
k := 1;
while (k <= c) do begin
if (k = c) or not(UnicodeIsHighSurrogate(AValue[k])) or not(UnicodeIsLowSurrogate(AValue[k+1])) then
cp := Word(AValue[k])
else begin
cp := ToUCS4(AValue[k],AValue[k+1]);
Inc(k);
end;
Write(IntToHex(cp,4), ' ');
Inc(k);
end;
end;
procedure DumpKey(const AValue : TUCASortKey);
var
k, c : Integer;
begin
c := Length(AValue);
for k := 0 to c-1 do begin
Write(IntToHex(AValue[k],4),'|');
end;
end;
procedure CheckContent(ADataAStream : TMemoryStream; ACollation : PUCA_DataBook);
const LINE_LENGTH = 1024;
var
p : PAnsiChar;
bufferLength, bufferPos, lineLength, linePos : Integer;
line : ansistring;
lineCount : Integer;
function NextLine() : Boolean;
var
locOldPos : Integer;
locOldPointer : PAnsiChar;
begin
Result := False;
locOldPointer := p;
locOldPos := bufferPos;
while (bufferPos < bufferLength) and (p^ <> #10) do begin
Inc(p);
Inc(bufferPos);
end;
if (locOldPos = bufferPos) and (p^ = #10) then begin
lineLength := 0;
Inc(p);
Inc(bufferPos);
linePos := 1;
Result := True;
end else if (locOldPos < bufferPos) then begin
lineLength := (bufferPos - locOldPos) + 1;
Move(locOldPointer^,line[1],lineLength);
if (p^ = #10) then begin
Dec(lineLength);
Inc(p);
Inc(bufferPos);
end;
linePos := 1;
Result := True;
end;
if Result then
Inc(lineCount);
end;
procedure SkipSpace();
begin
while (linePos < lineLength) and (line[linePos] in [' ',#9]) do
Inc(linePos);
end;
function NextToken() : ansistring;
const C_SEPARATORS = [';','#','.','[',']','*','@'];
var
k : Integer;
begin
SkipSpace();
k := linePos;
if (linePos <= lineLength) and (line[linePos] in C_SEPARATORS) then begin
Result := line[linePos];
Inc(linePos);
exit;
end;
while (linePos <= lineLength) and not(line[linePos] in (C_SEPARATORS+[' '])) do
Inc(linePos);
if (linePos > k) then begin
if (line[Min(linePos,lineLength)] in C_SEPARATORS) then
Result := Copy(line,k,(linePos-k))
else
Result := Copy(line,k,(linePos-k+1));
Result := Trim(Result);
end else begin
Result := '';
end;
end;
function ParseLine() : UnicodeString;
var
locCP : Cardinal;
s : ansistring;
k : Integer;
begin
SetLength(Result,24);
k := 0;
while True do begin
s := NextToken();
if (s = '') or (s[1] = '#') then
Break;
Inc(k);
if (k >= Length(Result)) then
SetLength(Result,(2*k));
locCP := StrToInt('$' + s);
if (locCP <= High(Word)) then
Word(Result[k]) := locCP
else begin
FromUCS4(locCP,PUnicodeChar(@Result[k])^,PUnicodeChar(@Result[k+1])^);
Inc(k);
end;
end;
SetLength(Result,k);
end;
procedure Prepare();
begin
bufferLength := ADataAStream.Size;
bufferPos := 0;
p := ADataAStream.Memory;
lineLength := 0;
SetLength(line,LINE_LENGTH);
end;
var
a, b : UnicodeString;
ka, kb : TUCASortKey;
errorCount : Integer;
begin
errorCount := 0;
lineCount := 0;
SetLength(a,3);
FromUCS4($11143,PUnicodeChar(@a[1])^,PUnicodeChar(@a[2])^);
Word(a[3]):= $0334;
//Word(a[2]):= $0021;
{$define stop_on_error}
ka := ComputeSortKey(a,ACollation);
Prepare();
while NextLine() do begin
a := ParseLine();
if (a <> '') then
Break;
end;
ka := ComputeSortKey(a,ACollation);
while NextLine() do begin
b := ParseLine();
if (b = '') then
Break;
kb := ComputeSortKey(b,ACollation);
if (CompareSortKey(kb,ka) < 0) then begin
Inc(errorCount);
{$ifdef stop_on_error}
Inc(TotalErrorCount,errorCount);
WriteLn('Error #',errorCount, '; Line #',lineCount);
Write(' s1 = ');DumpString(a);Write(' ');DumpKey(ka); WriteLn();
Write(' s2 = ');DumpString(b);Write(' ');DumpKey(ComputeSortKey(b,ACollation)); WriteLn();
//Write(' s2 = ');DumpString(b);Write(' ');DumpKey(kb); WriteLn();
Exit;
{$endif stop_on_error}
end;
a := b;
ka := kb;
end;
WriteLn('Line Count = ',lineCount);
WriteLn('Error Count = ',errorCount);
Inc(TotalErrorCount,errorCount);
end;
var
stream : TMemoryStream;
collation : PUCA_DataBook;
{$ifdef WINCE}
fs : TFileStream;
s : string;
{$endif WINCE}
begin
{$ifdef WINCE}
s := ExtractFilePath(ParamStr(0))+'tuca1-log.txt';
DeleteFile(s);
fs := TFileStream.Create(s,fmCreate);
AssignStream(Output,fs);
Rewrite(Output);
s := ExtractFilePath(ParamStr(0))+'tuca1-err.txt';
DeleteFile(s);
fs := TFileStream.Create(s,fmCreate);
AssignStream(ErrOutput,fs);
Rewrite(ErrOutput);
{$endif WINCE}
collation := FindCollation('DUCET');
stream := TMemoryStream.Create();
try
collation^.VariableWeight := TUCA_VariableKind.ucaNonIgnorable;
stream.LoadFromFile(ExtractFilePath(ParamStr(0)) + 'CollationTest_NON_IGNORABLE_SHORT.txt');
stream.Position := 0;
WriteLn('Testing CollationTest_NON_IGNORABLE_SHORT.txt ...');
CheckContent(stream,collation);
WriteLn();WriteLn();
collation^.VariableWeight := TUCA_VariableKind.ucaShifted;
stream.LoadFromFile(ExtractFilePath(ParamStr(0)) + 'CollationTest_SHIFTED_SHORT.txt');
stream.Position := 0;
WriteLn('Testing CollationTest_SHIFTED_SHORT.txt ...');
CheckContent(stream,collation);
finally
stream.Free();
end;
if (TotalErrorCount > 0) then begin
WriteLn('Failed.');
Halt(1);
end;
end.

View File

@ -0,0 +1,238 @@
{ %FILES=CollationTest_NON_IGNORABLE_SHORT.txt CollationTest_SHIFTED_SHORT.txt}
program tuca2;
{ Test the Unicode Collation Algorithm (UCA) incremental implementation
This test uses the UCA test files :
* CollationTest_NON_IGNORABLE_SHORT.txt
* CollationTest_SHIFTED_SHORT.txt
These files are in the zip archive at
http://www.unicode.org/Public/UCA/6.2.0/CollationAuxiliary.zip
}
{$mode objfpc}{$H+}
uses
SysUtils, Classes, Math,{$ifdef WINCE}StreamIO,{$endif}
unicodedata, unicodeducet;
var
TotalErrorCount : Integer;
procedure DumpString(const AValue : UnicodeString);
var
k, c : Integer;
cp : Cardinal;
begin
c := Length(AValue);
k := 1;
while (k <= c) do begin
if (k = c) or not(UnicodeIsHighSurrogate(AValue[k])) or not(UnicodeIsLowSurrogate(AValue[k+1])) then
cp := Word(AValue[k])
else begin
cp := ToUCS4(AValue[k],AValue[k+1]);
Inc(k);
end;
Write(IntToHex(cp,4), ' ');
Inc(k);
end;
end;
procedure DumpKey(const AValue : TUCASortKey);
var
k, c : Integer;
begin
c := Length(AValue);
for k := 0 to c-1 do begin
Write(IntToHex(AValue[k],4),'|');
end;
end;
procedure CheckContent(ADataAStream : TMemoryStream; ACollation : PUCA_DataBook);
const LINE_LENGTH = 1024;
var
p : PAnsiChar;
bufferLength, bufferPos, lineLength, linePos : Integer;
line : ansistring;
lineCount : Integer;
function NextLine() : Boolean;
var
locOldPos : Integer;
locOldPointer : PAnsiChar;
begin
Result := False;
locOldPointer := p;
locOldPos := bufferPos;
while (bufferPos < bufferLength) and (p^ <> #10) do begin
Inc(p);
Inc(bufferPos);
end;
if (locOldPos = bufferPos) and (p^ = #10) then begin
lineLength := 0;
Inc(p);
Inc(bufferPos);
linePos := 1;
Result := True;
end else if (locOldPos < bufferPos) then begin
lineLength := (bufferPos - locOldPos) + 1;
Move(locOldPointer^,line[1],lineLength);
if (p^ = #10) then begin
Dec(lineLength);
Inc(p);
Inc(bufferPos);
end;
linePos := 1;
Result := True;
end;
if Result then
Inc(lineCount);
end;
procedure SkipSpace();
begin
while (linePos < lineLength) and (line[linePos] in [' ',#9]) do
Inc(linePos);
end;
function NextToken() : ansistring;
const C_SEPARATORS = [';','#','.','[',']','*','@'];
var
k : Integer;
begin
SkipSpace();
k := linePos;
if (linePos <= lineLength) and (line[linePos] in C_SEPARATORS) then begin
Result := line[linePos];
Inc(linePos);
exit;
end;
while (linePos <= lineLength) and not(line[linePos] in (C_SEPARATORS+[' '])) do
Inc(linePos);
if (linePos > k) then begin
if (line[Min(linePos,lineLength)] in C_SEPARATORS) then
Result := Copy(line,k,(linePos-k))
else
Result := Copy(line,k,(linePos-k+1));
Result := Trim(Result);
end else begin
Result := '';
end;
end;
function ParseLine() : UnicodeString;
var
locCP : Cardinal;
s : ansistring;
k : Integer;
begin
SetLength(Result,24);
k := 0;
while True do begin
s := NextToken();
if (s = '') or (s[1] = '#') then
Break;
Inc(k);
if (k >= Length(Result)) then
SetLength(Result,(2*k));
locCP := StrToInt('$' + s);
if (locCP <= High(Word)) then
Word(Result[k]) := locCP
else begin
FromUCS4(locCP,PUnicodeChar(@Result[k])^,PUnicodeChar(@Result[k+1])^);
Inc(k);
end;
end;
SetLength(Result,k);
end;
procedure Prepare();
begin
bufferLength := ADataAStream.Size;
bufferPos := 0;
p := ADataAStream.Memory;
lineLength := 0;
SetLength(line,LINE_LENGTH);
end;
var
a, b : UnicodeString;
errorCount : Integer;
begin
errorCount := 0;
lineCount := 0;
SetLength(a,3);
FromUCS4($11143,PUnicodeChar(@a[1])^,PUnicodeChar(@a[2])^);
Word(a[3]):= $0334;
//Word(a[2]):= $0021;
{ $define stop_on_error}
Prepare();
while NextLine() do begin
a := ParseLine();
if (a <> '') then
Break;
end;
while NextLine() do begin
b := ParseLine();
if (b = '') then
Break;
//if (CompareSortKey(kb,ka) < 0) then begin
if (IncrementalCompareString(b,a,ACollation) < 0) then begin
Inc(errorCount);
{$ifdef stop_on_error}
Inc(TotalErrorCount,errorCount);
//IncrementalCompareString(b,a,ACollation);
WriteLn('Error #',errorCount, '; Line #',lineCount);
Write(' s1 = ');DumpString(a);Write(' ');DumpKey(ComputeSortKey(a,ACollation)); WriteLn();
Write(' s2 = ');DumpString(b);Write(' ');DumpKey(ComputeSortKey(b,ACollation)); WriteLn();
Exit;
{$endif stop_on_error}
end;
a := b;
end;
WriteLn('Line Count = ',lineCount);
WriteLn('Error Count = ',errorCount);
Inc(TotalErrorCount,errorCount);
end;
var
stream : TMemoryStream;
collation : PUCA_DataBook;
{$ifdef WINCE}
fs : TFileStream;
s : string;
{$endif WINCE}
begin
{$ifdef WINCE}
s := ExtractFilePath(ParamStr(0))+'tuca1-log.txt';
DeleteFile(s);
fs := TFileStream.Create(s,fmCreate);
AssignStream(Output,fs);
Rewrite(Output);
s := ExtractFilePath(ParamStr(0))+'tuca1-err.txt';
DeleteFile(s);
fs := TFileStream.Create(s,fmCreate);
AssignStream(ErrOutput,fs);
Rewrite(ErrOutput);
{$endif WINCE}
collation := FindCollation('DUCET');
stream := TMemoryStream.Create();
try
collation^.VariableWeight := TUCA_VariableKind.ucaNonIgnorable;
stream.LoadFromFile(ExtractFilePath(ParamStr(0)) + 'CollationTest_NON_IGNORABLE_SHORT.txt');
stream.Position := 0;
WriteLn('Testing CollationTest_NON_IGNORABLE_SHORT.txt ...');
CheckContent(stream,collation);
WriteLn();WriteLn();
collation^.VariableWeight := TUCA_VariableKind.ucaShifted;
stream.LoadFromFile(ExtractFilePath(ParamStr(0)) + 'CollationTest_SHIFTED_SHORT.txt');
stream.Position := 0;
WriteLn('Testing CollationTest_SHIFTED_SHORT.txt ...');
CheckContent(stream,collation);
finally
stream.Free();
end;
if (TotalErrorCount > 0) then begin
WriteLn('Failed.');
Halt(1);
end;
end.

View File

@ -0,0 +1,240 @@
{ %FILES=CollationTest_NON_IGNORABLE_SHORT.txt CollationTest_SHIFTED_SHORT.txt}
program tucawsm;
{ Test the Unicode Collation Algorithm (UCA) incremental implementation
This test uses the UCA test files :
* CollationTest_NON_IGNORABLE_SHORT.txt
* CollationTest_SHIFTED_SHORT.txt
These files are in the zip archive at
http://www.unicode.org/Public/UCA/6.2.0/CollationAuxiliary.zip
}
{$mode objfpc}{$H+}
uses
SysUtils, Classes, Math,{$ifdef WINCE}StreamIO,{$endif}
unicodedata, unicodeducet, fpwidestring;
var
TotalErrorCount : Integer;
procedure DumpString(const AValue : UnicodeString);
var
k, c : Integer;
cp : Cardinal;
begin
c := Length(AValue);
k := 1;
while (k <= c) do begin
if (k = c) or not(UnicodeIsHighSurrogate(AValue[k])) or not(UnicodeIsLowSurrogate(AValue[k+1])) then
cp := Word(AValue[k])
else begin
cp := ToUCS4(AValue[k],AValue[k+1]);
Inc(k);
end;
Write(IntToHex(cp,4), ' ');
Inc(k);
end;
end;
procedure DumpKey(const AValue : TUCASortKey);
var
k, c : Integer;
begin
c := Length(AValue);
for k := 0 to c-1 do begin
Write(IntToHex(AValue[k],4),'|');
end;
end;
procedure CheckContent(ADataAStream : TMemoryStream);
const LINE_LENGTH = 1024;
var
p : PAnsiChar;
bufferLength, bufferPos, lineLength, linePos : Integer;
line : ansistring;
lineCount : Integer;
function NextLine() : Boolean;
var
locOldPos : Integer;
locOldPointer : PAnsiChar;
begin
Result := False;
locOldPointer := p;
locOldPos := bufferPos;
while (bufferPos < bufferLength) and (p^ <> #10) do begin
Inc(p);
Inc(bufferPos);
end;
if (locOldPos = bufferPos) and (p^ = #10) then begin
lineLength := 0;
Inc(p);
Inc(bufferPos);
linePos := 1;
Result := True;
end else if (locOldPos < bufferPos) then begin
lineLength := (bufferPos - locOldPos) + 1;
Move(locOldPointer^,line[1],lineLength);
if (p^ = #10) then begin
Dec(lineLength);
Inc(p);
Inc(bufferPos);
end;
linePos := 1;
Result := True;
end;
if Result then
Inc(lineCount);
end;
procedure SkipSpace();
begin
while (linePos < lineLength) and (line[linePos] in [' ',#9]) do
Inc(linePos);
end;
function NextToken() : ansistring;
const C_SEPARATORS = [';','#','.','[',']','*','@'];
var
k : Integer;
begin
SkipSpace();
k := linePos;
if (linePos <= lineLength) and (line[linePos] in C_SEPARATORS) then begin
Result := line[linePos];
Inc(linePos);
exit;
end;
while (linePos <= lineLength) and not(line[linePos] in (C_SEPARATORS+[' '])) do
Inc(linePos);
if (linePos > k) then begin
if (line[Min(linePos,lineLength)] in C_SEPARATORS) then
Result := Copy(line,k,(linePos-k))
else
Result := Copy(line,k,(linePos-k+1));
Result := Trim(Result);
end else begin
Result := '';
end;
end;
function ParseLine() : UnicodeString;
var
locCP : Cardinal;
s : ansistring;
k : Integer;
begin
SetLength(Result,24);
k := 0;
while True do begin
s := NextToken();
if (s = '') or (s[1] = '#') then
Break;
Inc(k);
if (k >= Length(Result)) then
SetLength(Result,(2*k));
locCP := StrToInt('$' + s);
if (locCP <= High(Word)) then
Word(Result[k]) := locCP
else begin
FromUCS4(locCP,PUnicodeChar(@Result[k])^,PUnicodeChar(@Result[k+1])^);
Inc(k);
end;
end;
SetLength(Result,k);
end;
procedure Prepare();
begin
bufferLength := ADataAStream.Size;
bufferPos := 0;
p := ADataAStream.Memory;
lineLength := 0;
SetLength(line,LINE_LENGTH);
end;
var
a, b : UnicodeString;
errorCount : Integer;
begin
errorCount := 0;
lineCount := 0;
SetLength(a,3);
FromUCS4($11143,PUnicodeChar(@a[1])^,PUnicodeChar(@a[2])^);
Word(a[3]):= $0334;
//Word(a[2]):= $0021;
{ $define stop_on_error}
Prepare();
while NextLine() do begin
a := ParseLine();
if (a <> '') then
Break;
end;
while NextLine() do begin
b := ParseLine();
if (b = '') then
Break;
//if (CompareSortKey(kb,ka) < 0) then begin
if (UnicodeCompareStr(b,a) < 0) then begin
Inc(errorCount);
{$ifdef stop_on_error}
Inc(TotalErrorCount,errorCount);
//IncrementalCompareString(b,a,ACollation);
WriteLn('Error #',errorCount, '; Line #',lineCount);
Write(' s1 = ');DumpString(a);Write(' ');DumpKey(ComputeSortKey(a,ACollation)); WriteLn();
Write(' s2 = ');DumpString(b);Write(' ');DumpKey(ComputeSortKey(b,ACollation)); WriteLn();
Exit;
{$endif stop_on_error}
end;
a := b;
end;
WriteLn('Line Count = ',lineCount);
WriteLn('Error Count = ',errorCount);
Inc(TotalErrorCount,errorCount);
end;
var
stream : TMemoryStream;
collation : PUCA_DataBook;
{$ifdef WINCE}
fs : TFileStream;
s : string;
{$endif WINCE}
begin
{$ifdef WINCE}
s := ExtractFilePath(ParamStr(0))+'tuca1-log.txt';
DeleteFile(s);
fs := TFileStream.Create(s,fmCreate);
AssignStream(Output,fs);
Rewrite(Output);
s := ExtractFilePath(ParamStr(0))+'tuca1-err.txt';
DeleteFile(s);
fs := TFileStream.Create(s,fmCreate);
AssignStream(ErrOutput,fs);
Rewrite(ErrOutput);
{$endif WINCE}
DefaultCollationName := 'DUCET';
collation := FindCollation('DUCET');
SetActiveCollation(collation);
stream := TMemoryStream.Create();
try
collation^.VariableWeight := TUCA_VariableKind.ucaNonIgnorable;
stream.LoadFromFile(ExtractFilePath(ParamStr(0)) + 'CollationTest_NON_IGNORABLE_SHORT.txt');
stream.Position := 0;
WriteLn('Testing CollationTest_NON_IGNORABLE_SHORT.txt ...');
CheckContent(stream);
WriteLn();WriteLn();
collation^.VariableWeight := TUCA_VariableKind.ucaShifted;
stream.LoadFromFile(ExtractFilePath(ParamStr(0)) + 'CollationTest_SHIFTED_SHORT.txt');
stream.Position := 0;
WriteLn('Testing CollationTest_SHIFTED_SHORT.txt ...');
CheckContent(stream);
finally
stream.Free();
end;
if (TotalErrorCount > 0) then begin
WriteLn('Failed.');
Halt(1);
end;
end.