rtl: a patch with cleanup and fixes to the key forming functions, from Inoussa (mantis #0022909)

git-svn-id: trunk@23767 -
This commit is contained in:
paul 2013-03-10 06:27:03 +00:00
parent 5e46732bc8
commit 02e0269764

View File

@ -25,7 +25,7 @@ unit unicodedata;
{$PACKENUM 1}
{$SCOPEDENUMS ON}
{$pointermath on}
{ $modeswitch advancedrecords}
{$define USE_INLINE}
{ $define uni_debug}
interface
@ -1379,9 +1379,9 @@ begin
c := Length(ACEList);
if (c = 0) then
exit(nil);
SetLength(r,((3+1{Level Separator})*c)); //SetLength(r,(3*c));
ral := 0;
levelCount := Length(ACEList[0].Weights);
SetLength(r,(levelCount*c + levelCount));
ral := 0;
for i := 0 to levelCount - 1 do begin
if not ACollation^.Backwards[i] then begin
pce := @ACEList[0];
@ -1419,9 +1419,9 @@ begin
c := Length(ACEList);
if (c = 0) then
exit(nil);
SetLength(r,((3+1{Level Separator})*c)); //SetLength(r,(3*c));
ral := 0;
levelCount := Length(ACEList[0].Weights);
SetLength(r,(levelCount*c + levelCount));
ral := 0;
for i := 0 to levelCount - 1 do begin
if not ACollation^.Backwards[i] then begin
pce := @ACEList[0];
@ -1460,9 +1460,9 @@ begin
c := Length(ACEList);
if (c = 0) then
exit(nil);
SetLength(r,((3+1{Level Separator})*c)); //SetLength(r,(3*c));
ral := 0;
levelCount := Length(ACEList[0].Weights);
SetLength(r,(levelCount*c + levelCount));
ral := 0;
for i := 0 to levelCount - 1 do begin
if not ACollation^.Backwards[i] then begin
variableState := False;
@ -1500,10 +1500,8 @@ begin
ral := ral + 1;
end;
ral := ral - 1;
//SetLength(r,ral);
//Result := r;
SetLength(Result,ral);
Move(r[0],Result[0],(ral*SizeOf(r[0])));
SetLength(r,ral);
Result := r;
end;
function FormKeyShiftedTrimmed(
@ -1553,314 +1551,6 @@ begin
Result := ComputeSortKey(@AString[1],Length(AString),ACollation);
end;
// ComputeSortKeyOLD
//   Builds a sort key for the text AStr (ALength code units) using the
//   collation data in ACollation.
//
//   Visible steps:
//     1. The input is normalized with NormalizeNFD.
//     2. The normalized string is walked code point by code point (surrogate
//        pairs are combined with ToUCS4). For each position the routine tries
//        to match the longest entry reachable from GetPropUCA by descending
//        through child entries; when no entry exists the weights are produced
//        by DeriveWeight instead. Collected weights are appended to "r".
//     3. "r" is trimmed to its used length and handed to one of the FormKey*
//        functions selected by ACollation^.VariableWeight.
//
//   Parameters:
//     AStr       - pointer to the first code unit of the text.
//     ALength    - number of code units in AStr.
//     ACollation - collation data book used for lookups and key forming.
//   Returns:
//     The formed sort key; nil when ALength = 0.
function ComputeSortKeyOLD(
const AStr : PUnicodeChar;
const ALength : SizeInt;
const ACollation : PUCA_DataBook
) : TUCASortKey;
var
r : TUCA_PropWeightsArray;
ral {used length of "r"}: Integer;
rl {capacity of "r"} : Integer;
// Enlarges the capacity of "r": adds AMinGrow when the current capacity is
// smaller than AMinGrow, otherwise doubles the capacity.
procedure GrowKey(const AMinGrow : Integer = 0);inline;
begin
if (rl < AMinGrow) then
rl := rl + AMinGrow
else
rl := 2 * rl;
SetLength(r,rl);
end;
// Appends the weights of AItem (AItem^.WeightLength entries) at the end of
// the used part of "r", growing "r" first when they would not fit.
procedure AddWeights(AItem : PUCA_PropItemRec);inline;
begin
if ((ral + AItem^.WeightLength) > rl) then
GrowKey(AItem^.WeightLength);
AItem^.GetWeightArray(@r[ral]);
ral := ral + AItem^.WeightLength;
end;
// Appends derived weights for ACodePoint: reserves two entries in "r", lets
// DeriveWeight fill them starting at r[ral], then advances ral by 2.
// NOTE(review): presumably DeriveWeight always writes exactly two entries -
// confirm against its definition.
procedure AddComputedWeights(ACodePoint : Cardinal);inline;
begin
if ((ral + 2) > rl) then
GrowKey();
DeriveWeight(ACodePoint,@r[ral]);
ral := ral + 2;
end;
var
i : Integer;
s : UnicodeString;
ps : PUnicodeChar;
cp : Cardinal;
pp : PUCA_PropItemRec;
ppLevel : Byte;
removedCharIndex : array of DWord;
removedCharIndexLength : DWord;
// Stack of matching-state snapshots (25 slots); locHistoryTop is the index of
// the most recent snapshot, -1 when the stack is empty.
locHistory : array[0..24] of record
i : Integer;
pp : PUCA_PropItemRec;
ppLevel : Byte;
cp : Cardinal;
removedCharIndexLength : DWord;
end;
locHistoryTop : Integer;
// Pushes the current matching state (i, pp, ppLevel, cp and
// removedCharIndexLength) onto locHistory.
// NOTE(review): no bounds check against the 25-slot capacity - confirm that
// match depth can never exceed it.
procedure RecordStep();inline;
begin
Inc(locHistoryTop);
locHistory[locHistoryTop].i := i;
locHistory[locHistoryTop].pp := pp;
locHistory[locHistoryTop].ppLevel := ppLevel;
locHistory[locHistoryTop].cp := cp;
locHistory[locHistoryTop].removedCharIndexLength := removedCharIndexLength;
end;
// Empties the snapshot stack.
procedure ClearHistory();inline;
begin
locHistoryTop := -1;
end;
// True when at least one snapshot is on the stack.
function HasHistory() : Boolean;inline;
begin
Result := (locHistoryTop >= 0);
end;
// Pops the most recent snapshot and restores i, cp, pp, ppLevel and
// removedCharIndexLength from it; ps is re-derived so that it points at s[i].
// The caller must check HasHistory() first.
procedure GoBack();inline;
begin
i := locHistory[locHistoryTop].i;
cp := locHistory[locHistoryTop].cp;
pp := locHistory[locHistoryTop].pp;
ppLevel := locHistory[locHistoryTop].ppLevel;
removedCharIndexLength := locHistory[locHistoryTop].removedCharIndexLength;
ps := @s[i];
Dec(locHistoryTop);
end;
var
c : Integer;
lastUnblockedNonstarterCCC : Byte;
// Tests the character at position AStartFrom of "s".
// Returns False when the position is past the end of the string, when it is
// already listed in removedCharIndex, when it is a high surrogate at the last
// position, or when its CCC property is 0 or not greater than
// lastUnblockedNonstarterCCC. Otherwise stores that CCC into
// lastUnblockedNonstarterCCC and returns True.
// NOTE(review): CCC is presumably the canonical combining class - confirm
// against the TUC_Prop declaration.
function IsUnblockedNonstarter(const AStartFrom : Integer) : Boolean;
var
k : DWord;
pk : PUnicodeChar;
puk : PUC_Prop;
begin
k := AStartFrom;
if (k > c) then
exit(False);
if (IndexDWord(removedCharIndex[0],removedCharIndexLength,k) >= 0) then
exit(False);
{if (k = (i+1)) or
( (k = (i+2)) and UnicodeIsHighSurrogate(s[i]) )
then
lastUnblockedNonstarterCCC := 0;}
pk := @s[k];
if UnicodeIsHighSurrogate(pk^) then begin
if (k = c) then
exit(False);
if UnicodeIsLowSurrogate(pk[1]) then
puk := GetProps(pk[0],pk[1])
else
puk := GetProps(Word(pk^));
end else begin
puk := GetProps(Word(pk^));
end;
if (puk^.CCC = 0) or (lastUnblockedNonstarterCCC >= puk^.CCC) then
exit(False);
lastUnblockedNonstarterCCC := puk^.CCC;
Result := True;
end;
// Marks position APos as consumed by appending it to removedCharIndex (the
// array is enlarged to 2*length + 2 when full). When APos starts a surrogate
// pair, APos+1 is recorded as well. Inc_I later skips recorded positions.
procedure RemoveChar(APos : Integer);inline;
begin
if (removedCharIndexLength >= Length(removedCharIndex)) then
SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
removedCharIndex[removedCharIndexLength] := APos;
Inc(removedCharIndexLength);
if UnicodeIsHighSurrogate(s[APos]) and (APos < c) and UnicodeIsLowSurrogate(s[APos+1]) then begin
if (removedCharIndexLength >= Length(removedCharIndex)) then
SetLength(removedCharIndex,(2*removedCharIndexLength + 2));
removedCharIndex[removedCharIndexLength] := APos+1;
Inc(removedCharIndexLength);
end;
end;
// Advances i and ps by one code unit, then keeps advancing while the new
// position is listed in removedCharIndex. With no removed positions this is a
// plain single-step increment.
procedure Inc_I();
begin
if (removedCharIndexLength = 0) then begin
Inc(i);
Inc(ps);
exit;
end;
while True do begin
Inc(i);
Inc(ps);
if (IndexDWord(removedCharIndex[0],removedCharIndexLength,i) = -1) then
Break;
end;
end;
var
k : Integer;
pp1 : PUCA_PropItemRec;
locIsSurrogate, ok : Boolean;
pu : PUC_Prop;
begin
if (ALength = 0) then
exit(nil);
c := ALength;
s := NormalizeNFD(AStr,c);
// From here on "c" is the length of the normalized string, not of the input.
c := Length(s);
// Initial capacity: three weight entries per code unit; grown on demand.
rl := 3*c;
SetLength(r,rl);
ral := 0;
ps := @s[1];
// pp = nil means "no match in progress"; otherwise pp is the entry matched
// so far and ppLevel its depth below the starting entry.
pp := nil;
ppLevel := 0;
locHistoryTop := -1;
removedCharIndexLength := 0;
i := 1;
while (i <= c) do begin
// Decode the code point at the current position into cp.
if UnicodeIsHighSurrogate(ps[0]) then begin
// A high surrogate in the last position ends the loop.
// NOTE(review): nothing is emitted for that code unit, nor for a match that
// is still pending in pp - confirm this is intended.
if (i = c) then
Break;
if UnicodeIsLowSurrogate(ps[1]) then begin
locIsSurrogate := True;
cp := ToUCS4(ps[0],ps[1]);
end else begin
locIsSurrogate := False;
cp := Word(ps[0]);
end;
end else begin
locIsSurrogate := False;
cp := Word(ps[0]);
end;
if (pp = nil) then begin // Start Matching
ppLevel := 0;
if locIsSurrogate then
pp := GetPropUCA(ps[0],ps[1],ACollation)
else
pp := GetPropUCA(ps[0],ACollation);
if (pp = nil) then begin
// No entry for this code point: use derived weights.
AddComputedWeights(cp);
ClearHistory();
end else begin
// Emit immediately when the entry cannot be extended (no children), or
// when it is valid and this is the last position of the string.
if (pp^.ChildCount = 0) or
(pp^.IsValid() and (i = c))
then begin
AddWeights(pp);
ClearHistory();
pp := nil;
end else begin
// The entry has children: remember this state and try to extend the
// match with the following character(s).
RecordStep();
end;
end;
end else begin
// A match is in progress: look for cp among the children of pp. The first
// child is located GetSelfOnlySize() bytes after pp; each following child
// is reached by adding the previous child's Size.
ok := False;
pp1 := PUCA_PropItemRec(PtrUInt(pp) + pp^.GetSelfOnlySize());
for k := 0 to pp^.ChildCount - 1 do begin
if (cp = pp1^.CodePoint) then begin
ok := True;
Break;
end;
pp1 := PUCA_PropItemRec(PtrUInt(pp1) + pp1^.Size);
end;
if not ok then begin
// permutations !
// cp is not a direct child. When cp has a nonzero CCC, scan the characters
// after it for which IsUnblockedNonstarter holds and try each as a child
// of pp; every character that matches is descended into and marked as
// removed so that the main loop skips it later.
pu := GetProps(cp);
if (pu^.CCC > 0) then begin
lastUnblockedNonstarterCCC := pu^.CCC;
if locIsSurrogate then
k := i + 2
else
k := i + 1;
while IsUnblockedNonstarter(k) do begin
// "ok" is reused here as "position k starts a surrogate pair".
ok := UnicodeIsHighSurrogate(s[k]) and (k<c) and UnicodeIsLowSurrogate(s[k+1]);
if ok then
pp1 := FindChild(ToUCS4(s[k],s[k+1]),pp)
else
pp1 := FindChild(Word(s[k]),pp);
if (pp1 <> nil) then begin
pp := pp1;
RemoveChar(k);
Inc(ppLevel);
RecordStep();
if (pp^.ChildCount = 0 ) then
Break;
end;
// Step over the low surrogate as well when k started a pair.
if ok then
Inc(k);
Inc(k);
end;
end;
if pp^.IsValid() then begin
// The entry reached so far is usable: emit it and restart matching.
// "Continue" skips the position advance at the bottom of the loop, so the
// current cp is processed again as the start of a new match.
AddWeights(pp);
//GoBack();
ClearHistory();
pp := nil;
ppLevel := 0;
Continue;
end else begin
//walk back
// Pop snapshots until one restores an entry for which IsValid() is true.
ok := False;
while HasHistory() do begin
GoBack();
if pp^.IsValid() then begin
AddWeights(pp);
ClearHistory();
pp := nil;
ppLevel := 0;
ok := True;
Break;
end;
end;
if ok then begin
// i/ps were restored by GoBack to the position of the emitted entry; move
// past it (both units when it is a surrogate pair) and resume matching.
if UnicodeIsHighSurrogate(ps[0]) and (i<c) and UnicodeIsLowSurrogate(ps[1]) then begin
Inc(i);
Inc(ps);
end;
Inc_I();
Continue;
end;
// No snapshot held a valid entry: fall back to derived weights for cp.
// NOTE(review): pp is left non-nil here, so the next iteration continues
// in "match in progress" mode - confirm this is intended.
if (pp <> nil) then
AddComputedWeights(cp);
end;
end else begin
// cp is a direct child: descend. Emit right away when the child has no
// children of its own, otherwise record the step and keep extending.
pp := pp1;
if (pp^.ChildCount = 0) then begin
AddWeights(pp);
ClearHistory();
pp := nil;
ppLevel := 0;
end else begin
Inc(ppLevel);
RecordStep();
end;
end;
end;
// Skip the low surrogate of a pair before the regular advance.
if locIsSurrogate then begin
Inc(ps);
Inc(i);
end;
//
Inc_I();
end;
// Trim "r" to the number of entries actually used, then form the final key
// according to the collation's variable-weight setting (FormKeyShifted is
// also the fallback for any other value).
SetLength(r,ral);
case ACollation^.VariableWeight of
TUCA_VariableKind.ucaShifted : Result := FormKeyShifted(r,ACollation);
TUCA_VariableKind.ucaBlanked : Result := FormKeyBlanked(r,ACollation);
TUCA_VariableKind.ucaNonIgnorable : Result := FormKeyNonIgnorable(r,ACollation);
TUCA_VariableKind.ucaShiftedTrimmed : Result := FormKeyShiftedTrimmed(r,ACollation);
else
Result := FormKeyShifted(r,ACollation);
end;
end;
//--------------------------------------------------------------------------
function ComputeRawSortKey(
const AStr : PUnicodeChar;
const ALength : SizeInt;