mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-04-16 06:09:29 +02:00
Kills martin 2, adds more commenting to martin 1, removes the table usage from martin 1
git-svn-id: trunk@32834 -
This commit is contained in:
parent
3208d981ee
commit
2371688c04
@ -68,7 +68,6 @@ function UnicodeLowercase(u: cardinal): cardinal;
|
||||
function UTF8LowerCaseMattias(const s: utf8string): utf8string;
|
||||
function UTF8LowerCaseMartin(const AInStr: utf8string): utf8string;
|
||||
function UTF8LowerCaseMartin(const AInStr, ALocale: utf8string): utf8string;
|
||||
function UTF8LowerCaseMartin2(const AInStr: utf8string; ALocale: utf8string=''): utf8string;
|
||||
{$endif}
|
||||
function UTF8LowerCase(const AInStr: utf8string): utf8string;
|
||||
function UTF8LowerCase(const AInStr, ALocale: utf8string): utf8string;
|
||||
@ -1174,20 +1173,6 @@ begin
|
||||
Result := UTF8LowerCaseMartin(AInStr, '');
|
||||
end;
|
||||
|
||||
var
|
||||
UTF8LowerCaseMartinTable: array [char] of integer;
|
||||
|
||||
// Fills the global UTF8LowerCaseMartinTable with one classification value per
// possible byte value (#0..#255):
//   0 - default; the lowercase routines that consult this table skip such
//       bytes in their initial scan (they test for "= 0")
//   1 - ASCII capital letter 'A'..'Z'
//   2 - one of the lead bytes #$C3, #$C4, #$C5..#$C8, #$CE, #$D0
// Takes no parameters and returns nothing; its only effect is writing the table.
procedure InitUTF8LowerCaseMartinTable;
|
||||
var
|
||||
c: Char;
|
||||
begin
|
||||
// Visit every byte value once; later assignments override earlier ones.
for c := #0 to #255 do begin
|
||||
// Start from "nothing to do" for this byte.
UTF8LowerCaseMartinTable[c] := 0;
|
||||
// ASCII capitals.
if c in ['A'..'Z'] then UTF8LowerCaseMartinTable[c] := 1;
|
||||
// Lead bytes of the two-byte sequences the lowercase routines handle.
if c in [#$C3, #$C4, #$C5..#$C8, #$CE, #$D0] then UTF8LowerCaseMartinTable[c] := 2;
|
||||
end;
|
||||
end;
|
||||
|
||||
function UTF8LowerCaseMartin(const AInStr, ALocale: utf8string): utf8string;
|
||||
const
|
||||
ResultSizeIncr = 10;
|
||||
@ -1212,10 +1197,17 @@ begin
|
||||
InStr := PChar(AInStr);
|
||||
InStrEnd := InStr + length(AInStr); // points behind last char
|
||||
|
||||
while (InStr < InStrEnd) and (UTF8LowerCaseMartinTable[InStr^] = 0)
|
||||
do begin
|
||||
// Does a fast initial parsing of the string to maybe avoid doing
|
||||
// UniqueString if the resulting string will be identical
|
||||
while (InStr < InStrEnd) do
|
||||
begin
|
||||
c := InStr^;
|
||||
case c of
|
||||
'A'..'Z',#$C3, #$C4, #$C5..#$C8, #$CE, #$D0: Break;
|
||||
// already lower, or otherwise not affected
|
||||
inc(InStr);
|
||||
else
|
||||
inc(InStr);
|
||||
end;
|
||||
end;
|
||||
|
||||
if InStr >= InStrEnd then
|
||||
@ -1605,273 +1597,6 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
// Returns a lowercased copy of the UTF-8 string AInStr.
//
// Parameters:
//   AInStr  - input string; it is only read, never modified.
//   ALocale - optional locale code. The only value that changes behaviour is
//             'tu', which turns ASCII 'I' into the two-byte sequence C4 B1.
//             NOTE(review): the ISO 639-1 code for Turkish is 'tr'; confirm
//             which code callers actually pass.
// Returns:
//   AInStr itself when the initial scan finds no byte flagged in
//   UTF8LowerCaseMartinTable; otherwise a unique copy in which ASCII 'A'..'Z'
//   and selected two-byte sequences with lead bytes C3, C4, C5, C6..C7, C8,
//   CE and D0 have been rewritten.
//
// The result is edited in place. CounterDiff records how far the output
// position has drifted from the input position (decremented when one input
// byte becomes two output bytes, incremented when two input bytes become one).
// While it is zero, unchanged bytes already sit at the right place in Result
// and are not rewritten; while it is non-zero they must be copied explicitly.
//
// NOTE(review): every two-byte branch reads InStr[1] and advances by 2 without
// checking InStrEnd, so a lead byte at the very end of the input is presumably
// not handled safely - confirm.
function UTF8LowerCaseMartin2(const AInStr: utf8string; ALocale: utf8string=''): utf8string;
|
||||
var
|
||||
// NOTE(review): "i" is not referenced anywhere in the body below.
i, CounterDiff: PtrInt;
|
||||
InStr, InStrEnd, OutStr: PChar;
|
||||
// Language identification
|
||||
IsTurkish: Boolean;
|
||||
c: Char;
|
||||
|
||||
begin
|
||||
// Start with Result referring to the input; it is only made unique once a
// byte that needs processing has been found.
Result:=AInStr;
|
||||
InStr := PChar(AInStr);
|
||||
InStrEnd := InStr + length(AInStr); // points behind last char
|
||||
|
||||
|
||||
// TODO: can be extended for some unicode chars too
|
||||
// Fast pre-scan using the pre-initialized global table: skip every byte
// whose table entry is 0.
|
||||
while (InStr < InStrEnd) and (UTF8LowerCaseMartinTable[InStr^] = 0)
|
||||
do begin
|
||||
// already lower, or otherwise not affected
|
||||
inc(InStr);
|
||||
end;
|
||||
|
||||
// Nothing flagged: return the input unchanged.
if InStr >= InStrEnd then
|
||||
exit;
|
||||
|
||||
// Language identification
|
||||
IsTurkish := ALocale = 'tu';
|
||||
|
||||
// Get a private buffer and place OutStr at the same offset the scan reached.
UniqueString(Result);
|
||||
OutStr := PChar(Result) + (InStr - PChar(AInStr));
|
||||
CounterDiff := 0;
|
||||
|
||||
// Main conversion loop: dispatch on the current input byte.
while InStr < InStrEnd do
|
||||
begin
|
||||
c := InStr^;
|
||||
case c of // if NOT TABLE
|
||||
'A'..'Z':
|
||||
begin
|
||||
{ First ASCII chars }
|
||||
// Special turkish handling
|
||||
// capital undotted I to small undotted i
|
||||
if IsTurkish and (c = 'I') then
|
||||
begin
|
||||
// Turn OutStr into an offset from the start of Result, grow Result by one
// byte, then rebuild the pointer from the (possibly new) buffer address.
OutStr := PChar(OutStr - PChar(Result));
|
||||
SetLength(Result,Length(Result)+1);// Increase the buffer
|
||||
OutStr := PtrInt(OutStr) + PChar(Result);
|
||||
// Write the two-byte sequence C4 B1 in place of the single input byte.
OutStr^ := #$C4;
|
||||
inc(OutStr);
|
||||
OutStr^ := #$B1;
|
||||
// Output is now one byte further along than the input.
dec(CounterDiff);
|
||||
inc(InStr);
|
||||
inc(OutStr);
|
||||
end
|
||||
else
|
||||
begin
|
||||
// Plain ASCII capital: add 32 to get the lowercase letter.
OutStr^ := chr(ord(c)+32);
|
||||
inc(InStr);
|
||||
inc(OutStr);
|
||||
end;
|
||||
end;
|
||||
|
||||
// Lead byte C3: second bytes 80..9E get +$20; anything else is kept as is.
#$C3:
|
||||
begin
|
||||
// $C39F: ß already lowercase
|
||||
if InStr[1] in [#$80..#$9E] then begin
|
||||
// The lead byte only needs rewriting when output and input are misaligned.
if (CounterDiff <> 0) then begin
|
||||
OutStr^ :=InStr[0];
|
||||
OutStr[1] := chr(ord(InStr[1]) + $20);
|
||||
end else begin
|
||||
OutStr[1] := chr(ord(InStr[1]) + $20);
|
||||
end;
|
||||
end else begin
|
||||
// Unchanged pair: copy both bytes only when misaligned.
if (CounterDiff <> 0) then begin
|
||||
OutStr^ :=InStr[0];
|
||||
OutStr[1] :=InStr[1];
|
||||
end;
|
||||
end;
|
||||
inc(InStr, 2);
|
||||
inc(OutStr, 2);
|
||||
end;
|
||||
// Lead byte C4: behaviour depends on the range and parity of the second byte.
// NOTE(review): second bytes not listed below (e.g. #$80, #$AA..#$AF, #$B1,
// #$B7) match no branch, so nothing is written to OutStr for them even when
// CounterDiff <> 0 - confirm whether that is intended.
#$C4:
|
||||
begin
|
||||
c := InStr[1];
|
||||
case c of
|
||||
#$81..#$A9, #$B2..#$B6: //0
|
||||
begin
|
||||
// Even second byte: the lowercase form is the next value up.
if ord(c) mod 2 = 0 then begin
|
||||
if (CounterDiff <> 0) then begin
|
||||
OutStr^ := InStr[0];
|
||||
OutStr[1] := chr(ord(c) + 1);
|
||||
end else begin
|
||||
OutStr[1] := chr(ord(c) + 1);
|
||||
end;
|
||||
end else begin
|
||||
// Odd second byte: kept; copied only when misaligned.
if (CounterDiff <> 0) then begin
|
||||
OutStr^ :=InStr[0];
|
||||
OutStr[1] :=c;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
#$B8..#$FF: //1
|
||||
begin
|
||||
// In this range the parity rule is reversed: odd second bytes get +1.
if ord(c) mod 2 = 1 then begin
|
||||
if (CounterDiff <> 0) then begin
|
||||
OutStr^ := InStr[0];
|
||||
OutStr[1] := chr(ord(c) + 1);
|
||||
end else begin
|
||||
OutStr[1] := chr(ord(c) + 1);
|
||||
end;
|
||||
end else begin
|
||||
if (CounterDiff <> 0) then begin
|
||||
OutStr^ :=InStr[0];
|
||||
OutStr[1] :=c;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
// C4 B0 becomes the single ASCII byte 'i'. OutStr is stepped back by one so
// that the shared "inc(OutStr, 2)" below advances it by one byte in total.
#$B0:
|
||||
begin
|
||||
OutStr^ := 'i';
|
||||
dec(OutStr);
|
||||
inc(CounterDiff, 1);
|
||||
end;
|
||||
end;
|
||||
inc(InStr, 2);
|
||||
inc(OutStr, 2);
|
||||
end;
|
||||
// Lead byte C5: parity rule again depends on the range of the second byte.
// NOTE(review): second byte #$89 matches no branch and is therefore never
// copied when CounterDiff <> 0 - confirm.
#$C5:
|
||||
begin
|
||||
c := InStr[1];
|
||||
case c of
|
||||
#$8A..#$B7: //0
|
||||
begin
|
||||
// Even second byte gets +1.
if ord(c) mod 2 = 0 then begin
|
||||
if (CounterDiff <> 0) then begin
|
||||
OutStr^ := InStr[0];
|
||||
OutStr[1] := chr(ord(c) + 1);
|
||||
end else begin
|
||||
OutStr[1] := chr(ord(c) + 1);
|
||||
end;
|
||||
end else begin
|
||||
if (CounterDiff <> 0) then begin
|
||||
OutStr^ :=InStr[0];
|
||||
OutStr[1] :=c;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
#$00..#$88, #$B9..#$FF: //1
|
||||
begin
|
||||
// Odd second byte gets +1.
if ord(c) mod 2 = 1 then begin
|
||||
if (CounterDiff <> 0) then begin
|
||||
OutStr^ := InStr[0];
|
||||
OutStr[1] := chr(ord(c) + 1);
|
||||
end else begin
|
||||
OutStr[1] := chr(ord(c) + 1);
|
||||
end;
|
||||
end else begin
|
||||
if (CounterDiff <> 0) then begin
|
||||
OutStr^ :=InStr[0];
|
||||
OutStr[1] :=c;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
// C5 B8 is replaced by the pair C3 BF (different lead byte, same length).
#$B8: // Ÿ
|
||||
begin
|
||||
OutStr[0] := #$C3;
|
||||
OutStr[1] := #$BF;
|
||||
end;
|
||||
end;
|
||||
inc(InStr, 2);
|
||||
inc(OutStr, 2);
|
||||
end;
|
||||
// Lead bytes C6 and C7: every odd second byte gets +1, even ones are kept.
#$C6..#$C7:
|
||||
begin
|
||||
c := InStr[1];
|
||||
if ord(c) mod 2 = 1 then begin
|
||||
if (CounterDiff <> 0) then begin
|
||||
OutStr^ := InStr[0];
|
||||
OutStr[1] := chr(ord(c) + 1);
|
||||
end else begin
|
||||
OutStr[1] := chr(ord(c) + 1);
|
||||
end;
|
||||
end else begin
|
||||
if (CounterDiff <> 0) then begin
|
||||
OutStr^ :=InStr[0];
|
||||
OutStr[1] :=c;
|
||||
end;
|
||||
end;
|
||||
inc(InStr, 2);
|
||||
inc(OutStr, 2);
|
||||
end;
|
||||
// Lead byte C8: odd second bytes up to #$B3 get +1; everything else is kept.
#$C8:
|
||||
begin
|
||||
c := InStr[1];
|
||||
if (c in [#$00..#$B3]) and (ord(c) mod 2 = 1) then begin
|
||||
if (CounterDiff <> 0) then begin
|
||||
OutStr^ := InStr[0];
|
||||
end;
|
||||
OutStr[1] := chr(ord(c) + 1);
|
||||
end else begin
|
||||
if (CounterDiff <> 0) then begin
|
||||
OutStr^ :=InStr[0];
|
||||
OutStr[1] :=c;
|
||||
end;
|
||||
end;
|
||||
inc(InStr, 2);
|
||||
inc(OutStr, 2);
|
||||
end;
|
||||
// Lead byte CE: only second bytes 91..9F and A0..A9 are handled.
// NOTE(review): any other second byte matches no branch and is not copied
// when CounterDiff <> 0 - confirm.
#$CE:
|
||||
begin
|
||||
c := InStr[1];
|
||||
case c of
|
||||
#$91..#$9F:
|
||||
begin
|
||||
// Same lead byte, second byte +$20.
if (CounterDiff <> 0) then begin
|
||||
OutStr^ := InStr[0];
|
||||
end;
|
||||
OutStr[1] := chr(ord(c) + $20);
|
||||
end;
|
||||
#$A0..#$A9:
|
||||
begin
|
||||
// Lead byte becomes CF, second byte is reduced by $10.
// NOTE(review): CE A0 is U+03A0 and its lowercase U+03C0 encodes as CF 80,
// which would be "c - $20" rather than "c - $10" - verify.
OutStr^ := chr(ord(InStr[0])+1);
|
||||
OutStr[1] := chr(ord(c) - $10);
|
||||
end;
|
||||
end;
|
||||
inc(InStr, 2);
|
||||
inc(OutStr, 2);
|
||||
end;
|
||||
// Lead byte D0: second bytes 80..AF are handled in three sub-ranges.
// NOTE(review): second bytes outside 80..AF match no branch and are not
// copied when CounterDiff <> 0 - confirm.
#$D0:
|
||||
begin
|
||||
c := InStr[1];
|
||||
case c of
|
||||
#$80..#$8F:
|
||||
begin
|
||||
// Lead byte becomes D1, second byte +$10.
OutStr^ := chr(ord(InStr[0])+1);
|
||||
OutStr[1] := chr(ord(c) + $10);
|
||||
end;
|
||||
#$90..#$9F:
|
||||
begin
|
||||
// Same lead byte, second byte +$20.
if (CounterDiff <> 0) then begin
|
||||
OutStr^ := InStr[0];
|
||||
end;
|
||||
OutStr[1] := chr(ord(c) + $20);
|
||||
end;
|
||||
#$A0..#$AF:
|
||||
begin
|
||||
// Lead byte becomes D1, second byte is reduced by $10.
// NOTE(review): D0 A0 is U+0420 and its lowercase U+0440 encodes as D1 80,
// which would be "c - $20" rather than "c - $10" - verify.
OutStr^ := chr(ord(InStr[0])+1);
|
||||
OutStr[1] := chr(ord(c) - $10);
|
||||
end;
|
||||
end;
|
||||
inc(InStr, 2);
|
||||
inc(OutStr, 2);
|
||||
end;
|
||||
else
|
||||
begin
|
||||
// Copy the character if the string was misaligned by previous changes
|
||||
if (CounterDiff <> 0) then
|
||||
OutStr^:=c;
|
||||
inc(InStr);
|
||||
inc(OutStr);
|
||||
end;
|
||||
end; // Case InStr^
|
||||
end; // while
|
||||
|
||||
// Final correction of the buffer size: keep exactly the bytes written so far
|
||||
SetLength(Result,OutStr - PChar(Result));
|
||||
end;
|
||||
|
||||
function UTF8LowerCase(const AInStr: utf8string): utf8string;
|
||||
begin
|
||||
Result := UTF8LowerCase(AInStr, '');
|
||||
@ -2182,7 +1907,6 @@ initialization
|
||||
InternalInit;
|
||||
{$ifdef LAZUTF8_USE_TABLES}
|
||||
InitUnicodeTables;
|
||||
InitUTF8LowerCaseMartinTable
|
||||
{$endif}
|
||||
|
||||
end.
|
||||
|
@ -44,7 +44,6 @@ procedure AssertStringOperationUTF8LowerCase(AMsg, ALocale, AStr1, AStrExpected2
|
||||
begin
|
||||
AssertStringOperation(AMsg, AStr1, UTF8LowerCase(AStr1, ALocale), AStrExpected2);
|
||||
AssertStringOperation('MARTIN:'+AMsg, AStr1, UTF8LowerCaseMartin(AStr1, ALocale), AStrExpected2);
|
||||
AssertStringOperation('MARTIN2:'+AMsg, AStr1, UTF8LowerCaseMartin2(AStr1, ALocale), AStrExpected2);
|
||||
end;
|
||||
|
||||
function DateTimeToMilliseconds(aDateTime: TDateTime): Int64;
|
||||
@ -77,6 +76,9 @@ begin
|
||||
// Cyrillic
|
||||
AssertStringOperationUTF8UpperCase('Russian UTF8UpperCase 1', '', 'АБВЕЁЖЗКЛМНОПРДЙГ СУФХЦЧШЩЪЫЬЭЮЯИТ', 'АБВЕЁЖЗКЛМНОПРДЙГ СУФХЦЧШЩЪЫЬЭЮЯИТ');
|
||||
AssertStringOperationUTF8UpperCase('Russian UTF8UpperCase 2', '', 'абвеёжзклмнопрдйг суфхцчшщъыьэюяит', 'АБВЕЁЖЗКЛМНОПРДЙГ СУФХЦЧШЩЪЫЬЭЮЯИТ');
|
||||
AssertStringOperationUTF8UpperCase('Cyrillic UTF8UpperCase 1', '', 'Ѡѡ Ѣѣ Ѥѥ Ѧѧ Ѩѩ Ѫѫ Ѭѭ Ѯѯ Ѱѱ Ѳѳ Ѵѵ Ѷѷ Ѹѹ Ѻѻ Ѽѽ Ѿѿ Ҁҁ ', 'ѡѡ ѣѣ ѥѥ ѧѧ ѩѩ ѫѫ ѭѭ ѯѯ ѱѱ ѳѳ ѵѵ ѷѷ ѹѹ ѻѻ ѽѽ ѿѿ ҁҁ');
|
||||
AssertStringOperationUTF8UpperCase('Cyrillic UTF8UpperCase 2', '', 'Ҋҋ Ҍҍ Ҏҏ Ґґ Ғғ Ҕҕ Җҗ Ҙҙ Ққ Ҝҝ Ҟҟ Ҡҡ Ңң Ҥҥ Ҧҧ Ҩҩ Ҫҫ Ҭҭ Үү Ұұ Ҳҳ Ҵҵ Ҷҷ Ҹҹ Һһ Ҽҽ Ҿҿ', 'ҋҋ ҍҍ ҏҏ ґґ ғғ ҕҕ җҗ ҙҙ ққ ҝҝ ҟҟ ҡҡ ңң ҥҥ ҧҧ ҩҩ ҫҫ ҭҭ үү ұұ ҳҳ ҵҵ ҷҷ ҹҹ һһ ҽҽ ҿҿ');
|
||||
|
||||
// What shouldn't change
|
||||
AssertStringOperationUTF8UpperCase('Chinese UTF8UpperCase 1', '', '名字叫嘉英,嘉陵江的嘉,英國的英', '名字叫嘉英,嘉陵江的嘉,英國的英');
|
||||
|
||||
@ -134,22 +136,6 @@ begin
|
||||
Write(Format(' %7d ms ', [DateTimeToMilliseconds(lTimeDiff)]));
|
||||
end;
|
||||
writeln;
|
||||
Write('Martin LowerCase2 Performance test took: ');
|
||||
for j := 0 to 5 do begin
|
||||
lStartTime := Now;
|
||||
for i := 0 to TimerLoop do
|
||||
begin
|
||||
if j = 0 then Str := UTF8LowerCaseMartin2('abcdefghijklmnopqrstuwvxyz');
|
||||
if j = 1 then Str := UTF8LowerCaseMartin2('ABCDEFGHIJKLMNOPQRSTUWVXYZ');
|
||||
if j = 2 then Str := UTF8LowerCaseMartin2('aąbcćdeęfghijklłmnńoóprsśtuwyzźż');
|
||||
if j = 3 then Str := UTF8LowerCaseMartin2('AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ');
|
||||
if j = 4 then Str := UTF8LowerCaseMartin2('АБВЕЁЖЗКЛМНОПРДЙГ');
|
||||
if j = 5 then Str := UTF8LowerCaseMartin2('名字叫嘉英,嘉陵江的嘉,英國的英');
|
||||
end;
|
||||
lTimeDiff := Now - lStartTime;
|
||||
Write(Format(' %7d ms ', [DateTimeToMilliseconds(lTimeDiff)]));
|
||||
end;
|
||||
writeln;
|
||||
// Performance test
|
||||
Write('Mattias LowerCase- Performance test took: ');
|
||||
for j := 0 to 5 do begin
|
||||
@ -190,6 +176,7 @@ begin
|
||||
TestUTF8UpperCase();
|
||||
WriteLn('======= LowerCase =======');
|
||||
TestUTF8LowerCase();
|
||||
WriteLn('Please press enter to continue');
|
||||
readln;
|
||||
end.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user