Removes my original LowerCase and now uses Martins version as the default one

git-svn-id: trunk@32836 -
This commit is contained in:
sekelsenmat 2011-10-12 06:30:35 +00:00
parent 1e5d7433ae
commit dd582c9852
2 changed files with 7 additions and 171 deletions

View File

@ -66,8 +66,6 @@ procedure UTF8Insert(const source: String; var s: string; StartCharIndex: PtrInt
{$ifdef LAZUTF8_USE_TABLES}
function UnicodeLowercase(u: cardinal): cardinal;
function UTF8LowerCaseMattias(const s: utf8string): utf8string;
function UTF8LowerCaseMartin(const AInStr: utf8string): utf8string;
function UTF8LowerCaseMartin(const AInStr, ALocale: utf8string): utf8string;
{$endif}
function UTF8LowerCase(const AInStr: utf8string): utf8string;
function UTF8LowerCase(const AInStr, ALocale: utf8string): utf8string;
@ -1168,12 +1166,16 @@ begin
end;
{$endif}
function UTF8LowerCaseMartin(const AInStr: utf8string): utf8string;
function UTF8LowerCase(const AInStr: utf8string): utf8string;
begin
Result := UTF8LowerCaseMartin(AInStr, '');
Result := UTF8LowerCase(AInStr, '');
end;
function UTF8LowerCaseMartin(const AInStr, ALocale: utf8string): utf8string;
{
AInStr - The input string
ALocale - The locale. Use '' for maximum speed if one desires to ignore the locale
}
function UTF8LowerCase(const AInStr, ALocale: utf8string): utf8string;
const
ResultSizeIncr = 10;
var
@ -1597,154 +1599,6 @@ begin
end;
end;
function UTF8LowerCase(const AInStr: utf8string): utf8string;
begin
Result := UTF8LowerCase(AInStr, '');
end;
{
AInStr - The input string
ALocale - The locale. Use '' for maximum speed if one desires to ignore the locale
}
function UTF8LowerCase(const AInStr, ALocale: utf8string): utf8string;
var
i, InCounter, OutCounter: PtrInt;
OutStr: PChar = nil;
CharLen: integer;
CharProcessed: Boolean;
NewCharLen: integer;
NewChar, OldChar: Word;
TheStringIsUnique: Boolean = False;
// Language identification
IsTurkish: Boolean;
// This is an optimization for strings which are already fully lowercase in ASCII
procedure MakeUnique(); inline;
begin
if not TheStringIsUnique then
begin
UniqueString(Result);
OutStr := PChar(Result);
TheStringIsUnique := True;
end;
end;
begin
// Start with the same string, and progressively modify
Result:=AInStr;
// Language identification
IsTurkish := ALocale = 'tu';
InCounter:=1; // for AInStr
OutCounter := 0; // for Result
while InCounter<=length(AInStr) do
begin
{ First ASCII chars }
if (AInStr[InCounter] <= 'Z') and (AInStr[InCounter] >= 'A') then
begin
// Special turkish handling
// capital undotted I to small undotted i
if IsTurkish and (AInStr[InCounter] = 'I') then
begin
SetLength(Result,Length(Result)+1);// Increase the buffer
TheStringIsUnique := True;
OutStr := PChar(Result);
OutStr[OutCounter]:=#$C4;
OutStr[OutCounter+1]:=#$B1;
inc(InCounter);
inc(OutCounter,2);
end
else
begin
MakeUnique();
OutStr[OutCounter]:=chr(ord(AInStr[InCounter])+32);
inc(InCounter);
inc(OutCounter);
end;
end
{ Now fast ASCII }
else if AInStr[InCounter] <= #$7F then
begin
// Copy the character if the string was disaligned by previous changes
if (InCounter <> OutCounter+1) then
begin
MakeUnique();
OutStr[OutCounter]:=AInStr[InCounter];
end;
inc(InCounter);
inc(OutCounter);
end
{ Now everything else }
else
begin
CharLen := UTF8CharacterLength(@AInStr[InCounter]);
CharProcessed := False;
NewCharLen := CharLen;
if CharLen = 2 then
begin
OldChar := (Ord(AInStr[InCounter]) shl 8) or Ord(AInStr[InCounter+1]);
NewChar := 0;
// Major processing
case OldChar of
// Latin Characters 00000FFF http://en.wikibooks.org/wiki/Unicode/Character_reference/0000-0FFF
$C380..$C39E: NewChar := OldChar + $20;
// $C39F: ß already lowercase
$C481..$C4A9: if OldChar mod 2 = 0 then NewChar := OldChar + 1;
// Turkish capital dotted i to small dotted i
$C4B0:
begin
MakeUnique();
OutStr[OutCounter]:='i';
NewCharLen := 1;
CharProcessed := True;
end;
// $C4B1 turkish lowercase undotted ı
$C4B2..$C4B6: if OldChar mod 2 = 0 then NewChar := OldChar + 1;
// $C4B7: ĸ => K ?
$C4B8..$C588: if OldChar mod 2 = 1 then NewChar := OldChar + 1;
// $C589 ʼn => ?
$C58A..$C5B7: if OldChar mod 2 = 0 then NewChar := OldChar + 1;
$C5B8: NewChar := $C3BF; // Ÿ
$C5B9..$C8B3: if OldChar mod 2 = 1 then NewChar := OldChar + 1;
//
$CE91..$CE9F: NewChar := OldChar + $20; // Greek Characters
$CEA0..$CEA9: NewChar := OldChar + $E0; // Greek Characters
$D080..$D08F: NewChar := OldChar + $110; // Cyrillic alphabet
$D090..$D09F: NewChar := OldChar + $20; // Cyrillic alphabet
$D0A0..$D0AF: NewChar := OldChar + $E0; // Cyrillic alphabet
end;
if NewChar <> 0 then
begin
MakeUnique();
OutStr[OutCounter] := Chr(Hi(NewChar));
OutStr[OutCounter+1]:= Chr(Lo(NewChar));
CharProcessed := True;
end;
end;
// Copy the character if the string was disaligned by previous changed
// and no processing was done in this character
if (InCounter <> OutCounter+1) and (not CharProcessed) then
begin
MakeUnique();
for i := 0 to CharLen-1 do
OutStr[OutCounter+i] :=AInStr[InCounter+i];
end;
inc(InCounter, CharLen);
inc(OutCounter, NewCharLen);
end; // case
end; // while
// Final correction of the buffer size
SetLength(Result,OutCounter);
end;
function UTF8UpperCase(const AInStr: utf8string): utf8string;
begin
Result := UTF8UpperCase(AInStr, '');

View File

@ -43,7 +43,6 @@ end;
procedure AssertStringOperationUTF8LowerCase(AMsg, ALocale, AStr1, AStrExpected2: utf8string);
begin
AssertStringOperation(AMsg, AStr1, UTF8LowerCase(AStr1, ALocale), AStrExpected2);
AssertStringOperation('MARTIN:'+AMsg, AStr1, UTF8LowerCaseMartin(AStr1, ALocale), AStrExpected2);
end;
function DateTimeToMilliseconds(aDateTime: TDateTime): Int64;
@ -119,23 +118,6 @@ begin
// What shouldnt change
AssertStringOperationUTF8LowerCase('Chinese UTF8LowerCase 1', '', '名字叫嘉英,嘉陵江的嘉,英國的英', '名字叫嘉英,嘉陵江的嘉,英國的英');
// Performance test
Write('Martin LowerCase- Performance test took: ');
for j := 0 to 5 do begin
lStartTime := Now;
for i := 0 to TimerLoop do
begin
if j = 0 then Str := UTF8LowerCaseMartin('abcdefghijklmnopqrstuwvxyz');
if j = 1 then Str := UTF8LowerCaseMartin('ABCDEFGHIJKLMNOPQRSTUWVXYZ');
if j = 2 then Str := UTF8LowerCaseMartin('aąbcćdeęfghijklłmnńoóprsśtuwyzźż');
if j = 3 then Str := UTF8LowerCaseMartin('AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ');
if j = 4 then Str := UTF8LowerCaseMartin('АБВЕЁЖЗКЛМНОПРДЙГ');
if j = 5 then Str := UTF8LowerCaseMartin('名字叫嘉英,嘉陵江的嘉,英國的英');
end;
lTimeDiff := Now - lStartTime;
Write(Format(' %7d ms ', [DateTimeToMilliseconds(lTimeDiff)]));
end;
writeln;
// Performance test
Write('Mattias LowerCase- Performance test took: ');
for j := 0 to 5 do begin