mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-06-22 17:48:28 +02:00
LazUtils: New implementation of UTF8StringReplace. Issue #41291.
This commit is contained in:
parent
602fb83512
commit
4c1a98b6fa
@ -134,7 +134,7 @@ function UTF8UpperCaseFast(const AText: String): String;
|
|||||||
function UTF8LowerCaseFast(const AText: String): String;
|
function UTF8LowerCaseFast(const AText: String): String;
|
||||||
|
|
||||||
function UTF8SwapCase(const AInStr: string; const ALanguage: string=''): string;
|
function UTF8SwapCase(const AInStr: string; const ALanguage: string=''): string;
|
||||||
// Capitalize the first letters of every word
|
// Capitalize the first letter of every word
|
||||||
function UTF8ProperCase(const AInStr: string; const WordDelims: TSysCharSet): string;
|
function UTF8ProperCase(const AInStr: string; const WordDelims: TSysCharSet): string;
|
||||||
function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean = true): PtrInt;
|
function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean = true): PtrInt;
|
||||||
function FindInvalidUTF8Character(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean = true): PtrInt; deprecated 'Use FindInvalidUTF8Codepoint instead.';
|
function FindInvalidUTF8Character(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean = true): PtrInt; deprecated 'Use FindInvalidUTF8Codepoint instead.';
|
||||||
@ -1210,96 +1210,53 @@ end;
|
|||||||
|
|
||||||
function UTF8StringReplace(const S, OldPattern, NewPattern: String;
|
function UTF8StringReplace(const S, OldPattern, NewPattern: String;
|
||||||
Flags: TReplaceFlags; out Count: Integer; const ALanguage: string=''): String;
|
Flags: TReplaceFlags; out Count: Integer; const ALanguage: string=''): String;
|
||||||
// same algorithm as fpc's StringReplace, but using UTF8LowerCase
|
|
||||||
// for case insensitive search
|
|
||||||
var
|
var
|
||||||
Srch, OldP: string;
|
SrcS, OldPtrn, TempS: string;
|
||||||
P, PrevP, PatLength, NewPatLength, Cnt: Integer;
|
PSrc, POrig: PChar;
|
||||||
c, d: PChar;
|
CharLen, OldPatLen: Integer;
|
||||||
|
OkToReplace: Boolean;
|
||||||
begin
|
begin
|
||||||
Srch := S;
|
|
||||||
OldP := OldPattern;
|
|
||||||
Count := 0;
|
Count := 0;
|
||||||
PatLength:=Length(OldPattern);
|
if OldPattern='' then
|
||||||
if PatLength=0 then
|
Exit(S);
|
||||||
|
if rfIgnoreCase in Flags then
|
||||||
begin
|
begin
|
||||||
Result:=S;
|
SrcS := UTF8LowerCase(S,ALanguage);
|
||||||
Exit;
|
OldPtrn := UTF8LowerCase(OldPattern,ALanguage);
|
||||||
end;
|
|
||||||
|
|
||||||
if (rfIgnoreCase in Flags) then
|
|
||||||
begin
|
|
||||||
Srch := UTF8LowerCase(Srch,ALanguage);
|
|
||||||
OldP := UTF8LowerCase(OldP,ALanguage);
|
|
||||||
end;
|
|
||||||
PatLength := Length(OldP);
|
|
||||||
|
|
||||||
if (Length(NewPattern) = PatLength) then
|
|
||||||
begin //length will not change
|
|
||||||
Result := S;
|
|
||||||
P := 1;
|
|
||||||
repeat
|
|
||||||
P := Pos(OldP,Srch,P);
|
|
||||||
if (P > 0) then
|
|
||||||
begin
|
|
||||||
Inc(Count);
|
|
||||||
Move(NewPattern[1],Result[P],PatLength*SizeOf(Char));
|
|
||||||
if not (rfReplaceAll in Flags) then Exit;
|
|
||||||
Inc(P,PatLength);
|
|
||||||
end;
|
|
||||||
until (P = 0);
|
|
||||||
end
|
end
|
||||||
else
|
else begin
|
||||||
|
SrcS := S;
|
||||||
|
OldPtrn := OldPattern;
|
||||||
|
end;
|
||||||
|
OldPatLen := Length(OldPtrn);
|
||||||
|
PSrc := PChar(SrcS);
|
||||||
|
POrig := PChar(S);
|
||||||
|
Result := '';
|
||||||
|
OkToReplace := True;
|
||||||
|
while PSrc < PChar(SrcS) + Length(SrcS) do
|
||||||
begin
|
begin
|
||||||
//Different pattern length -> Result length will change
|
if OkToReplace and (CompareByte(PSrc^, OldPtrn[1], OldPatLen) = 0) then
|
||||||
//To avoid creating a lot of temporary strings, we count how many
|
|
||||||
//replacements we're going to make.
|
|
||||||
P := 1;
|
|
||||||
repeat
|
|
||||||
P:=Pos(OldP,Srch,P);
|
|
||||||
if (P > 0) then
|
|
||||||
begin
|
|
||||||
Inc(P,PatLength);
|
|
||||||
Inc(Count);
|
|
||||||
if not (rfReplaceAll in Flags) then Break;
|
|
||||||
end;
|
|
||||||
until (P = 0);
|
|
||||||
if (Count = 0) then
|
|
||||||
begin
|
begin
|
||||||
Result:=S;
|
// Found: Replace with NewPattern and move forward
|
||||||
Exit;
|
Inc(Count);
|
||||||
|
Result := Result + NewPattern;
|
||||||
|
Inc(PSrc, OldPatLen); // Skip the found string
|
||||||
|
// Move forward also in original string.
|
||||||
|
Inc(POrig, Length(OldPattern));
|
||||||
|
if not (rfReplaceAll in Flags) then
|
||||||
|
OkToReplace := False; // Replace only once.
|
||||||
|
end
|
||||||
|
else begin
|
||||||
|
// Move forward in possibly lowercased string
|
||||||
|
CharLen := UTF8CodepointSize(PSrc);
|
||||||
|
Inc(PSrc, CharLen); // Next Codepoint
|
||||||
|
// Copy a codepoint from original string and move forward
|
||||||
|
CharLen := UTF8CodepointSize(POrig);
|
||||||
|
SetLength(TempS{%H-}, CharLen);
|
||||||
|
System.Move(POrig^, TempS[1], CharLen);
|
||||||
|
Result := Result + TempS; // Copy one codepoint from original string
|
||||||
|
Inc(POrig, CharLen); // Next Codepoint
|
||||||
end;
|
end;
|
||||||
NewPatLength := Length(NewPattern);
|
|
||||||
SetLength(Result, Length(S) + Count*(NewPatLength - PatLength));
|
|
||||||
P := 1;
|
|
||||||
PrevP := 0;
|
|
||||||
c := PChar(Result);
|
|
||||||
d := PChar(S);
|
|
||||||
repeat
|
|
||||||
P:=Pos(OldP, Srch, P);
|
|
||||||
if (P > 0) then
|
|
||||||
begin
|
|
||||||
Cnt := P - PrevP - 1;
|
|
||||||
if (Cnt > 0) then
|
|
||||||
begin
|
|
||||||
Move(d^, c^, Cnt*SizeOf(Char));
|
|
||||||
Inc(c,Cnt);
|
|
||||||
Inc(d,Cnt);
|
|
||||||
end;
|
|
||||||
if (NewPatLength > 0) then
|
|
||||||
begin
|
|
||||||
Move(NewPattern[1], c^, NewPatLength*SizeOf(Char));
|
|
||||||
Inc(c,NewPatLength);
|
|
||||||
end;
|
|
||||||
Inc(P,PatLength);
|
|
||||||
Inc(d,PatLength);
|
|
||||||
PrevP:=P-1;
|
|
||||||
if not (rfReplaceAll in Flags) then Break;
|
|
||||||
end;
|
|
||||||
until (P = 0);
|
|
||||||
Cnt := Length(S) - PrevP;
|
|
||||||
if (Cnt > 0) then
|
|
||||||
Move(d^, c^, Cnt*SizeOf(Char));
|
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user