mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-08-17 18:19:34 +02:00
LazUtf8: Refactor UTF8FindNearestCharStart. Resolves Issue #0029851.
git-svn-id: trunk@51973 -
This commit is contained in:
parent
34be9ae2d0
commit
b192fb9760
@ -643,45 +643,22 @@ begin
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
{ Find the start of the UTF8 character which contains BytePos,
|
{ Find the start of the UTF8 character which contains BytePos,
|
||||||
|
if BytePos is not part of a valid Utf8Codepoint the function returns BytePos
|
||||||
Len is length in byte, BytePos starts at 0 }
|
Len is length in byte, BytePos starts at 0 }
|
||||||
function UTF8FindNearestCharStart(UTF8Str: PChar; Len: SizeInt; BytePos: SizeInt
|
function UTF8FindNearestCharStart(UTF8Str: PChar; Len: SizeInt; BytePos: SizeInt): SizeInt;
|
||||||
): SizeInt;
|
var
|
||||||
|
CurPos: PChar;
|
||||||
|
CharLen: Integer;
|
||||||
begin
|
begin
|
||||||
Result:=0;
|
if (BytePos > Len-1) then BytePos := Len - 1;
|
||||||
if (UTF8Str<>nil) and (Len>0) and (BytePos>=0) then begin
|
CurPos := Utf8Str + BytePos;
|
||||||
Result:=BytePos;
|
//No need to check the result value, since when it returns False CurPos will be reset
|
||||||
if Result>Len then Result:=Len-1;
|
//to its original value, and that's what we want to return in that case
|
||||||
if (Result>0) and (ord(UTF8Str[Result]) and %11000000=%10000000) then begin
|
Utf8TryFindCodepointStart(Utf8Str, CurPos, CharLen);
|
||||||
dec(Result);
|
Result := CurPos - Utf8Str;
|
||||||
if (Result>0) and (ord(UTF8Str[Result]) and %11000000=%10000000) then begin
|
|
||||||
dec(Result);
|
|
||||||
if (Result>0) and (ord(UTF8Str[Result]) and %11000000=%10000000) then begin
|
|
||||||
dec(Result);
|
|
||||||
// should be four byte character
|
|
||||||
if (ord(UTF8Str[Result]) and %11111000<>%11110000) then begin
|
|
||||||
// broken UTF8 character
|
|
||||||
inc(Result,3);
|
|
||||||
end else begin
|
|
||||||
// is four byte character
|
|
||||||
end;
|
|
||||||
end else if (ord(UTF8Str[Result]) and %11110000<>%11100000) then begin
|
|
||||||
// broken UTF8 character, should be three byte
|
|
||||||
inc(Result,2);
|
|
||||||
end else
|
|
||||||
begin
|
|
||||||
// is three byte character
|
|
||||||
end;
|
|
||||||
end else if (ord(UTF8Str[Result]) and %11100000<>%11000000) then begin
|
|
||||||
// broken UTF8 character, should be two byte
|
|
||||||
inc(Result);
|
|
||||||
end else
|
|
||||||
begin
|
|
||||||
// is two byte character
|
|
||||||
end;
|
|
||||||
end;
|
|
||||||
end;
|
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
{ Len is the length in bytes of UTF8Str
|
{ Len is the length in bytes of UTF8Str
|
||||||
CharIndex is the position of the desired char (starting at 0), in chars
|
CharIndex is the position of the desired char (starting at 0), in chars
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user