mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-06-09 17:58:18 +02:00
LCL: improved UTF8FindNearestCharStart
git-svn-id: trunk@27831 -
This commit is contained in:
parent
b3b03bfaea
commit
af460b1644
@ -3316,27 +3316,48 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Find the start of the UTF8 character which contains BytePos,
|
||||||
|
Len is the length in bytes, BytePos starts at 0 }
|
||||||
function UTF8FindNearestCharStart(UTF8Str: PChar; Len: integer;
|
function UTF8FindNearestCharStart(UTF8Str: PChar; Len: integer;
|
||||||
BytePos: integer): integer;
|
BytePos: integer): integer;
|
||||||
var
|
|
||||||
CharLen: LongInt;
|
|
||||||
begin
|
begin
|
||||||
Result:=0;
|
Result:=0;
|
||||||
if UTF8Str<>nil then begin
|
if (UTF8Str<>nil) and (Len>0) and (BytePos>=0) then begin
|
||||||
if BytePos>Len then BytePos:=Len;
|
Result:=BytePos;
|
||||||
while (BytePos>0) do begin
|
if Result>Len then Result:=Len-1;
|
||||||
CharLen:=UTF8CharacterLength(UTF8Str);
|
if (Result>0) and (ord(UTF8Str[Result]) and %11000000=%10000000) then begin
|
||||||
dec(BytePos,CharLen);
|
dec(Result);
|
||||||
if (BytePos<0) then exit;
|
if (Result>0) and (ord(UTF8Str[Result]) and %11000000=%10000000) then begin
|
||||||
inc(Result,CharLen);
|
dec(Result);
|
||||||
if (BytePos=0) then exit;
|
if (Result>0) and (ord(UTF8Str[Result]) and %11000000=%10000000) then begin
|
||||||
inc(UTF8Str,CharLen);
|
dec(Result);
|
||||||
|
// should be four byte character
|
||||||
|
if (ord(UTF8Str[Result]) and %11111000<>%11110000) then begin
|
||||||
|
// broken UTF8 character
|
||||||
|
inc(Result,3);
|
||||||
|
end else begin
|
||||||
|
// is four byte character
|
||||||
|
end;
|
||||||
|
end else if (ord(UTF8Str[Result]) and %11110000<>%11100000) then begin
|
||||||
|
// broken UTF8 character, should be three byte
|
||||||
|
inc(Result,2);
|
||||||
|
end else
|
||||||
|
begin
|
||||||
|
// is three byte character
|
||||||
|
end;
|
||||||
|
end else if (ord(UTF8Str[Result]) and %11100000<>%11000000) then begin
|
||||||
|
// broken UTF8 character, should be two byte
|
||||||
|
inc(Result);
|
||||||
|
end else
|
||||||
|
begin
|
||||||
|
// is two byte character
|
||||||
|
end;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{ Len is the length in bytes of UTF8Str
|
{ Len is the length in bytes of UTF8Str
|
||||||
CharIndex is the position of the desired char, in chars
|
CharIndex is the position of the desired char (starting at 0), in chars
|
||||||
|
|
||||||
This function is similar to UTF8FindNearestCharStart
|
This function is similar to UTF8FindNearestCharStart
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user