mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-04-07 01:57:57 +02:00
Merged revision(s) 51970 #ae57408e17, 51972-51973 #34be9ae2d0-#34be9ae2d0 from trunk:
docs: add fugue icons to acknowledgements ........ LazUtf8: implement Utf8TryFindCodepointStart. ........ LazUtf8: Refactor UTF8FindNearestCharStart. Resolves Issue #0029851. ........ git-svn-id: branches/fixes_1_6@51995 -
This commit is contained in:
parent
d1619263b1
commit
7c789e1ef4
@ -78,6 +78,8 @@ function UTF8ToDoubleByteString(const s: string): string;
|
||||
function UTF8ToDoubleByte(UTF8Str: PChar; Len: PtrInt; DBStr: PByte): PtrInt;
|
||||
function UTF8FindNearestCharStart(UTF8Str: PChar; Len: SizeInt;
|
||||
BytePos: SizeInt): SizeInt;
|
||||
function Utf8TryFindCodepointStart(AString: PChar; var CurPos: PChar; out CharLen: Integer): Boolean;
|
||||
function Utf8TryFindCodepointStart(const AString: String; var Index: Integer; out CharLen: Integer): Boolean;
|
||||
// find the n-th UTF8 character, ignoring BIDI
|
||||
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
|
||||
// find the byte index of the n-th UTF8 character, ignoring BIDI (byte len of substr)
|
||||
@ -579,46 +581,81 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
{ Find the start of the UTF8 character which contains BytePos,
|
||||
Len is length in byte, BytePos starts at 0 }
|
||||
function UTF8FindNearestCharStart(UTF8Str: PChar; Len: SizeInt; BytePos: SizeInt
|
||||
): SizeInt;
|
||||
|
||||
{ Tries to find the start of a valid UTF8 codepoint that contains the character pointed to by CurPos
|
||||
- AString: pointer to the (start of the) string
|
||||
- CurPos: pointer to the character inside AString that we want to get the information of
|
||||
* if the function succeeds, CurPos will point to the start of the valid UTF8 codepoint
|
||||
* if the function fails, CurPos will not be changed
|
||||
Note: if CurPos points beyond the end of AString you will get a crash!
|
||||
- CharLen: the length of the UTF8 codepoint in bytes, if the function succeeds
|
||||
- Returns:
|
||||
True if the character pointed to by Curpos is part of a valid UTF8 codepoint (1 to 4 bytes),
|
||||
otherwise it returns False. }
|
||||
function Utf8TryFindCodepointStart(AString: PChar; var CurPos: PChar; out CharLen: Integer): Boolean;
|
||||
var
|
||||
SavedPos: PChar;
|
||||
begin
|
||||
Result:=0;
|
||||
if (UTF8Str<>nil) and (Len>0) and (BytePos>=0) then begin
|
||||
Result:=BytePos;
|
||||
if Result>Len then Result:=Len-1;
|
||||
if (Result>0) and (ord(UTF8Str[Result]) and %11000000=%10000000) then begin
|
||||
dec(Result);
|
||||
if (Result>0) and (ord(UTF8Str[Result]) and %11000000=%10000000) then begin
|
||||
dec(Result);
|
||||
if (Result>0) and (ord(UTF8Str[Result]) and %11000000=%10000000) then begin
|
||||
dec(Result);
|
||||
// should be four byte character
|
||||
if (ord(UTF8Str[Result]) and %11111000<>%11110000) then begin
|
||||
// broken UTF8 character
|
||||
inc(Result,3);
|
||||
end else begin
|
||||
// is four byte character
|
||||
end;
|
||||
end else if (ord(UTF8Str[Result]) and %11110000<>%11100000) then begin
|
||||
// broken UTF8 character, should be three byte
|
||||
inc(Result,2);
|
||||
end else
|
||||
begin
|
||||
// is three byte character
|
||||
end;
|
||||
end else if (ord(UTF8Str[Result]) and %11100000<>%11000000) then begin
|
||||
// broken UTF8 character, should be two byte
|
||||
inc(Result);
|
||||
end else
|
||||
Result := False;
|
||||
CharLen := 0;
|
||||
if (not (Assigned(AString) and Assigned(CurPos)))
|
||||
or (CurPos < AString) then Exit;
|
||||
SavedPos := CurPos;
|
||||
//Note: UTF8CharacterStrictLength will NOT "look" beyond the terminating #0 of a PChar, so this is safe with AnsiStrings
|
||||
CharLen := UTF8CharacterStrictLength(CurPos);
|
||||
if (CharLen > 0) then Exit(True);
|
||||
if (CurPos > AString) then
|
||||
begin
|
||||
Dec(CurPos); //-1
|
||||
//is it second byte of 2..4 byte codepoint?
|
||||
CharLen := UTF8CharacterStrictLength(CurPos);
|
||||
if (CharLen > 1) then Exit(True);
|
||||
if (CurPos > AString) then
|
||||
begin
|
||||
Dec(CurPos); //-2
|
||||
//is it third byte of 3..4 byte codepoint?
|
||||
CharLen := UTF8CharacterStrictLength(CurPos);
|
||||
if (CharLen > 2) then Exit(True);
|
||||
if (CurPos > AString) then
|
||||
begin
|
||||
// is two byte character
|
||||
Dec(CurPos); //-3
|
||||
//is it fourth byte of 4 byte codepoint?
|
||||
CharLen := UTF8CharacterStrictLength(CurPos);
|
||||
if (CharLen = 4) then Exit(True);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
//At this point we failed: we are NOT inside a valid UTF8 codepoint!
|
||||
CurPos := SavedPos;
|
||||
end;
|
||||
|
||||
{ String/index variant of Utf8TryFindCodepointStart.
  - AString: the string to inspect
  - Index: byte index into AString (1-based, as used by AString[Index]);
      * if the function succeeds, Index is moved back to the first byte of the
        codepoint that contains the original position
      * if the function fails, Index is not changed
  - CharLen: the length of the UTF8 codepoint in bytes if the function succeeds,
      0 if Index is out of range
  - Returns:
      False if Index is outside 1..Length(AString) (this includes an empty string),
      otherwise the result of the PChar overload. }
function Utf8TryFindCodepointStart(const AString: String; var Index: Integer; out CharLen: Integer): Boolean;
var
  StartPtr, FoundPtr: PChar;
begin
  Result := False;
  CharLen := 0;
  // @AString[Index] is only valid for an existing character: guard against an
  // empty string and against an Index before the first or after the last byte.
  if (Index < 1) or (Index > Length(AString)) then Exit;
  StartPtr := @AString[Index];
  FoundPtr := StartPtr;
  Result := Utf8TryFindCodepointStart(PChar(AString), FoundPtr, CharLen);
  // The PChar overload leaves FoundPtr untouched on failure, so the difference
  // is 0 in that case and Index keeps its value.
  Index := Index - (StartPtr - FoundPtr);
end;
|
||||
|
||||
{ Find the start of the UTF8 character which contains BytePos.
  If BytePos is not part of a valid UTF8 codepoint the function returns BytePos.
  Len is the length in bytes, BytePos starts at 0; a BytePos beyond the last
  byte is clamped to Len-1.
  Returns 0 if UTF8Str is nil, Len <= 0 or BytePos < 0. }
function UTF8FindNearestCharStart(UTF8Str: PChar; Len: SizeInt; BytePos: SizeInt): SizeInt;
var
  CurPos: PChar;
  CharLen: Integer;
begin
  Result := 0;
  // Without these guards an empty buffer would clamp BytePos to -1 and a
  // negative offset would be returned to the caller.
  if (UTF8Str = nil) or (Len <= 0) or (BytePos < 0) then Exit;
  if (BytePos > Len - 1) then BytePos := Len - 1;
  CurPos := UTF8Str + BytePos;
  // No need to check the result value: when it returns False, CurPos is reset
  // to its original value, and that is what we want to return in that case.
  Utf8TryFindCodepointStart(UTF8Str, CurPos, CharLen);
  Result := CurPos - UTF8Str;
end;
|
||||
|
||||
|
||||
{ Len is the length in bytes of UTF8Str
|
||||
CharIndex is the position of the desired char (starting at 0), in chars
|
||||
}
|
||||
|
@ -15,6 +15,9 @@ http://www.famfamfam.com/lab/icons/silk/
|
||||
Tango Icon Library
|
||||
http://tango.freedesktop.org/Tango_Icon_Library
|
||||
|
||||
Fugue Icons
|
||||
http://p.yusukekamiyamane.com/
|
||||
|
||||
|
||||
#RemObjects Pascal Script
|
||||
Created by Carlo Kok / RemObjects Software
|
||||
|
Loading…
Reference in New Issue
Block a user