mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-06-02 11:32:49 +02:00
LazUtils: Change "Character" to "Codepoint" in LazUTF8 function names to be more accurate and to avoid confusion.
git-svn-id: trunk@56692 -
This commit is contained in:
parent
f0981a06af
commit
6810c626df
@ -206,7 +206,7 @@ var
|
||||
begin
|
||||
Result:=s;
|
||||
if Result='' then exit;
|
||||
i:=FindInvalidUTF8Character(PChar(Result),length(Result));
|
||||
i:=FindInvalidUTF8Codepoint(PChar(Result),length(Result));
|
||||
if i<0 then exit;
|
||||
Result:=ISO_8859_1ToUTF8(Result);
|
||||
end;
|
||||
|
@ -5341,7 +5341,7 @@ var
|
||||
var
|
||||
l: LongInt;
|
||||
begin
|
||||
l:=UTF8CharacterLength(@Src[APos]);
|
||||
l:=UTF8CodepointSize(@Src[APos]);
|
||||
inc(APos);
|
||||
dec(l);
|
||||
while (l>0) and (APos<ParsedLen) do begin
|
||||
|
@ -91,7 +91,7 @@ type
|
||||
// *** Wrappers for LazUTF8 ***
|
||||
function UTF8ToSys(const s: string): string; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function SysToUTF8(const s: string): string; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8CharacterLength(p: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8CodepointSize(p: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
// environment
|
||||
function ParamStrUTF8(Param: Integer): string; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function GetEnvironmentStringUTF8(Index : Integer): String; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
@ -380,9 +380,9 @@ begin
|
||||
Result:=LazUTF8.SysToUTF8(s);
|
||||
end;
|
||||
|
||||
function UTF8CharacterLength(p: PChar): integer;
|
||||
function UTF8CodepointSize(p: PChar): integer;
|
||||
begin
|
||||
Result:=LazUTF8.UTF8CharacterLength(p);
|
||||
Result:=LazUTF8.UTF8CodepointSize(p);
|
||||
end;
|
||||
|
||||
function ParamStrUTF8(Param: Integer): string;
|
||||
@ -1700,7 +1700,7 @@ function FilenameIsMatching(const Mask, Filename: string; MatchExactly: boolean
|
||||
{$ENDIF}
|
||||
if FileP^ in [#0,PathDelim] then exit;
|
||||
inc(MaskP);
|
||||
inc(FileP,LazUTF8.UTF8CharacterLength(FileP));
|
||||
inc(FileP,LazUTF8.UTF8CodepointSize(FileP));
|
||||
end;
|
||||
'*':
|
||||
begin
|
||||
@ -1808,8 +1808,8 @@ function FilenameIsMatching(const Mask, Filename: string; MatchExactly: boolean
|
||||
while not (MaskP^ in [#0,SpecialChar,PathDelim,'?','*','{',',','}']) do
|
||||
begin
|
||||
if FileP^ in [#0,PathDelim] then exit;
|
||||
inc(MaskP,LazUTF8.UTF8CharacterLength(MaskP));
|
||||
inc(FileP,LazUTF8.UTF8CharacterLength(FileP));
|
||||
inc(MaskP,LazUTF8.UTF8CodepointSize(MaskP));
|
||||
inc(FileP,LazUTF8.UTF8CodepointSize(FileP));
|
||||
end;
|
||||
if LazFileUtils.CompareFilenames(MaskStart,MaskP-MaskStart,FileStart,FileP-FileStart)<>0 then
|
||||
exit;
|
||||
|
@ -214,7 +214,7 @@ begin
|
||||
if AString[n] = '''' then Result := Result + '''';
|
||||
end;
|
||||
#192..#255: begin // Maybe utf8
|
||||
u := UTF8CharacterLength(@AString[n]);
|
||||
u := UTF8CodepointSize(@AString[n]);
|
||||
if (u > 0) and (n+u-1 <= l) then begin
|
||||
if not InString then
|
||||
ToggleInString;
|
||||
|
@ -964,7 +964,7 @@ begin
|
||||
Result := 0;
|
||||
SetLength(Desc, Length(S));
|
||||
while i<=Length(s) do begin
|
||||
b := UTF8CharacterStrictLength(@S[i]);
|
||||
b := UTF8CodepointStrictSize(@S[i]);
|
||||
inc(i,b);
|
||||
inc(Result);
|
||||
Desc[Result] := Char(b);
|
||||
@ -1096,7 +1096,7 @@ begin
|
||||
SpcSize := 0;
|
||||
while i<=Length(str) do
|
||||
begin
|
||||
b := UTF8CharacterStrictLength(@Str[i]);
|
||||
b := UTF8CodepointStrictSize(@Str[i]);
|
||||
spc := (b=1) and (str[i]=' ');
|
||||
inc(len);
|
||||
j := Length(result)-1;
|
||||
|
@ -217,7 +217,7 @@ begin
|
||||
end
|
||||
else
|
||||
begin
|
||||
Unicode := UTF8CharacterToUnicode(Src, CharLen);
|
||||
Unicode := UTF8CodepointToUnicode(Src, CharLen);
|
||||
Inc(Src, CharLen);
|
||||
i := UTF8CharConvFunc(Unicode);
|
||||
//writeln(Format('%X', [i]));
|
||||
|
@ -544,8 +544,8 @@ begin
|
||||
exit;
|
||||
end;
|
||||
|
||||
charlen := UTF8CharacterLength(pstr);
|
||||
glyphCode := UTF8CharacterToUnicode(pstr, charlen);
|
||||
charlen := UTF8CodepointSize(pstr);
|
||||
glyphCode := UTF8CodepointToUnicode(pstr, charlen);
|
||||
inc(pstr,charlen);
|
||||
|
||||
glyphWidth := CharWidthFromUnicode(glyphCode);
|
||||
@ -1487,7 +1487,7 @@ begin
|
||||
left := length(AText);
|
||||
while left > 0 do
|
||||
begin
|
||||
charcode := UTF8CharacterToUnicode(pstr, charlen);
|
||||
charcode := UTF8CodepointToUnicode(pstr, charlen);
|
||||
inc(pstr,charlen);
|
||||
dec(left,charlen);
|
||||
g := Glyph[CharIndex[charcode]];
|
||||
@ -1572,7 +1572,7 @@ begin
|
||||
left := length(AText);
|
||||
while left > 0 do
|
||||
begin
|
||||
charcode := UTF8CharacterToUnicode(pstr, charlen);
|
||||
charcode := UTF8CodepointToUnicode(pstr, charlen);
|
||||
inc(pstr,charlen);
|
||||
dec(left,charlen);
|
||||
g := Glyph[CharIndex[charcode]];
|
||||
@ -1647,7 +1647,7 @@ begin
|
||||
resultIndex := 0;
|
||||
while left > 0 do
|
||||
begin
|
||||
charcode := UTF8CharacterToUnicode(pstr, charlen);
|
||||
charcode := UTF8CodepointToUnicode(pstr, charlen);
|
||||
inc(pstr,charlen);
|
||||
dec(left,charlen);
|
||||
|
||||
@ -1757,7 +1757,7 @@ begin
|
||||
if left <= 0 then break;
|
||||
end;
|
||||
end;
|
||||
charcode := UTF8CharacterToUnicode(pstr, charlen);
|
||||
charcode := UTF8CodepointToUnicode(pstr, charlen);
|
||||
inc(pstr,charlen);
|
||||
dec(left,charlen);
|
||||
g := Glyph[CharIndex[charcode]];
|
||||
|
@ -507,15 +507,15 @@ var
|
||||
DstP:=PChar(DstChars);
|
||||
Item:=List;
|
||||
for i:=1 to Count do begin
|
||||
Item^.SrcLen:=UTF8CharacterLength(SrcP);
|
||||
Item^.SrcLen:=UTF8CodepointSize(SrcP);
|
||||
Move(SrcP^,Item^.Src[0],Item^.SrcLen);
|
||||
if (DstP^<>#0) or (DstP-PChar(DstChars)<length(DstChars)) then begin
|
||||
Item^.DstLen:=UTF8CharacterLength(DstP);
|
||||
Item^.DstLen:=UTF8CodepointSize(DstP);
|
||||
Move(DstP^,Item^.Dst[0],Item^.DstLen);
|
||||
end;
|
||||
inc(Item);
|
||||
inc(SrcP,UTF8CharacterLength(SrcP));
|
||||
inc(DstP,UTF8CharacterLength(DstP));
|
||||
inc(SrcP,UTF8CodepointSize(SrcP));
|
||||
inc(DstP,UTF8CodepointSize(DstP));
|
||||
end;
|
||||
end;
|
||||
|
||||
@ -565,7 +565,7 @@ var
|
||||
while true do begin
|
||||
c:=Src^;
|
||||
if (c=#0) and (Src-PChar(s)=length(s)) then break;
|
||||
clen:=UTF8CharacterLength(Src);
|
||||
clen:=UTF8CodepointSize(Src);
|
||||
NewCharP:=Src;
|
||||
NewCharLen:=clen;
|
||||
// do a quick test via Pos
|
||||
@ -613,7 +613,7 @@ var
|
||||
while true do begin
|
||||
c:=p^;
|
||||
if (c=#0) and (p-PChar(s)=length(s)) then break;
|
||||
clen:=UTF8CharacterLength(p);
|
||||
clen:=UTF8CodepointSize(p);
|
||||
// do a quick test via Pos
|
||||
i:=Pos(c,SrcChars);
|
||||
if i>0 then begin
|
||||
|
@ -187,7 +187,7 @@ begin
|
||||
else
|
||||
Result := 1
|
||||
{$ELSE}
|
||||
Result := UTF8CharacterLengthFast(p);
|
||||
Result := UTF8CodepointSizeFast(p);
|
||||
{$ENDIF}
|
||||
end;
|
||||
|
||||
|
@ -1031,7 +1031,7 @@ begin
|
||||
case s[SrcPos] of
|
||||
#192..#240:
|
||||
begin
|
||||
OldCode:=UTF8CharacterToUnicode(@s[SrcPos],CharLen);
|
||||
OldCode:=UTF8CodepointToUnicode(@s[SrcPos],CharLen);
|
||||
NewCode:=UnicodeLowercase(OldCode);
|
||||
if NewCode=OldCode then begin
|
||||
inc(DstPos,CharLen);
|
||||
@ -1054,7 +1054,7 @@ begin
|
||||
case s[SrcPos] of
|
||||
#192..#240:
|
||||
begin
|
||||
OldCode:=UTF8CharacterToUnicode(@s[SrcPos],CharLen);
|
||||
OldCode:=UTF8CodepointToUnicode(@s[SrcPos],CharLen);
|
||||
NewCode:=UnicodeLowercase(OldCode);
|
||||
if NewCode=OldCode then begin
|
||||
System.Move(s[SrcPos],Result[DstPos],CharLen);
|
||||
@ -1107,7 +1107,7 @@ begin
|
||||
|
||||
#192..#240: // Now chars with multiple bytes
|
||||
begin
|
||||
OldCode:=UTF8CharacterToUnicode(p,CharLen);
|
||||
OldCode:=UTF8CodepointToUnicode(p,CharLen);
|
||||
NewCode:=UnicodeLowercase(OldCode);
|
||||
if NewCode<>OldCode then begin
|
||||
if not Changed then begin
|
||||
|
@ -73,9 +73,11 @@ function GetEnvironmentVariableUTF8(const EnvVar: string): String;
|
||||
function SysErrorMessageUTF8(ErrorCode: Integer): String;
|
||||
|
||||
// Returns the size of one codepoint in bytes.
|
||||
function UTF8CharacterLength(p: PChar): integer; inline;
|
||||
// Fast version of UTF8CharacterLength. Assumes the UTF-8 codepoint is valid.
|
||||
function UTF8CharacterLengthFast(p: PChar): integer; inline;
|
||||
function UTF8CodepointSize(p: PChar): integer; inline;
|
||||
function UTF8CharacterLength(p: PChar): integer; deprecated 'Use UTF8CodepointSize instead.';
|
||||
// Fast version of UTF8CodepointSize. Assumes the UTF-8 codepoint is valid.
|
||||
function UTF8CodepointSizeFast(p: PChar): integer; inline;
|
||||
function UTF8CharacterLengthFast(p: PChar): integer; deprecated 'Use UTF8CodepointSizeFast instead.';
|
||||
|
||||
function UTF8Length(const s: string): PtrInt; inline;
|
||||
function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt;
|
||||
@ -84,7 +86,8 @@ function UTF8LengthFast(const s: string): PtrInt; inline;
|
||||
function UTF8LengthFast(p: PChar; ByteCount: PtrInt): PtrInt;
|
||||
|
||||
// Functions dealing with unicode number U+xxx.
|
||||
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal;
|
||||
function UTF8CodepointToUnicode(p: PChar; out CharLen: integer): Cardinal;
|
||||
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal; deprecated 'Use UTF8CodepointToUnicode instead.';
|
||||
function UnicodeToUTF8(CodePoint: cardinal): string; // UTF32 to UTF8
|
||||
function UnicodeToUTF8(CodePoint: cardinal; Buf: PChar): integer; // UTF32 to UTF8
|
||||
function UnicodeToUTF8SkipErrors(CodePoint: cardinal; Buf: PChar): integer; // UTF32 to UTF8
|
||||
@ -95,13 +98,16 @@ function UTF8FindNearestCharStart(UTF8Str: PChar; Len: SizeInt;
|
||||
BytePos: SizeInt): SizeInt;
|
||||
function Utf8TryFindCodepointStart(AString: PChar; var CurPos: PChar; out CharLen: Integer): Boolean;
|
||||
function Utf8TryFindCodepointStart(const AString: String; var Index: Integer; out CharLen: Integer): Boolean;
|
||||
// find the n-th UTF8 character, ignoring BIDI
|
||||
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
|
||||
// find the byte index of the n-th UTF8 character, ignoring BIDI (byte len of substr)
|
||||
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
|
||||
// find the n-th UTF8 codepoint, ignoring BIDI
|
||||
function UTF8CodepointStart(UTF8Str: PChar; Len, CodepointIndex: PtrInt): PChar;
|
||||
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; deprecated 'Use UTF8CodepointStart instead.';
|
||||
// find the byte index of the n-th UTF8 codepoint, ignoring BIDI (byte len of substr)
|
||||
function UTF8CodepointToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
|
||||
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; deprecated 'Use UTF8CodepointToByteIndex instead.';
|
||||
procedure UTF8FixBroken(P: PChar); overload;
|
||||
procedure UTF8FixBroken(var S: string); overload;
|
||||
function UTF8CharacterStrictLength(P: PChar): integer;
|
||||
function UTF8CodepointStrictSize(P: PChar): integer;
|
||||
function UTF8CharacterStrictLength(P: PChar): integer; deprecated 'Use UTF8CodepointStrictSize instead.';
|
||||
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string;
|
||||
|
||||
function UTF8Pos(const SearchForText, SearchInText: string; StartPos: SizeInt = 1): PtrInt;
|
||||
@ -124,8 +130,8 @@ function UTF8LowerString(const s: string): string;
|
||||
function UTF8UpperCase(const AInStr: string; ALanguage: string=''): string;
|
||||
function UTF8UpperString(const s: string): string;
|
||||
function UTF8SwapCase(const AInStr: string; ALanguage: string=''): string;
|
||||
function FindInvalidUTF8Character(p: PChar; Count: PtrInt;
|
||||
StopOnNonUTF8: Boolean = true): PtrInt;
|
||||
function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean = true): PtrInt;
|
||||
function FindInvalidUTF8Character(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean = true): PtrInt; deprecated 'Use FindInvalidUTF8Codepoint instead.';
|
||||
function UTF8StringOfChar(AUtf8Char: String; N: Integer): String;
|
||||
function UTF8AddChar(AUtf8Char: String; const S: String; N: Integer): String;
|
||||
function UTF8AddCharR(AUtf8Char: String; const S: String; N: Integer): String;
|
||||
@ -395,7 +401,7 @@ begin
|
||||
Result := SysToUTF8(SysUtils.SysErrorMessage(ErrorCode));
|
||||
end;
|
||||
|
||||
function UTF8CharacterLengthFull(p: PChar): integer;
|
||||
function UTF8CodepointSizeFull(p: PChar): integer;
|
||||
begin
|
||||
case p^ of
|
||||
#0..#191: // %11000000
|
||||
@ -433,14 +439,19 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
function UTF8CharacterLength(p: PChar): integer; inline;
|
||||
function UTF8CodepointSize(p: PChar): integer; inline;
|
||||
begin
|
||||
if p=nil then exit(0);
|
||||
if p^<#192 then exit(1);
|
||||
Result:=UTF8CharacterLengthFull(p);
|
||||
Result:=UTF8CodepointSizeFull(p);
|
||||
end;
|
||||
|
||||
function UTF8CharacterLengthFast(p: PChar): integer;
|
||||
function UTF8CharacterLength(p: PChar): integer;
|
||||
begin
|
||||
Result := UTF8CodepointSize(p);
|
||||
end;
|
||||
|
||||
function UTF8CodepointSizeFast(p: PChar): integer;
|
||||
begin
|
||||
case p^ of
|
||||
#0..#191 : Result := 1;
|
||||
@ -459,6 +470,11 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
function UTF8CharacterLengthFast(p: PChar): integer;
|
||||
begin
|
||||
Result := UTF8CodepointSizeFast(p);
|
||||
end;
|
||||
|
||||
function UTF8Length(const s: string): PtrInt;
|
||||
begin
|
||||
Result:=UTF8Length(PChar(s),length(s));
|
||||
@ -471,7 +487,7 @@ begin
|
||||
Result:=0;
|
||||
while (ByteCount>0) do begin
|
||||
inc(Result);
|
||||
CharLen:=UTF8CharacterLength(p);
|
||||
CharLen:=UTF8CodepointSize(p);
|
||||
inc(p,CharLen);
|
||||
dec(ByteCount,CharLen);
|
||||
end;
|
||||
@ -534,7 +550,7 @@ begin
|
||||
Result := ByteCount - Result;
|
||||
end;
|
||||
|
||||
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal;
|
||||
function UTF8CodepointToUnicode(p: PChar; out CharLen: integer): Cardinal;
|
||||
{ if p=nil then CharLen=0 otherwise CharLen>0
|
||||
If there is an encoding error the Result is 0 and CharLen=1.
|
||||
Use UTF8FixBroken to fix UTF-8 encoding.
|
||||
@ -608,6 +624,11 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal;
|
||||
begin
|
||||
Result := UTF8CodepointToUnicode(p, CharLen);
|
||||
end;
|
||||
|
||||
function UnicodeToUTF8(CodePoint: cardinal; Buf: PChar): integer;
|
||||
|
||||
procedure RaiseInvalidUnicode;
|
||||
@ -697,7 +718,7 @@ begin
|
||||
DestPos:=DBStr;
|
||||
Result:=0;
|
||||
while Len>0 do begin
|
||||
u:=UTF8CharacterToUnicode(SrcPos,CharLen);
|
||||
u:=UTF8CodepointToUnicode(SrcPos,CharLen);
|
||||
DestPos^:=byte((u shr 8) and $ff);
|
||||
inc(DestPos);
|
||||
DestPos^:=byte(u and $ff);
|
||||
@ -728,26 +749,26 @@ begin
|
||||
if (not (Assigned(AString) and Assigned(CurPos)))
|
||||
or (CurPos < AString) then Exit;
|
||||
SavedPos := CurPos;
|
||||
//Note: UTF8CharacterStrictLength will NOT "look" beyond the terminating #0 of a PChar, so this is safe with AnsiStrings
|
||||
CharLen := UTF8CharacterStrictLength(CurPos);
|
||||
//Note: UTF8CodepointStrictSize will NOT "look" beyond the terminating #0 of a PChar, so this is safe with AnsiStrings
|
||||
CharLen := UTF8CodepointStrictSize(CurPos);
|
||||
if (CharLen > 0) then Exit(True);
|
||||
if (CurPos > AString) then
|
||||
begin
|
||||
Dec(CurPos); //-1
|
||||
//is it second byte of 2..4 byte codepoint?
|
||||
CharLen := UTF8CharacterStrictLength(CurPos);
|
||||
CharLen := UTF8CodepointStrictSize(CurPos);
|
||||
if (CharLen > 1) then Exit(True);
|
||||
if (CurPos > AString) then
|
||||
begin
|
||||
Dec(CurPos); //-2
|
||||
//is it third byte of 3..4 byte codepoint?
|
||||
CharLen := UTF8CharacterStrictLength(CurPos);
|
||||
CharLen := UTF8CodepointStrictSize(CurPos);
|
||||
if (CharLen > 2) then Exit(True);
|
||||
if (CurPos > AString) then
|
||||
begin
|
||||
Dec(CurPos); //-3
|
||||
//is it fourth byte of 4 byte codepoint?
|
||||
CharLen := UTF8CharacterStrictLength(CurPos);
|
||||
CharLen := UTF8CodepointStrictSize(CurPos);
|
||||
if (CharLen = 4) then Exit(True);
|
||||
end;
|
||||
end;
|
||||
@ -784,35 +805,45 @@ end;
|
||||
|
||||
|
||||
{ Len is the length in bytes of UTF8Str
|
||||
CharIndex is the position of the desired char (starting at 0), in chars
|
||||
CodepointIndex is the position of the desired codepoint (starting at 0), in codepoints
|
||||
}
|
||||
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
|
||||
function UTF8CodepointStart(UTF8Str: PChar; Len, CodepointIndex: PtrInt): PChar;
|
||||
var
|
||||
CharLen: LongInt;
|
||||
begin
|
||||
Result:=UTF8Str;
|
||||
if Result<>nil then begin
|
||||
while (CharIndex>0) and (Len>0) do begin
|
||||
CharLen:=UTF8CharacterLength(Result);
|
||||
while (CodepointIndex>0) and (Len>0) do begin
|
||||
CharLen:=UTF8CodepointSize(Result);
|
||||
dec(Len,CharLen);
|
||||
dec(CharIndex);
|
||||
dec(CodepointIndex);
|
||||
inc(Result,CharLen);
|
||||
end;
|
||||
if (CharIndex<>0) or (Len<0) then
|
||||
if (CodepointIndex<>0) or (Len<0) then
|
||||
Result:=nil;
|
||||
end;
|
||||
end;
|
||||
|
||||
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
|
||||
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
|
||||
begin
|
||||
Result := UTF8CodepointStart(UTF8Str, Len, CharIndex);
|
||||
end;
|
||||
|
||||
function UTF8CodepointToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
|
||||
var
|
||||
p: PChar;
|
||||
begin
|
||||
p := UTF8CharStart(UTF8Str, Len, CharIndex);
|
||||
p := UTF8CodepointStart(UTF8Str, Len, CharIndex);
|
||||
if p = nil
|
||||
then Result := -1
|
||||
else Result := p - UTF8Str;
|
||||
end;
|
||||
|
||||
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
|
||||
begin
|
||||
Result := UTF8CodepointToByteIndex(UTF8Str, Len, CharIndex);
|
||||
end;
|
||||
|
||||
{ fix any broken UTF8 sequences with spaces }
|
||||
procedure UTF8FixBroken(P: PChar);
|
||||
var
|
||||
@ -882,12 +913,12 @@ end;
|
||||
procedure UTF8FixBroken(var S: string);
|
||||
begin
|
||||
if S='' then exit;
|
||||
if FindInvalidUTF8Character(PChar(S),length(S))<0 then exit;
|
||||
if FindInvalidUTF8Codepoint(PChar(S),length(S))<0 then exit;
|
||||
UniqueString(S);
|
||||
UTF8FixBroken(PChar(S));
|
||||
end;
|
||||
|
||||
function UTF8CharacterStrictLength(P: PChar): integer;
|
||||
function UTF8CodepointStrictSize(P: PChar): integer;
|
||||
var
|
||||
c: Char;
|
||||
begin
|
||||
@ -928,6 +959,11 @@ begin
|
||||
exit(0);
|
||||
end;
|
||||
|
||||
function UTF8CharacterStrictLength(P: PChar): integer;
|
||||
begin
|
||||
Result := UTF8CodepointStrictSize(P);
|
||||
end;
|
||||
|
||||
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string;
|
||||
var
|
||||
Source: PChar;
|
||||
@ -1008,7 +1044,7 @@ begin
|
||||
else if StartPos>1 then
|
||||
begin
|
||||
// skip
|
||||
StartPosP:=UTF8CharStart(PChar(SearchInText),Length(SearchInText),StartPos-1);
|
||||
StartPosP:=UTF8CodepointStart(PChar(SearchInText),Length(SearchInText),StartPos-1);
|
||||
if StartPosP=nil then exit;
|
||||
// search
|
||||
p:=UTF8PosP(PChar(SearchForText),length(SearchForText),
|
||||
@ -1049,12 +1085,12 @@ var
|
||||
EndBytePos: PChar;
|
||||
MaxBytes: PtrInt;
|
||||
begin
|
||||
StartBytePos:=UTF8CharStart(PChar(s),length(s),StartCharIndex-1);
|
||||
StartBytePos:=UTF8CodepointStart(PChar(s),length(s),StartCharIndex-1);
|
||||
if StartBytePos=nil then
|
||||
Result:=''
|
||||
else begin
|
||||
MaxBytes:=PtrInt(PChar(s)+length(s)-StartBytePos);
|
||||
EndBytePos:=UTF8CharStart(StartBytePos,MaxBytes,CharCount);
|
||||
EndBytePos:=UTF8CodepointStart(StartBytePos,MaxBytes,CharCount);
|
||||
if EndBytePos=nil then
|
||||
Result:=copy(s,StartBytePos-PChar(s)+1,MaxBytes)
|
||||
else
|
||||
@ -1088,11 +1124,11 @@ var
|
||||
EndBytePos: PChar;
|
||||
MaxBytes: PtrInt;
|
||||
begin
|
||||
StartBytePos:=UTF8CharStart(PChar(s),length(s),StartCharIndex-1);
|
||||
StartBytePos:=UTF8CodepointStart(PChar(s),length(s),StartCharIndex-1);
|
||||
if StartBytePos <> nil then
|
||||
begin
|
||||
MaxBytes:=PtrInt(PChar(s)+length(s)-StartBytePos);
|
||||
EndBytePos:=UTF8CharStart(StartBytePos,MaxBytes,CharCount);
|
||||
EndBytePos:=UTF8CodepointStart(StartBytePos,MaxBytes,CharCount);
|
||||
if EndBytePos=nil then
|
||||
Delete(s,StartBytePos-PChar(s)+1,MaxBytes)
|
||||
else
|
||||
@ -1107,7 +1143,7 @@ procedure UTF8Insert(const source: UTF8String; var s: UTF8string;
|
||||
var
|
||||
StartBytePos: PChar;
|
||||
begin
|
||||
StartBytePos:=UTF8CharStart(PChar(s),length(s),StartCharIndex-1);
|
||||
StartBytePos:=UTF8CodepointStart(PChar(s),length(s),StartCharIndex-1);
|
||||
if StartBytePos <> nil then
|
||||
Insert(source, s, StartBytePos-PChar(s)+1);
|
||||
end;
|
||||
@ -1117,7 +1153,7 @@ procedure UTF8Insert(const source: String; var s: String; StartCharIndex: PtrInt
|
||||
var
|
||||
StartBytePos: PChar;
|
||||
begin
|
||||
StartBytePos:=UTF8CharStart(PChar(s),length(s),StartCharIndex-1);
|
||||
StartBytePos:=UTF8CodepointStart(PChar(s),length(s),StartCharIndex-1);
|
||||
if StartBytePos <> nil then
|
||||
Insert(source, s, StartBytePos-PChar(s)+1);
|
||||
end;
|
||||
@ -2424,7 +2460,7 @@ begin
|
||||
{ Now everything else }
|
||||
else
|
||||
begin
|
||||
CharLen := UTF8CharacterLength(@AInStr[InCounter]);
|
||||
CharLen := UTF8CodepointSize(@AInStr[InCounter]);
|
||||
CharProcessed := False;
|
||||
NewCharLen := CharLen;
|
||||
|
||||
@ -2735,8 +2771,7 @@ begin
|
||||
end;
|
||||
|
||||
|
||||
function FindInvalidUTF8Character(p: PChar; Count: PtrInt;
|
||||
StopOnNonUTF8: Boolean): PtrInt;
|
||||
function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean): PtrInt;
|
||||
// return -1 if ok
|
||||
var
|
||||
CharLen: Integer;
|
||||
@ -2803,6 +2838,11 @@ begin
|
||||
Result:=-1;
|
||||
end;
|
||||
|
||||
function FindInvalidUTF8Character(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean = true): PtrInt;
|
||||
begin
|
||||
Result := FindInvalidUTF8Codepoint(p, Count, StopOnNonUTF8);
|
||||
end;
|
||||
|
||||
function ValidUTF8String(const s: String): String; inline;
|
||||
begin
|
||||
Result := Utf8EscapeControlChars(s, emPascal);
|
||||
@ -2848,7 +2888,7 @@ var
|
||||
Ch: Char;
|
||||
i: Integer;
|
||||
begin
|
||||
if FindInvalidUTF8Character(PChar(S), Length(S)) <> -1 then
|
||||
if FindInvalidUTF8Codepoint(PChar(S), Length(S)) <> -1 then
|
||||
begin
|
||||
UTF8FixBroken(S);
|
||||
end;
|
||||
@ -3041,7 +3081,7 @@ begin
|
||||
rBytePos := ByteCount + 1;
|
||||
while (rBytePos > 1) do
|
||||
begin
|
||||
CharLen:=UTF8CharacterLength(p);
|
||||
CharLen:=UTF8CodepointSize(p);
|
||||
Dec(rBytePos, CharLen);
|
||||
System.Move(p^, Result[rBytePos], CharLen);
|
||||
Inc(p, CharLen);
|
||||
@ -3093,7 +3133,7 @@ begin
|
||||
P := PChar(S);
|
||||
while P^ <> #0 do
|
||||
begin
|
||||
CharLen := UTF8CharacterLength(P);
|
||||
CharLen := UTF8CodepointSize(P);
|
||||
i := 1;
|
||||
j := 0;
|
||||
ResultLen := Length(Result);
|
||||
@ -3168,7 +3208,7 @@ begin
|
||||
#128..#255:
|
||||
begin
|
||||
if KeepAllNonASCII then break;
|
||||
u:=UTF8CharacterToUnicode(p,l);
|
||||
u:=UTF8CodepointToUnicode(p,l);
|
||||
if (l<=1) then break; // invalid character
|
||||
case u of
|
||||
128..159, // C1 set of control codes
|
||||
@ -3216,7 +3256,7 @@ begin
|
||||
begin
|
||||
if KeepAllNonASCII then break;
|
||||
StartP:=UTF8FindNearestCharStart(PChar(Result),length(Result),p-PChar(Result));
|
||||
u:=UTF8CharacterToUnicode(PChar(Result)+StartP,l);
|
||||
u:=UTF8CodepointToUnicode(PChar(Result)+StartP,l);
|
||||
if (l<=1) then break; // invalid character
|
||||
case u of
|
||||
128..159, // C1 set of control codes
|
||||
|
@ -7029,7 +7029,7 @@ begin
|
||||
inc(Src);
|
||||
dec(len);
|
||||
end else begin
|
||||
Unicode:=UTF8CharacterToUnicode(Src,CharLen);
|
||||
Unicode:=UTF8CodepointToUnicode(Src,CharLen);
|
||||
inc(Src,CharLen);
|
||||
dec(len,CharLen);
|
||||
i:=UTF8CharConvFunc(Unicode);
|
||||
@ -7068,7 +7068,7 @@ begin
|
||||
inc(Src);
|
||||
dec(len);
|
||||
end else begin
|
||||
Unicode:=UTF8CharacterToUnicode(Src,CharLen);
|
||||
Unicode:=UTF8CodepointToUnicode(Src,CharLen);
|
||||
inc(Src,CharLen);
|
||||
dec(len,CharLen);
|
||||
if Unicode<=$ffff then begin
|
||||
@ -7108,7 +7108,7 @@ begin
|
||||
inc(Src);
|
||||
dec(len);
|
||||
end else begin
|
||||
Unicode:=UTF8CharacterToUnicode(Src,CharLen);
|
||||
Unicode:=UTF8CodepointToUnicode(Src,CharLen);
|
||||
inc(Src,CharLen);
|
||||
dec(len,CharLen);
|
||||
if Unicode<=$ffff then begin
|
||||
@ -7265,7 +7265,7 @@ begin
|
||||
end;
|
||||
inc(p);
|
||||
end else begin
|
||||
i:=UTF8CharacterStrictLength(p);
|
||||
i:=UTF8CodepointStrictSize(p);
|
||||
//DebugLn(['GuessEncoding ',i,' ',DbgStr(s[p])]);
|
||||
if i=0 then begin
|
||||
{$IFDEF VerboseIDEEncoding}
|
||||
|
@ -96,9 +96,9 @@ var
|
||||
Res: AnsiString; //intermediate needed for PChar -> String -> ShortString assignment
|
||||
begin
|
||||
Result := '';
|
||||
p := UTF8CharStart(PChar(S), Length(S), Index - 1); //zero-based call
|
||||
p := UTF8CodepointStart(PChar(S), Length(S), Index - 1); //zero-based call
|
||||
//determine the length in bytes of this UTF-8 character
|
||||
PLen := UTF8CharacterLength(p);
|
||||
PLen := UTF8CodepointSize(p);
|
||||
Res := p;
|
||||
//Set correct length for Result (otherwise it returns all chars up to the end of the original string)
|
||||
SetLength(Res,PLen);
|
||||
|
@ -189,7 +189,7 @@ begin
|
||||
p:=Str;
|
||||
if p=nil then exit(0);
|
||||
while p^<>#0 do begin
|
||||
l:=UTF8CharacterLength(p);
|
||||
l:=UTF8CodepointSize(p);
|
||||
inc(Result);
|
||||
inc(p,l);
|
||||
end;
|
||||
|
@ -1362,7 +1362,7 @@ var
|
||||
// po requires special characters as #number
|
||||
p:=1;
|
||||
while p<=length(Value) do begin
|
||||
j := UTF8CharacterLength(pchar(@Value[p]));
|
||||
j := UTF8CodepointSize(pchar(@Value[p]));
|
||||
if (j=1) and (Value[p] in [#0..#9,#11,#12,#14..#31,#127..#255]) then
|
||||
Value := copy(Value,1,p-1)+'#'+IntToStr(ord(Value[p]))+copy(Value,p+1,length(Value))
|
||||
else
|
||||
|
@ -17,7 +17,7 @@ begin
|
||||
WideCount := Min(WideCount, StrLen);
|
||||
while (CharCount < WideCount) do
|
||||
begin
|
||||
CharLen := UTF8CharacterLength(P);
|
||||
CharLen := UTF8CodepointSize(P);
|
||||
Inc(P, CharLen);
|
||||
Inc(Result, CharLen);
|
||||
Inc(CharCount);
|
||||
|
@ -971,7 +971,7 @@ begin
|
||||
// po requires special characters as #number
|
||||
p:=1;
|
||||
while p<=length(Value) do begin
|
||||
j := UTF8CharacterLength(pchar(@Value[p]));
|
||||
j := UTF8CodepointSize(pchar(@Value[p]));
|
||||
if (j=1) and (Value[p] in [#0..#9,#11,#12,#14..#31,#127..#255]) then
|
||||
Value := copy(Value,1,p-1)+'#'+IntToStr(ord(Value[p]))+copy(Value,p+1,length(Value))
|
||||
else
|
||||
|
@ -131,7 +131,7 @@ begin
|
||||
if ShowLineNumbers then s2 := Format('%4d: ',[i]);
|
||||
l := Printer.Canvas.TextFitInfo(s2 + s, Printer.PageWidth - 2 * Margin);
|
||||
l := l - Length(s2); // s2 has only single byte
|
||||
l := UTF8CharToByteIndex(PChar(s), length(s), l);
|
||||
l := UTF8CodepointToByteIndex(PChar(s), length(s), l);
|
||||
while (l > MIN_LINE_LEN) and (l < length(s)) do begin
|
||||
l2 := l;
|
||||
while (l2 > MIN_LINE_LEN) and
|
||||
@ -144,14 +144,14 @@ begin
|
||||
// find utf8 start
|
||||
while (l2 > 1) and (ord(s[l2]) >= 128) and (ord(s[l2+1]) >= 128) and (ord(s[l2+1]) < 192) do
|
||||
dec(l2);
|
||||
if l2 = 0 then l2 := UTF8CharToByteIndex(PChar(s), length(s), MIN_LINE_LEN);
|
||||
if l2 = 0 then l2 := UTF8CodepointToByteIndex(PChar(s), length(s), MIN_LINE_LEN);
|
||||
Text[j] := copy(s, 1, l2);
|
||||
delete(s, 1, l2);
|
||||
inc(j);
|
||||
Text.InsertObject(j, '', nil);
|
||||
l := Printer.Canvas.TextFitInfo(s2 + s, Printer.PageWidth - 2 * Margin);
|
||||
l := l - Length(s2);
|
||||
l := UTF8CharToByteIndex(PChar(s), length(s), l);
|
||||
l := UTF8CodepointToByteIndex(PChar(s), length(s), l);
|
||||
end;
|
||||
Text[j] := s;
|
||||
inc(i);
|
||||
|
@ -463,7 +463,7 @@ begin
|
||||
|
||||
{$IF FPC_FULLVERSION >= 20701}
|
||||
if p^ <= #127 then exit;
|
||||
i := UTF8CharacterLength(p);
|
||||
i := UTF8CodepointSize(p);
|
||||
SetLength(u, i);
|
||||
// wide chars of UTF-16 <= bytes of UTF-8 string
|
||||
if ConvertUTF8ToUTF16(PWideChar(u), i + 1, p, i, [toInvalidCharToSymbol], L) = trNoError
|
||||
|
@ -212,7 +212,7 @@ end; { SpaceProc }
|
||||
procedure TSynTeXSyn.TextProc;
|
||||
begin
|
||||
fTokenID:=tkText;
|
||||
inc(Run,UTF8CharacterLength(@fLine[Run]));
|
||||
inc(Run,UTF8CodepointSize(@fLine[Run]));
|
||||
end; { TextProc }
|
||||
|
||||
procedure TSynTeXSyn.LFProc;
|
||||
|
@ -211,7 +211,7 @@ begin
|
||||
delete(Input, 1, 1);
|
||||
Continue;
|
||||
end;
|
||||
l := UTF8CharacterLength(@Input[1]);
|
||||
l := UTF8CodepointSize(@Input[1]);
|
||||
if l < 1 then Break;
|
||||
CommandProcessor(ecChar, copy(Input, 1, l), nil);
|
||||
delete(Input, 1, l);
|
||||
|
@ -542,7 +542,7 @@ begin
|
||||
if (p^ in WFCAllowedChars) and (p[1] in WFCAllowedChars)
|
||||
and (p[2] in WFCAllowedChars) then
|
||||
break; // the next three are normal characters -> stop encoding as base64
|
||||
CharLen:=UTF8CharacterLength(p);
|
||||
CharLen:=UTF8CodepointSize(p);
|
||||
{$ifdef VerboseWikiFileCode}
|
||||
writeln('UTF8ToWikiFileCode sequence UTF8CharLen=',CharLen);
|
||||
{$endif}
|
||||
@ -706,7 +706,7 @@ begin
|
||||
end else
|
||||
raise Exception.Create('invalid wiki file code: invalid character');
|
||||
until false;
|
||||
if FindInvalidUTF8Character(PChar(Result),length(Result))>=0 then
|
||||
if FindInvalidUTF8Codepoint(PChar(Result),length(Result))>=0 then
|
||||
raise Exception.Create('invalid wiki file code: result is not UTF-8');
|
||||
end;
|
||||
|
||||
|
@ -1645,8 +1645,8 @@ begin
|
||||
end;
|
||||
end;
|
||||
end else begin
|
||||
CharLen:=UTF8CharacterLength(PageP);
|
||||
UpCharLen:=UTF8CharacterLength(PageUpP);
|
||||
CharLen:=UTF8CodepointSize(PageP);
|
||||
UpCharLen:=UTF8CodepointSize(PageUpP);
|
||||
if (CharLen>1) or (PageP^ in ['a'..'z','A'..'Z']) then begin
|
||||
if (CharLen=UpCharLen) and CompareMem(PageP,PageUpP,CharLen) then
|
||||
CaseFlags:=CaseFlags+'u'
|
||||
|
@ -347,7 +347,7 @@ begin
|
||||
//debugln(['TextToHTMLSnipped phrase "',Phrase,'" found at ',LoTxtP-PChar(LoTxt)]);
|
||||
CurPhraseP:=PChar(Phrase);
|
||||
while (CurPhraseP^<>#0) do begin
|
||||
l:=UTF8CharacterLength(CurPhraseP);
|
||||
l:=UTF8CodepointSize(CurPhraseP);
|
||||
inc(LoTxtP,l);
|
||||
inc(CurPhraseP,l);
|
||||
BoldP^+=1;
|
||||
@ -356,7 +356,7 @@ begin
|
||||
continue;
|
||||
end;
|
||||
end;
|
||||
inc(LoTxtP,UTF8CharacterLength(LoTxtP));
|
||||
inc(LoTxtP,UTF8CodepointSize(LoTxtP));
|
||||
inc(BoldP);
|
||||
end;
|
||||
end;
|
||||
@ -367,7 +367,7 @@ begin
|
||||
BoldP:=Bold;
|
||||
while LoTxtP^<>#0 do begin
|
||||
dbgout([' ',dbgstr(LoTxtP^),':',BoldP^]);
|
||||
inc(LoTxtP,UTF8CharacterLength(LoTxtP));
|
||||
inc(LoTxtP,UTF8CodepointSize(LoTxtP));
|
||||
inc(BoldP);
|
||||
end;
|
||||
debugln;
|
||||
@ -447,7 +447,7 @@ begin
|
||||
ReplaceSubstring(Result,i,1,'>');
|
||||
inc(i,length('>'));
|
||||
end else
|
||||
inc(i,UTF8CharacterLength(@Result[i]));
|
||||
inc(i,UTF8CodepointSize(@Result[i]));
|
||||
inc(BoldP);
|
||||
end;
|
||||
if IsBold then
|
||||
|
@ -307,7 +307,7 @@ begin
|
||||
if (p = 0) or (p = Length(aText)) then Break;
|
||||
if aText[p+1] <> '&' then // '&&' is reduced to '&' by widgetset GUI.
|
||||
begin
|
||||
UTF8Len := UTF8CharacterLength(@aText[p+1]);
|
||||
UTF8Len := UTF8CodepointSize(@aText[p+1]);
|
||||
accelStr := UTF8UpperCase(Copy(aText, p+1, UTF8Len)); // force uppercase
|
||||
// ToDo: Use the whole UTF-8 character in accelStr. How?
|
||||
aShortcut := KeyToShortCut(Ord(accelStr[1]),
|
||||
|
@ -993,7 +993,7 @@ begin
|
||||
ExceptMsg := AExceptionText;
|
||||
// if AExceptionText is not a valid UTF8 string,
|
||||
// then assume it has the ansi encoding and convert it
|
||||
if FindInvalidUTF8Character(pchar(ExceptMsg),length(ExceptMsg)) > 0 then
|
||||
if FindInvalidUTF8Codepoint(pchar(ExceptMsg),length(ExceptMsg)) > 0 then
|
||||
ExceptMsg := AnsiToUtf8(ExceptMsg);
|
||||
msg := Format(lisProjectSRaisedExceptionClassSWithMessageSS,
|
||||
[GetTitle, AExceptionClass, LineEnding, ExceptMsg]);
|
||||
|
@ -293,7 +293,7 @@ begin
|
||||
CondSynEdit.GetWordBoundsAtRowCol(XY,StartX,EndX);
|
||||
if EndX<=XY.X then exit;
|
||||
Line := CondSynEdit.Lines[XY.Y - 1];
|
||||
inc(XY.X,UTF8CharacterLength(@Line[XY.X-1]));
|
||||
inc(XY.X,UTF8CodepointSize(@Line[XY.X-1]));
|
||||
CondSynEdit.LogicalCaretXY:=XY;
|
||||
end;
|
||||
|
||||
|
@ -367,7 +367,7 @@ begin
|
||||
l := length(ExtractFileExt(NewName));
|
||||
if (l > 0) and (l+1 < Length(NewName)) then
|
||||
NewName := Copy(NewName, 1, Length(NewName) - l);
|
||||
l := UTF8CharacterLength(PChar(NewName));
|
||||
l := UTF8CodepointSize(PChar(NewName));
|
||||
if l > 0 then
|
||||
NewName := UTF8UpperCase(copy(NewName, 1, l)) + copy(NewName, 1+l, length(NewName));
|
||||
|
||||
|
@ -545,7 +545,7 @@ begin
|
||||
l := length(ExtractFileExt(NewName));
|
||||
if (l > 0) and (l+1 < Length(NewName)) then
|
||||
NewName := Copy(NewName, 1, Length(NewName) - l);
|
||||
l := UTF8CharacterLength(PChar(NewName));
|
||||
l := UTF8CodepointSize(PChar(NewName));
|
||||
if l > 0 then
|
||||
NewName := UTF8UpperCase(copy(NewName, 1, l)) + copy(NewName, 1+l, length(NewName));
|
||||
|
||||
|
@ -136,7 +136,7 @@ begin
|
||||
s := Trim(s);
|
||||
{$ifdef windows}
|
||||
//cfg file is made by Windows installer and probably is Windows default codepage
|
||||
if FindInvalidUTF8Character(PChar(s), Length(s), True) > 0 then
|
||||
if FindInvalidUTF8Codepoint(PChar(s), Length(s), True) > 0 then
|
||||
s := WinCPToUtf8(s);
|
||||
{$endif windows}
|
||||
ParamsAndCfgFileContent.Add(s)
|
||||
|
@ -1630,7 +1630,7 @@ function TabsToSpaces(const s: string; TabWidth: integer; UseUTF8: boolean): str
|
||||
Dest[DestPos]:=Src[SrcPos];
|
||||
inc(PhysicalX);
|
||||
if UseUTF8 then
|
||||
CharLen:=UTF8CharacterLength(@s[SrcPos])
|
||||
CharLen:=UTF8CodepointSize(@s[SrcPos])
|
||||
else
|
||||
CharLen:=1;
|
||||
for i:=1 to CharLen do begin
|
||||
|
@ -2556,7 +2556,7 @@ begin
|
||||
if LogCaret.Y>=Editor.Lines.Count then exit;
|
||||
Line:=Editor.Lines[LogCaret.Y-1];
|
||||
if LogCaret.X>length(Line) then exit;
|
||||
CharLen:=UTF8CharacterLength(@Line[LogCaret.X]);
|
||||
CharLen:=UTF8CodepointSize(@Line[LogCaret.X]);
|
||||
AddPrefix:=copy(Line,LogCaret.X,CharLen);
|
||||
NewPrefix:=CurrentString+AddPrefix;
|
||||
//debugln('TSourceNotebook.OnSynCompletionNextChar NewPrefix="',NewPrefix,'" LogCaret.X=',dbgs(LogCaret.X));
|
||||
|
@ -1570,7 +1570,7 @@ var
|
||||
begin
|
||||
if AppNoExceptionMessages in FFlags then exit;
|
||||
Msg := E.Message;
|
||||
if FindInvalidUTF8Character(PChar(Msg), Length(Msg)) > 0 then
|
||||
if FindInvalidUTF8Codepoint(PChar(Msg), Length(Msg)) > 0 then
|
||||
Msg := AnsiToUtf8(Msg);
|
||||
if (Msg <> '') and (Msg[length(Msg)] <> '.') then Msg := Msg + '.';
|
||||
if (not Terminated) and (Self <> nil) and (AppInitialized in FFlags) then
|
||||
|
@ -5758,7 +5758,7 @@ var
|
||||
Result:=LineStart;
|
||||
LineWidth:=0;
|
||||
repeat
|
||||
charLen:=UTF8CharacterLength(@AText[result]);
|
||||
charLen:=UTF8CodepointSize(@AText[result]);
|
||||
CharWidth:=GetLineWidthInPixel(Result,charLen);
|
||||
inc(LineWidth,CharWidth);
|
||||
if LineWidth>MaxWidthInPixel then break;
|
||||
@ -5767,7 +5767,7 @@ var
|
||||
until false;
|
||||
// at least one char
|
||||
if Result=LineStart then begin
|
||||
charLen:=UTF8CharacterLength(@AText[result]);
|
||||
charLen:=UTF8CodepointSize(@AText[result]);
|
||||
inc(Result,charLen);
|
||||
end;
|
||||
end;
|
||||
|
@ -6272,7 +6272,7 @@ var
|
||||
Result:=LineStart;
|
||||
LineWidth:=0;
|
||||
repeat
|
||||
charLen:=UTF8CharacterLength(@AText[result]);
|
||||
charLen:=UTF8CodepointSize(@AText[result]);
|
||||
CharWidth:=GetLineWidthInPixel(Result,charLen);
|
||||
inc(LineWidth,CharWidth);
|
||||
if LineWidth>MaxWidthInPixel then break;
|
||||
@ -6281,7 +6281,7 @@ var
|
||||
until false;
|
||||
// at least one char
|
||||
if Result=LineStart then begin
|
||||
charLen:=UTF8CharacterLength(@AText[result]);
|
||||
charLen:=UTF8CodepointSize(@AText[result]);
|
||||
inc(Result,charLen);
|
||||
end;
|
||||
end;
|
||||
|
@ -2658,7 +2658,7 @@ var
|
||||
Points[0].cX := LeftPos + Points[0].cX;
|
||||
Points[0].cY := TopPos + tm.tmHeight - TM.tmDescent + 1;
|
||||
|
||||
GetTextExtentPoint(DC, @aStr[pIndex], UTF8CharacterLength(@aStr[pIndex]), Points[1]);
|
||||
GetTextExtentPoint(DC, @aStr[pIndex], UTF8CodepointSize(@aStr[pIndex]), Points[1]);
|
||||
Points[1].cX := Points[0].cX + Points[1].cX;
|
||||
Points[1].cY := Points[0].cY;
|
||||
|
||||
@ -3796,7 +3796,7 @@ var
|
||||
CurScreenX := X;
|
||||
while CurCount > 0 do
|
||||
begin
|
||||
CharLen := UTF8CharacterLength(CurStr);
|
||||
CharLen := UTF8CodepointSize(CurStr);
|
||||
DevCtx.DrawTextWithColors(CurStr, CharLen, CurScreenX, Y, Foreground, BackgroundColor);
|
||||
inc(CurScreenX, CurDx^);
|
||||
inc(CurDx);
|
||||
|
@ -2022,7 +2022,7 @@ var
|
||||
Result:=LineStart;
|
||||
LineWidth:=0;
|
||||
repeat
|
||||
charLen:=UTF8CharacterLength(@AText[result]);
|
||||
charLen:=UTF8CodepointSize(@AText[result]);
|
||||
CharWidth:=GetLineWidthInPixel(Result,charLen);
|
||||
inc(LineWidth,CharWidth);
|
||||
if LineWidth>MaxWidthInPixel then break;
|
||||
@ -2031,7 +2031,7 @@ var
|
||||
until false;
|
||||
// at least one char
|
||||
if Result=LineStart then begin
|
||||
charLen:=UTF8CharacterLength(@AText[result]);
|
||||
charLen:=UTF8CodepointSize(@AText[result]);
|
||||
inc(Result,charLen);
|
||||
end;
|
||||
end;
|
||||
|
@ -919,7 +919,7 @@ var
|
||||
Points[0].cX := LeftPos + Points[0].cX;
|
||||
Points[0].cY := TopPos + tm.tmHeight - TM.tmDescent + 1;
|
||||
|
||||
GetTextExtentPoint(DC, @aStr[pIndex], UTF8CharacterLength(@aStr[pIndex]), Points[1]);
|
||||
GetTextExtentPoint(DC, @aStr[pIndex], UTF8CodepointSize(@aStr[pIndex]), Points[1]);
|
||||
Points[1].cX := Points[0].cX + Points[1].cX;
|
||||
Points[1].cY := Points[0].cY;
|
||||
|
||||
|
@ -3500,7 +3500,7 @@ begin
|
||||
{$endif}
|
||||
InputEvent := QInputMethodEventH(Event);
|
||||
QInputMethodEvent_commitString(InputEvent, @WStr);
|
||||
UnicodeChar := UTF8CharacterToUnicode(PChar(WStr), UnicodeOutLen);
|
||||
UnicodeChar := UTF8CodepointToUnicode(PChar(WStr), UnicodeOutLen);
|
||||
{$IFDEF VerboseQtKeys}
|
||||
writeln('> TQtWidget.SlotInputMethod ',dbgsname(LCLObject),' event=QEventInputMethod:');
|
||||
writeln(' commmitString ',WStr,' len ',length(WStr),' UnicodeChar ',UnicodeChar,
|
||||
|
@ -2216,7 +2216,7 @@ var
|
||||
CurX := X;
|
||||
while CurCount > 0 do
|
||||
begin
|
||||
CharLen := UTF8CharacterLength(CurStr);
|
||||
CharLen := UTF8CodepointSize(CurStr);
|
||||
W := {%H-}Copy(CurStr, 1, CharLen);
|
||||
if AClipped then
|
||||
QtDC.drawText(CurX, Y, Rect^.Right - Rect^.Left, Rect^.Bottom - Rect^.Top,
|
||||
|
@ -3504,7 +3504,7 @@ begin
|
||||
{$endif}
|
||||
InputEvent := QInputMethodEventH(Event);
|
||||
QInputMethodEvent_commitString(InputEvent, @WStr);
|
||||
UnicodeChar := UTF8CharacterToUnicode(PChar(WStr), UnicodeOutLen);
|
||||
UnicodeChar := UTF8CodepointToUnicode(PChar(WStr), UnicodeOutLen);
|
||||
{$IFDEF VerboseQtKeys}
|
||||
writeln('> TQtWidget.SlotInputMethod ',dbgsname(LCLObject),' event=QEventInputMethod:');
|
||||
writeln(' commmitString ',WStr,' len ',length(WStr),' UnicodeChar ',UnicodeChar,
|
||||
|
@ -2182,7 +2182,7 @@ var
|
||||
CurX := X;
|
||||
while CurCount > 0 do
|
||||
begin
|
||||
CharLen := UTF8CharacterLength(CurStr);
|
||||
CharLen := UTF8CodepointSize(CurStr);
|
||||
if AClipped then
|
||||
begin
|
||||
W := GetUTF8String(Copy(CurStr, 1, CharLen));
|
||||
|
@ -336,10 +336,10 @@ function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
|
||||
function UnicodeToUTF16(u: cardinal): UTF16String;
|
||||
|
||||
{$IFDEF EnableWrapperFunctions}
|
||||
function UTF8CharacterLength(p: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8CodepointSize(p: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8Length(const s: string): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8CodepointToUnicode(p: PChar; out CharLen: integer): Cardinal; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UnicodeToUTF8(u: cardinal; Buf: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UnicodeToUTF8SkipErrors(u: cardinal; Buf: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UnicodeToUTF8(u: cardinal): shortstring; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
@ -348,11 +348,11 @@ function UTF8ToDoubleByte(UTF8Str: PChar; Len: PtrInt; DBStr: PByte): PtrInt; in
|
||||
function UTF8FindNearestCharStart(UTF8Str: PChar; Len: integer;
|
||||
BytePos: integer): integer; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
// find the n-th UTF8 character, ignoring BIDI
|
||||
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8CodepointStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
// find the byte index of the n-th UTF8 character, ignoring BIDI (byte len of substr)
|
||||
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8CodepointToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
procedure UTF8FixBroken(P: PChar); inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8CharacterStrictLength(P: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8CodepointStrictSize(P: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8Pos(const SearchForText, SearchInText: string): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8Copy(const s: string; StartCharIndex, CharCount: PtrInt): string; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
@ -360,7 +360,7 @@ procedure UTF8Delete(var s: String; StartCharIndex, CharCount: PtrInt); inline;
|
||||
procedure UTF8Insert(const source: String; var s: string; StartCharIndex: PtrInt); inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8LowerCase(const s: String): String; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function UTF8UpperCase(const s: String): String; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function FindInvalidUTF8Character(p: PChar; Count: PtrInt;
|
||||
function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt;
|
||||
StopOnNonASCII: Boolean = true): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
function ValidUTF8String(const s: String): String; inline; deprecated 'Use the function in LazUTF8 unit';
|
||||
|
||||
@ -2744,9 +2744,9 @@ begin
|
||||
end;
|
||||
|
||||
{$IFDEF EnableWrapperFunctions}
|
||||
function UTF8CharacterLength(p: PChar): integer;
|
||||
function UTF8CodepointSize(p: PChar): integer;
|
||||
begin
|
||||
Result := LazUTF8.UTF8CharacterLength(p);
|
||||
Result := LazUTF8.UTF8CodepointSize(p);
|
||||
end;
|
||||
|
||||
function UTF8Length(const s: string): PtrInt;
|
||||
@ -2759,9 +2759,9 @@ begin
|
||||
Result := LazUTF8.UTF8Length(p, ByteCount);
|
||||
end;
|
||||
|
||||
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal;
|
||||
function UTF8CodepointToUnicode(p: PChar; out CharLen: integer): Cardinal;
|
||||
begin
|
||||
Result := LazUTF8.UTF8CharacterToUnicode(p, CharLen);
|
||||
Result := LazUTF8.UTF8CodepointToUnicode(p, CharLen);
|
||||
end;
|
||||
|
||||
function UnicodeToUTF8(u: cardinal; Buf: PChar): integer;
|
||||
@ -2803,14 +2803,14 @@ end;
|
||||
|
||||
This function is similar to UTF8FindNearestCharStart
|
||||
}
|
||||
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
|
||||
function UTF8CodepointStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
|
||||
begin
|
||||
Result := LazUTF8.UTF8CharStart(UTF8Str, Len, CharIndex);
|
||||
Result := LazUTF8.UTF8CodepointStart(UTF8Str, Len, CharIndex);
|
||||
end;
|
||||
|
||||
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
|
||||
function UTF8CodepointToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
|
||||
begin
|
||||
Result := LazUTF8.UTF8CharToByteIndex(UTF8Str, Len, CharIndex);
|
||||
Result := LazUTF8.UTF8CodepointToByteIndex(UTF8Str, Len, CharIndex);
|
||||
end;
|
||||
|
||||
{ fix any broken UTF8 sequences with spaces }
|
||||
@ -2819,9 +2819,9 @@ begin
|
||||
LazUTF8.UTF8FixBroken(P);
|
||||
end;
|
||||
|
||||
function UTF8CharacterStrictLength(P: PChar): integer;
|
||||
function UTF8CodepointStrictSize(P: PChar): integer;
|
||||
begin
|
||||
Result := LazUTF8.UTF8CharacterStrictLength(P);
|
||||
Result := LazUTF8.UTF8CodepointStrictSize(P);
|
||||
end;
|
||||
|
||||
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string;
|
||||
@ -2859,11 +2859,11 @@ begin
|
||||
Result := LazUTF8.UTF8UpperCase(s);
|
||||
end;
|
||||
|
||||
function FindInvalidUTF8Character(p: PChar; Count: PtrInt;
|
||||
function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt;
|
||||
StopOnNonASCII: Boolean): PtrInt;
|
||||
// return -1 if ok
|
||||
begin
|
||||
Result := LazUTF8.FindInvalidUTF8Character(p, Count, StopOnNonASCII);
|
||||
Result := LazUTF8.FindInvalidUTF8Codepoint(p, Count, StopOnNonASCII);
|
||||
end;
|
||||
|
||||
function ValidUTF8String(const s: String): String;
|
||||
|
@ -372,9 +372,9 @@ var
|
||||
Res: AnsiString; //intermediate needed for PChar -> String -> ShortString assignment
|
||||
begin
|
||||
Result := '';
|
||||
p := UTF8CharStart(PChar(S), Length(S), Index - 1); //zero-based call
|
||||
p := UTF8CodepointStart(PChar(S), Length(S), Index - 1); //zero-based call
|
||||
//determine the length in bytes of this UTF-8 character
|
||||
PLen := UTF8CharacterLength(p);
|
||||
PLen := UTF8CodepointSize(p);
|
||||
Res := p;
|
||||
//Set correct length for Result (otherwise it returns all chars up to the end of the original string)
|
||||
SetLength(Res,PLen);
|
||||
|
@ -2352,7 +2352,7 @@ var
|
||||
Result := LineStart;
|
||||
LineWidth := 0;
|
||||
repeat
|
||||
charLen := UTF8CharacterLength(@AText[Result]);
|
||||
charLen := UTF8CodepointSize(@AText[Result]);
|
||||
CharWidth := TextWidth(MidStr(AText, Result, charLen));
|
||||
Inc(LineWidth, CharWidth);
|
||||
if LineWidth > MaxWidthInPixel then
|
||||
@ -2364,7 +2364,7 @@ var
|
||||
// at least one char
|
||||
if Result = LineStart then
|
||||
begin
|
||||
charLen := UTF8CharacterLength(@AText[Result]);
|
||||
charLen := UTF8CodepointSize(@AText[Result]);
|
||||
Inc(Result, charLen);
|
||||
end;
|
||||
end;
|
||||
|
@ -142,7 +142,7 @@ begin
|
||||
for i:=0 to $10FFFF do
|
||||
begin
|
||||
s:=UnicodeToUTF8(i);
|
||||
u:=UTF8CharacterToUnicode(PChar(s), dum);
|
||||
u:=UTF8CodepointToUnicode(PChar(s), dum);
|
||||
AssertEquals('got (hexidecimal): ' + InttoHex(u,6), i, u);
|
||||
end;
|
||||
end;
|
||||
|
@ -66,7 +66,7 @@ procedure TTestLazUTF8.TestFindInvalidUTF8;
|
||||
var
|
||||
Actual: PtrInt;
|
||||
begin
|
||||
Actual:=FindInvalidUTF8Character(PChar(s),length(s));
|
||||
Actual:=FindInvalidUTF8Codepoint(PChar(s),length(s));
|
||||
AssertEquals(Title+': '+dbgMemRange(Pointer(s),length(s)),Expected,Actual);
|
||||
end;
|
||||
|
||||
|
@ -44,7 +44,7 @@ procedure TTestLConvEncoding.Test_CP_UTF8_CP;
|
||||
AssertEquals('CodePage '+CodePageName+' to UTF8 creates empty string for character #'+IntToStr(ord(c)),true,false);
|
||||
Back:=ConvertEncodingFromUTF8(AsUTF8,CodePageName,Encoded);
|
||||
if Back<>c then
|
||||
AssertEquals('CodePage '+CodePageName+' ('+IntToStr(ord(c))+') to UTF8 ('+dbgs(UTF8CharacterToUnicode(PChar(AsUTF8),l))+') and back differ for character #'+IntToStr(ord(c)),DbgStr(c),dbgstr(Back));
|
||||
AssertEquals('CodePage '+CodePageName+' ('+IntToStr(ord(c))+') to UTF8 ('+dbgs(UTF8CodepointToUnicode(PChar(AsUTF8),l))+') and back differ for character #'+IntToStr(ord(c)),DbgStr(c),dbgstr(Back));
|
||||
end;
|
||||
end;
|
||||
|
||||
|
@ -182,7 +182,7 @@ begin
|
||||
s:=SortedTable[i];
|
||||
if (length(s)=1) and (ord(s[1])<=127) then begin
|
||||
end else if s<>'' then begin
|
||||
UniCode:=UTF8CharacterToUnicode(@s[1],CharLen);
|
||||
UniCode:=UTF8CodepointToUnicode(@s[1],CharLen);
|
||||
TableIndex:=StrToTableIndex(s);
|
||||
j:=1;
|
||||
while (i+j<256) do begin
|
||||
@ -191,11 +191,11 @@ begin
|
||||
' SortedTable[i]=',ToStringConstant(s),
|
||||
' SortedTable[i+j]=',ToStringConstant(SortedTable[i+j]),
|
||||
' UniCode[i]=',UniCode,
|
||||
' UniCode[i+j]=',UTF8CharacterToUnicode(@SortedTable[i+j][1],CharLen),
|
||||
' UniCode[i+j]=',UTF8CodepointToUnicode(@SortedTable[i+j][1],CharLen),
|
||||
' TableIndex[i]=',TableIndex,
|
||||
' TableIndex[i+j]=',StrToTableIndex(SortedTable[i+j]),
|
||||
'');}
|
||||
if UTF8CharacterToUnicode(@SortedTable[i+j][1],CharLen)<>UniCode+j then
|
||||
if UTF8CodepointToUnicode(@SortedTable[i+j][1],CharLen)<>UniCode+j then
|
||||
break;
|
||||
if StrToTableIndex(SortedTable[i+j])<>TableIndex+j then
|
||||
break;
|
||||
|
@ -138,7 +138,7 @@ begin
|
||||
SL.LoadFromFile(FilenameUTF8);
|
||||
s:=SL[0];
|
||||
if s<>'' then begin
|
||||
DBCSToUTF8[Index]:=UTF8CharacterToUnicode(PChar(s),CharLen);
|
||||
DBCSToUTF8[Index]:=UTF8CodepointToUnicode(PChar(s),CharLen);
|
||||
if CharLen=0 then DBCSToUTF8[Index]:=0;
|
||||
writeln(IntToStr(Index)+'='+IntToStr(DBCSToUTF8[Index])+' s='+ToStringConstant(s)+' '+IntToStr(DBCSToUTF8[Index]-DBCSToUTF8[Index-1]-1));
|
||||
end;
|
||||
@ -319,7 +319,7 @@ begin
|
||||
s:=SortedTable[i];
|
||||
if (length(s)=1) and (ord(s[1])<=127) then begin
|
||||
end else if s<>'' then begin
|
||||
UniCode:=UTF8CharacterToUnicode(@s[1],CharLen);
|
||||
UniCode:=UTF8CodepointToUnicode(@s[1],CharLen);
|
||||
TableIndex:=StrToTableIndex(s);
|
||||
j:=1;
|
||||
while (i+j<256) do begin
|
||||
@ -328,11 +328,11 @@ begin
|
||||
' SortedTable[i]=',ToStringConstant(s),
|
||||
' SortedTable[i+j]=',ToStringConstant(SortedTable[i+j]),
|
||||
' UniCode[i]=',UniCode,
|
||||
' UniCode[i+j]=',UTF8CharacterToUnicode(@SortedTable[i+j][1],CharLen),
|
||||
' UniCode[i+j]=',UTF8CodepointToUnicode(@SortedTable[i+j][1],CharLen),
|
||||
' TableIndex[i]=',TableIndex,
|
||||
' TableIndex[i+j]=',StrToTableIndex(SortedTable[i+j]),
|
||||
'');*)
|
||||
if integer(UTF8CharacterToUnicode(@SortedTable[i+j][1],CharLen))<>UniCode+j then
|
||||
if integer(UTF8CodepointToUnicode(@SortedTable[i+j][1],CharLen))<>UniCode+j then
|
||||
break;
|
||||
if StrToTableIndex(SortedTable[i+j])<>TableIndex+j then
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user