LazUtils: Change "Character" to "Codepoint" in LazUTF8 function names to be more accurate and to avoid confusion.

git-svn-id: trunk@56692 -
This commit is contained in:
juha 2017-12-11 19:44:22 +00:00
parent f0981a06af
commit 6810c626df
50 changed files with 194 additions and 154 deletions

View File

@ -206,7 +206,7 @@ var
begin
Result:=s;
if Result='' then exit;
i:=FindInvalidUTF8Character(PChar(Result),length(Result));
i:=FindInvalidUTF8Codepoint(PChar(Result),length(Result));
if i<0 then exit;
Result:=ISO_8859_1ToUTF8(Result);
end;

View File

@ -5341,7 +5341,7 @@ var
var
l: LongInt;
begin
l:=UTF8CharacterLength(@Src[APos]);
l:=UTF8CodepointSize(@Src[APos]);
inc(APos);
dec(l);
while (l>0) and (APos<ParsedLen) do begin

View File

@ -91,7 +91,7 @@ type
// *** Wrappers for LazUTF8 ***
function UTF8ToSys(const s: string): string; inline; deprecated 'Use the function in LazUTF8 unit';
function SysToUTF8(const s: string): string; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8CharacterLength(p: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8CodepointSize(p: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
// environment
function ParamStrUTF8(Param: Integer): string; inline; deprecated 'Use the function in LazUTF8 unit';
function GetEnvironmentStringUTF8(Index : Integer): String; inline; deprecated 'Use the function in LazUTF8 unit';
@ -380,9 +380,9 @@ begin
Result:=LazUTF8.SysToUTF8(s);
end;
function UTF8CharacterLength(p: PChar): integer;
function UTF8CodepointSize(p: PChar): integer;
begin
Result:=LazUTF8.UTF8CharacterLength(p);
Result:=LazUTF8.UTF8CodepointSize(p);
end;
function ParamStrUTF8(Param: Integer): string;
@ -1700,7 +1700,7 @@ function FilenameIsMatching(const Mask, Filename: string; MatchExactly: boolean
{$ENDIF}
if FileP^ in [#0,PathDelim] then exit;
inc(MaskP);
inc(FileP,LazUTF8.UTF8CharacterLength(FileP));
inc(FileP,LazUTF8.UTF8CodepointSize(FileP));
end;
'*':
begin
@ -1808,8 +1808,8 @@ function FilenameIsMatching(const Mask, Filename: string; MatchExactly: boolean
while not (MaskP^ in [#0,SpecialChar,PathDelim,'?','*','{',',','}']) do
begin
if FileP^ in [#0,PathDelim] then exit;
inc(MaskP,LazUTF8.UTF8CharacterLength(MaskP));
inc(FileP,LazUTF8.UTF8CharacterLength(FileP));
inc(MaskP,LazUTF8.UTF8CodepointSize(MaskP));
inc(FileP,LazUTF8.UTF8CodepointSize(FileP));
end;
if LazFileUtils.CompareFilenames(MaskStart,MaskP-MaskStart,FileStart,FileP-FileStart)<>0 then
exit;

View File

@ -214,7 +214,7 @@ begin
if AString[n] = '''' then Result := Result + '''';
end;
#192..#255: begin // Maybe utf8
u := UTF8CharacterLength(@AString[n]);
u := UTF8CodepointSize(@AString[n]);
if (u > 0) and (n+u-1 <= l) then begin
if not InString then
ToggleInString;

View File

@ -964,7 +964,7 @@ begin
Result := 0;
SetLength(Desc, Length(S));
while i<=Length(s) do begin
b := UTF8CharacterStrictLength(@S[i]);
b := UTF8CodepointStrictSize(@S[i]);
inc(i,b);
inc(Result);
Desc[Result] := Char(b);
@ -1096,7 +1096,7 @@ begin
SpcSize := 0;
while i<=Length(str) do
begin
b := UTF8CharacterStrictLength(@Str[i]);
b := UTF8CodepointStrictSize(@Str[i]);
spc := (b=1) and (str[i]=' ');
inc(len);
j := Length(result)-1;

View File

@ -217,7 +217,7 @@ begin
end
else
begin
Unicode := UTF8CharacterToUnicode(Src, CharLen);
Unicode := UTF8CodepointToUnicode(Src, CharLen);
Inc(Src, CharLen);
i := UTF8CharConvFunc(Unicode);
//writeln(Format('%X', [i]));

View File

@ -544,8 +544,8 @@ begin
exit;
end;
charlen := UTF8CharacterLength(pstr);
glyphCode := UTF8CharacterToUnicode(pstr, charlen);
charlen := UTF8CodepointSize(pstr);
glyphCode := UTF8CodepointToUnicode(pstr, charlen);
inc(pstr,charlen);
glyphWidth := CharWidthFromUnicode(glyphCode);
@ -1487,7 +1487,7 @@ begin
left := length(AText);
while left > 0 do
begin
charcode := UTF8CharacterToUnicode(pstr, charlen);
charcode := UTF8CodepointToUnicode(pstr, charlen);
inc(pstr,charlen);
dec(left,charlen);
g := Glyph[CharIndex[charcode]];
@ -1572,7 +1572,7 @@ begin
left := length(AText);
while left > 0 do
begin
charcode := UTF8CharacterToUnicode(pstr, charlen);
charcode := UTF8CodepointToUnicode(pstr, charlen);
inc(pstr,charlen);
dec(left,charlen);
g := Glyph[CharIndex[charcode]];
@ -1647,7 +1647,7 @@ begin
resultIndex := 0;
while left > 0 do
begin
charcode := UTF8CharacterToUnicode(pstr, charlen);
charcode := UTF8CodepointToUnicode(pstr, charlen);
inc(pstr,charlen);
dec(left,charlen);
@ -1757,7 +1757,7 @@ begin
if left <= 0 then break;
end;
end;
charcode := UTF8CharacterToUnicode(pstr, charlen);
charcode := UTF8CodepointToUnicode(pstr, charlen);
inc(pstr,charlen);
dec(left,charlen);
g := Glyph[CharIndex[charcode]];

View File

@ -507,15 +507,15 @@ var
DstP:=PChar(DstChars);
Item:=List;
for i:=1 to Count do begin
Item^.SrcLen:=UTF8CharacterLength(SrcP);
Item^.SrcLen:=UTF8CodepointSize(SrcP);
Move(SrcP^,Item^.Src[0],Item^.SrcLen);
if (DstP^<>#0) or (DstP-PChar(DstChars)<length(DstChars)) then begin
Item^.DstLen:=UTF8CharacterLength(DstP);
Item^.DstLen:=UTF8CodepointSize(DstP);
Move(DstP^,Item^.Dst[0],Item^.DstLen);
end;
inc(Item);
inc(SrcP,UTF8CharacterLength(SrcP));
inc(DstP,UTF8CharacterLength(DstP));
inc(SrcP,UTF8CodepointSize(SrcP));
inc(DstP,UTF8CodepointSize(DstP));
end;
end;
@ -565,7 +565,7 @@ var
while true do begin
c:=Src^;
if (c=#0) and (Src-PChar(s)=length(s)) then break;
clen:=UTF8CharacterLength(Src);
clen:=UTF8CodepointSize(Src);
NewCharP:=Src;
NewCharLen:=clen;
// do a quick test via Pos
@ -613,7 +613,7 @@ var
while true do begin
c:=p^;
if (c=#0) and (p-PChar(s)=length(s)) then break;
clen:=UTF8CharacterLength(p);
clen:=UTF8CodepointSize(p);
// do a quick test via Pos
i:=Pos(c,SrcChars);
if i>0 then begin

View File

@ -187,7 +187,7 @@ begin
else
Result := 1
{$ELSE}
Result := UTF8CharacterLengthFast(p);
Result := UTF8CodepointSizeFast(p);
{$ENDIF}
end;

View File

@ -1031,7 +1031,7 @@ begin
case s[SrcPos] of
#192..#240:
begin
OldCode:=UTF8CharacterToUnicode(@s[SrcPos],CharLen);
OldCode:=UTF8CodepointToUnicode(@s[SrcPos],CharLen);
NewCode:=UnicodeLowercase(OldCode);
if NewCode=OldCode then begin
inc(DstPos,CharLen);
@ -1054,7 +1054,7 @@ begin
case s[SrcPos] of
#192..#240:
begin
OldCode:=UTF8CharacterToUnicode(@s[SrcPos],CharLen);
OldCode:=UTF8CodepointToUnicode(@s[SrcPos],CharLen);
NewCode:=UnicodeLowercase(OldCode);
if NewCode=OldCode then begin
System.Move(s[SrcPos],Result[DstPos],CharLen);
@ -1107,7 +1107,7 @@ begin
#192..#240: // Now chars with multiple bytes
begin
OldCode:=UTF8CharacterToUnicode(p,CharLen);
OldCode:=UTF8CodepointToUnicode(p,CharLen);
NewCode:=UnicodeLowercase(OldCode);
if NewCode<>OldCode then begin
if not Changed then begin

View File

@ -73,9 +73,11 @@ function GetEnvironmentVariableUTF8(const EnvVar: string): String;
function SysErrorMessageUTF8(ErrorCode: Integer): String;
// Returns the size of one codepoint in bytes.
function UTF8CharacterLength(p: PChar): integer; inline;
// Fast version of UTF8CharacterLength. Assumes the UTF-8 codepoint is valid.
function UTF8CharacterLengthFast(p: PChar): integer; inline;
function UTF8CodepointSize(p: PChar): integer; inline;
function UTF8CharacterLength(p: PChar): integer; deprecated 'Use UTF8CodepointSize instead.';
// Fast version of UTF8CodepointSize. Assumes the UTF-8 codepoint is valid.
function UTF8CodepointSizeFast(p: PChar): integer; inline;
function UTF8CharacterLengthFast(p: PChar): integer; deprecated 'Use UTF8CodepointSizeFast instead.';
function UTF8Length(const s: string): PtrInt; inline;
function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt;
@ -84,7 +86,8 @@ function UTF8LengthFast(const s: string): PtrInt; inline;
function UTF8LengthFast(p: PChar; ByteCount: PtrInt): PtrInt;
// Functions dealing with unicode number U+xxx.
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal;
function UTF8CodepointToUnicode(p: PChar; out CharLen: integer): Cardinal;
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal; deprecated 'Use UTF8CodepointToUnicode instead.';
function UnicodeToUTF8(CodePoint: cardinal): string; // UTF32 to UTF8
function UnicodeToUTF8(CodePoint: cardinal; Buf: PChar): integer; // UTF32 to UTF8
function UnicodeToUTF8SkipErrors(CodePoint: cardinal; Buf: PChar): integer; // UTF32 to UTF8
@ -95,13 +98,16 @@ function UTF8FindNearestCharStart(UTF8Str: PChar; Len: SizeInt;
BytePos: SizeInt): SizeInt;
function Utf8TryFindCodepointStart(AString: PChar; var CurPos: PChar; out CharLen: Integer): Boolean;
function Utf8TryFindCodepointStart(const AString: String; var Index: Integer; out CharLen: Integer): Boolean;
// find the n-th UTF8 character, ignoring BIDI
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
// find the byte index of the n-th UTF8 character, ignoring BIDI (byte len of substr)
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
// find the n-th UTF8 codepoint, ignoring BIDI
function UTF8CodepointStart(UTF8Str: PChar; Len, CodepointIndex: PtrInt): PChar;
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; deprecated 'Use UTF8CodepointStart instead.';
// find the byte index of the n-th UTF8 codepoint, ignoring BIDI (byte len of substr)
function UTF8CodepointToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; deprecated 'Use UTF8CodepointToByteIndex instead.';
procedure UTF8FixBroken(P: PChar); overload;
procedure UTF8FixBroken(var S: string); overload;
function UTF8CharacterStrictLength(P: PChar): integer;
function UTF8CodepointStrictSize(P: PChar): integer;
function UTF8CharacterStrictLength(P: PChar): integer; deprecated 'Use UTF8CodepointStrictSize instead.';
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string;
function UTF8Pos(const SearchForText, SearchInText: string; StartPos: SizeInt = 1): PtrInt;
@ -124,8 +130,8 @@ function UTF8LowerString(const s: string): string;
function UTF8UpperCase(const AInStr: string; ALanguage: string=''): string;
function UTF8UpperString(const s: string): string;
function UTF8SwapCase(const AInStr: string; ALanguage: string=''): string;
function FindInvalidUTF8Character(p: PChar; Count: PtrInt;
StopOnNonUTF8: Boolean = true): PtrInt;
function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean = true): PtrInt;
function FindInvalidUTF8Character(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean = true): PtrInt; deprecated 'Use FindInvalidUTF8Codepoint instead.';
function UTF8StringOfChar(AUtf8Char: String; N: Integer): String;
function UTF8AddChar(AUtf8Char: String; const S: String; N: Integer): String;
function UTF8AddCharR(AUtf8Char: String; const S: String; N: Integer): String;
@ -395,7 +401,7 @@ begin
Result := SysToUTF8(SysUtils.SysErrorMessage(ErrorCode));
end;
function UTF8CharacterLengthFull(p: PChar): integer;
function UTF8CodepointSizeFull(p: PChar): integer;
begin
case p^ of
#0..#191: // %11000000
@ -433,14 +439,19 @@ begin
end;
end;
function UTF8CharacterLength(p: PChar): integer; inline;
function UTF8CodepointSize(p: PChar): integer; inline;
begin
if p=nil then exit(0);
if p^<#192 then exit(1);
Result:=UTF8CharacterLengthFull(p);
Result:=UTF8CodepointSizeFull(p);
end;
function UTF8CharacterLengthFast(p: PChar): integer;
// Deprecated alias of UTF8CodepointSize: forwards p unchanged and returns
// whatever UTF8CodepointSize reports for it.
function UTF8CharacterLength(p: PChar): integer;
begin
  Exit(UTF8CodepointSize(p));
end;
function UTF8CodepointSizeFast(p: PChar): integer;
begin
case p^ of
#0..#191 : Result := 1;
@ -459,6 +470,11 @@ begin
end;
end;
// Deprecated alias of UTF8CodepointSizeFast: forwards p unchanged and returns
// the value computed by UTF8CodepointSizeFast.
function UTF8CharacterLengthFast(p: PChar): integer;
begin
  Exit(UTF8CodepointSizeFast(p));
end;
function UTF8Length(const s: string): PtrInt;
begin
Result:=UTF8Length(PChar(s),length(s));
@ -471,7 +487,7 @@ begin
Result:=0;
while (ByteCount>0) do begin
inc(Result);
CharLen:=UTF8CharacterLength(p);
CharLen:=UTF8CodepointSize(p);
inc(p,CharLen);
dec(ByteCount,CharLen);
end;
@ -534,7 +550,7 @@ begin
Result := ByteCount - Result;
end;
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal;
function UTF8CodepointToUnicode(p: PChar; out CharLen: integer): Cardinal;
{ if p=nil then CharLen=0 otherwise CharLen>0
If there is an encoding error the Result is 0 and CharLen=1.
Use UTF8FixBroken to fix UTF-8 encoding.
@ -608,6 +624,11 @@ begin
end;
end;
// Deprecated alias of UTF8CodepointToUnicode. Both the function result and
// the CharLen out-parameter are produced by UTF8CodepointToUnicode.
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal;
begin
  Exit(UTF8CodepointToUnicode(p, CharLen));
end;
function UnicodeToUTF8(CodePoint: cardinal; Buf: PChar): integer;
procedure RaiseInvalidUnicode;
@ -697,7 +718,7 @@ begin
DestPos:=DBStr;
Result:=0;
while Len>0 do begin
u:=UTF8CharacterToUnicode(SrcPos,CharLen);
u:=UTF8CodepointToUnicode(SrcPos,CharLen);
DestPos^:=byte((u shr 8) and $ff);
inc(DestPos);
DestPos^:=byte(u and $ff);
@ -728,26 +749,26 @@ begin
if (not (Assigned(AString) and Assigned(CurPos)))
or (CurPos < AString) then Exit;
SavedPos := CurPos;
//Note: UTF8CharacterStrictLength will NOT "look" beyond the terminating #0 of a PChar, so this is safe with AnsiStrings
CharLen := UTF8CharacterStrictLength(CurPos);
//Note: UTF8CodepointStrictSize will NOT "look" beyond the terminating #0 of a PChar, so this is safe with AnsiStrings
CharLen := UTF8CodepointStrictSize(CurPos);
if (CharLen > 0) then Exit(True);
if (CurPos > AString) then
begin
Dec(CurPos); //-1
//is it second byte of 2..4 byte codepoint?
CharLen := UTF8CharacterStrictLength(CurPos);
CharLen := UTF8CodepointStrictSize(CurPos);
if (CharLen > 1) then Exit(True);
if (CurPos > AString) then
begin
Dec(CurPos); //-2
//is it third byte of 3..4 byte codepoint?
CharLen := UTF8CharacterStrictLength(CurPos);
CharLen := UTF8CodepointStrictSize(CurPos);
if (CharLen > 2) then Exit(True);
if (CurPos > AString) then
begin
Dec(CurPos); //-3
//is it fourth byte of 4 byte codepoint?
CharLen := UTF8CharacterStrictLength(CurPos);
CharLen := UTF8CodepointStrictSize(CurPos);
if (CharLen = 4) then Exit(True);
end;
end;
@ -784,35 +805,45 @@ end;
{ Len is the length in bytes of UTF8Str
CharIndex is the position of the desired char (starting at 0), in chars
CodepointIndex is the position of the desired codepoint (starting at 0), in codepoints
}
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
function UTF8CodepointStart(UTF8Str: PChar; Len, CodepointIndex: PtrInt): PChar;
var
CharLen: LongInt;
begin
Result:=UTF8Str;
if Result<>nil then begin
while (CharIndex>0) and (Len>0) do begin
CharLen:=UTF8CharacterLength(Result);
while (CodepointIndex>0) and (Len>0) do begin
CharLen:=UTF8CodepointSize(Result);
dec(Len,CharLen);
dec(CharIndex);
dec(CodepointIndex);
inc(Result,CharLen);
end;
if (CharIndex<>0) or (Len<0) then
if (CodepointIndex<>0) or (Len<0) then
Result:=nil;
end;
end;
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
// Deprecated alias of UTF8CodepointStart. CharIndex is passed on as the
// codepoint index; the returned pointer (possibly nil) comes straight from
// UTF8CodepointStart.
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
begin
  Exit(UTF8CodepointStart(UTF8Str, Len, CharIndex));
end;
function UTF8CodepointToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
var
p: PChar;
begin
p := UTF8CharStart(UTF8Str, Len, CharIndex);
p := UTF8CodepointStart(UTF8Str, Len, CharIndex);
if p = nil
then Result := -1
else Result := p - UTF8Str;
end;
// Deprecated alias of UTF8CodepointToByteIndex: all three arguments are
// forwarded as-is and its result is returned unchanged.
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
begin
  Exit(UTF8CodepointToByteIndex(UTF8Str, Len, CharIndex));
end;
{ fix any broken UTF8 sequences with spaces }
procedure UTF8FixBroken(P: PChar);
var
@ -882,12 +913,12 @@ end;
procedure UTF8FixBroken(var S: string);
begin
if S='' then exit;
if FindInvalidUTF8Character(PChar(S),length(S))<0 then exit;
if FindInvalidUTF8Codepoint(PChar(S),length(S))<0 then exit;
UniqueString(S);
UTF8FixBroken(PChar(S));
end;
function UTF8CharacterStrictLength(P: PChar): integer;
function UTF8CodepointStrictSize(P: PChar): integer;
var
c: Char;
begin
@ -928,6 +959,11 @@ begin
exit(0);
end;
// Deprecated alias of UTF8CodepointStrictSize: forwards P unchanged and
// returns the value computed by UTF8CodepointStrictSize.
function UTF8CharacterStrictLength(P: PChar): integer;
begin
  Exit(UTF8CodepointStrictSize(P));
end;
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string;
var
Source: PChar;
@ -1008,7 +1044,7 @@ begin
else if StartPos>1 then
begin
// skip
StartPosP:=UTF8CharStart(PChar(SearchInText),Length(SearchInText),StartPos-1);
StartPosP:=UTF8CodepointStart(PChar(SearchInText),Length(SearchInText),StartPos-1);
if StartPosP=nil then exit;
// search
p:=UTF8PosP(PChar(SearchForText),length(SearchForText),
@ -1049,12 +1085,12 @@ var
EndBytePos: PChar;
MaxBytes: PtrInt;
begin
StartBytePos:=UTF8CharStart(PChar(s),length(s),StartCharIndex-1);
StartBytePos:=UTF8CodepointStart(PChar(s),length(s),StartCharIndex-1);
if StartBytePos=nil then
Result:=''
else begin
MaxBytes:=PtrInt(PChar(s)+length(s)-StartBytePos);
EndBytePos:=UTF8CharStart(StartBytePos,MaxBytes,CharCount);
EndBytePos:=UTF8CodepointStart(StartBytePos,MaxBytes,CharCount);
if EndBytePos=nil then
Result:=copy(s,StartBytePos-PChar(s)+1,MaxBytes)
else
@ -1088,11 +1124,11 @@ var
EndBytePos: PChar;
MaxBytes: PtrInt;
begin
StartBytePos:=UTF8CharStart(PChar(s),length(s),StartCharIndex-1);
StartBytePos:=UTF8CodepointStart(PChar(s),length(s),StartCharIndex-1);
if StartBytePos <> nil then
begin
MaxBytes:=PtrInt(PChar(s)+length(s)-StartBytePos);
EndBytePos:=UTF8CharStart(StartBytePos,MaxBytes,CharCount);
EndBytePos:=UTF8CodepointStart(StartBytePos,MaxBytes,CharCount);
if EndBytePos=nil then
Delete(s,StartBytePos-PChar(s)+1,MaxBytes)
else
@ -1107,7 +1143,7 @@ procedure UTF8Insert(const source: UTF8String; var s: UTF8string;
var
StartBytePos: PChar;
begin
StartBytePos:=UTF8CharStart(PChar(s),length(s),StartCharIndex-1);
StartBytePos:=UTF8CodepointStart(PChar(s),length(s),StartCharIndex-1);
if StartBytePos <> nil then
Insert(source, s, StartBytePos-PChar(s)+1);
end;
@ -1117,7 +1153,7 @@ procedure UTF8Insert(const source: String; var s: String; StartCharIndex: PtrInt
var
StartBytePos: PChar;
begin
StartBytePos:=UTF8CharStart(PChar(s),length(s),StartCharIndex-1);
StartBytePos:=UTF8CodepointStart(PChar(s),length(s),StartCharIndex-1);
if StartBytePos <> nil then
Insert(source, s, StartBytePos-PChar(s)+1);
end;
@ -2424,7 +2460,7 @@ begin
{ Now everything else }
else
begin
CharLen := UTF8CharacterLength(@AInStr[InCounter]);
CharLen := UTF8CodepointSize(@AInStr[InCounter]);
CharProcessed := False;
NewCharLen := CharLen;
@ -2735,8 +2771,7 @@ begin
end;
function FindInvalidUTF8Character(p: PChar; Count: PtrInt;
StopOnNonUTF8: Boolean): PtrInt;
function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean): PtrInt;
// return -1 if ok
var
CharLen: Integer;
@ -2803,6 +2838,11 @@ begin
Result:=-1;
end;
// Deprecated alias of FindInvalidUTF8Codepoint: forwards all arguments and
// returns its result unchanged.
// The default value of StopOnNonUTF8 is declared once, in the interface
// section; it is intentionally not repeated here (matching the implementation
// header of FindInvalidUTF8Codepoint) so the two cannot drift apart.
function FindInvalidUTF8Character(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean): PtrInt;
begin
  Result := FindInvalidUTF8Codepoint(p, Count, StopOnNonUTF8);
end;
function ValidUTF8String(const s: String): String; inline;
begin
Result := Utf8EscapeControlChars(s, emPascal);
@ -2848,7 +2888,7 @@ var
Ch: Char;
i: Integer;
begin
if FindInvalidUTF8Character(PChar(S), Length(S)) <> -1 then
if FindInvalidUTF8Codepoint(PChar(S), Length(S)) <> -1 then
begin
UTF8FixBroken(S);
end;
@ -3041,7 +3081,7 @@ begin
rBytePos := ByteCount + 1;
while (rBytePos > 1) do
begin
CharLen:=UTF8CharacterLength(p);
CharLen:=UTF8CodepointSize(p);
Dec(rBytePos, CharLen);
System.Move(p^, Result[rBytePos], CharLen);
Inc(p, CharLen);
@ -3093,7 +3133,7 @@ begin
P := PChar(S);
while P^ <> #0 do
begin
CharLen := UTF8CharacterLength(P);
CharLen := UTF8CodepointSize(P);
i := 1;
j := 0;
ResultLen := Length(Result);
@ -3168,7 +3208,7 @@ begin
#128..#255:
begin
if KeepAllNonASCII then break;
u:=UTF8CharacterToUnicode(p,l);
u:=UTF8CodepointToUnicode(p,l);
if (l<=1) then break; // invalid character
case u of
128..159, // C1 set of control codes
@ -3216,7 +3256,7 @@ begin
begin
if KeepAllNonASCII then break;
StartP:=UTF8FindNearestCharStart(PChar(Result),length(Result),p-PChar(Result));
u:=UTF8CharacterToUnicode(PChar(Result)+StartP,l);
u:=UTF8CodepointToUnicode(PChar(Result)+StartP,l);
if (l<=1) then break; // invalid character
case u of
128..159, // C1 set of control codes

View File

@ -7029,7 +7029,7 @@ begin
inc(Src);
dec(len);
end else begin
Unicode:=UTF8CharacterToUnicode(Src,CharLen);
Unicode:=UTF8CodepointToUnicode(Src,CharLen);
inc(Src,CharLen);
dec(len,CharLen);
i:=UTF8CharConvFunc(Unicode);
@ -7068,7 +7068,7 @@ begin
inc(Src);
dec(len);
end else begin
Unicode:=UTF8CharacterToUnicode(Src,CharLen);
Unicode:=UTF8CodepointToUnicode(Src,CharLen);
inc(Src,CharLen);
dec(len,CharLen);
if Unicode<=$ffff then begin
@ -7108,7 +7108,7 @@ begin
inc(Src);
dec(len);
end else begin
Unicode:=UTF8CharacterToUnicode(Src,CharLen);
Unicode:=UTF8CodepointToUnicode(Src,CharLen);
inc(Src,CharLen);
dec(len,CharLen);
if Unicode<=$ffff then begin
@ -7265,7 +7265,7 @@ begin
end;
inc(p);
end else begin
i:=UTF8CharacterStrictLength(p);
i:=UTF8CodepointStrictSize(p);
//DebugLn(['GuessEncoding ',i,' ',DbgStr(s[p])]);
if i=0 then begin
{$IFDEF VerboseIDEEncoding}

View File

@ -96,9 +96,9 @@ var
Res: AnsiString; //intermediate needed for PChar -> String -> ShortString assignement
begin
Result := '';
p := UTF8CharStart(PChar(S), Length(S), Index - 1); //zero-based call
p := UTF8CodepointStart(PChar(S), Length(S), Index - 1); //zero-based call
//determine the length in bytes of this UTF-8 character
PLen := UTF8CharacterLength(p);
PLen := UTF8CodepointSize(p);
Res := p;
//Set correct length for Result (otherwise it returns all chars up to the end of the original string)
SetLength(Res,PLen);

View File

@ -189,7 +189,7 @@ begin
p:=Str;
if p=nil then exit(0);
while p^<>#0 do begin
l:=UTF8CharacterLength(p);
l:=UTF8CodepointSize(p);
inc(Result);
inc(p,l);
end;

View File

@ -1362,7 +1362,7 @@ var
// po requires special characters as #number
p:=1;
while p<=length(Value) do begin
j := UTF8CharacterLength(pchar(@Value[p]));
j := UTF8CodepointSize(pchar(@Value[p]));
if (j=1) and (Value[p] in [#0..#9,#11,#12,#14..#31,#127..#255]) then
Value := copy(Value,1,p-1)+'#'+IntToStr(ord(Value[p]))+copy(Value,p+1,length(Value))
else

View File

@ -17,7 +17,7 @@ begin
WideCount := Min(WideCount, StrLen);
while (CharCount < WideCount) do
begin
CharLen := UTF8CharacterLength(P);
CharLen := UTF8CodepointSize(P);
Inc(P, CharLen);
Inc(Result, CharLen);
Inc(CharCount);

View File

@ -971,7 +971,7 @@ begin
// po requires special characters as #number
p:=1;
while p<=length(Value) do begin
j := UTF8CharacterLength(pchar(@Value[p]));
j := UTF8CodepointSize(pchar(@Value[p]));
if (j=1) and (Value[p] in [#0..#9,#11,#12,#14..#31,#127..#255]) then
Value := copy(Value,1,p-1)+'#'+IntToStr(ord(Value[p]))+copy(Value,p+1,length(Value))
else

View File

@ -131,7 +131,7 @@ begin
if ShowLineNumbers then s2 := Format('%4d: ',[i]);
l := Printer.Canvas.TextFitInfo(s2 + s, Printer.PageWidth - 2 * Margin);
l := l - Length(s2); // s2 has only single byte
l := UTF8CharToByteIndex(PChar(s), length(s), l);
l := UTF8CodepointToByteIndex(PChar(s), length(s), l);
while (l > MIN_LINE_LEN) and (l < length(s)) do begin
l2 := l;
while (l2 > MIN_LINE_LEN) and
@ -144,14 +144,14 @@ begin
// find utf8 start
while (l2 > 1) and (ord(s[l2]) >= 128) and (ord(s[l2+1]) >= 128) and (ord(s[l2+1]) < 192) do
dec(l2);
if l2 = 0 then l2 := UTF8CharToByteIndex(PChar(s), length(s), MIN_LINE_LEN);
if l2 = 0 then l2 := UTF8CodepointToByteIndex(PChar(s), length(s), MIN_LINE_LEN);
Text[j] := copy(s, 1, l2);
delete(s, 1, l2);
inc(j);
Text.InsertObject(j, '', nil);
l := Printer.Canvas.TextFitInfo(s2 + s, Printer.PageWidth - 2 * Margin);
l := l - Length(s2);
l := UTF8CharToByteIndex(PChar(s), length(s), l);
l := UTF8CodepointToByteIndex(PChar(s), length(s), l);
end;
Text[j] := s;
inc(i);

View File

@ -463,7 +463,7 @@ begin
{$IF FPC_FULLVERSION >= 20701}
if p^ <= #127 then exit;
i := UTF8CharacterLength(p);
i := UTF8CodepointSize(p);
SetLength(u, i);
// wide chars of UTF-16 <= bytes of UTF-8 string
if ConvertUTF8ToUTF16(PWideChar(u), i + 1, p, i, [toInvalidCharToSymbol], L) = trNoError

View File

@ -212,7 +212,7 @@ end; { SpaceProc }
procedure TSynTeXSyn.TextProc;
begin
fTokenID:=tkText;
inc(Run,UTF8CharacterLength(@fLine[Run]));
inc(Run,UTF8CodepointSize(@fLine[Run]));
end; { TextProc }
procedure TSynTeXSyn.LFProc;

View File

@ -211,7 +211,7 @@ begin
delete(Input, 1, 1);
Continue;
end;
l := UTF8CharacterLength(@Input[1]);
l := UTF8CodepointSize(@Input[1]);
if l < 1 then Break;
CommandProcessor(ecChar, copy(Input, 1, l), nil);
delete(Input, 1, l);

View File

@ -542,7 +542,7 @@ begin
if (p^ in WFCAllowedChars) and (p[1] in WFCAllowedChars)
and (p[2] in WFCAllowedChars) then
break; // the next three are normal characters -> stop encoding as base64
CharLen:=UTF8CharacterLength(p);
CharLen:=UTF8CodepointSize(p);
{$ifdef VerboseWikiFileCode}
writeln('UTF8ToWikiFileCode sequence UTF8CharLen=',CharLen);
{$endif}
@ -706,7 +706,7 @@ begin
end else
raise Exception.Create('invalid wiki file code: invalid character');
until false;
if FindInvalidUTF8Character(PChar(Result),length(Result))>=0 then
if FindInvalidUTF8Codepoint(PChar(Result),length(Result))>=0 then
raise Exception.Create('invalid wiki file code: result is not UTF-8');
end;

View File

@ -1645,8 +1645,8 @@ begin
end;
end;
end else begin
CharLen:=UTF8CharacterLength(PageP);
UpCharLen:=UTF8CharacterLength(PageUpP);
CharLen:=UTF8CodepointSize(PageP);
UpCharLen:=UTF8CodepointSize(PageUpP);
if (CharLen>1) or (PageP^ in ['a'..'z','A'..'Z']) then begin
if (CharLen=UpCharLen) and CompareMem(PageP,PageUpP,CharLen) then
CaseFlags:=CaseFlags+'u'

View File

@ -347,7 +347,7 @@ begin
//debugln(['TextToHTMLSnipped phrase "',Phrase,'" found at ',LoTxtP-PChar(LoTxt)]);
CurPhraseP:=PChar(Phrase);
while (CurPhraseP^<>#0) do begin
l:=UTF8CharacterLength(CurPhraseP);
l:=UTF8CodepointSize(CurPhraseP);
inc(LoTxtP,l);
inc(CurPhraseP,l);
BoldP^+=1;
@ -356,7 +356,7 @@ begin
continue;
end;
end;
inc(LoTxtP,UTF8CharacterLength(LoTxtP));
inc(LoTxtP,UTF8CodepointSize(LoTxtP));
inc(BoldP);
end;
end;
@ -367,7 +367,7 @@ begin
BoldP:=Bold;
while LoTxtP^<>#0 do begin
dbgout([' ',dbgstr(LoTxtP^),':',BoldP^]);
inc(LoTxtP,UTF8CharacterLength(LoTxtP));
inc(LoTxtP,UTF8CodepointSize(LoTxtP));
inc(BoldP);
end;
debugln;
@ -447,7 +447,7 @@ begin
ReplaceSubstring(Result,i,1,'&gt;');
inc(i,length('&gt;'));
end else
inc(i,UTF8CharacterLength(@Result[i]));
inc(i,UTF8CodepointSize(@Result[i]));
inc(BoldP);
end;
if IsBold then

View File

@ -307,7 +307,7 @@ begin
if (p = 0) or (p = Length(aText)) then Break;
if aText[p+1] <> '&' then // '&&' is reduced to '&' by widgetset GUI.
begin
UTF8Len := UTF8CharacterLength(@aText[p+1]);
UTF8Len := UTF8CodepointSize(@aText[p+1]);
accelStr := UTF8UpperCase(Copy(aText, p+1, UTF8Len)); // force uppercase
// ToDo: Use the whole UTF-8 character in accelStr. How?
aShortcut := KeyToShortCut(Ord(accelStr[1]),

View File

@ -993,7 +993,7 @@ begin
ExceptMsg := AExceptionText;
// if AExceptionText is not a valid UTF8 string,
// then assume it has the ansi encoding and convert it
if FindInvalidUTF8Character(pchar(ExceptMsg),length(ExceptMsg)) > 0 then
if FindInvalidUTF8Codepoint(pchar(ExceptMsg),length(ExceptMsg)) > 0 then
ExceptMsg := AnsiToUtf8(ExceptMsg);
msg := Format(lisProjectSRaisedExceptionClassSWithMessageSS,
[GetTitle, AExceptionClass, LineEnding, ExceptMsg]);

View File

@ -293,7 +293,7 @@ begin
CondSynEdit.GetWordBoundsAtRowCol(XY,StartX,EndX);
if EndX<=XY.X then exit;
Line := CondSynEdit.Lines[XY.Y - 1];
inc(XY.X,UTF8CharacterLength(@Line[XY.X-1]));
inc(XY.X,UTF8CodepointSize(@Line[XY.X-1]));
CondSynEdit.LogicalCaretXY:=XY;
end;

View File

@ -367,7 +367,7 @@ begin
l := length(ExtractFileExt(NewName));
if (l > 0) and (l+1 < Length(NewName)) then
NewName := Copy(NewName, 1, Length(NewName) - l);
l := UTF8CharacterLength(PChar(NewName));
l := UTF8CodepointSize(PChar(NewName));
if l > 0 then
NewName := UTF8UpperCase(copy(NewName, 1, l)) + copy(NewName, 1+l, length(NewName));

View File

@ -545,7 +545,7 @@ begin
l := length(ExtractFileExt(NewName));
if (l > 0) and (l+1 < Length(NewName)) then
NewName := Copy(NewName, 1, Length(NewName) - l);
l := UTF8CharacterLength(PChar(NewName));
l := UTF8CodepointSize(PChar(NewName));
if l > 0 then
NewName := UTF8UpperCase(copy(NewName, 1, l)) + copy(NewName, 1+l, length(NewName));

View File

@ -136,7 +136,7 @@ begin
s := Trim(s);
{$ifdef windows}
//cfg file is made by Windows installer and probably is Windows default codepage
if FindInvalidUTF8Character(PChar(s), Length(s), True) > 0 then
if FindInvalidUTF8Codepoint(PChar(s), Length(s), True) > 0 then
s := WinCPToUtf8(s);
{$endif windows}
ParamsAndCfgFileContent.Add(s)

View File

@ -1630,7 +1630,7 @@ function TabsToSpaces(const s: string; TabWidth: integer; UseUTF8: boolean): str
Dest[DestPos]:=Src[SrcPos];
inc(PhysicalX);
if UseUTF8 then
CharLen:=UTF8CharacterLength(@s[SrcPos])
CharLen:=UTF8CodepointSize(@s[SrcPos])
else
CharLen:=1;
for i:=1 to CharLen do begin

View File

@ -2556,7 +2556,7 @@ begin
if LogCaret.Y>=Editor.Lines.Count then exit;
Line:=Editor.Lines[LogCaret.Y-1];
if LogCaret.X>length(Line) then exit;
CharLen:=UTF8CharacterLength(@Line[LogCaret.X]);
CharLen:=UTF8CodepointSize(@Line[LogCaret.X]);
AddPrefix:=copy(Line,LogCaret.X,CharLen);
NewPrefix:=CurrentString+AddPrefix;
//debugln('TSourceNotebook.OnSynCompletionNextChar NewPrefix="',NewPrefix,'" LogCaret.X=',dbgs(LogCaret.X));

View File

@ -1570,7 +1570,7 @@ var
begin
if AppNoExceptionMessages in FFlags then exit;
Msg := E.Message;
if FindInvalidUTF8Character(PChar(Msg), Length(Msg)) > 0 then
if FindInvalidUTF8Codepoint(PChar(Msg), Length(Msg)) > 0 then
Msg := AnsiToUtf8(Msg);
if (Msg <> '') and (Msg[length(Msg)] <> '.') then Msg := Msg + '.';
if (not Terminated) and (Self <> nil) and (AppInitialized in FFlags) then

View File

@ -5758,7 +5758,7 @@ var
Result:=LineStart;
LineWidth:=0;
repeat
charLen:=UTF8CharacterLength(@AText[result]);
charLen:=UTF8CodepointSize(@AText[result]);
CharWidth:=GetLineWidthInPixel(Result,charLen);
inc(LineWidth,CharWidth);
if LineWidth>MaxWidthInPixel then break;
@ -5767,7 +5767,7 @@ var
until false;
// at least one char
if Result=LineStart then begin
charLen:=UTF8CharacterLength(@AText[result]);
charLen:=UTF8CodepointSize(@AText[result]);
inc(Result,charLen);
end;
end;

View File

@ -6272,7 +6272,7 @@ var
Result:=LineStart;
LineWidth:=0;
repeat
charLen:=UTF8CharacterLength(@AText[result]);
charLen:=UTF8CodepointSize(@AText[result]);
CharWidth:=GetLineWidthInPixel(Result,charLen);
inc(LineWidth,CharWidth);
if LineWidth>MaxWidthInPixel then break;
@ -6281,7 +6281,7 @@ var
until false;
// at least one char
if Result=LineStart then begin
charLen:=UTF8CharacterLength(@AText[result]);
charLen:=UTF8CodepointSize(@AText[result]);
inc(Result,charLen);
end;
end;

View File

@ -2658,7 +2658,7 @@ var
Points[0].cX := LeftPos + Points[0].cX;
Points[0].cY := TopPos + tm.tmHeight - TM.tmDescent + 1;
GetTextExtentPoint(DC, @aStr[pIndex], UTF8CharacterLength(@aStr[pIndex]), Points[1]);
GetTextExtentPoint(DC, @aStr[pIndex], UTF8CodepointSize(@aStr[pIndex]), Points[1]);
Points[1].cX := Points[0].cX + Points[1].cX;
Points[1].cY := Points[0].cY;
@ -3796,7 +3796,7 @@ var
CurScreenX := X;
while CurCount > 0 do
begin
CharLen := UTF8CharacterLength(CurStr);
CharLen := UTF8CodepointSize(CurStr);
DevCtx.DrawTextWithColors(CurStr, CharLen, CurScreenX, Y, Foreground, BackgroundColor);
inc(CurScreenX, CurDx^);
inc(CurDx);

View File

@ -2022,7 +2022,7 @@ var
Result:=LineStart;
LineWidth:=0;
repeat
charLen:=UTF8CharacterLength(@AText[result]);
charLen:=UTF8CodepointSize(@AText[result]);
CharWidth:=GetLineWidthInPixel(Result,charLen);
inc(LineWidth,CharWidth);
if LineWidth>MaxWidthInPixel then break;
@ -2031,7 +2031,7 @@ var
until false;
// at least one char
if Result=LineStart then begin
charLen:=UTF8CharacterLength(@AText[result]);
charLen:=UTF8CodepointSize(@AText[result]);
inc(Result,charLen);
end;
end;

View File

@ -919,7 +919,7 @@ var
Points[0].cX := LeftPos + Points[0].cX;
Points[0].cY := TopPos + tm.tmHeight - TM.tmDescent + 1;
GetTextExtentPoint(DC, @aStr[pIndex], UTF8CharacterLength(@aStr[pIndex]), Points[1]);
GetTextExtentPoint(DC, @aStr[pIndex], UTF8CodepointSize(@aStr[pIndex]), Points[1]);
Points[1].cX := Points[0].cX + Points[1].cX;
Points[1].cY := Points[0].cY;

View File

@ -3500,7 +3500,7 @@ begin
{$endif}
InputEvent := QInputMethodEventH(Event);
QInputMethodEvent_commitString(InputEvent, @WStr);
UnicodeChar := UTF8CharacterToUnicode(PChar(WStr), UnicodeOutLen);
UnicodeChar := UTF8CodepointToUnicode(PChar(WStr), UnicodeOutLen);
{$IFDEF VerboseQtKeys}
writeln('> TQtWidget.SlotInputMethod ',dbgsname(LCLObject),' event=QEventInputMethod:');
writeln(' commmitString ',WStr,' len ',length(WStr),' UnicodeChar ',UnicodeChar,

View File

@ -2216,7 +2216,7 @@ var
CurX := X;
while CurCount > 0 do
begin
CharLen := UTF8CharacterLength(CurStr);
CharLen := UTF8CodepointSize(CurStr);
W := {%H-}Copy(CurStr, 1, CharLen);
if AClipped then
QtDC.drawText(CurX, Y, Rect^.Right - Rect^.Left, Rect^.Bottom - Rect^.Top,

View File

@ -3504,7 +3504,7 @@ begin
{$endif}
InputEvent := QInputMethodEventH(Event);
QInputMethodEvent_commitString(InputEvent, @WStr);
UnicodeChar := UTF8CharacterToUnicode(PChar(WStr), UnicodeOutLen);
UnicodeChar := UTF8CodepointToUnicode(PChar(WStr), UnicodeOutLen);
{$IFDEF VerboseQtKeys}
writeln('> TQtWidget.SlotInputMethod ',dbgsname(LCLObject),' event=QEventInputMethod:');
writeln(' commmitString ',WStr,' len ',length(WStr),' UnicodeChar ',UnicodeChar,

View File

@ -2182,7 +2182,7 @@ var
CurX := X;
while CurCount > 0 do
begin
CharLen := UTF8CharacterLength(CurStr);
CharLen := UTF8CodepointSize(CurStr);
if AClipped then
begin
W := GetUTF8String(Copy(CurStr, 1, CharLen));

View File

@ -336,10 +336,10 @@ function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal;
function UnicodeToUTF16(u: cardinal): UTF16String;
{$IFDEF EnableWrapperFunctions}
function UTF8CharacterLength(p: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8CodepointSize(p: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8Length(const s: string): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8CodepointToUnicode(p: PChar; out CharLen: integer): Cardinal; inline; deprecated 'Use the function in LazUTF8 unit';
function UnicodeToUTF8(u: cardinal; Buf: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
function UnicodeToUTF8SkipErrors(u: cardinal; Buf: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
function UnicodeToUTF8(u: cardinal): shortstring; inline; deprecated 'Use the function in LazUTF8 unit';
@ -348,11 +348,11 @@ function UTF8ToDoubleByte(UTF8Str: PChar; Len: PtrInt; DBStr: PByte): PtrInt; in
function UTF8FindNearestCharStart(UTF8Str: PChar; Len: integer;
BytePos: integer): integer; inline; deprecated 'Use the function in LazUTF8 unit';
// find the n-th UTF8 character, ignoring BIDI
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8CodepointStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; inline; deprecated 'Use the function in LazUTF8 unit';
// find the byte index of the n-th UTF8 character, ignoring BIDI (byte len of substr)
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8CodepointToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit';
procedure UTF8FixBroken(P: PChar); inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8CharacterStrictLength(P: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8CodepointStrictSize(P: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8Pos(const SearchForText, SearchInText: string): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8Copy(const s: string; StartCharIndex, CharCount: PtrInt): string; inline; deprecated 'Use the function in LazUTF8 unit';
@ -360,7 +360,7 @@ procedure UTF8Delete(var s: String; StartCharIndex, CharCount: PtrInt); inline;
procedure UTF8Insert(const source: String; var s: string; StartCharIndex: PtrInt); inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8LowerCase(const s: String): String; inline; deprecated 'Use the function in LazUTF8 unit';
function UTF8UpperCase(const s: String): String; inline; deprecated 'Use the function in LazUTF8 unit';
function FindInvalidUTF8Character(p: PChar; Count: PtrInt;
function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt;
StopOnNonASCII: Boolean = true): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit';
function ValidUTF8String(const s: String): String; inline; deprecated 'Use the function in LazUTF8 unit';
@ -2744,9 +2744,9 @@ begin
end;
{$IFDEF EnableWrapperFunctions}
function UTF8CharacterLength(p: PChar): integer;
function UTF8CodepointSize(p: PChar): integer;
begin
Result := LazUTF8.UTF8CharacterLength(p);
Result := LazUTF8.UTF8CodepointSize(p);
end;
function UTF8Length(const s: string): PtrInt;
@ -2759,9 +2759,9 @@ begin
Result := LazUTF8.UTF8Length(p, ByteCount);
end;
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal;
function UTF8CodepointToUnicode(p: PChar; out CharLen: integer): Cardinal;
begin
Result := LazUTF8.UTF8CharacterToUnicode(p, CharLen);
Result := LazUTF8.UTF8CodepointToUnicode(p, CharLen);
end;
function UnicodeToUTF8(u: cardinal; Buf: PChar): integer;
@ -2803,14 +2803,14 @@ end;
This function is similar to UTF8FindNearestCharStart
}
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
function UTF8CodepointStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
begin
Result := LazUTF8.UTF8CharStart(UTF8Str, Len, CharIndex);
Result := LazUTF8.UTF8CodepointStart(UTF8Str, Len, CharIndex);
end;
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
function UTF8CodepointToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
begin
Result := LazUTF8.UTF8CharToByteIndex(UTF8Str, Len, CharIndex);
Result := LazUTF8.UTF8CodepointToByteIndex(UTF8Str, Len, CharIndex);
end;
{ fix any broken UTF8 sequences with spaces }
@ -2819,9 +2819,9 @@ begin
LazUTF8.UTF8FixBroken(P);
end;
function UTF8CharacterStrictLength(P: PChar): integer;
function UTF8CodepointStrictSize(P: PChar): integer;
begin
Result := LazUTF8.UTF8CharacterStrictLength(P);
Result := LazUTF8.UTF8CodepointStrictSize(P);
end;
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string;
@ -2859,11 +2859,11 @@ begin
Result := LazUTF8.UTF8UpperCase(s);
end;
function FindInvalidUTF8Character(p: PChar; Count: PtrInt;
function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt;
StopOnNonASCII: Boolean): PtrInt;
// return -1 if ok
begin
Result := LazUTF8.FindInvalidUTF8Character(p, Count, StopOnNonASCII);
Result := LazUTF8.FindInvalidUTF8Codepoint(p, Count, StopOnNonASCII);
end;
function ValidUTF8String(const s: String): String;

View File

@ -372,9 +372,9 @@ var
Res: AnsiString; //intermediate needed for PChar -> String -> ShortString assignement
begin
Result := '';
p := UTF8CharStart(PChar(S), Length(S), Index - 1); //zero-based call
p := UTF8CodepointStart(PChar(S), Length(S), Index - 1); //zero-based call
//determine the length in bytes of this UTF-8 character
PLen := UTF8CharacterLength(p);
PLen := UTF8CodepointSize(p);
Res := p;
//Set correct length for Result (otherwise it returns all chars up to the end of the original string)
SetLength(Res,PLen);

View File

@ -2352,7 +2352,7 @@ var
Result := LineStart;
LineWidth := 0;
repeat
charLen := UTF8CharacterLength(@AText[Result]);
charLen := UTF8CodepointSize(@AText[Result]);
CharWidth := TextWidth(MidStr(AText, Result, charLen));
Inc(LineWidth, CharWidth);
if LineWidth > MaxWidthInPixel then
@ -2364,7 +2364,7 @@ var
// at least one char
if Result = LineStart then
begin
charLen := UTF8CharacterLength(@AText[Result]);
charLen := UTF8CodepointSize(@AText[Result]);
Inc(Result, charLen);
end;
end;

View File

@ -142,7 +142,7 @@ begin
for i:=0 to $10FFFF do
begin
s:=UnicodeToUTF8(i);
u:=UTF8CharacterToUnicode(PChar(s), dum);
u:=UTF8CodepointToUnicode(PChar(s), dum);
AssertEquals('got (hexidecimal): ' + InttoHex(u,6), i, u);
end;
end;

View File

@ -66,7 +66,7 @@ procedure TTestLazUTF8.TestFindInvalidUTF8;
var
Actual: PtrInt;
begin
Actual:=FindInvalidUTF8Character(PChar(s),length(s));
Actual:=FindInvalidUTF8Codepoint(PChar(s),length(s));
AssertEquals(Title+': '+dbgMemRange(Pointer(s),length(s)),Expected,Actual);
end;

View File

@ -44,7 +44,7 @@ procedure TTestLConvEncoding.Test_CP_UTF8_CP;
AssertEquals('CodePage '+CodePageName+' to UTF8 creates empty string for character #'+IntToStr(ord(c)),true,false);
Back:=ConvertEncodingFromUTF8(AsUTF8,CodePageName,Encoded);
if Back<>c then
AssertEquals('CodePage '+CodePageName+' ('+IntToStr(ord(c))+') to UTF8 ('+dbgs(UTF8CharacterToUnicode(PChar(AsUTF8),l))+') and back differ for character #'+IntToStr(ord(c)),DbgStr(c),dbgstr(Back));
AssertEquals('CodePage '+CodePageName+' ('+IntToStr(ord(c))+') to UTF8 ('+dbgs(UTF8CodepointToUnicode(PChar(AsUTF8),l))+') and back differ for character #'+IntToStr(ord(c)),DbgStr(c),dbgstr(Back));
end;
end;

View File

@ -182,7 +182,7 @@ begin
s:=SortedTable[i];
if (length(s)=1) and (ord(s[1])<=127) then begin
end else if s<>'' then begin
UniCode:=UTF8CharacterToUnicode(@s[1],CharLen);
UniCode:=UTF8CodepointToUnicode(@s[1],CharLen);
TableIndex:=StrToTableIndex(s);
j:=1;
while (i+j<256) do begin
@ -191,11 +191,11 @@ begin
' SortedTable[i]=',ToStringConstant(s),
' SortedTable[i+j]=',ToStringConstant(SortedTable[i+j]),
' UniCode[i]=',UniCode,
' UniCode[i+j]=',UTF8CharacterToUnicode(@SortedTable[i+j][1],CharLen),
' UniCode[i+j]=',UTF8CodepointToUnicode(@SortedTable[i+j][1],CharLen),
' TableIndex[i]=',TableIndex,
' TableIndex[i+j]=',StrToTableIndex(SortedTable[i+j]),
'');}
if UTF8CharacterToUnicode(@SortedTable[i+j][1],CharLen)<>UniCode+j then
if UTF8CodepointToUnicode(@SortedTable[i+j][1],CharLen)<>UniCode+j then
break;
if StrToTableIndex(SortedTable[i+j])<>TableIndex+j then
break;

View File

@ -138,7 +138,7 @@ begin
SL.LoadFromFile(FilenameUTF8);
s:=SL[0];
if s<>'' then begin
DBCSToUTF8[Index]:=UTF8CharacterToUnicode(PChar(s),CharLen);
DBCSToUTF8[Index]:=UTF8CodepointToUnicode(PChar(s),CharLen);
if CharLen=0 then DBCSToUTF8[Index]:=0;
writeln(IntToStr(Index)+'='+IntToStr(DBCSToUTF8[Index])+' s='+ToStringConstant(s)+' '+IntToStr(DBCSToUTF8[Index]-DBCSToUTF8[Index-1]-1));
end;
@ -319,7 +319,7 @@ begin
s:=SortedTable[i];
if (length(s)=1) and (ord(s[1])<=127) then begin
end else if s<>'' then begin
UniCode:=UTF8CharacterToUnicode(@s[1],CharLen);
UniCode:=UTF8CodepointToUnicode(@s[1],CharLen);
TableIndex:=StrToTableIndex(s);
j:=1;
while (i+j<256) do begin
@ -328,11 +328,11 @@ begin
' SortedTable[i]=',ToStringConstant(s),
' SortedTable[i+j]=',ToStringConstant(SortedTable[i+j]),
' UniCode[i]=',UniCode,
' UniCode[i+j]=',UTF8CharacterToUnicode(@SortedTable[i+j][1],CharLen),
' UniCode[i+j]=',UTF8CodepointToUnicode(@SortedTable[i+j][1],CharLen),
' TableIndex[i]=',TableIndex,
' TableIndex[i+j]=',StrToTableIndex(SortedTable[i+j]),
'');*)
if integer(UTF8CharacterToUnicode(@SortedTable[i+j][1],CharLen))<>UniCode+j then
if integer(UTF8CodepointToUnicode(@SortedTable[i+j][1],CharLen))<>UniCode+j then
break;
if StrToTableIndex(SortedTable[i+j])<>TableIndex+j then
break;