diff --git a/components/aarre/src/aarrepkglist.pas b/components/aarre/src/aarrepkglist.pas index 499d8717ef..43db97f357 100644 --- a/components/aarre/src/aarrepkglist.pas +++ b/components/aarre/src/aarrepkglist.pas @@ -206,7 +206,7 @@ var begin Result:=s; if Result='' then exit; - i:=FindInvalidUTF8Character(PChar(Result),length(Result)); + i:=FindInvalidUTF8Codepoint(PChar(Result),length(Result)); if i<0 then exit; Result:=ISO_8859_1ToUTF8(Result); end; diff --git a/components/codetools/basiccodetools.pas b/components/codetools/basiccodetools.pas index bdb8155448..43966fda31 100644 --- a/components/codetools/basiccodetools.pas +++ b/components/codetools/basiccodetools.pas @@ -5341,7 +5341,7 @@ var var l: LongInt; begin - l:=UTF8CharacterLength(@Src[APos]); + l:=UTF8CodepointSize(@Src[APos]); inc(APos); dec(l); while (l>0) and (APos0 then exit; diff --git a/components/lazdebuggergdbmi/debugutils.pp b/components/lazdebuggergdbmi/debugutils.pp index 0ed6d342f3..bbcd60d083 100644 --- a/components/lazdebuggergdbmi/debugutils.pp +++ b/components/lazdebuggergdbmi/debugutils.pp @@ -214,7 +214,7 @@ begin if AString[n] = '''' then Result := Result + ''''; end; #192..#255: begin // Maybe utf8 - u := UTF8CharacterLength(@AString[n]); + u := UTF8CodepointSize(@AString[n]); if (u > 0) and (n+u-1 <= l) then begin if not InString then ToggleInString; diff --git a/components/lazreport/source/lr_utils.pas b/components/lazreport/source/lr_utils.pas index 4b1372d58c..72117a095a 100644 --- a/components/lazreport/source/lr_utils.pas +++ b/components/lazreport/source/lr_utils.pas @@ -964,7 +964,7 @@ begin Result := 0; SetLength(Desc, Length(S)); while i<=Length(s) do begin - b := UTF8CharacterStrictLength(@S[i]); + b := UTF8CodepointStrictSize(@S[i]); inc(i,b); inc(Result); Desc[Result] := Char(b); @@ -1096,7 +1096,7 @@ begin SpcSize := 0; while i<=Length(str) do begin - b := UTF8CharacterStrictLength(@Str[i]); + b := UTF8CodepointStrictSize(@Str[i]); spc := (b=1) and (str[i]=' '); 
inc(len); j := Length(result)-1; diff --git a/components/lazutils/asiancodepagefunctions.inc b/components/lazutils/asiancodepagefunctions.inc index b10b31a71c..a2cc78d203 100644 --- a/components/lazutils/asiancodepagefunctions.inc +++ b/components/lazutils/asiancodepagefunctions.inc @@ -217,7 +217,7 @@ begin end else begin - Unicode := UTF8CharacterToUnicode(Src, CharLen); + Unicode := UTF8CodepointToUnicode(Src, CharLen); Inc(Src, CharLen); i := UTF8CharConvFunc(Unicode); //writeln(Format('%X', [i])); diff --git a/components/lazutils/easylazfreetype.pas b/components/lazutils/easylazfreetype.pas index 1145caefd2..68ea5a717a 100644 --- a/components/lazutils/easylazfreetype.pas +++ b/components/lazutils/easylazfreetype.pas @@ -544,8 +544,8 @@ begin exit; end; - charlen := UTF8CharacterLength(pstr); - glyphCode := UTF8CharacterToUnicode(pstr, charlen); + charlen := UTF8CodepointSize(pstr); + glyphCode := UTF8CodepointToUnicode(pstr, charlen); inc(pstr,charlen); glyphWidth := CharWidthFromUnicode(glyphCode); @@ -1487,7 +1487,7 @@ begin left := length(AText); while left > 0 do begin - charcode := UTF8CharacterToUnicode(pstr, charlen); + charcode := UTF8CodepointToUnicode(pstr, charlen); inc(pstr,charlen); dec(left,charlen); g := Glyph[CharIndex[charcode]]; @@ -1572,7 +1572,7 @@ begin left := length(AText); while left > 0 do begin - charcode := UTF8CharacterToUnicode(pstr, charlen); + charcode := UTF8CodepointToUnicode(pstr, charlen); inc(pstr,charlen); dec(left,charlen); g := Glyph[CharIndex[charcode]]; @@ -1647,7 +1647,7 @@ begin resultIndex := 0; while left > 0 do begin - charcode := UTF8CharacterToUnicode(pstr, charlen); + charcode := UTF8CodepointToUnicode(pstr, charlen); inc(pstr,charlen); dec(left,charlen); @@ -1757,7 +1757,7 @@ begin if left <= 0 then break; end; end; - charcode := UTF8CharacterToUnicode(pstr, charlen); + charcode := UTF8CodepointToUnicode(pstr, charlen); inc(pstr,charlen); dec(left,charlen); g := Glyph[CharIndex[charcode]]; diff --git 
a/components/lazutils/laz2_xmlutils.pas b/components/lazutils/laz2_xmlutils.pas index 253e479877..fc42c26216 100644 --- a/components/lazutils/laz2_xmlutils.pas +++ b/components/lazutils/laz2_xmlutils.pas @@ -507,15 +507,15 @@ var DstP:=PChar(DstChars); Item:=List; for i:=1 to Count do begin - Item^.SrcLen:=UTF8CharacterLength(SrcP); + Item^.SrcLen:=UTF8CodepointSize(SrcP); Move(SrcP^,Item^.Src[0],Item^.SrcLen); if (DstP^<>#0) or (DstP-PChar(DstChars)0 then begin diff --git a/components/lazutils/lazunicode.pas b/components/lazutils/lazunicode.pas index c840c92826..c7e7ce2331 100644 --- a/components/lazutils/lazunicode.pas +++ b/components/lazutils/lazunicode.pas @@ -187,7 +187,7 @@ begin else Result := 1 {$ELSE} - Result := UTF8CharacterLengthFast(p); + Result := UTF8CodepointSizeFast(p); {$ENDIF} end; diff --git a/components/lazutils/lazutf16.pas b/components/lazutils/lazutf16.pas index a9aa56fb1f..8101a6249f 100644 --- a/components/lazutils/lazutf16.pas +++ b/components/lazutils/lazutf16.pas @@ -1031,7 +1031,7 @@ begin case s[SrcPos] of #192..#240: begin - OldCode:=UTF8CharacterToUnicode(@s[SrcPos],CharLen); + OldCode:=UTF8CodepointToUnicode(@s[SrcPos],CharLen); NewCode:=UnicodeLowercase(OldCode); if NewCode=OldCode then begin inc(DstPos,CharLen); @@ -1054,7 +1054,7 @@ begin case s[SrcPos] of #192..#240: begin - OldCode:=UTF8CharacterToUnicode(@s[SrcPos],CharLen); + OldCode:=UTF8CodepointToUnicode(@s[SrcPos],CharLen); NewCode:=UnicodeLowercase(OldCode); if NewCode=OldCode then begin System.Move(s[SrcPos],Result[DstPos],CharLen); @@ -1107,7 +1107,7 @@ begin #192..#240: // Now chars with multiple bytes begin - OldCode:=UTF8CharacterToUnicode(p,CharLen); + OldCode:=UTF8CodepointToUnicode(p,CharLen); NewCode:=UnicodeLowercase(OldCode); if NewCode<>OldCode then begin if not Changed then begin diff --git a/components/lazutils/lazutf8.pas b/components/lazutils/lazutf8.pas index ee26c0493f..5258521fec 100644 --- a/components/lazutils/lazutf8.pas +++ 
b/components/lazutils/lazutf8.pas @@ -73,9 +73,11 @@ function GetEnvironmentVariableUTF8(const EnvVar: string): String; function SysErrorMessageUTF8(ErrorCode: Integer): String; // Returns the size of one codepoint in bytes. -function UTF8CharacterLength(p: PChar): integer; inline; -// Fast version of UTF8CharacterLength. Assumes the UTF-8 codepoint is valid. -function UTF8CharacterLengthFast(p: PChar): integer; inline; +function UTF8CodepointSize(p: PChar): integer; inline; +function UTF8CharacterLength(p: PChar): integer; deprecated 'Use UTF8CodepointSize instead.'; +// Fast version of UTF8CodepointSize. Assumes the UTF-8 codepoint is valid. +function UTF8CodepointSizeFast(p: PChar): integer; inline; +function UTF8CharacterLengthFast(p: PChar): integer; deprecated 'Use UTF8CodepointSizeFast instead.'; function UTF8Length(const s: string): PtrInt; inline; function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt; @@ -84,7 +86,8 @@ function UTF8LengthFast(const s: string): PtrInt; inline; function UTF8LengthFast(p: PChar; ByteCount: PtrInt): PtrInt; // Functions dealing with unicode number U+xxx. 
-function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal; +function UTF8CodepointToUnicode(p: PChar; out CharLen: integer): Cardinal; +function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal; deprecated 'Use UTF8CodepointToUnicode instead.'; function UnicodeToUTF8(CodePoint: cardinal): string; // UTF32 to UTF8 function UnicodeToUTF8(CodePoint: cardinal; Buf: PChar): integer; // UTF32 to UTF8 function UnicodeToUTF8SkipErrors(CodePoint: cardinal; Buf: PChar): integer; // UTF32 to UTF8 @@ -95,13 +98,16 @@ function UTF8FindNearestCharStart(UTF8Str: PChar; Len: SizeInt; BytePos: SizeInt): SizeInt; function Utf8TryFindCodepointStart(AString: PChar; var CurPos: PChar; out CharLen: Integer): Boolean; function Utf8TryFindCodepointStart(const AString: String; var Index: Integer; out CharLen: Integer): Boolean; -// find the n-th UTF8 character, ignoring BIDI -function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; -// find the byte index of the n-th UTF8 character, ignoring BIDI (byte len of substr) -function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; +// find the n-th UTF8 codepoint, ignoring BIDI +function UTF8CodepointStart(UTF8Str: PChar; Len, CodepointIndex: PtrInt): PChar; +function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; deprecated 'Use UTF8CodepointStart instead.'; +// find the byte index of the n-th UTF8 codepoint, ignoring BIDI (byte len of substr) +function UTF8CodepointToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; +function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; deprecated 'Use UTF8CodepointToByteIndex instead.'; procedure UTF8FixBroken(P: PChar); overload; procedure UTF8FixBroken(var S: string); overload; -function UTF8CharacterStrictLength(P: PChar): integer; +function UTF8CodepointStrictSize(P: PChar): integer; +function UTF8CharacterStrictLength(P: PChar): integer; deprecated 'Use UTF8CodepointStrictSize instead.'; 
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string; function UTF8Pos(const SearchForText, SearchInText: string; StartPos: SizeInt = 1): PtrInt; @@ -124,8 +130,8 @@ function UTF8LowerString(const s: string): string; function UTF8UpperCase(const AInStr: string; ALanguage: string=''): string; function UTF8UpperString(const s: string): string; function UTF8SwapCase(const AInStr: string; ALanguage: string=''): string; -function FindInvalidUTF8Character(p: PChar; Count: PtrInt; - StopOnNonUTF8: Boolean = true): PtrInt; +function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean = true): PtrInt; +function FindInvalidUTF8Character(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean = true): PtrInt; deprecated 'Use FindInvalidUTF8Codepoint instead.'; function UTF8StringOfChar(AUtf8Char: String; N: Integer): String; function UTF8AddChar(AUtf8Char: String; const S: String; N: Integer): String; function UTF8AddCharR(AUtf8Char: String; const S: String; N: Integer): String; @@ -395,7 +401,7 @@ begin Result := SysToUTF8(SysUtils.SysErrorMessage(ErrorCode)); end; -function UTF8CharacterLengthFull(p: PChar): integer; +function UTF8CodepointSizeFull(p: PChar): integer; begin case p^ of #0..#191: // %11000000 @@ -433,14 +439,19 @@ begin end; end; -function UTF8CharacterLength(p: PChar): integer; inline; +function UTF8CodepointSize(p: PChar): integer; inline; begin if p=nil then exit(0); if p^<#192 then exit(1); - Result:=UTF8CharacterLengthFull(p); + Result:=UTF8CodepointSizeFull(p); end; -function UTF8CharacterLengthFast(p: PChar): integer; +function UTF8CharacterLength(p: PChar): integer; +begin + Result := UTF8CodepointSize(p); +end; + +function UTF8CodepointSizeFast(p: PChar): integer; begin case p^ of #0..#191 : Result := 1; @@ -459,6 +470,11 @@ begin end; end; +function UTF8CharacterLengthFast(p: PChar): integer; +begin + Result := UTF8CodepointSizeFast(p); +end; + function UTF8Length(const s: string): PtrInt; begin 
Result:=UTF8Length(PChar(s),length(s)); @@ -471,7 +487,7 @@ begin Result:=0; while (ByteCount>0) do begin inc(Result); - CharLen:=UTF8CharacterLength(p); + CharLen:=UTF8CodepointSize(p); inc(p,CharLen); dec(ByteCount,CharLen); end; @@ -534,7 +550,7 @@ begin Result := ByteCount - Result; end; -function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal; +function UTF8CodepointToUnicode(p: PChar; out CharLen: integer): Cardinal; { if p=nil then CharLen=0 otherwise CharLen>0 If there is an encoding error the Result is 0 and CharLen=1. Use UTF8FixBroken to fix UTF-8 encoding. @@ -608,6 +624,11 @@ begin end; end; +function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal; +begin + Result := UTF8CodepointToUnicode(p, CharLen); +end; + function UnicodeToUTF8(CodePoint: cardinal; Buf: PChar): integer; procedure RaiseInvalidUnicode; @@ -697,7 +718,7 @@ begin DestPos:=DBStr; Result:=0; while Len>0 do begin - u:=UTF8CharacterToUnicode(SrcPos,CharLen); + u:=UTF8CodepointToUnicode(SrcPos,CharLen); DestPos^:=byte((u shr 8) and $ff); inc(DestPos); DestPos^:=byte(u and $ff); @@ -728,26 +749,26 @@ begin if (not (Assigned(AString) and Assigned(CurPos))) or (CurPos < AString) then Exit; SavedPos := CurPos; - //Note: UTF8CharacterStrictLength will NOT "look" beyond the terminating #0 of a PChar, so this is safe with AnsiStrings - CharLen := UTF8CharacterStrictLength(CurPos); + //Note: UTF8CodepointStrictSize will NOT "look" beyond the terminating #0 of a PChar, so this is safe with AnsiStrings + CharLen := UTF8CodepointStrictSize(CurPos); if (CharLen > 0) then Exit(True); if (CurPos > AString) then begin Dec(CurPos); //-1 //is it second byte of 2..4 byte codepoint? - CharLen := UTF8CharacterStrictLength(CurPos); + CharLen := UTF8CodepointStrictSize(CurPos); if (CharLen > 1) then Exit(True); if (CurPos > AString) then begin Dec(CurPos); //-2 //is it third byte of 3..4 byte codepoint? 
- CharLen := UTF8CharacterStrictLength(CurPos); + CharLen := UTF8CodepointStrictSize(CurPos); if (CharLen > 2) then Exit(True); if (CurPos > AString) then begin Dec(CurPos); //-3 //is it fouth byte of 4 byte codepoint? - CharLen := UTF8CharacterStrictLength(CurPos); + CharLen := UTF8CodepointStrictSize(CurPos); if (CharLen = 4) then Exit(True); end; end; @@ -784,35 +805,45 @@ end; { Len is the length in bytes of UTF8Str - CharIndex is the position of the desired char (starting at 0), in chars + CodepointIndex is the position of the desired codepoint (starting at 0), in codepoints } -function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; +function UTF8CodepointStart(UTF8Str: PChar; Len, CodepointIndex: PtrInt): PChar; var CharLen: LongInt; begin Result:=UTF8Str; if Result<>nil then begin - while (CharIndex>0) and (Len>0) do begin - CharLen:=UTF8CharacterLength(Result); + while (CodepointIndex>0) and (Len>0) do begin + CharLen:=UTF8CodepointSize(Result); dec(Len,CharLen); - dec(CharIndex); + dec(CodepointIndex); inc(Result,CharLen); end; - if (CharIndex<>0) or (Len<0) then + if (CodepointIndex<>0) or (Len<0) then Result:=nil; end; end; -function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; +function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; +begin + Result := UTF8CodepointStart(UTF8Str, Len, CharIndex); +end; + +function UTF8CodepointToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; var p: PChar; begin - p := UTF8CharStart(UTF8Str, Len, CharIndex); + p := UTF8CodepointStart(UTF8Str, Len, CharIndex); if p = nil then Result := -1 else Result := p - UTF8Str; end; +function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; +begin + Result := UTF8CodepointToByteIndex(UTF8Str, Len, CharIndex); +end; + { fix any broken UTF8 sequences with spaces } procedure UTF8FixBroken(P: PChar); var @@ -882,12 +913,12 @@ end; procedure UTF8FixBroken(var S: string); begin if S='' then exit; - if
FindInvalidUTF8Character(PChar(S),length(S))<0 then exit; + if FindInvalidUTF8Codepoint(PChar(S),length(S))<0 then exit; UniqueString(S); UTF8FixBroken(PChar(S)); end; -function UTF8CharacterStrictLength(P: PChar): integer; +function UTF8CodepointStrictSize(P: PChar): integer; var c: Char; begin @@ -928,6 +959,11 @@ begin exit(0); end; +function UTF8CharacterStrictLength(P: PChar): integer; +begin + Result := UTF8CodepointStrictSize(P); +end; + function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string; var Source: PChar; @@ -1008,7 +1044,7 @@ begin else if StartPos>1 then begin // skip - StartPosP:=UTF8CharStart(PChar(SearchInText),Length(SearchInText),StartPos-1); + StartPosP:=UTF8CodepointStart(PChar(SearchInText),Length(SearchInText),StartPos-1); if StartPosP=nil then exit; // search p:=UTF8PosP(PChar(SearchForText),length(SearchForText), @@ -1049,12 +1085,12 @@ var EndBytePos: PChar; MaxBytes: PtrInt; begin - StartBytePos:=UTF8CharStart(PChar(s),length(s),StartCharIndex-1); + StartBytePos:=UTF8CodepointStart(PChar(s),length(s),StartCharIndex-1); if StartBytePos=nil then Result:='' else begin MaxBytes:=PtrInt(PChar(s)+length(s)-StartBytePos); - EndBytePos:=UTF8CharStart(StartBytePos,MaxBytes,CharCount); + EndBytePos:=UTF8CodepointStart(StartBytePos,MaxBytes,CharCount); if EndBytePos=nil then Result:=copy(s,StartBytePos-PChar(s)+1,MaxBytes) else @@ -1088,11 +1124,11 @@ var EndBytePos: PChar; MaxBytes: PtrInt; begin - StartBytePos:=UTF8CharStart(PChar(s),length(s),StartCharIndex-1); + StartBytePos:=UTF8CodepointStart(PChar(s),length(s),StartCharIndex-1); if StartBytePos <> nil then begin MaxBytes:=PtrInt(PChar(s)+length(s)-StartBytePos); - EndBytePos:=UTF8CharStart(StartBytePos,MaxBytes,CharCount); + EndBytePos:=UTF8CodepointStart(StartBytePos,MaxBytes,CharCount); if EndBytePos=nil then Delete(s,StartBytePos-PChar(s)+1,MaxBytes) else @@ -1107,7 +1143,7 @@ procedure UTF8Insert(const source: UTF8String; var s: UTF8string; var StartBytePos: 
PChar; begin - StartBytePos:=UTF8CharStart(PChar(s),length(s),StartCharIndex-1); + StartBytePos:=UTF8CodepointStart(PChar(s),length(s),StartCharIndex-1); if StartBytePos <> nil then Insert(source, s, StartBytePos-PChar(s)+1); end; @@ -1117,7 +1153,7 @@ procedure UTF8Insert(const source: String; var s: String; StartCharIndex: PtrInt var StartBytePos: PChar; begin - StartBytePos:=UTF8CharStart(PChar(s),length(s),StartCharIndex-1); + StartBytePos:=UTF8CodepointStart(PChar(s),length(s),StartCharIndex-1); if StartBytePos <> nil then Insert(source, s, StartBytePos-PChar(s)+1); end; @@ -2424,7 +2460,7 @@ begin { Now everything else } else begin - CharLen := UTF8CharacterLength(@AInStr[InCounter]); + CharLen := UTF8CodepointSize(@AInStr[InCounter]); CharProcessed := False; NewCharLen := CharLen; @@ -2735,8 +2771,7 @@ begin end; -function FindInvalidUTF8Character(p: PChar; Count: PtrInt; - StopOnNonUTF8: Boolean): PtrInt; +function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean): PtrInt; // return -1 if ok var CharLen: Integer; @@ -2803,6 +2838,11 @@ begin Result:=-1; end; +function FindInvalidUTF8Character(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolean = true): PtrInt; +begin + Result := FindInvalidUTF8Codepoint(p, Count, StopOnNonUTF8); +end; + function ValidUTF8String(const s: String): String; inline; begin Result := Utf8EscapeControlChars(s, emPascal); @@ -2848,7 +2888,7 @@ var Ch: Char; i: Integer; begin - if FindInvalidUTF8Character(PChar(S), Length(S)) <> -1 then + if FindInvalidUTF8Codepoint(PChar(S), Length(S)) <> -1 then begin UTF8FixBroken(S); end; @@ -3041,7 +3081,7 @@ begin rBytePos := ByteCount + 1; while (rBytePos > 1) do begin - CharLen:=UTF8CharacterLength(p); + CharLen:=UTF8CodepointSize(p); Dec(rBytePos, CharLen); System.Move(p^, Result[rBytePos], CharLen); Inc(p, CharLen); @@ -3093,7 +3133,7 @@ begin P := PChar(S); while P^ <> #0 do begin - CharLen := UTF8CharacterLength(P); + CharLen := UTF8CodepointSize(P); i := 1; j := 0; 
ResultLen := Length(Result); @@ -3168,7 +3208,7 @@ begin #128..#255: begin if KeepAllNonASCII then break; - u:=UTF8CharacterToUnicode(p,l); + u:=UTF8CodepointToUnicode(p,l); if (l<=1) then break; // invalid character case u of 128..159, // C1 set of control codes @@ -3216,7 +3256,7 @@ begin begin if KeepAllNonASCII then break; StartP:=UTF8FindNearestCharStart(PChar(Result),length(Result),p-PChar(Result)); - u:=UTF8CharacterToUnicode(PChar(Result)+StartP,l); + u:=UTF8CodepointToUnicode(PChar(Result)+StartP,l); if (l<=1) then break; // invalid character case u of 128..159, // C1 set of control codes diff --git a/components/lazutils/lconvencoding.pas b/components/lazutils/lconvencoding.pas index 04990956d7..c72286aef2 100644 --- a/components/lazutils/lconvencoding.pas +++ b/components/lazutils/lconvencoding.pas @@ -7029,7 +7029,7 @@ begin inc(Src); dec(len); end else begin - Unicode:=UTF8CharacterToUnicode(Src,CharLen); + Unicode:=UTF8CodepointToUnicode(Src,CharLen); inc(Src,CharLen); dec(len,CharLen); i:=UTF8CharConvFunc(Unicode); @@ -7068,7 +7068,7 @@ begin inc(Src); dec(len); end else begin - Unicode:=UTF8CharacterToUnicode(Src,CharLen); + Unicode:=UTF8CodepointToUnicode(Src,CharLen); inc(Src,CharLen); dec(len,CharLen); if Unicode<=$ffff then begin @@ -7108,7 +7108,7 @@ begin inc(Src); dec(len); end else begin - Unicode:=UTF8CharacterToUnicode(Src,CharLen); + Unicode:=UTF8CodepointToUnicode(Src,CharLen); inc(Src,CharLen); dec(len,CharLen); if Unicode<=$ffff then begin @@ -7265,7 +7265,7 @@ begin end; inc(p); end else begin - i:=UTF8CharacterStrictLength(p); + i:=UTF8CodepointStrictSize(p); //DebugLn(['GuessEncoding ',i,' ',DbgStr(s[p])]); if i=0 then begin {$IFDEF VerboseIDEEncoding} diff --git a/components/lazutils/masks.pas b/components/lazutils/masks.pas index 081049f909..7b51214035 100644 --- a/components/lazutils/masks.pas +++ b/components/lazutils/masks.pas @@ -96,9 +96,9 @@ var Res: AnsiString; //intermediate needed for PChar -> String -> ShortString 
assignement begin Result := ''; - p := UTF8CharStart(PChar(S), Length(S), Index - 1); //zero-based call + p := UTF8CodepointStart(PChar(S), Length(S), Index - 1); //zero-based call //determine the length in bytes of this UTF-8 character - PLen := UTF8CharacterLength(p); + PLen := UTF8CodepointSize(p); Res := p; //Set correct length for Result (otherwise it returns all chars up to the end of the original string) SetLength(Res,PLen); diff --git a/components/lazutils/paswstring.pas b/components/lazutils/paswstring.pas index 1dad5ce58c..c0acf7081a 100644 --- a/components/lazutils/paswstring.pas +++ b/components/lazutils/paswstring.pas @@ -189,7 +189,7 @@ begin p:=Str; if p=nil then exit(0); while p^<>#0 do begin - l:=UTF8CharacterLength(p); + l:=UTF8CodepointSize(p); inc(Result); inc(p,l); end; diff --git a/components/lazutils/translations.pas b/components/lazutils/translations.pas index 9bf177bdc3..a3bdfc0b91 100644 --- a/components/lazutils/translations.pas +++ b/components/lazutils/translations.pas @@ -1362,7 +1362,7 @@ var // po requires special characters as #number p:=1; while p<=length(Value) do begin - j := UTF8CharacterLength(pchar(@Value[p])); + j := UTF8CodepointSize(pchar(@Value[p])); if (j=1) and (Value[p] in [#0..#9,#11,#12,#14..#31,#127..#255]) then Value := copy(Value,1,p-1)+'#'+IntToStr(ord(Value[p]))+copy(Value,p+1,length(Value)) else diff --git a/components/onlinepackagemanager/vst/include/generic/opkman_unicodefunctions.inc b/components/onlinepackagemanager/vst/include/generic/opkman_unicodefunctions.inc index 6c808c967a..218040a005 100644 --- a/components/onlinepackagemanager/vst/include/generic/opkman_unicodefunctions.inc +++ b/components/onlinepackagemanager/vst/include/generic/opkman_unicodefunctions.inc @@ -17,7 +17,7 @@ begin WideCount := Min(WideCount, StrLen); while (CharCount < WideCount) do begin - CharLen := UTF8CharacterLength(P); + CharLen := UTF8CodepointSize(P); Inc(P, CharLen); Inc(Result, CharLen); Inc(CharCount); diff --git 
a/components/pochecker/simplepofiles.pp b/components/pochecker/simplepofiles.pp index 696b689b00..d69f447876 100644 --- a/components/pochecker/simplepofiles.pp +++ b/components/pochecker/simplepofiles.pp @@ -971,7 +971,7 @@ begin // po requires special characters as #number p:=1; while p<=length(Value) do begin - j := UTF8CharacterLength(pchar(@Value[p])); + j := UTF8CodepointSize(pchar(@Value[p])); if (j=1) and (Value[p] in [#0..#9,#11,#12,#14..#31,#127..#255]) then Value := copy(Value,1,p-1)+'#'+IntToStr(ord(Value[p]))+copy(Value,p+1,length(Value)) else diff --git a/components/printers/design/sourceprinter.pas b/components/printers/design/sourceprinter.pas index fb1ed3fdbc..c345eaa5d9 100644 --- a/components/printers/design/sourceprinter.pas +++ b/components/printers/design/sourceprinter.pas @@ -131,7 +131,7 @@ begin if ShowLineNumbers then s2 := Format('%4d: ',[i]); l := Printer.Canvas.TextFitInfo(s2 + s, Printer.PageWidth - 2 * Margin); l := l - Length(s2); // s2 has only single byte - l := UTF8CharToByteIndex(PChar(s), length(s), l); + l := UTF8CodepointToByteIndex(PChar(s), length(s), l); while (l > MIN_LINE_LEN) and (l < length(s)) do begin l2 := l; while (l2 > MIN_LINE_LEN) and @@ -144,14 +144,14 @@ begin // find utf8 start while (l2 > 1) and (ord(s[l2]) >= 128) and (ord(s[l2+1]) >= 128) and (ord(s[l2+1]) < 192) do dec(l2); - if l2 = 0 then l2 := UTF8CharToByteIndex(PChar(s), length(s), MIN_LINE_LEN); + if l2 = 0 then l2 := UTF8CodepointToByteIndex(PChar(s), length(s), MIN_LINE_LEN); Text[j] := copy(s, 1, l2); delete(s, 1, l2); inc(j); Text.InsertObject(j, '', nil); l := Printer.Canvas.TextFitInfo(s2 + s, Printer.PageWidth - 2 * Margin); l := l - Length(s2); - l := UTF8CharToByteIndex(PChar(s), length(s), l); + l := UTF8CodepointToByteIndex(PChar(s), length(s), l); end; Text[j] := s; inc(i); diff --git a/components/synedit/syncompletion.pas b/components/synedit/syncompletion.pas index de0de68908..c37186fbb3 100644 --- a/components/synedit/syncompletion.pas 
+++ b/components/synedit/syncompletion.pas @@ -463,7 +463,7 @@ begin {$IF FPC_FULLVERSION >= 20701} if p^ <= #127 then exit; - i := UTF8CharacterLength(p); + i := UTF8CodepointSize(p); SetLength(u, i); // wide chars of UTF-16 <= bytes of UTF-8 string if ConvertUTF8ToUTF16(PWideChar(u), i + 1, p, i, [toInvalidCharToSymbol], L) = trNoError diff --git a/components/synedit/synhighlightertex.pas b/components/synedit/synhighlightertex.pas index f00a2e6db1..36eae8dd54 100644 --- a/components/synedit/synhighlightertex.pas +++ b/components/synedit/synhighlightertex.pas @@ -212,7 +212,7 @@ end; { SpaceProc } procedure TSynTeXSyn.TextProc; begin fTokenID:=tkText; - inc(Run,UTF8CharacterLength(@fLine[Run])); + inc(Run,UTF8CodepointSize(@fLine[Run])); end; { TextProc } procedure TSynTeXSyn.LFProc; diff --git a/components/synedit/test/testbase.pas b/components/synedit/test/testbase.pas index 3c22932fa0..90e69a9f80 100644 --- a/components/synedit/test/testbase.pas +++ b/components/synedit/test/testbase.pas @@ -211,7 +211,7 @@ begin delete(Input, 1, 1); Continue; end; - l := UTF8CharacterLength(@Input[1]); + l := UTF8CodepointSize(@Input[1]); if l < 1 then Break; CommandProcessor(ecChar, copy(Input, 1, l), nil); delete(Input, 1, l); diff --git a/components/wiki/lazwiki/wikiformat.pas b/components/wiki/lazwiki/wikiformat.pas index 2cdcc805a0..47b5ddbdc2 100644 --- a/components/wiki/lazwiki/wikiformat.pas +++ b/components/wiki/lazwiki/wikiformat.pas @@ -542,7 +542,7 @@ begin if (p^ in WFCAllowedChars) and (p[1] in WFCAllowedChars) and (p[2] in WFCAllowedChars) then break; // the next three are normal characters -> stop encoding as base64 - CharLen:=UTF8CharacterLength(p); + CharLen:=UTF8CodepointSize(p); {$ifdef VerboseWikiFileCode} writeln('UTF8ToWikiFileCode sequence UTF8CharLen=',CharLen); {$endif} @@ -706,7 +706,7 @@ begin end else raise Exception.Create('invalid wiki file code: invalid character'); until false; - if FindInvalidUTF8Character(PChar(Result),length(Result))>=0 then 
+ if FindInvalidUTF8Codepoint(PChar(Result),length(Result))>=0 then raise Exception.Create('invalid wiki file code: result is not UTF-8'); end; diff --git a/components/wiki/lazwiki/wikiparser.pas b/components/wiki/lazwiki/wikiparser.pas index ff44b48876..ea35e98fc7 100644 --- a/components/wiki/lazwiki/wikiparser.pas +++ b/components/wiki/lazwiki/wikiparser.pas @@ -1645,8 +1645,8 @@ begin end; end; end else begin - CharLen:=UTF8CharacterLength(PageP); - UpCharLen:=UTF8CharacterLength(PageUpP); + CharLen:=UTF8CodepointSize(PageP); + UpCharLen:=UTF8CodepointSize(PageUpP); if (CharLen>1) or (PageP^ in ['a'..'z','A'..'Z']) then begin if (CharLen=UpCharLen) and CompareMem(PageP,PageUpP,CharLen) then CaseFlags:=CaseFlags+'u' diff --git a/components/wiki/test/wikihelpmanager.pas b/components/wiki/test/wikihelpmanager.pas index 59495e1f52..8452ef8ff3 100644 --- a/components/wiki/test/wikihelpmanager.pas +++ b/components/wiki/test/wikihelpmanager.pas @@ -347,7 +347,7 @@ begin //debugln(['TextToHTMLSnipped phrase "',Phrase,'" found at ',LoTxtP-PChar(LoTxt)]); CurPhraseP:=PChar(Phrase); while (CurPhraseP^<>#0) do begin - l:=UTF8CharacterLength(CurPhraseP); + l:=UTF8CodepointSize(CurPhraseP); inc(LoTxtP,l); inc(CurPhraseP,l); BoldP^+=1; @@ -356,7 +356,7 @@ begin continue; end; end; - inc(LoTxtP,UTF8CharacterLength(LoTxtP)); + inc(LoTxtP,UTF8CodepointSize(LoTxtP)); inc(BoldP); end; end; @@ -367,7 +367,7 @@ begin BoldP:=Bold; while LoTxtP^<>#0 do begin dbgout([' ',dbgstr(LoTxtP^),':',BoldP^]); - inc(LoTxtP,UTF8CharacterLength(LoTxtP)); + inc(LoTxtP,UTF8CodepointSize(LoTxtP)); inc(BoldP); end; debugln; @@ -447,7 +447,7 @@ begin ReplaceSubstring(Result,i,1,'>'); inc(i,length('>')); end else - inc(i,UTF8CharacterLength(@Result[i])); + inc(i,UTF8CodepointSize(@Result[i])); inc(BoldP); end; if IsBold then diff --git a/designer/menushortcuts.pas b/designer/menushortcuts.pas index 807b25d5d3..6ab8ea98b1 100644 --- a/designer/menushortcuts.pas +++ b/designer/menushortcuts.pas @@ -307,7 
+307,7 @@ begin if (p = 0) or (p = Length(aText)) then Break; if aText[p+1] <> '&' then // '&&' is reduced to '&' by widgetset GUI. begin - UTF8Len := UTF8CharacterLength(@aText[p+1]); + UTF8Len := UTF8CodepointSize(@aText[p+1]); accelStr := UTF8UpperCase(Copy(aText, p+1, UTF8Len)); // force uppercase // ToDo: Use the whole UTF-8 character in accelStr. How? aShortcut := KeyToShortCut(Ord(accelStr[1]), diff --git a/ide/debugmanager.pas b/ide/debugmanager.pas index 285e5c970a..421c24742c 100644 --- a/ide/debugmanager.pas +++ b/ide/debugmanager.pas @@ -993,7 +993,7 @@ begin ExceptMsg := AExceptionText; // if AExceptionText is not a valid UTF8 string, // then assume it has the ansi encoding and convert it - if FindInvalidUTF8Character(pchar(ExceptMsg),length(ExceptMsg)) > 0 then + if FindInvalidUTF8Codepoint(pchar(ExceptMsg),length(ExceptMsg)) >= 0 then ExceptMsg := AnsiToUtf8(ExceptMsg); msg := Format(lisProjectSRaisedExceptionClassSWithMessageSS, [GetTitle, AExceptionClass, LineEnding, ExceptMsg]); diff --git a/ide/frames/compiler_other_options.pas b/ide/frames/compiler_other_options.pas index f834b1a587..6444a5eb5e 100644 --- a/ide/frames/compiler_other_options.pas +++ b/ide/frames/compiler_other_options.pas @@ -293,7 +293,7 @@ begin CondSynEdit.GetWordBoundsAtRowCol(XY,StartX,EndX); if EndX<=XY.X then exit; Line := CondSynEdit.Lines[XY.Y - 1]; - inc(XY.X,UTF8CharacterLength(@Line[XY.X-1])); + inc(XY.X,UTF8CodepointSize(@Line[XY.X-1])); CondSynEdit.LogicalCaretXY:=XY; end; diff --git a/ide/frames/editor_color_options.pas b/ide/frames/editor_color_options.pas index a1528d10a7..a65e3f5a52 100644 --- a/ide/frames/editor_color_options.pas +++ b/ide/frames/editor_color_options.pas @@ -367,7 +367,7 @@ begin l := length(ExtractFileExt(NewName)); if (l > 0) and (l+1 < Length(NewName)) then NewName := Copy(NewName, 1, Length(NewName) - l); - l := UTF8CharacterLength(PChar(NewName)); + l := UTF8CodepointSize(PChar(NewName)); if l > 0 then NewName := UTF8UpperCase(copy(NewName, 
1, l)) + copy(NewName, 1+l, length(NewName)); diff --git a/ide/frames/editor_mouseaction_options_advanced.pas b/ide/frames/editor_mouseaction_options_advanced.pas index a211ba8a98..d694c0f179 100644 --- a/ide/frames/editor_mouseaction_options_advanced.pas +++ b/ide/frames/editor_mouseaction_options_advanced.pas @@ -545,7 +545,7 @@ begin l := length(ExtractFileExt(NewName)); if (l > 0) and (l+1 < Length(NewName)) then NewName := Copy(NewName, 1, Length(NewName) - l); - l := UTF8CharacterLength(PChar(NewName)); + l := UTF8CodepointSize(PChar(NewName)); if l > 0 then NewName := UTF8UpperCase(copy(NewName, 1, l)) + copy(NewName, 1+l, length(NewName)); diff --git a/ide/idecmdline.pas b/ide/idecmdline.pas index 8b8bd36c1e..c9ff4b6b36 100644 --- a/ide/idecmdline.pas +++ b/ide/idecmdline.pas @@ -136,7 +136,7 @@ begin s := Trim(s); {$ifdef windows} //cfg file is made by Windows installer and probably is Windows default codepage - if FindInvalidUTF8Character(PChar(s), Length(s), True) > 0 then + if FindInvalidUTF8Codepoint(PChar(s), Length(s), True) >= 0 then s := WinCPToUtf8(s); {$endif windows} ParamsAndCfgFileContent.Add(s) diff --git a/ide/ideprocs.pp b/ide/ideprocs.pp index 2d9f5bd673..d3307fad7e 100644 --- a/ide/ideprocs.pp +++ b/ide/ideprocs.pp @@ -1630,7 +1630,7 @@ function TabsToSpaces(const s: string; TabWidth: integer; UseUTF8: boolean): str Dest[DestPos]:=Src[SrcPos]; inc(PhysicalX); if UseUTF8 then - CharLen:=UTF8CharacterLength(@s[SrcPos]) + CharLen:=UTF8CodepointSize(@s[SrcPos]) else CharLen:=1; for i:=1 to CharLen do begin diff --git a/ide/sourceeditor.pp b/ide/sourceeditor.pp index d113e04563..e906c728d7 100644 --- a/ide/sourceeditor.pp +++ b/ide/sourceeditor.pp @@ -2556,7 +2556,7 @@ begin if LogCaret.Y>=Editor.Lines.Count then exit; Line:=Editor.Lines[LogCaret.Y-1]; if LogCaret.X>length(Line) then exit; - CharLen:=UTF8CharacterLength(@Line[LogCaret.X]); + CharLen:=UTF8CodepointSize(@Line[LogCaret.X]); AddPrefix:=copy(Line,LogCaret.X,CharLen); 
NewPrefix:=CurrentString+AddPrefix; //debugln('TSourceNotebook.OnSynCompletionNextChar NewPrefix="',NewPrefix,'" LogCaret.X=',dbgs(LogCaret.X)); diff --git a/lcl/include/application.inc b/lcl/include/application.inc index 66d7339ae8..8fbeacb7e9 100644 --- a/lcl/include/application.inc +++ b/lcl/include/application.inc @@ -1570,7 +1570,7 @@ var begin if AppNoExceptionMessages in FFlags then exit; Msg := E.Message; - if FindInvalidUTF8Character(PChar(Msg), Length(Msg)) > 0 then + if FindInvalidUTF8Codepoint(PChar(Msg), Length(Msg)) > 0 then Msg := AnsiToUtf8(Msg); if (Msg <> '') and (Msg[length(Msg)] <> '.') then Msg := Msg + '.'; if (not Terminated) and (Self <> nil) and (AppInitialized in FFlags) then diff --git a/lcl/interfaces/gtk/gtkwidgetset.inc b/lcl/interfaces/gtk/gtkwidgetset.inc index 64a1caa156..1fda8cce0d 100644 --- a/lcl/interfaces/gtk/gtkwidgetset.inc +++ b/lcl/interfaces/gtk/gtkwidgetset.inc @@ -5758,7 +5758,7 @@ var Result:=LineStart; LineWidth:=0; repeat - charLen:=UTF8CharacterLength(@AText[result]); + charLen:=UTF8CodepointSize(@AText[result]); CharWidth:=GetLineWidthInPixel(Result,charLen); inc(LineWidth,CharWidth); if LineWidth>MaxWidthInPixel then break; @@ -5767,7 +5767,7 @@ var until false; // at least one char if Result=LineStart then begin - charLen:=UTF8CharacterLength(@AText[result]); + charLen:=UTF8CodepointSize(@AText[result]); inc(Result,charLen); end; end; diff --git a/lcl/interfaces/gtk2/gtk2widgetset.inc b/lcl/interfaces/gtk2/gtk2widgetset.inc index 491cdbd277..f73b9b44b5 100644 --- a/lcl/interfaces/gtk2/gtk2widgetset.inc +++ b/lcl/interfaces/gtk2/gtk2widgetset.inc @@ -6272,7 +6272,7 @@ var Result:=LineStart; LineWidth:=0; repeat - charLen:=UTF8CharacterLength(@AText[result]); + charLen:=UTF8CodepointSize(@AText[result]); CharWidth:=GetLineWidthInPixel(Result,charLen); inc(LineWidth,CharWidth); if LineWidth>MaxWidthInPixel then break; @@ -6281,7 +6281,7 @@ var until false; // at least one char if Result=LineStart then begin - 
charLen:=UTF8CharacterLength(@AText[result]); + charLen:=UTF8CodepointSize(@AText[result]); inc(Result,charLen); end; end; diff --git a/lcl/interfaces/gtk2/gtk2winapi.inc b/lcl/interfaces/gtk2/gtk2winapi.inc index 7cc3025935..495e6ef3b7 100644 --- a/lcl/interfaces/gtk2/gtk2winapi.inc +++ b/lcl/interfaces/gtk2/gtk2winapi.inc @@ -2658,7 +2658,7 @@ var Points[0].cX := LeftPos + Points[0].cX; Points[0].cY := TopPos + tm.tmHeight - TM.tmDescent + 1; - GetTextExtentPoint(DC, @aStr[pIndex], UTF8CharacterLength(@aStr[pIndex]), Points[1]); + GetTextExtentPoint(DC, @aStr[pIndex], UTF8CodepointSize(@aStr[pIndex]), Points[1]); Points[1].cX := Points[0].cX + Points[1].cX; Points[1].cY := Points[0].cY; @@ -3796,7 +3796,7 @@ var CurScreenX := X; while CurCount > 0 do begin - CharLen := UTF8CharacterLength(CurStr); + CharLen := UTF8CodepointSize(CurStr); DevCtx.DrawTextWithColors(CurStr, CharLen, CurScreenX, Y, Foreground, BackgroundColor); inc(CurScreenX, CurDx^); inc(CurDx); diff --git a/lcl/interfaces/gtk3/gtk3objects.pas b/lcl/interfaces/gtk3/gtk3objects.pas index 4ff378179f..286051954b 100644 --- a/lcl/interfaces/gtk3/gtk3objects.pas +++ b/lcl/interfaces/gtk3/gtk3objects.pas @@ -2022,7 +2022,7 @@ var Result:=LineStart; LineWidth:=0; repeat - charLen:=UTF8CharacterLength(@AText[result]); + charLen:=UTF8CodepointSize(@AText[result]); CharWidth:=GetLineWidthInPixel(Result,charLen); inc(LineWidth,CharWidth); if LineWidth>MaxWidthInPixel then break; @@ -2031,7 +2031,7 @@ var until false; // at least one char if Result=LineStart then begin - charLen:=UTF8CharacterLength(@AText[result]); + charLen:=UTF8CodepointSize(@AText[result]); inc(Result,charLen); end; end; diff --git a/lcl/interfaces/gtk3/gtk3winapi.inc b/lcl/interfaces/gtk3/gtk3winapi.inc index 80c1958c5b..6c742dca16 100644 --- a/lcl/interfaces/gtk3/gtk3winapi.inc +++ b/lcl/interfaces/gtk3/gtk3winapi.inc @@ -919,7 +919,7 @@ var Points[0].cX := LeftPos + Points[0].cX; Points[0].cY := TopPos + tm.tmHeight - TM.tmDescent + 1; - 
GetTextExtentPoint(DC, @aStr[pIndex], UTF8CharacterLength(@aStr[pIndex]), Points[1]); + GetTextExtentPoint(DC, @aStr[pIndex], UTF8CodepointSize(@aStr[pIndex]), Points[1]); Points[1].cX := Points[0].cX + Points[1].cX; Points[1].cY := Points[0].cY; diff --git a/lcl/interfaces/qt/qtwidgets.pas b/lcl/interfaces/qt/qtwidgets.pas index 5712eb86f5..604dff341f 100644 --- a/lcl/interfaces/qt/qtwidgets.pas +++ b/lcl/interfaces/qt/qtwidgets.pas @@ -3500,7 +3500,7 @@ begin {$endif} InputEvent := QInputMethodEventH(Event); QInputMethodEvent_commitString(InputEvent, @WStr); - UnicodeChar := UTF8CharacterToUnicode(PChar(WStr), UnicodeOutLen); + UnicodeChar := UTF8CodepointToUnicode(PChar(WStr), UnicodeOutLen); {$IFDEF VerboseQtKeys} writeln('> TQtWidget.SlotInputMethod ',dbgsname(LCLObject),' event=QEventInputMethod:'); writeln(' commmitString ',WStr,' len ',length(WStr),' UnicodeChar ',UnicodeChar, diff --git a/lcl/interfaces/qt/qtwinapi.inc b/lcl/interfaces/qt/qtwinapi.inc index 1b36b470fd..3e59ef30b6 100644 --- a/lcl/interfaces/qt/qtwinapi.inc +++ b/lcl/interfaces/qt/qtwinapi.inc @@ -2216,7 +2216,7 @@ var CurX := X; while CurCount > 0 do begin - CharLen := UTF8CharacterLength(CurStr); + CharLen := UTF8CodepointSize(CurStr); W := {%H-}Copy(CurStr, 1, CharLen); if AClipped then QtDC.drawText(CurX, Y, Rect^.Right - Rect^.Left, Rect^.Bottom - Rect^.Top, diff --git a/lcl/interfaces/qt5/qtwidgets.pas b/lcl/interfaces/qt5/qtwidgets.pas index 5476799579..c1938dda7a 100644 --- a/lcl/interfaces/qt5/qtwidgets.pas +++ b/lcl/interfaces/qt5/qtwidgets.pas @@ -3504,7 +3504,7 @@ begin {$endif} InputEvent := QInputMethodEventH(Event); QInputMethodEvent_commitString(InputEvent, @WStr); - UnicodeChar := UTF8CharacterToUnicode(PChar(WStr), UnicodeOutLen); + UnicodeChar := UTF8CodepointToUnicode(PChar(WStr), UnicodeOutLen); {$IFDEF VerboseQtKeys} writeln('> TQtWidget.SlotInputMethod ',dbgsname(LCLObject),' event=QEventInputMethod:'); writeln(' commmitString ',WStr,' len ',length(WStr),' UnicodeChar 
',UnicodeChar, diff --git a/lcl/interfaces/qt5/qtwinapi.inc b/lcl/interfaces/qt5/qtwinapi.inc index 83fe43ec82..b0a2df73b8 100644 --- a/lcl/interfaces/qt5/qtwinapi.inc +++ b/lcl/interfaces/qt5/qtwinapi.inc @@ -2182,7 +2182,7 @@ var CurX := X; while CurCount > 0 do begin - CharLen := UTF8CharacterLength(CurStr); + CharLen := UTF8CodepointSize(CurStr); if AClipped then begin W := GetUTF8String(Copy(CurStr, 1, CharLen)); diff --git a/lcl/lclproc.pas b/lcl/lclproc.pas index cdaba071f3..0d3081b0e3 100644 --- a/lcl/lclproc.pas +++ b/lcl/lclproc.pas @@ -336,10 +336,10 @@ function UTF16CharacterToUnicode(p: PWideChar; out CharLen: integer): Cardinal; function UnicodeToUTF16(u: cardinal): UTF16String; {$IFDEF EnableWrapperFunctions} -function UTF8CharacterLength(p: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit'; +function UTF8CodepointSize(p: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit'; function UTF8Length(const s: string): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit'; function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit'; -function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal; inline; deprecated 'Use the function in LazUTF8 unit'; +function UTF8CodepointToUnicode(p: PChar; out CharLen: integer): Cardinal; inline; deprecated 'Use the function in LazUTF8 unit'; function UnicodeToUTF8(u: cardinal; Buf: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit'; function UnicodeToUTF8SkipErrors(u: cardinal; Buf: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit'; function UnicodeToUTF8(u: cardinal): shortstring; inline; deprecated 'Use the function in LazUTF8 unit'; @@ -348,11 +348,11 @@ function UTF8ToDoubleByte(UTF8Str: PChar; Len: PtrInt; DBStr: PByte): PtrInt; in function UTF8FindNearestCharStart(UTF8Str: PChar; Len: integer; BytePos: integer): integer; inline; deprecated 'Use the function in 
LazUTF8 unit'; // find the n-th UTF8 character, ignoring BIDI -function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; inline; deprecated 'Use the function in LazUTF8 unit'; +function UTF8CodepointStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; inline; deprecated 'Use the function in LazUTF8 unit'; // find the byte index of the n-th UTF8 character, ignoring BIDI (byte len of substr) -function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit'; +function UTF8CodepointToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit'; procedure UTF8FixBroken(P: PChar); inline; deprecated 'Use the function in LazUTF8 unit'; -function UTF8CharacterStrictLength(P: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit'; +function UTF8CodepointStrictSize(P: PChar): integer; inline; deprecated 'Use the function in LazUTF8 unit'; function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string; inline; deprecated 'Use the function in LazUTF8 unit'; function UTF8Pos(const SearchForText, SearchInText: string): PtrInt; inline; deprecated 'Use the function in LazUTF8 unit'; function UTF8Copy(const s: string; StartCharIndex, CharCount: PtrInt): string; inline; deprecated 'Use the function in LazUTF8 unit'; @@ -360,7 +360,7 @@ procedure UTF8Delete(var s: String; StartCharIndex, CharCount: PtrInt); inline; procedure UTF8Insert(const source: String; var s: string; StartCharIndex: PtrInt); inline; deprecated 'Use the function in LazUTF8 unit'; function UTF8LowerCase(const s: String): String; inline; deprecated 'Use the function in LazUTF8 unit'; function UTF8UpperCase(const s: String): String; inline; deprecated 'Use the function in LazUTF8 unit'; -function FindInvalidUTF8Character(p: PChar; Count: PtrInt; +function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt; StopOnNonASCII: Boolean = true): PtrInt; inline; 
deprecated 'Use the function in LazUTF8 unit'; function ValidUTF8String(const s: String): String; inline; deprecated 'Use the function in LazUTF8 unit'; @@ -2744,9 +2744,9 @@ begin end; {$IFDEF EnableWrapperFunctions} -function UTF8CharacterLength(p: PChar): integer; +function UTF8CodepointSize(p: PChar): integer; begin - Result := LazUTF8.UTF8CharacterLength(p); + Result := LazUTF8.UTF8CodepointSize(p); end; function UTF8Length(const s: string): PtrInt; @@ -2759,9 +2759,9 @@ begin Result := LazUTF8.UTF8Length(p, ByteCount); end; -function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal; +function UTF8CodepointToUnicode(p: PChar; out CharLen: integer): Cardinal; begin - Result := LazUTF8.UTF8CharacterToUnicode(p, CharLen); + Result := LazUTF8.UTF8CodepointToUnicode(p, CharLen); end; function UnicodeToUTF8(u: cardinal; Buf: PChar): integer; @@ -2803,14 +2803,14 @@ end; This function is similar to UTF8FindNearestCharStart } -function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; +function UTF8CodepointStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; begin - Result := LazUTF8.UTF8CharStart(UTF8Str, Len, CharIndex); + Result := LazUTF8.UTF8CodepointStart(UTF8Str, Len, CharIndex); end; -function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; +function UTF8CodepointToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; begin - Result := LazUTF8.UTF8CharToByteIndex(UTF8Str, Len, CharIndex); + Result := LazUTF8.UTF8CodepointToByteIndex(UTF8Str, Len, CharIndex); end; { fix any broken UTF8 sequences with spaces } @@ -2819,9 +2819,9 @@ begin LazUTF8.UTF8FixBroken(P); end; -function UTF8CharacterStrictLength(P: PChar): integer; +function UTF8CodepointStrictSize(P: PChar): integer; begin - Result := LazUTF8.UTF8CharacterStrictLength(P); + Result := LazUTF8.UTF8CodepointStrictSize(P); end; function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string; @@ -2859,11 +2859,11 @@ begin Result := 
LazUTF8.UTF8UpperCase(s); end; -function FindInvalidUTF8Character(p: PChar; Count: PtrInt; +function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt; StopOnNonASCII: Boolean): PtrInt; // return -1 if ok begin - Result := LazUTF8.FindInvalidUTF8Character(p, Count, StopOnNonASCII); + Result := LazUTF8.FindInvalidUTF8Codepoint(p, Count, StopOnNonASCII); end; function ValidUTF8String(const s: String): String; diff --git a/lcl/maskedit.pp b/lcl/maskedit.pp index e3b3ab93c3..32f0b7d715 100644 --- a/lcl/maskedit.pp +++ b/lcl/maskedit.pp @@ -372,9 +372,9 @@ var Res: AnsiString; //intermediate needed for PChar -> String -> ShortString assignement begin Result := ''; - p := UTF8CharStart(PChar(S), Length(S), Index - 1); //zero-based call + p := UTF8CodepointStart(PChar(S), Length(S), Index - 1); //zero-based call //determine the length in bytes of this UTF-8 character - PLen := UTF8CharacterLength(p); + PLen := UTF8CodepointSize(p); Res := p; //Set correct length for Result (otherwise it returns all chars up to the end of the original string) SetLength(Res,PLen); diff --git a/lcl/postscriptcanvas.pas b/lcl/postscriptcanvas.pas index c58d83dc52..1bb9859631 100644 --- a/lcl/postscriptcanvas.pas +++ b/lcl/postscriptcanvas.pas @@ -2352,7 +2352,7 @@ var Result := LineStart; LineWidth := 0; repeat - charLen := UTF8CharacterLength(@AText[Result]); + charLen := UTF8CodepointSize(@AText[Result]); CharWidth := TextWidth(MidStr(AText, Result, charLen)); Inc(LineWidth, CharWidth); if LineWidth > MaxWidthInPixel then @@ -2364,7 +2364,7 @@ var // at least one char if Result = LineStart then begin - charLen := UTF8CharacterLength(@AText[Result]); + charLen := UTF8CodepointSize(@AText[Result]); Inc(Result, charLen); end; end; diff --git a/test/lazutils/testlazutf16.pas b/test/lazutils/testlazutf16.pas index da5eed403a..87de53b0b2 100644 --- a/test/lazutils/testlazutf16.pas +++ b/test/lazutils/testlazutf16.pas @@ -142,7 +142,7 @@ begin for i:=0 to $10FFFF do begin s:=UnicodeToUTF8(i); - 
u:=UTF8CharacterToUnicode(PChar(s), dum); + u:=UTF8CodepointToUnicode(PChar(s), dum); AssertEquals('got (hexidecimal): ' + InttoHex(u,6), i, u); end; end; diff --git a/test/lazutils/testlazutf8.pas b/test/lazutils/testlazutf8.pas index e416e0297b..d9cdd0e9cf 100644 --- a/test/lazutils/testlazutf8.pas +++ b/test/lazutils/testlazutf8.pas @@ -66,7 +66,7 @@ procedure TTestLazUTF8.TestFindInvalidUTF8; var Actual: PtrInt; begin - Actual:=FindInvalidUTF8Character(PChar(s),length(s)); + Actual:=FindInvalidUTF8Codepoint(PChar(s),length(s)); AssertEquals(Title+': '+dbgMemRange(Pointer(s),length(s)),Expected,Actual); end; diff --git a/test/lazutils/testlconvencoding.pas b/test/lazutils/testlconvencoding.pas index 77318f86d8..ef30a76afc 100644 --- a/test/lazutils/testlconvencoding.pas +++ b/test/lazutils/testlconvencoding.pas @@ -44,7 +44,7 @@ procedure TTestLConvEncoding.Test_CP_UTF8_CP; AssertEquals('CodePage '+CodePageName+' to UTF8 creates empty string for character #'+IntToStr(ord(c)),true,false); Back:=ConvertEncodingFromUTF8(AsUTF8,CodePageName,Encoded); if Back<>c then - AssertEquals('CodePage '+CodePageName+' ('+IntToStr(ord(c))+') to UTF8 ('+dbgs(UTF8CharacterToUnicode(PChar(AsUTF8),l))+') and back differ for character #'+IntToStr(ord(c)),DbgStr(c),dbgstr(Back)); + AssertEquals('CodePage '+CodePageName+' ('+IntToStr(ord(c))+') to UTF8 ('+dbgs(UTF8CodepointToUnicode(PChar(AsUTF8),l))+') and back differ for character #'+IntToStr(ord(c)),DbgStr(c),dbgstr(Back)); end; end; diff --git a/tools/iconvtable.pas b/tools/iconvtable.pas index b6ed008724..74eec6eefb 100644 --- a/tools/iconvtable.pas +++ b/tools/iconvtable.pas @@ -182,7 +182,7 @@ begin s:=SortedTable[i]; if (length(s)=1) and (ord(s[1])<=127) then begin end else if s<>'' then begin - UniCode:=UTF8CharacterToUnicode(@s[1],CharLen); + UniCode:=UTF8CodepointToUnicode(@s[1],CharLen); TableIndex:=StrToTableIndex(s); j:=1; while (i+j<256) do begin @@ -191,11 +191,11 @@ begin ' SortedTable[i]=',ToStringConstant(s), ' 
SortedTable[i+j]=',ToStringConstant(SortedTable[i+j]), ' UniCode[i]=',UniCode, - ' UniCode[i+j]=',UTF8CharacterToUnicode(@SortedTable[i+j][1],CharLen), + ' UniCode[i+j]=',UTF8CodepointToUnicode(@SortedTable[i+j][1],CharLen), ' TableIndex[i]=',TableIndex, ' TableIndex[i+j]=',StrToTableIndex(SortedTable[i+j]), '');} - if UTF8CharacterToUnicode(@SortedTable[i+j][1],CharLen)<>UniCode+j then + if UTF8CodepointToUnicode(@SortedTable[i+j][1],CharLen)<>UniCode+j then break; if StrToTableIndex(SortedTable[i+j])<>TableIndex+j then break; diff --git a/tools/iconvtable_dbcs.pas b/tools/iconvtable_dbcs.pas index 8cbec45b2a..3edc704f95 100644 --- a/tools/iconvtable_dbcs.pas +++ b/tools/iconvtable_dbcs.pas @@ -138,7 +138,7 @@ begin SL.LoadFromFile(FilenameUTF8); s:=SL[0]; if s<>'' then begin - DBCSToUTF8[Index]:=UTF8CharacterToUnicode(PChar(s),CharLen); + DBCSToUTF8[Index]:=UTF8CodepointToUnicode(PChar(s),CharLen); if CharLen=0 then DBCSToUTF8[Index]:=0; writeln(IntToStr(Index)+'='+IntToStr(DBCSToUTF8[Index])+' s='+ToStringConstant(s)+' '+IntToStr(DBCSToUTF8[Index]-DBCSToUTF8[Index-1]-1)); end; @@ -319,7 +319,7 @@ begin s:=SortedTable[i]; if (length(s)=1) and (ord(s[1])<=127) then begin end else if s<>'' then begin - UniCode:=UTF8CharacterToUnicode(@s[1],CharLen); + UniCode:=UTF8CodepointToUnicode(@s[1],CharLen); TableIndex:=StrToTableIndex(s); j:=1; while (i+j<256) do begin @@ -328,11 +328,11 @@ begin ' SortedTable[i]=',ToStringConstant(s), ' SortedTable[i+j]=',ToStringConstant(SortedTable[i+j]), ' UniCode[i]=',UniCode, - ' UniCode[i+j]=',UTF8CharacterToUnicode(@SortedTable[i+j][1],CharLen), + ' UniCode[i+j]=',UTF8CodepointToUnicode(@SortedTable[i+j][1],CharLen), ' TableIndex[i]=',TableIndex, ' TableIndex[i+j]=',StrToTableIndex(SortedTable[i+j]), '');*) - if integer(UTF8CharacterToUnicode(@SortedTable[i+j][1],CharLen))<>UniCode+j then + if integer(UTF8CodepointToUnicode(@SortedTable[i+j][1],CharLen))<>UniCode+j then break; if StrToTableIndex(SortedTable[i+j])<>TableIndex+j then 
break;