LazUtf8: UTF8CodepointCount() added, which supports combining code points

This commit is contained in:
rich2014 2024-08-29 21:24:14 +08:00
parent a534b967a0
commit c8a1f93a6e

View File

@ -84,6 +84,9 @@ function UTF8Length(p: PChar; ByteCount: PtrInt): PtrInt;
function UTF8LengthFast(const s: string): PtrInt; inline;
function UTF8LengthFast(p: PChar; ByteCount: PtrInt): PtrInt;
// Count codepoints by stepping through the buffer with UTF8CodepointLen.
// The string overload forwards PChar(s) and Length(s) to the PChar overload.
function UTF8CodepointCount(const s: string): PtrInt; inline;
function UTF8CodepointCount(p: PChar; ByteCount: PtrInt): PtrInt;
// Functions dealing with unicode number U+xxx.
function UTF8CodepointToUnicode(p: PChar; out CodepointLen: integer): Cardinal;
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal; deprecated 'Use UTF8CodepointToUnicode instead.';
@ -567,6 +570,26 @@ begin
Result := ByteCount - Result;
end;
function UTF8CodepointCount(const s: string): PtrInt;
{ Convenience overload: counts the codepoints of a whole string by handing
  its character buffer and byte length to the PChar/ByteCount overload. }
var
  TotalBytes: PtrInt;
begin
  TotalBytes := Length(s);
  Result := UTF8CodepointCount(PChar(s), TotalBytes);
end;
function UTF8CodepointCount(p: PChar; ByteCount: PtrInt): PtrInt;
{ Counts codepoints in the buffer starting at p, looking at no more than
  ByteCount bytes.

  Returns 0 when ByteCount <= 0. Otherwise the buffer is walked one
  codepoint at a time using UTF8CodepointLen(p, ByteCount, false).

  The counter is incremented before the length is inspected, so a position
  for which UTF8CodepointLen returns 0 is still counted once and the scan
  stops there; any bytes after it are not examined.
  NOTE(review): presumably 0 signals an invalid or truncated sequence -
  confirm against UTF8CodepointLen and decide whether counting it is
  intended. }
var
  StepLen: LongInt;
begin
  Result := 0;
  // Nothing to scan: keep the zero result.
  if ByteCount <= 0 then
    Exit;
  repeat
    // Count the codepoint at the current position first ...
    Result := Result + 1;
    StepLen := UTF8CodepointLen(p, ByteCount, false);
    // ... then stop if no length could be determined for it.
    if StepLen = 0 then
      Break;
    // Advance past this codepoint and shrink the remaining byte budget.
    Inc(p, StepLen);
    ByteCount := ByteCount - StepLen;
  until ByteCount <= 0;
end;
function UTF8CodepointToUnicode(p: PChar; out CodepointLen: integer): Cardinal;
{ if p=nil then CodepointLen=0 otherwise CodepointLen>0
If there is an encoding error the Result is 0 and CodepointLen=1.