From b192fb9760e1df8b10e4feafb7d22cc2815ccde5 Mon Sep 17 00:00:00 2001 From: bart <9132501-flyingsheep@users.noreply.gitlab.com> Date: Thu, 17 Mar 2016 10:42:52 +0000 Subject: [PATCH] LazUtf8: Refactor UTF8FindNearestCharStart. Resolves Issue #0029851. git-svn-id: trunk@51973 - --- components/lazutils/lazutf8.pas | 47 +++++++++------------------------ 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/components/lazutils/lazutf8.pas b/components/lazutils/lazutf8.pas index 450a302ec5..e1fd7a94cb 100644 --- a/components/lazutils/lazutf8.pas +++ b/components/lazutils/lazutf8.pas @@ -643,45 +643,22 @@ begin end; { Find the start of the UTF8 character which contains BytePos, + if BytePos is not part of a valid Utf8Codepoint the function returns BytePos Len is length in byte, BytePos starts at 0 } -function UTF8FindNearestCharStart(UTF8Str: PChar; Len: SizeInt; BytePos: SizeInt - ): SizeInt; +function UTF8FindNearestCharStart(UTF8Str: PChar; Len: SizeInt; BytePos: SizeInt): SizeInt; +var + CurPos: PChar; + CharLen: Integer; begin - Result:=0; - if (UTF8Str<>nil) and (Len>0) and (BytePos>=0) then begin - Result:=BytePos; - if Result>Len then Result:=Len-1; - if (Result>0) and (ord(UTF8Str[Result]) and %11000000=%10000000) then begin - dec(Result); - if (Result>0) and (ord(UTF8Str[Result]) and %11000000=%10000000) then begin - dec(Result); - if (Result>0) and (ord(UTF8Str[Result]) and %11000000=%10000000) then begin - dec(Result); - // should be four byte character - if (ord(UTF8Str[Result]) and %11111000<>%11110000) then begin - // broken UTF8 character - inc(Result,3); - end else begin - // is four byte character - end; - end else if (ord(UTF8Str[Result]) and %11110000<>%11100000) then begin - // broken UTF8 character, should be three byte - inc(Result,2); - end else - begin - // is three byte character - end; - end else if (ord(UTF8Str[Result]) and %11100000<>%11000000) then begin - // broken UTF8 character, should be two byte - inc(Result); - end else - 
begin - // is two byte character - end; - end; - end; + if (BytePos > Len-1) then BytePos := Len - 1; + CurPos := Utf8Str + BytePos; + //No need to check the result value, since when it returns False CurPos will be reset + //to its original value, and that's what we want to return in that case + Utf8TryFindCodepointStart(Utf8Str, CurPos, CharLen); + Result := CurPos - Utf8Str; end; + { Len is the length in bytes of UTF8Str CharIndex is the position of the desired char (starting at 0), in chars }