lazutils: utf8pos: added optional parameter StartPos

git-svn-id: trunk@37703 -
2025-12-16 00:40:43 +01:00 · 2012-06-20 12:11:42 +00:00 · 2012-06-20 12:11:42 +00:00 · 593c7fdb94
commit 593c7fdb94
parent 3d1ce5b136
4 changed files with 72 additions and 13 deletions
--- a/components/lazutils/lazutf8.pas
+++ b/components/lazutils/lazutf8.pas
@ -58,7 +58,9 @@ procedure UTF8FixBroken(P: PChar); overload;
 procedure UTF8FixBroken(var S: string); overload;
 function UTF8CharacterStrictLength(P: PChar): integer;
 function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string;
-function UTF8Pos(const SearchForText, SearchInText: string): PtrInt;
+function UTF8Pos(const SearchForText, SearchInText: string; StartPos: SizeInt = 1): PtrInt;
+function UTF8PosP(SearchForText: PChar; SearchForTextLen: SizeInt;
+  SearchInText: PChar; SearchInTextLen: SizeInt): PChar;
 function UTF8Copy(const s: string; StartCharIndex, CharCount: PtrInt): string;
 procedure UTF8Delete(var s: String; StartCharIndex, CharCount: PtrInt);
 procedure UTF8Insert(const source: String; var s: string; StartCharIndex: PtrInt);
@ -472,8 +474,6 @@ end;

 { Len is the length in bytes of UTF8Str
  CharIndex is the position of the desired char (starting at 0), in chars
-
-  This function is similar to UTF8FindNearestCharStart
 }
 function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
 var
@ -487,7 +487,7 @@ begin
      dec(CharIndex);
      inc(Result,CharLen);
    end;
-    if (CharIndex>0) or (Len<0) then
+    if (CharIndex<>0) or (Len<0) then
      Result:=nil;
  end;
 end;
@ -674,16 +674,58 @@ begin
  SetLength(Result, Dest - PChar(Result));
 end;

-function UTF8Pos(const SearchForText, SearchInText: string): PtrInt;
+function UTF8Pos(const SearchForText, SearchInText: string;
+  StartPos: SizeInt = 1): PtrInt;
 // returns the character index, where the SearchForText starts in SearchInText
+// an optional StartPos can be given (in UTF-8 codepoints, not in byte)
+// returns 0 if not found
 var
-  p: LongInt;
+  i: SizeInt;
+  p: PChar;
+  StartPosP: PChar;
 begin
-  p:=System.Pos(SearchForText,SearchInText);
-  if p>0 then
-    Result:=UTF8Length(PChar(SearchInText),p-1)+1
-  else
-    Result:=0;
+  Result:=0;
+  if StartPos=1 then
+  begin
+    i:=System.Pos(SearchForText,SearchInText);
+    if i>0 then
+      Result:=UTF8Length(PChar(SearchInText),i-1)+1;
+  end
+  else if StartPos>1 then
+  begin
+    // skip
+    StartPosP:=UTF8CharStart(PChar(SearchInText),Length(SearchInText),StartPos-1);
+    if StartPosP=nil then exit;
+    // search
+    p:=UTF8PosP(PChar(SearchForText),length(SearchForText),
+                StartPosP,length(SearchInText)+PChar(SearchInText)-StartPosP);
+    // get UTF-8 position
+    if p=nil then exit;
+    Result:=StartPos+UTF8Length(StartPosP,p-StartPosP);
+  end;
+end;
+
+function UTF8PosP(SearchForText: PChar; SearchForTextLen: SizeInt;
+  SearchInText: PChar; SearchInTextLen: SizeInt): PChar;
+// returns the position where SearchInText starts in SearchForText
+// returns nil if not found
+var
+  p: SizeInt;
+begin
+  Result:=nil;
+  if (SearchForText=nil) or (SearchForTextLen=0) or (SearchInText=nil) then
+    exit;
+  while SearchInTextLen>0 do begin
+    p:=IndexByte(SearchInText^,SearchInTextLen,PByte(SearchForText)^);
+    if p<0 then exit;
+    inc(SearchInText,p);
+    dec(SearchInTextLen,p);
+    if SearchInTextLen<SearchForTextLen then exit;
+    if CompareMem(SearchInText,SearchForText,SearchForTextLen) then
+      exit(SearchInText);
+    inc(SearchInText);
+    dec(SearchInTextLen);
+  end;
 end;

 function UTF8Copy(const s: string; StartCharIndex, CharCount: PtrInt): string;
--- a/docs/xml/lazutils/lazutf8.xml
+++ b/docs/xml/lazutils/lazutf8.xml
@ -323,8 +323,11 @@ It returns 0 if the codepoint can not be represented as a 1 to 4 byte UTF-8 sequ
      </element>
      <!-- function Visibility: default -->
      <element name="UTF8Pos">
-        <short/>
-        <descr/>
+        <short>Returns the character index, where the SearchForText starts in SearchInText</short>
+        <descr>Returns the character index, where the SearchForText starts in SearchInText.
+An optional StartPos can be given (as character index, not in byte).
+Returns 0 if not found.
+</descr>
        <errors/>
        <seealso/>
      </element>
@ -724,6 +727,9 @@ It returns 0 if the codepoint can not be represented as a 1 to 4 byte UTF-8 sequ
 Use Flags to only delete at start or only at end or to to not delete line breaks.
 Control characters are the unicode sets C0 and C1 and the left-to-right and right-to-left marks.</descr>
      </element>
+    <element name="UTF8PosP"><short>Returns the position where SearchInText starts in SearchForText</short><descr>If not found it returns nil.
+Null characters #0 are treated as normal characters.</descr>
+      </element>
    </module>
    <!-- LazUTF8 -->
  </package>
--- a/ide/initialsetupdlgs.pas
+++ b/ide/initialsetupdlgs.pas
@ -50,6 +50,7 @@ uses
 type
  TSDFilenameQuality = (
    sddqInvalid,
+    sddqWrongMinorVersion,
    sddqWrongVersion,
    sddqIncomplete,
    sddqCompatible
@ -700,6 +701,7 @@ begin
          if SrcVer<>FPCVer then
          begin
            Note:=Format(lisFoundVersionExpected, [SrcVer, FPCVer]);
+
            Result:=sddqWrongVersion;
            exit;
          end;
--- a/test/lazutils/testlazutf8.pas
+++ b/test/lazutils/testlazutf8.pas
@ -4,6 +4,7 @@

 Test specific with:
     ./runtests --format=plain --suite=TestUTF8Trim
+     ./runtests --format=plain --suite=TestUTF8Pos
 }
 unit TestLazUTF8;

@ -22,6 +23,7 @@ type
  public
  published
    procedure TestUTF8Trim;
+    procedure TestUTF8Pos;
  end;

 implementation
@ -45,6 +47,13 @@ begin
  AssertEquals('left-to-right, right-to-left mark','a',UTF8Trim(#$E2#$80#$8E'a'#$E2#$80#$8F));
 end;

+procedure TTestLazUTF8.TestUTF8Pos;
+begin
+  AssertEquals('Skip first occurence',4,UTF8Pos('ab','abcabc',2));
+  AssertEquals('Not found',0,UTF8Pos('abc'#0,'abcabc'));
+  AssertEquals('Check #0',2,UTF8Pos('bc'#0,'abc'#0'abc'));
+end;
+
 initialization
  AddToLazUtilsTestSuite(TTestLazUTF8);