lazutils: utf8pos: added optional parameter StartPos

git-svn-id: trunk@37703 -
This commit is contained in:
mattias 2012-06-20 12:11:42 +00:00
parent 3d1ce5b136
commit 593c7fdb94
4 changed files with 72 additions and 13 deletions

View File

@ -58,7 +58,9 @@ procedure UTF8FixBroken(P: PChar); overload;
procedure UTF8FixBroken(var S: string); overload;
function UTF8CharacterStrictLength(P: PChar): integer;
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string;
function UTF8Pos(const SearchForText, SearchInText: string): PtrInt;
function UTF8Pos(const SearchForText, SearchInText: string; StartPos: SizeInt = 1): PtrInt;
function UTF8PosP(SearchForText: PChar; SearchForTextLen: SizeInt;
SearchInText: PChar; SearchInTextLen: SizeInt): PChar;
function UTF8Copy(const s: string; StartCharIndex, CharCount: PtrInt): string;
procedure UTF8Delete(var s: String; StartCharIndex, CharCount: PtrInt);
procedure UTF8Insert(const source: String; var s: string; StartCharIndex: PtrInt);
@ -472,8 +474,6 @@ end;
{ Len is the length in bytes of UTF8Str
CharIndex is the position of the desired char (starting at 0), in chars
This function is similar to UTF8FindNearestCharStart
}
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
var
@ -487,7 +487,7 @@ begin
dec(CharIndex);
inc(Result,CharLen);
end;
if (CharIndex>0) or (Len<0) then
if (CharIndex<>0) or (Len<0) then
Result:=nil;
end;
end;
@ -674,16 +674,58 @@ begin
SetLength(Result, Dest - PChar(Result));
end;
function UTF8Pos(const SearchForText, SearchInText: string): PtrInt;
function UTF8Pos(const SearchForText, SearchInText: string;
StartPos: SizeInt = 1): PtrInt;
// returns the character index, where the SearchForText starts in SearchInText
// an optional StartPos can be given (in UTF-8 codepoints, not in byte)
// returns 0 if not found
var
p: LongInt;
i: SizeInt;
p: PChar;
StartPosP: PChar;
begin
p:=System.Pos(SearchForText,SearchInText);
if p>0 then
Result:=UTF8Length(PChar(SearchInText),p-1)+1
else
Result:=0;
Result:=0;
if StartPos=1 then
begin
i:=System.Pos(SearchForText,SearchInText);
if i>0 then
Result:=UTF8Length(PChar(SearchInText),i-1)+1;
end
else if StartPos>1 then
begin
// skip
StartPosP:=UTF8CharStart(PChar(SearchInText),Length(SearchInText),StartPos-1);
if StartPosP=nil then exit;
// search
p:=UTF8PosP(PChar(SearchForText),length(SearchForText),
StartPosP,length(SearchInText)+PChar(SearchInText)-StartPosP);
// get UTF-8 position
if p=nil then exit;
Result:=StartPos+UTF8Length(StartPosP,p-StartPosP);
end;
end;
function UTF8PosP(SearchForText: PChar; SearchForTextLen: SizeInt;
SearchInText: PChar; SearchInTextLen: SizeInt): PChar;
// returns the position where SearchInText starts in SearchForText
// returns nil if not found
var
p: SizeInt;
begin
Result:=nil;
if (SearchForText=nil) or (SearchForTextLen=0) or (SearchInText=nil) then
exit;
while SearchInTextLen>0 do begin
p:=IndexByte(SearchInText^,SearchInTextLen,PByte(SearchForText)^);
if p<0 then exit;
inc(SearchInText,p);
dec(SearchInTextLen,p);
if SearchInTextLen<SearchForTextLen then exit;
if CompareMem(SearchInText,SearchForText,SearchForTextLen) then
exit(SearchInText);
inc(SearchInText);
dec(SearchInTextLen);
end;
end;
function UTF8Copy(const s: string; StartCharIndex, CharCount: PtrInt): string;

View File

@ -323,8 +323,11 @@ It returns 0 if the codepoint can not be represented as a 1 to 4 byte UTF-8 sequ
</element>
<!-- function Visibility: default -->
<element name="UTF8Pos">
<short/>
<descr/>
<short>Returns the character index, where the SearchForText starts in SearchInText</short>
<descr>Returns the character index, where the SearchForText starts in SearchInText.
An optional StartPos can be given (as character index, not in byte).
Returns 0 if not found.
</descr>
<errors/>
<seealso/>
</element>
@ -724,6 +727,9 @@ It returns 0 if the codepoint can not be represented as a 1 to 4 byte UTF-8 sequ
Use Flags to only delete at start or only at end or to to not delete line breaks.
Control characters are the unicode sets C0 and C1 and the left-to-right and right-to-left marks.</descr>
</element>
<element name="UTF8PosP"><short>Returns the position where SearchInText starts in SearchForText</short><descr>If not found it returns nil.
Null characters #0 are treated as normal characters.</descr>
</element>
</module>
<!-- LazUTF8 -->
</package>

View File

@ -50,6 +50,7 @@ uses
type
TSDFilenameQuality = (
sddqInvalid,
sddqWrongMinorVersion,
sddqWrongVersion,
sddqIncomplete,
sddqCompatible
@ -700,6 +701,7 @@ begin
if SrcVer<>FPCVer then
begin
Note:=Format(lisFoundVersionExpected, [SrcVer, FPCVer]);
Result:=sddqWrongVersion;
exit;
end;

View File

@ -4,6 +4,7 @@
Test specific with:
./runtests --format=plain --suite=TestUTF8Trim
./runtests --format=plain --suite=TestUTF8Pos
}
unit TestLazUTF8;
@ -22,6 +23,7 @@ type
public
published
procedure TestUTF8Trim;
procedure TestUTF8Pos;
end;
implementation
@ -45,6 +47,13 @@ begin
AssertEquals('left-to-right, right-to-left mark','a',UTF8Trim(#$E2#$80#$8E'a'#$E2#$80#$8F));
end;
procedure TTestLazUTF8.TestUTF8Pos;
begin
AssertEquals('Skip first occurence',4,UTF8Pos('ab','abcabc',2));
AssertEquals('Not found',0,UTF8Pos('abc'#0,'abcabc'));
AssertEquals('Check #0',2,UTF8Pos('bc'#0,'abc'#0'abc'));
end;
initialization
AddToLazUtilsTestSuite(TTestLazUTF8);