mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-12-16 00:40:43 +01:00
lazutils: utf8pos: added optional parameter StartPos
git-svn-id: trunk@37703 -
This commit is contained in:
parent
3d1ce5b136
commit
593c7fdb94
@ -58,7 +58,9 @@ procedure UTF8FixBroken(P: PChar); overload;
|
||||
procedure UTF8FixBroken(var S: string); overload;
|
||||
function UTF8CharacterStrictLength(P: PChar): integer;
|
||||
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string;
|
||||
function UTF8Pos(const SearchForText, SearchInText: string): PtrInt;
|
||||
function UTF8Pos(const SearchForText, SearchInText: string; StartPos: SizeInt = 1): PtrInt;
|
||||
function UTF8PosP(SearchForText: PChar; SearchForTextLen: SizeInt;
|
||||
SearchInText: PChar; SearchInTextLen: SizeInt): PChar;
|
||||
function UTF8Copy(const s: string; StartCharIndex, CharCount: PtrInt): string;
|
||||
procedure UTF8Delete(var s: String; StartCharIndex, CharCount: PtrInt);
|
||||
procedure UTF8Insert(const source: String; var s: string; StartCharIndex: PtrInt);
|
||||
@ -472,8 +474,6 @@ end;
|
||||
|
||||
{ Len is the length in bytes of UTF8Str
|
||||
CharIndex is the position of the desired char (starting at 0), in chars
|
||||
|
||||
This function is similar to UTF8FindNearestCharStart
|
||||
}
|
||||
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
|
||||
var
|
||||
@ -487,7 +487,7 @@ begin
|
||||
dec(CharIndex);
|
||||
inc(Result,CharLen);
|
||||
end;
|
||||
if (CharIndex>0) or (Len<0) then
|
||||
if (CharIndex<>0) or (Len<0) then
|
||||
Result:=nil;
|
||||
end;
|
||||
end;
|
||||
@ -674,16 +674,58 @@ begin
|
||||
SetLength(Result, Dest - PChar(Result));
|
||||
end;
|
||||
|
||||
function UTF8Pos(const SearchForText, SearchInText: string): PtrInt;
|
||||
function UTF8Pos(const SearchForText, SearchInText: string;
|
||||
StartPos: SizeInt = 1): PtrInt;
|
||||
// returns the character index, where the SearchForText starts in SearchInText
|
||||
// an optional StartPos can be given (in UTF-8 codepoints, not in byte)
|
||||
// returns 0 if not found
|
||||
var
|
||||
p: LongInt;
|
||||
i: SizeInt;
|
||||
p: PChar;
|
||||
StartPosP: PChar;
|
||||
begin
|
||||
p:=System.Pos(SearchForText,SearchInText);
|
||||
if p>0 then
|
||||
Result:=UTF8Length(PChar(SearchInText),p-1)+1
|
||||
else
|
||||
Result:=0;
|
||||
Result:=0;
|
||||
if StartPos=1 then
|
||||
begin
|
||||
i:=System.Pos(SearchForText,SearchInText);
|
||||
if i>0 then
|
||||
Result:=UTF8Length(PChar(SearchInText),i-1)+1;
|
||||
end
|
||||
else if StartPos>1 then
|
||||
begin
|
||||
// skip
|
||||
StartPosP:=UTF8CharStart(PChar(SearchInText),Length(SearchInText),StartPos-1);
|
||||
if StartPosP=nil then exit;
|
||||
// search
|
||||
p:=UTF8PosP(PChar(SearchForText),length(SearchForText),
|
||||
StartPosP,length(SearchInText)+PChar(SearchInText)-StartPosP);
|
||||
// get UTF-8 position
|
||||
if p=nil then exit;
|
||||
Result:=StartPos+UTF8Length(StartPosP,p-StartPosP);
|
||||
end;
|
||||
end;
|
||||
|
||||
function UTF8PosP(SearchForText: PChar; SearchForTextLen: SizeInt;
|
||||
SearchInText: PChar; SearchInTextLen: SizeInt): PChar;
|
||||
// returns the position where SearchInText starts in SearchForText
|
||||
// returns nil if not found
|
||||
var
|
||||
p: SizeInt;
|
||||
begin
|
||||
Result:=nil;
|
||||
if (SearchForText=nil) or (SearchForTextLen=0) or (SearchInText=nil) then
|
||||
exit;
|
||||
while SearchInTextLen>0 do begin
|
||||
p:=IndexByte(SearchInText^,SearchInTextLen,PByte(SearchForText)^);
|
||||
if p<0 then exit;
|
||||
inc(SearchInText,p);
|
||||
dec(SearchInTextLen,p);
|
||||
if SearchInTextLen<SearchForTextLen then exit;
|
||||
if CompareMem(SearchInText,SearchForText,SearchForTextLen) then
|
||||
exit(SearchInText);
|
||||
inc(SearchInText);
|
||||
dec(SearchInTextLen);
|
||||
end;
|
||||
end;
|
||||
|
||||
function UTF8Copy(const s: string; StartCharIndex, CharCount: PtrInt): string;
|
||||
|
||||
@ -323,8 +323,11 @@ It returns 0 if the codepoint can not be represented as a 1 to 4 byte UTF-8 sequ
|
||||
</element>
|
||||
<!-- function Visibility: default -->
|
||||
<element name="UTF8Pos">
|
||||
<short/>
|
||||
<descr/>
|
||||
<short>Returns the character index, where the SearchForText starts in SearchInText</short>
|
||||
<descr>Returns the character index, where the SearchForText starts in SearchInText.
|
||||
An optional StartPos can be given (as character index, not in byte).
|
||||
Returns 0 if not found.
|
||||
</descr>
|
||||
<errors/>
|
||||
<seealso/>
|
||||
</element>
|
||||
@ -724,6 +727,9 @@ It returns 0 if the codepoint can not be represented as a 1 to 4 byte UTF-8 sequ
|
||||
Use Flags to only delete at start or only at end or to to not delete line breaks.
|
||||
Control characters are the unicode sets C0 and C1 and the left-to-right and right-to-left marks.</descr>
|
||||
</element>
|
||||
<element name="UTF8PosP"><short>Returns the position where SearchInText starts in SearchForText</short><descr>If not found it returns nil.
|
||||
Null characters #0 are treated as normal characters.</descr>
|
||||
</element>
|
||||
</module>
|
||||
<!-- LazUTF8 -->
|
||||
</package>
|
||||
|
||||
@ -50,6 +50,7 @@ uses
|
||||
type
|
||||
TSDFilenameQuality = (
|
||||
sddqInvalid,
|
||||
sddqWrongMinorVersion,
|
||||
sddqWrongVersion,
|
||||
sddqIncomplete,
|
||||
sddqCompatible
|
||||
@ -700,6 +701,7 @@ begin
|
||||
if SrcVer<>FPCVer then
|
||||
begin
|
||||
Note:=Format(lisFoundVersionExpected, [SrcVer, FPCVer]);
|
||||
|
||||
Result:=sddqWrongVersion;
|
||||
exit;
|
||||
end;
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
|
||||
Test specific with:
|
||||
./runtests --format=plain --suite=TestUTF8Trim
|
||||
./runtests --format=plain --suite=TestUTF8Pos
|
||||
}
|
||||
unit TestLazUTF8;
|
||||
|
||||
@ -22,6 +23,7 @@ type
|
||||
public
|
||||
published
|
||||
procedure TestUTF8Trim;
|
||||
procedure TestUTF8Pos;
|
||||
end;
|
||||
|
||||
implementation
|
||||
@ -45,6 +47,13 @@ begin
|
||||
AssertEquals('left-to-right, right-to-left mark','a',UTF8Trim(#$E2#$80#$8E'a'#$E2#$80#$8F));
|
||||
end;
|
||||
|
||||
procedure TTestLazUTF8.TestUTF8Pos;
|
||||
begin
|
||||
AssertEquals('Skip first occurence',4,UTF8Pos('ab','abcabc',2));
|
||||
AssertEquals('Not found',0,UTF8Pos('abc'#0,'abcabc'));
|
||||
AssertEquals('Check #0',2,UTF8Pos('bc'#0,'abc'#0'abc'));
|
||||
end;
|
||||
|
||||
initialization
|
||||
AddToLazUtilsTestSuite(TTestLazUTF8);
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user