mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-08-11 15:56:08 +02:00
LazUtils: Masks.pas: implement handling of UTF8 codepoints for mask matching. Issue #0024373.
git-svn-id: trunk@43637 -
This commit is contained in:
parent
cc8cd63dec
commit
3fb2dd4469
@ -20,21 +20,23 @@ interface
|
||||
|
||||
uses
|
||||
// For Smart Linking: Do not use the LCL!
|
||||
Classes, SysUtils, Contnrs, StrUtils, LazUtilsStrConsts;
|
||||
Classes, SysUtils, Contnrs, LazUtilsStrConsts, LazUtf8;
|
||||
|
||||
type
|
||||
TMaskCharType = (mcChar, mcCharSet, mcAnyChar, mcAnyText);
|
||||
|
||||
|
||||
TCharSet = set of Char;
|
||||
PCharSet = ^TCharSet;
|
||||
|
||||
|
||||
TUtf8Char = String[7];
|
||||
|
||||
TMaskChar = record
|
||||
case CharType: TMaskCharType of
|
||||
mcChar: (CharValue: Char);
|
||||
mcChar: (CharValue: TUtf8Char);
|
||||
mcCharSet: (Negative: Boolean; SetValue: PCharSet);
|
||||
mcAnyChar, mcAnyText: ();
|
||||
end;
|
||||
|
||||
|
||||
TMaskString = record
|
||||
MinLength: Integer;
|
||||
MaxLength: Integer;
|
||||
@ -53,18 +55,18 @@ type
|
||||
public
|
||||
constructor Create(const AValue: String; const CaseSensitive: Boolean = False);
|
||||
destructor Destroy; override;
|
||||
|
||||
|
||||
function Matches(const AFileName: String): Boolean;
|
||||
function MatchesWindowsMask(const AFileName: String): Boolean;
|
||||
end;
|
||||
|
||||
|
||||
{ TParseStringList }
|
||||
|
||||
TParseStringList = class(TStringList)
|
||||
public
|
||||
constructor Create(const AText, ASeparators: String);
|
||||
end;
|
||||
|
||||
|
||||
{ TMaskList }
|
||||
|
||||
TMaskList = class
|
||||
@ -75,10 +77,10 @@ type
|
||||
public
|
||||
constructor Create(const AValue: String; ASeparator: Char = ';'; const CaseSensitive: Boolean = False);
|
||||
destructor Destroy; override;
|
||||
|
||||
|
||||
function Matches(const AFileName: String): Boolean;
|
||||
function MatchesWindowsMask(const AFileName: String): Boolean;
|
||||
|
||||
|
||||
property Count: Integer read GetCount;
|
||||
property Items[Index: Integer]: TMask read GetItem;
|
||||
end;
|
||||
@ -90,6 +92,26 @@ function MatchesWindowsMaskList(const FileName, Mask: String; Separator: Char =
|
||||
|
||||
implementation
|
||||
|
||||
//Utf8 helper functions
|
||||
|
||||
function GetCodePoint(const S: String; const Index: PtrInt): TUTF8Char;
|
||||
//equivalent for Result := S[Index], but for Utf8 encoded strings
|
||||
var
|
||||
p: PChar;
|
||||
PLen: PtrInt;
|
||||
Res: AnsiString; //intermediate needed for PChar -> String -> ShortString assignement
|
||||
begin
|
||||
Result := '';
|
||||
p := UTF8CharStart(PChar(S), Length(S), Index - 1); //zero-based call
|
||||
//determine the length in bytes of this UTF-8 character
|
||||
PLen := UTF8CharacterLength(p);
|
||||
Res := p;
|
||||
//Set correct length for Result (otherwise it returns all chars up to the end of the original string)
|
||||
SetLength(Res,PLen);
|
||||
Result := Res;
|
||||
end;
|
||||
|
||||
|
||||
function MatchesMask(const FileName, Mask: String; const CaseSensitive: Boolean): Boolean;
|
||||
var
|
||||
AMask: TMask;
|
||||
@ -184,6 +206,7 @@ var
|
||||
CharSet: TCharSet;
|
||||
Valid: Boolean;
|
||||
C, Last: Char;
|
||||
CP: TUtf8Char;
|
||||
begin
|
||||
SkipAnyText := False;
|
||||
|
||||
@ -191,7 +214,7 @@ var
|
||||
FMask.Chars[High(FMask.Chars)].CharType := mcCharSet;
|
||||
|
||||
Inc(I);
|
||||
if (I <= Length(AValue)) and (AValue[I] = '!') then
|
||||
if (I <= Utf8Length(AValue)) and (GetCodePoint(AValue,I) = '!') then
|
||||
begin
|
||||
FMask.Chars[High(FMask.Chars)].Negative := True;
|
||||
Inc(I);
|
||||
@ -201,24 +224,30 @@ var
|
||||
Last := '-';
|
||||
CharSet := [];
|
||||
Valid := False;
|
||||
while I <= Length(AValue) do
|
||||
while I <= Utf8Length(AValue) do
|
||||
begin
|
||||
case AValue[I] of
|
||||
CP := GetCodePoint(AValue,I);
|
||||
//CharSets can only contain 1-byte codepoints
|
||||
if (Length(CP) <> 1) then CharSetError;
|
||||
case CP of
|
||||
'-':
|
||||
begin
|
||||
if Last = '-' then CharSetError;
|
||||
Inc(I);
|
||||
|
||||
if (I > Length(AValue)) then CharSetError;
|
||||
if (I > Utf8Length(AValue)) then CharSetError;
|
||||
CP := GetCodePoint(AValue, I);
|
||||
if (Length(CP) <> 1) then CharSetError;
|
||||
if fCaseSensitive then
|
||||
begin
|
||||
//DebugLn('Set: ' + Last + '-' + (AValue[I]));
|
||||
for C := Last to (AValue[I]) do Include(CharSet, C);
|
||||
//writeln('MaskUtf8: Set: ' + Last + '-' + (CP[1]));
|
||||
for C := Last to CP[1] do
|
||||
Include(CharSet, C)
|
||||
end
|
||||
else
|
||||
begin
|
||||
//DebugLn('Set: ' + Last + '-' + UpCase(AValue[I]));
|
||||
for C := Last to UpCase(AValue[I]) do Include(CharSet, C);
|
||||
//writeln('MaskUtf8: Set: ' + Last + '-' + UpCase(CP[1]));
|
||||
for C := Last to UpCase(CP[1]) do
|
||||
Include(CharSet, C)
|
||||
end;
|
||||
Inc(I);
|
||||
end;
|
||||
@ -230,15 +259,14 @@ var
|
||||
else
|
||||
begin
|
||||
if fCaseSensitive then
|
||||
Last := AValue[I]
|
||||
Last := CP[1]
|
||||
else
|
||||
Last := UpCase(AValue[I]);
|
||||
Last := UpCase(CP[1]);
|
||||
Include(CharSet, Last);
|
||||
Inc(I);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
if (not Valid) or (CharSet = []) then CharSetError;
|
||||
|
||||
New(FMask.Chars[High(FMask.Chars)].SetValue);
|
||||
@ -259,9 +287,9 @@ var
|
||||
begin
|
||||
CharType := mcChar;
|
||||
if fCaseSensitive then
|
||||
CharValue := AValue[I]
|
||||
CharValue := GetCodePoint(AValue,I)
|
||||
else
|
||||
CharValue := UpCase(AValue[I]);
|
||||
CharValue := Utf8UpperCase(GetCodePoint(AValue,I));
|
||||
end;
|
||||
|
||||
Inc(FMask.MinLength);
|
||||
@ -278,9 +306,9 @@ begin
|
||||
SkipAnyText := False;
|
||||
|
||||
I := 1;
|
||||
while I <= Length(AValue) do
|
||||
while I <= Utf8Length(AValue) do
|
||||
begin
|
||||
case AValue[I] of
|
||||
case GetCodePoint(AValue,I) of
|
||||
'*': AddAnyText;
|
||||
'?': AddAnyChar;
|
||||
'[': AddCharSet;
|
||||
@ -316,13 +344,14 @@ function TMask.Matches(const AFileName: String): Boolean;
|
||||
var
|
||||
L: Integer;
|
||||
S: String;
|
||||
|
||||
|
||||
function MatchToEnd(MaskIndex, CharIndex: Integer): Boolean;
|
||||
var
|
||||
I, J: Integer;
|
||||
CP: TUtf8Char;
|
||||
begin
|
||||
Result := False;
|
||||
|
||||
|
||||
for I := MaskIndex to High(FMask.Chars) do
|
||||
begin
|
||||
case FMask.Chars[I].CharType of
|
||||
@ -330,14 +359,17 @@ var
|
||||
begin
|
||||
if CharIndex > L then Exit;
|
||||
//DebugLn('Match ' + S[CharIndex] + '<?>' + FMask.Chars[I].CharValue);
|
||||
if S[CharIndex] <> FMask.Chars[I].CharValue then Exit;
|
||||
if GetCodePoint(S,CharIndex) <> FMask.Chars[I].CharValue then Exit;
|
||||
Inc(CharIndex);
|
||||
end;
|
||||
mcCharSet:
|
||||
begin
|
||||
if CharIndex > L then Exit;
|
||||
CP := GetCodePoint(S, CharIndex);
|
||||
//There are only 1-byte codepoints in charsets
|
||||
if (Length(CP) <> 1) then Exit;
|
||||
if FMask.Chars[I].Negative xor
|
||||
(S[CharIndex] in FMask.Chars[I].SetValue^) then Inc(CharIndex)
|
||||
(Cp[1] in FMask.Chars[I].SetValue^) then Inc(CharIndex)
|
||||
else Exit;
|
||||
end;
|
||||
mcAnyChar:
|
||||
@ -352,7 +384,7 @@ var
|
||||
Result := True;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
|
||||
for J := CharIndex to L do
|
||||
if MatchToEnd(I + 1, J) then
|
||||
begin
|
||||
@ -362,24 +394,24 @@ var
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
Result := CharIndex > L;
|
||||
end;
|
||||
|
||||
|
||||
begin
|
||||
Result := False;
|
||||
L := Length(AFileName);
|
||||
L := Utf8Length(AFileName);
|
||||
if L = 0 then
|
||||
begin
|
||||
if FMask.MinLength = 0 then Result := True;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
|
||||
if (L < FMask.MinLength) or (L > FMask.MaxLength) then Exit;
|
||||
if fCaseSensitive then
|
||||
S := AFileName
|
||||
else
|
||||
S := UpperCase(AFileName);
|
||||
S := Utf8UpperCase(AFileName);
|
||||
Result := MatchToEnd(0, 1);
|
||||
end;
|
||||
|
||||
@ -394,10 +426,10 @@ begin
|
||||
// foo. matches only foo but not foo.txt
|
||||
// foo.* -> match either foo or foo.*
|
||||
|
||||
if (Length(fInitialMask) > 2) and (RightStr(fInitialMask,3) = '*.*') then
|
||||
if (Utf8Length(fInitialMask) > 2) and (Utf8RightStr(fInitialMask,3) = '*.*') then
|
||||
// foo*.*
|
||||
begin
|
||||
NewMaskValue := Copy(fInitialMask,1,Length(fInitialMask)-2);
|
||||
NewMaskValue := Utf8Copy(fInitialMask,1,Utf8Length(fInitialMask)-2);
|
||||
ClearMaskString;
|
||||
InitMaskString(NewMaskValue, fCaseSensitive);
|
||||
Result := Matches(AFileName);
|
||||
@ -406,7 +438,7 @@ begin
|
||||
InitMaskString(fInitialMask, fCaseSensitive);
|
||||
end
|
||||
//else if (Length(fInitialMask) > 1) and (RightStr(fInitialMask,2) = '*.') then
|
||||
else if (Length(fInitialMask) > 1) and (fInitialMask[Length(fInitialMask)] = '.') then
|
||||
else if (Utf8Length(fInitialMask) > 1) and (GetCodePoint(fInitialMask,Utf8Length(fInitialMask)) = '.') then
|
||||
|
||||
//foo*. or *. or foo.
|
||||
begin
|
||||
@ -415,7 +447,7 @@ begin
|
||||
Ext := ExtractFileExt(AFileName);
|
||||
if (Ext = '') or (Ext = AFileName) then
|
||||
begin
|
||||
NewMaskValue := Copy(fInitialMask,1,Length(fInitialMask)-1);
|
||||
NewMaskValue := Utf8Copy(fInitialMask,1,Utf8Length(fInitialMask)-1);
|
||||
ClearMaskString;
|
||||
InitMaskString(NewMaskValue, fCaseSensitive);
|
||||
Result := Matches(AFileName);
|
||||
@ -428,14 +460,14 @@ begin
|
||||
Result := False;
|
||||
end;
|
||||
end
|
||||
else if (Length(fInitialMask) > 2) and (RightStr(fInitialMask,2) = '.*') then
|
||||
else if (Utf8Length(fInitialMask) > 2) and (Utf8RightStr(fInitialMask,2) = '.*') then
|
||||
//foo.* (but not '.*')
|
||||
begin
|
||||
//First see if we have 'foo'
|
||||
if fCaseSensitive then
|
||||
Result := (AFileName = Copy(fInitialMask,1,Length(fInitialMask)-2))
|
||||
Result := (AFileName = Utf8Copy(fInitialMask,1,Utf8Length(fInitialMask)-2))
|
||||
else
|
||||
Result := (CompareText(AFileName,Copy(fInitialMask,1,Length(fInitialMask)-2)) = 0);
|
||||
Result := (Utf8CompareText(AFileName,Utf8Copy(fInitialMask,1,Utf8Length(fInitialMask)-2)) = 0);
|
||||
if not Result then Result := Matches(AFileName);
|
||||
end
|
||||
else
|
||||
@ -462,7 +494,7 @@ begin
|
||||
S := I + 1;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
if Length(AText) >= S then Add(Copy(AText, S, Length(AText) - S + 1));
|
||||
end;
|
||||
|
||||
@ -484,7 +516,7 @@ var
|
||||
I: Integer;
|
||||
begin
|
||||
FMasks := TObjectList.Create(True);
|
||||
|
||||
|
||||
S := TParseStringList.Create(AValue, ASeparator);
|
||||
try
|
||||
for I := 0 to S.Count - 1 do
|
||||
@ -497,7 +529,7 @@ end;
|
||||
destructor TMaskList.Destroy;
|
||||
begin
|
||||
FMasks.Free;
|
||||
|
||||
|
||||
inherited Destroy;
|
||||
end;
|
||||
|
||||
@ -506,7 +538,7 @@ var
|
||||
I: Integer;
|
||||
begin
|
||||
Result := False;
|
||||
|
||||
|
||||
for I := 0 to FMasks.Count - 1 do
|
||||
begin
|
||||
if TMask(FMasks.Items[I]).Matches(AFileName) then
|
||||
|
Loading…
Reference in New Issue
Block a user