mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-08-12 16:49:44 +02:00
LazUtils: Masks.pas: implement handling of UTF8 codepoints for mask matching. Issue #0024373.
git-svn-id: trunk@43637 -
This commit is contained in:
parent
cc8cd63dec
commit
3fb2dd4469
@ -20,21 +20,23 @@ interface
|
|||||||
|
|
||||||
uses
|
uses
|
||||||
// For Smart Linking: Do not use the LCL!
|
// For Smart Linking: Do not use the LCL!
|
||||||
Classes, SysUtils, Contnrs, StrUtils, LazUtilsStrConsts;
|
Classes, SysUtils, Contnrs, LazUtilsStrConsts, LazUtf8;
|
||||||
|
|
||||||
type
|
type
|
||||||
TMaskCharType = (mcChar, mcCharSet, mcAnyChar, mcAnyText);
|
TMaskCharType = (mcChar, mcCharSet, mcAnyChar, mcAnyText);
|
||||||
|
|
||||||
TCharSet = set of Char;
|
TCharSet = set of Char;
|
||||||
PCharSet = ^TCharSet;
|
PCharSet = ^TCharSet;
|
||||||
|
|
||||||
|
TUtf8Char = String[7];
|
||||||
|
|
||||||
TMaskChar = record
|
TMaskChar = record
|
||||||
case CharType: TMaskCharType of
|
case CharType: TMaskCharType of
|
||||||
mcChar: (CharValue: Char);
|
mcChar: (CharValue: TUtf8Char);
|
||||||
mcCharSet: (Negative: Boolean; SetValue: PCharSet);
|
mcCharSet: (Negative: Boolean; SetValue: PCharSet);
|
||||||
mcAnyChar, mcAnyText: ();
|
mcAnyChar, mcAnyText: ();
|
||||||
end;
|
end;
|
||||||
|
|
||||||
TMaskString = record
|
TMaskString = record
|
||||||
MinLength: Integer;
|
MinLength: Integer;
|
||||||
MaxLength: Integer;
|
MaxLength: Integer;
|
||||||
@ -53,18 +55,18 @@ type
|
|||||||
public
|
public
|
||||||
constructor Create(const AValue: String; const CaseSensitive: Boolean = False);
|
constructor Create(const AValue: String; const CaseSensitive: Boolean = False);
|
||||||
destructor Destroy; override;
|
destructor Destroy; override;
|
||||||
|
|
||||||
function Matches(const AFileName: String): Boolean;
|
function Matches(const AFileName: String): Boolean;
|
||||||
function MatchesWindowsMask(const AFileName: String): Boolean;
|
function MatchesWindowsMask(const AFileName: String): Boolean;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{ TParseStringList }
|
{ TParseStringList }
|
||||||
|
|
||||||
TParseStringList = class(TStringList)
|
TParseStringList = class(TStringList)
|
||||||
public
|
public
|
||||||
constructor Create(const AText, ASeparators: String);
|
constructor Create(const AText, ASeparators: String);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{ TMaskList }
|
{ TMaskList }
|
||||||
|
|
||||||
TMaskList = class
|
TMaskList = class
|
||||||
@ -75,10 +77,10 @@ type
|
|||||||
public
|
public
|
||||||
constructor Create(const AValue: String; ASeparator: Char = ';'; const CaseSensitive: Boolean = False);
|
constructor Create(const AValue: String; ASeparator: Char = ';'; const CaseSensitive: Boolean = False);
|
||||||
destructor Destroy; override;
|
destructor Destroy; override;
|
||||||
|
|
||||||
function Matches(const AFileName: String): Boolean;
|
function Matches(const AFileName: String): Boolean;
|
||||||
function MatchesWindowsMask(const AFileName: String): Boolean;
|
function MatchesWindowsMask(const AFileName: String): Boolean;
|
||||||
|
|
||||||
property Count: Integer read GetCount;
|
property Count: Integer read GetCount;
|
||||||
property Items[Index: Integer]: TMask read GetItem;
|
property Items[Index: Integer]: TMask read GetItem;
|
||||||
end;
|
end;
|
||||||
@ -90,6 +92,26 @@ function MatchesWindowsMaskList(const FileName, Mask: String; Separator: Char =
|
|||||||
|
|
||||||
implementation
|
implementation
|
||||||
|
|
||||||
|
//Utf8 helper functions
|
||||||
|
|
||||||
|
function GetCodePoint(const S: String; const Index: PtrInt): TUTF8Char;
|
||||||
|
//equivalent for Result := S[Index], but for Utf8 encoded strings
|
||||||
|
var
|
||||||
|
p: PChar;
|
||||||
|
PLen: PtrInt;
|
||||||
|
Res: AnsiString; //intermediate needed for PChar -> String -> ShortString assignement
|
||||||
|
begin
|
||||||
|
Result := '';
|
||||||
|
p := UTF8CharStart(PChar(S), Length(S), Index - 1); //zero-based call
|
||||||
|
//determine the length in bytes of this UTF-8 character
|
||||||
|
PLen := UTF8CharacterLength(p);
|
||||||
|
Res := p;
|
||||||
|
//Set correct length for Result (otherwise it returns all chars up to the end of the original string)
|
||||||
|
SetLength(Res,PLen);
|
||||||
|
Result := Res;
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
function MatchesMask(const FileName, Mask: String; const CaseSensitive: Boolean): Boolean;
|
function MatchesMask(const FileName, Mask: String; const CaseSensitive: Boolean): Boolean;
|
||||||
var
|
var
|
||||||
AMask: TMask;
|
AMask: TMask;
|
||||||
@ -184,6 +206,7 @@ var
|
|||||||
CharSet: TCharSet;
|
CharSet: TCharSet;
|
||||||
Valid: Boolean;
|
Valid: Boolean;
|
||||||
C, Last: Char;
|
C, Last: Char;
|
||||||
|
CP: TUtf8Char;
|
||||||
begin
|
begin
|
||||||
SkipAnyText := False;
|
SkipAnyText := False;
|
||||||
|
|
||||||
@ -191,7 +214,7 @@ var
|
|||||||
FMask.Chars[High(FMask.Chars)].CharType := mcCharSet;
|
FMask.Chars[High(FMask.Chars)].CharType := mcCharSet;
|
||||||
|
|
||||||
Inc(I);
|
Inc(I);
|
||||||
if (I <= Length(AValue)) and (AValue[I] = '!') then
|
if (I <= Utf8Length(AValue)) and (GetCodePoint(AValue,I) = '!') then
|
||||||
begin
|
begin
|
||||||
FMask.Chars[High(FMask.Chars)].Negative := True;
|
FMask.Chars[High(FMask.Chars)].Negative := True;
|
||||||
Inc(I);
|
Inc(I);
|
||||||
@ -201,24 +224,30 @@ var
|
|||||||
Last := '-';
|
Last := '-';
|
||||||
CharSet := [];
|
CharSet := [];
|
||||||
Valid := False;
|
Valid := False;
|
||||||
while I <= Length(AValue) do
|
while I <= Utf8Length(AValue) do
|
||||||
begin
|
begin
|
||||||
case AValue[I] of
|
CP := GetCodePoint(AValue,I);
|
||||||
|
//CharSets can only contain 1-byte codepoints
|
||||||
|
if (Length(CP) <> 1) then CharSetError;
|
||||||
|
case CP of
|
||||||
'-':
|
'-':
|
||||||
begin
|
begin
|
||||||
if Last = '-' then CharSetError;
|
if Last = '-' then CharSetError;
|
||||||
Inc(I);
|
Inc(I);
|
||||||
|
if (I > Utf8Length(AValue)) then CharSetError;
|
||||||
if (I > Length(AValue)) then CharSetError;
|
CP := GetCodePoint(AValue, I);
|
||||||
|
if (Length(CP) <> 1) then CharSetError;
|
||||||
if fCaseSensitive then
|
if fCaseSensitive then
|
||||||
begin
|
begin
|
||||||
//DebugLn('Set: ' + Last + '-' + (AValue[I]));
|
//writeln('MaskUtf8: Set: ' + Last + '-' + (CP[1]));
|
||||||
for C := Last to (AValue[I]) do Include(CharSet, C);
|
for C := Last to CP[1] do
|
||||||
|
Include(CharSet, C)
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
//DebugLn('Set: ' + Last + '-' + UpCase(AValue[I]));
|
//writeln('MaskUtf8: Set: ' + Last + '-' + UpCase(CP[1]));
|
||||||
for C := Last to UpCase(AValue[I]) do Include(CharSet, C);
|
for C := Last to UpCase(CP[1]) do
|
||||||
|
Include(CharSet, C)
|
||||||
end;
|
end;
|
||||||
Inc(I);
|
Inc(I);
|
||||||
end;
|
end;
|
||||||
@ -230,15 +259,14 @@ var
|
|||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
if fCaseSensitive then
|
if fCaseSensitive then
|
||||||
Last := AValue[I]
|
Last := CP[1]
|
||||||
else
|
else
|
||||||
Last := UpCase(AValue[I]);
|
Last := UpCase(CP[1]);
|
||||||
Include(CharSet, Last);
|
Include(CharSet, Last);
|
||||||
Inc(I);
|
Inc(I);
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
if (not Valid) or (CharSet = []) then CharSetError;
|
if (not Valid) or (CharSet = []) then CharSetError;
|
||||||
|
|
||||||
New(FMask.Chars[High(FMask.Chars)].SetValue);
|
New(FMask.Chars[High(FMask.Chars)].SetValue);
|
||||||
@ -259,9 +287,9 @@ var
|
|||||||
begin
|
begin
|
||||||
CharType := mcChar;
|
CharType := mcChar;
|
||||||
if fCaseSensitive then
|
if fCaseSensitive then
|
||||||
CharValue := AValue[I]
|
CharValue := GetCodePoint(AValue,I)
|
||||||
else
|
else
|
||||||
CharValue := UpCase(AValue[I]);
|
CharValue := Utf8UpperCase(GetCodePoint(AValue,I));
|
||||||
end;
|
end;
|
||||||
|
|
||||||
Inc(FMask.MinLength);
|
Inc(FMask.MinLength);
|
||||||
@ -278,9 +306,9 @@ begin
|
|||||||
SkipAnyText := False;
|
SkipAnyText := False;
|
||||||
|
|
||||||
I := 1;
|
I := 1;
|
||||||
while I <= Length(AValue) do
|
while I <= Utf8Length(AValue) do
|
||||||
begin
|
begin
|
||||||
case AValue[I] of
|
case GetCodePoint(AValue,I) of
|
||||||
'*': AddAnyText;
|
'*': AddAnyText;
|
||||||
'?': AddAnyChar;
|
'?': AddAnyChar;
|
||||||
'[': AddCharSet;
|
'[': AddCharSet;
|
||||||
@ -316,13 +344,14 @@ function TMask.Matches(const AFileName: String): Boolean;
|
|||||||
var
|
var
|
||||||
L: Integer;
|
L: Integer;
|
||||||
S: String;
|
S: String;
|
||||||
|
|
||||||
function MatchToEnd(MaskIndex, CharIndex: Integer): Boolean;
|
function MatchToEnd(MaskIndex, CharIndex: Integer): Boolean;
|
||||||
var
|
var
|
||||||
I, J: Integer;
|
I, J: Integer;
|
||||||
|
CP: TUtf8Char;
|
||||||
begin
|
begin
|
||||||
Result := False;
|
Result := False;
|
||||||
|
|
||||||
for I := MaskIndex to High(FMask.Chars) do
|
for I := MaskIndex to High(FMask.Chars) do
|
||||||
begin
|
begin
|
||||||
case FMask.Chars[I].CharType of
|
case FMask.Chars[I].CharType of
|
||||||
@ -330,14 +359,17 @@ var
|
|||||||
begin
|
begin
|
||||||
if CharIndex > L then Exit;
|
if CharIndex > L then Exit;
|
||||||
//DebugLn('Match ' + S[CharIndex] + '<?>' + FMask.Chars[I].CharValue);
|
//DebugLn('Match ' + S[CharIndex] + '<?>' + FMask.Chars[I].CharValue);
|
||||||
if S[CharIndex] <> FMask.Chars[I].CharValue then Exit;
|
if GetCodePoint(S,CharIndex) <> FMask.Chars[I].CharValue then Exit;
|
||||||
Inc(CharIndex);
|
Inc(CharIndex);
|
||||||
end;
|
end;
|
||||||
mcCharSet:
|
mcCharSet:
|
||||||
begin
|
begin
|
||||||
if CharIndex > L then Exit;
|
if CharIndex > L then Exit;
|
||||||
|
CP := GetCodePoint(S, CharIndex);
|
||||||
|
//There are only 1-byte codepoints in charsets
|
||||||
|
if (Length(CP) <> 1) then Exit;
|
||||||
if FMask.Chars[I].Negative xor
|
if FMask.Chars[I].Negative xor
|
||||||
(S[CharIndex] in FMask.Chars[I].SetValue^) then Inc(CharIndex)
|
(Cp[1] in FMask.Chars[I].SetValue^) then Inc(CharIndex)
|
||||||
else Exit;
|
else Exit;
|
||||||
end;
|
end;
|
||||||
mcAnyChar:
|
mcAnyChar:
|
||||||
@ -352,7 +384,7 @@ var
|
|||||||
Result := True;
|
Result := True;
|
||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
for J := CharIndex to L do
|
for J := CharIndex to L do
|
||||||
if MatchToEnd(I + 1, J) then
|
if MatchToEnd(I + 1, J) then
|
||||||
begin
|
begin
|
||||||
@ -362,24 +394,24 @@ var
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
Result := CharIndex > L;
|
Result := CharIndex > L;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
begin
|
begin
|
||||||
Result := False;
|
Result := False;
|
||||||
L := Length(AFileName);
|
L := Utf8Length(AFileName);
|
||||||
if L = 0 then
|
if L = 0 then
|
||||||
begin
|
begin
|
||||||
if FMask.MinLength = 0 then Result := True;
|
if FMask.MinLength = 0 then Result := True;
|
||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
if (L < FMask.MinLength) or (L > FMask.MaxLength) then Exit;
|
if (L < FMask.MinLength) or (L > FMask.MaxLength) then Exit;
|
||||||
if fCaseSensitive then
|
if fCaseSensitive then
|
||||||
S := AFileName
|
S := AFileName
|
||||||
else
|
else
|
||||||
S := UpperCase(AFileName);
|
S := Utf8UpperCase(AFileName);
|
||||||
Result := MatchToEnd(0, 1);
|
Result := MatchToEnd(0, 1);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -394,10 +426,10 @@ begin
|
|||||||
// foo. matches only foo but not foo.txt
|
// foo. matches only foo but not foo.txt
|
||||||
// foo.* -> match either foo or foo.*
|
// foo.* -> match either foo or foo.*
|
||||||
|
|
||||||
if (Length(fInitialMask) > 2) and (RightStr(fInitialMask,3) = '*.*') then
|
if (Utf8Length(fInitialMask) > 2) and (Utf8RightStr(fInitialMask,3) = '*.*') then
|
||||||
// foo*.*
|
// foo*.*
|
||||||
begin
|
begin
|
||||||
NewMaskValue := Copy(fInitialMask,1,Length(fInitialMask)-2);
|
NewMaskValue := Utf8Copy(fInitialMask,1,Utf8Length(fInitialMask)-2);
|
||||||
ClearMaskString;
|
ClearMaskString;
|
||||||
InitMaskString(NewMaskValue, fCaseSensitive);
|
InitMaskString(NewMaskValue, fCaseSensitive);
|
||||||
Result := Matches(AFileName);
|
Result := Matches(AFileName);
|
||||||
@ -406,7 +438,7 @@ begin
|
|||||||
InitMaskString(fInitialMask, fCaseSensitive);
|
InitMaskString(fInitialMask, fCaseSensitive);
|
||||||
end
|
end
|
||||||
//else if (Length(fInitialMask) > 1) and (RightStr(fInitialMask,2) = '*.') then
|
//else if (Length(fInitialMask) > 1) and (RightStr(fInitialMask,2) = '*.') then
|
||||||
else if (Length(fInitialMask) > 1) and (fInitialMask[Length(fInitialMask)] = '.') then
|
else if (Utf8Length(fInitialMask) > 1) and (GetCodePoint(fInitialMask,Utf8Length(fInitialMask)) = '.') then
|
||||||
|
|
||||||
//foo*. or *. or foo.
|
//foo*. or *. or foo.
|
||||||
begin
|
begin
|
||||||
@ -415,7 +447,7 @@ begin
|
|||||||
Ext := ExtractFileExt(AFileName);
|
Ext := ExtractFileExt(AFileName);
|
||||||
if (Ext = '') or (Ext = AFileName) then
|
if (Ext = '') or (Ext = AFileName) then
|
||||||
begin
|
begin
|
||||||
NewMaskValue := Copy(fInitialMask,1,Length(fInitialMask)-1);
|
NewMaskValue := Utf8Copy(fInitialMask,1,Utf8Length(fInitialMask)-1);
|
||||||
ClearMaskString;
|
ClearMaskString;
|
||||||
InitMaskString(NewMaskValue, fCaseSensitive);
|
InitMaskString(NewMaskValue, fCaseSensitive);
|
||||||
Result := Matches(AFileName);
|
Result := Matches(AFileName);
|
||||||
@ -428,14 +460,14 @@ begin
|
|||||||
Result := False;
|
Result := False;
|
||||||
end;
|
end;
|
||||||
end
|
end
|
||||||
else if (Length(fInitialMask) > 2) and (RightStr(fInitialMask,2) = '.*') then
|
else if (Utf8Length(fInitialMask) > 2) and (Utf8RightStr(fInitialMask,2) = '.*') then
|
||||||
//foo.* (but not '.*')
|
//foo.* (but not '.*')
|
||||||
begin
|
begin
|
||||||
//First see if we have 'foo'
|
//First see if we have 'foo'
|
||||||
if fCaseSensitive then
|
if fCaseSensitive then
|
||||||
Result := (AFileName = Copy(fInitialMask,1,Length(fInitialMask)-2))
|
Result := (AFileName = Utf8Copy(fInitialMask,1,Utf8Length(fInitialMask)-2))
|
||||||
else
|
else
|
||||||
Result := (CompareText(AFileName,Copy(fInitialMask,1,Length(fInitialMask)-2)) = 0);
|
Result := (Utf8CompareText(AFileName,Utf8Copy(fInitialMask,1,Utf8Length(fInitialMask)-2)) = 0);
|
||||||
if not Result then Result := Matches(AFileName);
|
if not Result then Result := Matches(AFileName);
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
@ -462,7 +494,7 @@ begin
|
|||||||
S := I + 1;
|
S := I + 1;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
if Length(AText) >= S then Add(Copy(AText, S, Length(AText) - S + 1));
|
if Length(AText) >= S then Add(Copy(AText, S, Length(AText) - S + 1));
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -484,7 +516,7 @@ var
|
|||||||
I: Integer;
|
I: Integer;
|
||||||
begin
|
begin
|
||||||
FMasks := TObjectList.Create(True);
|
FMasks := TObjectList.Create(True);
|
||||||
|
|
||||||
S := TParseStringList.Create(AValue, ASeparator);
|
S := TParseStringList.Create(AValue, ASeparator);
|
||||||
try
|
try
|
||||||
for I := 0 to S.Count - 1 do
|
for I := 0 to S.Count - 1 do
|
||||||
@ -497,7 +529,7 @@ end;
|
|||||||
destructor TMaskList.Destroy;
|
destructor TMaskList.Destroy;
|
||||||
begin
|
begin
|
||||||
FMasks.Free;
|
FMasks.Free;
|
||||||
|
|
||||||
inherited Destroy;
|
inherited Destroy;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -506,7 +538,7 @@ var
|
|||||||
I: Integer;
|
I: Integer;
|
||||||
begin
|
begin
|
||||||
Result := False;
|
Result := False;
|
||||||
|
|
||||||
for I := 0 to FMasks.Count - 1 do
|
for I := 0 to FMasks.Count - 1 do
|
||||||
begin
|
begin
|
||||||
if TMask(FMasks.Items[I]).Matches(AFileName) then
|
if TMask(FMasks.Items[I]).Matches(AFileName) then
|
||||||
|
Loading…
Reference in New Issue
Block a user