fcl-passrc: resolver: split source and target codepage

git-svn-id: trunk@47830 -
This commit is contained in:
Mattias Gaertner 2020-12-21 15:50:50 +00:00
parent 049af48bd8
commit fe4fda2a35
3 changed files with 260 additions and 95 deletions

View File

@ -564,6 +564,7 @@ type
TResEvalString = class(TResEvalValue) TResEvalString = class(TResEvalValue)
public public
S: RawByteString; S: RawByteString;
OnlyASCII: boolean;
constructor Create; override; constructor Create; override;
constructor CreateValue(const aValue: RawByteString); constructor CreateValue(const aValue: RawByteString);
function Clone: TResEvalValue; override; function Clone: TResEvalValue; override;
@ -692,7 +693,8 @@ type
private private
FAllowedInts: TResEvalTypedInts; FAllowedInts: TResEvalTypedInts;
{$ifdef FPC_HAS_CPSTRING} {$ifdef FPC_HAS_CPSTRING}
FDefaultEncoding: TSystemCodePage; FDefaultSourceEncoding: TSystemCodePage;
FDefaultStringEncoding: TSystemCodePage;
{$endif} {$endif}
FOnEvalIdentifier: TPasResEvalIdentHandler; FOnEvalIdentifier: TPasResEvalIdentHandler;
FOnEvalParams: TPasResEvalParamsHandler; FOnEvalParams: TPasResEvalParamsHandler;
@ -779,6 +781,8 @@ type
function GetUTF8Str(const s: RawByteString; ErrorEl: TPasElement): String; function GetUTF8Str(const s: RawByteString; ErrorEl: TPasElement): String;
function GetUnicodeStr(const s: RawByteString; ErrorEl: TPasElement): UnicodeString; function GetUnicodeStr(const s: RawByteString; ErrorEl: TPasElement): UnicodeString;
function GetWideChar(const s: RawByteString; out w: WideChar): boolean; function GetWideChar(const s: RawByteString; out w: WideChar): boolean;
function GetExprStringTargetCP(Expr: TPasExpr): TSystemCodePage; virtual; // e.g. var s: String(1234) = 'ä' return 1234
function GetExprStringSourceCP(Expr: TPasExpr): TSystemCodePage; virtual; // e.g. {$codepage 123}var s: String = 'ä' return 123
{$endif} {$endif}
property OnLog: TPasResEvalLogHandler read FOnLog write FOnLog; property OnLog: TPasResEvalLogHandler read FOnLog write FOnLog;
property OnEvalIdentifier: TPasResEvalIdentHandler read FOnEvalIdentifier write FOnEvalIdentifier; property OnEvalIdentifier: TPasResEvalIdentHandler read FOnEvalIdentifier write FOnEvalIdentifier;
@ -786,7 +790,8 @@ type
property OnRangeCheckEl: TPasResEvalRangeCheckElHandler read FOnRangeCheckEl write FOnRangeCheckEl; property OnRangeCheckEl: TPasResEvalRangeCheckElHandler read FOnRangeCheckEl write FOnRangeCheckEl;
property AllowedInts: TResEvalTypedInts read FAllowedInts write FAllowedInts; property AllowedInts: TResEvalTypedInts read FAllowedInts write FAllowedInts;
{$ifdef FPC_HAS_CPSTRING} {$ifdef FPC_HAS_CPSTRING}
property DefaultStringCodePage: TSystemCodePage read FDefaultEncoding write FDefaultEncoding; property DefaultSourceCodePage: TSystemCodePage read FDefaultSourceEncoding write FDefaultSourceEncoding;
property DefaultStringCodePage: TSystemCodePage read FDefaultStringEncoding write FDefaultStringEncoding;
{$endif} {$endif}
end; end;
TResExprEvaluatorClass = class of TResExprEvaluator; TResExprEvaluatorClass = class of TResExprEvaluator;
@ -4126,15 +4131,22 @@ end;
function TResExprEvaluator.EvalPrimitiveExprString(Expr: TPrimitiveExpr function TResExprEvaluator.EvalPrimitiveExprString(Expr: TPrimitiveExpr
): TResEvalValue; ): TResEvalValue;
{ Extracts the value from a Pascal string literal //Extracts the value from a Pascal string literal
//
S is a Pascal string literal e.g. 'Line'#10 // S is a Pascal string literal e.g. 'Line'#10
'' empty string // '' empty string
'''' => "'" // '''' => "'"
#decimal // #decimal
#$hex // #$hex
^l l is a letter a-z // ^l l is a letter a-z
} //
// Codepage:
// For example {$codepage utf8}var s: AnsiString(CP_1251) = 'a';
// Source codepage is CP_UTF8, target codepage is CP_1251
//
// Source codepage is needed for reading non ASCII string literals 'ä'.
// Target codepage is needed for reading non ASCII # literals.
// Target codepage costs time to compute.
procedure RangeError(id: TMaxPrecInt); procedure RangeError(id: TMaxPrecInt);
begin begin
@ -4142,24 +4154,36 @@ function TResExprEvaluator.EvalPrimitiveExprString(Expr: TPrimitiveExpr
RaiseRangeCheck(id,Expr); RaiseRangeCheck(id,Expr);
end; end;
procedure Add(h: String); {$IFDEF FPC_HAS_CPSTRING}
var
TargetCPValid: boolean;
TargetCP: word;
SourceCPValid: boolean;
SourceCP: word;
procedure FetchSourceCP;
begin begin
{$ifdef FPC_HAS_CPSTRING} if SourceCPValid then exit;
if Result.Kind=revkString then SourceCP:=GetExprStringSourceCP(Expr);
TResEvalString(Result).S:=TResEvalString(Result).S+h if SourceCP=DefaultSystemCodePage then
else SourceCP:=CP_ACP;
TResEvalUTF16(Result).S:=TResEvalUTF16(Result).S+GetUnicodeStr(h,Expr); SourceCPValid:=true;
{$else}
TResEvalUTF16(Result).S:=TResEvalUTF16(Result).S+h;
{$endif}
end; end;
procedure AddHash(u: longword; ForceUTF16: boolean); procedure FetchTargetCP;
{$ifdef FPC_HAS_CPSTRING} begin
if TargetCPValid then exit;
TargetCP:=GetExprStringTargetCP(Expr);
if TargetCP=DefaultSystemCodePage then
TargetCP:=CP_ACP;
TargetCPValid:=true;
end;
procedure ForceUTF16;
var var
h: RawByteString; h: RawByteString;
begin begin
if ((u>255) or ForceUTF16) and (Result.Kind=revkString) then if Result.Kind=revkString then
begin begin
// switch to unicodestring // switch to unicodestring
h:=TResEvalString(Result).S; h:=TResEvalString(Result).S;
@ -4167,22 +4191,196 @@ function TResExprEvaluator.EvalPrimitiveExprString(Expr: TPrimitiveExpr
Result:=nil; // in case of exception in GetUnicodeStr Result:=nil; // in case of exception in GetUnicodeStr
Result:=TResEvalUTF16.CreateValue(GetUnicodeStr(h,Expr)); Result:=TResEvalUTF16.CreateValue(GetUnicodeStr(h,Expr));
end; end;
end;
{$ENDIF}
procedure AddSrc(h: String);
{$ifdef FPC_HAS_CPSTRING}
var
Value: TResEvalString;
OnlyASCII: Boolean;
i: Integer;
{$ENDIF}
begin
if h='' then exit;
{$ifdef FPC_HAS_CPSTRING}
OnlyASCII:=true;
for i:=1 to length(h) do
if ord(h[i])>127 then
begin
// append non ASCII -> needs codepage
OnlyASCII:=false;
FetchSourceCP;
SetCodePage(rawbytestring(h),SourceCP,false);
break;
end;
if Result.Kind=revkString then if Result.Kind=revkString then
TResEvalString(Result).S:=TResEvalString(Result).S+Chr(u) begin
Value:=TResEvalString(Result);
if OnlyASCII and Value.OnlyASCII then
begin
// concatenate ascii strings
Value.S:=Value.S+h;
exit;
end;
// concatenate non ascii strings
FetchTargetCP;
case TargetCP of
CP_UTF16:
begin
ForceUTF16;
TResEvalUTF16(Result).S:=TResEvalUTF16(Result).S+GetUnicodeStr(h,Expr);
//writeln('AddSrc len(h)=',length(h),' StringCodePage=',StringCodePage(h),' GetCodePage=',GetCodePage(h),' S=',length(TResEvalUTF16(Result).S));
end;
CP_UTF16BE:
RaiseNotYetImplemented(20201220222608,Expr);
else
begin
if Value.OnlyASCII and (Value.S<>'') then
SetCodePage(Value.S,TargetCP,false);
Value.S:=Value.S+h;
end;
end;
end
else
TResEvalUTF16(Result).S:=TResEvalUTF16(Result).S+GetUnicodeStr(h,Expr);
{$else}
TResEvalUTF16(Result).S:=TResEvalUTF16(Result).S+h;
{$endif}
end;
procedure AddHash(u: longword);
{$ifdef FPC_HAS_CPSTRING}
begin
if Result.Kind=revkString then
TResEvalString(Result).s:=TResEvalString(Result).S+Chr(u)
else else
TResEvalUTF16(Result).S:=TResEvalUTF16(Result).S+WideChar(u); TResEvalUTF16(Result).S:=TResEvalUTF16(Result).S+WideChar(u);
end; end;
{$else} {$else}
begin begin
TResEvalUTF16(Result).S:=TResEvalUTF16(Result).S+WideChar(u); TResEvalUTF16(Result).S:=TResEvalUTF16(Result).S+WideChar(u);
if ForceUTF16 then ;
end; end;
{$endif} {$endif}
function ReadHash(Value: TResEvalValue; const S: string; p, l: integer): integer;
var
StartP: Integer;
u: longword;
c: Char;
{$ifdef FPC_HAS_CPSTRING}
ValueAnsi: TResEvalString;
ValueUTF16: TResEvalUTF16;
OldCP: TSystemCodePage;
{$ENDIF}
begin
Result:=p;
inc(Result);
if Result>l then
RaiseInternalError(20181016121354); // error in scanner
if S[Result]='$' then
begin
// #$hexnumber
inc(Result);
StartP:=Result;
u:=0;
while Result<=l do
begin
c:=S[Result];
case c of
'0'..'9': u:=u*16+longword(ord(c)-ord('0'));
'a'..'f': u:=u*16+longword(ord(c)-ord('a'))+10;
'A'..'F': u:=u*16+longword(ord(c)-ord('A'))+10;
else break;
end;
if u>$10FFFF then
RangeError(20170523115712);
inc(Result);
end;
end
else
begin
// #decimalnumber
StartP:=Result;
u:=0;
while Result<=l do
begin
c:=S[Result];
case c of
'0'..'9': u:=u*10+longword(ord(c)-ord('0'));
else break;
end;
if u>$ffff then
RangeError(20170523123137);
inc(Result);
end;
end;
if Result=StartP then
RaiseInternalError(20170523123806);
{$IFDEF FPC_HAS_CPSTRING}
if u<128 then
begin
// ASCII
AddHash(u);
exit;
end;
// non ASCII
FetchTargetCP;
if (TargetCP=CP_UTF16) or (TargetCP=CP_UTF16BE) or (u>255) then
begin
ForceUTF16;
ValueUTF16:=TResEvalUTF16(Value);
if u>$ffff then
begin
// split into two
dec(u,$10000);
ValueUTF16.S:=ValueUTF16.S+WideChar($D800+(u shr 10));
ValueUTF16.S:=ValueUTF16.S+WideChar($DC00+(u and $3ff));
end
else
ValueUTF16.S:=ValueUTF16.S+WideChar(u);
if TargetCP=CP_UTF16BE then
RaiseNotYetImplemented(20201220212206,Expr);
end
else
begin
// byte encoding
ValueAnsi:=TResEvalString(Value);
if ValueAnsi.S<>'' then
begin
// append
OldCP:=StringCodePage(ValueAnsi.S);
if OldCP<>TargetCP then
SetCodePage(ValueAnsi.S,TargetCP,false);
ValueAnsi.S:=ValueAnsi.S+Chr(u);
end
else
begin
// start
ValueAnsi.S:=Chr(u);
SetCodePage(ValueAnsi.S,TargetCP,false);
end;
ValueAnsi.OnlyASCII:=false;
end;
{$ELSE}
if u>$ffff then
begin
// split into two
dec(u,$10000);
AddHash($D800+(u shr 10));
AddHash($DC00+(u and $3ff));
end
else
AddHash(u);
{$ENDIF}
end;
var var
p, StartP, l: integer; p, StartP, l: integer;
c: Char; c: Char;
u: longword;
S: String; S: String;
begin begin
Result:=nil; Result:=nil;
@ -4194,6 +4392,10 @@ begin
if l=0 then if l=0 then
RaiseInternalError(20170523113809); RaiseInternalError(20170523113809);
{$ifdef FPC_HAS_CPSTRING} {$ifdef FPC_HAS_CPSTRING}
TargetCPValid:=false;
TargetCP:=CP_ACP;
SourceCPValid:=false;
SourceCP:=CP_ACP;
Result:=TResEvalString.Create; Result:=TResEvalString.Create;
{$else} {$else}
Result:=TResEvalUTF16.Create; Result:=TResEvalUTF16.Create;
@ -4216,12 +4418,12 @@ begin
'''': '''':
begin begin
if p>StartP then if p>StartP then
Add(copy(S,StartP,p-StartP)); AddSrc(copy(S,StartP,p-StartP));
inc(p); inc(p);
StartP:=p; StartP:=p;
if (p>l) or (S[p]<>'''') then if (p>l) or (S[p]<>'''') then
break; break;
Add(''''); AddSrc('''');
inc(p); inc(p);
StartP:=p; StartP:=p;
end; end;
@ -4230,65 +4432,10 @@ begin
end; end;
until false; until false;
if p>StartP then if p>StartP then
Add(copy(S,StartP,p-StartP)); AddSrc(copy(S,StartP,p-StartP));
end; end;
'#': '#':
begin p:=ReadHash(Result,S,p,l);
inc(p);
if p>l then
RaiseInternalError(20181016121354);
if S[p]='$' then
begin
// #$hexnumber
inc(p);
StartP:=p;
u:=0;
while p<=l do
begin
c:=S[p];
case c of
'0'..'9': u:=u*16+longword(ord(c)-ord('0'));
'a'..'f': u:=u*16+longword(ord(c)-ord('a'))+10;
'A'..'F': u:=u*16+longword(ord(c)-ord('A'))+10;
else break;
end;
if u>$10FFFF then
RangeError(20170523115712);
inc(p);
end;
if p=StartP then
RaiseInternalError(20170207164956);
if u>$ffff then
begin
// split into two
dec(u,$10000);
AddHash($D800+(u shr 10),true);
AddHash($DC00+(u and $3ff),true);
end
else
AddHash(u,p-StartP>2);
end
else
begin
// #decimalnumber
StartP:=p;
u:=0;
while p<=l do
begin
c:=S[p];
case c of
'0'..'9': u:=u*10+longword(ord(c)-ord('0'));
else break;
end;
if u>$ffff then
RangeError(20170523123137);
inc(p);
end;
if p=StartP then
RaiseInternalError(20170523123806);
AddHash(u,(S[StartP]='0') and (u>0));
end;
end;
'^': '^':
begin begin
// ^A is #1 // ^A is #1
@ -4297,8 +4444,8 @@ begin
RaiseInternalError(20181016121520); RaiseInternalError(20181016121520);
c:=S[p]; c:=S[p];
case c of case c of
'a'..'z': AddHash(ord(c)-ord('a')+1,false); 'a'..'z': AddHash(ord(c)-ord('a')+1);
'A'..'Z': AddHash(ord(c)-ord('A')+1,false); 'A'..'Z': AddHash(ord(c)-ord('A')+1);
else RaiseInternalError(20170523123809); else RaiseInternalError(20170523123809);
end; end;
inc(p); inc(p);
@ -4324,7 +4471,8 @@ begin
inherited Create; inherited Create;
FAllowedInts:=ReitDefaults; FAllowedInts:=ReitDefaults;
{$ifdef FPC_HAS_CPSTRING} {$ifdef FPC_HAS_CPSTRING}
FDefaultEncoding:=CP_ACP; FDefaultSourceEncoding:=system.DefaultSystemCodePage;
FDefaultStringEncoding:=CP_ACP;
{$endif} {$endif}
end; end;
@ -5116,11 +5264,11 @@ end;
function TResExprEvaluator.GetCodePage(const s: RawByteString): TSystemCodePage; function TResExprEvaluator.GetCodePage(const s: RawByteString): TSystemCodePage;
begin begin
if s='' then exit(DefaultStringCodePage); if s='' then exit(DefaultSourceCodePage);
Result:=StringCodePage(s); Result:=StringCodePage(s);
if (Result=CP_ACP) or (Result=CP_NONE) then if (Result=CP_ACP) or (Result=CP_NONE) then
begin begin
Result:=DefaultStringCodePage; Result:=DefaultSourceCodePage;
if (Result=CP_ACP) or (Result=CP_NONE) then if (Result=CP_ACP) or (Result=CP_NONE) then
begin begin
Result:=System.DefaultSystemCodePage; Result:=System.DefaultSystemCodePage;
@ -5182,7 +5330,7 @@ var
begin begin
if s='' then exit(''); if s='' then exit('');
CP:=GetCodePage(s); CP:=GetCodePage(s);
if CP=CP_UTF8 then if (CP=CP_UTF8) or ((CP=CP_ACP) and (DefaultSystemCodePage=CP_UTF8)) then
begin begin
if ErrorEl<>nil then if ErrorEl<>nil then
CheckValidUTF8(s,ErrorEl); CheckValidUTF8(s,ErrorEl);
@ -5217,6 +5365,20 @@ begin
Result:=true; Result:=true;
end; end;
end; end;
function TResExprEvaluator.GetExprStringTargetCP(Expr: TPasExpr
): TSystemCodePage;
begin
Result:=DefaultStringCodePage;
if Expr=nil then ;
end;
function TResExprEvaluator.GetExprStringSourceCP(Expr: TPasExpr
): TSystemCodePage;
begin
Result:=DefaultSourceCodePage;
if Expr=nil then ;
end;
{$endif} {$endif}
procedure TResExprEvaluator.PredBool(Value: TResEvalBool; ErrorEl: TPasElement); procedure TResExprEvaluator.PredBool(Value: TResEvalBool; ErrorEl: TPasElement);
@ -5565,6 +5727,7 @@ end;
constructor TResEvalString.Create; constructor TResEvalString.Create;
begin begin
inherited Create; inherited Create;
OnlyASCII:=true;
Kind:=revkString; Kind:=revkString;
end; end;
@ -5578,6 +5741,7 @@ function TResEvalString.Clone: TResEvalValue;
begin begin
Result:=inherited Clone; Result:=inherited Clone;
TResEvalString(Result).S:=S; TResEvalString(Result).S:=S;
TResEvalString(Result).OnlyASCII:=OnlyASCII;
end; end;
function TResEvalString.AsString: string; function TResEvalString.AsString: string;

View File

@ -15704,7 +15704,7 @@ begin
end; end;
{$endif} {$endif}
revkUnicodeString: revkUnicodeString:
if length(TResEvalUTF16(Value).S)=1 then if (length(TResEvalUTF16(Value).S)=1) and (bt in btAllChars) then
begin begin
w:=TResEvalUTF16(Value).S[1]; w:=TResEvalUTF16(Value).S[1];
{$ifdef FPC_HAS_CPSTRING} {$ifdef FPC_HAS_CPSTRING}

View File

@ -299,7 +299,7 @@ type
Procedure TestIntegerBoolFail; Procedure TestIntegerBoolFail;
Procedure TestBooleanOperators; Procedure TestBooleanOperators;
Procedure TestStringOperators; Procedure TestStringOperators;
Procedure TestWideCharOperators; Procedure TestWideCharOperators_DelphiUnicode;
Procedure TestFloatOperators; Procedure TestFloatOperators;
Procedure TestCAssignments; Procedure TestCAssignments;
Procedure TestTypeCastBaseTypes; Procedure TestTypeCastBaseTypes;
@ -2181,6 +2181,7 @@ begin
Result.OnFindUnit:=@OnPasResolverFindUnit; Result.OnFindUnit:=@OnPasResolverFindUnit;
Result.OnLog:=@OnPasResolverLog; Result.OnLog:=@OnPasResolverLog;
Result.Hub:=Hub; Result.Hub:=Hub;
Result.ExprEvaluator.DefaultSourceCodePage:=CP_UTF8;
FModules.Add(Result); FModules.Add(Result);
end; end;
@ -4678,9 +4679,9 @@ begin
ParseProgram; ParseProgram;
end; end;
procedure TTestResolver.TestWideCharOperators; procedure TTestResolver.TestWideCharOperators_DelphiUnicode;
begin begin
ResolverEngine.ExprEvaluator.DefaultStringCodePage:=CP_UTF8; ResolverEngine.ExprEvaluator.DefaultStringCodePage:=CP_UTF16;
ResolverEngine.BaseTypeChar:=btWideChar; ResolverEngine.BaseTypeChar:=btWideChar;
ResolverEngine.BaseTypeString:=btUnicodeString; ResolverEngine.BaseTypeString:=btUnicodeString;
StartProgram(false); StartProgram(false);