From 1a560e98756f30a7789d8089230c251e8f3835c9 Mon Sep 17 00:00:00 2001 From: Jonas Maebe Date: Fri, 19 Jul 2013 16:33:14 +0000 Subject: [PATCH] * when concatenating ansistrings, do not map CP_NONE (rawbytestring) to CP_ACP (defaultsystemcodepage), because if all input strings have the same code page then the result should also have that code page if it's assigned to a rawbytestring rather than getting defaultsystemcodepage * do not consider empty strings to determine the code page of the result in fpc_AnsiStr_Concat_multi(), because that will cause a different result than when using a sequence of fpc_AnsiStr_Concat() calls (it ignores empty strings to determine the result code page) and it's also slower * do not consider the run time code page of the destination string in fpc_AnsiStr_Concat(_multi)() because Delphi does not do so either. This was introduced in r19118, probably to hide another bug + test git-svn-id: branches/cpstrrtl@25143 - --- .gitattributes | 1 + compiler/nadd.pas | 8 +++- compiler/nopt.pas | 8 +++- compiler/symdef.pas | 6 ++- rtl/inc/astrings.inc | 89 +++++++++++++++++++++++-------------- rtl/java/jastrings.inc | 73 +++++++++++++++++++----------- tests/test/tcpstrconcat4.pp | 49 ++++++++++++++++++++ 7 files changed, 170 insertions(+), 64 deletions(-) create mode 100644 tests/test/tcpstrconcat4.pp diff --git a/.gitattributes b/.gitattributes index f2494cc5c6..de3844be98 100644 --- a/.gitattributes +++ b/.gitattributes @@ -11011,6 +11011,7 @@ tests/test/tcpstrchar2ansistr.pp svneol=native#text/plain tests/test/tcpstrconcat.pp svneol=native#text/plain tests/test/tcpstrconcat2.pp svneol=native#text/plain tests/test/tcpstrconcat3.pp svneol=native#text/plain +tests/test/tcpstrconcat4.pp svneol=native#text/plain tests/test/tcpstrconcatmulti.pp svneol=native#text/plain tests/test/tcpstrconcatmulti2.pp svneol=native#text/plain tests/test/tcpstrpchar2ansistr.pp svneol=native#text/plain diff --git a/compiler/nadd.pas b/compiler/nadd.pas index 396df1576f..f9bcbb7494 100644 --- a/compiler/nadd.pas +++ b/compiler/nadd.pas @@ -2170,7 +2170,9 @@ implementation if is_ansistring(resultdef) then para:=ccallparanode.create( cordconstnode.create( - getparaencoding(resultdef), + { don't use getparaencoding(), we have to know + when the result is rawbytestring } + tstringdef(resultdef).encoding, u16inttype, true ), @@ -2208,7 +2210,9 @@ implementation if is_ansistring(resultdef) then para:=ccallparanode.create( cordconstnode.create( - getparaencoding(resultdef), + { don't use getparaencoding(), we have to know + when the result is rawbytestring } + tstringdef(resultdef).encoding, u16inttype, true ), diff --git a/compiler/nopt.pas b/compiler/nopt.pas index c5fe243a9a..b81419faa6 100644 --- a/compiler/nopt.pas +++ b/compiler/nopt.pas @@ -349,7 +349,9 @@ begin if is_ansistring(p.resultdef) then para:=ccallparanode.create( cordconstnode.create( - getparaencoding(p.resultdef), + { don't use getparaencoding(), we have to know + when the result is rawbytestring } + tstringdef(p.resultdef).encoding, u16inttype, true ), @@ -383,7 +385,9 @@ begin if is_ansistring(p.resultdef) then para:=ccallparanode.create( cordconstnode.create( - getparaencoding(p.resultdef), + { don't use getparaencoding(), we have to know + when the result is rawbytestring } + tstringdef(p.resultdef).encoding, u16inttype, true ), diff --git a/compiler/symdef.pas b/compiler/symdef.pas index 65ce202f02..8465b9db4b 100644 --- a/compiler/symdef.pas +++ b/compiler/symdef.pas @@ -1125,7 +1125,11 @@ implementation function getparaencoding(def:tdef):tstringencoding; inline; begin { don't pass CP_NONE encoding to internal functions - they expect 0 encoding instead } + they expect 0 encoding instead + exception: result of string concatenation, because if you pass the + result of a string concatenation to a rawbytestring, the result of + that concatenation shouldn't be converted to defaultsystemcodepage + if all strings have the same type } result:=tstringdef(def).encoding; if result=CP_NONE then result:=0 diff --git a/rtl/inc/astrings.inc b/rtl/inc/astrings.inc index 362583d5d7..bbc8b8d854 100644 --- a/rtl/inc/astrings.inc +++ b/rtl/inc/astrings.inc @@ -214,10 +214,9 @@ Var S1CP, S2CP, DestCP: TSystemCodePage; begin {$ifdef FPC_HAS_CPSTRING} - if (Pointer(DestS)=nil) then - DestCP:=cp - else - DestCP:=StringCodePage(DestS); + DestCP:=cp; + if DestCp=CP_NONE then + DestCP:=DefaultSystemCodePage; {$else FPC_HAS_CPSTRING} DestCP:=StringCodePage(DestS); {$endif FPC_HAS_CPSTRING} @@ -235,6 +234,13 @@ begin else S2CP:=StringCodePage(S2); S2CP:=TranslatePlaceholderCP(S2CP); +{$ifdef FPC_HAS_CPSTRING} + { if the result is rawbytestring and both strings have the same code page, + keep that code page } + if (cp=CP_NONE) and + (S1CP=S2CP) then + DestCP:=S1CP; +{$endif FPC_HAS_CPSTRING} if (S1CP<>DestCP) or (S2CP<>DestCP) then begin ansistr_concat_complex(DestS,S1,S2,DestCP); @@ -284,15 +290,17 @@ end; {$define FPC_HAS_ANSISTR_CONCAT_MULTI} procedure fpc_AnsiStr_Concat_multi (var DestS:RawByteString;const sarr:array of RawByteString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}); compilerproc; Var - lowstart,i : Longint; + lowstart, + nonemptystart, + i : Longint; p,pc : pointer; Size,NewLen, OldDestLen : SizeInt; destcopy : pointer; - DestCP : TSystemCodePage; U : UnicodeString; - sameCP : Boolean; + DestCP, tmpCP : TSystemCodePage; + sameCP : Boolean; begin if high(sarr)=0 then begin @@ -300,20 +308,26 @@ begin exit; end; {$ifdef FPC_HAS_CPSTRING} - if (Pointer(DestS)=nil) then - DestCP:=cp - else - DestCP:=StringCodePage(DestS); + DestCP:=cp; + if DestCp=CP_NONE then + DestCP:=DefaultSystemCodePage; {$else FPC_HAS_CPSTRING} DestCP:=StringCodePage(DestS); {$endif FPC_HAS_CPSTRING} DestCP:=TranslatePlaceholderCP(DestCP); sameCP:=true; lowstart:=low(sarr); - for i:=lowstart to high(sarr) do + { skip empty strings } + while (lowstart<=high(sarr)) and + (sarr[lowstart]='') do + inc(lowstart); + tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[lowstart])); + for i:=lowstart+1 to high(sarr) do begin - tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[i])); - if (DestCP<>tmpCp) then + { ignore the code page of empty strings, it will always be + DefaultSystemCodePage but it doesn't matter for the outcome } + if (sarr[i]<>'') and + (tmpCP<>TranslatePlaceholderCP(StringCodePage(sarr[i]))) then begin sameCP:=false; break; @@ -323,45 +337,52 @@ begin begin U:=''; for i:=lowstart to high(sarr) do - U:=U+UnicodeString(sarr[i]); + if sarr[i]<>'' then + U:=U+UnicodeString(sarr[i]); DestS:=''; widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(U)),DestS,DestCP,Length(U)); exit; end; - +{$ifdef FPC_HAS_CPSTRING} + { if the result is rawbytestring and all strings have the same code page, + keep that code page } + if cp=CP_NONE then + DestCP:=tmpCP; +{$endif FPC_HAS_CPSTRING} destcopy:=nil; - lowstart:=low(sarr); - if Pointer(DestS)=Pointer(sarr[lowstart]) then - inc(lowstart); + nonemptystart:=lowstart; { Check for another reuse, then we can't use the append optimization } - for i:=lowstart to high(sarr) do + if DestS<>'' then begin - if Pointer(DestS)=Pointer(sarr[i]) then + if Pointer(DestS)=Pointer(sarr[lowstart]) then + inc(lowstart); + for i:=lowstart to high(sarr) do begin - { if DestS is used somewhere in the middle of the expression, - we need to make sure the original string still exists after - we empty/modify DestS } - destcopy:=pointer(dests); - fpc_AnsiStr_Incr_Ref(destcopy); - lowstart:=low(sarr); - break; + if Pointer(DestS)=Pointer(sarr[i]) then + begin + { if DestS is used somewhere in the middle of the expression, + we need to make sure the original string still exists after + we empty/modify DestS } + destcopy:=pointer(dests); + fpc_AnsiStr_Incr_Ref(destcopy); + lowstart:=nonemptystart; + break; + end; end; end; { Start with empty DestS if we start with concatting - the first array element } - if lowstart=low(sarr) then + the first (non-empty) array element } + if lowstart=nonemptystart then DestS:=''; OldDestLen:=length(DestS); { Calculate size of the result so we can do a single call to SetLength() } NewLen:=0; - for i:=low(sarr) to high(sarr) do + for i:=nonemptystart to high(sarr) do inc(NewLen,length(sarr[i])); SetLength(DestS,NewLen); - if (StringCodePage(DestS) <> DestCP) then - SetCodePage(DestS,DestCP,False); { Concat all strings, except the string we already copied in DestS } pc:=Pointer(DestS)+OldDestLen; @@ -375,6 +396,8 @@ begin inc(pc,size); end; end; + SetCodePage(DestS,tmpCP,False); + SetCodePage(DestS,DestCP,True); fpc_AnsiStr_Decr_Ref(destcopy); end; {$endif FPC_HAS_ANSISTR_CONCAT_MULTI} diff --git a/rtl/java/jastrings.inc b/rtl/java/jastrings.inc index cb4cdf78ff..b4fe81edcf 100644 --- a/rtl/java/jastrings.inc +++ b/rtl/java/jastrings.inc @@ -334,15 +334,17 @@ Function fpc_AnsiStr_To_AnsiStr (const S : RawByteString;cp : TSystemCodePage): {$define FPC_HAS_ANSISTR_CONCAT_MULTI} procedure fpc_AnsiStr_Concat_multi (var DestS:RawByteString;const sarr:array of RawByteString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}); compilerproc; Var - lowstart,i : Longint; + lowstart, + nonemptystart, + i : Longint; p : pointer; Size,NewLen, OldDestLen : SizeInt; destcopy : RawByteString; - DestCP : TSystemCodePage; U : UnicodeString; - sameCP : Boolean; + DestCP : TSystemCodePage; tmpCP : TSystemCodePage; + sameCP : Boolean; begin if high(sarr)=0 then begin @@ -350,20 +352,26 @@ begin exit; end; {$ifdef FPC_HAS_CPSTRING} - if (Pointer(DestS)=nil) then - DestCP:=cp - else - DestCP:=StringCodePage(DestS); + DestCP:=cp; + if DestCp=CP_NONE then + DestCP:=DefaultSystemCodePage; {$else FPC_HAS_CPSTRING} DestCP:=StringCodePage(DestS); {$endif FPC_HAS_CPSTRING} DestCP:=TranslatePlaceholderCP(DestCP); sameCP:=true; lowstart:=low(sarr); - for i:=lowstart to high(sarr) do + { skip empty strings } + while (lowstart<=high(sarr)) and + (sarr[lowstart]='') do + inc(lowstart); + tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[lowstart])); + for i:=lowstart+1 to high(sarr) do begin - tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[i])); - if (DestCP<>tmpCp) then + { ignore the code page of empty strings, it will always be + DefaultSystemCodePage but it doesn't matter for the outcome } + if (sarr[i]<>'') and + (tmpCP<>TranslatePlaceholderCP(StringCodePage(sarr[i]))) then begin sameCP:=false; break; @@ -373,33 +381,43 @@ begin begin U:=''; for i:=lowstart to high(sarr) do - U:=U+UnicodeString(sarr[i]); + if sarr[i]<>'' then + U:=U+UnicodeString(sarr[i]); DestS:=''; widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(JLString(U).toCharArray),DestS,DestCP,Length(U)); exit; end; + {$ifdef FPC_HAS_CPSTRING} + { if the result is rawbytestring and all strings have the same code page, + keep that code page } + if cp=CP_NONE then + DestCP:=tmpCP; + {$endif FPC_HAS_CPSTRING} - lowstart:=low(sarr); - if Pointer(DestS)=Pointer(sarr[lowstart]) then - inc(lowstart); + nonemptystart:=lowstart; { Check for another reuse, then we can't use the append optimization } - for i:=lowstart to high(sarr) do + if DestS<>'' then begin - if Pointer(DestS)=Pointer(sarr[i]) then + if Pointer(DestS)=Pointer(sarr[lowstart]) then + inc(lowstart); + for i:=lowstart to high(sarr) do begin - { if DestS is used somewhere in the middle of the expression, - we need to make sure the original string still exists after - we empty/modify DestS -- not necessary on JVM platform, ansistrings - are not explicitly refrence counted there } - lowstart:=low(sarr); - break; + if Pointer(DestS)=Pointer(sarr[i]) then + begin + { if DestS is used somewhere in the middle of the expression, + we need to make sure the original string still exists after + we empty/modify DestS -- not necessary on JVM platform, ansistrings + are not explicitly refrence counted there } + lowstart:=nonemptystart; + break; + end; end; end; { Start with empty DestS if we start with concatting - the first array element } - if lowstart=low(sarr) then + the first (non-empty) array element } + if lowstart=nonemptystart then DestS:=''; OldDestLen:=length(DestS); { Calculate size of the result so we can do @@ -408,8 +426,6 @@ begin for i:=low(sarr) to high(sarr) do inc(NewLen,length(sarr[i])); SetLength(DestS,NewLen); - if (StringCodePage(DestS) <> DestCP) then - SetCodePage(DestS,DestCP,False); { Concat all strings, except the string we already copied in DestS } NewLen:=OldDestLen; @@ -423,6 +439,11 @@ begin inc(NewLen,size); end; end; + if NewLen<>0 then + begin + SetCodePage(DestS,tmpCP,False); + SetCodePage(DestS,DestCP,True); + end; end; diff --git a/tests/test/tcpstrconcat4.pp b/tests/test/tcpstrconcat4.pp new file mode 100644 index 0000000000..340e4b5eba --- /dev/null +++ b/tests/test/tcpstrconcat4.pp @@ -0,0 +1,49 @@ +{$mode delphiunicode} + +type + tstr850 = type ansistring(850); + tstr866 = type ansistring(866); + tstr65001 = type ansistring(65001); + +procedure test; +var + s1: tstr850; + s2: tstr866; + s3: tstr65001; + r: rawbytestring; +begin + s1:='a'; + s2:='b'; + s3:='c'; + r:='d'; + r:=s1+s2; + writeln(stringcodepage(r)); + if (stringcodepage(r)<>0) and + (stringcodepage(r)<>defaultsystemcodepage) then + halt(1); + setcodepage(r,850); + r:=s1+s2; + writeln(stringcodepage(r)); + if (stringcodepage(r)<>0) and + (stringcodepage(r)<>defaultsystemcodepage) then + halt(2); + setcodepage(r,CP_ASCII); + r:=s1+s2; + writeln(stringcodepage(r)); + if (stringcodepage(r)<>0) and + (stringcodepage(r)<>defaultsystemcodepage) then + halt(3); + r:=s1+s1; + writeln(stringcodepage(r)); + if (stringcodepage(r)<>stringcodepage(s1)) then + halt(4); + r:=s2+s2; + writeln(stringcodepage(r)); + if (stringcodepage(r)<>stringcodepage(s2)) then + halt(5); +end; + +begin + test; +end. +