* when concatenating ansistrings, do not map CP_NONE (rawbytestring) to

CP_ACP (defaultsystemcodepage), because if all input strings have the
    same code page then the result should also have that code page if it's
    assigned to a rawbytestring rather than getting defaultsystemcodepage
  * do not consider empty strings to determine the code page of the result
    in fpc_AnsiStr_Concat_multi(), because that will cause a different
    result than when using a sequence of fpc_AnsiStr_Concat() calls (it
    ignores empty strings to determine the result code page) and it's also
    slower
  * do not consider the run time code page of the destination string in
    fpc_AnsiStr_Concat(_multi)() because Delphi does not do so either. This
    was introduced in r19118, probably to hide another bug
  + test

git-svn-id: branches/cpstrrtl@25143 -
This commit is contained in:
Jonas Maebe 2013-07-19 16:33:14 +00:00
parent 9b9252e507
commit 1a560e9875
7 changed files with 170 additions and 64 deletions

1
.gitattributes vendored
View File

@ -11011,6 +11011,7 @@ tests/test/tcpstrchar2ansistr.pp svneol=native#text/plain
tests/test/tcpstrconcat.pp svneol=native#text/plain
tests/test/tcpstrconcat2.pp svneol=native#text/plain
tests/test/tcpstrconcat3.pp svneol=native#text/plain
tests/test/tcpstrconcat4.pp svneol=native#text/plain
tests/test/tcpstrconcatmulti.pp svneol=native#text/plain
tests/test/tcpstrconcatmulti2.pp svneol=native#text/plain
tests/test/tcpstrpchar2ansistr.pp svneol=native#text/plain

View File

@ -2170,7 +2170,9 @@ implementation
if is_ansistring(resultdef) then
para:=ccallparanode.create(
cordconstnode.create(
getparaencoding(resultdef),
{ don't use getparaencoding(), we have to know
when the result is rawbytestring }
tstringdef(resultdef).encoding,
u16inttype,
true
),
@ -2208,7 +2210,9 @@ implementation
if is_ansistring(resultdef) then
para:=ccallparanode.create(
cordconstnode.create(
getparaencoding(resultdef),
{ don't use getparaencoding(), we have to know
when the result is rawbytestring }
tstringdef(resultdef).encoding,
u16inttype,
true
),

View File

@ -349,7 +349,9 @@ begin
if is_ansistring(p.resultdef) then
para:=ccallparanode.create(
cordconstnode.create(
getparaencoding(p.resultdef),
{ don't use getparaencoding(), we have to know
when the result is rawbytestring }
tstringdef(p.resultdef).encoding,
u16inttype,
true
),
@ -383,7 +385,9 @@ begin
if is_ansistring(p.resultdef) then
para:=ccallparanode.create(
cordconstnode.create(
getparaencoding(p.resultdef),
{ don't use getparaencoding(), we have to know
when the result is rawbytestring }
tstringdef(p.resultdef).encoding,
u16inttype,
true
),

View File

@ -1125,7 +1125,11 @@ implementation
function getparaencoding(def:tdef):tstringencoding; inline;
begin
{ don't pass CP_NONE encoding to internal functions
they expect 0 encoding instead }
they expect 0 encoding instead
exception: result of string concatenation, because if you pass the
result of a string concatenation to a rawbytestring, the result of
that concatenation shouldn't be converted to defaultsystemcodepage
if all strings have the same type }
result:=tstringdef(def).encoding;
if result=CP_NONE then
result:=0

View File

@ -214,10 +214,9 @@ Var
S1CP, S2CP, DestCP: TSystemCodePage;
begin
{$ifdef FPC_HAS_CPSTRING}
if (Pointer(DestS)=nil) then
DestCP:=cp
else
DestCP:=StringCodePage(DestS);
DestCP:=cp;
if DestCp=CP_NONE then
DestCP:=DefaultSystemCodePage;
{$else FPC_HAS_CPSTRING}
DestCP:=StringCodePage(DestS);
{$endif FPC_HAS_CPSTRING}
@ -235,6 +234,13 @@ begin
else
S2CP:=StringCodePage(S2);
S2CP:=TranslatePlaceholderCP(S2CP);
{$ifdef FPC_HAS_CPSTRING}
{ if the result is rawbytestring and both strings have the same code page,
keep that code page }
if (cp=CP_NONE) and
(S1CP=S2CP) then
DestCP:=S1CP;
{$endif FPC_HAS_CPSTRING}
if (S1CP<>DestCP) or (S2CP<>DestCP) then
begin
ansistr_concat_complex(DestS,S1,S2,DestCP);
@ -284,15 +290,17 @@ end;
{$define FPC_HAS_ANSISTR_CONCAT_MULTI}
procedure fpc_AnsiStr_Concat_multi (var DestS:RawByteString;const sarr:array of RawByteString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}); compilerproc;
Var
lowstart,i : Longint;
lowstart,
nonemptystart,
i : Longint;
p,pc : pointer;
Size,NewLen,
OldDestLen : SizeInt;
destcopy : pointer;
DestCP : TSystemCodePage;
U : UnicodeString;
sameCP : Boolean;
DestCP,
tmpCP : TSystemCodePage;
sameCP : Boolean;
begin
if high(sarr)=0 then
begin
@ -300,20 +308,26 @@ begin
exit;
end;
{$ifdef FPC_HAS_CPSTRING}
if (Pointer(DestS)=nil) then
DestCP:=cp
else
DestCP:=StringCodePage(DestS);
DestCP:=cp;
if DestCp=CP_NONE then
DestCP:=DefaultSystemCodePage;
{$else FPC_HAS_CPSTRING}
DestCP:=StringCodePage(DestS);
{$endif FPC_HAS_CPSTRING}
DestCP:=TranslatePlaceholderCP(DestCP);
sameCP:=true;
lowstart:=low(sarr);
for i:=lowstart to high(sarr) do
{ skip empty strings }
while (lowstart<=high(sarr)) and
(sarr[lowstart]='') do
inc(lowstart);
tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[lowstart]));
for i:=lowstart+1 to high(sarr) do
begin
tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[i]));
if (DestCP<>tmpCp) then
{ ignore the code page of empty strings, it will always be
DefaultSystemCodePage but it doesn't matter for the outcome }
if (sarr[i]<>'') and
(tmpCP<>TranslatePlaceholderCP(StringCodePage(sarr[i]))) then
begin
sameCP:=false;
break;
@ -323,45 +337,52 @@ begin
begin
U:='';
for i:=lowstart to high(sarr) do
U:=U+UnicodeString(sarr[i]);
if sarr[i]<>'' then
U:=U+UnicodeString(sarr[i]);
DestS:='';
widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(U)),DestS,DestCP,Length(U));
exit;
end;
{$ifdef FPC_HAS_CPSTRING}
{ if the result is rawbytestring and all strings have the same code page,
keep that code page }
if cp=CP_NONE then
DestCP:=tmpCP;
{$endif FPC_HAS_CPSTRING}
destcopy:=nil;
lowstart:=low(sarr);
if Pointer(DestS)=Pointer(sarr[lowstart]) then
inc(lowstart);
nonemptystart:=lowstart;
{ Check for another reuse, then we can't use
the append optimization }
for i:=lowstart to high(sarr) do
if DestS<>'' then
begin
if Pointer(DestS)=Pointer(sarr[i]) then
if Pointer(DestS)=Pointer(sarr[lowstart]) then
inc(lowstart);
for i:=lowstart to high(sarr) do
begin
{ if DestS is used somewhere in the middle of the expression,
we need to make sure the original string still exists after
we empty/modify DestS }
destcopy:=pointer(dests);
fpc_AnsiStr_Incr_Ref(destcopy);
lowstart:=low(sarr);
break;
if Pointer(DestS)=Pointer(sarr[i]) then
begin
{ if DestS is used somewhere in the middle of the expression,
we need to make sure the original string still exists after
we empty/modify DestS }
destcopy:=pointer(dests);
fpc_AnsiStr_Incr_Ref(destcopy);
lowstart:=nonemptystart;
break;
end;
end;
end;
{ Start with empty DestS if we start with concatting
the first array element }
if lowstart=low(sarr) then
the first (non-empty) array element }
if lowstart=nonemptystart then
DestS:='';
OldDestLen:=length(DestS);
{ Calculate size of the result so we can do
a single call to SetLength() }
NewLen:=0;
for i:=low(sarr) to high(sarr) do
for i:=nonemptystart to high(sarr) do
inc(NewLen,length(sarr[i]));
SetLength(DestS,NewLen);
if (StringCodePage(DestS) <> DestCP) then
SetCodePage(DestS,DestCP,False);
{ Concat all strings, except the string we already
copied in DestS }
pc:=Pointer(DestS)+OldDestLen;
@ -375,6 +396,8 @@ begin
inc(pc,size);
end;
end;
SetCodePage(DestS,tmpCP,False);
SetCodePage(DestS,DestCP,True);
fpc_AnsiStr_Decr_Ref(destcopy);
end;
{$endif FPC_HAS_ANSISTR_CONCAT_MULTI}

View File

@ -334,15 +334,17 @@ Function fpc_AnsiStr_To_AnsiStr (const S : RawByteString;cp : TSystemCodePage):
{$define FPC_HAS_ANSISTR_CONCAT_MULTI}
procedure fpc_AnsiStr_Concat_multi (var DestS:RawByteString;const sarr:array of RawByteString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}); compilerproc;
Var
lowstart,i : Longint;
lowstart,
nonemptystart,
i : Longint;
p : pointer;
Size,NewLen,
OldDestLen : SizeInt;
destcopy : RawByteString;
DestCP : TSystemCodePage;
U : UnicodeString;
sameCP : Boolean;
DestCP : TSystemCodePage;
tmpCP : TSystemCodePage;
sameCP : Boolean;
begin
if high(sarr)=0 then
begin
@ -350,20 +352,26 @@ begin
exit;
end;
{$ifdef FPC_HAS_CPSTRING}
if (Pointer(DestS)=nil) then
DestCP:=cp
else
DestCP:=StringCodePage(DestS);
DestCP:=cp;
if DestCp=CP_NONE then
DestCP:=DefaultSystemCodePage;
{$else FPC_HAS_CPSTRING}
DestCP:=StringCodePage(DestS);
{$endif FPC_HAS_CPSTRING}
DestCP:=TranslatePlaceholderCP(DestCP);
sameCP:=true;
lowstart:=low(sarr);
for i:=lowstart to high(sarr) do
{ skip empty strings }
while (lowstart<=high(sarr)) and
(sarr[lowstart]='') do
inc(lowstart);
tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[lowstart]));
for i:=lowstart+1 to high(sarr) do
begin
tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[i]));
if (DestCP<>tmpCp) then
{ ignore the code page of empty strings, it will always be
DefaultSystemCodePage but it doesn't matter for the outcome }
if (sarr[i]<>'') and
(tmpCP<>TranslatePlaceholderCP(StringCodePage(sarr[i]))) then
begin
sameCP:=false;
break;
@ -373,33 +381,43 @@ begin
begin
U:='';
for i:=lowstart to high(sarr) do
U:=U+UnicodeString(sarr[i]);
if sarr[i]<>'' then
U:=U+UnicodeString(sarr[i]);
DestS:='';
widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(JLString(U).toCharArray),DestS,DestCP,Length(U));
exit;
end;
{$ifdef FPC_HAS_CPSTRING}
{ if the result is rawbytestring and all strings have the same code page,
keep that code page }
if cp=CP_NONE then
DestCP:=tmpCP;
{$endif FPC_HAS_CPSTRING}
lowstart:=low(sarr);
if Pointer(DestS)=Pointer(sarr[lowstart]) then
inc(lowstart);
nonemptystart:=lowstart;
{ Check for another reuse, then we can't use
the append optimization }
for i:=lowstart to high(sarr) do
if DestS<>'' then
begin
if Pointer(DestS)=Pointer(sarr[i]) then
if Pointer(DestS)=Pointer(sarr[lowstart]) then
inc(lowstart);
for i:=lowstart to high(sarr) do
begin
{ if DestS is used somewhere in the middle of the expression,
we need to make sure the original string still exists after
we empty/modify DestS -- not necessary on JVM platform, ansistrings
are not explicitly refrence counted there }
lowstart:=low(sarr);
break;
if Pointer(DestS)=Pointer(sarr[i]) then
begin
{ if DestS is used somewhere in the middle of the expression,
we need to make sure the original string still exists after
we empty/modify DestS -- not necessary on JVM platform, ansistrings
are not explicitly refrence counted there }
lowstart:=nonemptystart;
break;
end;
end;
end;
{ Start with empty DestS if we start with concatting
the first array element }
if lowstart=low(sarr) then
the first (non-empty) array element }
if lowstart=nonemptystart then
DestS:='';
OldDestLen:=length(DestS);
{ Calculate size of the result so we can do
@ -408,8 +426,6 @@ begin
for i:=low(sarr) to high(sarr) do
inc(NewLen,length(sarr[i]));
SetLength(DestS,NewLen);
if (StringCodePage(DestS) <> DestCP) then
SetCodePage(DestS,DestCP,False);
{ Concat all strings, except the string we already
copied in DestS }
NewLen:=OldDestLen;
@ -423,6 +439,11 @@ begin
inc(NewLen,size);
end;
end;
if NewLen<>0 then
begin
SetCodePage(DestS,tmpCP,False);
SetCodePage(DestS,DestCP,True);
end;
end;

View File

@ -0,0 +1,49 @@
{$mode delphiunicode}
type
tstr850 = type ansistring(850);
tstr866 = type ansistring(866);
tstr65001 = type ansistring(65001);
procedure test;
var
s1: tstr850;
s2: tstr866;
s3: tstr65001;
r: rawbytestring;
begin
s1:='a';
s2:='b';
s3:='c';
r:='d';
r:=s1+s2;
writeln(stringcodepage(r));
if (stringcodepage(r)<>0) and
(stringcodepage(r)<>defaultsystemcodepage) then
halt(1);
setcodepage(r,850);
r:=s1+s2;
writeln(stringcodepage(r));
if (stringcodepage(r)<>0) and
(stringcodepage(r)<>defaultsystemcodepage) then
halt(2);
setcodepage(r,CP_ASCII);
r:=s1+s2;
writeln(stringcodepage(r));
if (stringcodepage(r)<>0) and
(stringcodepage(r)<>defaultsystemcodepage) then
halt(3);
r:=s1+s1;
writeln(stringcodepage(r));
if (stringcodepage(r)<>stringcodepage(s1)) then
halt(4);
r:=s2+s2;
writeln(stringcodepage(r));
if (stringcodepage(r)<>stringcodepage(s2)) then
halt(5);
end;
begin
test;
end.