mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-02 17:50:16 +02:00
* when concatenating ansistrings, do not map CP_NONE (rawbytestring) to
CP_ACP (defaultsystemcodepage), because if all input strings have the same code page then the result should also have that code page if it's assigned to a rawbytestring rather than getting defaultsystemcodepage * do not consider empty strings to determine the code page of the result in fpc_AnsiStr_Concat_multi(), because that will cause a different result than when using a sequence of fpc_AnsiStr_Concat() calls (it ignores empty strings to determine the result code page) and it's also slower * do not consider the run time code page of the destination string in fpc_AnsiStr_Concat(_multi)() because Delphi does not do so either. This was introduced in r19118, probably to hide another bug + test git-svn-id: branches/cpstrrtl@25143 -
This commit is contained in:
parent
9b9252e507
commit
1a560e9875
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -11011,6 +11011,7 @@ tests/test/tcpstrchar2ansistr.pp svneol=native#text/plain
|
||||
tests/test/tcpstrconcat.pp svneol=native#text/plain
|
||||
tests/test/tcpstrconcat2.pp svneol=native#text/plain
|
||||
tests/test/tcpstrconcat3.pp svneol=native#text/plain
|
||||
tests/test/tcpstrconcat4.pp svneol=native#text/plain
|
||||
tests/test/tcpstrconcatmulti.pp svneol=native#text/plain
|
||||
tests/test/tcpstrconcatmulti2.pp svneol=native#text/plain
|
||||
tests/test/tcpstrpchar2ansistr.pp svneol=native#text/plain
|
||||
|
@ -2170,7 +2170,9 @@ implementation
|
||||
if is_ansistring(resultdef) then
|
||||
para:=ccallparanode.create(
|
||||
cordconstnode.create(
|
||||
getparaencoding(resultdef),
|
||||
{ don't use getparaencoding(), we have to know
|
||||
when the result is rawbytestring }
|
||||
tstringdef(resultdef).encoding,
|
||||
u16inttype,
|
||||
true
|
||||
),
|
||||
@ -2208,7 +2210,9 @@ implementation
|
||||
if is_ansistring(resultdef) then
|
||||
para:=ccallparanode.create(
|
||||
cordconstnode.create(
|
||||
getparaencoding(resultdef),
|
||||
{ don't use getparaencoding(), we have to know
|
||||
when the result is rawbytestring }
|
||||
tstringdef(resultdef).encoding,
|
||||
u16inttype,
|
||||
true
|
||||
),
|
||||
|
@ -349,7 +349,9 @@ begin
|
||||
if is_ansistring(p.resultdef) then
|
||||
para:=ccallparanode.create(
|
||||
cordconstnode.create(
|
||||
getparaencoding(p.resultdef),
|
||||
{ don't use getparaencoding(), we have to know
|
||||
when the result is rawbytestring }
|
||||
tstringdef(p.resultdef).encoding,
|
||||
u16inttype,
|
||||
true
|
||||
),
|
||||
@ -383,7 +385,9 @@ begin
|
||||
if is_ansistring(p.resultdef) then
|
||||
para:=ccallparanode.create(
|
||||
cordconstnode.create(
|
||||
getparaencoding(p.resultdef),
|
||||
{ don't use getparaencoding(), we have to know
|
||||
when the result is rawbytestring }
|
||||
tstringdef(p.resultdef).encoding,
|
||||
u16inttype,
|
||||
true
|
||||
),
|
||||
|
@ -1125,7 +1125,11 @@ implementation
|
||||
function getparaencoding(def:tdef):tstringencoding; inline;
|
||||
begin
|
||||
{ don't pass CP_NONE encoding to internal functions
|
||||
they expect 0 encoding instead }
|
||||
they expect 0 encoding instead
|
||||
exception: result of string concatenation, because if you pass the
|
||||
result of a string concatenation to a rawbytestring, the result of
|
||||
that concatenation shouldn't be converted to defaultsystemcodepage
|
||||
if all strings have the same type }
|
||||
result:=tstringdef(def).encoding;
|
||||
if result=CP_NONE then
|
||||
result:=0
|
||||
|
@ -214,10 +214,9 @@ Var
|
||||
S1CP, S2CP, DestCP: TSystemCodePage;
|
||||
begin
|
||||
{$ifdef FPC_HAS_CPSTRING}
|
||||
if (Pointer(DestS)=nil) then
|
||||
DestCP:=cp
|
||||
else
|
||||
DestCP:=StringCodePage(DestS);
|
||||
DestCP:=cp;
|
||||
if DestCp=CP_NONE then
|
||||
DestCP:=DefaultSystemCodePage;
|
||||
{$else FPC_HAS_CPSTRING}
|
||||
DestCP:=StringCodePage(DestS);
|
||||
{$endif FPC_HAS_CPSTRING}
|
||||
@ -235,6 +234,13 @@ begin
|
||||
else
|
||||
S2CP:=StringCodePage(S2);
|
||||
S2CP:=TranslatePlaceholderCP(S2CP);
|
||||
{$ifdef FPC_HAS_CPSTRING}
|
||||
{ if the result is rawbytestring and both strings have the same code page,
|
||||
keep that code page }
|
||||
if (cp=CP_NONE) and
|
||||
(S1CP=S2CP) then
|
||||
DestCP:=S1CP;
|
||||
{$endif FPC_HAS_CPSTRING}
|
||||
if (S1CP<>DestCP) or (S2CP<>DestCP) then
|
||||
begin
|
||||
ansistr_concat_complex(DestS,S1,S2,DestCP);
|
||||
@ -284,15 +290,17 @@ end;
|
||||
{$define FPC_HAS_ANSISTR_CONCAT_MULTI}
|
||||
procedure fpc_AnsiStr_Concat_multi (var DestS:RawByteString;const sarr:array of RawByteString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}); compilerproc;
|
||||
Var
|
||||
lowstart,i : Longint;
|
||||
lowstart,
|
||||
nonemptystart,
|
||||
i : Longint;
|
||||
p,pc : pointer;
|
||||
Size,NewLen,
|
||||
OldDestLen : SizeInt;
|
||||
destcopy : pointer;
|
||||
DestCP : TSystemCodePage;
|
||||
U : UnicodeString;
|
||||
sameCP : Boolean;
|
||||
DestCP,
|
||||
tmpCP : TSystemCodePage;
|
||||
sameCP : Boolean;
|
||||
begin
|
||||
if high(sarr)=0 then
|
||||
begin
|
||||
@ -300,20 +308,26 @@ begin
|
||||
exit;
|
||||
end;
|
||||
{$ifdef FPC_HAS_CPSTRING}
|
||||
if (Pointer(DestS)=nil) then
|
||||
DestCP:=cp
|
||||
else
|
||||
DestCP:=StringCodePage(DestS);
|
||||
DestCP:=cp;
|
||||
if DestCp=CP_NONE then
|
||||
DestCP:=DefaultSystemCodePage;
|
||||
{$else FPC_HAS_CPSTRING}
|
||||
DestCP:=StringCodePage(DestS);
|
||||
{$endif FPC_HAS_CPSTRING}
|
||||
DestCP:=TranslatePlaceholderCP(DestCP);
|
||||
sameCP:=true;
|
||||
lowstart:=low(sarr);
|
||||
for i:=lowstart to high(sarr) do
|
||||
{ skip empty strings }
|
||||
while (lowstart<=high(sarr)) and
|
||||
(sarr[lowstart]='') do
|
||||
inc(lowstart);
|
||||
tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[lowstart]));
|
||||
for i:=lowstart+1 to high(sarr) do
|
||||
begin
|
||||
tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[i]));
|
||||
if (DestCP<>tmpCp) then
|
||||
{ ignore the code page of empty strings, it will always be
|
||||
DefaultSystemCodePage but it doesn't matter for the outcome }
|
||||
if (sarr[i]<>'') and
|
||||
(tmpCP<>TranslatePlaceholderCP(StringCodePage(sarr[i]))) then
|
||||
begin
|
||||
sameCP:=false;
|
||||
break;
|
||||
@ -323,45 +337,52 @@ begin
|
||||
begin
|
||||
U:='';
|
||||
for i:=lowstart to high(sarr) do
|
||||
U:=U+UnicodeString(sarr[i]);
|
||||
if sarr[i]<>'' then
|
||||
U:=U+UnicodeString(sarr[i]);
|
||||
|
||||
DestS:='';
|
||||
widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(Pointer(U)),DestS,DestCP,Length(U));
|
||||
exit;
|
||||
end;
|
||||
|
||||
{$ifdef FPC_HAS_CPSTRING}
|
||||
{ if the result is rawbytestring and all strings have the same code page,
|
||||
keep that code page }
|
||||
if cp=CP_NONE then
|
||||
DestCP:=tmpCP;
|
||||
{$endif FPC_HAS_CPSTRING}
|
||||
destcopy:=nil;
|
||||
lowstart:=low(sarr);
|
||||
if Pointer(DestS)=Pointer(sarr[lowstart]) then
|
||||
inc(lowstart);
|
||||
nonemptystart:=lowstart;
|
||||
{ Check for another reuse, then we can't use
|
||||
the append optimization }
|
||||
for i:=lowstart to high(sarr) do
|
||||
if DestS<>'' then
|
||||
begin
|
||||
if Pointer(DestS)=Pointer(sarr[i]) then
|
||||
if Pointer(DestS)=Pointer(sarr[lowstart]) then
|
||||
inc(lowstart);
|
||||
for i:=lowstart to high(sarr) do
|
||||
begin
|
||||
{ if DestS is used somewhere in the middle of the expression,
|
||||
we need to make sure the original string still exists after
|
||||
we empty/modify DestS }
|
||||
destcopy:=pointer(dests);
|
||||
fpc_AnsiStr_Incr_Ref(destcopy);
|
||||
lowstart:=low(sarr);
|
||||
break;
|
||||
if Pointer(DestS)=Pointer(sarr[i]) then
|
||||
begin
|
||||
{ if DestS is used somewhere in the middle of the expression,
|
||||
we need to make sure the original string still exists after
|
||||
we empty/modify DestS }
|
||||
destcopy:=pointer(dests);
|
||||
fpc_AnsiStr_Incr_Ref(destcopy);
|
||||
lowstart:=nonemptystart;
|
||||
break;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
{ Start with empty DestS if we start with concatting
|
||||
the first array element }
|
||||
if lowstart=low(sarr) then
|
||||
the first (non-empty) array element }
|
||||
if lowstart=nonemptystart then
|
||||
DestS:='';
|
||||
OldDestLen:=length(DestS);
|
||||
{ Calculate size of the result so we can do
|
||||
a single call to SetLength() }
|
||||
NewLen:=0;
|
||||
for i:=low(sarr) to high(sarr) do
|
||||
for i:=nonemptystart to high(sarr) do
|
||||
inc(NewLen,length(sarr[i]));
|
||||
SetLength(DestS,NewLen);
|
||||
if (StringCodePage(DestS) <> DestCP) then
|
||||
SetCodePage(DestS,DestCP,False);
|
||||
{ Concat all strings, except the string we already
|
||||
copied in DestS }
|
||||
pc:=Pointer(DestS)+OldDestLen;
|
||||
@ -375,6 +396,8 @@ begin
|
||||
inc(pc,size);
|
||||
end;
|
||||
end;
|
||||
SetCodePage(DestS,tmpCP,False);
|
||||
SetCodePage(DestS,DestCP,True);
|
||||
fpc_AnsiStr_Decr_Ref(destcopy);
|
||||
end;
|
||||
{$endif FPC_HAS_ANSISTR_CONCAT_MULTI}
|
||||
|
@ -334,15 +334,17 @@ Function fpc_AnsiStr_To_AnsiStr (const S : RawByteString;cp : TSystemCodePage):
|
||||
{$define FPC_HAS_ANSISTR_CONCAT_MULTI}
|
||||
procedure fpc_AnsiStr_Concat_multi (var DestS:RawByteString;const sarr:array of RawByteString{$ifdef FPC_HAS_CPSTRING};cp : TSystemCodePage{$endif FPC_HAS_CPSTRING}); compilerproc;
|
||||
Var
|
||||
lowstart,i : Longint;
|
||||
lowstart,
|
||||
nonemptystart,
|
||||
i : Longint;
|
||||
p : pointer;
|
||||
Size,NewLen,
|
||||
OldDestLen : SizeInt;
|
||||
destcopy : RawByteString;
|
||||
DestCP : TSystemCodePage;
|
||||
U : UnicodeString;
|
||||
sameCP : Boolean;
|
||||
DestCP : TSystemCodePage;
|
||||
tmpCP : TSystemCodePage;
|
||||
sameCP : Boolean;
|
||||
begin
|
||||
if high(sarr)=0 then
|
||||
begin
|
||||
@ -350,20 +352,26 @@ begin
|
||||
exit;
|
||||
end;
|
||||
{$ifdef FPC_HAS_CPSTRING}
|
||||
if (Pointer(DestS)=nil) then
|
||||
DestCP:=cp
|
||||
else
|
||||
DestCP:=StringCodePage(DestS);
|
||||
DestCP:=cp;
|
||||
if DestCp=CP_NONE then
|
||||
DestCP:=DefaultSystemCodePage;
|
||||
{$else FPC_HAS_CPSTRING}
|
||||
DestCP:=StringCodePage(DestS);
|
||||
{$endif FPC_HAS_CPSTRING}
|
||||
DestCP:=TranslatePlaceholderCP(DestCP);
|
||||
sameCP:=true;
|
||||
lowstart:=low(sarr);
|
||||
for i:=lowstart to high(sarr) do
|
||||
{ skip empty strings }
|
||||
while (lowstart<=high(sarr)) and
|
||||
(sarr[lowstart]='') do
|
||||
inc(lowstart);
|
||||
tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[lowstart]));
|
||||
for i:=lowstart+1 to high(sarr) do
|
||||
begin
|
||||
tmpCP:=TranslatePlaceholderCP(StringCodePage(sarr[i]));
|
||||
if (DestCP<>tmpCp) then
|
||||
{ ignore the code page of empty strings, it will always be
|
||||
DefaultSystemCodePage but it doesn't matter for the outcome }
|
||||
if (sarr[i]<>'') and
|
||||
(tmpCP<>TranslatePlaceholderCP(StringCodePage(sarr[i]))) then
|
||||
begin
|
||||
sameCP:=false;
|
||||
break;
|
||||
@ -373,33 +381,43 @@ begin
|
||||
begin
|
||||
U:='';
|
||||
for i:=lowstart to high(sarr) do
|
||||
U:=U+UnicodeString(sarr[i]);
|
||||
if sarr[i]<>'' then
|
||||
U:=U+UnicodeString(sarr[i]);
|
||||
|
||||
DestS:='';
|
||||
widestringmanager.Unicode2AnsiMoveProc(PUnicodeChar(JLString(U).toCharArray),DestS,DestCP,Length(U));
|
||||
exit;
|
||||
end;
|
||||
{$ifdef FPC_HAS_CPSTRING}
|
||||
{ if the result is rawbytestring and all strings have the same code page,
|
||||
keep that code page }
|
||||
if cp=CP_NONE then
|
||||
DestCP:=tmpCP;
|
||||
{$endif FPC_HAS_CPSTRING}
|
||||
|
||||
lowstart:=low(sarr);
|
||||
if Pointer(DestS)=Pointer(sarr[lowstart]) then
|
||||
inc(lowstart);
|
||||
nonemptystart:=lowstart;
|
||||
{ Check for another reuse, then we can't use
|
||||
the append optimization }
|
||||
for i:=lowstart to high(sarr) do
|
||||
if DestS<>'' then
|
||||
begin
|
||||
if Pointer(DestS)=Pointer(sarr[i]) then
|
||||
if Pointer(DestS)=Pointer(sarr[lowstart]) then
|
||||
inc(lowstart);
|
||||
for i:=lowstart to high(sarr) do
|
||||
begin
|
||||
{ if DestS is used somewhere in the middle of the expression,
|
||||
we need to make sure the original string still exists after
|
||||
we empty/modify DestS -- not necessary on JVM platform, ansistrings
|
||||
are not explicitly refrence counted there }
|
||||
lowstart:=low(sarr);
|
||||
break;
|
||||
if Pointer(DestS)=Pointer(sarr[i]) then
|
||||
begin
|
||||
{ if DestS is used somewhere in the middle of the expression,
|
||||
we need to make sure the original string still exists after
|
||||
we empty/modify DestS -- not necessary on JVM platform, ansistrings
|
||||
are not explicitly refrence counted there }
|
||||
lowstart:=nonemptystart;
|
||||
break;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
{ Start with empty DestS if we start with concatting
|
||||
the first array element }
|
||||
if lowstart=low(sarr) then
|
||||
the first (non-empty) array element }
|
||||
if lowstart=nonemptystart then
|
||||
DestS:='';
|
||||
OldDestLen:=length(DestS);
|
||||
{ Calculate size of the result so we can do
|
||||
@ -408,8 +426,6 @@ begin
|
||||
for i:=low(sarr) to high(sarr) do
|
||||
inc(NewLen,length(sarr[i]));
|
||||
SetLength(DestS,NewLen);
|
||||
if (StringCodePage(DestS) <> DestCP) then
|
||||
SetCodePage(DestS,DestCP,False);
|
||||
{ Concat all strings, except the string we already
|
||||
copied in DestS }
|
||||
NewLen:=OldDestLen;
|
||||
@ -423,6 +439,11 @@ begin
|
||||
inc(NewLen,size);
|
||||
end;
|
||||
end;
|
||||
if NewLen<>0 then
|
||||
begin
|
||||
SetCodePage(DestS,tmpCP,False);
|
||||
SetCodePage(DestS,DestCP,True);
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
|
49
tests/test/tcpstrconcat4.pp
Normal file
49
tests/test/tcpstrconcat4.pp
Normal file
@ -0,0 +1,49 @@
|
||||
{$mode delphiunicode}
|
||||
|
||||
type
|
||||
tstr850 = type ansistring(850);
|
||||
tstr866 = type ansistring(866);
|
||||
tstr65001 = type ansistring(65001);
|
||||
|
||||
procedure test;
|
||||
var
|
||||
s1: tstr850;
|
||||
s2: tstr866;
|
||||
s3: tstr65001;
|
||||
r: rawbytestring;
|
||||
begin
|
||||
s1:='a';
|
||||
s2:='b';
|
||||
s3:='c';
|
||||
r:='d';
|
||||
r:=s1+s2;
|
||||
writeln(stringcodepage(r));
|
||||
if (stringcodepage(r)<>0) and
|
||||
(stringcodepage(r)<>defaultsystemcodepage) then
|
||||
halt(1);
|
||||
setcodepage(r,850);
|
||||
r:=s1+s2;
|
||||
writeln(stringcodepage(r));
|
||||
if (stringcodepage(r)<>0) and
|
||||
(stringcodepage(r)<>defaultsystemcodepage) then
|
||||
halt(2);
|
||||
setcodepage(r,CP_ASCII);
|
||||
r:=s1+s2;
|
||||
writeln(stringcodepage(r));
|
||||
if (stringcodepage(r)<>0) and
|
||||
(stringcodepage(r)<>defaultsystemcodepage) then
|
||||
halt(3);
|
||||
r:=s1+s1;
|
||||
writeln(stringcodepage(r));
|
||||
if (stringcodepage(r)<>stringcodepage(s1)) then
|
||||
halt(4);
|
||||
r:=s2+s2;
|
||||
writeln(stringcodepage(r));
|
||||
if (stringcodepage(r)<>stringcodepage(s2)) then
|
||||
halt(5);
|
||||
end;
|
||||
|
||||
begin
|
||||
test;
|
||||
end.
|
||||
|
Loading…
Reference in New Issue
Block a user