mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-13 09:59:25 +02:00
compiler: apply patches from Inoussa and Jonas:
defcmp: Address code paged' string type comparison taking care of the code page ncnv: Remove un-needed code page comparison to CP_UTF8, some fixes regarding shortstrings and wide char/string ncon: For the case of tstringconstnode.changestringtype (ncon.pas) where the code page are of CP_NONE or 0 no translation is done as : * CP_NONE is compatible to all * For 0 the raw bytes are just copied. My changes: - change ascii2unicode to allow pass source codepage, - convert in both cases when source or destination is UTF8 git-svn-id: trunk@19457 -
This commit is contained in:
parent
cf81d39682
commit
a99ffb3097
@ -335,20 +335,32 @@ implementation
|
||||
{ Constant string }
|
||||
if (fromtreetype=stringconstn) then
|
||||
begin
|
||||
if (tstringdef(def_from).stringtype=tstringdef(def_to).stringtype) then
|
||||
if (tstringdef(def_from).stringtype=tstringdef(def_to).stringtype) and
|
||||
((tstringdef(def_from).stringtype<>st_ansistring) or
|
||||
(tstringdef(def_from).encoding=tstringdef(def_to).encoding)
|
||||
) then
|
||||
eq:=te_equal
|
||||
else
|
||||
begin
|
||||
doconv:=tc_string_2_string;
|
||||
{ Don't prefer conversions from widestring to a
|
||||
normal string as we can lose information }
|
||||
if (tstringdef(def_from).stringtype in [st_widestring,st_unicodestring]) and
|
||||
not (tstringdef(def_to).stringtype in [st_widestring,st_unicodestring]) then
|
||||
eq:=te_convert_l3
|
||||
else if tstringdef(def_to).stringtype in [st_widestring,st_unicodestring] then
|
||||
eq:=te_convert_l2
|
||||
if (tstringdef(def_from).stringtype = st_ansistring) and
|
||||
(tstringdef(def_to).stringtype = st_ansistring) then
|
||||
if (tstringdef(def_to).encoding=globals.CP_UTF8) then
|
||||
eq:=te_convert_l1
|
||||
else
|
||||
eq:=te_convert_l2
|
||||
else
|
||||
eq:=te_convert_l1;
|
||||
begin
|
||||
{ Don't prefer conversions from widestring to a
|
||||
normal string as we can lose information }
|
||||
if (tstringdef(def_from).stringtype in [st_widestring,st_unicodestring]) and
|
||||
not (tstringdef(def_to).stringtype in [st_widestring,st_unicodestring]) then
|
||||
eq:=te_convert_l3
|
||||
else if tstringdef(def_to).stringtype in [st_widestring,st_unicodestring] then
|
||||
eq:=te_convert_l2
|
||||
else
|
||||
eq:=te_convert_l1;
|
||||
end;
|
||||
end;
|
||||
end
|
||||
else if (tstringdef(def_from).stringtype=tstringdef(def_to).stringtype) and
|
||||
|
@ -1644,10 +1644,30 @@ implementation
|
||||
end;
|
||||
st_ansistring :
|
||||
begin
|
||||
if not(is_ansistring(rd)) then
|
||||
inserttypeconv(right,cansistringtype);
|
||||
if not(is_ansistring(ld)) then
|
||||
inserttypeconv(left,cansistringtype);
|
||||
{ use same code page if possible (don't force same code
|
||||
page in case both are ansistrings with code page <>
|
||||
CP_NONE, since then data loss can occur (the ansistring
|
||||
helpers will convert them at run time to an encoding
|
||||
that can represent both encodings) }
|
||||
if is_ansistring(ld) and
|
||||
(tstringdef(ld).encoding<>0) and
|
||||
(tstringdef(ld).encoding<>globals.CP_NONE) and
|
||||
(not is_ansistring(rd) or
|
||||
(tstringdef(rd).encoding=0) or
|
||||
(tstringdef(rd).encoding=globals.CP_NONE)) then
|
||||
inserttypeconv(right,ld)
|
||||
else if is_ansistring(rd) and
|
||||
(tstringdef(rd).encoding<>0) and
|
||||
(tstringdef(rd).encoding<>globals.CP_NONE) and
|
||||
(not is_ansistring(ld) or
|
||||
(tstringdef(ld).encoding=0) or
|
||||
(tstringdef(ld).encoding=globals.CP_NONE)) then
|
||||
inserttypeconv(left,rd)
|
||||
else
|
||||
begin
|
||||
inserttypeconv(left,cansistringtype);
|
||||
inserttypeconv(right,cansistringtype);
|
||||
end;
|
||||
end;
|
||||
st_longstring :
|
||||
begin
|
||||
|
@ -1025,23 +1025,18 @@ implementation
|
||||
newblock : tblocknode;
|
||||
newstat : tstatementnode;
|
||||
restemp : ttempcreatenode;
|
||||
//sa : ansistring;
|
||||
//cw : WideChar;
|
||||
//l : SizeUInt;
|
||||
sa : ansistring;
|
||||
cw : WideChar;
|
||||
l : SizeUInt;
|
||||
begin
|
||||
result:=nil;
|
||||
{ we can't do widechar to ansichar conversions at compile time, since }
|
||||
{ this maps all non-ascii chars to '?' -> loses information }
|
||||
|
||||
if (left.nodetype=ordconstn) and
|
||||
((tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) or
|
||||
((tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring,st_ansistring]) or
|
||||
(torddef(left.resultdef).ordtype=uchar) or
|
||||
((torddef(left.resultdef).ordtype=uwidechar) and
|
||||
(current_settings.sourcecodepage<>CP_UTF8)
|
||||
(tstringdef(resultdef).stringtype<>st_shortstring)
|
||||
)
|
||||
)
|
||||
{ widechar >=128 is destroyed }
|
||||
{(tordconstnode(left).value.uvalue<128))} then
|
||||
) then
|
||||
begin
|
||||
if (tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) then
|
||||
begin
|
||||
@ -1062,12 +1057,11 @@ implementation
|
||||
hp:=cstringconstnode.createstr(unicode2asciichar(tcompilerwidechar(tordconstnode(left).value.uvalue)))
|
||||
else
|
||||
begin
|
||||
exit;
|
||||
{Word(cw):=tcompilerwidechar(tordconstnode(left).value.uvalue);
|
||||
Word(cw):=tcompilerwidechar(tordconstnode(left).value.uvalue);
|
||||
SetLength(sa,5);
|
||||
l:=UnicodeToUtf8(@(sa[1]),Length(sa),@cw,1);
|
||||
SetLength(sa,l-1);
|
||||
hp:=cstringconstnode.createstr(sa);}
|
||||
hp:=cstringconstnode.createstr(sa);
|
||||
end
|
||||
end
|
||||
else
|
||||
@ -1077,6 +1071,18 @@ implementation
|
||||
result:=hp;
|
||||
end
|
||||
else
|
||||
if (tstringdef(resultdef).stringtype=st_shortstring) and
|
||||
(torddef(left.resultdef).ordtype=uwidechar) and
|
||||
(tcompilerwidechar(tordconstnode(left).value.uvalue) <= 127)
|
||||
then
|
||||
begin
|
||||
SetLength(sa,1);
|
||||
Byte(sa[1]):= tordconstnode(left).value.uvalue;
|
||||
hp:=cstringconstnode.createstr(sa);
|
||||
tstringconstnode(hp).changestringtype(resultdef);
|
||||
result:=hp;
|
||||
end
|
||||
else
|
||||
{ shortstrings are handled 'inline' (except for widechars) }
|
||||
if (tstringdef(resultdef).stringtype<>st_shortstring) or
|
||||
(torddef(left.resultdef).ordtype=uwidechar) then
|
||||
@ -1133,14 +1139,11 @@ implementation
|
||||
begin
|
||||
result:=nil;
|
||||
if (left.nodetype=stringconstn) and
|
||||
((tstringdef(resultdef).stringtype=st_shortstring) or
|
||||
((tstringdef(resultdef).stringtype=st_ansistring) and
|
||||
(((tstringdef(resultdef).stringtype=st_ansistring) and
|
||||
(tstringdef(resultdef).encoding<>CP_NONE)
|
||||
)
|
||||
) and
|
||||
((tstringdef(left.resultdef).stringtype in [st_unicodestring,st_widestring]) and
|
||||
(current_settings.sourcecodepage<>CP_UTF8)
|
||||
) then
|
||||
(tstringdef(left.resultdef).stringtype in [st_unicodestring,st_widestring]) then
|
||||
begin
|
||||
tstringconstnode(left).changestringtype(resultdef);
|
||||
Result:=left;
|
||||
@ -1163,7 +1166,18 @@ implementation
|
||||
resultdef
|
||||
);
|
||||
left:=nil;
|
||||
end;
|
||||
end
|
||||
else if (left.nodetype=stringconstn) and
|
||||
(tstringdef(left.resultdef).stringtype in [st_unicodestring,st_widestring]) and
|
||||
(tstringdef(resultdef).stringtype=st_shortstring) then
|
||||
begin
|
||||
if not hasnonasciichars(pcompilerwidestring(tstringconstnode(left).value_str)) then
|
||||
begin
|
||||
tstringconstnode(left).changestringtype(resultdef);
|
||||
Result:=left;
|
||||
left:=nil;
|
||||
end;
|
||||
end
|
||||
end;
|
||||
|
||||
function ttypeconvnode.typecheck_char_to_chararray : tnode;
|
||||
@ -1190,8 +1204,7 @@ implementation
|
||||
((torddef(resultdef).ordtype<>uchar) or
|
||||
(torddef(left.resultdef).ordtype<>uwidechar) or
|
||||
(current_settings.sourcecodepage<>CP_UTF8))
|
||||
{ >= 128 is replaced by '?' currently -> loses information }
|
||||
{(tordconstnode(left).value.uvalue<128))} then
|
||||
then
|
||||
begin
|
||||
if (torddef(resultdef).ordtype=uchar) and
|
||||
(torddef(left.resultdef).ordtype=uwidechar) and
|
||||
@ -2269,11 +2282,8 @@ implementation
|
||||
(
|
||||
((not is_widechararray(left.resultdef) and
|
||||
not is_wide_or_unicode_string(left.resultdef)) or
|
||||
(tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring]) or
|
||||
(current_settings.sourcecodepage<>CP_UTF8)
|
||||
(tstringdef(resultdef).stringtype in [st_widestring,st_unicodestring,st_ansistring])
|
||||
)
|
||||
{ non-ascii chars would be replaced with '?' -> loses info }
|
||||
{not hasnonasciichars(pcompilerwidestring(tstringconstnode(left).value_str)))}
|
||||
) then
|
||||
begin
|
||||
tstringconstnode(left).changestringtype(resultdef);
|
||||
|
@ -993,7 +993,7 @@ implementation
|
||||
not(cst_type in [cst_widestring,cst_unicodestring]) then
|
||||
begin
|
||||
initwidestring(pw);
|
||||
ascii2unicode(value_str,len,pw);
|
||||
ascii2unicode(value_str,len,current_settings.sourcecodepage,pw);
|
||||
ansistringdispose(value_str,len);
|
||||
pcompilerwidestring(value_str):=pw;
|
||||
end
|
||||
@ -1035,8 +1035,55 @@ implementation
|
||||
cp2:=tstringdef(resultdef).encoding
|
||||
else if (cst_type in [cst_shortstring,cst_conststring,cst_longstring]) then
|
||||
cp2:=current_settings.sourcecodepage;
|
||||
if cpavailable(cp1) and cpavailable(cp2) then
|
||||
changecodepage(value_str,len,cp2,value_str,cp1);
|
||||
{ don't change string if codepages are equal or string length is 0 }
|
||||
if (cp1<>cp2) and (len>0) then
|
||||
begin
|
||||
if cpavailable(cp1) and cpavailable(cp2) then
|
||||
changecodepage(value_str,len,cp2,value_str,cp1)
|
||||
else if (cp1 <> CP_NONE) and (cp2 <> CP_NONE) and (cp1 <> 0) and (cp2 <> 0) then
|
||||
begin
|
||||
{ if source encoding is UTF8 convert using UTF8->UTF16->destination encoding }
|
||||
if (cp2=CP_UTF8) then
|
||||
begin
|
||||
if not cpavailable(cp1) then
|
||||
Message1(option_code_page_not_available,IntToStr(cp1));
|
||||
initwidestring(pw);
|
||||
setlengthwidestring(pw,len);
|
||||
l:=Utf8ToUnicode(PUnicodeChar(pw^.data),len,value_str,len);
|
||||
if (l<>getlengthwidestring(pw)) then
|
||||
begin
|
||||
setlengthwidestring(pw,l);
|
||||
ReAllocMem(value_str,l);
|
||||
end;
|
||||
unicode2ascii(pw,value_str,cp1);
|
||||
donewidestring(pw);
|
||||
end
|
||||
else
|
||||
{ if destination encoding is UTF8 convert using source encoding->UTF16->UTF8 }
|
||||
if (cp1=CP_UTF8) then
|
||||
begin
|
||||
if not cpavailable(cp2) then
|
||||
Message1(option_code_page_not_available,IntToStr(cp2));
|
||||
initwidestring(pw);
|
||||
setlengthwidestring(pw,len);
|
||||
ascii2unicode(value_str,len,cp2,pw);
|
||||
l:=UnicodeToUtf8(nil,PUnicodeChar(pw^.data),0);
|
||||
if l<>len then
|
||||
ReAllocMem(value_str,l);
|
||||
len:=l-1;
|
||||
UnicodeToUtf8(value_str,PUnicodeChar(pw^.data),l);
|
||||
donewidestring(pw);
|
||||
end
|
||||
else
|
||||
begin
|
||||
{ output error message that encoding is not available for the compiler }
|
||||
if not cpavailable(cp1) then
|
||||
Message1(option_code_page_not_available,IntToStr(cp1));
|
||||
if not cpavailable(cp2) then
|
||||
Message1(option_code_page_not_available,IntToStr(cp2));
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
cst_type:=st2cst[tstringdef(def).stringtype];
|
||||
resultdef:=def;
|
||||
|
@ -4208,9 +4208,9 @@ In case not, the value returned can be arbitrary.
|
||||
if not iswidestring then
|
||||
begin
|
||||
if len>0 then
|
||||
ascii2unicode(@cstringpattern[1],len,patternw)
|
||||
ascii2unicode(@cstringpattern[1],len,current_settings.sourcecodepage,patternw)
|
||||
else
|
||||
ascii2unicode(nil,len,patternw);
|
||||
ascii2unicode(nil,len,current_settings.sourcecodepage,patternw);
|
||||
iswidestring:=true;
|
||||
len:=0;
|
||||
end;
|
||||
@ -4252,9 +4252,9 @@ In case not, the value returned can be arbitrary.
|
||||
if not iswidestring then
|
||||
begin
|
||||
if len>0 then
|
||||
ascii2unicode(@cstringpattern[1],len,patternw)
|
||||
ascii2unicode(@cstringpattern[1],len,current_settings.sourcecodepage,patternw)
|
||||
else
|
||||
ascii2unicode(nil,len,patternw);
|
||||
ascii2unicode(nil,len,current_settings.sourcecodepage,patternw);
|
||||
iswidestring:=true;
|
||||
len:=0;
|
||||
end;
|
||||
|
@ -52,7 +52,7 @@ unit widestr;
|
||||
procedure copywidestring(s,d : pcompilerwidestring);
|
||||
function asciichar2unicode(c : char) : tcompilerwidechar;
|
||||
function unicode2asciichar(c : tcompilerwidechar) : char;
|
||||
procedure ascii2unicode(p : pchar;l : SizeInt;r : pcompilerwidestring);
|
||||
procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : pcompilerwidestring);
|
||||
procedure unicode2ascii(r : pcompilerwidestring;p : pchar;cp : tstringencoding);
|
||||
function hasnonasciichars(const p: pcompilerwidestring): boolean;
|
||||
function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar;
|
||||
@ -189,14 +189,14 @@ unit widestr;
|
||||
Result := getascii(c,getmap(current_settings.sourcecodepage))[1];
|
||||
end;
|
||||
|
||||
procedure ascii2unicode(p : pchar;l : SizeInt;r : pcompilerwidestring);
|
||||
procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : pcompilerwidestring);
|
||||
var
|
||||
source : pchar;
|
||||
dest : tcompilerwidecharptr;
|
||||
i : SizeInt;
|
||||
m : punicodemap;
|
||||
begin
|
||||
m:=getmap(current_settings.sourcecodepage);
|
||||
m:=getmap(cp);
|
||||
setlengthwidestring(r,l);
|
||||
source:=p;
|
||||
dest:=tcompilerwidecharptr(r^.data);
|
||||
|
Loading…
Reference in New Issue
Block a user