Merge branch 'compiller-str-term0-fix' into 'main'

Fixed missing zero termination of strings; fixed conversions that use UnicodeToUtf8 and unicode2ascii

Closes #39746

See merge request freepascal.org/fpc/source!158
This commit is contained in:
Sergey Larin 2023-07-07 21:43:30 +00:00
commit d52d8fcfa9
7 changed files with 39 additions and 26 deletions

View File

@ -2446,7 +2446,7 @@ implementation
len:=ppufile.getlongint; len:=ppufile.getlongint;
getmem(str,len+1); getmem(str,len+1);
ppufile.getdata(str^,len); ppufile.getdata(str^,len);
str[len]:=#0 str[len]:=#0;
end; end;
@ -2463,7 +2463,13 @@ implementation
p : tlinkedlistitem; p : tlinkedlistitem;
begin begin
p:=inherited getcopy; p:=inherited getcopy;
getmem(tai_string(p).str,len); if (len>0) and (str[len-1]=#0) then
getmem(tai_string(p).str,len)
else
begin
getmem(tai_string(p).str,len+1);
tai_string(p).str[len]:=#0;
end;
move(str^,tai_string(p).str^,len); move(str^,tai_string(p).str^,len);
getcopy:=p; getcopy:=p;
end; end;

View File

@ -1056,6 +1056,7 @@ implementation
ascii2unicode(value_str,len,current_settings.sourcecodepage,pw); ascii2unicode(value_str,len,current_settings.sourcecodepage,pw);
ansistringdispose(value_str,len); ansistringdispose(value_str,len);
pcompilerwidestring(value_str):=pw; pcompilerwidestring(value_str):=pw;
len:=getlengthwidestring(pw);
end end
else else
{ convert unicode 2 ascii } { convert unicode 2 ascii }
@ -1069,6 +1070,7 @@ implementation
begin begin
pw:=pcompilerwidestring(value_str); pw:=pcompilerwidestring(value_str);
l2:=len; l2:=len;
{ returns room for terminating 0 }
l:=UnicodeToUtf8(nil,0,PUnicodeChar(pw^.data),l2); l:=UnicodeToUtf8(nil,0,PUnicodeChar(pw^.data),l2);
getmem(pc,l); getmem(pc,l);
UnicodeToUtf8(pc,l,PUnicodeChar(pw^.data),l2); UnicodeToUtf8(pc,l,PUnicodeChar(pw^.data),l2);
@ -1081,6 +1083,7 @@ implementation
pw:=pcompilerwidestring(value_str); pw:=pcompilerwidestring(value_str);
getmem(pc,getlengthwidestring(pw)+1); getmem(pc,getlengthwidestring(pw)+1);
unicode2ascii(pw,pc,cp1); unicode2ascii(pw,pc,cp1);
pc[getlengthwidestring(pw)]:=#0;
donewidestring(pw); donewidestring(pw);
value_str:=pc; value_str:=pc;
end; end;
@ -1119,14 +1122,15 @@ implementation
end; end;
initwidestring(pw); initwidestring(pw);
setlengthwidestring(pw,len); setlengthwidestring(pw,len);
{ returns room for terminating 0 } { returns room for terminating 0, Utf8ToUnicode does not write terminating 0 }
l:=Utf8ToUnicode(PUnicodeChar(pw^.data),len,value_str,len); l:=Utf8ToUnicode(PUnicodeChar(pw^.data),len,value_str,len);
if (l<>getlengthwidestring(pw)) then if (l-1<>len) then
begin begin
setlengthwidestring(pw,l); setlengthwidestring(pw,l-1);
ReAllocMem(value_str,l); ReAllocMem(value_str,l);
end; end;
unicode2ascii(pw,value_str,cp1); unicode2ascii(pw,value_str,cp1);
value_str[l-1]:=#0;
len:=l-1; len:=l-1;
donewidestring(pw); donewidestring(pw);
end end
@ -1144,7 +1148,7 @@ implementation
ascii2unicode(value_str,len,cp2,pw); ascii2unicode(value_str,len,cp2,pw);
{ returns room for terminating 0 } { returns room for terminating 0 }
l:=UnicodeToUtf8(nil,0,PUnicodeChar(pw^.data),len); l:=UnicodeToUtf8(nil,0,PUnicodeChar(pw^.data),len);
if l<>len then if (l-1<>len) then
ReAllocMem(value_str,l); ReAllocMem(value_str,l);
UnicodeToUtf8(value_str,l,PUnicodeChar(pw^.data),len); UnicodeToUtf8(value_str,l,PUnicodeChar(pw^.data),len);
len:=l-1; len:=l-1;
@ -1216,6 +1220,7 @@ implementation
{$ifdef DEBUG_NODE_XML} {$ifdef DEBUG_NODE_XML}
procedure TStringConstNode.XMLPrintNodeData(var T: Text); procedure TStringConstNode.XMLPrintNodeData(var T: Text);
var var
l: longint;
OutputStr: ansistring; OutputStr: ansistring;
begin begin
inherited XMLPrintNodeData(T); inherited XMLPrintNodeData(T);
@ -1247,8 +1252,9 @@ implementation
cst_widestring, cst_unicodestring: cst_widestring, cst_unicodestring:
begin begin
{ value_str is of type PCompilerWideString } { value_str is of type PCompilerWideString }
SetLength(OutputStr, len); l := UnicodeToUtf8(nil, 0, PUnicodeChar(PCompilerWideString(value_str)^.data), len);
UnicodeToUtf8(PChar(OutputStr), PUnicodeChar(PCompilerWideString(value_str)^.data), len + 1); { +1 for the null terminator } SetLength(OutputStr, l - 1);
UnicodeToUtf8(PChar(OutputStr), l, PUnicodeChar(PCompilerWideString(value_str)^.data), len);
end; end;
else else
OutputStr := ansistring(value_str); OutputStr := ansistring(value_str);

View File

@ -1557,9 +1557,10 @@ implementation
if shdrs[shstrndx].sh_type<>SHT_STRTAB then if shdrs[shstrndx].sh_type<>SHT_STRTAB then
InternalError(2012060202); InternalError(2012060202);
shstrtablen:=shdrs[shstrndx].sh_size; shstrtablen:=shdrs[shstrndx].sh_size;
GetMem(shstrtab,shstrtablen); GetMem(shstrtab,shstrtablen+1);
FReader.seek(shdrs[shstrndx].sh_offset); FReader.seek(shdrs[shstrndx].sh_offset);
FReader.read(shstrtab^,shstrtablen); FReader.read(shstrtab^,shstrtablen);
shstrtab[shstrtablen]:=#0;
FLoaded[shstrndx]:=True; FLoaded[shstrndx]:=True;
{ Locate the symtable, it is typically at the end so loop backwards. { Locate the symtable, it is typically at the end so loop backwards.
@ -1580,9 +1581,10 @@ implementation
if shdrs[strndx].sh_type<>SHT_STRTAB then if shdrs[strndx].sh_type<>SHT_STRTAB then
InternalError(2012062703); InternalError(2012062703);
strtablen:=shdrs[strndx].sh_size; strtablen:=shdrs[strndx].sh_size;
GetMem(strtab,strtablen); GetMem(strtab,strtablen+1);
FReader.seek(shdrs[strndx].sh_offset); FReader.seek(shdrs[strndx].sh_offset);
FReader.read(strtab^,strtablen); FReader.read(strtab^,strtablen);
strtab[strtablen]:=#0;
symtaboffset:=shdrs[i].sh_offset; symtaboffset:=shdrs[i].sh_offset;
syms:=shdrs[i].sh_size div sizeof(TElfSymbol); syms:=shdrs[i].sh_size div sizeof(TElfSymbol);

View File

@ -1137,7 +1137,8 @@ type
consttyp:=conststring; consttyp:=conststring;
len:=length(s); len:=length(s);
getmem(sp,len+1); getmem(sp,len+1);
move(s[1],sp^,len+1); move(s[1],sp^,len);
sp[len]:=#0;
value.valueptr:=sp; value.valueptr:=sp;
value.len:=len; value.len:=len;
def:=strdef; def:=strdef;

View File

@ -160,7 +160,7 @@ begin
else begin else begin
// Use UTF-8 conversion from RTL // Use UTF-8 conversion from RTL
cp:=CP_UTF8; cp:=CP_UTF8;
len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1; len2:=UnicodeToUtf8(PAnsiChar(dest), len2 + 1, source, len) - 1;
end; end;
if len2 > Length(dest) then begin if len2 > Length(dest) then begin
SetLength(dest, len2); SetLength(dest, len2);
@ -168,7 +168,7 @@ begin
if conv <> nil then if conv <> nil then
len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err) len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
else else
len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1; len2:=UnicodeToUtf8(PAnsiChar(dest), len2 + 1, source, len) - 1;
end; end;
if len2 < 0 then if len2 < 0 then
len2:=0; len2:=0;

View File

@ -1143,7 +1143,7 @@ begin
while hp^<>#0 do while hp^<>#0 do
begin begin
len:=UnicodeToUTF8(Nil, hp, 0); len:=UnicodeToUTF8(Nil, hp, 0);
SetLength(s,len); SetLength(s,len-1);
UnicodeToUTF8(PChar(s), hp, len); UnicodeToUTF8(PChar(s), hp, len);
i:=pos('=',s); i:=pos('=',s);
if uppercase(copy(s,1,i-1))=upperenvvar then if uppercase(copy(s,1,i-1))=upperenvvar then
@ -1191,7 +1191,7 @@ begin
Result:=hp; Result:=hp;
{$else} {$else}
len:=UnicodeToUTF8(Nil, hp, 0); len:=UnicodeToUTF8(Nil, hp, 0);
SetLength(Result, len); SetLength(Result, len-1);
UnicodeToUTF8(PChar(Result), hp, len); UnicodeToUTF8(PChar(Result), hp, len);
SetCodePage(RawByteString(Result),CP_UTF8,false); SetCodePage(RawByteString(Result),CP_UTF8,false);
{$endif} {$endif}

View File

@ -313,7 +313,7 @@ begin
if (cp=CP_UTF8) then if (cp=CP_UTF8) then
begin begin
destLen:=Utf8ToUnicode(nil,high(SizeUint),source,len); destLen:=Utf8ToUnicode(nil,0,source,len);
if destLen > 0 then if destLen > 0 then
SetLength(dest,destLen-1) SetLength(dest,destLen-1)
else else
@ -534,15 +534,14 @@ begin
if (DefaultSystemCodePage=CP_UTF8) then if (DefaultSystemCodePage=CP_UTF8) then
begin begin
//convert to UnicodeString,uppercase,convert back to utf8 //convert to UnicodeString,uppercase,convert back to utf8
ulen:=Utf8ToUnicode(nil,high(SizeUint),@s[1],Length(s)); ulen:=Utf8ToUnicode(nil,0,@s[1],Length(s));
if ulen>0 then
SetLength(us,ulen-1); SetLength(us,ulen-1);
Utf8ToUnicode(@us[1],ulen,@s[1],Length(s)); Utf8ToUnicode(@us[1],ulen,@s[1],Length(s));
us:=UpperUnicodeString(us); us:=UpperUnicodeString(us);
ulen:=Length(us); ulen:=Length(us);
slen:=UnicodeToUtf8(nil,high(SizeUInt),@us[1],ulen); slen:=UnicodeToUtf8(nil,0,@us[1],ulen);
SetLength(Result,slen); SetLength(Result,slen-1);
UnicodeToUtf8(@Result[1],slen,@us[1],ulen); UnicodeToUtf8(@Result[1],slen,@us[1],ulen);
exit; exit;
end; end;
@ -600,15 +599,14 @@ begin
if (DefaultSystemCodePage=CP_UTF8) then if (DefaultSystemCodePage=CP_UTF8) then
begin begin
//convert to UnicodeString,lowercase,convert back to utf8 //convert to UnicodeString,lowercase,convert back to utf8
ulen:=Utf8ToUnicode(nil,high(SizeUInt),@s[1],Length(s)); ulen:=Utf8ToUnicode(nil,0,@s[1],Length(s));
if ulen>0 then
SetLength(us,ulen-1); SetLength(us,ulen-1);
Utf8ToUnicode(@us[1],ulen,@s[1],Length(s)); Utf8ToUnicode(@us[1],ulen,@s[1],Length(s));
us:=LowerUnicodeString(us); us:=LowerUnicodeString(us);
ulen:=Length(us); ulen:=Length(us);
slen:=UnicodeToUtf8(nil,high(SizeUInt),@us[1],ulen); slen:=UnicodeToUtf8(nil,0,@us[1],ulen);
SetLength(Result,slen); SetLength(Result,slen-1);
UnicodeToUtf8(@Result[1],slen,@us[1],ulen); UnicodeToUtf8(@Result[1],slen,@us[1],ulen);
exit; exit;
end; end;