Merge branch 'compiller-str-term0-fix' into 'main'

Fixed missing zero termination of strings; fixed conversions using UnicodeToUtf8 and unicode2ascii

Closes #39746

See merge request freepascal.org/fpc/source!158
This commit is contained in:
Sergey Larin 2023-07-07 21:43:30 +00:00
commit d52d8fcfa9
7 changed files with 39 additions and 26 deletions

View File

@ -2446,7 +2446,7 @@ implementation
len:=ppufile.getlongint;
getmem(str,len+1);
ppufile.getdata(str^,len);
str[len]:=#0
str[len]:=#0;
end;
@ -2463,7 +2463,13 @@ implementation
p : tlinkedlistitem;
begin
p:=inherited getcopy;
getmem(tai_string(p).str,len);
if (len>0) and (str[len-1]=#0) then
getmem(tai_string(p).str,len)
else
begin
getmem(tai_string(p).str,len+1);
tai_string(p).str[len]:=#0;
end;
move(str^,tai_string(p).str^,len);
getcopy:=p;
end;

View File

@ -1056,6 +1056,7 @@ implementation
ascii2unicode(value_str,len,current_settings.sourcecodepage,pw);
ansistringdispose(value_str,len);
pcompilerwidestring(value_str):=pw;
len:=getlengthwidestring(pw);
end
else
{ convert unicode 2 ascii }
@ -1069,6 +1070,7 @@ implementation
begin
pw:=pcompilerwidestring(value_str);
l2:=len;
{ returns room for terminating 0 }
l:=UnicodeToUtf8(nil,0,PUnicodeChar(pw^.data),l2);
getmem(pc,l);
UnicodeToUtf8(pc,l,PUnicodeChar(pw^.data),l2);
@ -1081,6 +1083,7 @@ implementation
pw:=pcompilerwidestring(value_str);
getmem(pc,getlengthwidestring(pw)+1);
unicode2ascii(pw,pc,cp1);
pc[getlengthwidestring(pw)]:=#0;
donewidestring(pw);
value_str:=pc;
end;
@ -1119,14 +1122,15 @@ implementation
end;
initwidestring(pw);
setlengthwidestring(pw,len);
{ returns room for terminating 0 }
{ returns room for terminating 0, Utf8ToUnicode does not write terminating 0 }
l:=Utf8ToUnicode(PUnicodeChar(pw^.data),len,value_str,len);
if (l<>getlengthwidestring(pw)) then
if (l-1<>len) then
begin
setlengthwidestring(pw,l);
setlengthwidestring(pw,l-1);
ReAllocMem(value_str,l);
end;
unicode2ascii(pw,value_str,cp1);
value_str[l-1]:=#0;
len:=l-1;
donewidestring(pw);
end
@ -1144,7 +1148,7 @@ implementation
ascii2unicode(value_str,len,cp2,pw);
{ returns room for terminating 0 }
l:=UnicodeToUtf8(nil,0,PUnicodeChar(pw^.data),len);
if l<>len then
if (l-1<>len) then
ReAllocMem(value_str,l);
UnicodeToUtf8(value_str,l,PUnicodeChar(pw^.data),len);
len:=l-1;
@ -1216,6 +1220,7 @@ implementation
{$ifdef DEBUG_NODE_XML}
procedure TStringConstNode.XMLPrintNodeData(var T: Text);
var
l: longint;
OutputStr: ansistring;
begin
inherited XMLPrintNodeData(T);
@ -1247,8 +1252,9 @@ implementation
cst_widestring, cst_unicodestring:
begin
{ value_str is of type PCompilerWideString }
SetLength(OutputStr, len);
UnicodeToUtf8(PChar(OutputStr), PUnicodeChar(PCompilerWideString(value_str)^.data), len + 1); { +1 for the null terminator }
l := UnicodeToUtf8(nil, 0, PUnicodeChar(PCompilerWideString(value_str)^.data), len);
SetLength(OutputStr, l - 1);
UnicodeToUtf8(PChar(OutputStr), l, PUnicodeChar(PCompilerWideString(value_str)^.data), len);
end;
else
OutputStr := ansistring(value_str);

View File

@ -1557,9 +1557,10 @@ implementation
if shdrs[shstrndx].sh_type<>SHT_STRTAB then
InternalError(2012060202);
shstrtablen:=shdrs[shstrndx].sh_size;
GetMem(shstrtab,shstrtablen);
GetMem(shstrtab,shstrtablen+1);
FReader.seek(shdrs[shstrndx].sh_offset);
FReader.read(shstrtab^,shstrtablen);
shstrtab[shstrtablen]:=#0;
FLoaded[shstrndx]:=True;
{ Locate the symtable, it is typically at the end so loop backwards.
@ -1580,9 +1581,10 @@ implementation
if shdrs[strndx].sh_type<>SHT_STRTAB then
InternalError(2012062703);
strtablen:=shdrs[strndx].sh_size;
GetMem(strtab,strtablen);
GetMem(strtab,strtablen+1);
FReader.seek(shdrs[strndx].sh_offset);
FReader.read(strtab^,strtablen);
strtab[strtablen]:=#0;
symtaboffset:=shdrs[i].sh_offset;
syms:=shdrs[i].sh_size div sizeof(TElfSymbol);

View File

@ -1137,7 +1137,8 @@ type
consttyp:=conststring;
len:=length(s);
getmem(sp,len+1);
move(s[1],sp^,len+1);
move(s[1],sp^,len);
sp[len]:=#0;
value.valueptr:=sp;
value.len:=len;
def:=strdef;

View File

@ -160,7 +160,7 @@ begin
else begin
// Use UTF-8 conversion from RTL
cp:=CP_UTF8;
len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
len2:=UnicodeToUtf8(PAnsiChar(dest), len2 + 1, source, len) - 1;
end;
if len2 > Length(dest) then begin
SetLength(dest, len2);
@ -168,7 +168,7 @@ begin
if conv <> nil then
len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
else
len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
len2:=UnicodeToUtf8(PAnsiChar(dest), len2 + 1, source, len) - 1;
end;
if len2 < 0 then
len2:=0;

View File

@ -1143,7 +1143,7 @@ begin
while hp^<>#0 do
begin
len:=UnicodeToUTF8(Nil, hp, 0);
SetLength(s,len);
SetLength(s,len-1);
UnicodeToUTF8(PChar(s), hp, len);
i:=pos('=',s);
if uppercase(copy(s,1,i-1))=upperenvvar then
@ -1191,7 +1191,7 @@ begin
Result:=hp;
{$else}
len:=UnicodeToUTF8(Nil, hp, 0);
SetLength(Result, len);
SetLength(Result, len-1);
UnicodeToUTF8(PChar(Result), hp, len);
SetCodePage(RawByteString(Result),CP_UTF8,false);
{$endif}

View File

@ -313,7 +313,7 @@ begin
if (cp=CP_UTF8) then
begin
destLen:=Utf8ToUnicode(nil,high(SizeUint),source,len);
destLen:=Utf8ToUnicode(nil,0,source,len);
if destLen > 0 then
SetLength(dest,destLen-1)
else
@ -534,15 +534,14 @@ begin
if (DefaultSystemCodePage=CP_UTF8) then
begin
//convert to UnicodeString,uppercase,convert back to utf8
ulen:=Utf8ToUnicode(nil,high(SizeUint),@s[1],Length(s));
if ulen>0 then
SetLength(us,ulen-1);
ulen:=Utf8ToUnicode(nil,0,@s[1],Length(s));
SetLength(us,ulen-1);
Utf8ToUnicode(@us[1],ulen,@s[1],Length(s));
us:=UpperUnicodeString(us);
ulen:=Length(us);
slen:=UnicodeToUtf8(nil,high(SizeUInt),@us[1],ulen);
SetLength(Result,slen);
slen:=UnicodeToUtf8(nil,0,@us[1],ulen);
SetLength(Result,slen-1);
UnicodeToUtf8(@Result[1],slen,@us[1],ulen);
exit;
end;
@ -600,15 +599,14 @@ begin
if (DefaultSystemCodePage=CP_UTF8) then
begin
//convert to UnicodeString,lowercase,convert back to utf8
ulen:=Utf8ToUnicode(nil,high(SizeUInt),@s[1],Length(s));
if ulen>0 then
SetLength(us,ulen-1);
ulen:=Utf8ToUnicode(nil,0,@s[1],Length(s));
SetLength(us,ulen-1);
Utf8ToUnicode(@us[1],ulen,@s[1],Length(s));
us:=LowerUnicodeString(us);
ulen:=Length(us);
slen:=UnicodeToUtf8(nil,high(SizeUInt),@us[1],ulen);
SetLength(Result,slen);
slen:=UnicodeToUtf8(nil,0,@us[1],ulen);
SetLength(Result,slen-1);
UnicodeToUtf8(@Result[1],slen,@us[1],ulen);
exit;
end;