Merge branch 'compiller-str-term0-fix' into 'main'

Fixed missing zero termination for strings; fixed conversions using UnicodeToUtf8 and unicode2ascii. Closes #39746. See merge request freepascal.org/fpc/source!158
2025-04-21 01:29:29 +02:00 · 2023-07-07 21:43:30 +00:00 · 2023-07-07 21:43:30 +00:00 · d52d8fcfa9
commit d52d8fcfa9
parent 216183c31f d4bdb96163
7 changed files with 39 additions and 26 deletions
--- a/compiler/aasmtai.pas
+++ b/compiler/aasmtai.pas
@ -2446,7 +2446,7 @@ implementation
        len:=ppufile.getlongint;
        getmem(str,len+1);
        ppufile.getdata(str^,len);
-        str[len]:=#0
+        str[len]:=#0;
      end;


@ -2463,7 +2463,13 @@ implementation
        p : tlinkedlistitem;
      begin
        p:=inherited getcopy;
-        getmem(tai_string(p).str,len);
+        if (len>0) and (str[len-1]=#0) then
+          getmem(tai_string(p).str,len)
+        else
+          begin
+            getmem(tai_string(p).str,len+1);
+            tai_string(p).str[len]:=#0;
+          end;
        move(str^,tai_string(p).str^,len);
        getcopy:=p;
      end;
--- a/compiler/ncon.pas
+++ b/compiler/ncon.pas
@ -1056,6 +1056,7 @@ implementation
            ascii2unicode(value_str,len,current_settings.sourcecodepage,pw);
            ansistringdispose(value_str,len);
            pcompilerwidestring(value_str):=pw;
+            len:=getlengthwidestring(pw);
          end
        else
          { convert unicode 2 ascii }
@ -1069,6 +1070,7 @@ implementation
                begin
                  pw:=pcompilerwidestring(value_str);
                  l2:=len;
+                  { returns room for terminating 0 }
                  l:=UnicodeToUtf8(nil,0,PUnicodeChar(pw^.data),l2);
                  getmem(pc,l);
                  UnicodeToUtf8(pc,l,PUnicodeChar(pw^.data),l2);
@ -1081,6 +1083,7 @@ implementation
                  pw:=pcompilerwidestring(value_str);
                  getmem(pc,getlengthwidestring(pw)+1);
                  unicode2ascii(pw,pc,cp1);
+                  pc[getlengthwidestring(pw)]:=#0;
                  donewidestring(pw);
                  value_str:=pc;
                end;
@ -1119,14 +1122,15 @@ implementation
                            end;
                          initwidestring(pw);
                          setlengthwidestring(pw,len);
-                          { returns room for terminating 0 }
+                          { returns room for terminating 0, Utf8ToUnicode does not write terminating 0 }
                          l:=Utf8ToUnicode(PUnicodeChar(pw^.data),len,value_str,len);
-                          if (l<>getlengthwidestring(pw)) then
+                          if (l-1<>len) then
                            begin
-                              setlengthwidestring(pw,l);
+                              setlengthwidestring(pw,l-1);
                              ReAllocMem(value_str,l);
                            end;
                          unicode2ascii(pw,value_str,cp1);
+                          value_str[l-1]:=#0;
                          len:=l-1;
                          donewidestring(pw);
                        end
@ -1144,7 +1148,7 @@ implementation
                          ascii2unicode(value_str,len,cp2,pw);
                          { returns room for terminating 0 }
                          l:=UnicodeToUtf8(nil,0,PUnicodeChar(pw^.data),len);
-                          if l<>len then
+                          if (l-1<>len) then
                            ReAllocMem(value_str,l);
                          UnicodeToUtf8(value_str,l,PUnicodeChar(pw^.data),len);
                          len:=l-1;
@ -1216,6 +1220,7 @@ implementation
 {$ifdef DEBUG_NODE_XML}
    procedure TStringConstNode.XMLPrintNodeData(var T: Text);
      var
+        l: longint;
        OutputStr: ansistring;
      begin
        inherited XMLPrintNodeData(T);
@ -1247,8 +1252,9 @@ implementation
        cst_widestring, cst_unicodestring:
          begin
            { value_str is of type PCompilerWideString }
-            SetLength(OutputStr, len);
-            UnicodeToUtf8(PChar(OutputStr), PUnicodeChar(PCompilerWideString(value_str)^.data), len + 1); { +1 for the null terminator }
+            l := UnicodeToUtf8(nil, 0, PUnicodeChar(PCompilerWideString(value_str)^.data), len);
+            SetLength(OutputStr, l - 1);
+            UnicodeToUtf8(PChar(OutputStr), l, PUnicodeChar(PCompilerWideString(value_str)^.data), len);
          end;
        else
          OutputStr := ansistring(value_str);
--- a/compiler/ogelf.pas
+++ b/compiler/ogelf.pas
@ -1557,9 +1557,10 @@ implementation
        if shdrs[shstrndx].sh_type<>SHT_STRTAB then
          InternalError(2012060202);
        shstrtablen:=shdrs[shstrndx].sh_size;
-        GetMem(shstrtab,shstrtablen);
+        GetMem(shstrtab,shstrtablen+1);
        FReader.seek(shdrs[shstrndx].sh_offset);
        FReader.read(shstrtab^,shstrtablen);
+        shstrtab[shstrtablen]:=#0;
        FLoaded[shstrndx]:=True;

        { Locate the symtable, it is typically at the end so loop backwards.
@ -1580,9 +1581,10 @@ implementation
            if shdrs[strndx].sh_type<>SHT_STRTAB then
              InternalError(2012062703);
            strtablen:=shdrs[strndx].sh_size;
-            GetMem(strtab,strtablen);
+            GetMem(strtab,strtablen+1);
            FReader.seek(shdrs[strndx].sh_offset);
            FReader.read(strtab^,strtablen);
+            strtab[strtablen]:=#0;

            symtaboffset:=shdrs[i].sh_offset;
            syms:=shdrs[i].sh_size div sizeof(TElfSymbol);
--- a/compiler/scanner.pas
+++ b/compiler/scanner.pas
@ -1137,7 +1137,8 @@ type
      consttyp:=conststring;
      len:=length(s);
      getmem(sp,len+1);
-      move(s[1],sp^,len+1);
+      move(s[1],sp^,len);
+      sp[len]:=#0;
      value.valueptr:=sp;
      value.len:=len;
      def:=strdef;
--- a/rtl/android/cwstring.pp
+++ b/rtl/android/cwstring.pp
@ -160,7 +160,7 @@ begin
  else begin
    // Use UTF-8 conversion from RTL
    cp:=CP_UTF8;
-    len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
+    len2:=UnicodeToUtf8(PAnsiChar(dest), len2 + 1, source, len) - 1;
  end;
  if len2 > Length(dest) then begin
    SetLength(dest, len2);
@ -168,7 +168,7 @@ begin
    if conv <> nil then
      len2:=ucnv_fromUChars(conv, PAnsiChar(dest), len2, source, len, err)
    else
-      len2:=UnicodeToUtf8(PAnsiChar(dest), len2, source, len) - 1;
+      len2:=UnicodeToUtf8(PAnsiChar(dest), len2 + 1, source, len) - 1;
  end;
  if len2 < 0 then
    len2:=0;
--- a/rtl/nativent/sysutils.pp
+++ b/rtl/nativent/sysutils.pp
@ -1143,7 +1143,7 @@ begin
   while hp^<>#0 do
     begin
        len:=UnicodeToUTF8(Nil, hp, 0);
-        SetLength(s,len);
+        SetLength(s,len-1);
        UnicodeToUTF8(PChar(s), hp, len);
        i:=pos('=',s);
        if uppercase(copy(s,1,i-1))=upperenvvar then
@ -1191,7 +1191,7 @@ begin
        Result:=hp;
 {$else}
        len:=UnicodeToUTF8(Nil, hp, 0);
-        SetLength(Result, len);
+        SetLength(Result, len-1);
        UnicodeToUTF8(PChar(Result), hp, len);
        SetCodePage(RawByteString(Result),CP_UTF8,false);
 {$endif}
--- a/rtl/objpas/fpwidestring.pp
+++ b/rtl/objpas/fpwidestring.pp
@ -313,7 +313,7 @@ begin

  if (cp=CP_UTF8) then
    begin
-      destLen:=Utf8ToUnicode(nil,high(SizeUint),source,len);
+      destLen:=Utf8ToUnicode(nil,0,source,len);
      if destLen > 0 then
        SetLength(dest,destLen-1)
      else
@ -534,15 +534,14 @@ begin
  if (DefaultSystemCodePage=CP_UTF8) then
    begin
      //convert to UnicodeString,uppercase,convert back to utf8
-      ulen:=Utf8ToUnicode(nil,high(SizeUint),@s[1],Length(s));
-      if ulen>0 then
-        SetLength(us,ulen-1);
+      ulen:=Utf8ToUnicode(nil,0,@s[1],Length(s));
+      SetLength(us,ulen-1);
      Utf8ToUnicode(@us[1],ulen,@s[1],Length(s));
      us:=UpperUnicodeString(us);

      ulen:=Length(us);
-      slen:=UnicodeToUtf8(nil,high(SizeUInt),@us[1],ulen);
-      SetLength(Result,slen);
+      slen:=UnicodeToUtf8(nil,0,@us[1],ulen);
+      SetLength(Result,slen-1);
      UnicodeToUtf8(@Result[1],slen,@us[1],ulen);
      exit;
    end;
@ -600,15 +599,14 @@ begin
  if (DefaultSystemCodePage=CP_UTF8) then
    begin
      //convert to UnicodeString,lowercase,convert back to utf8
-      ulen:=Utf8ToUnicode(nil,high(SizeUInt),@s[1],Length(s));
-      if ulen>0 then
-        SetLength(us,ulen-1);
+      ulen:=Utf8ToUnicode(nil,0,@s[1],Length(s));
+      SetLength(us,ulen-1);
      Utf8ToUnicode(@us[1],ulen,@s[1],Length(s));
      us:=LowerUnicodeString(us);

      ulen:=Length(us);
-      slen:=UnicodeToUtf8(nil,high(SizeUInt),@us[1],ulen);
-      SetLength(Result,slen);
+      slen:=UnicodeToUtf8(nil,0,@us[1],ulen);
+      SetLength(Result,slen-1);
      UnicodeToUtf8(@Result[1],slen,@us[1],ulen);
      exit;
    end;