LazUtils: Rewritten comparison of formatting arguments of translations (in PO files). Now formatting arguments are compared in their entirety (including index, width and precision).

The reasons are: 0. Working with arguments is very prone to typos. 1. Removing index of arguments in translation can lead to exceptions. 2. Adding index to arguments in translation can lead to incorrect argument selection if e. g. '*' usage in arguments and relation of indexed and non-indexed arguments are not properly considered. Validating all this is a big complicated mess which should better be avoided. 3. Allowing to change width/precision can lead to incorrect argument rendering. This means that swapping indexes is now effectively forbidden. This is not expected to create any unavoidable problems for translators, though. In any case such issues can better be solved by changing resource strings instead on a case by case basis. This change allowed to spot many missed argument mismatches in different languages.
2025-08-28 15:40:41 +02:00 · 2022-12-07 04:58:25 +03:00 · 2022-12-07 04:58:25 +03:00 · 045ba029e3
commit 045ba029e3
parent 68c529f404
1 changed files with 53 additions and 52 deletions
--- a/components/lazutils/translations.pas
+++ b/components/lazutils/translations.pas
@ -344,83 +344,84 @@ end;

 function ExtractFormatArgs(S: String; out ArgumentError: Integer): String;
 const
-  FormatArgs = 'DEFGMNPSUX';
  FormatChar = '%';
-  FormatSpecs = ':-.0123456789';
 var
-  p: PtrInt;
-  NewStr, Symb: String;
+  i, ArgStartPosition, ArgCount: Integer;
+  ArgStartDetected, ArgErrorFlag: Boolean;
 begin
-  NewStr := '';
+  Result := '';
+  ArgCount := 0;
  ArgumentError := 0;
-  p := UTF8Pos(FormatChar, S);
-  while (Length(S)>0) and (p>0) and (ArgumentError=0) do
+  ArgStartPosition := 0;
+
+  // note that formatting arguments are strictly ASCII
+  ArgErrorFlag := false;
+  ArgStartDetected := false;
+  i := 1;
+  while (i <= Length(S)) and (ArgErrorFlag = false) do
  begin
-    UTF8Delete(S, 1, p);
-    if Length(S)>0 then
+    if ArgStartDetected = false then
    begin
-      Symb := UTF8UpperCase(UTF8Copy(S, 1, 1));
-      while (Length(S)>1) and (UTF8Pos(Symb, FormatSpecs)>0) do
+      if S[i] = FormatChar then
      begin
-        //weak syntax check for formatting options, skip them if found
-        UTF8Delete(S, 1, 1);
-        Symb := UTF8UpperCase(UTF8Copy(S, 1, 1));
+        ArgStartDetected := true;
+        ArgStartPosition := i;
      end;
-      if Symb <> FormatChar then
-      begin
-        NewStr := NewStr+Symb;
-        if UTF8Pos(Symb, FormatArgs)=0 then
-          ArgumentError := Utf8Length(NewStr);
-      end;
-      //removing processed symbol
-      UTF8Delete(S, 1, 1);
-      //searching for next argument
-      p := UTF8Pos(FormatChar, S);
    end
    else
-      //in this case formatting symbol doesn't have its argument
-      ArgumentError := Utf8Length(NewStr) + 1;
+    begin
+      if (S[i] = FormatChar) and (S[i] = S[i-1]) then
+        // escaped percent sign ('%%') was found, skip it
+        ArgStartDetected := false
+      else
+      begin
+        case S[i] of
+          // the following symbols are normal inside formatting argument, do nothing when they are encountered
+          ':', '-', '.', '*', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':;
+          // the following case insensitive symbols denote the end of the argument
+          'D', 'E', 'F', 'G', 'M', 'N', 'P', 'S', 'U', 'X',
+            'd', 'e', 'f', 'g', 'm', 'n', 'p', 's', 'u', 'x':
+            begin
+              ArgStartDetected := false;
+              Result := Result + Copy(S, ArgStartPosition + 1, i - ArgStartPosition);
+              Inc(ArgCount);
+            end;
+          otherwise
+            ArgErrorFlag := true;
+        end;
+      end;
+    end;
+    Inc(i);
  end;
-  Result := NewStr;
+  // ArgStartDetected=true here means ArgErrorFlag is true or no end of the last argument found
+  if ArgStartDetected = true then
+    ArgumentError := ArgCount + 1;
+  Result := LowerCase(Result);
 end;

 function CompareFormatArgs(S1, S2: String): Boolean;
+// Note, the purpose of this function is to only ensure that S1 and S2 have
+// identical formatting arguments (it is acceptable for them to have formatting
+// errors in this case). This allows to avoid crashes due to typos in formatting
+// arguments of translations.
 var
  Extr1, Extr2: String;
  ArgErr1, ArgErr2: Integer;
 begin
  Result := true;
-  //do not check arguments if strings are equal to save time and avoid some
-  //false positives, e.g. for '{%Region}' string in lazarusidestrconsts
+  // Do not check arguments if strings are equal to save time and avoid some
+  // false positives, e.g. for '{%Region}' string in lazarusidestrconsts.
  if S1 <> S2 then
  begin
    Extr1 := ExtractFormatArgs(S1, ArgErr1);
-    Extr2 := ExtractFormatArgs(S2, ArgErr2);
-    //writeln('Extr1 = ',Extr1,' ArgErr1 = ',ArgErr1);
-    //writeln('Extr2 = ',Extr1,' ArgErr2 = ',ArgErr2);
-    if (ArgErr1 = 0) then
+    // If S1 does not contain arguments, there is no point to analyze S2
+    // further too, just do nothing and thus report positive result.
+    if not ((ArgErr1 = 0) and (Extr1 = '')) then
    begin
-      if (ArgErr2 = 0) then
-      begin
-        Result := UTF8CompareLatinTextFast(Extr1, Extr2) = 0;
-      end
-      else
-      begin
-        //Extr2 can have dangling %'s
-        //e.g. Extr1 = "%s %d" Extr2 = "%s %d {%H}", it does not make sense, but it's not illegal
-        if (ArgErr2 = Utf8Length(Extr1)+1) and not (ArgErr2 > Utf8Length(Extr2)) then Extr2 := Utf8Copy(Extr2,1,ArgErr2-1);
-        Result := UTF8CompareLatinTextFast(Extr1, Extr2) = 0;
-      end;
-    end
-    else
-    begin  //ArgErr1 <> 0
-      //Assume Extr1 is always legal, otherwise the IDE would crash in it's default language...
-      //Only compare until the last valid argument in Extr1
-      if (ArgErr1 = Utf8Length(Extr1)) then Utf8Delete(Extr1, ArgErr1, 1);
-      if Utf8Length(Extr2) > Utf8Length(Extr1) then Extr2 := Utf8Copy(Extr2, 1, Utf8Length(Extr1));
+      // If S1 contains arguments, then analyze S2
+      Extr2 := ExtractFormatArgs(S2, ArgErr2);
      Result := UTF8CompareLatinTextFast(Extr1, Extr2) = 0;
    end;
-    //writeln('CompareFormatArgs: Result = ',Result);
  end;
 end;