+ support UTF-8 in ascii2unicode(); this fixes the UTF-16 output of
    resourcestring data in .rsj files in case the source file is interpreted as
    UTF-8. Previously, the individual UTF-8 bytes were each stored in a
    separate widechar in the Json file (mantis #28717)

  * due to the fact that rstconv didn't use the cwstring unit on Unix, rstconv
    until now just concatenated the bytes stored in the widechars of the Json
    file on those platforms, i.e., the strings put in the resource file were
    byte for byte equal to what was in the source file. On Windows, these bytes
    were interpreted as individual widechars, converted to the
    DefaultSystemCodePage and then written. This means that for anything but
    ISO-8859-1 (where every widechar from #0000 to #0255 maps to #0 to #255),
    the output got corrupted.

    In order to keep compatibility with the old behaviour whereby rstconv wrote
    the resource strings using the same encoding as in the source file (except
    if the data got completely corrupted, in which case compatibility is
    useless), we now store all resourcestrings twice in the .rsj file: once as
    the exact byte sequence from the source file, and once (properly) encoded
    in UTF-16.

    By default, rstconv will use the byte string and just write that one to the
    resource file. Additionally, there is a new -p option that accepts a code
    page name (see rstconv -h for the list of supported names), which can be
    used to make rstconv use the UTF-16 version and convert that to the desired
    code page (as long as the system on which rstconv runs supports that
    codepage).

    And this also finally resolves mantis #6477.

git-svn-id: trunk@31881 -
This commit is contained in:
Jonas Maebe 2015-09-28 22:14:56 +00:00
parent a1ed7cc162
commit 05bf826342
3 changed files with 102 additions and 24 deletions

View File

@ -237,11 +237,22 @@ uses
message1(general_e_errorwritingresourcefile,ResFileName);
exit;
end;
{ write the data in two formats:
a) backward compatible: the plain bytes from the source file
b) portable: converted to utf-16
}
writeln(f,'{"version":1,"strings":[');
R:=TResourceStringItem(List.First);
while assigned(R) do
begin
write(f, '{"hash":',R.Hash,',"name":"',R.Name,'","value":"');
write(f, '{"hash":',R.Hash,',"name":"',R.Name,'","sourcebytes":[');
for i:=0 to R.Len-1 do
begin
write(f,ord(R.Value[i]));
if i<>R.Len-1 then
write(f,',');
end;
write(f,'],"value":"');
initwidestring(W);
ascii2unicode(R.Value,R.Len,current_settings.sourcecodepage,W);
for I := 0 to W^.len - 1 do

View File

@ -201,6 +201,7 @@ unit widestr;
Result := getascii(c,getmap(current_settings.sourcecodepage))[1];
end;
procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : pcompilerwidestring;codepagetranslation : boolean = true);
var
source : pchar;
@ -212,15 +213,25 @@ unit widestr;
setlengthwidestring(r,l);
source:=p;
dest:=tcompilerwidecharptr(r^.data);
if (cp<>CP_UTF8) and
codepagetranslation then
if codepagetranslation then
begin
for i:=1 to l do
begin
dest^:=getunicode(source^,m);
inc(dest);
inc(source);
end;
if cp<>CP_UTF8 then
begin
for i:=1 to l do
begin
dest^:=getunicode(source^,m);
inc(dest);
inc(source);
end;
end
else
begin
r^.len:=Utf8ToUnicode(punicodechar(r^.data),r^.maxlen,p,l);
{ -1, because utf8tounicode includes room for a terminating 0 in
its result count }
if r^.len>0 then
dec(r^.len);
end;
end
else
begin
@ -233,6 +244,7 @@ unit widestr;
end;
end;
procedure unicode2ascii(r : pcompilerwidestring;p:pchar;cp : tstringencoding);
var
m : punicodemap;

View File

@ -18,7 +18,11 @@
program rstconv;
uses sysutils, classes, jsonparser, fpjson;
uses
{$ifdef unix}
cwstring,
{$endif}
sysutils, classes, jsonparser, fpjson, charset, cpall;
resourcestring
help =
@ -40,7 +44,10 @@ resourcestring
'Resource compiler script only options are:'+LineEnding+
' -s Use STRINGTABLE instead of MESSAGETABLE'+LineEnding+
' -c identifier Use identifier as ID base (ID+n) (OPTIONAL)'+LineEnding+
' -n number Specifies the first ID number (OPTIONAL)'+LineEnding;
' -n number Specifies the first ID number (OPTIONAL)'+LineEnding+
'.rsj-input format-only options are:'+LineEnding+
' -p codepage Convert the string data to the specified code page before'+LineEnding+
' writing it to the output file. Possible values:';
InvalidOption = 'Invalid option - ';
@ -50,7 +57,9 @@ resourcestring
InvalidOutputFormat = 'Invalid output format -';
MessageNumberTooBig = 'Message number too big';
InvalidRange = 'Invalid range of the first message number';
MissingOption = 'Missing option after parameter ';
UnsupportedOutputCodePage = 'Unsupported output code page specified: ';
RstNoOutputCodePage = 'It is not possible to specify an output code page when using a .rst file';
type
@ -62,8 +71,9 @@ type
var
InFilename, OutFilename: String;
ConstItems: TCollection;
CharSet: String;
HeaderCharSet: String;
Identifier: String;
OutputCodePage: Longint;
FirstMessage: Word;
MessageTable: Boolean;
@ -121,12 +131,15 @@ procedure ReadRSJFile;
var
Stream: TFileStream;
Parser: TJSONParser;
JsonItems: TJSONArray;
JsonItems,
RawStringData: TJSONArray;
JsonData, JsonItem: TJSONObject;
S: String;
item: TConstItem;
DotPos, I: Integer;
DotPos, I, J: Integer;
begin
if OutputCodePage<>-1 then
DefaultSystemCodePage:=OutputCodePage;
Stream := TFileStream.Create(InFilename, fmOpenRead or fmShareDenyNone);
Parser := TJSONParser.Create(Stream);
try
@ -141,7 +154,17 @@ begin
DotPos := Pos('.', s);
item.ModuleName := Copy(s, 1, DotPos - 1);
item.ConstName := Copy(s, DotPos + 1, Length(S) - DotPos);
item.Value := JsonItem.Get('value');
if OutputCodePage=-1 then
begin
RawStringData:=JsonItem.Get('sourcebytes',TJSONArray(nil));
SetLength(item.Value, RawStringData.Count);
for J := 1 to Length(item.Value) do
item.Value[J]:=char(RawStringData.Integers[J-1]);
end
else
{ automatically converts from UTF-16 to the correct code page due
to the change of DefaultSystemCodePage to OutputCodePage above }
item.Value := JsonItem.Get('value');
end;
finally
JsonData.Free;
@ -164,12 +187,12 @@ begin
Assign(f, OutFilename);
Rewrite(f);
if CharSet<>'' then begin
if HeaderCharSet<>'' then begin
// Write file header with
WriteLn(f, 'msgid ""');
WriteLn(f, 'msgstr ""');
WriteLn(f, '"MIME-Version: 1.0\n"');
WriteLn(f, '"Content-Type: text/plain; charset=', CharSet, '\n"');
WriteLn(f, '"Content-Type: text/plain; charset=', HeaderCharSet, '\n"');
WriteLn(f, '"Content-Transfer-Encoding: 8bit\n"');
WriteLn(f);
end;
@ -345,15 +368,21 @@ begin
if (ParamStr(1) = '-h') or (ParamStr(1) = '--help') then begin
WriteLn(help);
for i:=low(word) to high(word) do
if mappingavailable(i) then
writeln(' ',getmap(i)^.cpname);
{ UTF-8 is not supported via the CharSet unit }
writeln(' UTF-8');
exit;
end;
ConversionProc := @ConvertToGettextPO;
OutputFormat:='';
CharSet:='';
HeaderCharSet:='';
Identifier:='';
FirstMessage:=0;
MessageTable:=True;
OutputCodePage:=-1;
i := 1;
while i <= ParamCount do begin
@ -391,11 +420,11 @@ begin
Inc(i, 2);
end else if ParamStr(i) = '-c' then begin
if (OutputFormat='') or (OutputFormat='po') then begin
if CharSet <> '' then begin
if HeaderCharSet <> '' then begin
WriteLn(StdErr, OptionAlreadySpecified, '-c');
Halt(1);
end;
CharSet:=ParamStr(i+1);
HeaderCharSet:=ParamStr(i+1);
end else
begin
if Identifier <> '' then begin
@ -428,13 +457,32 @@ begin
end;
end;
Inc(i, 2);
end else begin
end else if ParamStr(i) = '-p' then
begin
if paramcount=i then
begin
WriteLn(StdErr, MissingOption,'-p');
Halt(1)
end;
if UpperCase(paramstr(i+1))<>'UTF-8' then
if not mappingavailable(ParamStr(i+1)) then
begin
WriteLn(StdErr, UnsupportedOutputCodePage, ParamStr(i+1));
Halt(1);
end
else
OutputCodePage:=getmap(ParamStr(i+1))^.cp
else
OutputCodePage:=CP_UTF8;
Inc(i, 2);
end
else begin
WriteLn(StdErr, InvalidOption, ParamStr(i));
Halt(1);
end;
end;
If ((OutputFormat<>'') and (OutputFormat<>'po')) and (CharSet<>'') then begin
If ((OutputFormat<>'') and (OutputFormat<>'po')) and (HeaderCharSet<>'') then begin
WriteLn(StdErr, InvalidOption, '');
Halt(1);
end;
@ -459,7 +507,14 @@ begin
if ExtractFileExt(InFilename) = '.rsj' then
ReadRSJFile
else
ReadRSTFile;
begin
if OutputCodePage<>-1 then
begin
WriteLn(StdErr, RstNoOutputCodePage);
Halt(1);
end;
ReadRSTFile;
end;
ConversionProc;
end.