Converter: change the character encoding of source files to UTF-8

git-svn-id: trunk@31023 -
2025-11-28 14:37:18 +01:00 · 2011-06-03 16:19:19 +00:00 · 2011-06-03 16:19:19 +00:00 · ba43bbf08f
commit ba43bbf08f
parent dfe60001a1
3 changed files with 12 additions and 115 deletions
--- a/converter/convertdelphi.pas
+++ b/converter/convertdelphi.pas
@ -452,9 +452,6 @@ begin
 end;

 function TConvertDelphiUnit.CopyAndLoadFile: TModalResult;
-var
-//  CurEncoding: String;
-  Changed: Boolean;
 begin
  IDEMessagesWindow.AddMsg(Format(lisConvDelphiConvertingFile,
                                  [fOrigUnitFilename]), '', -1);
@ -475,12 +472,11 @@ begin
                         [lbfCheckIfText,lbfUpdateFromDisk],true);
  if Result<>mrOk then exit;
  // Change encoding to UTF-8
-{  CurEncoding:=GuessEncoding(fPascalBuffer.Source); //fPascalBuffer.DiskEncoding;
-  if CurEncoding<>EncodingUTF8 then begin
-    fPascalBuffer.Source:=ConvertEncoding(fPascalBuffer.Source, CurEncoding, EncodingUTF8);
-    fPascalBuffer.DiskEncoding:=EncodingUTF8;
-    fPascalBuffer.MemEncoding:=EncodingUTF8;
-  end; }
+  if fPascalBuffer.DiskEncoding<>EncodingUTF8 then begin
+    IDEMessagesWindow.AddMsg(Format(lisConvDelphiChangedEncodingToUTF8,
+                                    [fPascalBuffer.DiskEncoding]), '', -1);
+    fPascalBuffer.DiskEncoding:=EncodingUTF8; // Takes effect when buffer is saved.
+  end;
  // Create a shared link for codetools.
  Assert(fCTLink=Nil, 'fCTLink should be Nil in CopyAndLoadFile');
  fCTLink:=TCodeToolLink.Create(fPascalBuffer);
@ -499,8 +495,6 @@ var
  LfmFilename: string;     // Lazarus .LFM file name.
  DFMConverter: TDFMConverter;
  TempLFMBuffer: TCodeBuffer;
-//  CurEncoding: String;
-  Changed: Boolean;
 begin
  Result:=mrOK;
  fLFMBuffer:=nil;
@ -538,23 +532,13 @@ begin
      DFMConverter.Free;
    end;
    // Change encoding to UTF-8
-    if fSettings.FixEncoding then begin
-      Result:=LoadCodeBuffer(TempLFMBuffer,LfmFilename,
-                             [lbfCheckIfText,lbfUpdateFromDisk],true);
-      // Note: EnUnicode is meant to be a temporary solution.
-      // LCL has other functions for char encoding.
-      TempLFMBuffer.Source:=EnUnicode(TempLFMBuffer.Source, Changed);
-      if Changed then
-        IDEMessagesWindow.AddMsg(lisConvDelphiChangedEncodingToUTF8, '', -1);
-//      TempLFMBuffer.SaveToFile(ChangeFileExt(TempLFMBuffer.Filename, '_utf8.lfm'));
+    Result:=LoadCodeBuffer(TempLFMBuffer,LfmFilename,
+                           [lbfCheckIfText,lbfUpdateFromDisk],true);
+    if TempLFMBuffer.DiskEncoding<>EncodingUTF8 then begin
+      IDEMessagesWindow.AddMsg(Format(lisConvDelphiChangedEncodingToUTF8,
+                                      [TempLFMBuffer.DiskEncoding]), '', -1);
+      TempLFMBuffer.DiskEncoding:=EncodingUTF8;
      TempLFMBuffer.Save;
-{      CurEncoding:=GuessEncoding(TempLFMBuffer.Source);
-      if CurEncoding<>EncodingUTF8 then begin
-        ShowMessage('Encoding = ' + CurEncoding);
-        TempLFMBuffer.Source:=ConvertEncoding(TempLFMBuffer.Source, CurEncoding, EncodingUTF8);
-        TempLFMBuffer.DiskEncoding:=EncodingUTF8;
-        TempLFMBuffer.MemEncoding:=EncodingUTF8;
-      end; }
    end;
    // Read form file code in.
    if not fSettings.SameDfmFile then begin
--- a/converter/missingpropertiesdlg.pas
+++ b/converter/missingpropertiesdlg.pas
@ -129,98 +129,11 @@ type
    destructor Destroy; override;
  end;

-  function EnUnicode(const TS: UTF8String; var Changed: Boolean): UTF8String;

 implementation

 {$R *.lfm}

-{*******************************************************************************
-Function UTFEnc(S:ansiString):WideChar;
-S - string like '#1234' or like '1234'.
-It process only first 4-5 symbols. (Some kind of protection)
-Result - One Unicode symbol.
-If S isn't an unicode function will return symbol #0000.
-*******************************************************************************}
-function UTFEnc(S: ansistring): WideChar;
-var X: word;    //word - to be sure that it will return Unicode symbol. Not ASCII.
-begin
-  if (S[1]='#')and(Length(S)=5)then
-    X:=StrToIntDef(Copy(S,2,4),0)
-  else
-    X:=StrToIntDef(Copy(S,1,4),0);
-  Result:=WideChar(X);
-end;
-
-{*******************************************************************************
-function EnUnicode(TS:UTF8String):UTF8String;
-TS:UTF8String - Processing string like <<Caption = #1234#1235':'#1258>>
-Function converts it to string like <<Caption = 'АБ:В'>>
-It have some troubles with strings that contains several pairs of apostrophes.
-(<<Form1.Caption := 'String1 '+'String2'>> will converts into
-<<Form1.Caption := 'String1 +String2'>>
-*******************************************************************************}
-function EnUnicode(const TS: UTF8String; var Changed: Boolean): UTF8String;
-var
-  i,
-  LPA,              //LPA = Left Position of Apostroph. First pos of ' in TS
-  LPS,              //LPS = Left Position of Sharp. First pos of # in TS
-  RPA,              //RPA = Right Position of Apostroph. Last pos of ' in TS
-  RPS: integer;     //RPS = Right Position of Sharp. Last pos of # in TS
-  insideAp: Boolean;//inside of two Apostrophes.
-  S,WS: ansistring; //S copying "as is"(for better speed). WS - converts symbol by symbol
-begin
-  Changed:=False;
-  S:='';
-  insideAp:=false;
-  RPS:=0; RPA:=0;
-  for i:=1 to Length(TS) do begin    //find Last positions of ' and #
-    if TS[i]='#' then
-      RPS:=i;                        //May be there is a spec function to do it
-    if TS[i]='''' then
-      RPA:=i;                        //but I didn't find it.
-  end;
-  RPS:=RPS+4; //Actually no need for position of #, but pos of last symbol of sequence #1234
-  i:=1;                              //Now let's find FIRST pos of ' and #
-  LPA:=Pos('''',TS);                 //If ' not found I must throw out processing of
-  if LPA=0 then
-    LPA:=Length(TS)+1;               //any apostrophes in the TS.
-  LPS:=Pos('#',TS);                  //Also for #
-  if LPS=0 then LPS:=Length(TS)+1;
-  if (LPA<LPS) AND (LPA<=Length(TS)) then begin //Now I must define position of first
-    i:=LPA;                                     //symbol either ' or #.
-  end else if (LPS<LPA) AND (LPS<=Length(TS)) then begin
-    i:=LPS;
-  end;
-  if (RPS<=LPS) OR (RPS<=4) OR (RPS>Length(TS)+4) then
-    RPS:=0;                                   //More hiding processing if ' or #
-  if (RPA<=LPA) OR (RPA=0) OR (RPA>Length(TS)) then
-    RPA:=0;                                   // not found
-  if (LPA<=Length(TS)) OR (LPS<=Length(TS)) then begin //if we've found ' or # or both - start process
-     S:=LeftStr(TS,i-1);                      //first part of TS (before ' or #) copy "as is"
-     WS:='';
-     while (i<=Length(TS)) AND ((i<=RPA) OR (i<=RPS)) do begin //process
-       if TS[i]='''' then begin             //current symbol = '
-         insideAp:=not insideAp;            //part inside of '' will be copy as is
-       end;                                 //even it contains #1234 sequenses
-       if (not insideAp) AND (TS[i]='#') then begin  //part outside of ''
-         WS:=WS+UTFEnc(Copy(TS,i,5));       //send to UTFEnc
-         i:=i+4;                            //skipping nummbers
-       end else
-         if TS[i]<>'''' then                //skipping apostrophes themselves
-           WS:=WS+TS[i];
-       i:=i+1;
-     end;
-     //Form1.Memo1.Lines.Add(S+'|'+WS+'|'+Copy(TS,i,Length(TS)));//It was an debug output
-     S:=S+''''+WS+'''';                       //adding apostrophes around processed part
-     S:=S+Copy(TS,i,Length(TS));              //adding rest of string as is
-     Changed:=True;
-  end {if (LPA<=Length(TS))OR(LPS<=Length(TS))}
-  else
-    S:=TS;                                    //TS doesn't contain neither ' nor #. Copy as is.
-  Result:=AnsiToUtf8(S);                      //Result must be an UTF8-string
-end;
-
 function IsMissingType(LFMError: TLFMError): boolean;
 begin
  with LFMError do
--- a/ide/lazarusidestrconsts.pas
+++ b/ide/lazarusidestrconsts.pas
@ -5139,7 +5139,7 @@ resourcestring
  lisConvDelphiConvertingUnitFiles = '*** Converting unit files ... ***';
  lisConvDelphiConvertingFile = '* Converting file %s *';
  lisConvDelphiFixingUsedUnits = '* Fixing used units for file %s *';
-  lisConvDelphiChangedEncodingToUTF8 = 'Changed encoding to UTF-8';
+  lisConvDelphiChangedEncodingToUTF8 = 'Changed encoding from %s to UTF-8';
  lisConvDelphiErrorCanTFindUnit = '%s(%s,%s) Error: Can''t find unit %s';
  lisConvDelphiAllSubDirsScanned = 'All sub-directories will be scanned for unit files';
  lisConvDelphiMissingIncludeFile = '%s(%s,%s) missing include file';