From ba43bbf08fc4bc61af3ace233dd7a1fa56fc0c9c Mon Sep 17 00:00:00 2001
From: juha <juha@lazarus-ide.org>
Date: Fri, 3 Jun 2011 16:19:19 +0000
Subject: [PATCH] Converter: change the character encoding of source files to
 UTF-8

git-svn-id: trunk@31023 -
---
 converter/convertdelphi.pas        | 38 ++++---------
 converter/missingpropertiesdlg.pas | 87 ------------------------------
 ide/lazarusidestrconsts.pas        |  2 +-
 3 files changed, 12 insertions(+), 115 deletions(-)

diff --git a/converter/convertdelphi.pas b/converter/convertdelphi.pas
index a208984789..7da2a839bc 100644
--- a/converter/convertdelphi.pas
+++ b/converter/convertdelphi.pas
@@ -452,9 +452,6 @@ begin
 end;
 
 function TConvertDelphiUnit.CopyAndLoadFile: TModalResult;
-var
-//  CurEncoding: String;
-  Changed: Boolean;
 begin
   IDEMessagesWindow.AddMsg(Format(lisConvDelphiConvertingFile,
                                   [fOrigUnitFilename]), '', -1);
@@ -475,12 +472,11 @@ begin
                          [lbfCheckIfText,lbfUpdateFromDisk],true);
   if Result<>mrOk then exit;
   // Change encoding to UTF-8
-{  CurEncoding:=GuessEncoding(fPascalBuffer.Source); //fPascalBuffer.DiskEncoding;
-  if CurEncoding<>EncodingUTF8 then begin
-    fPascalBuffer.Source:=ConvertEncoding(fPascalBuffer.Source, CurEncoding, EncodingUTF8);
-    fPascalBuffer.DiskEncoding:=EncodingUTF8;
-    fPascalBuffer.MemEncoding:=EncodingUTF8;
-  end; }
+  if fPascalBuffer.DiskEncoding<>EncodingUTF8 then begin
+    IDEMessagesWindow.AddMsg(Format(lisConvDelphiChangedEncodingToUTF8,
+                                    [fPascalBuffer.DiskEncoding]), '', -1);
+    fPascalBuffer.DiskEncoding:=EncodingUTF8; // Takes effect when buffer is saved.
+  end;
   // Create a shared link for codetools.
   Assert(fCTLink=Nil, 'fCTLink should be Nil in CopyAndLoadFile');
   fCTLink:=TCodeToolLink.Create(fPascalBuffer);
@@ -499,8 +495,6 @@ var
   LfmFilename: string;     // Lazarus .LFM file name.
   DFMConverter: TDFMConverter;
   TempLFMBuffer: TCodeBuffer;
-//  CurEncoding: String;
-  Changed: Boolean;
 begin
   Result:=mrOK;
   fLFMBuffer:=nil;
@@ -538,23 +532,14 @@ begin
       DFMConverter.Free;
     end;
     // Change encoding to UTF-8
-    if fSettings.FixEncoding then begin
-      Result:=LoadCodeBuffer(TempLFMBuffer,LfmFilename,
-                             [lbfCheckIfText,lbfUpdateFromDisk],true);
-      // Note: EnUnicode is meant to be a temporary solution.
-      // LCL has other functions for char encoding.
-      TempLFMBuffer.Source:=EnUnicode(TempLFMBuffer.Source, Changed);
-      if Changed then
-        IDEMessagesWindow.AddMsg(lisConvDelphiChangedEncodingToUTF8, '', -1);
-//      TempLFMBuffer.SaveToFile(ChangeFileExt(TempLFMBuffer.Filename, '_utf8.lfm'));
+    Result:=LoadCodeBuffer(TempLFMBuffer,LfmFilename,
+                           [lbfCheckIfText,lbfUpdateFromDisk],true);
+    if Result<>mrOk then exit; // Same guard as in CopyAndLoadFile: do not touch the buffer after a failed load.
+    if TempLFMBuffer.DiskEncoding<>EncodingUTF8 then begin
+      IDEMessagesWindow.AddMsg(Format(lisConvDelphiChangedEncodingToUTF8,
+                                      [TempLFMBuffer.DiskEncoding]), '', -1);
+      TempLFMBuffer.DiskEncoding:=EncodingUTF8;
       TempLFMBuffer.Save;
-{      CurEncoding:=GuessEncoding(TempLFMBuffer.Source);
-      if CurEncoding<>EncodingUTF8 then begin
-        ShowMessage('Encoding = ' + CurEncoding);
-        TempLFMBuffer.Source:=ConvertEncoding(TempLFMBuffer.Source, CurEncoding, EncodingUTF8);
-        TempLFMBuffer.DiskEncoding:=EncodingUTF8;
-        TempLFMBuffer.MemEncoding:=EncodingUTF8;
-      end; }
     end;
     // Read form file code in.
     if not fSettings.SameDfmFile then begin
diff --git a/converter/missingpropertiesdlg.pas b/converter/missingpropertiesdlg.pas
index ae273bcb85..dc35fc6e54 100644
--- a/converter/missingpropertiesdlg.pas
+++ b/converter/missingpropertiesdlg.pas
@@ -129,98 +129,11 @@ type
     destructor Destroy; override;
   end;
 
-  function EnUnicode(const TS: UTF8String; var Changed: Boolean): UTF8String;
 
 implementation
 
 {$R *.lfm}
 
-{*******************************************************************************
-Function UTFEnc(S:ansiString):WideChar;
-S - string like '#1234' or like '1234'.
-It process only first 4-5 symbols. (Some kind of protection)
-Result - One Unicode symbol.
-If S isn't an unicode function will return symbol #0000.
-*******************************************************************************}
-function UTFEnc(S: ansistring): WideChar;
-var X: word;    //word - to be sure that it will return Unicode symbol. Not ASCII.
-begin
-  if (S[1]='#')and(Length(S)=5)then
-    X:=StrToIntDef(Copy(S,2,4),0)
-  else
-    X:=StrToIntDef(Copy(S,1,4),0);
-  Result:=WideChar(X);
-end;
-
-{*******************************************************************************
-function EnUnicode(TS:UTF8String):UTF8String;
-TS:UTF8String - Processing string like <<Caption = #1234#1235':'#1258>>
-Function converts it to string like <<Caption = 'АБ:В'>>
-It have some troubles with strings that contains several pairs of apostrophes.
-(<<Form1.Caption := 'String1 '+'String2'>> will converts into
-<<Form1.Caption := 'String1 +String2'>>
-*******************************************************************************}
-function EnUnicode(const TS: UTF8String; var Changed: Boolean): UTF8String;
-var
-  i,
-  LPA,              //LPA = Left Position of Apostroph. First pos of ' in TS
-  LPS,              //LPS = Left Position of Sharp. First pos of # in TS
-  RPA,              //RPA = Right Position of Apostroph. Last pos of ' in TS
-  RPS: integer;     //RPS = Right Position of Sharp. Last pos of # in TS
-  insideAp: Boolean;//inside of two Apostrophes.
-  S,WS: ansistring; //S copying "as is"(for better speed). WS - converts symbol by symbol
-begin
-  Changed:=False;
-  S:='';
-  insideAp:=false;
-  RPS:=0; RPA:=0;
-  for i:=1 to Length(TS) do begin    //find Last positions of ' and #
-    if TS[i]='#' then
-      RPS:=i;                        //May be there is a spec function to do it
-    if TS[i]='''' then
-      RPA:=i;                        //but I didn't find it.
-  end;
-  RPS:=RPS+4; //Actually no need for position of #, but pos of last symbol of sequence #1234
-  i:=1;                              //Now let's find FIRST pos of ' and #
-  LPA:=Pos('''',TS);                 //If ' not found I must throw out processing of
-  if LPA=0 then
-    LPA:=Length(TS)+1;               //any apostrophes in the TS.
-  LPS:=Pos('#',TS);                  //Also for #
-  if LPS=0 then LPS:=Length(TS)+1;
-  if (LPA<LPS) AND (LPA<=Length(TS)) then begin //Now I must define position of first
-    i:=LPA;                                     //symbol either ' or #.
-  end else if (LPS<LPA) AND (LPS<=Length(TS)) then begin
-    i:=LPS;
-  end;
-  if (RPS<=LPS) OR (RPS<=4) OR (RPS>Length(TS)+4) then
-    RPS:=0;                                   //More hiding processing if ' or #
-  if (RPA<=LPA) OR (RPA=0) OR (RPA>Length(TS)) then
-    RPA:=0;                                   // not found
-  if (LPA<=Length(TS)) OR (LPS<=Length(TS)) then begin //if we've found ' or # or both - start process
-     S:=LeftStr(TS,i-1);                      //first part of TS (before ' or #) copy "as is"
-     WS:='';
-     while (i<=Length(TS)) AND ((i<=RPA) OR (i<=RPS)) do begin //process
-       if TS[i]='''' then begin             //current symbol = '
-         insideAp:=not insideAp;            //part inside of '' will be copy as is
-       end;                                 //even it contains #1234 sequenses
-       if (not insideAp) AND (TS[i]='#') then begin  //part outside of ''
-         WS:=WS+UTFEnc(Copy(TS,i,5));       //send to UTFEnc
-         i:=i+4;                            //skipping nummbers
-       end else
-         if TS[i]<>'''' then                //skipping apostrophes themselves
-           WS:=WS+TS[i];
-       i:=i+1;
-     end;
-     //Form1.Memo1.Lines.Add(S+'|'+WS+'|'+Copy(TS,i,Length(TS)));//It was an debug output
-     S:=S+''''+WS+'''';                       //adding apostrophes around processed part
-     S:=S+Copy(TS,i,Length(TS));              //adding rest of string as is
-     Changed:=True;
-  end {if (LPA<=Length(TS))OR(LPS<=Length(TS))}
-  else
-    S:=TS;                                    //TS doesn't contain neither ' nor #. Copy as is.
-  Result:=AnsiToUtf8(S);                      //Result must be an UTF8-string
-end;
-
 function IsMissingType(LFMError: TLFMError): boolean;
 begin
   with LFMError do
diff --git a/ide/lazarusidestrconsts.pas b/ide/lazarusidestrconsts.pas
index f13344942c..f3866e35ed 100644
--- a/ide/lazarusidestrconsts.pas
+++ b/ide/lazarusidestrconsts.pas
@@ -5139,7 +5139,7 @@ resourcestring
   lisConvDelphiConvertingUnitFiles = '*** Converting unit files ... ***';
   lisConvDelphiConvertingFile = '* Converting file %s *';
   lisConvDelphiFixingUsedUnits = '* Fixing used units for file %s *';
-  lisConvDelphiChangedEncodingToUTF8 = 'Changed encoding to UTF-8';
+  lisConvDelphiChangedEncodingToUTF8 = 'Changed encoding from %s to UTF-8';
   lisConvDelphiErrorCanTFindUnit = '%s(%s,%s) Error: Can''t find unit %s';
   lisConvDelphiAllSubDirsScanned = 'All sub-directories will be scanned for unit files';
   lisConvDelphiMissingIncludeFile = '%s(%s,%s) missing include file';