From ba43bbf08fc4bc61af3ace233dd7a1fa56fc0c9c Mon Sep 17 00:00:00 2001 From: juha Date: Fri, 3 Jun 2011 16:19:19 +0000 Subject: [PATCH] Converter: change the character encoding of source files to UTF-8 git-svn-id: trunk@31023 - --- converter/convertdelphi.pas | 38 ++++--------- converter/missingpropertiesdlg.pas | 87 ------------------------------ ide/lazarusidestrconsts.pas | 2 +- 3 files changed, 12 insertions(+), 115 deletions(-) diff --git a/converter/convertdelphi.pas b/converter/convertdelphi.pas index a208984789..7da2a839bc 100644 --- a/converter/convertdelphi.pas +++ b/converter/convertdelphi.pas @@ -452,9 +452,6 @@ begin end; function TConvertDelphiUnit.CopyAndLoadFile: TModalResult; -var -// CurEncoding: String; - Changed: Boolean; begin IDEMessagesWindow.AddMsg(Format(lisConvDelphiConvertingFile, [fOrigUnitFilename]), '', -1); @@ -475,12 +472,11 @@ begin [lbfCheckIfText,lbfUpdateFromDisk],true); if Result<>mrOk then exit; // Change encoding to UTF-8 -{ CurEncoding:=GuessEncoding(fPascalBuffer.Source); //fPascalBuffer.DiskEncoding; - if CurEncoding<>EncodingUTF8 then begin - fPascalBuffer.Source:=ConvertEncoding(fPascalBuffer.Source, CurEncoding, EncodingUTF8); - fPascalBuffer.DiskEncoding:=EncodingUTF8; - fPascalBuffer.MemEncoding:=EncodingUTF8; - end; } + if fPascalBuffer.DiskEncoding<>EncodingUTF8 then begin + IDEMessagesWindow.AddMsg(Format(lisConvDelphiChangedEncodingToUTF8, + [fPascalBuffer.DiskEncoding]), '', -1); + fPascalBuffer.DiskEncoding:=EncodingUTF8; // Takes effect when buffer is saved. + end; // Create a shared link for codetools. Assert(fCTLink=Nil, 'fCTLink should be Nil in CopyAndLoadFile'); fCTLink:=TCodeToolLink.Create(fPascalBuffer); @@ -499,8 +495,6 @@ var LfmFilename: string; // Lazarus .LFM file name. DFMConverter: TDFMConverter; TempLFMBuffer: TCodeBuffer; -// CurEncoding: String; - Changed: Boolean; begin Result:=mrOK; fLFMBuffer:=nil; @@ -538,23 +532,13 @@ begin DFMConverter.Free; end; // Change encoding to UTF-8 - if fSettings.FixEncoding then begin - Result:=LoadCodeBuffer(TempLFMBuffer,LfmFilename, - [lbfCheckIfText,lbfUpdateFromDisk],true); - // Note: EnUnicode is meant to be a temporary solution. - // LCL has other functions for char encoding. - TempLFMBuffer.Source:=EnUnicode(TempLFMBuffer.Source, Changed); - if Changed then - IDEMessagesWindow.AddMsg(lisConvDelphiChangedEncodingToUTF8, '', -1); -// TempLFMBuffer.SaveToFile(ChangeFileExt(TempLFMBuffer.Filename, '_utf8.lfm')); + Result:=LoadCodeBuffer(TempLFMBuffer,LfmFilename, + [lbfCheckIfText,lbfUpdateFromDisk],true); + if TempLFMBuffer.DiskEncoding<>EncodingUTF8 then begin + IDEMessagesWindow.AddMsg(Format(lisConvDelphiChangedEncodingToUTF8, + [TempLFMBuffer.DiskEncoding]), '', -1); + TempLFMBuffer.DiskEncoding:=EncodingUTF8; TempLFMBuffer.Save; -{ CurEncoding:=GuessEncoding(TempLFMBuffer.Source); - if CurEncoding<>EncodingUTF8 then begin - ShowMessage('Encoding = ' + CurEncoding); - TempLFMBuffer.Source:=ConvertEncoding(TempLFMBuffer.Source, CurEncoding, EncodingUTF8); - TempLFMBuffer.DiskEncoding:=EncodingUTF8; - TempLFMBuffer.MemEncoding:=EncodingUTF8; - end; } end; // Read form file code in. if not fSettings.SameDfmFile then begin diff --git a/converter/missingpropertiesdlg.pas b/converter/missingpropertiesdlg.pas index ae273bcb85..dc35fc6e54 100644 --- a/converter/missingpropertiesdlg.pas +++ b/converter/missingpropertiesdlg.pas @@ -129,98 +129,11 @@ type destructor Destroy; override; end; - function EnUnicode(const TS: UTF8String; var Changed: Boolean): UTF8String; implementation {$R *.lfm} -{******************************************************************************* -Function UTFEnc(S:ansiString):WideChar; -S - string like '#1234' or like '1234'. -It process only first 4-5 symbols. (Some kind of protection) -Result - One Unicode symbol. -If S isn't an unicode function will return symbol #0000. -*******************************************************************************} -function UTFEnc(S: ansistring): WideChar; -var X: word; //word - to be sure that it will return Unicode symbol. Not ASCII. -begin - if (S[1]='#')and(Length(S)=5)then - X:=StrToIntDef(Copy(S,2,4),0) - else - X:=StrToIntDef(Copy(S,1,4),0); - Result:=WideChar(X); -end; - -{******************************************************************************* -function EnUnicode(TS:UTF8String):UTF8String; -TS:UTF8String - Processing string like <> -Function converts it to string like <> -It have some troubles with strings that contains several pairs of apostrophes. -(<> will converts into -<> -*******************************************************************************} -function EnUnicode(const TS: UTF8String; var Changed: Boolean): UTF8String; -var - i, - LPA, //LPA = Left Position of Apostroph. First pos of ' in TS - LPS, //LPS = Left Position of Sharp. First pos of # in TS - RPA, //RPA = Right Position of Apostroph. Last pos of ' in TS - RPS: integer; //RPS = Right Position of Sharp. Last pos of # in TS - insideAp: Boolean;//inside of two Apostrophes. - S,WS: ansistring; //S copying "as is"(for better speed). WS - converts symbol by symbol -begin - Changed:=False; - S:=''; - insideAp:=false; - RPS:=0; RPA:=0; - for i:=1 to Length(TS) do begin //find Last positions of ' and # - if TS[i]='#' then - RPS:=i; //May be there is a spec function to do it - if TS[i]='''' then - RPA:=i; //but I didn't find it. - end; - RPS:=RPS+4; //Actually no need for position of #, but pos of last symbol of sequence #1234 - i:=1; //Now let's find FIRST pos of ' and # - LPA:=Pos('''',TS); //If ' not found I must throw out processing of - if LPA=0 then - LPA:=Length(TS)+1; //any apostrophes in the TS. - LPS:=Pos('#',TS); //Also for # - if LPS=0 then LPS:=Length(TS)+1; - if (LPALength(TS)+4) then - RPS:=0; //More hiding processing if ' or # - if (RPA<=LPA) OR (RPA=0) OR (RPA>Length(TS)) then - RPA:=0; // not found - if (LPA<=Length(TS)) OR (LPS<=Length(TS)) then begin //if we've found ' or # or both - start process - S:=LeftStr(TS,i-1); //first part of TS (before ' or #) copy "as is" - WS:=''; - while (i<=Length(TS)) AND ((i<=RPA) OR (i<=RPS)) do begin //process - if TS[i]='''' then begin //current symbol = ' - insideAp:=not insideAp; //part inside of '' will be copy as is - end; //even it contains #1234 sequenses - if (not insideAp) AND (TS[i]='#') then begin //part outside of '' - WS:=WS+UTFEnc(Copy(TS,i,5)); //send to UTFEnc - i:=i+4; //skipping nummbers - end else - if TS[i]<>'''' then //skipping apostrophes themselves - WS:=WS+TS[i]; - i:=i+1; - end; - //Form1.Memo1.Lines.Add(S+'|'+WS+'|'+Copy(TS,i,Length(TS)));//It was an debug output - S:=S+''''+WS+''''; //adding apostrophes around processed part - S:=S+Copy(TS,i,Length(TS)); //adding rest of string as is - Changed:=True; - end {if (LPA<=Length(TS))OR(LPS<=Length(TS))} - else - S:=TS; //TS doesn't contain neither ' nor #. Copy as is. - Result:=AnsiToUtf8(S); //Result must be an UTF8-string -end; - function IsMissingType(LFMError: TLFMError): boolean; begin with LFMError do diff --git a/ide/lazarusidestrconsts.pas b/ide/lazarusidestrconsts.pas index f13344942c..f3866e35ed 100644 --- a/ide/lazarusidestrconsts.pas +++ b/ide/lazarusidestrconsts.pas @@ -5139,7 +5139,7 @@ resourcestring lisConvDelphiConvertingUnitFiles = '*** Converting unit files ... ***'; lisConvDelphiConvertingFile = '* Converting file %s *'; lisConvDelphiFixingUsedUnits = '* Fixing used units for file %s *'; - lisConvDelphiChangedEncodingToUTF8 = 'Changed encoding to UTF-8'; + lisConvDelphiChangedEncodingToUTF8 = 'Changed encoding from %s to UTF-8'; lisConvDelphiErrorCanTFindUnit = '%s(%s,%s) Error: Can''t find unit %s'; lisConvDelphiAllSubDirsScanned = 'All sub-directories will be scanned for unit files'; lisConvDelphiMissingIncludeFile = '%s(%s,%s) missing include file';