{ ***************************************************************************** * * * This file is part of the Lazarus Component Library (LCL) * * * * See the file COPYING.modifiedLGPL, included in this distribution, * * for details about the copyright. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * * ***************************************************************************** } unit LConvEncoding; {$mode objfpc}{$H+} //As iconv is Linux command, there is no sense in Windows {$IFDEF MSWindows} {$DEFINE WINDOWS} {$ENDIF} {$IFDEF WINDOWS} {$WARNING Windows/Wine/ReactOS locale conversion is not fully supported yet. Sorry.} {$ENDIF} interface uses SysUtils, Classes, dos, LCLProc {$IFDEF UNIX},unix{$ENDIF}; const EncodingUTF8 = 'utf8'; function GuessEncoding(const s: string): string; function ConvertEncoding(const s, FromEncoding, ToEncoding: string): string; function GetSystemEncoding: string; implementation var EncodingValid: boolean=false; SystemEncoding: string='ANSI'; function GetSystemEncoding: string; var Lang: string; i: integer; s: string; begin if EncodingValid then begin Result:=SystemEncoding; exit; end; Result:='ANSI'; lang := GetEnv('LC_ALL'); if Length(lang) = 0 then begin lang := GetEnv('LC_MESSAGES'); if Length(lang) = 0 then begin lang := GetEnv('LANG'); end; end; i:=pos('.',Lang); if (i>0) and (i<=length(Lang)) then Result:=copy(Lang,i+1,length(Lang)-i); //Check parameters for i:=1 to ParamCount do begin s:=ParamStr(i); if s='--charset=' then Result:=copy(s,pos(#61,s),length(s)); end; SystemEncoding:=Result; EncodingValid:=true; end; function Utf2Cp1251(s:string):string; var i:integer; Skip,DSkip:boolean; begin //TODO Complete SystemEncoding conversion Skip:=false;DSkip:=false;Result:=''; for i:=1 to length(s) do begin if DSkip then begin Skip:=true;DSkip:=false;continue;end; if Skip then begin Skip:=false;Continue;end; if s[i]<#127 then begin Result:=Result+s[i];continue; end; if i=length(s) then break;//Do not translate 'strange' symbol if (s[i]=chr($D0)) and (s[i+1]>=chr($90))and (s[i+1]=chr($80))and (s[i+1]Chr2 then begin if Chr1 in [97..122] then dec(Chr1,32); if Chr2 in [97..122] then dec(Chr2,32); if Chr1<>Chr2 then exit(false); end; inc(p1); inc(p2); end; Result:=true; end; begin l:=length(s); if l=0 then begin Result:=''; exit; end; // try BOM if CompareI(@s[1],#$EF#$BB#$BF,3) then begin Result:=EncodingUTF8; exit; end; // try {%encoding eee} if CompareI(@s[1],'{%encoding ',11) then begin p:=12; while (p<=l) and (s[p] in [' ',#9]) do inc(p); EndPos:=p; while (EndPos<=l) and (not (s[EndPos] in ['}',' ',#9])) do inc(EndPos); Result:=copy(s,p,EndPos-p); exit; end; // try UTF-8 (this includes ASCII) p:=1; while (p<=l) do begin if ord(s[p])<128 then begin // ASCII inc(p); end else begin i:=UTF8CharacterStrictLength(@s[p]); if i=0 then break; inc(p); end; end; if p>l then begin Result:=EncodingUTF8; exit; end; // use system encoding Result:=GetSystemEncoding; end; function ConvertEncoding(const s, FromEncoding, ToEncoding: string): string; var AFrom,ATo:string; SL:TStringList; FN1,FN2:string; begin Result:=s; AFrom:=LowerCase(FromEncoding); ATo:=LowerCase(ToEncoding); if AFrom=ATo then exit; if ATo='koi8r' then ATo:='koi8-r'; if AFrom='koi8r' then AFrom:='koi8-r'; if (AFrom='utf8') or (AFrom='utf-8') then begin if ATo='cp1251' then begin Result:=utf2cp1251(s);exit;end; if ATo='koi8-r' then begin Result:=cp1251ToKoi8r(utf2cp1251(s));exit;end; end; if (ATo='utf8') or (ATo='utf-8') then begin if AFrom='cp1251' then begin Result:=Cp1251toUTF(s);exit;end; if AFrom='koi8-r' then begin Result:=Cp1251toUTF(Koi8rToCP1251(s));exit;end; end; //Stupid code. Works anyway, but extra-slow {$ifdef Unix} DebugLn(['CPConvert NOTE: using slow iconv workaround to convert from ',AFrom,' to ',ATo]); SL:=TStringList.Create; SL.Text:=s; FN1:=GetTempFileName; SL.SaveToFile(FN1); FN2:=GetTempFileName; fpSystem('iconv -f '+FromEncoding+' -t '+ToEncoding+#60+FN1+' >'+FN2); SL.LoadFromFile(FN2); if SL.Text<>'' then Result:=SL.Text else Result:=s; DeleteFile(FN1); DeleteFile(FN2); {$endif} end; end.