LazUtils:

- An attempt to unify the defines for the different scenarios in the use of (non-)codepage-aware ansistrings
  and the use of the "UTF8 in RTL" feature.
  It makes for better separation of code and thus better readability and ease of maintenance (and at a later
  stage it makes it easier to remove code that deals with non-codepage-aware ansistrings (fpc < 3.0)).
- Also replace (FPC_FULLVERSION >= xxxx) with FPC_HAS_CPSTRING where appropriate.
- Replace the custom HasCP define with the built-in FPC_HAS_CPSTRING define.

git-svn-id: trunk@50498 -
This commit is contained in:
bart 2015-11-24 16:23:18 +00:00
parent 0493a91c19
commit 0c8df133dd
9 changed files with 77 additions and 70 deletions

View File

@ -130,7 +130,7 @@ begin
end;
{$endif}
{$ifdef HASCP}
{$ifdef FPC_HAS_CPSTRING}
procedure InternalUTF8ToDBCS(const s: string; TargetCodePage: TSystemCodePage;
SetTargetCodePage: boolean;
{$IfNDef UseSystemCPConv}const UTF8CharConvFunc: TUnicodeToCharID;{$endif}

View File

@ -16,6 +16,8 @@ unit FPCAdds;
{$mode objfpc}{$H+}{$inline on}
{$i lazutils_defines.inc}
interface
uses
@ -64,7 +66,7 @@ begin
{$ENDIF}
end;
{$IF (FPC_FULLVERSION >= 30000) AND NOT DEFINED(DisableUTF8RTL)}
{$ifdef UTF8_RTL}
initialization
SetMultiByteConversionCodePage(CP_UTF8);
// SetMultiByteFileSystemCodePage(CP_UTF8); not needed, this is the default under Windows

View File

@ -4,7 +4,7 @@
unit LazFileUtils;
{$mode objfpc}{$H+}
{$i lazutils_defines.inc}
interface
uses

View File

@ -18,13 +18,12 @@ unit LazUTF8;
{$mode objfpc}{$H+}{$inline on}
{$IF (FPC_FULLVERSION >= 30000) AND NOT DEFINED(DisableUTF8RTL)}
{$DEFINE ReallyUseUTF8RTL}
{$IFEND}
{$i lazutils_defines.inc}
interface
uses
{$IFDEF ReallyUseUTF8RTL}
{$IFDEF UTF8_RTL}
{$ifdef unix}
cwstring, // UTF8 RTL on Unix requires this. Must be used although it pulls in clib.
{$endif}
@ -42,8 +41,8 @@ function NeedRTLAnsi: boolean;// true if system encoding is not UTF-8
procedure SetNeedRTLAnsi(NewValue: boolean);
// UTF8ToSys works like UTF8ToAnsi but more independent of widestringmanager
function UTF8ToSys(const s: string): string; overload; {$IFDEF ReallyUseUTF8RTL}inline;{$ENDIF}
function UTF8ToSys(const AFormatSettings: TFormatSettings): TFormatSettings; overload; {$IFDEF ReallyUseUTF8RTL}inline;{$ENDIF}
function UTF8ToSys(const s: string): string; overload; {$IFDEF UTF8_RTL}inline;{$ENDIF}
function UTF8ToSys(const AFormatSettings: TFormatSettings): TFormatSettings; overload; {$IFDEF UTF8_RTL}inline;{$ENDIF}
// SysToUTF8 works like AnsiToUTF8 but more independent of widestringmanager
function SysToUTF8(const s: string): string; overload;
@ -251,7 +250,7 @@ end;
function UTF8ToSys(const s: string): string;
begin
{$IFDEF ReallyUseUTF8RTL}
{$IFDEF UTF8_RTL}
Result:=s;
{$ELSE}
if NeedRTLAnsi and (not IsASCII(s)) then
@ -263,7 +262,7 @@ end;
function SysToUTF8(const s: string): string;
begin
{$IFDEF ReallyUseUTF8RTL}
{$IFDEF UTF8_RTL}
Result:=s;
{$ELSE}
if NeedRTLAnsi and (not IsASCII(s)) then
@ -297,13 +296,13 @@ begin
end;
function UTF8ToSys(const AFormatSettings: TFormatSettings): TFormatSettings;
{$IFnDEF ReallyUseUTF8RTL}
{$IFnDEF UTF8_RTL}
var
i: Integer;
{$ENDIF}
begin
Result := AFormatSettings;
{$IFnDEF ReallyUseUTF8RTL}
{$IFnDEF UTF8_RTL}
Result.CurrencyString := UTF8ToSys(AFormatSettings.CurrencyString);
for i:=1 to 12 do begin
Result.LongMonthNames[i] := UTF8ToSys(AFormatSettings.LongMonthNames[i]);

View File

@ -16,7 +16,7 @@
<Description Value="Useful units for Lazarus packages."/>
<License Value="Modified LGPL-2"/>
<Version Major="1"/>
<Files Count="80">
<Files Count="81">
<Item1>
<Filename Value="laz2_dom.pas"/>
<UnitName Value="Laz2_DOM"/>
@ -338,6 +338,10 @@
<Filename Value="lookupstringlist.pas"/>
<UnitName Value="LookupStringList"/>
</Item80>
<Item81>
<Filename Value="lazutils_defines.inc"/>
<Type Value="Include"/>
</Item81>
</Files>
<LazDoc Paths="../../docs/xml/lazutils"/>
<i18n>

View File

@ -13,17 +13,18 @@ unit LConvEncoding;
{$mode objfpc}{$H+}
{$i lazutils_defines.inc}
interface
{ $Define DisableAsianCodePages}
{$if FPC_FULLVERSION >= 30000}
{$Define HasCP} // AnsiString has codepage
{$IFnDEF DisableUTF8RTL}
//{$if FPC_FULLVERSION >= 30000}
{$IFDEF UTF8_RTL}
// Windows provides conversion functions.
// Unix: unit cwstring provides conversion functions which are used by default UTF-8 encoding system.
{$Define UseSystemCPConv} // use system conversions
{$ENDIF}
{$IFEND}
//{$IFEND}
{$ifdef UseLCPConv}{$undef UseSystemCPConv}{$endif}
uses
@ -77,11 +78,11 @@ const
function GuessEncoding(const s: string): string;
function ConvertEncodingFromUTF8(const s, ToEncoding: string; out Encoded: boolean
{$ifdef HasCP}; SetTargetCodePage: boolean = false{$endif}): string;
{$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
function ConvertEncodingToUTF8(const s, FromEncoding: string; out Encoded: boolean): string;
// For UTF8 use the above functions, they save you one parameter
function ConvertEncoding(const s, FromEncoding, ToEncoding: string
{$ifdef HasCP}; SetTargetCodePage: boolean = false{$endif}): string;
{$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
// This routine should obtain the encoding utilized by ansistring in the RTL
function GetDefaultTextEncoding: string;
@ -93,7 +94,7 @@ function NormalizeEncoding(const Encoding: string): string;
type
TConvertEncodingFunction = function(const s: string): string;
{$ifdef HasCP}
{$ifdef FPC_HAS_CPSTRING}
TConvertUTF8ToEncodingFunc = function(const s: string; SetTargetCodePage: boolean = false): RawByteString;
{$else}
TConvertUTF8ToEncodingFunc = function(const s: string): string;
@ -129,7 +130,7 @@ function UCS2LEToUTF8(const s: string): string; // UCS2-LE 2byte little endian
function UCS2BEToUTF8(const s: string): string; // UCS2-BE 2byte big endian
function UTF8ToUTF8BOM(const s: string): string; // UTF8 with BOM
{$ifdef HasCP}
{$ifdef FPC_HAS_CPSTRING}
function UTF8ToISO_8859_1(const s: string; SetTargetCodePage: boolean = false): RawByteString; // central europe
function UTF8ToISO_8859_2(const s: string; SetTargetCodePage: boolean = false): RawByteString; // eastern europe
function UTF8ToISO_8859_15(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Western European languages
@ -189,7 +190,7 @@ function CP950ToUTF8(const s: string): string; // Chinese Complex
function DBCSToUTF8(const s: string; CodeP: integer): string;
{$ifdef HasCP}
{$ifdef FPC_HAS_CPSTRING}
function UTF8ToCP932(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Japanese
function UTF8ToCP936(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
function UTF8ToCP949(const s: string; SetTargetCodePage: boolean = false): RawByteString; // Korea
@ -6761,7 +6762,8 @@ begin
end;
end;
{$if FPC_FULLVERSION >= 20701}
//{$if FPC_FULLVERSION >= 20701}
{$IFDEF FPC_HAS_CPSTRING}
procedure InternalUTF8ToCP(const s: string; TargetCodePage: TSystemCodePage;
SetTargetCodePage: boolean;
const UTF8CharConvFunc: TUnicodeToCharID;
@ -7288,11 +7290,11 @@ end;
function ConvertEncodingFromUTF8(const s, ToEncoding: string; out Encoded: boolean
{$ifdef HasCP}; SetTargetCodePage: boolean = false{$endif}): string;
{$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean = false{$endif}): string;
var
ATo: string;
{$ifdef HasCP}
{$ifdef FPC_HAS_CPSTRING}
procedure CheckKeepCP; inline;
begin
if SetTargetCodePage then
@ -7306,33 +7308,33 @@ begin
ATo:=NormalizeEncoding(ToEncoding);
if ATo=EncodingUTF8BOM then begin Result:=UTF8ToUTF8BOM(s); exit; end;
if ATo=EncodingCPIso1 then begin Result:=UTF8ToISO_8859_1(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCPIso15 then begin Result:=UTF8ToISO_8859_15(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCPIso2 then begin Result:=UTF8ToISO_8859_2(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1250 then begin Result:=UTF8ToCP1250(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1251 then begin Result:=UTF8ToCP1251(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1252 then begin Result:=UTF8ToCP1252(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1253 then begin Result:=UTF8ToCP1253(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1254 then begin Result:=UTF8ToCP1254(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1255 then begin Result:=UTF8ToCP1255(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1256 then begin Result:=UTF8ToCP1256(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1257 then begin Result:=UTF8ToCP1257(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1258 then begin Result:=UTF8ToCP1258(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP437 then begin Result:=UTF8ToCP437(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP850 then begin Result:=UTF8ToCP850(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP852 then begin Result:=UTF8ToCP852(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP866 then begin Result:=UTF8ToCP866(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP874 then begin Result:=UTF8ToCP874(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCPIso1 then begin Result:=UTF8ToISO_8859_1(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCPIso15 then begin Result:=UTF8ToISO_8859_15(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCPIso2 then begin Result:=UTF8ToISO_8859_2(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1250 then begin Result:=UTF8ToCP1250(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1251 then begin Result:=UTF8ToCP1251(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1252 then begin Result:=UTF8ToCP1252(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1253 then begin Result:=UTF8ToCP1253(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1254 then begin Result:=UTF8ToCP1254(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1255 then begin Result:=UTF8ToCP1255(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1256 then begin Result:=UTF8ToCP1256(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1257 then begin Result:=UTF8ToCP1257(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP1258 then begin Result:=UTF8ToCP1258(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP437 then begin Result:=UTF8ToCP437(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP850 then begin Result:=UTF8ToCP850(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP852 then begin Result:=UTF8ToCP852(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP866 then begin Result:=UTF8ToCP866(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP874 then begin Result:=UTF8ToCP874(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
{$IFnDEF DisableAsianCodePages}
if ATo=EncodingCP936 then begin Result:=UTF8ToCP936(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP950 then begin Result:=UTF8ToCP950(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP949 then begin Result:=UTF8ToCP949(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP932 then begin Result:=UTF8ToCP932(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP936 then begin Result:=UTF8ToCP936(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP950 then begin Result:=UTF8ToCP950(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP949 then begin Result:=UTF8ToCP949(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCP932 then begin Result:=UTF8ToCP932(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
{$ENDIF}
if ATo=EncodingCPKOI8 then begin Result:=UTF8ToKOI8(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCPMac then begin Result:=UTF8ToMacintosh(s{$ifdef HasCP},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingUCS2LE then begin {$ifdef HasCP}CheckKeepCP;{$endif} Result:=UTF8ToUCS2LE(s); exit; end;
if ATo=EncodingUCS2BE then begin {$ifdef HasCP}CheckKeepCP;{$endif} Result:=UTF8ToUCS2BE(s); exit; end;
if ATo=EncodingCPKOI8 then begin Result:=UTF8ToKOI8(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingCPMac then begin Result:=UTF8ToMacintosh(s{$ifdef FPC_HAS_CPSTRING},SetTargetCodePage{$endif}); exit; end;
if ATo=EncodingUCS2LE then begin {$ifdef FPC_HAS_CPSTRING}CheckKeepCP;{$endif} Result:=UTF8ToUCS2LE(s); exit; end;
if ATo=EncodingUCS2BE then begin {$ifdef FPC_HAS_CPSTRING}CheckKeepCP;{$endif} Result:=UTF8ToUCS2BE(s); exit; end;
if (ATo=GetDefaultTextEncoding) and Assigned(ConvertUTF8ToAnsi) then begin
Result:=ConvertUTF8ToAnsi(s);
@ -7388,7 +7390,7 @@ begin
end;
function ConvertEncoding(const s, FromEncoding, ToEncoding: string
{$ifdef HasCP}; SetTargetCodePage: boolean{$endif}): string;
{$ifdef FPC_HAS_CPSTRING}; SetTargetCodePage: boolean{$endif}): string;
var
AFrom, ATo, SysEnc : String;
Encoded : Boolean;
@ -7416,7 +7418,7 @@ begin
//DebugLn(['ConvertEncoding ',AFrom,' ',ATo]);
if AFrom=EncodingUTF8 then begin
Result:=ConvertEncodingFromUTF8(s, ATo, Encoded{$ifdef HasCP}, SetTargetCodePage{$endif});
Result:=ConvertEncodingFromUTF8(s, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif});
if Encoded then exit;
end
else
@ -7428,7 +7430,7 @@ begin
begin
Result:=ConvertEncodingToUTF8(s, AFrom, Encoded);
if Encoded then
Result:=ConvertEncodingFromUTF8(Result, ATo, Encoded{$ifdef HasCP}, SetTargetCodePage{$endif});
Result:=ConvertEncodingFromUTF8(Result, ATo, Encoded{$ifdef FPC_HAS_CPSTRING}, SetTargetCodePage{$endif});
if Encoded then exit;
end;

View File

@ -16,13 +16,16 @@ unit PasWString;
{$mode objfpc}
{$inline on}
{$i lazutils_defines.inc}
//{$define PASWSTRING_VERBOSE}
//{.$define PASWSTRING_SUPPORT_NONUTF8_ANSISTRING} disabled by default because
// non utf-8 ansistring is rare in UNIXes and lconvencoding makes the executable big
// sanity checks for defines
{$IF FPC_FULLVERSION >= 30000}
{$IFnDEF DisableUTF8RTL}
//{$IF FPC_FULLVERSION >= 30000}
{$IFnDEF NO_CP_RTL}
{$IFDEF UTF8_RTL}
{$IFDEF PASWSTRING_SUPPORT_NONUTF8_ANSISTRING}
{$error UTF8 or not UTF8}
{$ENDIF}

View File

@ -542,12 +542,9 @@ end;
{$IF DEFINED(WinCE) OR (FPC_FULLVERSION>=30000)}
{$define FindData_W}
{$IFEND}
{$IF (FPC_FULLVERSION >= 30000) AND NOT DEFINED(DisableUTF8RTL)}
{$DEFINE ReallyUseUTF8RTL}
{$IFEND}
function FindMatch(var f: TSearchRec) : Longint;
{$IFnDEF ReallyUseUTF8RTL}
{$IFDEF ACP_RTL}
var
Dummy: String;
{$ENDIF}
@ -569,7 +566,7 @@ begin
in win32 it is the ansi structure with a utf-8 string
in wince it is a wide structure }
{$ifdef FindData_W}
{$IFDEF ReallyUseUTF8RTL}
{$IFnDEF ACP_RTL}
f.Name:=UTF8Encode(UnicodeString(F.FindData.cFileName));
{$ELSE}
Dummy := '';

View File

@ -7,7 +7,7 @@ var
var
ArgsW: Array of WideString;
ArgsWCount: Integer; // length(ArgsW)+1
{$IF (FPC_FULLVERSION >= 30000) AND NOT DEFINED(DisableUTF8RTL) AND NOT DEFINED(WINCE)}
{$IF DEFINED(UTF8_RTL) AND NOT DEFINED(WINCE)}
ArgsUTF8: Array of String; // the ArgsW array as UTF8
OldArgV: PPChar = nil;
{$IFEND}
@ -38,7 +38,7 @@ end;
//*************** START WideString implementations
{$IF (FPC_FULLVERSION >= 30000) AND NOT DEFINED(DisableUTF8RTL) AND NOT DEFINED(WINCE)}
{$IF DEFINED(UTF8_RTL) AND NOT DEFINED(WINCE)}
procedure SetupArgvAsUtf8;
var
i: Integer;
@ -192,7 +192,7 @@ begin
//Note:
//On WinCe Argsv is a static function, so we cannot change it.
//This might change in the future if Argsv on WinCE will be declared as a function variable
{$IF (FPC_FULLVERSION >= 30000) AND NOT DEFINED(DisableUTF8RTL) AND NOT DEFINED(WINCE)}
{$IF DEFINED(UTF8_RTL) AND NOT DEFINED(WINCE)}
if DefaultSystemCodePage=CP_UTF8 then
SetupArgvAsUtf8;
{$IFEND}
@ -327,7 +327,7 @@ begin
end;
{$endif}
{$IF (FPC_FULLVERSION >= 30000) AND NOT DEFINED(DisableUTF8RTL)}
{$IFDEF UTF8_RTL}
function GetLocaleStr(aLocaleID, aLCType: Longint; const Def: string): String;
var
L: Integer;
@ -493,7 +493,7 @@ begin
end else
Result:=0;
end;
{$IFEND}
{$ENDIF}
procedure InitLazUtf8;
begin
@ -521,7 +521,7 @@ begin
end;
end;
end;
{$IF (FPC_FULLVERSION >= 30000) AND NOT DEFINED(DisableUTF8RTL)}
{$IFDEF UTF8_RTL}
{$ifndef wince}
GetFormatSettingsUTF8(GetThreadLocale,FormatSettings);
{$else}
@ -542,12 +542,12 @@ begin
end;
procedure FinalizeLazUTF8;
{$IF (FPC_FULLVERSION >= 30000) AND NOT DEFINED(DisableUTF8RTL)}
{$IFDEF UTF8_RTL}
var
p: PPChar;
{$IFEND}
{$ENDIF}
begin
{$IF (FPC_FULLVERSION >= 30000) AND NOT DEFINED(DisableUTF8RTL) AND NOT DEFINED(WINCE)}
{$IF DEFINED(UTF8_RTL) AND NOT DEFINED(WINCE)}
// restore argv and free memory
if OldArgV<>nil then
begin