LazUtils: Move ConsoleToUTF8, UTF8ToConsole, ParamStrUTF8,

GetEnvironmentStringUTF8 and GetEnvironmentVariableUTF8 to LazUtf8 (and inline them in FileUtil).
Part of the ongoing restructuring of LazUtils.

git-svn-id: trunk@41587 -
This commit is contained in:
bart 2013-06-08 14:41:16 +00:00
parent 4fd391ebc5
commit bcea8662ff
9 changed files with 375 additions and 295 deletions

2
.gitattributes vendored
View File

@ -2355,9 +2355,11 @@ components/lazutils/tttables.pas svneol=native#text/pascal
components/lazutils/tttypes.pas svneol=native#text/pascal
components/lazutils/unixfileutil.inc svneol=native#text/pascal
components/lazutils/unixlazfileutils.inc svneol=native#text/plain
components/lazutils/unixlazutf8.inc svneol=native#text/plain
components/lazutils/utf8process.pp svneol=native#text/pascal
components/lazutils/winfileutil.inc svneol=native#text/pascal
components/lazutils/winlazfileutils.inc svneol=native#text/plain
components/lazutils/winlazutf8.inc svneol=native#text/plain
components/leakview/Makefile svneol=native#text/plain
components/leakview/Makefile.compiled svneol=native#text/plain
components/leakview/Makefile.fpc svneol=native#text/plain

View File

@ -33,6 +33,31 @@ begin
Result := LazUtf8.SysToUTF8(s);
end;
function ConsoleToUTF8(const s: string): string;// converts OEM encoded string to UTF8 (used with some Windows specific functions)
begin
Result := LazUtf8.ConsoleToUTF8(s);
end;
function UTF8ToConsole(const s: string): string;// converts UTF8 string to console encoding (used by Write, WriteLn)
begin
Result := LazUtf8.UTF8ToConsole(s);
end;
function ParamStrUTF8(Param: Integer): string;
begin
Result := LazUtf8.ParamStrUTF8(Param);
end;
function GetEnvironmentStringUTF8(Index: Integer): string;
begin
Result := LazUtf8.GetEnvironmentStringUTF8(Index);
end;
function GetEnvironmentVariableUTF8(const EnvVar: string): String;
begin
Result := LazUtf8.GetEnvironmentVariableUTF8(EnvVar);
end;
function FileGetAttrUTF8(const FileName: String): Longint;
begin
Result := LazFileUtils.FileGetAttrUtf8(FileName);
@ -171,24 +196,6 @@ end;
function GetEnvironmentStringUTF8(Index: Integer): String;
begin
// on Windows SysUtils.GetEnvironmentString returns OEM encoded string
// so ConsoleToUTF8 function should be used!
// RTL issue: http://bugs.freepascal.org/view.php?id=15233
Result:=ConsoleToUTF8(SysUtils.GetEnvironmentString(Index));
end;
function GetEnvironmentVariableUTF8(const EnvVar: String): String;
begin
// on Windows SysUtils.GetEnvironmentString returns OEM encoded string
// so ConsoleToUTF8 function should be used!
// RTL issue: http://bugs.freepascal.org/view.php?id=15233
Result:=ConsoleToUTF8(SysUtils.GetEnvironmentVariable(UTF8ToSys(EnvVar)));
end;
function GetAppConfigDirUTF8(Global: Boolean; Create: boolean = false): string;
begin
Result:=SysToUTF8(SysUtils.GetAppConfigDir(Global));

View File

@ -231,8 +231,8 @@ function NeedRTLAnsi: boolean; inline;// true if system encoding is not UTF-8
procedure SetNeedRTLAnsi(NewValue: boolean); inline;
function UTF8ToSys(const s: string): string; inline;// as UTF8ToAnsi but more independent of widestringmanager
function SysToUTF8(const s: string): string; inline;// as AnsiToUTF8 but more independent of widestringmanager
function ConsoleToUTF8(const s: string): string;// converts OEM encoded string to UTF8 (used with some Windows specific functions)
function UTF8ToConsole(const s: string): string;// converts UTF8 string to console encoding (used by Write, WriteLn)
function ConsoleToUTF8(const s: string): string; inline;// converts OEM encoded string to UTF8 (used with some Windows specific functions)
function UTF8ToConsole(const s: string): string; inline;// converts UTF8 string to console encoding (used by Write, WriteLn)
// file operations
function FileExistsUTF8(const Filename: string): boolean; inline;
@ -261,9 +261,9 @@ function FileCreateUTF8(Const FileName : string) : THandle; overload; inline;
function FileCreateUTF8(Const FileName : string; Rights: Cardinal) : THandle; overload; inline;
// environment
function ParamStrUTF8(Param: Integer): string;
function GetEnvironmentStringUTF8(Index: Integer): string;
function GetEnvironmentVariableUTF8(const EnvVar: string): String;
function ParamStrUTF8(Param: Integer): string; inline;
function GetEnvironmentStringUTF8(Index: Integer): string; inline;
function GetEnvironmentVariableUTF8(const EnvVar: string): String; inline;
function GetAppConfigDirUTF8(Global: Boolean; Create: boolean = false): string;
function GetAppConfigFileUTF8(Global: Boolean; SubDir: boolean = false;
CreateDir: boolean = false): string;
@ -290,7 +290,5 @@ uses
{$i unixfileutil.inc}
{$ENDIF}
initialization
InitFileUtil;
end.

View File

@ -31,6 +31,14 @@ function NeedRTLAnsi: boolean;// true if system encoding is not UTF-8
procedure SetNeedRTLAnsi(NewValue: boolean);
function UTF8ToSys(const s: string): string;// as UTF8ToAnsi but more independent of widestringmanager
function SysToUTF8(const s: string): string;// as AnsiToUTF8 but more independent of widestringmanager
function ConsoleToUTF8(const s: string): string;// converts OEM encoded string to UTF8 (used with some Windows specific functions)
function UTF8ToConsole(const s: string): string;// converts UTF8 string to console encoding (used by Write, WriteLn)
function ParamStrUTF8(Param: Integer): string;
function GetEnvironmentStringUTF8(Index: Integer): string;
function GetEnvironmentVariableUTF8(const EnvVar: string): String;
function UTF8CharacterLength(p: PChar): integer;
function UTF8Length(const s: string): PtrInt;
@ -121,6 +129,12 @@ uses
{$IFDEF Darwin}, MacOSAll{$ENDIF}
;
{$ifdef windows}
{$i winlazutf8.inc}
{$else}
{$i unixlazutf8.inc}
{$endif}
var
FNeedRTLAnsi: boolean = false;
FNeedRTLAnsiValid: boolean = false;
@ -201,6 +215,24 @@ begin
Result:=s;
end;
function GetEnvironmentStringUTF8(Index: Integer): String;
begin
// on Windows SysUtils.GetEnvironmentString returns OEM encoded string
// so ConsoleToUTF8 function should be used!
// RTL issue: http://bugs.freepascal.org/view.php?id=15233
Result:=ConsoleToUTF8(SysUtils.GetEnvironmentString(Index));
end;
function GetEnvironmentVariableUTF8(const EnvVar: String): String;
begin
// on Windows SysUtils.GetEnvironmentString returns OEM encoded string
// so ConsoleToUTF8 function should be used!
// RTL issue: http://bugs.freepascal.org/view.php?id=15233
Result:=ConsoleToUTF8(SysUtils.GetEnvironmentVariable(UTF8ToSys(EnvVar)));
end;
function UTF8CharacterLength(p: PChar): integer;
begin
if p<>nil then begin
@ -3075,7 +3107,7 @@ begin
if Length(Lang) > 2 then Lang := Lang[1] + Lang[2];
end;
procedure InternalInit;
procedure InitFPUpchars;
var
c: Char;
begin
@ -3085,7 +3117,10 @@ begin
end;
initialization
InternalInit;
begin
InitFPUpchars;
InitLazUtf8;
end;
end.

View File

@ -19,7 +19,7 @@
<Description Value="Useful units for Lazarus packages."/>
<License Value="Modified LGPL-2"/>
<Version Major="1"/>
<Files Count="70">
<Files Count="72">
<Item1>
<Filename Value="laz2_dom.pas"/>
<UnitName Value="laz2_DOM"/>
@ -299,8 +299,16 @@
</Item69>
<Item70>
<Filename Value="dictionarystringlist.pas"/>
<UnitName Value="dictionarystringlist"/>
<UnitName Value="DictionaryStringList"/>
</Item70>
<Item71>
<Filename Value="unixlazutf8.inc"/>
<Type Value="Include"/>
</Item71>
<Item72>
<Filename Value="winlazutf8.inc"/>
<Type Value="Include"/>
</Item72>
</Files>
<LazDoc Paths="../../docs/xml/lazutils"/>
<i18n>

View File

@ -146,30 +146,9 @@ begin
end;
function ConsoleToUTF8(const s: string): string;// converts UTF8 string to console encoding (used by Write, WriteLn)
begin
Result := SysToUTF8(S);
end;
function UTF8ToConsole(const s: string): string;
begin
Result := UTF8ToSys(s);
end;
function ExtractShortPathNameUTF8(const FileName: String): String;
begin
Result:=SysToUTF8(SysUtils.ExtractShortPathName(UTF8ToSys(FileName)));
end;
function ParamStrUTF8(Param: Integer): string;
begin
Result:=SysToUTF8(ObjPas.ParamStr(Param));
end;
procedure InitFileUtil;
begin
//dummy procedure
end;

View File

@ -0,0 +1,23 @@
{%MainUnit lazutf8.pas}
function ConsoleToUTF8(const s: string): string;// converts UTF8 string to console encoding (used by Write, WriteLn)
begin
Result := SysToUTF8(S);
end;
function UTF8ToConsole(const s: string): string;
begin
Result := UTF8ToSys(s);
end;
function ParamStrUTF8(Param: Integer): string;
begin
Result:=SysToUTF8(ObjPas.ParamStr(Param));
end;
procedure InitLazUtf8;
begin
//dummy procedure
end;

View File

@ -1,200 +1,5 @@
{%MainUnit fileutil.pas}
var
//Function prototypes
_ParamStrUtf8: Function(Param: Integer): string;
var
ArgsW: Array of WideString;
ArgsWCount: Integer;
//************ START "Stubs" that just call Ansi or Wide implementation
function ParamStrUTF8(Param: Integer): string;
begin
Result := _ParamStrUtf8(Param);
end;
//************ END "Stubs" that just call Ansi or Wide implementation
//*************** START Non WideString implementations
{$ifndef wince}
function ParamStrUtf8Ansi(Param: Integer): String;
begin
Result:=SysToUTF8(ObjPas.ParamStr(Param));
end;
{$endif wince}
//*************** END Non WideString impementations
//*************** START WideString impementations
procedure SetupCommandlineParametersWide;
var
ArgLen, Start, CmdLen, i, j: SizeInt;
argstart,
Quote : Boolean;
Buf: array[0..259] of WChar; // need MAX_PATH bytes, not 256!
PCmdLineW: PWChar;
CmdLineW: WideString;
procedure AllocArg(Idx, Len:longint);
begin
if (Idx >= ArgsWCount) then
begin
SetLength(ArgsW, Idx + 1);
SetLength(ArgsW[Idx], Len);
end;
end;
begin
{ create commandline, it starts with the executed filename which is argv[0] }
{ Win32 passes the command NOT via the args, but via getmodulefilename}
ArgsWCount := 0;
ArgLen := GetModuleFileNameW(0, @buf[0], sizeof(buf));
//writeln('ArgLen = ',Arglen);
buf[ArgLen] := #0; // be safe, no terminating 0 on XP
allocarg(0,arglen);
move(buf[0],ArgsW[0][1],arglen * SizeOf(WChar));
//writeln('ArgsW[0] = ',ArgsW[0]);
PCmdLineW := nil;
{ Setup cmdline variable }
PCmdLineW := GetCommandLineW;
CmdLen := StrLen(PCmdLineW);
//writeln('StrLen(PCmdLineW) = ',CmdLen);
SetLength(CmdLineW, CmdLen);
Move(PCmdLineW^, CmdLineW[1], CmdLen * SizeOf(WChar));
//debugln(CmdLineW);
//for i := 1 to CmdLen do DbgOut(DbgS(i mod 10)); debugln;
i := 1;
while (i <= CmdLen) do
begin
//debugln('Next');
//DbgOut('i=',DbgS(i),' CmdLineW[',DbgS(i),']=');if i<=CmdLen then debugln(CmdLineW[i]) else debugln('#0');
//skip leading spaces
while (i <= CmdLen) and (CmdLineW[i] <= #32) do Inc(i);
//DbgOut('After skipping spaces: i=',DbgS(i),' CmdLineW[',DbgS(i),']=');if i<=CmdLen then debugln(CmdLineW[i]) else debugln('#0');
if (i > CmdLen) then Break;
Quote := False;
Start := i;
ArgLen := 0;
while (i <= CmdLen) do
begin //find next commandline parameter
case CmdLineW[i] of
#1..#32:
begin
if Quote then
begin
//debugln('i=',DbgS(i),': Space in Quote');
Inc(ArgLen)
end
else
begin
//debugln('i=',DbgS(i),': Space in NOT Quote');
Break;
end;
end;
'"':
begin
if (i < CmdLen) and (CmdLineW[i+1] <> '"') then
begin
//debugln('i=',DbgS(i),': Quote := not Quote');
Quote := not Quote
end
else
begin
//debugln('i=',DbgS(i),': Skip Quote');
Inc(i);
end;
end;
else Inc(ArgLen);
end;//case
Inc(i);
end; //find next commandline parameter
//debugln('ArgWCount=',DbgS(ArgsWCount),' Start=',DbgS(start),' ArgLen=',DbgS(arglen),' i=',DbgS(i));
//we already have (a better) ArgW[0]
if (ArgsWCount > 0) then
begin //Process commadline parameter
AllocArg(ArgsWCount, ArgLen);
Quote := False;
i := Start;
j := 1;
while (i <= CmdLen) do
begin
case CmdLineW[i] of
#1..#32:
begin
if Quote then
begin
//if j > ArgLen then debugln('Error whitespace: j > ArgLen: j=',DbgS(j),' ArgLen=',DbgS(arglen));
ArgsW[ArgsWCount][j] := CmdLineW[i];
Inc(j);
end
else
Break;
end;
'"':
begin
if (i < CmdLen) and (CmdLineW[i+1] <> '"') then
Quote := not Quote
else
Inc(i);
end;
else
begin
//if j > ArgLen then debugln('Error Quote: j > ArgLen: j=',DbgS(j),' ArgLen=',DbgS(arglen));
ArgsW[ArgsWCount][j] := CmdLineW[i];
Inc(j);
end;
end;
Inc(i);
end;
//debugln('ArgsW[',DbgS(ArgsWCount),'] = ',ArgsW[ArgsWCount]);
end; // Process commandline parameter
Inc(ArgsWCount);
end;
Dec(ArgsWCount);
end;
function ParamStrUtf8Wide(Param: Integer): String;
begin
if ArgsWCount <> ParamCount then
begin
//DebugLn('Error: ParamCount <> ArgsWCount!');
Result := SysToUtf8(ObjPas.ParamStr(Param));
end
else
begin
if (Param <= ArgsWCount) then
Result := Utf8Encode(ArgsW[Param])
else
Result := '';
end;
end;
//*************** END WideString impementations
{------------------------------------------------------------------------------
GetFileDescription
@ -221,37 +26,6 @@ begin
Result:=Filename;
end;
function ConsoleToUTF8(const s: string): string;// converts UTF8 string to console encoding (used by Write, WriteLn)
var
Dst: PChar;
begin
{$ifdef WinCE}
Result := SysToUTF8(s);
{$else}
Dst := AllocMem((Length(s) + 1) * SizeOf(Char));
if OemToChar(PChar(s), Dst) then
Result := StrPas(Dst)
else
Result := s;
FreeMem(Dst);
Result := SysToUTF8(Result);
{$endif}
end;
function UTF8ToConsole(const s: string): string;
var
Dst: PChar;
begin
{$ifdef WinCE}
Result := UTF8ToSys(s);
{$else}
Result := UTF8ToSys(s);
Dst := AllocMem((Length(Result) + 1) * SizeOf(Char));
if CharToOEM(PChar(Result), Dst) then
Result := StrPas(Dst);
FreeMem(Dst);
{$endif}
end;
function ExtractShortPathNameUTF8(const FileName: String): String;
@ -277,22 +51,4 @@ begin
end;
procedure InitFileUtil;
begin
{$ifndef WinCE}
if Win32MajorVersion <= 4 then
begin
_ParamStrUtf8 := @ParamStrUtf8Ansi;
end
else
{$endif}
begin
try
ArgsWCount := -1;
_ParamStrUtf8 := @ParamStrUtf8Wide;
SetupCommandlineParametersWide;
Except
ArgsWCount := -1;
end;
end;
end;

View File

@ -0,0 +1,272 @@
{%MainUnit lazutf8.pas}
var
//Function prototypes
_ParamStrUtf8: Function(Param: Integer): string;
var
ArgsW: Array of WideString;
ArgsWCount: Integer;
//************ START "Stubs" that just call Ansi or Wide implementation
function ParamStrUTF8(Param: Integer): string;
begin
Result := _ParamStrUtf8(Param);
end;
//************ END "Stubs" that just call Ansi or Wide implementation
//*************** START Non WideString implementations
{$ifndef wince}
function ParamStrUtf8Ansi(Param: Integer): String;
begin
Result:=SysToUTF8(ObjPas.ParamStr(Param));
end;
{$endif wince}
//*************** END Non WideString impementations
//*************** START WideString impementations
procedure SetupCommandlineParametersWide;
var
ArgLen, Start, CmdLen, i, j: SizeInt;
argstart,
Quote : Boolean;
Buf: array[0..259] of WChar; // need MAX_PATH bytes, not 256!
PCmdLineW: PWChar;
CmdLineW: WideString;
procedure AllocArg(Idx, Len:longint);
begin
if (Idx >= ArgsWCount) then
begin
SetLength(ArgsW, Idx + 1);
SetLength(ArgsW[Idx], Len);
end;
end;
begin
{ create commandline, it starts with the executed filename which is argv[0] }
{ Win32 passes the command NOT via the args, but via getmodulefilename}
ArgsWCount := 0;
ArgLen := GetModuleFileNameW(0, @buf[0], sizeof(buf));
//writeln('ArgLen = ',Arglen);
buf[ArgLen] := #0; // be safe, no terminating 0 on XP
allocarg(0,arglen);
move(buf[0],ArgsW[0][1],arglen * SizeOf(WChar));
//writeln('ArgsW[0] = ',ArgsW[0]);
PCmdLineW := nil;
{ Setup cmdline variable }
PCmdLineW := GetCommandLineW;
CmdLen := StrLen(PCmdLineW);
//writeln('StrLen(PCmdLineW) = ',CmdLen);
SetLength(CmdLineW, CmdLen);
Move(PCmdLineW^, CmdLineW[1], CmdLen * SizeOf(WChar));
//debugln(CmdLineW);
//for i := 1 to CmdLen do DbgOut(DbgS(i mod 10)); debugln;
i := 1;
while (i <= CmdLen) do
begin
//debugln('Next');
//DbgOut('i=',DbgS(i),' CmdLineW[',DbgS(i),']=');if i<=CmdLen then debugln(CmdLineW[i]) else debugln('#0');
//skip leading spaces
while (i <= CmdLen) and (CmdLineW[i] <= #32) do Inc(i);
//DbgOut('After skipping spaces: i=',DbgS(i),' CmdLineW[',DbgS(i),']=');if i<=CmdLen then debugln(CmdLineW[i]) else debugln('#0');
if (i > CmdLen) then Break;
Quote := False;
Start := i;
ArgLen := 0;
while (i <= CmdLen) do
begin //find next commandline parameter
case CmdLineW[i] of
#1..#32:
begin
if Quote then
begin
//debugln('i=',DbgS(i),': Space in Quote');
Inc(ArgLen)
end
else
begin
//debugln('i=',DbgS(i),': Space in NOT Quote');
Break;
end;
end;
'"':
begin
if (i < CmdLen) and (CmdLineW[i+1] <> '"') then
begin
//debugln('i=',DbgS(i),': Quote := not Quote');
Quote := not Quote
end
else
begin
//debugln('i=',DbgS(i),': Skip Quote');
Inc(i);
end;
end;
else Inc(ArgLen);
end;//case
Inc(i);
end; //find next commandline parameter
//debugln('ArgWCount=',DbgS(ArgsWCount),' Start=',DbgS(start),' ArgLen=',DbgS(arglen),' i=',DbgS(i));
//we already have (a better) ArgW[0]
if (ArgsWCount > 0) then
begin //Process commadline parameter
AllocArg(ArgsWCount, ArgLen);
Quote := False;
i := Start;
j := 1;
while (i <= CmdLen) do
begin
case CmdLineW[i] of
#1..#32:
begin
if Quote then
begin
//if j > ArgLen then debugln('Error whitespace: j > ArgLen: j=',DbgS(j),' ArgLen=',DbgS(arglen));
ArgsW[ArgsWCount][j] := CmdLineW[i];
Inc(j);
end
else
Break;
end;
'"':
begin
if (i < CmdLen) and (CmdLineW[i+1] <> '"') then
Quote := not Quote
else
Inc(i);
end;
else
begin
//if j > ArgLen then debugln('Error Quote: j > ArgLen: j=',DbgS(j),' ArgLen=',DbgS(arglen));
ArgsW[ArgsWCount][j] := CmdLineW[i];
Inc(j);
end;
end;
Inc(i);
end;
//debugln('ArgsW[',DbgS(ArgsWCount),'] = ',ArgsW[ArgsWCount]);
end; // Process commandline parameter
Inc(ArgsWCount);
end;
Dec(ArgsWCount);
end;
function ParamStrUtf8Wide(Param: Integer): String;
begin
if ArgsWCount <> ParamCount then
begin
//DebugLn('Error: ParamCount <> ArgsWCount!');
Result := SysToUtf8(ObjPas.ParamStr(Param));
end
else
begin
if (Param <= ArgsWCount) then
Result := Utf8Encode(ArgsW[Param])
else
Result := '';
end;
end;
//*************** END WideString impementations
function ConsoleToUTF8(const s: string): string;// converts UTF8 string to console encoding (used by Write, WriteLn)
var
Dst: PChar;
begin
{$ifdef WinCE}
Result := SysToUTF8(s);
{$else}
Dst := AllocMem((Length(s) + 1) * SizeOf(Char));
if OemToChar(PChar(s), Dst) then
Result := StrPas(Dst)
else
Result := s;
FreeMem(Dst);
Result := SysToUTF8(Result);
{$endif}
end;
function UTF8ToConsole(const s: string): string;
var
Dst: PChar;
begin
{$ifdef WinCE}
Result := UTF8ToSys(s);
{$else}
Result := UTF8ToSys(s);
Dst := AllocMem((Length(Result) + 1) * SizeOf(Char));
if CharToOEM(PChar(Result), Dst) then
Result := StrPas(Dst);
FreeMem(Dst);
{$endif}
end;
{$ifdef debugparamstrutf8}
procedure ParamStrUtf8Error;
var
i: Integer;
begin
writeln('Error in Windows WideString implementation of ParamStrUtf8');
writeln('Using SysToUtf8(ParamsStr(Param)) as fallback');
writeln('ParamCount = ',ParamCount,', ArgsWCount = ',ArgsWCount);
for i := 0 to ParamCount do writeln('ParamStr(',i,') = "',ParamStr(i),'"');
writeln;
for i := 0 to ArgsWCount do writeln('ParamStrUtf8(',i,') = "',ArgsW[i],'"');
end;
{$endif}
procedure InitLazUtf8;
begin
{$ifndef WinCE}
if Win32MajorVersion <= 4 then
begin
_ParamStrUtf8 := @ParamStrUtf8Ansi;
end
else
{$endif}
begin
try
ArgsWCount := -1;
_ParamStrUtf8 := @ParamStrUtf8Wide;
SetupCommandlineParametersWide;
{$ifdef debugparamstrutf8}
if ParamCount <> ArgsWCount then ParamStrUtf8Error;
{$endif}
Except
begin
ArgsWCount := -1;
{$ifdef debugparamstrutf8}
ParamStrUtf8Error;
{$endif}
end;
end;
end;
end;