{%MainUnit lazutf8.pas} var //Function prototypes _ParamStrUtf8: Function(Param: Integer): string; var ArgsW: Array of WideString; ArgsWCount: Integer; // length(ArgsW)+1 {$IF DEFINED(UTF8_RTL) AND NOT DEFINED(WINCE)} ArgsUTF8: Array of String; // the ArgsW array as UTF8 OldArgV: PPChar = nil; {$IFEND} //************ START "Stubs" that just call Ansi or Wide implementation function ParamStrUTF8(Param: Integer): string; begin Result := _ParamStrUtf8(Param); end; //************ END "Stubs" that just call Ansi or Wide implementation //*************** START Non WideString implementations {$ifndef wince} function ParamStrUtf8Ansi(Param: Integer): String; begin Result:=SysToUTF8(ObjPas.ParamStr(Param)); end; {$endif wince} //*************** END Non WideString impementations //*************** START WideString impementations {$IF DEFINED(UTF8_RTL) AND NOT DEFINED(WINCE)} procedure SetupArgvAsUtf8; var i: Integer; begin SetLength(ArgsUTF8,length(ArgsW)); OldArgV:=argv; GetMem(argv,SizeOf(Pointer)*length(ArgsW)); for i:=0 to length(ArgsW)-1 do begin ArgsUTF8[i]:=ArgsW{%H-}[i]; argv[i]:=PChar(ArgsUTF8[i]); end; end; {$endif} procedure SetupCommandlineParametersWide; var ArgLen, Start, CmdLen, i, j: SizeInt; Quote : Boolean; Buf: array[0..259] of WChar; // need MAX_PATH bytes, not 256! PCmdLineW: PWideChar; CmdLineW: WideString; procedure AllocArg(Idx, Len:longint); begin if (Idx >= ArgsWCount) then SetLength(ArgsW, Idx + 1); SetLength(ArgsW[Idx], Len); end; begin { create commandline, it starts with the executed filename which is argv[0] } { Win32 passes the command NOT via the args, but via getmodulefilename} ArgsWCount := 0; ArgLen := GetModuleFileNameW(0, @buf[0], sizeof(buf)); //writeln('ArgLen = ',Arglen); buf[ArgLen] := #0; // be safe, no terminating 0 on XP allocarg(0,arglen); move(buf[0],ArgsW[0][1],arglen * SizeOf(WChar)); //writeln('ArgsW[0] = ',ArgsW[0]); PCmdLineW := nil; { Setup cmdline variable } PCmdLineW := GetCommandLineW; CmdLen := StrLen(PCmdLineW); //writeln('StrLen(PCmdLineW) = ',CmdLen); SetLength(CmdLineW, CmdLen); Move(PCmdLineW^, CmdLineW[1], CmdLen * SizeOf(WChar)); //debugln(CmdLineW); //for i := 1 to CmdLen do DbgOut(DbgS(i mod 10)); debugln; i := 1; while (i <= CmdLen) do begin //debugln('Next'); //DbgOut('i=',DbgS(i),' CmdLineW[',DbgS(i),']=');if i<=CmdLen then debugln(CmdLineW[i]) else debugln('#0'); //skip leading spaces while (i <= CmdLen) and (CmdLineW[i] <= #32) do Inc(i); //DbgOut('After skipping spaces: i=',DbgS(i),' CmdLineW[',DbgS(i),']=');if i<=CmdLen then debugln(CmdLineW[i]) else debugln('#0'); if (i > CmdLen) then Break; Quote := False; Start := i; ArgLen := 0; while (i <= CmdLen) do begin //find next commandline parameter case CmdLineW[i] of #1..#32: begin if Quote then begin //debugln('i=',DbgS(i),': Space in Quote'); Inc(ArgLen) end else begin //debugln('i=',DbgS(i),': Space in NOT Quote'); Break; end; end; '"': begin if (i < CmdLen) and (CmdLineW[i+1] <> '"') then begin //debugln('i=',DbgS(i),': Quote := not Quote'); Quote := not Quote end else begin //debugln('i=',DbgS(i),': Skip Quote'); Inc(i); end; end; else Inc(ArgLen); end;//case Inc(i); end; //find next commandline parameter //debugln('ArgWCount=',DbgS(ArgsWCount),' Start=',DbgS(start),' ArgLen=',DbgS(arglen),' i=',DbgS(i)); //we already have (a better) ArgW[0] if (ArgsWCount > 0) then begin //Process commandline parameter AllocArg(ArgsWCount, ArgLen); Quote := False; i := Start; j := 1; while (i <= CmdLen) do begin case CmdLineW[i] of #1..#32: begin if Quote then begin //if j > ArgLen then debugln('Error whitespace: j > ArgLen: j=',DbgS(j),' ArgLen=',DbgS(arglen)); ArgsW[ArgsWCount][j] := CmdLineW[i]; Inc(j); end else Break; end; '"': begin if (i < CmdLen) and (CmdLineW[i+1] <> '"') then Quote := not Quote else Inc(i); end; else begin //if j > ArgLen then debugln('Error Quote: j > ArgLen: j=',DbgS(j),' ArgLen=',DbgS(arglen)); ArgsW[ArgsWCount][j] := CmdLineW[i]; Inc(j); end; end; Inc(i); end; //debugln('ArgsW[',DbgS(ArgsWCount),'] = ',ArgsW[ArgsWCount]); end; // Process commandline parameter Inc(ArgsWCount); end; Dec(ArgsWCount); //Note: //On WinCe Argsv is a static function, so we cannot change it. //This might change in the future if Argsv on WinCE will be declared as a function variable {$IF DEFINED(UTF8_RTL) AND NOT DEFINED(WINCE)} if DefaultSystemCodePage=CP_UTF8 then SetupArgvAsUtf8; {$IFEND} end; function ParamStrUtf8Wide(Param: Integer): String; begin if ArgsWCount <> ParamCount then begin //DebugLn('Error: ParamCount <> ArgsWCount!'); Result := SysToUtf8(ObjPas.ParamStr(Param)); end else begin if (Param <= ArgsWCount) then {$IFDEF ACP_RTL} Result := String(UnicodeString(ArgsW[Param])) {$ELSE} Result := Utf8Encode(ArgsW[Param]) {$ENDIF ACP_RTL} else Result := ''; end; end; //*************** END WideString impementations function ConsoleToUTF8(const s: string): string;// converts UTF8 string to console encoding (used by Write, WriteLn) {$ifNdef WinCE} var Dst: PChar; {$endif} begin {$ifdef WinCE} Result := SysToUTF8(s); {$else} Dst := AllocMem((Length(s) + 1) * SizeOf(Char)); if OemToChar(PChar(s), Dst) then Result := StrPas(Dst) else Result := s; FreeMem(Dst); Result := SysToUTF8(Result); {$endif} end; function UTF8ToConsole(const s: string): string; {$ifNdef WinCE} var Dst: PChar; {$endif} begin {$ifdef WinCE} Result := UTF8ToSys(s); {$else WinCE} {$ifndef NO_CP_RTL} Result := UTF8ToWinCP(s); {$else NO_CP_RTL} Result := UTF8ToSys(s); // Kept for compatibility {$endif NO_CP_RTL} Dst := AllocMem((Length(Result) + 1) * SizeOf(Char)); if CharToOEM(PChar(Result), Dst) then Result := StrPas(Dst); FreeMem(Dst); {$ifndef NO_CP_RTL} SetCodePage(RawByteString(Result), CP_OEMCP, False); {$endif NO_CP_RTL} {$endif WinCE} end; {$IFDEF MSWindows} // for all Windows supporting 8bit codepages (e.g. not WinCE) function WinCPToUTF8(const s: string): string; // result has codepage CP_ACP var UTF16WordCnt: SizeInt; UTF16Str: UnicodeString; begin Result:=s; if IsASCII(Result) then begin {$ifdef FPC_HAS_CPSTRING} // prevent codepage conversion magic SetCodePage(RawByteString(Result), CP_ACP, False); {$endif} exit; end; UTF16WordCnt:=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, Pointer(s), length(s), nil, 0); // this will null-terminate if UTF16WordCnt>0 then begin setlength(UTF16Str, UTF16WordCnt); MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, Pointer(s), length(s), @UTF16Str[1], UTF16WordCnt); Result:=UTF8Encode(UTF16Str); {$ifdef FPC_HAS_CPSTRING} // prevent codepage conversion magic SetCodePage(RawByteString(Result), CP_ACP, False); {$endif} end; end; function UTF8ToWinCP(const s: string): string; // result has codepage CP_ACP var src: UnicodeString; len: LongInt; begin Result:=s; if IsASCII(Result) then begin {$ifdef FPC_HAS_CPSTRING} // prevent codepage conversion magic SetCodePage(RawByteString(Result), CP_ACP, False); {$endif} exit; end; src:=UTF8Decode(s); if src='' then exit; len:=WideCharToMultiByte(CP_ACP,0,PUnicodeChar(src),length(src),nil,0,nil,nil); SetLength(Result,len); if len>0 then begin WideCharToMultiByte(CP_ACP,0,PUnicodeChar(src),length(src),@Result[1],length(Result),nil,nil); {$ifdef FPC_HAS_CPSTRING} // prevent codepage conversion magic SetCodePage(RawByteString(Result), CP_ACP, False); {$endif} end; end; {$ENDIF} {$ifdef debugparamstrutf8} procedure ParamStrUtf8Error; var i: Integer; begin writeln('Error in Windows WideString implementation of ParamStrUtf8'); writeln('Using SysToUtf8(ParamsStr(Param)) as fallback'); writeln('ParamCount = ',ParamCount,', ArgsWCount = ',ArgsWCount); for i := 0 to ParamCount do writeln('ParamStr(',i,') = "',ParamStr(i),'"'); writeln; for i := 0 to ArgsWCount do writeln('ParamStrUtf8(',i,') = "',ArgsW[i],'"'); end; {$endif} {$IFDEF UTF8_RTL} function GetLocaleStr(aLocaleID, aLCType: Longint; const Def: string): String; var L: Integer; Buf: array[0..255] of WideChar; begin L := GetLocaleInfoW(aLocaleID, aLCType, Buf, SizeOf(Buf)); if L > 0 then begin Result:=''; widestringmanager.Wide2AnsiMoveProc(PWideChar(@Buf[0]),Result,CP_UTF8,L-1); end else Result := Def; end; function GetLocaleCharUTF8(aLocaleID, aLCType: Longint; Def: Char): Char; var Buf: array[0..3] of WChar; // sdate allows 4 chars. GLI: LongInt; begin //Use Widestring Api so it works on WinCE as well GLI := GetLocaleInfoW(aLocaleID, aLCType, Buf, sizeof(buf)); if (GLI > 0) and (ord(Buf[0])<128) then Result := Buf[0] else begin Result := Def; { case Buf[0] of #$C2: case Buf[1] of #$A0: Result:=' '; // non breakable space #$B7: Result:='.'; // middle stop end; #$CB: if Buf[1]=#$99 then Result:=''''; // dot above, italian handwriting #$D9: case Buf[1] of #$AB: Result:=','; // arabic decimal separator, persian thousand separator #$AC: Result:=''''; // arabic thousand separator end; #$E2: case Buf[1] of #$80: case Buf[2] of #$82, // long space #$83, // long space #$89, // thin space #$AF: // narrow non breakable space Result := ' '; #$94: Result := '-'; // persian decimal mark end; #$8E: if Buf[2]=#$96 then Result := ''''; // codepoint 9110 decimal separator end; end; } if (GLI = 1) then begin case Buf[0] of #$00A0: Result := ' '; // non breakable space #$00B7: Result := '.'; // middle stop #$02D9: Result := ''''; // dot above, italian handwriting #$066B: Result := ','; // arabic decimal separator, persian thousand separator #$066C: Result := ''''; // arabic thousand separator #$2002: Result := ' '; // long space #$2003: Result := ' '; // long space #$2009: Result := ' '; // thin space #$202F: Result := ' '; // narrow non breakable space #$2014: Result := '-'; // persian decimal mark #$2396: Result := ''''; // codepoint 9110 decimal separator { Utf8 Utf16 C2 A0 -> 00A0 C2 B7 -> 00B7 CB 99 -> 02D9 D9 AB -> 066B D9 AC -> 066C E2 80 82 -> 2002 E2 80 83 -> 2003 E2 80 89 -> 2009 E2 80 AF -> 202F E2 80 94 -> 2014 E2 8E 96 -> 2396 } end; //case end; //GLI = 1 end; end; procedure GetFormatSettingsUTF8(LCID: Integer; var aFormatSettings: TFormatSettings); var HF : Shortstring; LID : Windows.LCID; I,Day : longint; begin LID := LCID; with aFormatSettings do begin { Date stuff } for I := 1 to 12 do begin ShortMonthNames[I]:=GetLocaleStr(LID,LOCALE_SABBREVMONTHNAME1+I-1,ShortMonthNames[i]); LongMonthNames[I]:=GetLocaleStr(LID,LOCALE_SMONTHNAME1+I-1,LongMonthNames[i]); end; for I := 1 to 7 do begin Day := (I + 5) mod 7; ShortDayNames[I]:=GetLocaleStr(LID,LOCALE_SABBREVDAYNAME1+Day,ShortDayNames[i]); LongDayNames[I]:=GetLocaleStr(LID,LOCALE_SDAYNAME1+Day,LongDayNames[i]); end; DateSeparator := GetLocaleCharUTF8(LID, LOCALE_SDATE, '/'); ShortDateFormat := GetLocaleStr(LID, LOCALE_SSHORTDATE, 'm/d/yy'); LongDateFormat := GetLocaleStr(LID, LOCALE_SLONGDATE, 'mmmm d, yyyy'); { Time stuff } TimeSeparator := GetLocaleCharUTF8(LID, LOCALE_STIME, ':'); TimeAMString := GetLocaleStr(LID, LOCALE_S1159, 'AM'); TimePMString := GetLocaleStr(LID, LOCALE_S2359, 'PM'); if StrToIntDef(GetLocaleStr(LID, LOCALE_ITLZERO, '0'), 0) = 0 then HF:='h' else HF:='hh'; // No support for 12 hour stuff at the moment... ShortTimeFormat := HF+':nn'; LongTimeFormat := HF + ':nn:ss'; { Currency stuff } CurrencyString:=GetLocaleStr(LID, LOCALE_SCURRENCY, ''); CurrencyFormat:=StrToIntDef(GetLocaleStr(LID, LOCALE_ICURRENCY, '0'), 0); NegCurrFormat:=StrToIntDef(GetLocaleStr(LID, LOCALE_INEGCURR, '0'), 0); { Number stuff } ThousandSeparator:=GetLocaleCharUTF8(LID, LOCALE_STHOUSAND, ','); DecimalSeparator:=GetLocaleCharUTF8(LID, LOCALE_SDECIMAL, '.'); CurrencyDecimals:=StrToIntDef(GetLocaleStr(LID, LOCALE_ICURRDIGITS, '0'), 0); ListSeparator := GetLocaleCharUTF8(LID, LOCALE_SLIST, ','); end; end; function UTF8StrCompAnsiString(S1, S2: PChar): PtrInt; begin Result:=UTF8CompareStrP(S1,S2); end; function UTF8StrICompAnsiString(S1, S2: PChar): PtrInt; var U1, U2: String; begin U1:=StrPas(S1); U2:=StrPas(S2); Result:=UTF8CompareText(U1,U2); end; function UTF8StrLCompAnsiString(S1, S2: PChar; Count: PtrUInt): PtrInt; begin Result:=UTF8CompareStr(S1,Count,S2,Count); end; function UTF8StrLICompAnsiString(S1, S2: PChar; Count: PtrUInt): PtrInt; var U1, U2: String; begin if Count>0 then begin SetLength(U1,Count); Move(S1^,PByte(U1)^,Count); SetLength(U2,Count); Move(S2^,PByte(U2)^,Count); Result:=UTF8CompareText(U1,U2); end else Result:=0; end; {$ENDIF} procedure InitLazUtf8; begin {$ifndef WinCE} if Win32MajorVersion <= 4 then begin _ParamStrUtf8 := @ParamStrUtf8Ansi; end else {$endif} begin try ArgsWCount := -1; _ParamStrUtf8 := @ParamStrUtf8Wide; SetupCommandlineParametersWide; {$ifdef debugparamstrutf8} if ParamCount <> ArgsWCount then ParamStrUtf8Error; {$endif} Except begin ArgsWCount := -1; {$ifdef debugparamstrutf8} ParamStrUtf8Error; {$endif} end; end; end; {$IFDEF UTF8_RTL} {$ifndef wince} GetFormatSettingsUTF8(GetThreadLocale,FormatSettings); {$else} GetFormatSettingsUTF8(GetUserDefaultLCID ,FormatSettings); {$endif} widestringmanager.UpperAnsiStringProc:=@UTF8UpperString; widestringmanager.LowerAnsiStringProc:=@UTF8LowerString; widestringmanager.CompareStrAnsiStringProc:=@UTF8CompareStr; widestringmanager.CompareTextAnsiStringProc:=@UTF8CompareText; widestringmanager.StrCompAnsiStringProc:=@UTF8StrCompAnsiString; widestringmanager.StrICompAnsiStringProc:=@UTF8StrICompAnsiString; widestringmanager.StrLCompAnsiStringProc:=@UTF8StrLCompAnsiString; widestringmanager.StrLICompAnsiStringProc:=@UTF8StrLICompAnsiString; // Does anyone need these two? //widestringmanager.StrLowerAnsiStringProc; //widestringmanager.StrUpperAnsiStringProc; {$IFEND} end; procedure FinalizeLazUTF8; {$IFDEF UTF8_RTL} var p: PPChar; {$ENDIF} begin {$IF DEFINED(UTF8_RTL) AND NOT DEFINED(WINCE)} // restore argv and free memory if OldArgV<>nil then begin p:=argv; argv:=OldArgV; Freemem(p); end; {$IFEND} end;