+ several unicode (to/from utf-8 conversion) stuff added

* some longint -> SizeInt changes
This commit is contained in:
florian 2004-07-18 20:21:44 +00:00
parent 4d28b94218
commit 7a7c34e1b7
5 changed files with 291 additions and 56 deletions

View File

@ -35,7 +35,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_MOVE} {$ifndef FPC_SYSTEM_HAS_MOVE}
{$define FPC_SYSTEM_HAS_MOVE} {$define FPC_SYSTEM_HAS_MOVE}
procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler; procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;
var var
saveesi,saveedi : longint; saveesi,saveedi : longint;
asm asm
@ -119,7 +119,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLCHAR} {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
{$define FPC_SYSTEM_HAS_FILLCHAR} {$define FPC_SYSTEM_HAS_FILLCHAR}
Procedure FillChar(var x;count:longint;value:byte);assembler; Procedure FillChar(var x;count:SizeInt;value:byte);assembler;
asm asm
{A push is prefered over a local variable because a local {A push is prefered over a local variable because a local
@ -160,7 +160,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLWORD} {$ifndef FPC_SYSTEM_HAS_FILLWORD}
{$define FPC_SYSTEM_HAS_FILLWORD} {$define FPC_SYSTEM_HAS_FILLWORD}
procedure fillword(var x;count : longint;value : word);assembler; procedure fillword(var x;count : SizeInt;value : word);assembler;
var var
saveedi : longint; saveedi : longint;
asm asm
@ -197,7 +197,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLDWORD} {$ifndef FPC_SYSTEM_HAS_FILLDWORD}
{$define FPC_SYSTEM_HAS_FILLDWORD} {$define FPC_SYSTEM_HAS_FILLDWORD}
procedure filldword(var x;count : longint;value : dword);assembler; procedure filldword(var x;count : SizeInt;value : dword);assembler;
var var
saveedi : longint; saveedi : longint;
asm asm
@ -225,7 +225,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXBYTE} {$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
{$define FPC_SYSTEM_HAS_INDEXBYTE} {$define FPC_SYSTEM_HAS_INDEXBYTE}
function IndexByte(Const buf;len:longint;b:byte):longint; assembler; function IndexByte(Const buf;len:SizeInt;b:byte):SizeInt; assembler;
var var
saveedi,saveebx : longint; saveedi,saveebx : longint;
asm asm
@ -258,7 +258,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXWORD} {$ifndef FPC_SYSTEM_HAS_INDEXWORD}
{$define FPC_SYSTEM_HAS_INDEXWORD} {$define FPC_SYSTEM_HAS_INDEXWORD}
function Indexword(Const buf;len:longint;b:word):longint; assembler; function Indexword(Const buf;len:SizeInt;b:word):SizeInt; assembler;
var var
saveedi,saveebx : longint; saveedi,saveebx : longint;
asm asm
@ -291,7 +291,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXDWORD} {$ifndef FPC_SYSTEM_HAS_INDEXDWORD}
{$define FPC_SYSTEM_HAS_INDEXDWORD} {$define FPC_SYSTEM_HAS_INDEXDWORD}
function IndexDWord(Const buf;len:longint;b:DWord):longint; assembler; function IndexDWord(Const buf;len:SizeInt;b:DWord):SizeInt; assembler;
var var
saveedi,saveebx : longint; saveedi,saveebx : longint;
asm asm
@ -330,7 +330,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPAREBYTE} {$ifndef FPC_SYSTEM_HAS_COMPAREBYTE}
{$define FPC_SYSTEM_HAS_COMPAREBYTE} {$define FPC_SYSTEM_HAS_COMPAREBYTE}
function CompareByte(Const buf1,buf2;len:longint):longint; assembler; function CompareByte(Const buf1,buf2;len:SizeInt):SizeInt; assembler;
var var
saveesi,saveedi : longint; saveesi,saveedi : longint;
asm asm
@ -387,7 +387,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPAREWORD} {$ifndef FPC_SYSTEM_HAS_COMPAREWORD}
{$define FPC_SYSTEM_HAS_COMPAREWORD} {$define FPC_SYSTEM_HAS_COMPAREWORD}
function CompareWord(Const buf1,buf2;len:longint):longint; assembler; function CompareWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler;
var var
saveesi,saveedi,saveebx : longint; saveesi,saveedi,saveebx : longint;
asm asm
@ -455,7 +455,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPAREDWORD} {$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
{$define FPC_SYSTEM_HAS_COMPAREDWORD} {$define FPC_SYSTEM_HAS_COMPAREDWORD}
function CompareDWord(Const buf1,buf2;len:longint):longint; assembler; function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler;
var var
saveesi,saveedi,saveebx : longint; saveesi,saveedi,saveebx : longint;
asm asm
@ -521,7 +521,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXCHAR0} {$ifndef FPC_SYSTEM_HAS_INDEXCHAR0}
{$define FPC_SYSTEM_HAS_INDEXCHAR0} {$define FPC_SYSTEM_HAS_INDEXCHAR0}
function IndexChar0(Const buf;len:longint;b:Char):longint; assembler; function IndexChar0(Const buf;len:SizeInt;b:Char):SizeInt; assembler;
var var
saveesi,saveebx : longint; saveesi,saveebx : longint;
asm asm
@ -1450,7 +1450,11 @@ end;
{ {
$Log$ $Log$
Revision 1.63 2004-07-18 16:40:08 jonas Revision 1.64 2004-07-18 20:21:44 florian
+ several unicode (to/from utf-8 conversion) stuff added
* some longint -> SizeInt changes
Revision 1.63 2004/07/18 16:40:08 jonas
* fixed indexbyte/word/dword when length is 0 (return -1 instead of 0) * fixed indexbyte/word/dword when length is 0 (return -1 instead of 0)
Revision 1.62 2004/07/07 17:38:58 daniel Revision 1.62 2004/07/07 17:38:58 daniel
@ -1606,4 +1610,4 @@ end;
Revision 1.20 2002/03/30 14:52:04 carl Revision 1.20 2002/03/30 14:52:04 carl
* cause runtime error 203 on failed class creation * cause runtime error 203 on failed class creation
} }

View File

@ -23,7 +23,7 @@ type
pstring = ^shortstring; pstring = ^shortstring;
{$ifndef FPC_SYSTEM_HAS_MOVE} {$ifndef FPC_SYSTEM_HAS_MOVE}
procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE']; procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];
type type
bytearray = array [0..maxlongint-1] of byte; bytearray = array [0..maxlongint-1] of byte;
var var
@ -46,7 +46,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLCHAR} {$ifndef FPC_SYSTEM_HAS_FILLCHAR}
Procedure FillChar(var x;count:longint;value:byte); Procedure FillChar(var x;count:SizeInt;value:byte);
type type
longintarray = array [0..maxlongint div 4] of longint; longintarray = array [0..maxlongint div 4] of longint;
bytearray = array [0..maxlongint-1] of byte; bytearray = array [0..maxlongint-1] of byte;
@ -75,7 +75,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLBYTE} {$ifndef FPC_SYSTEM_HAS_FILLBYTE}
procedure FillByte (var x;count : longint;value : byte ); procedure FillByte (var x;count : SizeInt;value : byte );
begin begin
FillChar (X,Count,CHR(VALUE)); FillChar (X,Count,CHR(VALUE));
end; end;
@ -83,7 +83,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLWORD} {$ifndef FPC_SYSTEM_HAS_FILLWORD}
procedure fillword(var x;count : longint;value : word); procedure fillword(var x;count : SizeInt;value : word);
type type
longintarray = array [0..maxlongint div 4] of longint; longintarray = array [0..maxlongint div 4] of longint;
wordarray = array [0..maxlongint div 2] of word; wordarray = array [0..maxlongint div 2] of word;
@ -110,7 +110,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLDWORD} {$ifndef FPC_SYSTEM_HAS_FILLDWORD}
procedure FillDWord(var x;count : longint;value : DWord); procedure FillDWord(var x;count : SizeInt;value : DWord);
type type
longintarray = array [0..maxlongint div 4] of longint; longintarray = array [0..maxlongint div 4] of longint;
begin begin
@ -126,7 +126,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXCHAR} {$ifndef FPC_SYSTEM_HAS_INDEXCHAR}
function IndexChar(Const buf;len:longint;b:char):longint; function IndexChar(Const buf;len:SizeInt;b:char):SizeInt;
begin begin
IndexChar:=IndexByte(Buf,Len,byte(B)); IndexChar:=IndexByte(Buf,Len,byte(B));
end; end;
@ -134,7 +134,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXBYTE} {$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
function IndexByte(Const buf;len:longint;b:byte):longint; function IndexByte(Const buf;len:SizeInt;b:byte):SizeInt;
type type
bytearray = array [0..maxlongint-1] of byte; bytearray = array [0..maxlongint-1] of byte;
var var
@ -155,7 +155,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXWORD} {$ifndef FPC_SYSTEM_HAS_INDEXWORD}
function Indexword(Const buf;len:longint;b:word):longint; function Indexword(Const buf;len:SizeInt;b:word):SizeInt;
type type
wordarray = array [0..maxlongint div 2] of word; wordarray = array [0..maxlongint div 2] of word;
var var
@ -174,7 +174,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXDWORD} {$ifndef FPC_SYSTEM_HAS_INDEXDWORD}
function IndexDWord(Const buf;len:longint;b:DWord):longint; function IndexDWord(Const buf;len:SizeInt;b:DWord):SizeInt;
type type
dwordarray = array [0..maxlongint div 4] of dword; dwordarray = array [0..maxlongint div 4] of dword;
var var
@ -193,7 +193,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPARECHAR} {$ifndef FPC_SYSTEM_HAS_COMPARECHAR}
function CompareChar(Const buf1,buf2;len:longint):longint; function CompareChar(Const buf1,buf2;len:SizeInt):SizeInt;
begin begin
CompareChar:=CompareByte(buf1,buf2,len); CompareChar:=CompareByte(buf1,buf2,len);
end; end;
@ -201,7 +201,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPAREBYTE} {$ifndef FPC_SYSTEM_HAS_COMPAREBYTE}
function CompareByte(Const buf1,buf2;len:longint):longint; function CompareByte(Const buf1,buf2;len:SizeInt):SizeInt;
type type
bytearray = array [0..maxlongint-1] of byte; bytearray = array [0..maxlongint-1] of byte;
var var
@ -230,7 +230,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPAREWORD} {$ifndef FPC_SYSTEM_HAS_COMPAREWORD}
function CompareWord(Const buf1,buf2;len:longint):longint; function CompareWord(Const buf1,buf2;len:SizeInt):SizeInt;
type type
wordarray = array [0..maxlongint div 2] of word; wordarray = array [0..maxlongint div 2] of word;
var var
@ -259,7 +259,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPAREDWORD} {$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
function CompareDWord(Const buf1,buf2;len:longint):longint; function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt;
type type
longintarray = array [0..maxlongint div 4] of longint; longintarray = array [0..maxlongint div 4] of longint;
var var
@ -288,7 +288,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_MOVECHAR0} {$ifndef FPC_SYSTEM_HAS_MOVECHAR0}
procedure MoveChar0(Const buf1;var buf2;len:longint); procedure MoveChar0(Const buf1;var buf2;len:SizeInt);
var var
I : longint; I : longint;
begin begin
@ -303,7 +303,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXCHAR0} {$ifndef FPC_SYSTEM_HAS_INDEXCHAR0}
function IndexChar0(Const buf;len:longint;b:Char):longint; function IndexChar0(Const buf;len:SizeInt;b:Char):SizeInt;
var var
I : longint; I : longint;
begin begin
@ -319,7 +319,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPARECHAR0} {$ifndef FPC_SYSTEM_HAS_COMPARECHAR0}
function CompareChar0(Const buf1,buf2;len:longint):longint; function CompareChar0(Const buf1,buf2;len:SizeInt):SizeInt;
type type
bytearray = array [0..maxlongint-1] of byte; bytearray = array [0..maxlongint-1] of byte;
var var
@ -1225,7 +1225,11 @@ end;
{ {
$Log$ $Log$
Revision 1.79 2004-05-31 20:25:04 peter Revision 1.80 2004-07-18 20:21:44 florian
+ several unicode (to/from utf-8 conversion) stuff added
* some longint -> SizeInt changes
Revision 1.79 2004/05/31 20:25:04 peter
* removed warnings * removed warnings
Revision 1.78 2004/05/02 15:15:45 peter Revision 1.78 2004/05/02 15:15:45 peter
@ -1423,4 +1427,4 @@ end;
instead of direct comparisons of low/high values of orddefs because instead of direct comparisons of low/high values of orddefs because
qword is a special case qword is a special case
} }

View File

@ -150,12 +150,12 @@ Procedure Rewrite(var f : TypedFile); [INTERNPROC: In_Rewrite_TypedFile];
{$define SYSPROCDEFINED} {$define SYSPROCDEFINED}
{$endif cpuarm} {$endif cpuarm}
procedure fillchar(var x;count : longint;value : boolean);{$ifdef SYSTEMINLINE}inline;{$endif} procedure fillchar(var x;count : SizeInt;value : boolean);{$ifdef SYSTEMINLINE}inline;{$endif}
begin begin
fillchar(x,count,byte(value)); fillchar(x,count,byte(value));
end; end;
procedure fillchar(var x;count : longint;value : char);{$ifdef SYSTEMINLINE}inline;{$endif} procedure fillchar(var x;count : SizeInt;value : char);{$ifdef SYSTEMINLINE}inline;{$endif}
begin begin
fillchar(x,count,byte(value)); fillchar(x,count,byte(value));
end; end;
@ -249,7 +249,7 @@ operator ** (bas,expo : int64) i: int64;
begin begin
i:=power(bas,expo); i:=power(bas,expo);
end; end;
{**************************************************************************** {****************************************************************************
Subroutines for String handling Subroutines for String handling
@ -886,7 +886,11 @@ end;
{ {
$Log$ $Log$
Revision 1.59 2004-05-27 23:34:23 peter Revision 1.60 2004-07-18 20:21:44 florian
+ several unicode (to/from utf-8 conversion) stuff added
* some longint -> SizeInt changes
Revision 1.59 2004/05/27 23:34:23 peter
* stop backtrace also when caller frame is nil * stop backtrace also when caller frame is nil
Revision 1.58 2004/05/16 18:51:20 peter Revision 1.58 2004/05/16 18:51:20 peter

View File

@ -369,24 +369,24 @@ Var
Processor specific routines Processor specific routines
****************************************************************************} ****************************************************************************}
Procedure Move(const source;var dest;count:Longint); Procedure Move(const source;var dest;count:SizeInt);
Procedure FillChar(Var x;count:Longint;Value:Boolean);{$ifdef SYSTEMINLINE}inline;{$endif} Procedure FillChar(Var x;count:SizeInt;Value:Boolean);{$ifdef SYSTEMINLINE}inline;{$endif}
Procedure FillChar(Var x;count:Longint;Value:Char);{$ifdef SYSTEMINLINE}inline;{$endif} Procedure FillChar(Var x;count:SizeInt;Value:Char);{$ifdef SYSTEMINLINE}inline;{$endif}
Procedure FillChar(Var x;count:Longint;Value:Byte); Procedure FillChar(Var x;count:SizeInt;Value:Byte);
procedure FillByte(var x;count:longint;value:byte); procedure FillByte(var x;count:SizeInt;value:byte);
Procedure FillWord(Var x;count:Longint;Value:Word); Procedure FillWord(Var x;count:SizeInt;Value:Word);
procedure FillDWord(var x;count:longint;value:DWord); procedure FillDWord(var x;count:SizeInt;value:DWord);
function IndexChar(const buf;len:longint;b:char):longint; function IndexChar(const buf;len:SizeInt;b:char):SizeInt;
function IndexByte(const buf;len:longint;b:byte):longint; function IndexByte(const buf;len:SizeInt;b:byte):SizeInt;
function Indexword(const buf;len:longint;b:word):longint; function Indexword(const buf;len:SizeInt;b:word):SizeInt;
function IndexDWord(const buf;len:longint;b:DWord):longint; function IndexDWord(const buf;len:SizeInt;b:DWord):SizeInt;
function CompareChar(const buf1,buf2;len:longint):longint; function CompareChar(const buf1,buf2;len:SizeInt):SizeInt;
function CompareByte(const buf1,buf2;len:longint):longint; function CompareByte(const buf1,buf2;len:SizeInt):SizeInt;
function CompareWord(const buf1,buf2;len:longint):longint; function CompareWord(const buf1,buf2;len:SizeInt):SizeInt;
function CompareDWord(const buf1,buf2;len:longint):longint; function CompareDWord(const buf1,buf2;len:SizeInt):SizeInt;
procedure MoveChar0(const buf1;var buf2;len:longint); procedure MoveChar0(const buf1;var buf2;len:SizeInt);
function IndexChar0(const buf;len:longint;b:char):longint; function IndexChar0(const buf;len:SizeInt;b:char):SizeInt;
function CompareChar0(const buf1,buf2;len:longint):longint; function CompareChar0(const buf1,buf2;len:SizeInt):SizeInt;
procedure prefetch(const mem); procedure prefetch(const mem);
@ -568,6 +568,16 @@ Type
} }
end; end;
function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PWideChar; SourceChars: SizeUInt): SizeUInt;
function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
function Utf8ToUnicode(Dest: PWideChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
function UTF8Encode(const s : WideString) : UTF8String;
function UTF8Decode(const s : UTF8String): WideString;
function AnsiToUtf8(const s : ansistring): UTF8String;{$ifdef SYSTEMINLINE}inline;{$endif}
function Utf8ToAnsi(const s : UTF8String) : ansistring;{$ifdef SYSTEMINLINE}inline;{$endif}
Procedure GetWideStringManager (Var Manager : TWideStringManager); Procedure GetWideStringManager (Var Manager : TWideStringManager);
Procedure SetWideStringManager (Const New : TWideStringManager); Procedure SetWideStringManager (Const New : TWideStringManager);
Procedure SetWideStringManager (Const New : TWideStringManager; Var Old: TWideStringManager); Procedure SetWideStringManager (Const New : TWideStringManager; Var Old: TWideStringManager);
@ -744,7 +754,11 @@ const
{ {
$Log$ $Log$
Revision 1.97 2004-07-07 15:15:40 daniel Revision 1.98 2004-07-18 20:21:44 florian
+ several unicode (to/from utf-8 conversion) stuff added
* some longint -> SizeInt changes
Revision 1.97 2004/07/07 15:15:40 daniel
* Add inline directive to sysresetfpu * Add inline directive to sysresetfpu
Revision 1.96 2004/07/03 21:50:31 daniel Revision 1.96 2004/07/03 21:50:31 daniel

View File

@ -4,7 +4,7 @@
Copyright (c) 1999-2001 by Florian Klaempfl, Copyright (c) 1999-2001 by Florian Klaempfl,
member of the Free Pascal development team. member of the Free Pascal development team.
This file implements support routines for WideStrings with FPC This file implements support routines for WideStrings/Unicode with FPC
See the file COPYING.FPC, included in this distribution, See the file COPYING.FPC, included in this distribution,
for details about the copyright. for details about the copyright.
@ -1030,9 +1030,218 @@ end;
{$endif CPU64} {$endif CPU64}
function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
if assigned(Source) then
Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0))
else
Result:=0;
end;
function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PWideChar; SourceChars: SizeUInt): SizeUInt;
var
i,j : SizeUInt;
w : word;
begin
result:=0;
if source=nil then
exit;
i:=0;
j:=0;
if assigned(Dest) then
begin
while (i<SourceChars) and (j<MaxDestBytes) do
begin
w:=word(Source[i]);
case w of
0..$7f:
begin
Dest[j]:=char(w);
inc(j);
end;
$80..$7ff:
begin
if j+1>=MaxDestBytes then
break;
Dest[j]:=char($c0 or (w shr 6));
Dest[j+1]:=char($80 or (w and $3f));
inc(j,2);
end;
else
begin
if j+2>=MaxDestBytes then
break;
Dest[j]:=char($e0 or (w shr 12));
Dest[j+1]:=char($80 or ((w shr 6)and $3f));
Dest[j+2]:=char($80 or (w and $3f));
inc(j,3);
end;
end;
inc(i);
end;
if j>MaxDestBytes-1 then
j:=MaxDestBytes-1;
Dest[j]:=#0;
end
else
begin
while i<SourceChars do
begin
case word(Source[i]) of
$0..$7f:
inc(j);
$80..$7ff:
inc(j,2);
else
inc(j,3);
end;
end;
end;
result:=j+1;
end;
function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
if assigned(Source) then
Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source))
else
Result:=0;
end;
function Utf8ToUnicode(Dest: PWideChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
var
i,j : SizeUInt;
w : word;
b : byte;
begin
result:=0;
i:=0;
j:=0;
if assigned(Dest) then
begin
while (j<MaxDestChars) and (i<SourceBytes) do
begin
b:=byte(Source[i]);
inc(i);
// 2 or 3 bytes?
if w>=$80 then
begin
w:=b and $3c;
if i>=SourceBytes then
exit;
// 3 bytes?
if (b and $20)<>0 then
begin
b:=byte(Source[i]);
inc(i);
if i>=SourceBytes then
exit;
if (b and $c0)<>$80 then
exit;
w:=(w shl 6) or (b and $3c);
end;
b:=byte(Source[i]);
w:=(w shl 6) or (b and $3c);
if (b and $c0)<>$80 then
exit;
inc(i);
end;
Dest[j]:=WideChar(w);
inc(j);
end;
end
else
begin
while i<SourceBytes do
begin
b:=byte(Source[i]);
inc(i);
// 2 or 3 bytes?
if b>=$80 then
begin
if i>=SourceBytes then
exit;
// 3 bytes?
if (b and $20)<>0 then
begin
b:=byte(Source[i]);
inc(i);
if i>=SourceBytes then
exit;
if (b and $c0)<>$80 then
exit;
end;
if (byte(Source[i]) and $c0)<>$80 then
exit;
inc(i);
end;
inc(j);
end;
end;
result:=j+1;
end;
function UTF8Encode(const s : WideString) : UTF8String;
var
i : SizeInt;
hs : UTF8String;
begin
result:='';
if s='' then
exit;
SetLength(hs,length(s)*3);
i:=UnicodeToUtf8(pchar(hs),length(hs)+1,PWideChar(s),length(s));
if i>0 then
begin
SetLength(hs,i-1);
result:=hs;
end;
end;
function UTF8Decode(const s : UTF8String): WideString;
var
i : SizeInt;
hs : WideString;
begin
result:='';
if s='' then
exit;
SetLength(hs,length(s));
i:=Utf8ToUnicode(PWideChar(hs),length(hs)+1,pchar(s),length(s));
if i>0 then
begin
SetLength(hs,i-1);
result:=hs;
end;
end;
function AnsiToUtf8(const s : ansistring): UTF8String;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
Result:=Utf8Encode(s);
end;
function Utf8ToAnsi(const s : UTF8String) : ansistring;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
Result:=Utf8Decode(s);
end;
{ {
$Log$ $Log$
Revision 1.41 2004-07-12 17:58:19 peter Revision 1.42 2004-07-18 20:21:44 florian
+ several unicode (to/from utf-8 conversion) stuff added
* some longint -> SizeInt changes
Revision 1.41 2004/07/12 17:58:19 peter
* remove maxlen field from ansistring/widestrings * remove maxlen field from ansistring/widestrings
Revision 1.40 2004/07/02 21:21:09 peter Revision 1.40 2004/07/02 21:21:09 peter