+ several unicode (to/from utf-8 conversion) stuff added

* some longint -> SizeInt changes
This commit is contained in:
florian 2004-07-18 20:21:44 +00:00
parent 4d28b94218
commit 7a7c34e1b7
5 changed files with 291 additions and 56 deletions

View File

@ -35,7 +35,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_MOVE}
{$define FPC_SYSTEM_HAS_MOVE}
procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];assembler;
procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];assembler;
var
saveesi,saveedi : longint;
asm
@ -119,7 +119,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLCHAR}
{$define FPC_SYSTEM_HAS_FILLCHAR}
Procedure FillChar(var x;count:longint;value:byte);assembler;
Procedure FillChar(var x;count:SizeInt;value:byte);assembler;
asm
{A push is prefered over a local variable because a local
@ -160,7 +160,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLWORD}
{$define FPC_SYSTEM_HAS_FILLWORD}
procedure fillword(var x;count : longint;value : word);assembler;
procedure fillword(var x;count : SizeInt;value : word);assembler;
var
saveedi : longint;
asm
@ -197,7 +197,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLDWORD}
{$define FPC_SYSTEM_HAS_FILLDWORD}
procedure filldword(var x;count : longint;value : dword);assembler;
procedure filldword(var x;count : SizeInt;value : dword);assembler;
var
saveedi : longint;
asm
@ -225,7 +225,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
{$define FPC_SYSTEM_HAS_INDEXBYTE}
function IndexByte(Const buf;len:longint;b:byte):longint; assembler;
function IndexByte(Const buf;len:SizeInt;b:byte):SizeInt; assembler;
var
saveedi,saveebx : longint;
asm
@ -258,7 +258,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXWORD}
{$define FPC_SYSTEM_HAS_INDEXWORD}
function Indexword(Const buf;len:longint;b:word):longint; assembler;
function Indexword(Const buf;len:SizeInt;b:word):SizeInt; assembler;
var
saveedi,saveebx : longint;
asm
@ -291,7 +291,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXDWORD}
{$define FPC_SYSTEM_HAS_INDEXDWORD}
function IndexDWord(Const buf;len:longint;b:DWord):longint; assembler;
function IndexDWord(Const buf;len:SizeInt;b:DWord):SizeInt; assembler;
var
saveedi,saveebx : longint;
asm
@ -330,7 +330,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPAREBYTE}
{$define FPC_SYSTEM_HAS_COMPAREBYTE}
function CompareByte(Const buf1,buf2;len:longint):longint; assembler;
function CompareByte(Const buf1,buf2;len:SizeInt):SizeInt; assembler;
var
saveesi,saveedi : longint;
asm
@ -387,7 +387,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPAREWORD}
{$define FPC_SYSTEM_HAS_COMPAREWORD}
function CompareWord(Const buf1,buf2;len:longint):longint; assembler;
function CompareWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler;
var
saveesi,saveedi,saveebx : longint;
asm
@ -455,7 +455,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
{$define FPC_SYSTEM_HAS_COMPAREDWORD}
function CompareDWord(Const buf1,buf2;len:longint):longint; assembler;
function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt; assembler;
var
saveesi,saveedi,saveebx : longint;
asm
@ -521,7 +521,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXCHAR0}
{$define FPC_SYSTEM_HAS_INDEXCHAR0}
function IndexChar0(Const buf;len:longint;b:Char):longint; assembler;
function IndexChar0(Const buf;len:SizeInt;b:Char):SizeInt; assembler;
var
saveesi,saveebx : longint;
asm
@ -1450,7 +1450,11 @@ end;
{
$Log$
Revision 1.63 2004-07-18 16:40:08 jonas
Revision 1.64 2004-07-18 20:21:44 florian
+ several unicode (to/from utf-8 conversion) stuff added
* some longint -> SizeInt changes
Revision 1.63 2004/07/18 16:40:08 jonas
* fixed indexbyte/word/dword when length is 0 (return -1 instead of 0)
Revision 1.62 2004/07/07 17:38:58 daniel
@ -1606,4 +1610,4 @@ end;
Revision 1.20 2002/03/30 14:52:04 carl
* cause runtime error 203 on failed class creation
}
}

View File

@ -23,7 +23,7 @@ type
pstring = ^shortstring;
{$ifndef FPC_SYSTEM_HAS_MOVE}
procedure Move(const source;var dest;count:longint);[public, alias: 'FPC_MOVE'];
procedure Move(const source;var dest;count:SizeInt);[public, alias: 'FPC_MOVE'];
type
bytearray = array [0..maxlongint-1] of byte;
var
@ -46,7 +46,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLCHAR}
Procedure FillChar(var x;count:longint;value:byte);
Procedure FillChar(var x;count:SizeInt;value:byte);
type
longintarray = array [0..maxlongint div 4] of longint;
bytearray = array [0..maxlongint-1] of byte;
@ -75,7 +75,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLBYTE}
procedure FillByte (var x;count : longint;value : byte );
procedure FillByte (var x;count : SizeInt;value : byte );
begin
FillChar (X,Count,CHR(VALUE));
end;
@ -83,7 +83,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLWORD}
procedure fillword(var x;count : longint;value : word);
procedure fillword(var x;count : SizeInt;value : word);
type
longintarray = array [0..maxlongint div 4] of longint;
wordarray = array [0..maxlongint div 2] of word;
@ -110,7 +110,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_FILLDWORD}
procedure FillDWord(var x;count : longint;value : DWord);
procedure FillDWord(var x;count : SizeInt;value : DWord);
type
longintarray = array [0..maxlongint div 4] of longint;
begin
@ -126,7 +126,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXCHAR}
function IndexChar(Const buf;len:longint;b:char):longint;
function IndexChar(Const buf;len:SizeInt;b:char):SizeInt;
begin
IndexChar:=IndexByte(Buf,Len,byte(B));
end;
@ -134,7 +134,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXBYTE}
function IndexByte(Const buf;len:longint;b:byte):longint;
function IndexByte(Const buf;len:SizeInt;b:byte):SizeInt;
type
bytearray = array [0..maxlongint-1] of byte;
var
@ -155,7 +155,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXWORD}
function Indexword(Const buf;len:longint;b:word):longint;
function Indexword(Const buf;len:SizeInt;b:word):SizeInt;
type
wordarray = array [0..maxlongint div 2] of word;
var
@ -174,7 +174,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXDWORD}
function IndexDWord(Const buf;len:longint;b:DWord):longint;
function IndexDWord(Const buf;len:SizeInt;b:DWord):SizeInt;
type
dwordarray = array [0..maxlongint div 4] of dword;
var
@ -193,7 +193,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPARECHAR}
function CompareChar(Const buf1,buf2;len:longint):longint;
function CompareChar(Const buf1,buf2;len:SizeInt):SizeInt;
begin
CompareChar:=CompareByte(buf1,buf2,len);
end;
@ -201,7 +201,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPAREBYTE}
function CompareByte(Const buf1,buf2;len:longint):longint;
function CompareByte(Const buf1,buf2;len:SizeInt):SizeInt;
type
bytearray = array [0..maxlongint-1] of byte;
var
@ -230,7 +230,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPAREWORD}
function CompareWord(Const buf1,buf2;len:longint):longint;
function CompareWord(Const buf1,buf2;len:SizeInt):SizeInt;
type
wordarray = array [0..maxlongint div 2] of word;
var
@ -259,7 +259,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPAREDWORD}
function CompareDWord(Const buf1,buf2;len:longint):longint;
function CompareDWord(Const buf1,buf2;len:SizeInt):SizeInt;
type
longintarray = array [0..maxlongint div 4] of longint;
var
@ -288,7 +288,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_MOVECHAR0}
procedure MoveChar0(Const buf1;var buf2;len:longint);
procedure MoveChar0(Const buf1;var buf2;len:SizeInt);
var
I : longint;
begin
@ -303,7 +303,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_INDEXCHAR0}
function IndexChar0(Const buf;len:longint;b:Char):longint;
function IndexChar0(Const buf;len:SizeInt;b:Char):SizeInt;
var
I : longint;
begin
@ -319,7 +319,7 @@ end;
{$ifndef FPC_SYSTEM_HAS_COMPARECHAR0}
function CompareChar0(Const buf1,buf2;len:longint):longint;
function CompareChar0(Const buf1,buf2;len:SizeInt):SizeInt;
type
bytearray = array [0..maxlongint-1] of byte;
var
@ -1225,7 +1225,11 @@ end;
{
$Log$
Revision 1.79 2004-05-31 20:25:04 peter
Revision 1.80 2004-07-18 20:21:44 florian
+ several unicode (to/from utf-8 conversion) stuff added
* some longint -> SizeInt changes
Revision 1.79 2004/05/31 20:25:04 peter
* removed warnings
Revision 1.78 2004/05/02 15:15:45 peter
@ -1423,4 +1427,4 @@ end;
instead of direct comparisons of low/high values of orddefs because
qword is a special case
}
}

View File

@ -150,12 +150,12 @@ Procedure Rewrite(var f : TypedFile); [INTERNPROC: In_Rewrite_TypedFile];
{$define SYSPROCDEFINED}
{$endif cpuarm}
procedure fillchar(var x;count : longint;value : boolean);{$ifdef SYSTEMINLINE}inline;{$endif}
procedure fillchar(var x;count : SizeInt;value : boolean);{$ifdef SYSTEMINLINE}inline;{$endif}
begin
fillchar(x,count,byte(value));
end;
procedure fillchar(var x;count : longint;value : char);{$ifdef SYSTEMINLINE}inline;{$endif}
procedure fillchar(var x;count : SizeInt;value : char);{$ifdef SYSTEMINLINE}inline;{$endif}
begin
fillchar(x,count,byte(value));
end;
@ -249,7 +249,7 @@ operator ** (bas,expo : int64) i: int64;
begin
i:=power(bas,expo);
end;
{****************************************************************************
Subroutines for String handling
@ -886,7 +886,11 @@ end;
{
$Log$
Revision 1.59 2004-05-27 23:34:23 peter
Revision 1.60 2004-07-18 20:21:44 florian
+ several unicode (to/from utf-8 conversion) stuff added
* some longint -> SizeInt changes
Revision 1.59 2004/05/27 23:34:23 peter
* stop backtrace also when caller frame is nil
Revision 1.58 2004/05/16 18:51:20 peter

View File

@ -369,24 +369,24 @@ Var
Processor specific routines
****************************************************************************}
Procedure Move(const source;var dest;count:Longint);
Procedure FillChar(Var x;count:Longint;Value:Boolean);{$ifdef SYSTEMINLINE}inline;{$endif}
Procedure FillChar(Var x;count:Longint;Value:Char);{$ifdef SYSTEMINLINE}inline;{$endif}
Procedure FillChar(Var x;count:Longint;Value:Byte);
procedure FillByte(var x;count:longint;value:byte);
Procedure FillWord(Var x;count:Longint;Value:Word);
procedure FillDWord(var x;count:longint;value:DWord);
function IndexChar(const buf;len:longint;b:char):longint;
function IndexByte(const buf;len:longint;b:byte):longint;
function Indexword(const buf;len:longint;b:word):longint;
function IndexDWord(const buf;len:longint;b:DWord):longint;
function CompareChar(const buf1,buf2;len:longint):longint;
function CompareByte(const buf1,buf2;len:longint):longint;
function CompareWord(const buf1,buf2;len:longint):longint;
function CompareDWord(const buf1,buf2;len:longint):longint;
procedure MoveChar0(const buf1;var buf2;len:longint);
function IndexChar0(const buf;len:longint;b:char):longint;
function CompareChar0(const buf1,buf2;len:longint):longint;
Procedure Move(const source;var dest;count:SizeInt);
Procedure FillChar(Var x;count:SizeInt;Value:Boolean);{$ifdef SYSTEMINLINE}inline;{$endif}
Procedure FillChar(Var x;count:SizeInt;Value:Char);{$ifdef SYSTEMINLINE}inline;{$endif}
Procedure FillChar(Var x;count:SizeInt;Value:Byte);
procedure FillByte(var x;count:SizeInt;value:byte);
Procedure FillWord(Var x;count:SizeInt;Value:Word);
procedure FillDWord(var x;count:SizeInt;value:DWord);
function IndexChar(const buf;len:SizeInt;b:char):SizeInt;
function IndexByte(const buf;len:SizeInt;b:byte):SizeInt;
function Indexword(const buf;len:SizeInt;b:word):SizeInt;
function IndexDWord(const buf;len:SizeInt;b:DWord):SizeInt;
function CompareChar(const buf1,buf2;len:SizeInt):SizeInt;
function CompareByte(const buf1,buf2;len:SizeInt):SizeInt;
function CompareWord(const buf1,buf2;len:SizeInt):SizeInt;
function CompareDWord(const buf1,buf2;len:SizeInt):SizeInt;
procedure MoveChar0(const buf1;var buf2;len:SizeInt);
function IndexChar0(const buf;len:SizeInt;b:char):SizeInt;
function CompareChar0(const buf1,buf2;len:SizeInt):SizeInt;
procedure prefetch(const mem);
@ -568,6 +568,16 @@ Type
}
end;
function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PWideChar; SourceChars: SizeUInt): SizeUInt;
function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
function Utf8ToUnicode(Dest: PWideChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
function UTF8Encode(const s : WideString) : UTF8String;
function UTF8Decode(const s : UTF8String): WideString;
function AnsiToUtf8(const s : ansistring): UTF8String;{$ifdef SYSTEMINLINE}inline;{$endif}
function Utf8ToAnsi(const s : UTF8String) : ansistring;{$ifdef SYSTEMINLINE}inline;{$endif}
Procedure GetWideStringManager (Var Manager : TWideStringManager);
Procedure SetWideStringManager (Const New : TWideStringManager);
Procedure SetWideStringManager (Const New : TWideStringManager; Var Old: TWideStringManager);
@ -744,7 +754,11 @@ const
{
$Log$
Revision 1.97 2004-07-07 15:15:40 daniel
Revision 1.98 2004-07-18 20:21:44 florian
+ several unicode (to/from utf-8 conversion) stuff added
* some longint -> SizeInt changes
Revision 1.97 2004/07/07 15:15:40 daniel
* Add inline directive to sysresetfpu
Revision 1.96 2004/07/03 21:50:31 daniel

View File

@ -4,7 +4,7 @@
Copyright (c) 1999-2001 by Florian Klaempfl,
member of the Free Pascal development team.
This file implements support routines for WideStrings with FPC
This file implements support routines for WideStrings/Unicode with FPC
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
@ -1030,9 +1030,218 @@ end;
{$endif CPU64}
function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
if assigned(Source) then
Result:=UnicodeToUtf8(Dest,MaxBytes,Source,IndexWord(Source^,-1,0))
else
Result:=0;
end;
function UnicodeToUtf8(Dest: PChar; MaxDestBytes: SizeUInt; Source: PWideChar; SourceChars: SizeUInt): SizeUInt;
var
i,j : SizeUInt;
w : word;
begin
result:=0;
if source=nil then
exit;
i:=0;
j:=0;
if assigned(Dest) then
begin
while (i<SourceChars) and (j<MaxDestBytes) do
begin
w:=word(Source[i]);
case w of
0..$7f:
begin
Dest[j]:=char(w);
inc(j);
end;
$80..$7ff:
begin
if j+1>=MaxDestBytes then
break;
Dest[j]:=char($c0 or (w shr 6));
Dest[j+1]:=char($80 or (w and $3f));
inc(j,2);
end;
else
begin
if j+2>=MaxDestBytes then
break;
Dest[j]:=char($e0 or (w shr 12));
Dest[j+1]:=char($80 or ((w shr 6)and $3f));
Dest[j+2]:=char($80 or (w and $3f));
inc(j,3);
end;
end;
inc(i);
end;
if j>MaxDestBytes-1 then
j:=MaxDestBytes-1;
Dest[j]:=#0;
end
else
begin
while i<SourceChars do
begin
case word(Source[i]) of
$0..$7f:
inc(j);
$80..$7ff:
inc(j,2);
else
inc(j,3);
end;
end;
end;
result:=j+1;
end;
function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: SizeInt): SizeInt;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
if assigned(Source) then
Result:=Utf8ToUnicode(Dest,MaxChars,Source,strlen(Source))
else
Result:=0;
end;
function Utf8ToUnicode(Dest: PWideChar; MaxDestChars: SizeUInt; Source: PChar; SourceBytes: SizeUInt): SizeUInt;
var
i,j : SizeUInt;
w : word;
b : byte;
begin
result:=0;
i:=0;
j:=0;
if assigned(Dest) then
begin
while (j<MaxDestChars) and (i<SourceBytes) do
begin
b:=byte(Source[i]);
inc(i);
// 2 or 3 bytes?
if w>=$80 then
begin
w:=b and $3c;
if i>=SourceBytes then
exit;
// 3 bytes?
if (b and $20)<>0 then
begin
b:=byte(Source[i]);
inc(i);
if i>=SourceBytes then
exit;
if (b and $c0)<>$80 then
exit;
w:=(w shl 6) or (b and $3c);
end;
b:=byte(Source[i]);
w:=(w shl 6) or (b and $3c);
if (b and $c0)<>$80 then
exit;
inc(i);
end;
Dest[j]:=WideChar(w);
inc(j);
end;
end
else
begin
while i<SourceBytes do
begin
b:=byte(Source[i]);
inc(i);
// 2 or 3 bytes?
if b>=$80 then
begin
if i>=SourceBytes then
exit;
// 3 bytes?
if (b and $20)<>0 then
begin
b:=byte(Source[i]);
inc(i);
if i>=SourceBytes then
exit;
if (b and $c0)<>$80 then
exit;
end;
if (byte(Source[i]) and $c0)<>$80 then
exit;
inc(i);
end;
inc(j);
end;
end;
result:=j+1;
end;
function UTF8Encode(const s : WideString) : UTF8String;
var
i : SizeInt;
hs : UTF8String;
begin
result:='';
if s='' then
exit;
SetLength(hs,length(s)*3);
i:=UnicodeToUtf8(pchar(hs),length(hs)+1,PWideChar(s),length(s));
if i>0 then
begin
SetLength(hs,i-1);
result:=hs;
end;
end;
function UTF8Decode(const s : UTF8String): WideString;
var
i : SizeInt;
hs : WideString;
begin
result:='';
if s='' then
exit;
SetLength(hs,length(s));
i:=Utf8ToUnicode(PWideChar(hs),length(hs)+1,pchar(s),length(s));
if i>0 then
begin
SetLength(hs,i-1);
result:=hs;
end;
end;
function AnsiToUtf8(const s : ansistring): UTF8String;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
Result:=Utf8Encode(s);
end;
function Utf8ToAnsi(const s : UTF8String) : ansistring;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
Result:=Utf8Decode(s);
end;
{
$Log$
Revision 1.41 2004-07-12 17:58:19 peter
Revision 1.42 2004-07-18 20:21:44 florian
+ several unicode (to/from utf-8 conversion) stuff added
* some longint -> SizeInt changes
Revision 1.41 2004/07/12 17:58:19 peter
* remove maxlen field from ansistring/widestrings
Revision 1.40 2004/07/02 21:21:09 peter