mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-05 19:48:01 +02:00
399 lines
12 KiB
ObjectPascal
399 lines
12 KiB
ObjectPascal
{
    Copyright (c) 2000-2002 by Florian Klaempfl

    This unit contains basic functions for unicode support in the
    compiler, this unit is mainly necessary to bootstrap widestring
    support ...

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
|
|
unit widestr;
|
|
|
|
{$i fpcdefs.inc}
|
|
|
|
interface
|
|
|
|
uses
|
|
charset,globtype;
|
|
|
|
|
|
type
  { One wide character as stored by the compiler: a 16-bit word. }
  tcompilerwidechar = word;
  { Two pointer aliases to the same element type; both names are exported. }
  tcompilerwidecharptr = ^tcompilerwidechar;
  pcompilerwidechar = ^tcompilerwidechar;
  { Dynamic array used as backing store for tcompilerwidestring. }
  tcompilerwidechararray = array of tcompilerwidechar;

  { Growable wide string used inside the compiler.
    data   : backing buffer (nil while nothing has been allocated)
    maxlen : number of elements the buffer was last grown to
    len    : number of elements currently in use }
  tcompilerwidestring = class
    data : tcompilerwidechararray;
    maxlen : SizeInt;
    len : SizeInt;
    { Pointer to the first element of data, or to a constant #0 character
      when data has length zero. }
    function asconstpunicodechar : PUnicodeChar;
  end;
|
|
|
|
{ creation / destruction }
procedure initwidestring(out r : tcompilerwidestring);
procedure donewidestring(var r : tcompilerwidestring);

{ length handling, concatenation, comparison and copying }
procedure setlengthwidestring(r : tcompilerwidestring;l : SizeInt);
function getlengthwidestring(r : tcompilerwidestring) : SizeInt;
procedure concatwidestringchar(r : tcompilerwidestring;c : tcompilerwidechar);
procedure concatwidestrings(s1,s2 : tcompilerwidestring);
function comparewidestrings(s1,s2 : tcompilerwidestring) : SizeInt;
procedure copywidestring(s,d : tcompilerwidestring);

{ conversions between single-byte code pages and wide characters }
function asciichar2unicode(c : char) : tcompilerwidechar;
function unicode2asciichar(c : tcompilerwidechar) : char;
procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : tcompilerwidestring;codepagetranslation : boolean = true);
procedure unicode2ascii(r : tcompilerwidestring;p : pchar;cp : tstringencoding);
procedure unicode2ascii(r : tcompilerwidestring;arr:TAnsiCharDynArray;cp : tstringencoding);

{ inspection }
function hasnonasciichars(const p: tcompilerwidestring): boolean;
function getcharwidestring(r : tcompilerwidestring;l : SizeInt) : tcompilerwidechar;

{ code page lookup and recoding }
function cpavailable(const s: string) : boolean;
function cpavailable(cp: word) : boolean;
procedure changecodepage(
  s : pchar; l : SizeInt; scp : tstringencoding;
  d : pchar; dcp : tstringencoding
);
function codepagebyname(const s : string) : tstringencoding;

{ number of characters (as opposed to bytes) in a string }
function charlength(p: pchar; len: sizeint): sizeint;
function charlength(const s: string): sizeint;
|
|
|
|
implementation
|
|
|
|
uses
|
|
{ use only small codepage maps, others will be }
|
|
{ loaded on demand from -FM path }
|
|
|
|
{ cyrillic code pages }
|
|
cp1251,cp866,cp8859_5,
|
|
{ greek code page }
|
|
cp1253,
|
|
{ other code pages }
|
|
cp8859_1,cp850,cp437,cp1252,cp646,
|
|
cp874, cp856,cp852,cp8859_2,
|
|
cp1250,cp1254,cp1255,cp1256,cp1257,cp1258,
|
|
globals,cutils;
|
|
|
|
|
|
{ Creates a new tcompilerwidestring instance with no buffer and zero
  length/capacity and hands it back through r. }
procedure initwidestring(out r : tcompilerwidestring);
var
  fresh : tcompilerwidestring;
begin
  fresh:=tcompilerwidestring.create;
  fresh.data:=nil;
  fresh.maxlen:=0;
  fresh.len:=0;
  r:=fresh;
end;
|
|
|
|
{ Destroys the instance referenced by r (if any) and resets r to nil. }
procedure donewidestring(var r : tcompilerwidestring);
begin
  if assigned(r) then
    r.Destroy;
  r:=nil;
end;
|
|
|
|
{ Returns the element stored at index l of r's buffer.  The index is used
  as-is on the dynamic array; this routine adds no check against r.len. }
function getcharwidestring(r : tcompilerwidestring;l : SizeInt) : tcompilerwidechar;
begin
  result:=r.data[l];
end;
|
|
|
|
{ Returns the number of characters currently in use in r. }
function getlengthwidestring(r : tcompilerwidestring) : SizeInt;
begin
  result:=r.len;
end;
|
|
|
|
{ Makes sure the buffer of r holds at least l elements.  The buffer is
  never shrunk: nothing happens when maxlen already covers l. }
procedure growwidestring(r : tcompilerwidestring;l : SizeInt);
begin
  if l>r.maxlen then
    begin
      setlength(r.data,l);
      r.maxlen:=l;
    end;
end;
|
|
|
|
{ Sets the used length of r to l, enlarging the buffer when required. }
procedure setlengthwidestring(r : tcompilerwidestring;l : SizeInt);
begin
  r.len:=l;
  { growwidestring returns immediately when maxlen already covers l, so
    the call needs no additional test here }
  growwidestring(r,l);
end;
|
|
|
|
{ Appends the single character c to r.  When the buffer is full it is
  enlarged to the current length plus 16 elements. }
procedure concatwidestringchar(r : tcompilerwidestring;c : tcompilerwidechar);
var
  pos : SizeInt;
begin
  pos:=r.len;
  if pos>=r.maxlen then
    growwidestring(r,pos+16);
  r.data[pos]:=c;
  r.len:=pos+1;
end;
|
|
|
|
{ Appends the contents of s2 to s1; s2 itself is only read. }
procedure concatwidestrings(s1,s2 : tcompilerwidestring);
var
  oldlen,addlen : SizeInt;
begin
  oldlen:=s1.len;
  addlen:=s2.len;
  growwidestring(s1,oldlen+addlen);
  { data[0] is only touched when there is something to copy }
  if addlen>0 then
    move(s2.data[0],s1.data[oldlen],addlen*sizeof(tcompilerwidechar));
  s1.len:=oldlen+addlen;
end;
|
|
|
|
{ Copies the characters of s into d; d takes over the length of s. }
procedure copywidestring(s,d : tcompilerwidestring);
var
  count : SizeInt;
begin
  count:=s.len;
  setlengthwidestring(d,count);
  { nothing to move for an empty source }
  if count<=0 then
    exit;
  move(s.data[0],d.data[0],count*sizeof(tcompilerwidechar));
end;
|
|
|
|
{ Compares two wide strings.

  Returns 0 when both parameters reference the same instance or hold the
  same characters, otherwise the result of compareword over the common
  prefix, or - when that prefix is equal - the difference of the lengths
  (s1.len-s2.len). }
function comparewidestrings(s1,s2 : tcompilerwidestring) : SizeInt;
var
  common,res : SizeInt;
begin
  if pointer(s1)=pointer(s2) then
    begin
      comparewidestrings:=0;
      exit;
    end;
  { number of characters present in both strings }
  common:=s1.len;
  if common>s2.len then
    common:=s2.len;
  { Only index data[0] when there is a common prefix: for an empty string
    the dynamic array may be nil, and the other routines of this unit
    guard their data[0] accesses with a len>0 test in the same way. }
  if common>0 then
    res:=compareword(s1.data[0],s2.data[0],common)
  else
    res:=0;
  if res=0 then
    res:=s1.len-s2.len;
  comparewidestrings:=res;
end;
|
|
|
|
{ Converts one source character to a wide character.  When the current
  source code page is CP_UTF8 the ordinal value is used unchanged;
  otherwise it is translated via the map of the source code page. }
function asciichar2unicode(c : char) : tcompilerwidechar;
begin
  if current_settings.sourcecodepage=CP_UTF8 then
    result:=tcompilerwidechar(c)
  else
    result:=getunicode(c,getmap(current_settings.sourcecodepage));
end;
|
|
|
|
{ Converts one wide character to a character of the current source code
  page: the first character of the string returned by getascii is used. }
function unicode2asciichar(c : tcompilerwidechar) : char;
var
  map : punicodemap;
begin
  map:=getmap(current_settings.sourcecodepage);
  result:=getascii(c,map)[1];
end;
|
|
|
|
|
|
{ Converts the l bytes starting at p into the wide string r.

  p,l                 : source bytes
  cp                  : code page of the source bytes
  r                   : destination; its length is set to l first
  codepagetranslation : when true the bytes are translated (through the
                        map for cp, or via Utf8ToUnicode for CP_UTF8);
                        when false every byte is widened unchanged }
procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : tcompilerwidestring;codepagetranslation : boolean = true);
var
  src : pchar;
  dst : tcompilerwidecharptr;
  remaining : SizeInt;
  map : punicodemap;
begin
  map:=getmap(cp);
  setlengthwidestring(r,l);
  if codepagetranslation and (cp=CP_UTF8) then
    begin
      r.len:=Utf8ToUnicode(punicodechar(r.data),r.maxlen,p,l);
      { -1, because utf8tounicode includes room for a terminating 0 in
        its result count }
      if r.len>0 then
        dec(r.len);
      exit;
    end;
  { single-byte source: one output character per input byte }
  src:=p;
  dst:=tcompilerwidecharptr(r.data);
  remaining:=l;
  while remaining>0 do
    begin
      if codepagetranslation then
        dst^:=getunicode(src^,map)
      else
        dst^:=tcompilerwidechar(src^);
      inc(dst);
      inc(src);
      dec(remaining);
    end;
end;
|
|
|
|
|
|
{ Array variant of unicode2ascii: converts r into the memory of arr by
  forwarding to the pchar overload.  Does nothing when r or arr is empty.
  No check is made here that arr has room for r.len characters. }
procedure unicode2ascii(r : tcompilerwidestring;arr:TAnsiCharDynArray;cp : tstringencoding);
begin
  if (r.len<>0) and (length(arr)<>0) then
    unicode2ascii(r,Pchar(@arr[0]),cp);
end;
|
|
|
|
{ Converts the wide string r into single-byte characters written to p.

  r  : source wide string
  p  : destination buffer; r.len bytes are written, the buffer is
       neither resized nor checked by this routine
  cp : target code page; 0 or CP_NONE selects the current source code
       page.  CP_UTF8 is not supported and triggers internal error
       2015092701. }
procedure unicode2ascii(r : tcompilerwidestring;p:pchar;cp : tstringencoding);
var
  m : punicodemap;
  source : tcompilerwidecharptr;
  dest : pchar;
  { SizeInt like r.len, so the counter covers the full range of the bound }
  i : SizeInt;
begin
  { can't implement that here, because the memory size for p() cannot
    be changed here, and we may need more bytes than have been allocated }
  if cp=CP_UTF8 then
    internalerrorproc(2015092701);

  if (cp = 0) or (cp=CP_NONE) then
    m:=getmap(current_settings.sourcecodepage)
  else
    m:=getmap(cp);
  source:=tcompilerwidecharptr(r.data);
  dest:=p;
  for i:=1 to r.len do
    begin
      { only the first character of the mapped string is stored }
      dest^ := getascii(source^,m)[1];
      inc(dest);
      inc(source);
    end;
end;
|
|
|
|
|
|
{ Returns true when at least one of the first p.len characters of p has a
  value of 128 or above, false otherwise (including for an empty string). }
function hasnonasciichars(const p: tcompilerwidestring): boolean;
var
  cur : tcompilerwidecharptr;
  { SizeInt like p.len, so the counter covers the full range of the bound }
  i : SizeInt;
begin
  result:=false;
  cur:=tcompilerwidecharptr(p.data);
  for i:=1 to p.len do
    begin
      if word(cur^)>=128 then
        begin
          result:=true;
          exit;
        end;
      inc(cur);
    end;
end;
|
|
|
|
|
|
{ Reports whether a mapping for the code page named s (compared in lower
  case) is available.  When it is not yet known and unicodepath is set,
  registering a binary mapping from unicodepath+'charset' is attempted. }
function cpavailable(const s: string): boolean;
begin
  if mappingavailable(lower(s)) then
    exit(true);
  if unicodepath='' then
    exit(false);
  result:=registerbinarymapping(unicodepath+'charset',lower(s));
end;
|
|
|
|
{ Reports whether a mapping for the numeric code page cp is available.
  When it is not yet known and unicodepath is set, registering a binary
  mapping named 'cp'+<number> from unicodepath+'charset' is attempted. }
function cpavailable(cp: word): boolean;
begin
  if mappingavailable(cp) then
    exit(true);
  if unicodepath='' then
    exit(false);
  result:=registerbinarymapping(unicodepath+'charset','cp'+tostr(cp));
end;
|
|
|
|
{ Recodes l single-byte characters from code page scp to code page dcp.

  s,l : source bytes
  scp : code page of the source
  d   : destination buffer; l bytes are written, the buffer is not
        checked by this routine
  dcp : code page of the destination

  Each byte is mapped to a wide character through the scp map and back
  through the dcp map; only the first character of the result is kept. }
procedure changecodepage(
  s : pchar; l : SizeInt; scp : tstringencoding;
  d : pchar; dcp : tstringencoding
);
var
  ms, md : punicodemap;
  source : pchar;
  dest : pchar;
  { SizeInt like l, so the counter covers the full range of the bound }
  i : SizeInt;
begin
  ms:=getmap(scp);
  md:=getmap(dcp);
  source:=s;
  dest:=d;
  for i:=1 to l do
    begin
      dest^ := getascii(getunicode(source^,ms),md)[1];
      inc(dest);
      inc(source);
    end;
end;
|
|
|
|
{ Returns the code page number of the map registered under the lower-cased
  name s, or 0 when getmap yields no map for that name. }
function codepagebyname(const s : string) : tstringencoding;
var
  map : punicodemap;
begin
  map:=getmap(lower(s));
  if map=nil then
    result:=0
  else
    result:=map^.cp;
end;
|
|
|
|
|
|
{ Returns the number of characters (not bytes) in the len bytes at p,
  interpreted in defaultsystemcodepage.

  - len=0 yields 0 without touching p.
  - For cp_utf8 the code points are counted with utf8codepointlen.
  - For any other code page with an available mapping, the text is recoded
    to code page 28591 (the SBCS case the comment below calls ISO 8859-1)
    and the strlen of the result is returned.
  - Without a mapping, len itself is returned. }
function charlength(p: pchar; len: sizeint): sizeint;
var
  p2: pchar;
  i, chars, codepointlen: sizeint;
begin
  if len=0 then
    begin
      result:=0;
      exit;
    end;
  { Length of the string converted to a SBCS codepage (e.g. ISO 8859-1)
    should be equal to the amount of characters in the source string. }
  if defaultsystemcodepage=cp_utf8 then
    { ChangeCodePage does not work for UTF-8 apparently... :-( }
    begin
      i:=1;
      chars:=0;
      while i<=len do
        begin
          codepointlen:=utf8codepointlen(p,len-i+1,true);
          { A non-positive result (incomplete or invalid sequence) must
            not be added to i and p as-is: the scan would stall or move
            backwards.  Count such a byte as one character and step
            over it, so the loop always advances. }
          if codepointlen<=0 then
            codepointlen:=1;
          inc(i,codepointlen);
          inc(p,codepointlen);
          inc(chars);
        end;
      result:=chars;
    end
  else if cpavailable(defaultsystemcodepage) then
    begin
      { zero-filled buffer with one extra byte, so strlen finds a
        terminator after the recoded text }
      getmem(p2,succ(len));
      fillchar(p2^,succ(len),0);
      changecodepage(p,len,defaultsystemcodepage,p2,28591);
      result:=strlen(p2);
      freemem(p2,succ(len));
    end
  else
    result:=len;
end;
|
|
|
|
{ String variant of charlength: forwards the address of the first
  character of s and its length to the pchar overload. }
function charlength(const s: string): sizeint;
var
  bytes : sizeint;
begin
  bytes:=length(s);
  result:=charlength(@s[1],bytes);
end;
|
|
|
|
{ tcompilerwidestring }
|
|
const
  { target of the pointer returned for a string without buffer }
  cEmptyUnicodeChar : UnicodeChar = #0;

{ Returns a pointer to the first element of data, or a pointer to a
  constant #0 character when data has length zero. }
function tcompilerwidestring.asconstpunicodechar: PUnicodeChar;
begin
  if length(data)=0 then
    result:=@cEmptyUnicodeChar
  else
    result:=@Data[0];
end;
|
|
|
|
end.
|