{ Copyright (c) 2000-2002 by Florian Klaempfl This unit contains basic functions for unicode support in the compiler, this unit is mainly necessary to bootstrap widestring support ... This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. **************************************************************************** } unit widestr; {$i fpcdefs.inc} interface uses charset,globtype; type tcompilerwidechar = word; tcompilerwidecharptr = ^tcompilerwidechar; pcompilerwidechar = ^tcompilerwidechar; tcompilerwidechararray = array of tcompilerwidechar; tcompilerwidestring = class data : tcompilerwidechararray; maxlen,len : SizeInt; function asconstpunicodechar : PUnicodeChar; end; procedure initwidestring(out r : tcompilerwidestring); procedure donewidestring(var r : tcompilerwidestring); procedure setlengthwidestring(r : tcompilerwidestring;l : SizeInt); function getlengthwidestring(r : tcompilerwidestring) : SizeInt; procedure concatwidestringchar(r : tcompilerwidestring;c : tcompilerwidechar); procedure concatwidestrings(s1,s2 : tcompilerwidestring); function comparewidestrings(s1,s2 : tcompilerwidestring) : SizeInt; procedure copywidestring(s,d : tcompilerwidestring); function asciichar2unicode(c : char) : tcompilerwidechar; function unicode2asciichar(c : tcompilerwidechar) : char; procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : tcompilerwidestring;codepagetranslation : boolean = true); procedure unicode2ascii(r : tcompilerwidestring;p : pchar;cp : tstringencoding); procedure unicode2ascii(r : tcompilerwidestring;arr:TAnsiCharDynArray;cp : tstringencoding); function hasnonasciichars(const p: tcompilerwidestring): boolean; function getcharwidestring(r : tcompilerwidestring;l : SizeInt) : tcompilerwidechar; function cpavailable(const s: string) : boolean; function cpavailable(cp: word) : boolean; procedure changecodepage( s : pchar; l : SizeInt; scp : tstringencoding; d : pchar; dcp : tstringencoding ); function codepagebyname(const s : string) : tstringencoding; function charlength(p: pchar; len: sizeint): sizeint; function charlength(const s: string): sizeint; implementation uses { use only small codepage maps, others will be } { loaded on demand from -FM path } { cyrillic code pages } cp1251,cp866,cp8859_5, { greek code page } cp1253, { other code pages } cp8859_1,cp850,cp437,cp1252,cp646, cp874, cp856,cp852,cp8859_2, cp1250,cp1254,cp1255,cp1256,cp1257,cp1258, globals,cutils; procedure initwidestring(out r : tcompilerwidestring); begin R:=tcompilerwidestring.create; r.data:=nil; r.len:=0; r.maxlen:=0; end; procedure donewidestring(var r : tcompilerwidestring); begin r.Free; r:=nil; end; function getcharwidestring(r : tcompilerwidestring;l : SizeInt) : tcompilerwidechar; begin getcharwidestring:=r.data[l]; end; function getlengthwidestring(r : tcompilerwidestring) : SizeInt; begin getlengthwidestring:=r.len; end; procedure growwidestring(r : tcompilerwidestring;l : SizeInt); begin if r.maxlen>=l then exit; setlength(r.data,l); r.maxlen:=l; end; procedure setlengthwidestring(r : tcompilerwidestring;l : SizeInt); begin r.len:=l; if l>r.maxlen then growwidestring(r,l); end; procedure concatwidestringchar(r : tcompilerwidestring;c : tcompilerwidechar); begin if r.len>=r.maxlen then growwidestring(r,r.len+16); r.data[r.len]:=c; inc(r.len); end; procedure concatwidestrings(s1,s2 : tcompilerwidestring); begin growwidestring(s1,s1.len+s2.len); if s2.len>0 then move(s2.data[0],s1.data[s1.len],s2.len*sizeof(tcompilerwidechar)); inc(s1.len,s2.len); end; procedure copywidestring(s,d : tcompilerwidestring); begin setlengthwidestring(d,s.len); if s.len>0 then move(s.data[0],d.data[0],s.len*sizeof(tcompilerwidechar)); end; function comparewidestrings(s1,s2 : tcompilerwidestring) : SizeInt; var maxi,temp : SizeInt; begin if pointer(s1)=pointer(s2) then begin comparewidestrings:=0; exit; end; maxi:=s1.len; temp:=s2.len; if maxi>temp then maxi:=Temp; temp:=compareword(s1.data[0],s2.data[0],maxi); if temp=0 then temp:=s1.len-s2.len; comparewidestrings:=temp; end; function asciichar2unicode(c : char) : tcompilerwidechar; var m : punicodemap; begin if (current_settings.sourcecodepage <> CP_UTF8) then begin m:=getmap(current_settings.sourcecodepage); asciichar2unicode:=getunicode(c,m); end else result:=tcompilerwidechar(c); end; function unicode2asciichar(c : tcompilerwidechar) : char; {begin if word(c)<128 then unicode2asciichar:=char(word(c)) else unicode2asciichar:='?'; end;} begin Result := getascii(c,getmap(current_settings.sourcecodepage))[1]; end; procedure ascii2unicode(p : pchar;l : SizeInt;cp : tstringencoding;r : tcompilerwidestring;codepagetranslation : boolean = true); var source : pchar; dest : tcompilerwidecharptr; i : SizeInt; m : punicodemap; begin m:=getmap(cp); setlengthwidestring(r,l); source:=p; dest:=tcompilerwidecharptr(r.data); if codepagetranslation then begin if cp<>CP_UTF8 then begin for i:=1 to l do begin dest^:=getunicode(source^,m); inc(dest); inc(source); end; end else begin r.len:=Utf8ToUnicode(punicodechar(r.data),r.maxlen,p,l); { -1, because utf8tounicode includes room for a terminating 0 in its result count } if r.len>0 then dec(r.len); end; end else begin for i:=1 to l do begin dest^:=tcompilerwidechar(source^); inc(dest); inc(source); end; end; end; procedure unicode2ascii(r : tcompilerwidestring;arr:TAnsiCharDynArray;cp : tstringencoding); begin if (r.len=0) or (length(arr)=0) then exit; unicode2ascii(r,Pchar(@arr[0]),cp); end; procedure unicode2ascii(r : tcompilerwidestring;p:pchar;cp : tstringencoding); var m : punicodemap; source : tcompilerwidecharptr; dest : pchar; i : longint; begin { can't implement that here, because the memory size for p() cannot be changed here, and we may need more bytes than have been allocated } if cp=CP_UTF8 then internalerrorproc(2015092701); if (cp = 0) or (cp=CP_NONE) then m:=getmap(current_settings.sourcecodepage) else m:=getmap(cp); source:=tcompilerwidecharptr(r.data); dest:=p; for i:=1 to r.len do begin dest^ := getascii(source^,m)[1]; inc(dest); inc(source); end; end; function hasnonasciichars(const p: tcompilerwidestring): boolean; var source : tcompilerwidecharptr; i : longint; begin source:=tcompilerwidecharptr(p.data); result:=true; for i:=1 to p.len do begin if word(source^)>=128 then exit; inc(source); end; result:=false; end; function cpavailable(const s: string): boolean; begin result:=mappingavailable(lower(s)); if not result then result:=(unicodepath<>'')and(registerbinarymapping(unicodepath+'charset',lower(s))); end; function cpavailable(cp: word): boolean; begin result:=mappingavailable(cp); if not result then result:=(unicodepath<>'')and(registerbinarymapping(unicodepath+'charset','cp'+tostr(cp))); end; procedure changecodepage( s : pchar; l : SizeInt; scp : tstringencoding; d : pchar; dcp : tstringencoding ); var ms, md : punicodemap; source : pchar; dest : pchar; i : longint; begin ms:=getmap(scp); md:=getmap(dcp); source:=s; dest:=d; for i:=1 to l do begin dest^ := getascii(getunicode(source^,ms),md)[1]; inc(dest); inc(source); end; end; function codepagebyname(const s : string) : tstringencoding; var p : punicodemap; begin Result:=0; p:=getmap(lower(s)); if (p<>nil) then Result:=p^.cp; end; function charlength(p: pchar; len: sizeint): sizeint; var p2: pchar; i, chars, codepointlen: sizeint; begin if len=0 then begin result:=0; exit; end; { Length of the string converted to a SBCS codepage (e.g. ISO 8859-1) should be equal to the amount of characters in the source string. } if defaultsystemcodepage=cp_utf8 then { ChangeCodePage does not work for UTF-8 apparently... :-( } begin i:=1; chars:=0; while i<=len do begin codepointlen:=utf8codepointlen(p,len-i+1,true); inc(i,codepointlen); inc(p,codepointlen); inc(chars); end; result:=chars; end else if cpavailable(defaultsystemcodepage) then begin getmem(p2,succ(len)); fillchar(p2^,succ(len),0); changecodepage(p,len,defaultsystemcodepage,p2,28591); result:=strlen(p2); freemem(p2,succ(len)); end else result:=len; end; function charlength(const s: string): sizeint; begin result:=charlength(@s[1],length(s)); end; { tcompilerwidestring } const cEmptyUnicodeChar : UnicodeChar = #0; function tcompilerwidestring.asconstpunicodechar: PUnicodeChar; begin if length(data)>0 then result:=@Data[0] else result:=@cEmptyUnicodeChar; end; end.