mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-12 14:10:59 +02:00
253 lines
7.1 KiB
ObjectPascal
253 lines
7.1 KiB
ObjectPascal
{
|
|
This file is part of the Free Pascal run time library.
|
|
Copyright (c) 2000 by Florian Klaempfl
|
|
member of the Free Pascal development team.
|
|
|
|
This unit implements several classes for charset conversions
|
|
|
|
See the file COPYING.FPC, included in this distribution,
|
|
for details about the copyright.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
**********************************************************************
|
|
}
|
|
unit charset;
|
|
|
|
{$i fpcdefs.inc}
|
|
|
|
interface
|
|
|
|
type
|
|
tunicodechar = word;
|
|
tunicodestring = ^tunicodechar;
|
|
|
|
tcsconvert = class
|
|
// !!!!!!1constructor create;
|
|
end;
|
|
|
|
tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
|
|
umf_unused);
|
|
|
|
punicodecharmapping = ^tunicodecharmapping;
|
|
tunicodecharmapping = record
|
|
unicode : tunicodechar;
|
|
flag : tunicodecharmappingflag;
|
|
reserved : byte;
|
|
end;
|
|
|
|
punicodemap = ^tunicodemap;
|
|
tunicodemap = record
|
|
cpname : string[20];
|
|
map : punicodecharmapping;
|
|
lastchar : longint;
|
|
next : punicodemap;
|
|
internalmap : boolean;
|
|
end;
|
|
|
|
tcp2unicode = class(tcsconvert)
|
|
end;
|
|
|
|
function loadunicodemapping(const cpname,f : string) : punicodemap;
|
|
procedure registermapping(p : punicodemap);
|
|
function getmap(const s : string) : punicodemap;
|
|
function mappingavailable(const s : string) : boolean;
|
|
function getunicode(c : char;p : punicodemap) : tunicodechar;
|
|
function getascii(c : tunicodechar;p : punicodemap) : string;
|
|
|
|
implementation
|
|
|
|
var
|
|
mappings : punicodemap;
|
|
|
|
function loadunicodemapping(const cpname,f : string) : punicodemap;
|
|
|
|
var
|
|
data : punicodecharmapping;
|
|
datasize : longint;
|
|
t : text;
|
|
s,hs : string;
|
|
scanpos,charpos,unicodevalue : longint;
|
|
code : integer;
|
|
flag : tunicodecharmappingflag;
|
|
p : punicodemap;
|
|
lastchar : longint;
|
|
|
|
begin
|
|
lastchar:=-1;
|
|
loadunicodemapping:=nil;
|
|
datasize:=256;
|
|
getmem(data,sizeof(tunicodecharmapping)*datasize);
|
|
assign(t,f);
|
|
{$I-}
|
|
reset(t);
|
|
{$I+}
|
|
if ioresult<>0 then
|
|
begin
|
|
freemem(data,sizeof(tunicodecharmapping)*datasize);
|
|
exit;
|
|
end;
|
|
while not(eof(t)) do
|
|
begin
|
|
readln(t,s);
|
|
if (s[1]='0') and (s[2]='x') then
|
|
begin
|
|
flag:=umf_unused;
|
|
scanpos:=3;
|
|
hs:='$';
|
|
while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
|
|
begin
|
|
hs:=hs+s[scanpos];
|
|
inc(scanpos);
|
|
end;
|
|
val(hs,charpos,code);
|
|
if code<>0 then
|
|
begin
|
|
freemem(data,sizeof(tunicodecharmapping)*datasize);
|
|
close(t);
|
|
exit;
|
|
end;
|
|
while not(s[scanpos] in ['0','#']) do
|
|
inc(scanpos);
|
|
if s[scanpos]='#' then
|
|
begin
|
|
{ special char }
|
|
unicodevalue:=$ffff;
|
|
hs:=copy(s,scanpos,length(s)-scanpos+1);
|
|
if hs='#DBCS LEAD BYTE' then
|
|
flag:=umf_leadbyte;
|
|
end
|
|
else
|
|
begin
|
|
{ C hex prefix }
|
|
inc(scanpos,2);
|
|
hs:='$';
|
|
while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
|
|
begin
|
|
hs:=hs+s[scanpos];
|
|
inc(scanpos);
|
|
end;
|
|
val(hs,unicodevalue,code);
|
|
if code<>0 then
|
|
begin
|
|
freemem(data,sizeof(tunicodecharmapping)*datasize);
|
|
close(t);
|
|
exit;
|
|
end;
|
|
if charpos>datasize then
|
|
begin
|
|
{ allocate 1024 bytes more because }
|
|
{ if we need more than 256 entries it's }
|
|
{ probably a mbcs with a lot of }
|
|
{ entries }
|
|
datasize:=charpos+1024;
|
|
reallocmem(data,sizeof(tunicodecharmapping)*datasize);
|
|
end;
|
|
flag:=umf_noinfo;
|
|
end;
|
|
data[charpos].flag:=flag;
|
|
data[charpos].unicode:=unicodevalue;
|
|
if charpos>lastchar then
|
|
lastchar:=charpos;
|
|
end;
|
|
end;
|
|
close(t);
|
|
new(p);
|
|
p^.lastchar:=lastchar;
|
|
p^.cpname:=cpname;
|
|
p^.internalmap:=false;
|
|
p^.next:=nil;
|
|
p^.map:=data;
|
|
loadunicodemapping:=p;
|
|
end;
|
|
|
|
procedure registermapping(p : punicodemap);
|
|
|
|
begin
|
|
p^.next:=mappings;
|
|
mappings:=p;
|
|
end;
|
|
|
|
function getmap(const s : string) : punicodemap;
|
|
|
|
var
|
|
hp : punicodemap;
|
|
|
|
const
|
|
mapcachep : punicodemap = nil;
|
|
|
|
begin
|
|
if assigned(mapcachep) and
|
|
(mapcachep^.cpname=s) then
|
|
begin
|
|
getmap:=mapcachep;
|
|
exit;
|
|
end;
|
|
hp:=mappings;
|
|
while assigned(hp) do
|
|
begin
|
|
if hp^.cpname=s then
|
|
begin
|
|
getmap:=hp;
|
|
mapcachep:=hp;
|
|
exit;
|
|
end;
|
|
hp:=hp^.next;
|
|
end;
|
|
getmap:=nil;
|
|
end;
|
|
|
|
function mappingavailable(const s : string) : boolean;
|
|
|
|
begin
|
|
mappingavailable:=getmap(s)<>nil;
|
|
end;
|
|
|
|
function getunicode(c : char;p : punicodemap) : tunicodechar;
|
|
|
|
begin
|
|
if ord(c)<=p^.lastchar then
|
|
getunicode:=p^.map[ord(c)].unicode
|
|
else
|
|
getunicode:=0;
|
|
end;
|
|
|
|
function getascii(c : tunicodechar;p : punicodemap) : string;
|
|
|
|
var
|
|
i : longint;
|
|
|
|
begin
|
|
{ at least map to space }
|
|
getascii:=#32;
|
|
for i:=0 to p^.lastchar do
|
|
if p^.map[i].unicode=c then
|
|
begin
|
|
if i<256 then
|
|
getascii:=chr(i)
|
|
else
|
|
getascii:=chr(i div 256)+chr(i mod 256);
|
|
exit;
|
|
end;
|
|
end;
|
|
|
|
var
|
|
hp : punicodemap;
|
|
|
|
initialization
|
|
mappings:=nil;
|
|
finalization
|
|
while assigned(mappings) do
|
|
begin
|
|
hp:=mappings^.next;
|
|
if not(mappings^.internalmap) then
|
|
begin
|
|
freemem(mappings^.map);
|
|
dispose(mappings);
|
|
end;
|
|
mappings:=hp;
|
|
end;
|
|
end.
|