mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-10-24 06:44:38 +02:00

http://svn.freepascal.org/svn/fpc/branches/unicodestring ........ r11665 | florian | 2008-08-30 13:30:17 +0200 (Sat, 30 Aug 2008) | 1 line * continued to work on unicodestring type support ........ r11666 | florian | 2008-08-30 19:02:26 +0200 (Sat, 30 Aug 2008) | 2 lines * expectloc for wide/ansi/unicode strings is LOC_CONSTANT or LOC_REGISTER now ........ r11667 | florian | 2008-08-30 20:42:37 +0200 (Sat, 30 Aug 2008) | 1 line * more unicodestring stuff fixed, test results on win32 are already good ........ r11670 | florian | 2008-08-30 23:21:48 +0200 (Sat, 30 Aug 2008) | 2 lines * first fixes for unix bootstrapping ........ r11683 | ivost | 2008-09-01 12:46:39 +0200 (Mon, 01 Sep 2008) | 2 lines * fixed 64bit bug in iconvenc.pas ........ r11689 | florian | 2008-09-01 23:12:34 +0200 (Mon, 01 Sep 2008) | 1 line * fixed several errors when building on unix ........ r11694 | florian | 2008-09-03 20:32:43 +0200 (Wed, 03 Sep 2008) | 1 line * fixed unix compilation ........ r11695 | florian | 2008-09-03 21:01:04 +0200 (Wed, 03 Sep 2008) | 1 line * bootstrapping fix ........ r11696 | florian | 2008-09-03 21:07:18 +0200 (Wed, 03 Sep 2008) | 1 line * more bootstrapping fixed ........ r11698 | florian | 2008-09-03 22:47:54 +0200 (Wed, 03 Sep 2008) | 1 line + two missing compiler procs exported ........ r11701 | florian | 2008-09-04 16:42:34 +0200 (Thu, 04 Sep 2008) | 2 lines + lazarus project for the linux rtl ........ r11702 | florian | 2008-09-04 16:43:27 +0200 (Thu, 04 Sep 2008) | 2 lines + set unicode string procedures ........ r11707 | florian | 2008-09-04 23:23:02 +0200 (Thu, 04 Sep 2008) | 2 lines * fixed several type casting stuff ........ r11712 | florian | 2008-09-05 22:46:03 +0200 (Fri, 05 Sep 2008) | 1 line * fixed unicodestring compilation on windows after recent unix changes ........ r11713 | florian | 2008-09-05 23:35:12 +0200 (Fri, 05 Sep 2008) | 1 line + UnicodeString support for Variants ........ r11715 | florian | 2008-09-06 20:59:54 +0200 (Sat, 06 Sep 2008) | 1 line * patch by Martin Schreiber for UnicodeString streaming ........ r11716 | florian | 2008-09-06 22:22:55 +0200 (Sat, 06 Sep 2008) | 2 lines * fixed test ........ r11717 | florian | 2008-09-07 10:25:51 +0200 (Sun, 07 Sep 2008) | 1 line * fixed typo when converting tunicodestring to punicodechar ........ r11718 | florian | 2008-09-07 11:29:52 +0200 (Sun, 07 Sep 2008) | 3 lines * fixed writing of UnicodeString properties * moved some helper routines to unicode headers ........ r11734 | florian | 2008-09-09 22:38:55 +0200 (Tue, 09 Sep 2008) | 1 line * fixed bootstrapping ........ r11735 | florian | 2008-09-10 11:25:28 +0200 (Wed, 10 Sep 2008) | 2 lines * first fixes for persisten unicodestrings ........ r11736 | florian | 2008-09-10 14:31:00 +0200 (Wed, 10 Sep 2008) | 3 lines Initialized merge tracking via "svnmerge" with revisions "1-11663" from http://svn.freepascal.org/svn/fpc/trunk ........ r11737 | florian | 2008-09-10 21:06:57 +0200 (Wed, 10 Sep 2008) | 3 lines * fixed unicodestring <-> variant handling * fixed unicodestring property reading ........ git-svn-id: trunk@11739 -
255 lines
7.3 KiB
ObjectPascal
255 lines
7.3 KiB
ObjectPascal
{
|
|
This file is part of the Free Pascal run time library.
|
|
Copyright (c) 2000 by Florian Klaempfl
|
|
member of the Free Pascal development team.
|
|
|
|
This unit implements several classes for charset conversions
|
|
|
|
See the file COPYING.FPC, included in this distribution,
|
|
for details about the copyright.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
**********************************************************************}
|
|
|
|
{ this unit is included temporarily for 2.2 bootstrapping and can be
|
|
removed after the next release after 2.2.2 }
|
|
{$mode objfpc}
|
|
unit ccharset;
|
|
|
|
interface
|
|
|
|
type
|
|
tunicodechar = word;
|
|
tunicodestring = ^tunicodechar;
|
|
|
|
tcsconvert = class
|
|
// !!!!!!1constructor create;
|
|
end;
|
|
|
|
tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
|
|
umf_unused);
|
|
|
|
punicodecharmapping = ^tunicodecharmapping;
|
|
tunicodecharmapping = record
|
|
unicode : tunicodechar;
|
|
flag : tunicodecharmappingflag;
|
|
reserved : byte;
|
|
end;
|
|
|
|
punicodemap = ^tunicodemap;
|
|
tunicodemap = record
|
|
cpname : string[20];
|
|
map : punicodecharmapping;
|
|
lastchar : longint;
|
|
next : punicodemap;
|
|
internalmap : boolean;
|
|
end;
|
|
|
|
tcp2unicode = class(tcsconvert)
|
|
end;
|
|
|
|
function loadunicodemapping(const cpname,f : string) : punicodemap;
|
|
procedure registermapping(p : punicodemap);
|
|
function getmap(const s : string) : punicodemap;
|
|
function mappingavailable(const s : string) : boolean;
|
|
function getunicode(c : char;p : punicodemap) : tunicodechar;
|
|
function getascii(c : tunicodechar;p : punicodemap) : string;
|
|
|
|
implementation
|
|
|
|
var
|
|
mappings : punicodemap;
|
|
|
|
function loadunicodemapping(const cpname,f : string) : punicodemap;
|
|
|
|
var
|
|
data : punicodecharmapping;
|
|
datasize : longint;
|
|
t : text;
|
|
s,hs : string;
|
|
scanpos,charpos,unicodevalue : longint;
|
|
code : word;
|
|
flag : tunicodecharmappingflag;
|
|
p : punicodemap;
|
|
lastchar : longint;
|
|
|
|
begin
|
|
lastchar:=-1;
|
|
loadunicodemapping:=nil;
|
|
datasize:=256;
|
|
getmem(data,sizeof(tunicodecharmapping)*datasize);
|
|
assign(t,f);
|
|
{$I-}
|
|
reset(t);
|
|
{$I+}
|
|
if ioresult<>0 then
|
|
begin
|
|
freemem(data,sizeof(tunicodecharmapping)*datasize);
|
|
exit;
|
|
end;
|
|
while not(eof(t)) do
|
|
begin
|
|
readln(t,s);
|
|
if (s[1]='0') and (s[2]='x') then
|
|
begin
|
|
flag:=umf_unused;
|
|
scanpos:=3;
|
|
hs:='$';
|
|
while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
|
|
begin
|
|
hs:=hs+s[scanpos];
|
|
inc(scanpos);
|
|
end;
|
|
val(hs,charpos,code);
|
|
if code<>0 then
|
|
begin
|
|
freemem(data,sizeof(tunicodecharmapping)*datasize);
|
|
close(t);
|
|
exit;
|
|
end;
|
|
while not(s[scanpos] in ['0','#']) do
|
|
inc(scanpos);
|
|
if s[scanpos]='#' then
|
|
begin
|
|
{ special char }
|
|
unicodevalue:=$ffff;
|
|
hs:=copy(s,scanpos,length(s)-scanpos+1);
|
|
if hs='#DBCS LEAD BYTE' then
|
|
flag:=umf_leadbyte;
|
|
end
|
|
else
|
|
begin
|
|
{ C hex prefix }
|
|
inc(scanpos,2);
|
|
hs:='$';
|
|
while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
|
|
begin
|
|
hs:=hs+s[scanpos];
|
|
inc(scanpos);
|
|
end;
|
|
val(hs,unicodevalue,code);
|
|
if code<>0 then
|
|
begin
|
|
freemem(data,sizeof(tunicodecharmapping)*datasize);
|
|
close(t);
|
|
exit;
|
|
end;
|
|
if charpos>datasize then
|
|
begin
|
|
{ allocate 1024 bytes more because }
|
|
{ if we need more than 256 entries it's }
|
|
{ probably a mbcs with a lot of }
|
|
{ entries }
|
|
datasize:=charpos+1024;
|
|
reallocmem(data,sizeof(tunicodecharmapping)*datasize);
|
|
end;
|
|
flag:=umf_noinfo;
|
|
end;
|
|
data[charpos].flag:=flag;
|
|
data[charpos].unicode:=unicodevalue;
|
|
if charpos>lastchar then
|
|
lastchar:=charpos;
|
|
end;
|
|
end;
|
|
close(t);
|
|
new(p);
|
|
p^.lastchar:=lastchar;
|
|
p^.cpname:=cpname;
|
|
p^.internalmap:=false;
|
|
p^.next:=nil;
|
|
p^.map:=data;
|
|
loadunicodemapping:=p;
|
|
end;
|
|
|
|
procedure registermapping(p : punicodemap);
|
|
|
|
begin
|
|
p^.next:=mappings;
|
|
mappings:=p;
|
|
end;
|
|
|
|
function getmap(const s : string) : punicodemap;
|
|
|
|
var
|
|
hp : punicodemap;
|
|
|
|
const
|
|
mapcache : string = '';
|
|
mapcachep : punicodemap = nil;
|
|
|
|
begin
|
|
if (mapcache=s) and assigned(mapcachep) and (mapcachep^.cpname=s) then
|
|
begin
|
|
getmap:=mapcachep;
|
|
exit;
|
|
end;
|
|
hp:=mappings;
|
|
while assigned(hp) do
|
|
begin
|
|
if hp^.cpname=s then
|
|
begin
|
|
getmap:=hp;
|
|
mapcache:=s;
|
|
mapcachep:=hp;
|
|
exit;
|
|
end;
|
|
hp:=hp^.next;
|
|
end;
|
|
getmap:=nil;
|
|
end;
|
|
|
|
function mappingavailable(const s : string) : boolean;
|
|
|
|
begin
|
|
mappingavailable:=getmap(s)<>nil;
|
|
end;
|
|
|
|
function getunicode(c : char;p : punicodemap) : tunicodechar;
|
|
|
|
begin
|
|
if ord(c)<=p^.lastchar then
|
|
getunicode:=p^.map[ord(c)].unicode
|
|
else
|
|
getunicode:=0;
|
|
end;
|
|
|
|
function getascii(c : tunicodechar;p : punicodemap) : string;
|
|
|
|
var
|
|
i : longint;
|
|
|
|
begin
|
|
{ at least map to space }
|
|
getascii:=#32;
|
|
for i:=0 to p^.lastchar do
|
|
if p^.map[i].unicode=c then
|
|
begin
|
|
if i<256 then
|
|
getascii:=chr(i)
|
|
else
|
|
getascii:=chr(i div 256)+chr(i mod 256);
|
|
exit;
|
|
end;
|
|
end;
|
|
|
|
var
|
|
hp : punicodemap;
|
|
|
|
initialization
|
|
mappings:=nil;
|
|
finalization
|
|
while assigned(mappings) do
|
|
begin
|
|
hp:=mappings^.next;
|
|
if not(mappings^.internalmap) then
|
|
begin
|
|
freemem(mappings^.map);
|
|
dispose(mappings);
|
|
end;
|
|
mappings:=hp;
|
|
end;
|
|
end.
|