mirror of
				https://gitlab.com/freepascal.org/fpc/source.git
				synced 2025-11-04 04:59:29 +01:00 
			
		
		
		
	http://svn.freepascal.org/svn/fpc/branches/unicodestring ........ r11665 | florian | 2008-08-30 13:30:17 +0200 (Sat, 30 Aug 2008) | 1 line * continued to work on unicodestring type support ........ r11666 | florian | 2008-08-30 19:02:26 +0200 (Sat, 30 Aug 2008) | 2 lines * expectloc for wide/ansi/unicode strings is LOC_CONSTANT or LOC_REGISTER now ........ r11667 | florian | 2008-08-30 20:42:37 +0200 (Sat, 30 Aug 2008) | 1 line * more unicodestring stuff fixed, test results on win32 are already good ........ r11670 | florian | 2008-08-30 23:21:48 +0200 (Sat, 30 Aug 2008) | 2 lines * first fixes for unix bootstrapping ........ r11683 | ivost | 2008-09-01 12:46:39 +0200 (Mon, 01 Sep 2008) | 2 lines * fixed 64bit bug in iconvenc.pas ........ r11689 | florian | 2008-09-01 23:12:34 +0200 (Mon, 01 Sep 2008) | 1 line * fixed several errors when building on unix ........ r11694 | florian | 2008-09-03 20:32:43 +0200 (Wed, 03 Sep 2008) | 1 line * fixed unix compilation ........ r11695 | florian | 2008-09-03 21:01:04 +0200 (Wed, 03 Sep 2008) | 1 line * bootstrapping fix ........ r11696 | florian | 2008-09-03 21:07:18 +0200 (Wed, 03 Sep 2008) | 1 line * more bootstrapping fixed ........ r11698 | florian | 2008-09-03 22:47:54 +0200 (Wed, 03 Sep 2008) | 1 line + two missing compiler procs exported ........ r11701 | florian | 2008-09-04 16:42:34 +0200 (Thu, 04 Sep 2008) | 2 lines + lazarus project for the linux rtl ........ r11702 | florian | 2008-09-04 16:43:27 +0200 (Thu, 04 Sep 2008) | 2 lines + set unicode string procedures ........ r11707 | florian | 2008-09-04 23:23:02 +0200 (Thu, 04 Sep 2008) | 2 lines * fixed several type casting stuff ........ r11712 | florian | 2008-09-05 22:46:03 +0200 (Fri, 05 Sep 2008) | 1 line * fixed unicodestring compilation on windows after recent unix changes ........ r11713 | florian | 2008-09-05 23:35:12 +0200 (Fri, 05 Sep 2008) | 1 line + UnicodeString support for Variants ........ r11715 | florian | 2008-09-06 20:59:54 +0200 (Sat, 06 Sep 2008) | 1 line * patch by Martin Schreiber for UnicodeString streaming ........ r11716 | florian | 2008-09-06 22:22:55 +0200 (Sat, 06 Sep 2008) | 2 lines * fixed test ........ r11717 | florian | 2008-09-07 10:25:51 +0200 (Sun, 07 Sep 2008) | 1 line * fixed typo when converting tunicodestring to punicodechar ........ r11718 | florian | 2008-09-07 11:29:52 +0200 (Sun, 07 Sep 2008) | 3 lines * fixed writing of UnicodeString properties * moved some helper routines to unicode headers ........ r11734 | florian | 2008-09-09 22:38:55 +0200 (Tue, 09 Sep 2008) | 1 line * fixed bootstrapping ........ r11735 | florian | 2008-09-10 11:25:28 +0200 (Wed, 10 Sep 2008) | 2 lines * first fixes for persisten unicodestrings ........ r11736 | florian | 2008-09-10 14:31:00 +0200 (Wed, 10 Sep 2008) | 3 lines Initialized merge tracking via "svnmerge" with revisions "1-11663" from http://svn.freepascal.org/svn/fpc/trunk ........ r11737 | florian | 2008-09-10 21:06:57 +0200 (Wed, 10 Sep 2008) | 3 lines * fixed unicodestring <-> variant handling * fixed unicodestring property reading ........ git-svn-id: trunk@11739 -
		
			
				
	
	
		
			255 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
			
		
		
	
	
			255 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
{
 | 
						|
    This file is part of the Free Pascal run time library.
 | 
						|
    Copyright (c) 2000 by Florian Klaempfl
 | 
						|
    member of the Free Pascal development team.
 | 
						|
 | 
						|
    This unit implements several classes for charset conversions
 | 
						|
 | 
						|
    See the file COPYING.FPC, included in this distribution,
 | 
						|
    for details about the copyright.
 | 
						|
 | 
						|
    This program is distributed in the hope that it will be useful,
 | 
						|
    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 | 
						|
 | 
						|
 **********************************************************************}
 | 
						|
 | 
						|
{ this unit is included temporarily for 2.2 bootstrapping and can be
 | 
						|
  removed after the next release after 2.2.2 }
 | 
						|
{$mode objfpc}
 | 
						|
unit ccharset;
 | 
						|
 | 
						|
  interface
 | 
						|
 | 
						|
    type
 | 
						|
       tunicodechar = word;
 | 
						|
       tunicodestring = ^tunicodechar;
 | 
						|
 | 
						|
       tcsconvert = class
 | 
						|
         // !!!!!!1constructor create;
 | 
						|
       end;
 | 
						|
 | 
						|
       tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
 | 
						|
         umf_unused);
 | 
						|
 | 
						|
       punicodecharmapping = ^tunicodecharmapping;
 | 
						|
       tunicodecharmapping = record
 | 
						|
          unicode : tunicodechar;
 | 
						|
          flag : tunicodecharmappingflag;
 | 
						|
          reserved : byte;
 | 
						|
       end;
 | 
						|
 | 
						|
       punicodemap = ^tunicodemap;
 | 
						|
       tunicodemap = record
 | 
						|
          cpname : string[20];
 | 
						|
          map : punicodecharmapping;
 | 
						|
          lastchar : longint;
 | 
						|
          next : punicodemap;
 | 
						|
          internalmap : boolean;
 | 
						|
       end;
 | 
						|
 | 
						|
       tcp2unicode = class(tcsconvert)
 | 
						|
       end;
 | 
						|
 | 
						|
    function loadunicodemapping(const cpname,f : string) : punicodemap;
 | 
						|
    procedure registermapping(p : punicodemap);
 | 
						|
    function getmap(const s : string) : punicodemap;
 | 
						|
    function mappingavailable(const s : string) : boolean;
 | 
						|
    function getunicode(c : char;p : punicodemap) : tunicodechar;
 | 
						|
    function getascii(c : tunicodechar;p : punicodemap) : string;
 | 
						|
 | 
						|
  implementation
 | 
						|
 | 
						|
    var
 | 
						|
       mappings : punicodemap;
 | 
						|
 | 
						|
    function loadunicodemapping(const cpname,f : string) : punicodemap;
 | 
						|
 | 
						|
      var
 | 
						|
         data : punicodecharmapping;
 | 
						|
         datasize : longint;
 | 
						|
         t : text;
 | 
						|
         s,hs : string;
 | 
						|
         scanpos,charpos,unicodevalue : longint;
 | 
						|
         code : word;
 | 
						|
         flag : tunicodecharmappingflag;
 | 
						|
         p : punicodemap;
 | 
						|
         lastchar : longint;
 | 
						|
 | 
						|
      begin
 | 
						|
         lastchar:=-1;
 | 
						|
         loadunicodemapping:=nil;
 | 
						|
         datasize:=256;
 | 
						|
         getmem(data,sizeof(tunicodecharmapping)*datasize);
 | 
						|
         assign(t,f);
 | 
						|
         {$I-}
 | 
						|
         reset(t);
 | 
						|
         {$I+}
 | 
						|
         if ioresult<>0 then
 | 
						|
           begin
 | 
						|
              freemem(data,sizeof(tunicodecharmapping)*datasize);
 | 
						|
              exit;
 | 
						|
           end;
 | 
						|
         while not(eof(t)) do
 | 
						|
           begin
 | 
						|
              readln(t,s);
 | 
						|
              if (s[1]='0') and (s[2]='x') then
 | 
						|
                begin
 | 
						|
                   flag:=umf_unused;
 | 
						|
                   scanpos:=3;
 | 
						|
                   hs:='$';
 | 
						|
                   while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
 | 
						|
                     begin
 | 
						|
                        hs:=hs+s[scanpos];
 | 
						|
                        inc(scanpos);
 | 
						|
                     end;
 | 
						|
                   val(hs,charpos,code);
 | 
						|
                   if code<>0 then
 | 
						|
                     begin
 | 
						|
                        freemem(data,sizeof(tunicodecharmapping)*datasize);
 | 
						|
                        close(t);
 | 
						|
                        exit;
 | 
						|
                     end;
 | 
						|
                   while not(s[scanpos] in ['0','#']) do
 | 
						|
                     inc(scanpos);
 | 
						|
                   if s[scanpos]='#' then
 | 
						|
                     begin
 | 
						|
                        { special char }
 | 
						|
                        unicodevalue:=$ffff;
 | 
						|
                        hs:=copy(s,scanpos,length(s)-scanpos+1);
 | 
						|
                        if hs='#DBCS LEAD BYTE' then
 | 
						|
                          flag:=umf_leadbyte;
 | 
						|
                     end
 | 
						|
                   else
 | 
						|
                     begin
 | 
						|
                        { C hex prefix }
 | 
						|
                        inc(scanpos,2);
 | 
						|
                        hs:='$';
 | 
						|
                        while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
 | 
						|
                          begin
 | 
						|
                             hs:=hs+s[scanpos];
 | 
						|
                             inc(scanpos);
 | 
						|
                          end;
 | 
						|
                        val(hs,unicodevalue,code);
 | 
						|
                        if code<>0 then
 | 
						|
                          begin
 | 
						|
                             freemem(data,sizeof(tunicodecharmapping)*datasize);
 | 
						|
                             close(t);
 | 
						|
                             exit;
 | 
						|
                          end;
 | 
						|
                        if charpos>datasize then
 | 
						|
                          begin
 | 
						|
                             { allocate 1024 bytes more because         }
 | 
						|
                             { if we need more than 256 entries it's    }
 | 
						|
                             { probably a mbcs with a lot of            }
 | 
						|
                             { entries                                  }
 | 
						|
                             datasize:=charpos+1024;
 | 
						|
                             reallocmem(data,sizeof(tunicodecharmapping)*datasize);
 | 
						|
                          end;
 | 
						|
                        flag:=umf_noinfo;
 | 
						|
                     end;
 | 
						|
                   data[charpos].flag:=flag;
 | 
						|
                   data[charpos].unicode:=unicodevalue;
 | 
						|
                   if charpos>lastchar then
 | 
						|
                     lastchar:=charpos;
 | 
						|
                end;
 | 
						|
           end;
 | 
						|
         close(t);
 | 
						|
         new(p);
 | 
						|
         p^.lastchar:=lastchar;
 | 
						|
         p^.cpname:=cpname;
 | 
						|
         p^.internalmap:=false;
 | 
						|
         p^.next:=nil;
 | 
						|
         p^.map:=data;
 | 
						|
         loadunicodemapping:=p;
 | 
						|
      end;
 | 
						|
 | 
						|
    procedure registermapping(p : punicodemap);
 | 
						|
 | 
						|
      begin
 | 
						|
         p^.next:=mappings;
 | 
						|
         mappings:=p;
 | 
						|
      end;
 | 
						|
 | 
						|
    function getmap(const s : string) : punicodemap;
 | 
						|
 | 
						|
      var
 | 
						|
         hp : punicodemap;
 | 
						|
 | 
						|
      const
 | 
						|
         mapcache : string = '';
 | 
						|
         mapcachep : punicodemap = nil;
 | 
						|
 | 
						|
      begin
 | 
						|
         if (mapcache=s) and assigned(mapcachep) and (mapcachep^.cpname=s) then
 | 
						|
           begin
 | 
						|
              getmap:=mapcachep;
 | 
						|
              exit;
 | 
						|
           end;
 | 
						|
         hp:=mappings;
 | 
						|
         while assigned(hp) do
 | 
						|
           begin
 | 
						|
              if hp^.cpname=s then
 | 
						|
                begin
 | 
						|
                   getmap:=hp;
 | 
						|
                   mapcache:=s;
 | 
						|
                   mapcachep:=hp;
 | 
						|
                   exit;
 | 
						|
                end;
 | 
						|
              hp:=hp^.next;
 | 
						|
           end;
 | 
						|
         getmap:=nil;
 | 
						|
      end;
 | 
						|
 | 
						|
    function mappingavailable(const s : string) : boolean;
 | 
						|
 | 
						|
      begin
 | 
						|
         mappingavailable:=getmap(s)<>nil;
 | 
						|
      end;
 | 
						|
 | 
						|
    function getunicode(c : char;p : punicodemap) : tunicodechar;
 | 
						|
 | 
						|
      begin
 | 
						|
         if ord(c)<=p^.lastchar then
 | 
						|
           getunicode:=p^.map[ord(c)].unicode
 | 
						|
         else
 | 
						|
           getunicode:=0;
 | 
						|
      end;
 | 
						|
 | 
						|
    function getascii(c : tunicodechar;p : punicodemap) : string;
 | 
						|
 | 
						|
      var
 | 
						|
         i : longint;
 | 
						|
 | 
						|
      begin
 | 
						|
         { at least map to space }
 | 
						|
         getascii:=#32;
 | 
						|
         for i:=0 to p^.lastchar do
 | 
						|
           if p^.map[i].unicode=c then
 | 
						|
             begin
 | 
						|
                if i<256 then
 | 
						|
                  getascii:=chr(i)
 | 
						|
                else
 | 
						|
                  getascii:=chr(i div 256)+chr(i mod 256);
 | 
						|
                exit;
 | 
						|
             end;
 | 
						|
      end;
 | 
						|
 | 
						|
  var
 | 
						|
     hp : punicodemap;
 | 
						|
 | 
						|
initialization
 | 
						|
  mappings:=nil;
 | 
						|
finalization
 | 
						|
  while assigned(mappings) do
 | 
						|
    begin
 | 
						|
       hp:=mappings^.next;
 | 
						|
       if not(mappings^.internalmap) then
 | 
						|
         begin
 | 
						|
            freemem(mappings^.map);
 | 
						|
            dispose(mappings);
 | 
						|
         end;
 | 
						|
       mappings:=hp;
 | 
						|
    end;
 | 
						|
end.
 |