mirror of
				https://gitlab.com/freepascal.org/fpc/source.git
				synced 2025-10-25 07:41:47 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			253 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
			
		
		
	
	
			253 lines
		
	
	
		
			7.1 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
| {
 | |
|     This file is part of the Free Pascal run time library.
 | |
|     Copyright (c) 2000 by Florian Klaempfl
 | |
|     member of the Free Pascal development team.
 | |
| 
 | |
|     This unit implements several classes for charset conversions
 | |
| 
 | |
|     See the file COPYING.FPC, included in this distribution,
 | |
|     for details about the copyright.
 | |
| 
 | |
|     This program is distributed in the hope that it will be useful,
 | |
|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 | |
| 
 | |
|  **********************************************************************
 | |
| }
 | |
| unit charset;
 | |
| 
 | |
| {$i fpcdefs.inc}
 | |
| 
 | |
|   interface
 | |
| 
 | |
|     type
 | |
|        tunicodechar = word;
 | |
|        tunicodestring = ^tunicodechar;
 | |
| 
 | |
|        tcsconvert = class
 | |
|          // !!!!!!1constructor create;
 | |
|        end;
 | |
| 
 | |
|        tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
 | |
|          umf_unused);
 | |
| 
 | |
|        punicodecharmapping = ^tunicodecharmapping;
 | |
|        tunicodecharmapping = record
 | |
|           unicode : tunicodechar;
 | |
|           flag : tunicodecharmappingflag;
 | |
|           reserved : byte;
 | |
|        end;
 | |
| 
 | |
|        punicodemap = ^tunicodemap;
 | |
|        tunicodemap = record
 | |
|           cpname : string[20];
 | |
|           map : punicodecharmapping;
 | |
|           lastchar : longint;
 | |
|           next : punicodemap;
 | |
|           internalmap : boolean;
 | |
|        end;
 | |
| 
 | |
|        tcp2unicode = class(tcsconvert)
 | |
|        end;
 | |
| 
 | |
|     function loadunicodemapping(const cpname,f : string) : punicodemap;
 | |
|     procedure registermapping(p : punicodemap);
 | |
|     function getmap(const s : string) : punicodemap;
 | |
|     function mappingavailable(const s : string) : boolean;
 | |
|     function getunicode(c : char;p : punicodemap) : tunicodechar;
 | |
|     function getascii(c : tunicodechar;p : punicodemap) : string;
 | |
| 
 | |
|   implementation
 | |
| 
 | |
|     var
 | |
|        mappings : punicodemap;
 | |
| 
 | |
|     function loadunicodemapping(const cpname,f : string) : punicodemap;
 | |
| 
 | |
|       var
 | |
|          data : punicodecharmapping;
 | |
|          datasize : longint;
 | |
|          t : text;
 | |
|          s,hs : string;
 | |
|          scanpos,charpos,unicodevalue : longint;
 | |
|          code : integer;
 | |
|          flag : tunicodecharmappingflag;
 | |
|          p : punicodemap;
 | |
|          lastchar : longint;
 | |
| 
 | |
|       begin
 | |
|          lastchar:=-1;
 | |
|          loadunicodemapping:=nil;
 | |
|          datasize:=256;
 | |
|          getmem(data,sizeof(tunicodecharmapping)*datasize);
 | |
|          assign(t,f);
 | |
|          {$I-}
 | |
|          reset(t);
 | |
|          {$I+}
 | |
|          if ioresult<>0 then
 | |
|            begin
 | |
|               freemem(data,sizeof(tunicodecharmapping)*datasize);
 | |
|               exit;
 | |
|            end;
 | |
|          while not(eof(t)) do
 | |
|            begin
 | |
|               readln(t,s);
 | |
|               if (s[1]='0') and (s[2]='x') then
 | |
|                 begin
 | |
|                    flag:=umf_unused;
 | |
|                    scanpos:=3;
 | |
|                    hs:='$';
 | |
|                    while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
 | |
|                      begin
 | |
|                         hs:=hs+s[scanpos];
 | |
|                         inc(scanpos);
 | |
|                      end;
 | |
|                    val(hs,charpos,code);
 | |
|                    if code<>0 then
 | |
|                      begin
 | |
|                         freemem(data,sizeof(tunicodecharmapping)*datasize);
 | |
|                         close(t);
 | |
|                         exit;
 | |
|                      end;
 | |
|                    while not(s[scanpos] in ['0','#']) do
 | |
|                      inc(scanpos);
 | |
|                    if s[scanpos]='#' then
 | |
|                      begin
 | |
|                         { special char }
 | |
|                         unicodevalue:=$ffff;
 | |
|                         hs:=copy(s,scanpos,length(s)-scanpos+1);
 | |
|                         if hs='#DBCS LEAD BYTE' then
 | |
|                           flag:=umf_leadbyte;
 | |
|                      end
 | |
|                    else
 | |
|                      begin
 | |
|                         { C hex prefix }
 | |
|                         inc(scanpos,2);
 | |
|                         hs:='$';
 | |
|                         while s[scanpos] in ['0'..'9','A'..'F','a'..'f'] do
 | |
|                           begin
 | |
|                              hs:=hs+s[scanpos];
 | |
|                              inc(scanpos);
 | |
|                           end;
 | |
|                         val(hs,unicodevalue,code);
 | |
|                         if code<>0 then
 | |
|                           begin
 | |
|                              freemem(data,sizeof(tunicodecharmapping)*datasize);
 | |
|                              close(t);
 | |
|                              exit;
 | |
|                           end;
 | |
|                         if charpos>datasize then
 | |
|                           begin
 | |
|                              { allocate 1024 bytes more because         }
 | |
|                              { if we need more than 256 entries it's    }
 | |
|                              { probably a mbcs with a lot of            }
 | |
|                              { entries                                  }
 | |
|                              datasize:=charpos+1024;
 | |
|                              reallocmem(data,sizeof(tunicodecharmapping)*datasize);
 | |
|                           end;
 | |
|                         flag:=umf_noinfo;
 | |
|                      end;
 | |
|                    data[charpos].flag:=flag;
 | |
|                    data[charpos].unicode:=unicodevalue;
 | |
|                    if charpos>lastchar then
 | |
|                      lastchar:=charpos;
 | |
|                 end;
 | |
|            end;
 | |
|          close(t);
 | |
|          new(p);
 | |
|          p^.lastchar:=lastchar;
 | |
|          p^.cpname:=cpname;
 | |
|          p^.internalmap:=false;
 | |
|          p^.next:=nil;
 | |
|          p^.map:=data;
 | |
|          loadunicodemapping:=p;
 | |
|       end;
 | |
| 
 | |
|     procedure registermapping(p : punicodemap);
 | |
| 
 | |
|       begin
 | |
|          p^.next:=mappings;
 | |
|          mappings:=p;
 | |
|       end;
 | |
| 
 | |
|     function getmap(const s : string) : punicodemap;
 | |
| 
 | |
|       var
 | |
|          hp : punicodemap;
 | |
| 
 | |
|       const
 | |
|          mapcachep : punicodemap = nil;
 | |
| 
 | |
|       begin
 | |
|          if assigned(mapcachep) and
 | |
|             (mapcachep^.cpname=s) then
 | |
|            begin
 | |
|               getmap:=mapcachep;
 | |
|               exit;
 | |
|            end;
 | |
|          hp:=mappings;
 | |
|          while assigned(hp) do
 | |
|            begin
 | |
|               if hp^.cpname=s then
 | |
|                 begin
 | |
|                    getmap:=hp;
 | |
|                    mapcachep:=hp;
 | |
|                    exit;
 | |
|                 end;
 | |
|               hp:=hp^.next;
 | |
|            end;
 | |
|          getmap:=nil;
 | |
|       end;
 | |
| 
 | |
|     function mappingavailable(const s : string) : boolean;
 | |
| 
 | |
|       begin
 | |
|          mappingavailable:=getmap(s)<>nil;
 | |
|       end;
 | |
| 
 | |
|     function getunicode(c : char;p : punicodemap) : tunicodechar;
 | |
| 
 | |
|       begin
 | |
|          if ord(c)<=p^.lastchar then
 | |
|            getunicode:=p^.map[ord(c)].unicode
 | |
|          else
 | |
|            getunicode:=0;
 | |
|       end;
 | |
| 
 | |
|     function getascii(c : tunicodechar;p : punicodemap) : string;
 | |
| 
 | |
|       var
 | |
|          i : longint;
 | |
| 
 | |
|       begin
 | |
|          { at least map to space }
 | |
|          getascii:=#32;
 | |
|          for i:=0 to p^.lastchar do
 | |
|            if p^.map[i].unicode=c then
 | |
|              begin
 | |
|                 if i<256 then
 | |
|                   getascii:=chr(i)
 | |
|                 else
 | |
|                   getascii:=chr(i div 256)+chr(i mod 256);
 | |
|                 exit;
 | |
|              end;
 | |
|       end;
 | |
| 
 | |
|   var
 | |
|      hp : punicodemap;
 | |
| 
 | |
| initialization
 | |
|   mappings:=nil;
 | |
| finalization
 | |
|   while assigned(mappings) do
 | |
|     begin
 | |
|        hp:=mappings^.next;
 | |
|        if not(mappings^.internalmap) then
 | |
|          begin
 | |
|             freemem(mappings^.map);
 | |
|             dispose(mappings);
 | |
|          end;
 | |
|        mappings:=hp;
 | |
|     end;
 | |
| end.
 | 
