mirror of
				https://gitlab.com/freepascal.org/lazarus/lazarus.git
				synced 2025-10-31 09:21:43 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			245 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			245 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| {%MainUnit ../lconvencoding.pp}
 | |
| 
 | |
| {
 | |
|  *****************************************************************************
 | |
|   This file is part of LazUtils.
 | |
| 
 | |
|   See the file COPYING.modifiedLGPL.txt, included in this distribution,
 | |
|   for details about the license.
 | |
|  *****************************************************************************
 | |
| 
 | |
|   Asian Unicode Functions.
 | |
|   The clipboard is able to work with the windows and gtk behaviour/features.
 | |
| }
 | |
| 
 | |
| function DBCSToUTF8(const s: string; const ArrayUni, ArrayCP: array of word): string;
 | |
| var
 | |
|   len, l: Integer;
 | |
|   Src, Dest: PChar;
 | |
|   c: char;
 | |
|   code: word;
 | |
| begin
 | |
|   if s = '' then exit('');
 | |
|   len := length(s);
 | |
|   SetLength(Result, len * 4);// Asia UTF-8 is at most 4 bytes
 | |
|   Src  := PChar(s);
 | |
|   Dest := PChar(Result);
 | |
|   repeat
 | |
|     c := Src^;
 | |
|     Inc(Src);
 | |
|     if Ord(c) < 128 then
 | |
|     begin
 | |
|       if (c=#0) and (Src-PChar(s)>=len) then break;
 | |
|       Dest^ := c;
 | |
|       Inc(Dest);
 | |
|     end
 | |
|     else
 | |
|     begin
 | |
|       code := Byte(c) shl 8;
 | |
|       c:=Src^;
 | |
|       if (c=#0) and (Src-PChar(s)>=len) then break;
 | |
|       code := code + Byte(c);
 | |
|       Inc(Src);
 | |
| 
 | |
|       code := ArrayUni[SearchTable(ArrayCP, code)];
 | |
|       if code>0 then
 | |
|       begin
 | |
|         l:=UnicodeToUTF8Inline(code,Dest);
 | |
|         inc(Dest,l);
 | |
|       end
 | |
|       else
 | |
|       case ConvertEncodingErrorMode of
 | |
|         ceemSkip:
 | |
|           begin end;
 | |
|         ceemException:
 | |
|           raise EConvertError.Create('Cannot convert DBCS code page to UTF8');
 | |
|         ceemReplace:
 | |
|           begin
 | |
|             Dest^:='?';
 | |
|             Inc(Dest);
 | |
|           end;
 | |
|         ceemReturnEmpty:
 | |
|           Exit('');
 | |
|       end;
 | |
|     end;
 | |
|   until false;
 | |
|   SetLength(Result, {%H-}PtrUInt(Dest) - PtrUInt(Result));
 | |
| end;
 | |
| 
 | |
| function CP936ToUTF8(const s: string): string;
 | |
| begin
 | |
|   Result := DBCSToUTF8(s, Uni936C, CP936CC);
 | |
| end;
 | |
| 
 | |
| function CP950ToUTF8(const s: string): string;
 | |
| begin
 | |
|   Result := DBCSToUTF8(s, Uni950C, CP950CC);
 | |
| end;
 | |
| 
 | |
| function CP949ToUTF8(const s: string): string;
 | |
| begin
 | |
|   Result := DBCSToUTF8(s, Uni949C, CP949CC);
 | |
| end;
 | |
| 
 | |
| function CP932ToUTF8(const s: string): string;
 | |
| begin
 | |
|   Result := DBCSToUTF8(s, Uni932C, CP932CC);
 | |
| end;
 | |
| 
 | |
| {$IfNDef UseSystemCPConv}
 | |
| function UnicodeToCP936(Unicode: cardinal): integer;
 | |
| begin
 | |
|   case Unicode of
 | |
|     0..127: Result := Unicode;
 | |
|     else
 | |
|       Result := CP936CU[SearchTable(Uni936U, Unicode)];
 | |
|   end;
 | |
| end;
 | |
| 
 | |
| function UnicodeToCP950(Unicode: cardinal): integer;
 | |
| begin
 | |
|   case Unicode of
 | |
|     0..127: Result := Unicode;
 | |
|     else
 | |
|       Result := CP950CU[SearchTable(Uni950U, Unicode)];
 | |
|   end;
 | |
| end;
 | |
| 
 | |
| function UnicodeToCP949(Unicode: cardinal): integer;
 | |
| begin
 | |
|   case Unicode of
 | |
|     0..127: Result := Unicode;
 | |
|     else
 | |
|       Result := CP949CU[SearchTable(Uni949U, Unicode)];
 | |
|   end;
 | |
| end;
 | |
| 
 | |
| function UnicodeToCP932(Unicode: cardinal): integer;
 | |
| begin
 | |
|   case Unicode of
 | |
|     0..127: Result := Unicode;
 | |
|     else
 | |
|       Result := CP932CU[SearchTable(Uni932U, Unicode)];
 | |
|   end;
 | |
| end;
 | |
| {$endif}
 | |
| 
 | |
| {$ifdef FPC_HAS_CPSTRING}
 | |
| procedure InternalUTF8ToDBCS(const s: string; TargetCodePage: TSystemCodePage;
 | |
|   SetTargetCodePage: boolean;
 | |
|   {$IfNDef UseSystemCPConv}const UTF8CharConvFunc: TUnicodeToCharID;{$endif}
 | |
|   out TheResult: RawByteString); inline;
 | |
| begin
 | |
|   {$ifdef UseSystemCPConv}
 | |
|   TheResult:=s;
 | |
|   SetCodePage(TheResult, TargetCodePage, True);
 | |
|   if not SetTargetCodePage then
 | |
|     SetCodePage(TheResult, CP_ACP, False);
 | |
|   {$else}
 | |
|   TheResult:=UTF8ToDBCS(s,UTF8CharConvFunc);
 | |
|   if SetTargetCodePage then
 | |
|     SetCodePage(TheResult, TargetCodePage, False);
 | |
|   {$endif}
 | |
| end;
 | |
| 
 | |
| function UTF8ToCP932(const s: string; SetTargetCodePage: boolean): RawByteString;
 | |
| begin
 | |
|   InternalUTF8ToDBCS(s,932,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP932{$endif},Result);
 | |
| end;
 | |
| 
 | |
| function UTF8ToCP936(const s: string; SetTargetCodePage: boolean): RawByteString;
 | |
| begin
 | |
|   InternalUTF8ToDBCS(s,936,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP936{$endif},Result);
 | |
| end;
 | |
| 
 | |
| function UTF8ToCP949(const s: string; SetTargetCodePage: boolean): RawByteString;
 | |
| begin
 | |
|   InternalUTF8ToDBCS(s,949,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP949{$endif},Result);
 | |
| end;
 | |
| 
 | |
| function UTF8ToCP950(const s: string; SetTargetCodePage: boolean): RawByteString;
 | |
| begin
 | |
|   InternalUTF8ToDBCS(s,950,SetTargetCodePage{$IfNDef UseSystemCPConv},@UnicodeToCP950{$endif},Result);
 | |
| end;
 | |
| {$ELSE}
 | |
| function UTF8ToCP932(const s: string): string;
 | |
| begin
 | |
|   Result := UTF8ToDBCS(s, @UnicodeToCP932);
 | |
| end;
 | |
| 
 | |
| function UTF8ToCP936(const s: string): string;
 | |
| begin
 | |
|   Result := UTF8ToDBCS(s, @UnicodeToCP936);
 | |
| end;
 | |
| 
 | |
| function UTF8ToCP949(const s: string): string;
 | |
| begin
 | |
|   Result := UTF8ToDBCS(s, @UnicodeToCP949);
 | |
| end;
 | |
| 
 | |
| function UTF8ToCP950(const s: string): string;
 | |
| begin
 | |
|   Result := UTF8ToDBCS(s, @UnicodeToCP950);
 | |
| end;
 | |
| {$ENDIF}
 | |
| 
 | |
| function UTF8ToDBCS(const s: string; const UTF8CharConvFunc: TUnicodeToCharID): string;
 | |
| var
 | |
|   len, i, CharLen: integer;
 | |
|   Src, Dest: PChar;
 | |
|   c: char;
 | |
|   Unicode: longword;
 | |
| begin
 | |
|   if s = '' then exit('');
 | |
|   len := length(s);
 | |
|   SetLength(Result, len); // DBCS needs at most space as UTF-8
 | |
|   Src  := PChar(s);
 | |
|   Dest := PChar(Result);
 | |
|   repeat
 | |
|     c := Src^;
 | |
|     if c < #128 then
 | |
|     begin
 | |
|       if (c=#0) and (Src-PChar(s)>=len) then break;
 | |
|       Dest^ := c;
 | |
|       Inc(Dest);
 | |
|       Inc(Src);
 | |
|     end
 | |
|     else
 | |
|     begin
 | |
|       Unicode := UTF8CodepointToUnicode(Src, CharLen);
 | |
|       Inc(Src, CharLen);
 | |
|       i := UTF8CharConvFunc(Unicode);
 | |
|       //writeln(Format('%X', [i]));
 | |
|       if i >= 0 then
 | |
|       begin
 | |
|         if i > $ff then
 | |
|         begin
 | |
|           Dest^ := chr(i shr 8);
 | |
|           Inc(Dest);
 | |
|           Dest^ := chr(i);
 | |
|         end
 | |
|         else
 | |
|           Dest^ := chr(i);
 | |
|         Inc(Dest);
 | |
|       end
 | |
|       else
 | |
|       case ConvertEncodingErrorMode of
 | |
|         ceemSkip:
 | |
|           begin end;
 | |
|         ceemException:
 | |
|           raise EConvertError.Create('Cannot convert UTF8 to DBCS code page');
 | |
|         ceemReplace:
 | |
|           begin
 | |
|             Dest^ := '?';
 | |
|             Inc(Dest);
 | |
|           end;
 | |
|         ceemReturnEmpty:
 | |
|           Exit('');
 | |
|       end;
 | |
|     end;
 | |
|   until false;
 | |
|   //SetLength(Result, Dest - PChar(Result));
 | |
|   SetLength(Result, {%H-}PtrUInt(Dest) - PtrUInt(Result));
 | |
| end;
 | |
| 
 | 
