Patch from JiXian Yang, improves Asian encoding support

git-svn-id: trunk@27176 -
2025-12-16 02:20:30 +01:00 · 2010-08-24 08:42:29 +00:00 · 2010-08-24 08:42:29 +00:00 · 0ee7468af1
commit 0ee7468af1
parent a2046cf61b
3 changed files with 248 additions and 227 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -4444,6 +4444,7 @@ lcl/imglist.pp svneol=native#text/pascal
 lcl/include/actionlink.inc svneol=native#text/pascal
 lcl/include/application.inc svneol=native#text/pascal
 lcl/include/applicationproperties.inc svneol=native#text/pascal
+lcl/include/asiancodepagefunctions.inc svneol=native#text/plain
 lcl/include/asiancodepages.inc svneol=native#text/plain
 lcl/include/bevel.inc svneol=native#text/pascal
 lcl/include/bitbtn.inc svneol=native#text/pascal
--- a/lcl/include/asiancodepagefunctions.inc
+++ b/lcl/include/asiancodepagefunctions.inc
@ -0,0 +1,228 @@
+{%MainUnit ../lconvencoding.pp}
+
+{******************************************************************************
+                               Asian Unicode Functions
+ ******************************************************************************
+
+ *****************************************************************************
+ *                                                                           *
+ *  This file is part of the Lazarus Component Library (LCL)                 *
+ *                                                                           *
+ *  See the file COPYING.modifiedLGPL.txt, included in this distribution,    *
+ *  for details about the copyright.                                         *
+ *                                                                           *
+ *  This program is distributed in the hope that it will be useful,          *
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of           *
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                     *
+ *                                                                           *
+ *****************************************************************************
+
+  Conversion routines between UTF-8 and the Asian code pages 936, 950, 949 and 932.
+}
+
+{ Converts s from the double-byte code page CodeP (936, 950, 949 or 932) to
+  UTF-8. Bytes below 128 are copied unchanged; a byte >= 128 is combined with
+  the following byte into (lead shl 8) or trail and mapped through the
+  CPxxxCC/UnixxxC tables. For any other CodeP those byte pairs are dropped. }
+function SingleByteToUTF8Ex(const s: string; CodeP: integer): string;
+var
+  len:  integer;
+  i, j: integer;
+  Src:  PChar;
+  Dest: PChar;
+  c:    char;
+  trail: char;
+  CharUTF8: ansistring;
+  tempint: integer;
+begin
+  if s = '' then
+  begin
+    Result := s;
+    exit;
+  end;
+  len := length(s);
+  SetLength(Result, len * 6);// Asia UTF-8 is at most 6 bytes
+  Src  := PChar(s);
+  Dest := PChar(Result);
+  i    := 1;
+  // i is the 1-based index of the next unread byte; "<=" so the final byte is converted too
+  while i <= len do
+  begin
+    c := Src^;
+    Inc(Src);
+    Inc(i);
+    if Ord(c) < 128 then
+    begin
+      Dest^ := c;
+      Inc(Dest);
+    end
+    else
+    begin
+      // lead byte: fetch the trail byte, or use #0 when the input ends here
+      trail := #0;
+      if i <= len then
+      begin
+        trail := Src^;
+        Inc(Src);
+        Inc(i);
+      end;
+      // built arithmetically so the key does not depend on CPU byte order
+      tempint := (Ord(c) shl 8) or Ord(trail);
+      case CodeP of
+        936:
+          tempint := Uni936C[SearchTable(CP936CC, tempint)];
+        950:
+          tempint := Uni950C[SearchTable(CP950CC, tempint)];
+        949:
+          tempint := Uni949C[SearchTable(CP949CC, tempint)];
+        932:
+          tempint := Uni932C[SearchTable(CP932CC, tempint)];
+        else
+          tempint := -1;
+      end;
+      if tempint <> -1 then
+      begin
+        // own string for the UTF-8 bytes; no fixed-size key buffer gets resized
+        CharUTF8 := UnicodeToUTF8(tempint);
+        for j := 1 to Length(CharUTF8) do
+        begin
+          Dest^ := CharUTF8[j];
+          Inc(Dest);
+        end;
+      end;
+    end;
+  end;
+  SetLength(Result, PtrUInt(Dest) - PtrUInt(Result));
+end;
+
+function CP936ToUTF8(const s: string): string; // CP936 (Chinese) -> UTF-8
+begin
+  Result := SingleByteToUTF8Ex(s, 936); // 936 selects the CP936CC/Uni936C tables
+end;
+
+function CP950ToUTF8(const s: string): string; // CP950 (Chinese Complex) -> UTF-8
+begin
+  Result := SingleByteToUTF8Ex(s, 950); // 950 selects the CP950CC/Uni950C tables
+end;
+
+function CP949ToUTF8(const s: string): string; // CP949 (Korean) -> UTF-8
+begin
+  Result := SingleByteToUTF8Ex(s, 949); // 949 selects the CP949CC/Uni949C tables
+end;
+
+function CP932ToUTF8(const s: string): string; // CP932 (Japanese) -> UTF-8
+begin
+  Result := SingleByteToUTF8Ex(s, 932); // 932 selects the CP932CC/Uni932C tables
+end;
+
+// Unicode -> CP936 code: 0..127 maps to itself, the rest via the Uni936U/CP936CU tables.
+function UnicodeToCP936(Unicode: cardinal): integer;
+begin
+  if Unicode < 128 then
+    Result := Unicode
+  else
+    Result := CP936CU[SearchTable(Uni936U, Unicode)];
+end;
+
+// Unicode -> CP950 code: 0..127 maps to itself, the rest via the Uni950U/CP950CU tables.
+function UnicodeToCP950(Unicode: cardinal): integer;
+begin
+  if Unicode < 128 then
+    Result := Unicode
+  else
+    Result := CP950CU[SearchTable(Uni950U, Unicode)];
+end;
+
+// Unicode -> CP949 code: 0..127 maps to itself, the rest via the Uni949U/CP949CU tables.
+function UnicodeToCP949(Unicode: cardinal): integer;
+begin
+  if Unicode < 128 then
+    Result := Unicode
+  else
+    Result := CP949CU[SearchTable(Uni949U, Unicode)];
+end;
+
+// Unicode -> CP932 code: 0..127 maps to itself, the rest via the Uni932U/CP932CU tables.
+function UnicodeToCP932(Unicode: cardinal): integer;
+begin
+  if Unicode < 128 then
+    Result := Unicode
+  else
+    Result := CP932CU[SearchTable(Uni932U, Unicode)];
+end;
+
+{ Converts the UTF-8 string s to a single/double-byte code page. Every non-ASCII
+  character is decoded and passed to UTF8CharConvFunc; a negative result drops
+  the character, a result above $ff is stored as two bytes (high byte first). }
+function UTF8ToSingleByteEx(const s: string;
+  const UTF8CharConvFunc: TUnicodeToCharID): string;
+var
+  len:  integer;
+  Src:  PChar;
+  Dest: PChar;
+  c:    char;
+  Unicode: longword;
+  CharLen: integer;
+  i:    integer;
+begin
+  if s = '' then
+  begin
+    Result := '';
+    exit;
+  end;
+  len := length(s);
+  SetLength(Result, len);
+  Src  := PChar(s);
+  Dest := PChar(Result);
+  while len > 0 do
+  begin
+    c := Src^;
+    if c < #128 then
+    begin
+      Dest^ := c;
+      Inc(Dest);
+      Inc(Src);
+      Dec(len);
+    end
+    else
+    begin
+      Unicode := UTF8CharacterToUnicode(Src, CharLen);
+      Inc(Src, CharLen);
+      Dec(len, CharLen);
+      i := UTF8CharConvFunc(Unicode);
+      if i >= 0 then
+      begin
+        if i > $ff then
+        begin
+          Dest^ := chr((i shr 8) and $ff);
+          Inc(Dest);
+        end;
+        // mask explicitly: chr(i) with i > $ff is a range error under {$R+}
+        Dest^ := chr(i and $ff);
+        Inc(Dest);
+      end;
+    end;
+  end;
+  SetLength(Result, PtrUInt(Dest) - PtrUInt(Result));
+end;
+
+function UTF8ToCP936(const s: string): string; // UTF-8 -> CP936 (Chinese)
+begin
+  Result := UTF8ToSingleByteEx(s, @UnicodeToCP936); // UnicodeToCP936 maps each character
+end;
+
+function UTF8ToCP950(const s: string): string; // UTF-8 -> CP950 (Chinese Complex)
+begin
+  Result := UTF8ToSingleByteEx(s, @UnicodeToCP950); // UnicodeToCP950 maps each character
+end;
+
+function UTF8ToCP949(const s: string): string; // UTF-8 -> CP949 (Korean)
+begin
+  Result := UTF8ToSingleByteEx(s, @UnicodeToCP949); // UnicodeToCP949 maps each character
+end;
+
+function UTF8ToCP932(const s: string): string; // UTF-8 -> CP932 (Japanese)
+begin
+  Result := UTF8ToSingleByteEx(s, @UnicodeToCP932); // UnicodeToCP932 maps each character
+end;
+
--- a/lcl/lconvencoding.pas
+++ b/lcl/lconvencoding.pas
@ -67,13 +67,8 @@ function CP850ToUTF8(const s: string): string;  // DOS western europe
 function CP866ToUTF8(const s: string): string;  // DOS and Windows console's cyrillic
 function CP874ToUTF8(const s: string): string;  // thai
 function KOI8ToUTF8(const s: string): string;  // russian cyrillic
-function CP936ToUTF8(const s: string): string;      // Chinese
-function CP950ToUTF8(const s: string): string;      // Chinese Complex
-function CP949ToUTF8(const s: string): string;      // korea
-function CP932ToUTF8(const s: string): string;      // japanese
 function SingleByteToUTF8(const s: string;
                          const Table: TCharToUTF8Table): string;
-function SingleByteToUTF8Ex(const s: string; CodeP: integer): string;                          
 function UCS2LEToUTF8(const s: string): string; // UCS2-LE 2byte little endian
 function UCS2BEToUTF8(const s: string): string; // UCS2-BE 2byte big endian

@ -94,16 +89,27 @@ function UTF8ToCP850(const s: string): string;  // DOS western europe
 function UTF8ToCP866(const s: string): string;  // DOS and Windows console's cyrillic
 function UTF8ToCP874(const s: string): string;  // thai
 function UTF8ToKOI8(const s: string): string;  // russian cyrillic
+function UTF8ToSingleByte(const s: string;
+                          const UTF8CharConvFunc: TUnicodeToCharID): string;
+function UTF8ToUCS2LE(const s: string): string; // UCS2-LE 2byte little endian
+function UTF8ToUCS2BE(const s: string): string; // UCS2-BE 2byte big endian
+
+// Asian encodings
+
+function CP936ToUTF8(const s: string): string;      // Chinese
+function CP950ToUTF8(const s: string): string;      // Chinese Complex
+function CP949ToUTF8(const s: string): string;      // korea
+function CP932ToUTF8(const s: string): string;      // japanese
+
+function SingleByteToUTF8Ex(const s: string; CodeP: integer): string;
+
 function UTF8ToCP936(const s: string): string;      // Chinese, essentially the same as GB 2312 and a predecessor to GB 18030
 function UTF8ToCP950(const s: string): string;      // Chinese Complex
 function UTF8ToCP949(const s: string): string;      // korea
 function UTF8ToCP932(const s: string): string;      // japanese
-function UTF8ToSingleByte(const s: string;
-                          const UTF8CharConvFunc: TUnicodeToCharID): string;
+
 function UTF8ToSingleByteEx(const s: string;
-                          const UTF8CharConvFunc: TUnicodeToCharID): string;                          
-function UTF8ToUCS2LE(const s: string): string; // UCS2-LE 2byte little endian
-function UTF8ToUCS2BE(const s: string): string; // UCS2-BE 2byte big endian
+                          const UTF8CharConvFunc: TUnicodeToCharID): string;

 procedure GetSupportedEncodings(List: TStrings);

@ -113,11 +119,12 @@ implementation
 uses Windows;
 {$ENDIF}

-{$include include/asiancodepages.inc}
-
 var EncodingValid: boolean = false;
    DefaultTextEncoding: string = EncodingAnsi;

+{$include include/asiancodepages.inc}
+{$include include/asiancodepagefunctions.inc}
+
 {$IFDEF Windows}
 function GetWindowsEncoding: string;
 var
@ -4461,109 +4468,6 @@ begin
  SetLength(Result,PtrUInt(Dest)-PtrUInt(Result));
 end;

-function SingleByteToUTF8Ex(const s: string; CodeP: integer): string;
-var
-  len:  integer;
-  i, j:    integer;
-  Src:  PChar;
-  Dest: PChar;
-  p:    PChar;
-  c:    char;
-  tempstr: ansistring;
-  tempint: integer;
-begin
-  SetLength(tempstr, 4);
-  if s = '' then
-  begin
-    Result := s;
-    exit;
-  end;
-  len := length(s);
-  SetLength(Result, len * 6);// UTF-8 is at most 6 bytes
-  Src  := PChar(s);
-  Dest := PChar(Result);
-  //for i:=1 to len do begin
-  i    := 1;
-  while i < len do
-  begin
-    c := Src^;
-    Inc(Src);
-    i := i + 1;
-    if Ord(c) < 128 then
-    begin
-      Dest^ := c;
-      Inc(Dest);
-      //writeln(Format('%X', [Byte(c)]));
-    end
-    else
-    begin
-      //p:=Table[c];
-      TempStr[2] := c;
-      if i <= len then
-      begin
-        TempStr[1] := Src^;
-        i := i + 1;
-      end
-      else
-        TempStr[1] := #0;
-      TempStr[4]   := #0;
-      TempStr[3]   := #0;
-      tempint      := PInteger(@TempStr[1])^;
-      Inc(Src);
-      ///for i:=1 to 4 do
-      //    writeln(Format('%X', [tempint]));
-
-      case CodeP of
-        936:
-          tempint := Uni936C[SearchTable(CP936CC, tempint)];
-        950:
-          tempint := Uni950C[SearchTable(CP950CC, tempint)];
-        949:
-          tempint := Uni949C[SearchTable(CP949CC, tempint)];
-        932:
-          tempint := Uni932C[SearchTable(CP932CC, tempint)];
-        else
-          tempint := -1;
-      end;
-      //    writeln(Format('U %X ', [tempint]));
-
-      if tempint <> -1 then
-      begin
-        //PInteger(@TempStr[1])^ := CP936CU[SearchTable(CP936CC, tempint)];
-        TempStr := UnicodeToUTF8(tempint); //CP936CU[SearchTable(CP936CC, tempint)]);
-
-        for j := 1 to Length(TempStr) do
-        begin
-          Dest^ := TempStr[j];
-          Inc(Dest);
-          //      writeln(Format('%X', [Byte(TempStr[i])]));
-        end;
-      end;
-    end;
-  end;
-  SetLength(Result, PtrUInt(Dest) - PtrUInt(Result));
-end;
-
-function CP936ToUTF8(const s: string): string;
-begin
-  Result := SingleByteToUTF8Ex(s, 936);
-end;
-
-function CP950ToUTF8(const s: string): string;
-begin
-  Result := SingleByteToUTF8Ex(s, 950);
-end;
-
-function CP949ToUTF8(const s: string): string;
-begin
-  Result := SingleByteToUTF8Ex(s, 949);
-end;
-
-function CP932ToUTF8(const s: string): string;
-begin
-  Result := SingleByteToUTF8Ex(s, 932);
-end;
-
 function UCS2LEToUTF8(const s: string): string;
 var
  len: Integer;
@ -5496,43 +5400,6 @@ begin
  end;
 end;

-
-function UnicodeToCP936(Unicode: cardinal): integer;
-begin
-  case Unicode of
-    0..127: Result := Unicode;
-    else
-      Result := CP936CU[SearchTable(Uni936U, Unicode)];
-  end;
-end;
-
-function UnicodeToCP950(Unicode: cardinal): integer;
-begin
-  case Unicode of
-    0..127: Result := Unicode;
-    else
-      Result := CP950CU[SearchTable(Uni950U, Unicode)];
-  end;
-end;
-
-function UnicodeToCP949(Unicode: cardinal): integer;
-begin
-  case Unicode of
-    0..127: Result := Unicode;
-    else
-      Result := CP949CU[SearchTable(Uni949U, Unicode)];
-  end;
-end;
-
-function UnicodeToCP932(Unicode: cardinal): integer;
-begin
-  case Unicode of
-    0..127: Result := Unicode;
-    else
-      Result := CP932CU[SearchTable(Uni932U, Unicode)];
-  end;
-end;
-
 function UnicodeToKOI8(Unicode: cardinal): integer;
 begin
  case Unicode of
@ -5806,81 +5673,6 @@ begin
  SetLength(Result,Dest-PChar(Result));
 end;

-function UTF8ToSingleByteEx(const s: string;
-  const UTF8CharConvFunc: TUnicodeToCharID): string;
-var
-  len:  integer;
-  Src:  PChar;
-  Dest: PChar;
-  c:    char;
-  Unicode: longword;
-  CharLen: integer;
-  i:    integer;
-begin
-  if s = '' then
-  begin
-    Result := '';
-    exit;
-  end;
-  len := length(s);
-  SetLength(Result, len);
-  Src  := PChar(s);
-  Dest := PChar(Result);
-  while len > 0 do
-  begin
-    c := Src^;
-    if c < #128 then
-    begin
-      Dest^ := c;
-      Inc(Dest);
-      Inc(Src);
-      Dec(len);
-    end
-    else
-    begin
-      Unicode := UTF8CharacterToUnicode(Src, CharLen);
-      Inc(Src, CharLen);
-      Dec(len, CharLen);
-      i := UTF8CharConvFunc(Unicode);
-      //writeln(Format('%X', [i]));
-      if i >= 0 then
-      begin
-        if i > $ff then
-        begin
-          Dest^ := chr(i shr 8);
-          Inc(Dest);
-          Dest^ := chr(i);
-        end
-        else
-          Dest^ := chr(i);
-        Inc(Dest);
-      end;
-    end;
-  end;
-  //SetLength(Result, Dest - PChar(Result));
-  SetLength(Result, PtrUInt(Dest) - PtrUInt(Result));
-end;
-
-function UTF8ToCP936(const s: string): string;
-begin
-  Result := UTF8ToSingleByteEx(s, @UnicodeToCP936);
-end;
-
-function UTF8ToCP950(const s: string): string;
-begin
-  Result := UTF8ToSingleByteEx(s, @UnicodeToCP950);
-end;
-
-function UTF8ToCP949(const s: string): string;
-begin
-  Result := UTF8ToSingleByteEx(s, @UnicodeToCP949);
-end;
-
-function UTF8ToCP932(const s: string): string;
-begin
-  Result := UTF8ToSingleByteEx(s, @UnicodeToCP932);
-end;
-
 function UTF8ToUCS2LE(const s: string): string;
 var
  len: Integer;