Completes lowercase up to codepoint 1EFF and adds many tests

git-svn-id: trunk@32880 -
2025-05-30 03:22:48 +02:00 · 2011-10-14 09:53:23 +00:00 · 2011-10-14 09:53:23 +00:00 · d6cddfc645
commit d6cddfc645
parent c3ae437f84
2 changed files with 220 additions and 29 deletions
--- a/components/lazutils/lazutf8.pas
+++ b/components/lazutils/lazutf8.pas
@ -1536,7 +1536,7 @@ var
  InStr, InStrEnd, OutStr: PChar;
  // Language identification
  IsTurkish: Boolean;
-  c1, c2, c3, new_c1, new_c2: Char;
+  c1, c2, c3, new_c1, new_c2, new_c3: Char;
 begin
  Result:=AInStr;
  InStr := PChar(AInStr);
@ -1549,7 +1549,7 @@ begin
    c1 := InStr^;
    case c1 of
    'A'..'Z': Break;
-    #$C3, #$C4, #$C5..#$C8, #$CE, #$D0..#$D2, #$E1:
+    #$C3..#$C9, #$CE, #$D0..#$D5, #$E1:
    begin
      c2 := InStr[1];
      case c1 of
@ -1570,9 +1570,10 @@ begin
          #$B8: Break;
        end;
      end;
-      #$C6..#$C8,#$CE, #$D0..#$D2, #$E1: Break;
-      // already lower, or otherwhise not affected
+      else
+        Break;
      end;
+      // already lower, or otherwise not affected
    end;
    end;
    inc(InStr);
@ -1617,8 +1618,7 @@ begin
      end;

      // Chars with 2-bytes which might be modified
-      //#$C3..#$C8, #$CE, #$D0..#$D2:
-      #$C3..#$D2:
+      #$C3..#$D5:
      begin
        c2 := InStr[1];
        new_c1 := c1;
@ -1655,7 +1655,12 @@ begin
              inc(CounterDiff, 1);
              Continue;
            end;
-            #$B9..#$BF: if ord(c2) mod 2 = 1 then new_c2 := chr(ord(c2) + 1);
+            #$B9..#$BE: if ord(c2) mod 2 = 1 then new_c2 := chr(ord(c2) + 1);
+            #$BF: // The lowercase form has a different first UTF-8 byte, so both bytes are replaced
+            begin
+              new_c1 := #$C5;
+              new_c2 := #$80;
+            end;
          end;
        end;
        // $C589 ŉ
@ -1670,7 +1675,7 @@ begin
              if ord(c2) mod 2 = 0 then
                new_c2 := chr(ord(c2) + 1);
            end;
-            #$00..#$88, #$B9..#$FF: //1
+            #$00..#$88, #$B9..#$BE: //1
            begin
              if ord(c2) mod 2 = 1 then
                new_c2 := chr(ord(c2) + 1);
@ -1814,7 +1819,7 @@ begin
          01A3;LATIN SMALL LETTER OI;Ll;0;L;;;;;N;LATIN SMALL LETTER O I;;01A2;;01A2 <=
          01A4;LATIN CAPITAL LETTER P WITH HOOK;Lu;0;L;;;;;N;LATIN CAPITAL LETTER P HOOK;;;01A5; => +1
          01A5;LATIN SMALL LETTER P WITH HOOK;Ll;0;L;;;;;N;LATIN SMALL LETTER P HOOK;;01A4;;01A4 <=
-          01A6;LATIN LETTER YR;Lu;0;L;;;;;N;LATIN LETTER Y R;;;0280; <=
+          01A6;LATIN LETTER YR;Lu;0;L;;;;;N;LATIN LETTER Y R;;;0280; => CA 80
          01A7;LATIN CAPITAL LETTER TONE TWO;Lu;0;L;;;;;N;;;;01A8; => +1
          01A8;LATIN SMALL LETTER TONE TWO;Ll;0;L;;;;;N;;;01A7;;01A7 <=
          01A9;LATIN CAPITAL LETTER ESH;Lu;0;L;;;;;N;;;;0283; => CA 83
@ -1835,6 +1840,11 @@ begin
            if ord(c2) mod 2 = 1 then
              new_c2 := chr(ord(c2) + 1);
          end;
+          #$A6:
+          begin
+            new_c1 := #$CA;
+            new_c2 := #$80;
+          end;
          #$A9:
          begin
            new_c1 := #$CA;
@ -1974,6 +1984,41 @@ begin
          #$BB: new_c2 := chr(ord(c2) + 1);
          end;
        end;
+        {
+        Codepoints 0240 to 027F
+
+        Only 0240..024F contain letters that need lowercasing
+        }
+        #$C9:
+        begin
+          case c2 of
+          #$81..#$82:
+          begin
+            if ord(c2) mod 2 = 1 then
+              new_c2 := chr(ord(c2) + 1);
+          end;
+          #$86..#$8F:
+          begin
+            if ord(c2) mod 2 = 0 then
+              new_c2 := chr(ord(c2) + 1);
+          end;
+          #$83:
+          begin
+            new_c1 := #$C6;
+            new_c2 := #$80;
+          end;
+          #$84:
+          begin
+            new_c1 := #$CA;
+            new_c2 := #$89;
+          end;
+          #$85:
+          begin
+            new_c1 := #$CA;
+            new_c2 := #$8C;
+          end;
+          end;
+        end;
        // $CE91..$CE9F: NewChar := OldChar + $20; // Greek Characters
        // $CEA0..$CEA9: NewChar := OldChar + $E0; // Greek Characters
        #$CE:
@ -2037,6 +2082,61 @@ begin
            end;
          end;
        end;
+        {
+        Codepoints  04C0..04FF
+        }
+        #$D3:
+        begin
+          case c2 of
+            #$80: new_c2 := #$8F;
+            #$81..#$8E:
+            begin
+              if ord(c2) mod 2 = 1 then
+                new_c2 := chr(ord(c2) + 1);
+            end;
+            #$90..#$BF:
+            begin
+              if ord(c2) mod 2 = 0 then
+                new_c2 := chr(ord(c2) + 1);
+            end;
+          end;
+        end;
+        {
+        Codepoints  0500..053F
+
+        Armenian starts at 0531
+        }
+        #$D4:
+        begin
+          if ord(c2) mod 2 = 0 then
+            new_c2 := chr(ord(c2) + 1);
+
+          // Armenian
+          if c2 in [#$B1..#$BF] then
+          begin
+            new_c1 := #$D5;
+            new_c2 := chr(ord(c2) - $10);
+          end;
+        end;
+        {
+        Codepoints  0540..057F
+
+        Armenian
+        }
+        #$D5:
+        begin
+          case c2 of
+            #$80..#$8F:
+            begin
+              new_c2 := chr(ord(c2) + $30);
+            end;
+            #$90..#$96:
+            begin
+              new_c1 := #$D6;
+              new_c2 := chr(ord(c2) - $10);
+            end;
+          end;
+        end;
        end;
        // Common code 2-byte modifiable chars
        if (CounterDiff <> 0) then
@ -2052,36 +2152,90 @@ begin
        inc(InStr, 2);
        inc(OutStr, 2);
      end;
-      // Georgian codepoints 10A0-10C5 => 2D00-2D25
-      // In UTF-8 this is:
-      // E1 82 A0 - E1 82 BF => E2 B4 80 - E2 B4 9F
-      // E1 83 80 - E1 83 85 => E2 B4 A0 - E2 B4 A5
+      {
+      Characters with 3 bytes
+      }
      #$E1:
      begin
        new_c1 := c1;
        c2 := InStr[1];
        c3 := InStr[2];
+        new_c2 := c2;
+        new_c3 := c3;
+        {
+        Georgian codepoints 10A0-10C5 => 2D00-2D25
+
+        In UTF-8 this is:
+        E1 82 A0 - E1 82 BF => E2 B4 80 - E2 B4 9F
+        E1 83 80 - E1 83 85 => E2 B4 A0 - E2 B4 A5
+        }
        if (c2 = #$82) and (c3 in [#$A0..#$BF]) then
        begin
-          OutStr^ := #$E2;
-          OutStr[1] := #$B4;
-          OutStr[2] := chr(ord(c3) - $20);
+          new_c1 := #$E2;
+          new_c2 := #$B4;
+          new_c3 := chr(ord(c3) - $20);
        end
        else if (c2 = #$83) and (c3 in [#$80..#$85]) then
        begin
-          OutStr^ := #$E2;
-          OutStr[1] := #$B4;
-          OutStr[2] := chr(ord(c3) + $20);
+          new_c1 := #$E2;
+          new_c2 := #$B4;
+          new_c3 := chr(ord(c3) + $20);
+        end
+        {
+        Extra chars between 1E00..1EFF
+
+        Blocks of chars:
+          1E00..1E3F    E1 B8 80..E1 B8 BF
+          1E40..1E7F    E1 B9 80..E1 B9 BF
+          1E80..1EBF    E1 BA 80..E1 BA BF
+          1EC0..1EFF    E1 BB 80..E1 BB BF
+        }
+        else if c2 in [#$B8..#$BB] then
+        begin
+          // Start with a default and change for some particular chars
+          if ord(c3) mod 2 = 0 then
+            new_c3 := chr(ord(c3) + 1);
+
+          { Only 1E96..1E9F (UTF-8: E1 BA 96..E1 BA 9F) do not follow the even/odd rule:
+
+          1E96;LATIN SMALL LETTER H WITH LINE BELOW;Ll;0;L;0068 0331;;;;N;;;;;
+          1E97;LATIN SMALL LETTER T WITH DIAERESIS;Ll;0;L;0074 0308;;;;N;;;;;
+          1E98;LATIN SMALL LETTER W WITH RING ABOVE;Ll;0;L;0077 030A;;;;N;;;;;
+          1E99;LATIN SMALL LETTER Y WITH RING ABOVE;Ll;0;L;0079 030A;;;;N;;;;;
+          1E9A;LATIN SMALL LETTER A WITH RIGHT HALF RING;Ll;0;L;<compat> 0061 02BE;;;;N;;;;;
+          1E9B;LATIN SMALL LETTER LONG S WITH DOT ABOVE;Ll;0;L;017F 0307;;;;N;;;1E60;;1E60
+          1E9C;LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE;Ll;0;L;;;;;N;;;;;
+          1E9D;LATIN SMALL LETTER LONG S WITH HIGH STROKE;Ll;0;L;;;;;N;;;;;
+          1E9E;LATIN CAPITAL LETTER SHARP S;Lu;0;L;;;;;N;;;;00DF; => C3 9F
+          1E9F;LATIN SMALL LETTER DELTA;Ll;0;L;;;;;N;;;;;
+          }
+          if (c2 = #$BA) and (c3 in [#$96..#$9F]) then new_c3 := c3;
+          // LATIN CAPITAL LETTER SHARP S (1E9E) => German sharp s (00DF, UTF-8: C3 9F)
+          if (c2 = #$BA) and (c3 = #$9E) then
+          begin
+            inc(InStr, 3);
+            OutStr^ := #$C3;
+            inc(OutStr);
+            OutStr^ := #$9F;
+            inc(OutStr);
+            inc(CounterDiff, 1);
+            Continue;
+          end;
+        end;
+
+        if (CounterDiff <> 0) then
+        begin
+          OutStr^ := new_c1;
+          OutStr[1] := new_c2;
+          OutStr[2] := new_c3;
        end
        else
        begin
-          if (CounterDiff <> 0) then
-          begin
-            OutStr^ := InStr[0];
-            OutStr[1] := InStr[1];
-            OutStr[2] := InStr[2];
-          end;
+          if c1 <> new_c1 then OutStr^ := new_c1;
+          if c2 <> new_c2 then OutStr[1] := new_c2;
+          if c3 <> new_c3 then OutStr[2] := new_c3;
        end;
+
        inc(InStr, 3);
        inc(OutStr, 3);
      end;
--- a/test/lazutils/testunicode.pas
+++ b/test/lazutils/testunicode.pas
@ -25,13 +25,13 @@ begin
  begin
    Write(' Expected ', AStrExpected2, ' !Error!');
    WriteLn();
-    Write('Got      Len=', Length(AStr2),' ');
+    Write('Got      Len=', Length(AStr2), ' Str=');
    WriteStringHex(AStr2);
    WriteLn('');
-    Write('Expected Len=', Length(AStrExpected2),' ');
+    Write('Expected Len=', Length(AStrExpected2), ' Str=');
    WriteStringHex(AStrExpected2);
    WriteLn();
-    Write('Orig     Len=', Length(AStr1),' ');
+    Write('Orig     Len=', Length(AStr1), ' Str=');
    WriteStringHex(AStr1);
    WriteLn('');
  end;
@ -112,21 +112,58 @@ begin
  AssertStringOperationUTF8LowerCase('Polish UTF8LowerCase 1', '', 'aąbcćdeęfghijklłmnńoóprsśtuwyzźż', 'aąbcćdeęfghijklłmnńoóprsśtuwyzźż');
  AssertStringOperationUTF8LowerCase('Polish UTF8LowerCase 2', '', 'AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ', 'aąbcćdeęfghijklłmnńoóprsśtuwyzźż');
  AssertStringOperationUTF8LowerCase('German UTF8LowerCase 1', '', 'Ä/ä,Ö/ö,Ü/ü,ß', 'ä/ä,ö/ö,ü/ü,ß');
+  // Unicode table
  AssertStringOperationUTF8LowerCase('Latin 00C0 UTF8LowerCase', '', 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ', 'àáâãäåæçèéêëìíîï');
  AssertStringOperationUTF8LowerCase('Latin 00D0 UTF8LowerCase', '', 'ÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß', 'ðñòóôõö×øùúûüýþß');
+  AssertStringOperationUTF8LowerCase('Latin 00E0 UTF8LowerCase', '', 'àáâãäåæçèéêëìíîï', 'àáâãäåæçèéêëìíîï');
+  AssertStringOperationUTF8LowerCase('Latin 00F0 UTF8LowerCase', '', 'ðñòóôõö÷øùúûüýþÿ', 'ðñòóôõö÷øùúûüýþÿ');
  AssertStringOperationUTF8LowerCase('Latin 0100 UTF8LowerCase', '', 'Āā Ăă Ąą Ćć Ĉĉ Ċċ Čč Ďď', 'āā ăă ąą ćć ĉĉ ċċ čč ďď');
+  AssertStringOperationUTF8LowerCase('Latin 0110 UTF8LowerCase', '', 'ĐđĒēĔĕĖėĘęĚěĜĝĞğ', 'đđēēĕĕėėęęěěĝĝğğ');
  AssertStringOperationUTF8LowerCase('Latin 0120 UTF8LowerCase', '', 'ĠġĢģĤĥĦħĨĩĪīĬĭĮį', 'ġġģģĥĥħħĩĩīīĭĭįį');
+  AssertStringOperationUTF8LowerCase('Latin 0130 UTF8LowerCase', '', 'İıĲĳĴĵĶķĸĹĺĻļĽľĿ', 'iıĳĳĵĵķķĸĺĺļļľľŀ');
  AssertStringOperationUTF8LowerCase('Latin 0140 UTF8LowerCase', '', 'ŀŁłŃńŅņŇňŉŊŋŌōŎŏ', 'ŀłłńńņņňňŉŋŋōōŏŏ');
+  AssertStringOperationUTF8LowerCase('Latin 0150 UTF8LowerCase', '', 'ŐőŒœŔŕŖŗŘřŚśŜŝŞş', 'őőœœŕŕŗŗřřśśŝŝşş');
  AssertStringOperationUTF8LowerCase('Latin 0160 UTF8LowerCase', '', 'ŠšŢţŤťŦŧŨũŪūŬŭŮů', 'ššţţťťŧŧũũūūŭŭůů');
+  AssertStringOperationUTF8LowerCase('Latin 0170 UTF8LowerCase', '', 'ŰűŲųŴŵŶŷŸŹźŻżŽžſ', 'űűųųŵŵŷŷÿźźżżžžſ');
  AssertStringOperationUTF8LowerCase('Latin 0180 UTF8LowerCase', '', 'ƀ Ɓ Ƃƃ Ƅƅ Ɔ Ƈƈ Ɖ Ɗ Ƌƌ ƍ Ǝ Ə', 'ƀ ɓ ƃƃ ƅƅ ɔ ƈƈ ɖ ɗ ƌƌ ƍ ǝ ə');
  AssertStringOperationUTF8LowerCase('Latin 0190 UTF8LowerCase', '', 'ƐƑƒƓƔƕƖƗƘƙƚƛƜƝƞƟ', 'ɛƒƒɠɣƕɩɨƙƙƚƛɯɲƞɵ');
-  AssertStringOperationUTF8LowerCase('Latin 01A0 UTF8LowerCase', '', 'ƠơƢƣƤƥƦƧƨƩƪƫƬƭƮƯ', 'ơơƣƣƥƥƦƨƨʃƪƫƭƭʈư');
+  AssertStringOperationUTF8LowerCase('Latin 01A0 UTF8LowerCase', '', 'ƠơƢƣƤƥƦƧƨƩƪƫƬƭƮƯ', 'ơơƣƣƥƥʀƨƨʃƪƫƭƭʈư');
  AssertStringOperationUTF8LowerCase('Latin 01B0 UTF8LowerCase', '', 'ưƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿ', 'ưʊʋƴƴƶƶʒƹƹƺƻƽƽƾƿ');
  AssertStringOperationUTF8LowerCase('Latin 01C0 UTF8LowerCase', '', 'ǀǁǂǃǄǅǆǇǈǉǊǋǌǍǎǏ', 'ǀǁǂǃǆǆǆǉǉǉǌǌǌǎǎǐ');
  AssertStringOperationUTF8LowerCase('Latin 0200 UTF8LowerCase', '', 'ȀȁȂȃȄȅȆȇȈȉȊȋȌȍȎȏ', 'ȁȁȃȃȅȅȇȇȉȉȋȋȍȍȏȏ');
  AssertStringOperationUTF8LowerCase('Latin 0210 UTF8LowerCase', '', 'ȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟ', 'ȑȑȓȓȕȕȗȗșșțțȝȝȟȟ');
  AssertStringOperationUTF8LowerCase('Latin 0220 UTF8LowerCase', '', 'ȠȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯ', 'ƞȡȣȣȥȥȧȧȩȩȫȫȭȭȯȯ');
  AssertStringOperationUTF8LowerCase('Latin 0230 UTF8LowerCase', '', 'ȰȱȲȳȴȵȶȷȸȹȺȻȼȽȾȿ', 'ȱȱȳȳȴȵȶȷȸȹⱥȼȼƚⱦȿ');
+  AssertStringOperationUTF8LowerCase('Latin 0240 UTF8LowerCase', '', 'ɀɁɂɃɄɅɆɇɈɉɊɋɌɍɎɏ', 'ɀɂɂƀʉʌɇɇɉɉɋɋɍɍɏɏ');
+  AssertStringOperationUTF8LowerCase('Latin 0250 UTF8LowerCase', '', 'ɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟ', 'ɐɑɒɓɔɕɖɗɘəɚɛɜɝɞɟ');
+  AssertStringOperationUTF8LowerCase('Unicode 0400 UTF8LowerCase', '', 'ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏ', 'ѐёђѓєѕіїјљњћќѝўџ');
+  AssertStringOperationUTF8LowerCase('Unicode 0410 UTF8LowerCase', '', 'АБВГДЕЖЗИЙКЛМНОП', 'абвгдежзийклмноп');
+  AssertStringOperationUTF8LowerCase('Unicode 0420 UTF8LowerCase', '', 'РСТУФХЦЧШЩЪЫЬЭЮЯ', 'рстуфхцчшщъыьэюя');
+  AssertStringOperationUTF8LowerCase('Unicode 0430 UTF8LowerCase', '', 'абвгдежзийклмноп', 'абвгдежзийклмноп');
+  AssertStringOperationUTF8LowerCase('Unicode 0440 UTF8LowerCase', '', 'рстуфхцчшщъыьэюя', 'рстуфхцчшщъыьэюя');
+  AssertStringOperationUTF8LowerCase('Unicode 0450 UTF8LowerCase', '', 'ѐёђѓєѕіїјљњћќѝўџ', 'ѐёђѓєѕіїјљњћќѝўџ');
+  AssertStringOperationUTF8LowerCase('Unicode 0460 UTF8LowerCase', '', 'ѠѡѢѣѤѥѦѧѨѩѪѫѬѭѮѯ', 'ѡѡѣѣѥѥѧѧѩѩѫѫѭѭѯѯ');
+  AssertStringOperationUTF8LowerCase('Unicode 0470 UTF8LowerCase', '', 'ѰѱѲѳѴѵѶѷѸѹѺѻѼѽѾѿ', 'ѱѱѳѳѵѵѷѷѹѹѻѻѽѽѿѿ');
+  AssertStringOperationUTF8LowerCase('Unicode 0480 UTF8LowerCase', '', 'Ҁҁ҂ ҃ ҄ ҅ ҆ ҇ ҈ ҉ҊҋҌҍҎҏ', 'ҁҁ҂ ҃ ҄ ҅ ҆ ҇ ҈ ҉ҋҋҍҍҏҏ');
+  AssertStringOperationUTF8LowerCase('Unicode 0490 UTF8LowerCase', '', 'ҐґҒғҔҕҖҗҘҙҚқҜҝҞҟ', 'ґґғғҕҕҗҗҙҙққҝҝҟҟ');
+  AssertStringOperationUTF8LowerCase('Unicode 04A0 UTF8LowerCase', '', 'ҠҡҢңҤҥҦҧҨҩҪҫҬҭҮү', 'ҡҡңңҥҥҧҧҩҩҫҫҭҭүү');
+  AssertStringOperationUTF8LowerCase('Unicode 04B0 UTF8LowerCase', '', 'ҰұҲҳҴҵҶҷҸҹҺһҼҽҾҿ', 'ұұҳҳҵҵҷҷҹҹһһҽҽҿҿ');
+  AssertStringOperationUTF8LowerCase('Unicode 04C0 UTF8LowerCase', '', 'ӀӁӂӃӄӅӆӇӈӉӊӋӌӍӎӏ', 'ӏӂӂӄӄӆӆӈӈӊӊӌӌӎӎӏ');
+  AssertStringOperationUTF8LowerCase('Unicode 04D0 UTF8LowerCase', '', 'ӐӑӒӓӔӕӖӗӘәӚӛӜӝӞӟ', 'ӑӑӓӓӕӕӗӗәәӛӛӝӝӟӟ');
+  AssertStringOperationUTF8LowerCase('Unicode 04E0 UTF8LowerCase', '', 'ӠӡӢӣӤӥӦӧӨөӪӫӬӭӮӯ', 'ӡӡӣӣӥӥӧӧөөӫӫӭӭӯӯ');
+  AssertStringOperationUTF8LowerCase('Unicode 04F0 UTF8LowerCase', '', 'ӰӱӲӳӴӵӶӷӸӹӺӻӼӽӾӿ', 'ӱӱӳӳӵӵӷӷӹӹӻӻӽӽӿӿ');
+  AssertStringOperationUTF8LowerCase('Unicode 0500 UTF8LowerCase', '', 'ԀԁԂԃԄԅԆԇԈԉԊԋԌԍԎԏ', 'ԁԁԃԃԅԅԇԇԉԉԋԋԍԍԏԏ');
+  AssertStringOperationUTF8LowerCase('Unicode 0510 UTF8LowerCase', '', 'ԐԑԒԓԔԕԖԗԘԙԚԛԜԝԞԟ', 'ԑԑԓԓԕԕԗԗԙԙԛԛԝԝԟԟ');
+  AssertStringOperationUTF8LowerCase('Unicode 0520 UTF8LowerCase', '', 'ԠԡԢԣԤԥԦԧ', 'ԡԡԣԣԥԥԧԧ');
+  // Armenian Unicode Table
+  AssertStringOperationUTF8LowerCase('Unicode 0530 UTF8LowerCase', '', 'ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿ', 'աբգդեզէըթժիլխծկ');
+  AssertStringOperationUTF8LowerCase('Unicode 0540 UTF8LowerCase', '', 'ՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏ', 'հձղճմյնշոչպջռսվտ');
+  AssertStringOperationUTF8LowerCase('Unicode 0550 UTF8LowerCase', '', 'ՐՑՒՓՔՕՖ', 'րցւփքօֆ');
+  AssertStringOperationUTF8LowerCase('Unicode 0560 UTF8LowerCase', '', 'աբգդեզէըթժիլխծկ', 'աբգդեզէըթժիլխծկ');
+  AssertStringOperationUTF8LowerCase('Unicode 0570 UTF8LowerCase', '', 'հձղճմյնշոչպջռսվտ', 'հձղճմյնշոչպջռսվտ');
+  AssertStringOperationUTF8LowerCase('Unicode 0580 UTF8LowerCase', '', 'րցւփքօֆ', 'րցւփքօֆ');
+  // Higher Unicode Table
+  AssertStringOperationUTF8LowerCase('Unicode 1E00 UTF8LowerCase', '', 'ḀḁḂḃḄḅḆḇḈḉḊḋḌḍḎḏ', 'ḁḁḃḃḅḅḇḇḉḉḋḋḍḍḏḏ');
  // Turkish
  AssertStringOperationUTF8LowerCase('Turkish UTF8LowerCase 1', 'tu', 'abcçdefgğhııijklmnoöprsştuüvyz', 'abcçdefgğhııijklmnoöprsştuüvyz');
  AssertStringOperationUTF8LowerCase('Turkish UTF8LowerCase 2', 'tu', 'ABCÇDEFGĞHIIİJKLMNOÖPRSŞTUÜVYZ', 'abcçdefgğhııijklmnoöprsştuüvyz');