diff --git a/.gitattributes b/.gitattributes
index 32ab9b5e3f..89992b9db2 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -12291,6 +12291,7 @@ tests/test/tcpstr26b.pp svneol=native#text/plain
 tests/test/tcpstr26c.pp svneol=native#text/plain
 tests/test/tcpstr26d.pp svneol=native#text/plain
 tests/test/tcpstr27.pp svneol=native#text/plain
+tests/test/tcpstr28.pp svneol=native#text/pascal
 tests/test/tcpstr2a.pp svneol=native#text/plain
 tests/test/tcpstr3.pp svneol=native#text/plain
 tests/test/tcpstr4.pp svneol=native#text/plain
diff --git a/compiler/scanner.pas b/compiler/scanner.pas
index be38faec24..b86b546a6c 100644
--- a/compiler/scanner.pas
+++ b/compiler/scanner.pas
@@ -4586,6 +4586,7 @@ type
     procedure tscannerfile.readtoken(allowrecordtoken:boolean);
       var
         code    : integer;
+        d : cardinal;
         len,
         low,high,mid : longint;
         w : word;
@@ -5160,9 +5161,35 @@ type
                                    iswidestring:=true;
                                    len:=0;
                                  end;
-                               { four or more chars aren't handled }
+                               { four chars }
                                if (ord(c) and $f0)=$f0 then
-                                 message(scan_e_utf8_bigger_than_65535)
+                                 begin
+                                   { this always represents a surrogate pair, so
+                                     read as 32-bit value and then split into
+                                     the corresponding pair of two wchars }
+                                   d:=ord(c) and $f;
+                                   readchar;
+                                   if (ord(c) and $c0)<>$80 then
+                                     message(scan_e_utf8_malformed);
+                                   d:=(d shl 6) or (ord(c) and $3f);
+                                   readchar;
+                                   if (ord(c) and $c0)<>$80 then
+                                     message(scan_e_utf8_malformed);
+                                   d:=(d shl 6) or (ord(c) and $3f);
+                                   readchar;
+                                   if (ord(c) and $c0)<>$80 then
+                                     message(scan_e_utf8_malformed);
+                                   d:=(d shl 6) or (ord(c) and $3f);
+                                   if d<$10000 then
+                                     message(scan_e_utf8_malformed);
+                                   d:=d-$10000;
+                                   { high surrogate }
+                                   w:=$d800+(d shr 10);
+                                   concatwidestringchar(patternw,w);
+                                   { low surrogate }
+                                   w:=$dc00+(d and $3ff);
+                                   concatwidestringchar(patternw,w);
+                                 end
                                { three chars }
                                else if (ord(c) and $e0)=$e0 then
                                  begin
diff --git a/tests/test/tcpstr28.pp b/tests/test/tcpstr28.pp
new file mode 100644
index 0000000000..843df14055
--- /dev/null
+++ b/tests/test/tcpstr28.pp
@@ -0,0 +1,23 @@
+program tcpstr28;
+
+{$codepage utf8}
+
+const
+  Test = '𝄞𤽜';
+  UTF8Test = UTF8String(Test);
+  UTF16Test = UnicodeString(Test);
+
+var
+  utf8str: UTF8String = Test;
+  utf16str: UnicodeString = Test;
+
+begin
+  if Length(UTF8Test) <> 8 then
+    Halt(1);
+  if Length(utf8str) <> 8 then
+    Halt(1);
+  if Length(UTF16Test) <> 4 then
+    Halt(1);
+  if Length(utf16str) <> 4 then
+    Halt(1);
+end.