mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-09-05 00:02:05 +02:00
lazutils: UTF8FixBroken: check for ASCII encoded as multi-byte sequences
git-svn-id: trunk@35036 -
This commit is contained in:
parent
5feee468d7
commit
28fbd2b34c
@ -187,32 +187,35 @@ begin
|
||||
// regular single byte character (#0 is a character, this is pascal ;)
|
||||
Result:=1;
|
||||
end
|
||||
else if ((ord(p^) and %11100000) = %11000000) then begin
|
||||
// could be 2 byte character
|
||||
if (ord(p[1]) and %11000000) = %10000000 then
|
||||
Result:=2
|
||||
else begin
|
||||
// multi byte
|
||||
if ((ord(p^) and %11100000) = %11000000) then begin
|
||||
// could be 2 byte character
|
||||
if (ord(p[1]) and %11000000) = %10000000 then
|
||||
Result:=2
|
||||
else
|
||||
Result:=1;
|
||||
end
|
||||
else if ((ord(p^) and %11110000) = %11100000) then begin
|
||||
// could be 3 byte character
|
||||
if ((ord(p[1]) and %11000000) = %10000000)
|
||||
and ((ord(p[2]) and %11000000) = %10000000) then
|
||||
Result:=3
|
||||
else
|
||||
Result:=1;
|
||||
end
|
||||
else if ((ord(p^) and %11111000) = %11110000) then begin
|
||||
// could be 4 byte character
|
||||
if ((ord(p[1]) and %11000000) = %10000000)
|
||||
and ((ord(p[2]) and %11000000) = %10000000)
|
||||
and ((ord(p[3]) and %11000000) = %10000000) then
|
||||
Result:=4
|
||||
else
|
||||
Result:=1;
|
||||
end
|
||||
else
|
||||
Result:=1;
|
||||
end
|
||||
else if ((ord(p^) and %11110000) = %11100000) then begin
|
||||
// could be 3 byte character
|
||||
if ((ord(p[1]) and %11000000) = %10000000)
|
||||
and ((ord(p[2]) and %11000000) = %10000000) then
|
||||
Result:=3
|
||||
else
|
||||
Result:=1;
|
||||
end
|
||||
else if ((ord(p^) and %11111000) = %11110000) then begin
|
||||
// could be 4 byte character
|
||||
if ((ord(p[1]) and %11000000) = %10000000)
|
||||
and ((ord(p[2]) and %11000000) = %10000000)
|
||||
and ((ord(p[3]) and %11000000) = %10000000) then
|
||||
Result:=4
|
||||
else
|
||||
Result:=1;
|
||||
end
|
||||
else
|
||||
Result:=1
|
||||
end;
|
||||
end else
|
||||
Result:=0;
|
||||
end;
|
||||
@ -292,7 +295,6 @@ begin
|
||||
if (CharLen>1) and (Result<128) then begin
|
||||
// invalid character
|
||||
Result:=ord(p^);
|
||||
CharLen:=1;
|
||||
end;
|
||||
end;
|
||||
end else begin
|
||||
@ -463,6 +465,8 @@ end;
|
||||
|
||||
{ fix any broken UTF8 sequences by overwriting the invalid bytes with spaces }
|
||||
procedure UTF8FixBroken(P: PChar);
|
||||
var
|
||||
c: cardinal;
|
||||
begin
|
||||
if p=nil then exit;
|
||||
while p^<>#0 do begin
|
||||
@ -476,27 +480,46 @@ begin
|
||||
inc(p);
|
||||
end
|
||||
else if ((ord(p^) and %11100000) = %11000000) then begin
|
||||
// should be 2 byte character
|
||||
if (ord(p[1]) and %11000000) = %10000000 then
|
||||
inc(p,2)
|
||||
// starts with %110 => should be 2 byte character
|
||||
if ((ord(p[1]) and %11000000) = %10000000) then begin
|
||||
c:=((ord(p^) and %00011111) shl 6);
|
||||
//or (ord(p[1]) and %00111111);
|
||||
if c<128 then
|
||||
p^:=' '
|
||||
else
|
||||
inc(p,2)
|
||||
end
|
||||
else if p[1]<>#0 then
|
||||
p^:=' ';
|
||||
end
|
||||
else if ((ord(p^) and %11110000) = %11100000) then begin
|
||||
// should be 3 byte character
|
||||
// starts with %1110 => should be 3 byte character
|
||||
if ((ord(p[1]) and %11000000) = %10000000)
|
||||
and ((ord(p[2]) and %11000000) = %10000000) then
|
||||
inc(p,3)
|
||||
else
|
||||
and ((ord(p[2]) and %11000000) = %10000000) then begin
|
||||
c:=((ord(p^) and %00011111) shl 12)
|
||||
or ((ord(p[1]) and %00111111) shl 6);
|
||||
//or (ord(p[2]) and %00111111);
|
||||
if c<128 then
|
||||
p^:=' '
|
||||
else
|
||||
inc(p,3);
|
||||
end else
|
||||
p^:=' ';
|
||||
end
|
||||
else if ((ord(p^) and %11111000) = %11110000) then begin
|
||||
// should be 4 byte character
|
||||
// starts with %11110 => should be 4 byte character
|
||||
if ((ord(p[1]) and %11000000) = %10000000)
|
||||
and ((ord(p[2]) and %11000000) = %10000000)
|
||||
and ((ord(p[3]) and %11000000) = %10000000) then
|
||||
inc(p,4)
|
||||
else
|
||||
and ((ord(p[3]) and %11000000) = %10000000) then begin
|
||||
c:=((ord(p^) and %00001111) shl 18)
|
||||
or ((ord(p[1]) and %00111111) shl 12)
|
||||
or ((ord(p[2]) and %00111111) shl 6);
|
||||
//or (ord(p[3]) and %00111111);
|
||||
if c<128 then
|
||||
p^:=' '
|
||||
else
|
||||
inc(p,4)
|
||||
end else
|
||||
p^:=' ';
|
||||
end
|
||||
else begin
|
||||
|
Loading…
Reference in New Issue
Block a user