mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-07-16 00:38:34 +02:00
lazutf8: added overloaded UTF8FixBroken(var string)
git-svn-id: trunk@36319 -
This commit is contained in:
parent
98e9e68885
commit
f0f090fb65
@ -54,7 +54,8 @@ function UTF8FindNearestCharStart(UTF8Str: PChar; Len: integer;
|
|||||||
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
|
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
|
||||||
// find the byte index of the n-th UTF8 character, ignoring BIDI (byte len of substr)
|
// find the byte index of the n-th UTF8 character, ignoring BIDI (byte len of substr)
|
||||||
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
|
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
|
||||||
procedure UTF8FixBroken(P: PChar);
|
procedure UTF8FixBroken(P: PChar); overload;
|
||||||
|
procedure UTF8FixBroken(var S: string); overload;
|
||||||
function UTF8CharacterStrictLength(P: PChar): integer;
|
function UTF8CharacterStrictLength(P: PChar): integer;
|
||||||
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string;
|
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string;
|
||||||
function UTF8Pos(const SearchForText, SearchInText: string): PtrInt;
|
function UTF8Pos(const SearchForText, SearchInText: string): PtrInt;
|
||||||
@ -523,7 +524,7 @@ begin
|
|||||||
c:=((ord(p^) and %00011111) shl 6);
|
c:=((ord(p^) and %00011111) shl 6);
|
||||||
//or (ord(p[1]) and %00111111);
|
//or (ord(p[1]) and %00111111);
|
||||||
if c<(1 shl 7) then
|
if c<(1 shl 7) then
|
||||||
p^:=' '
|
p^:=' ' // fix XSS attack
|
||||||
else
|
else
|
||||||
inc(p,2)
|
inc(p,2)
|
||||||
end
|
end
|
||||||
@ -538,7 +539,7 @@ begin
|
|||||||
or ((ord(p[1]) and %00111111) shl 6);
|
or ((ord(p[1]) and %00111111) shl 6);
|
||||||
//or (ord(p[2]) and %00111111);
|
//or (ord(p[2]) and %00111111);
|
||||||
if c<(1 shl 11) then
|
if c<(1 shl 11) then
|
||||||
p^:=' '
|
p^:=' ' // fix XSS attack
|
||||||
else
|
else
|
||||||
inc(p,3);
|
inc(p,3);
|
||||||
end else
|
end else
|
||||||
@ -554,7 +555,7 @@ begin
|
|||||||
or ((ord(p[2]) and %00111111) shl 6);
|
or ((ord(p[2]) and %00111111) shl 6);
|
||||||
//or (ord(p[3]) and %00111111);
|
//or (ord(p[3]) and %00111111);
|
||||||
if c<(1 shl 16) then
|
if c<(1 shl 16) then
|
||||||
p^:=' '
|
p^:=' ' // fix XSS attack
|
||||||
else
|
else
|
||||||
inc(p,4)
|
inc(p,4)
|
||||||
end else
|
end else
|
||||||
@ -567,6 +568,14 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
procedure UTF8FixBroken(var S: string); // string overload: repairs S in place by delegating to the PChar overload
|
||||||
|
begin
|
||||||
|
if S='' then exit; // empty string: nothing to scan or repair
|
||||||
|
if FindInvalidUTF8Character(PChar(S),length(S),true)<0 then exit; // presumably <0 means "no invalid sequence found", so the copy below is skipped - TODO confirm result and the meaning of the 'true' flag
|
||||||
|
UniqueString(S); // get an unshared copy of the data before writing through the PChar, so other references to the same string are not modified
|
||||||
|
UTF8FixBroken(PChar(S)); // NOTE(review): the PChar overload is documented as stopping at #0, so bytes after an embedded #0 in S are presumably left unrepaired - confirm
|
||||||
|
end;
|
||||||
|
|
||||||
function UTF8CharacterStrictLength(P: PChar): integer;
|
function UTF8CharacterStrictLength(P: PChar): integer;
|
||||||
begin
|
begin
|
||||||
if p=nil then exit(0);
|
if p=nil then exit(0);
|
||||||
|
@ -1499,18 +1499,8 @@ begin
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TWikiPage.FixUTF8;
|
procedure TWikiPage.FixUTF8;
|
||||||
var
|
|
||||||
p: PChar;
|
|
||||||
e: PChar;
|
|
||||||
begin
|
begin
|
||||||
if FSrc='' then exit;
|
UTF8FixBroken(FSrc);
|
||||||
UniqueString(FSrc);
|
|
||||||
p:=PChar(FSrc);
|
|
||||||
e:=p+length(FSrc);
|
|
||||||
while p<e do begin
|
|
||||||
UTF8FixBroken(p);
|
|
||||||
inc(p,UTF8CharacterLength(p));
|
|
||||||
end;
|
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure Init;
|
procedure Init;
|
||||||
|
@ -278,7 +278,7 @@ It returns 0 if the codepoint can not be represented as a 1 to 4 byte UTF-8 sequ
|
|||||||
</element>
|
</element>
|
||||||
<!-- procedure Visibility: default -->
|
<!-- procedure Visibility: default -->
|
||||||
<element name="UTF8FixBroken">
|
<element name="UTF8FixBroken">
|
||||||
<short/>
|
<short>Replaces all invalid UTF-8 characters with spaces. Stops at the first #0 character.</short>
|
||||||
<descr/>
|
<descr/>
|
||||||
<errors/>
|
<errors/>
|
||||||
<seealso/>
|
<seealso/>
|
||||||
|
@ -1706,10 +1706,8 @@ begin
|
|||||||
for i:=1 to length(Result) do
|
for i:=1 to length(Result) do
|
||||||
if Result[i] in [#0..#31,#127] then Result[i]:=' ';
|
if Result[i] in [#0..#31,#127] then Result[i]:=' ';
|
||||||
if Result='' then exit;
|
if Result='' then exit;
|
||||||
if FixUTF8 then begin
|
if FixUTF8 then
|
||||||
UniqueString(Result);
|
UTF8FixBroken(Result);
|
||||||
UTF8FixBroken(PChar(Result));
|
|
||||||
end;
|
|
||||||
Result:=UTF8Trim(Result);
|
Result:=UTF8Trim(Result);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user