mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-07-16 00:38:34 +02:00
lazutf8: added overloaded UTF8FixBroken(var string)
git-svn-id: trunk@36319 -
This commit is contained in:
parent
98e9e68885
commit
f0f090fb65
@ -54,7 +54,8 @@ function UTF8FindNearestCharStart(UTF8Str: PChar; Len: integer;
|
||||
function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar;
|
||||
// find the byte index of the n-th UTF8 character, ignoring BIDI (byte len of substr)
|
||||
function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt;
|
||||
procedure UTF8FixBroken(P: PChar);
|
||||
procedure UTF8FixBroken(P: PChar); overload;
|
||||
procedure UTF8FixBroken(var S: string); overload;
|
||||
function UTF8CharacterStrictLength(P: PChar): integer;
|
||||
function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string;
|
||||
function UTF8Pos(const SearchForText, SearchInText: string): PtrInt;
|
||||
@ -523,7 +524,7 @@ begin
|
||||
c:=((ord(p^) and %00011111) shl 6);
|
||||
//or (ord(p[1]) and %00111111);
|
||||
if c<(1 shl 7) then
|
||||
p^:=' '
|
||||
p^:=' ' // fix XSS attack
|
||||
else
|
||||
inc(p,2)
|
||||
end
|
||||
@ -538,7 +539,7 @@ begin
|
||||
or ((ord(p[1]) and %00111111) shl 6);
|
||||
//or (ord(p[2]) and %00111111);
|
||||
if c<(1 shl 11) then
|
||||
p^:=' '
|
||||
p^:=' ' // fix XSS attack
|
||||
else
|
||||
inc(p,3);
|
||||
end else
|
||||
@ -554,7 +555,7 @@ begin
|
||||
or ((ord(p[2]) and %00111111) shl 6);
|
||||
//or (ord(p[3]) and %00111111);
|
||||
if c<(1 shl 16) then
|
||||
p^:=' '
|
||||
p^:=' ' // fix XSS attack
|
||||
else
|
||||
inc(p,4)
|
||||
end else
|
||||
@ -567,6 +568,14 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
// String overload of UTF8FixBroken: repairs S in place by delegating to the
// PChar overload, but only when a pre-scan reports that a repair is needed.
// An empty string, or one for which the scan returns a negative result, is
// left untouched (and therefore is not made unique/copied).
// NOTE(review): the scan is given length(S), while the PChar overload is
// documented as stopping at #0 - so bytes after an embedded #0 presumably
// stay unfixed. Confirm whether callers can pass such strings.
procedure UTF8FixBroken(var S: string);
|
||||
begin
|
||||
// Nothing to scan or fix.
if S='' then exit;
|
||||
// A negative result is treated as "nothing to fix"; the meaning of the third
// argument (true) is defined by FindInvalidUTF8Character - TODO confirm.
if FindInvalidUTF8Character(PChar(S),length(S),true)<0 then exit;
|
||||
// Ensure S has its own buffer before it is modified through a raw pointer,
// so other string variables sharing the same data are not changed.
UniqueString(S);
|
||||
// In-place repair via the PChar overload.
UTF8FixBroken(PChar(S));
|
||||
end;
|
||||
|
||||
function UTF8CharacterStrictLength(P: PChar): integer;
|
||||
begin
|
||||
if p=nil then exit(0);
|
||||
|
@ -1499,18 +1499,8 @@ begin
|
||||
end;
|
||||
|
||||
procedure TWikiPage.FixUTF8;
|
||||
var
|
||||
p: PChar;
|
||||
e: PChar;
|
||||
begin
|
||||
if FSrc='' then exit;
|
||||
UniqueString(FSrc);
|
||||
p:=PChar(FSrc);
|
||||
e:=p+length(FSrc);
|
||||
while p<e do begin
|
||||
UTF8FixBroken(p);
|
||||
inc(p,UTF8CharacterLength(p));
|
||||
end;
|
||||
UTF8FixBroken(FSrc);
|
||||
end;
|
||||
|
||||
procedure Init;
|
||||
|
@ -278,7 +278,7 @@ It returns 0 if the codepoint can not be represented as a 1 to 4 byte UTF-8 sequ
|
||||
</element>
|
||||
<!-- procedure Visibility: default -->
|
||||
<element name="UTF8FixBroken">
|
||||
<short/>
|
||||
<short>Replaces all invalid UTF-8 characters with spaces. Stops at the first #0.</short>
|
||||
<descr/>
|
||||
<errors/>
|
||||
<seealso/>
|
||||
|
@ -1706,10 +1706,8 @@ begin
|
||||
for i:=1 to length(Result) do
|
||||
if Result[i] in [#0..#31,#127] then Result[i]:=' ';
|
||||
if Result='' then exit;
|
||||
if FixUTF8 then begin
|
||||
UniqueString(Result);
|
||||
UTF8FixBroken(PChar(Result));
|
||||
end;
|
||||
if FixUTF8 then
|
||||
UTF8FixBroken(Result);
|
||||
Result:=UTF8Trim(Result);
|
||||
end;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user