diff --git a/components/lazutils/lazutf8.pas b/components/lazutils/lazutf8.pas index 49d0e20b64..c5c2bfb5c8 100644 --- a/components/lazutils/lazutf8.pas +++ b/components/lazutils/lazutf8.pas @@ -54,7 +54,8 @@ function UTF8FindNearestCharStart(UTF8Str: PChar; Len: integer; function UTF8CharStart(UTF8Str: PChar; Len, CharIndex: PtrInt): PChar; // find the byte index of the n-th UTF8 character, ignoring BIDI (byte len of substr) function UTF8CharToByteIndex(UTF8Str: PChar; Len, CharIndex: PtrInt): PtrInt; -procedure UTF8FixBroken(P: PChar); +procedure UTF8FixBroken(P: PChar); overload; +procedure UTF8FixBroken(var S: string); overload; function UTF8CharacterStrictLength(P: PChar): integer; function UTF8CStringToUTF8String(SourceStart: PChar; SourceLen: PtrInt) : string; function UTF8Pos(const SearchForText, SearchInText: string): PtrInt; @@ -523,7 +524,7 @@ begin c:=((ord(p^) and %00011111) shl 6); //or (ord(p[1]) and %00111111); if c<(1 shl 7) then - p^:=' ' + p^:=' ' // fix XSS attack else inc(p,2) end @@ -538,7 +539,7 @@ begin or ((ord(p[1]) and %00111111) shl 6); //or (ord(p[2]) and %00111111); if c<(1 shl 11) then - p^:=' ' + p^:=' ' // fix XSS attack else inc(p,3); end else @@ -554,7 +555,7 @@ begin or ((ord(p[2]) and %00111111) shl 6); //or (ord(p[3]) and %00111111); if c<(1 shl 16) then - p^:=' ' + p^:=' ' // fix XSS attack else inc(p,4) end else @@ -567,6 +568,14 @@ begin end; end; +procedure UTF8FixBroken(var S: string); +begin + if S='' then exit; + if FindInvalidUTF8Character(PChar(S),length(S),true)<0 then exit; + UniqueString(S); + UTF8FixBroken(PChar(S)); +end; + function UTF8CharacterStrictLength(P: PChar): integer; begin if p=nil then exit(0); diff --git a/components/wiki/lazwiki/wikiparser.pas b/components/wiki/lazwiki/wikiparser.pas index f4a8449bc2..65fde5165f 100644 --- a/components/wiki/lazwiki/wikiparser.pas +++ b/components/wiki/lazwiki/wikiparser.pas @@ -1499,18 +1499,8 @@ begin end; procedure TWikiPage.FixUTF8; -var - p: PChar; - e: PChar; begin - if FSrc='' then exit; - UniqueString(FSrc); - p:=PChar(FSrc); - e:=p+length(FSrc); - while p - + Replaces all invalid UTF8 characters with spaces. Stops at #0. diff --git a/ide/ideprocs.pp b/ide/ideprocs.pp index 5bd7e69909..99118046d9 100644 --- a/ide/ideprocs.pp +++ b/ide/ideprocs.pp @@ -1706,10 +1706,8 @@ begin for i:=1 to length(Result) do if Result[i] in [#0..#31,#127] then Result[i]:=' '; if Result='' then exit; - if FixUTF8 then begin - UniqueString(Result); - UTF8FixBroken(PChar(Result)); - end; + if FixUTF8 then + UTF8FixBroken(Result); Result:=UTF8Trim(Result); end;