mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-16 23:21:57 +02:00
* HTML reader, attempt recovery from misplaced '<' characters (another part of Mantis #16916).
git-svn-id: trunk@15574 -
This commit is contained in:
parent
28041f3e3f
commit
b4b83a7edb
@ -136,6 +136,22 @@ begin
|
||||
inherited Destroy;
|
||||
end;
|
||||
|
||||
{ Reports whether Tag starts with something that is a valid XML name.

  Tag is scanned as a #0-terminated character buffer:
    - a first character of '!' yields False immediately;
    - a single leading '/' is skipped;
    - the candidate name then extends up to (not including) the first #0,
      '/' or XML whitespace character.
  The result is whatever IsXMLName reports for that candidate span, which
  may be zero characters long.

  NOTE(review): callers appear to pass the token text without its leading
  '<' - confirm against the call site in THTMLReader.Parse. }
function CheckForName(const Tag: SAXString): Boolean;
var
  NameStart, NameEnd: PSAXChar;
begin
  NameStart := PSAXChar(Tag);

  // Tokens introduced by '!' are never treated as a tag name.
  if NameStart^ = '!' then
  begin
    Result := False;
    Exit;
  end;

  // Step over one optional leading '/'.
  if NameStart^ = '/' then
    Inc(NameStart);

  // Advance NameEnd to the first terminator: #0, '/' or XML whitespace.
  NameEnd := NameStart;
  while not ((NameEnd^ = #0) or (NameEnd^ = '/') or IsXMLWhitespace(NameEnd^)) do
    Inc(NameEnd);

  Result := IsXMLName(NameStart, NameEnd - NameStart);
end;
|
||||
|
||||
procedure THTMLReader.Parse(AInput: TSAXInputSource);
|
||||
const
|
||||
MaxBufferSize = 1024;
|
||||
@ -163,6 +179,7 @@ begin
|
||||
|
||||
BufferPos := 0;
|
||||
while (BufferPos < BufferSize) and not FStopFlag do
|
||||
begin
|
||||
case ScannerContext of
|
||||
scUnknown:
|
||||
case Buffer[BufferPos] of
|
||||
@ -261,13 +278,30 @@ begin
|
||||
if FCurStringValueDelimiter = #0 then
|
||||
EnterNewScannerContext(scUnknown);
|
||||
end;
|
||||
else
|
||||
begin
|
||||
FTokenText := FTokenText + Buffer[BufferPos];
|
||||
Inc(BufferPos);
|
||||
end;
|
||||
'<': // either an unclosed tag or unescaped '<' in text; attempt recovery
|
||||
begin
|
||||
// TODO: this check is hardly complete, probably must also check if
|
||||
// tag name is followed by legal attributes.
|
||||
if CheckForName(FTokenText) then
|
||||
EnterNewScannerContext(scUnknown) // assume unclosed tag
|
||||
else if (FTokenText <> '') and (FTokenText[1] <> '!') then
|
||||
begin
|
||||
Insert('<', FTokenText, 1); // assume plaintext
|
||||
FScannerContext := scText;
|
||||
EnterNewScannerContext(scUnknown);
|
||||
end
|
||||
else
|
||||
begin // in comment, ignore
|
||||
FTokenText := FTokenText + Buffer[BufferPos];
|
||||
Inc(BufferPos);
|
||||
end;
|
||||
end;
|
||||
else
|
||||
FTokenText := FTokenText + Buffer[BufferPos];
|
||||
Inc(BufferPos);
|
||||
end;
|
||||
end;
|
||||
end; // case ScannerContext of
|
||||
end; // while not endOfBuffer
|
||||
end;
|
||||
end;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user