mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-29 13:21:35 +02:00
* The second part of changes suggested by Maris Janis Vasilevskis (Mantis #22434):
* Parse comments and script/style elements using dedicated scanner states * Ignore markup in comments * Ignore unescaped '&' in script/style elements * Additionally, report comments via OnComment event * Reverted r17003 since it is entirely replaced by above changes. git-svn-id: trunk@33250 -
This commit is contained in:
parent
f01edc0a57
commit
5cc8ddfbf1
@ -42,7 +42,10 @@ type
|
||||
scWhitespace, // within whitespace
|
||||
scText, // within text
|
||||
scEntityReference, // within entity reference ("&...;")
|
||||
scTag); // within a start tag or end tag
|
||||
scTag, // within a start tag or end tag
|
||||
scComment,
|
||||
scScript
|
||||
);
|
||||
|
||||
THTMLReader = class(TSAXReader)
|
||||
private
|
||||
@ -51,6 +54,8 @@ type
|
||||
FScannerContext: THTMLScannerContext;
|
||||
FTokenText: SAXString;
|
||||
FRawTokenText: string;
|
||||
FScriptEndTag: string;
|
||||
FScriptEndMatchPos: Integer;
|
||||
FCurStringValueDelimiter: Char;
|
||||
FAttrNameRead: Boolean;
|
||||
FStack: array of THTMLElementTag;
|
||||
@ -155,6 +160,8 @@ const
|
||||
var
|
||||
Buffer: array[0..MaxBufferSize - 1] of Char;
|
||||
BufferSize, BufferPos: Integer;
|
||||
len: Integer;
|
||||
ch: Char;
|
||||
begin
|
||||
if not FStarted then
|
||||
begin
|
||||
@ -295,8 +302,62 @@ begin
|
||||
end;
|
||||
else
|
||||
FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
||||
if FRawTokenText='!--' then
|
||||
begin
|
||||
FScannerContext := scComment;
|
||||
FRawTokenText := '';
|
||||
end;
|
||||
Inc(BufferPos);
|
||||
end;
|
||||
scComment:
|
||||
begin
|
||||
FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
||||
Inc(BufferPos);
|
||||
|
||||
if (Buffer[BufferPos-1]='>') then
|
||||
begin
|
||||
len:=length(FRawTokenText);
|
||||
if (len>2) and (FRawTokenText[len-1]='-') and (FRawTokenText[len-2]='-') then
|
||||
begin
|
||||
Delete(FRawTokenText, Length(FRawTokenText)-2, MaxInt);
|
||||
EnterNewScannerContext(scUnknown);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
scScript:
|
||||
begin
|
||||
ch := Buffer[BufferPos];
|
||||
if FScriptEndMatchPos <= Length(FScriptEndTag) then
|
||||
begin
|
||||
if lowercase(ch) = FScriptEndTag[FScriptEndMatchPos] then
|
||||
Inc(FScriptEndMatchPos)
|
||||
else
|
||||
FScriptEndMatchPos := 1;
|
||||
FRawTokenText := FRawTokenText + ch;
|
||||
Inc(BufferPos);
|
||||
end
|
||||
else
|
||||
begin
|
||||
case ch of
|
||||
#9,#10,#13,' ':
|
||||
begin
|
||||
FRawTokenText := FRawTokenText + ch;
|
||||
Inc(BufferPos);
|
||||
Inc(FScriptEndMatchPos);
|
||||
end;
|
||||
'>':
|
||||
begin
|
||||
Inc(BufferPos);
|
||||
Delete(FRawTokenText, Length(FRawTokenText)-FScriptEndMatchPos+2, MaxInt);
|
||||
EnterNewScannerContext(scUnknown);
|
||||
end;
|
||||
else
|
||||
FRawTokenText := FRawTokenText + ch;
|
||||
Inc(BufferPos);
|
||||
FScriptEndMatchPos := 1;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end; // case ScannerContext of
|
||||
end; // while not endOfBuffer
|
||||
end;
|
||||
@ -429,18 +490,6 @@ begin
|
||||
until false;
|
||||
end;
|
||||
|
||||
function RightTrimmedLength(const s: SAXString): Integer;
|
||||
begin
|
||||
result := Length(s);
|
||||
while IsXmlWhitespace(s[result]) do Dec(result);
|
||||
end;
|
||||
|
||||
function TagPos(elTag: THTMLElementTag; s: SAXString): Integer;
|
||||
begin
|
||||
WStrLower(s);
|
||||
Result := Pos(HTMLElementProps[elTag].Name, s);
|
||||
end;
|
||||
|
||||
procedure THTMLReader.EnterNewScannerContext(NewContext: THTMLScannerContext);
|
||||
var
|
||||
Attr: TSAXAttributes;
|
||||
@ -468,60 +517,62 @@ begin
|
||||
scTag:
|
||||
if Length(TokenText) > 0 then
|
||||
begin
|
||||
{ ignore possibly unescaped markup in SCRIPT and STYLE }
|
||||
if (FNesting > 0) and (FStack[FNesting-1] in [etScript,etStyle]) and
|
||||
not (
|
||||
(TokenText[1] = '/') and
|
||||
(RightTrimmedLength(TokenText)=Length(HTMLElementProps[FStack[FNesting-1]].Name)+1) and
|
||||
(TagPos(FStack[FNesting-1], TokenText) = 2)
|
||||
)
|
||||
and (TokenText[1] <> '!') then
|
||||
Attr := nil;
|
||||
if TokenText[Length(fTokenText)]='/' then // handle xml/xhtml style empty tag
|
||||
begin
|
||||
FTokenText := '<'+FTokenText+'>';
|
||||
DoCharacters(PSAXChar(TokenText), 0, Length(TokenText));
|
||||
setlength(fTokenText,length(fTokenText)-1);
|
||||
// Do NOT combine to a single line, as Attr is an output value!
|
||||
TagName := SplitTagString(TokenText, Attr);
|
||||
AutoClose(TagName);
|
||||
DoStartElement('', TagName, '', Attr);
|
||||
DoEndElement('', TagName, '');
|
||||
end
|
||||
else
|
||||
else if TokenText[1] = '/' then
|
||||
begin
|
||||
Attr := nil;
|
||||
if TokenText[Length(fTokenText)]='/' then // handle xml/xhtml style empty tag
|
||||
begin
|
||||
setlength(fTokenText,length(fTokenText)-1);
|
||||
// Do NOT combine to a single line, as Attr is an output value!
|
||||
TagName := SplitTagString(TokenText, Attr);
|
||||
AutoClose(TagName);
|
||||
DoStartElement('', TagName, '', Attr);
|
||||
DoEndElement('', TagName, '');
|
||||
end
|
||||
else if TokenText[1] = '/' then
|
||||
begin
|
||||
Delete(FTokenText, 1, 1);
|
||||
TagName := SplitTagString(TokenText, Attr);
|
||||
elTag := LookupTag(TagName);
|
||||
i := FNesting-1;
|
||||
while (i >= 0) and (FStack[i] <> elTag) and
|
||||
(efEndTagOptional in HTMLElementProps[FStack[i]].Flags) do
|
||||
Dec(i);
|
||||
if (i>=0) and (FStack[i] = elTag) then
|
||||
while FStack[FNesting-1] <> elTag do
|
||||
begin
|
||||
DoEndElement('', HTMLElementProps[FStack[FNesting-1]].Name, '');
|
||||
namePop;
|
||||
end;
|
||||
Delete(FTokenText, 1, 1);
|
||||
TagName := SplitTagString(TokenText, Attr);
|
||||
elTag := LookupTag(TagName);
|
||||
i := FNesting-1;
|
||||
while (i >= 0) and (FStack[i] <> elTag) and
|
||||
(efEndTagOptional in HTMLElementProps[FStack[i]].Flags) do
|
||||
Dec(i);
|
||||
if (i>=0) and (FStack[i] = elTag) then
|
||||
while FStack[FNesting-1] <> elTag do
|
||||
begin
|
||||
DoEndElement('', HTMLElementProps[FStack[FNesting-1]].Name, '');
|
||||
namePop;
|
||||
end;
|
||||
|
||||
DoEndElement('', TagName, '');
|
||||
namePop;
|
||||
end
|
||||
else if TokenText[1] <> '!' then
|
||||
DoEndElement('', TagName, '');
|
||||
namePop;
|
||||
end
|
||||
else if TokenText[1] <> '!' then
|
||||
begin
|
||||
// Do NOT combine to a single line, as Attr is an output value!
|
||||
TagName := SplitTagString(TokenText, Attr);
|
||||
AutoClose(TagName);
|
||||
namePush(TagName);
|
||||
DoStartElement('', TagName, '', Attr);
|
||||
if FStack[FNesting-1] in [etScript,etStyle] then
|
||||
begin
|
||||
// Do NOT combine to a single line, as Attr is an output value!
|
||||
TagName := SplitTagString(TokenText, Attr);
|
||||
AutoClose(TagName);
|
||||
namePush(TagName);
|
||||
DoStartElement('', TagName, '', Attr);
|
||||
NewContext := scScript;
|
||||
FScriptEndTag := '</' + HTMLElementProps[FStack[FNesting-1]].Name;
|
||||
FScriptEndMatchPos := 1;
|
||||
end;
|
||||
if Assigned(Attr) then
|
||||
Attr.Free;
|
||||
end;
|
||||
if Assigned(Attr) then
|
||||
Attr.Free;
|
||||
end;
|
||||
scComment:
|
||||
begin
|
||||
DoComment(PSAXChar(TokenText), 0, Length(TokenText));
|
||||
end;
|
||||
scScript:
|
||||
begin
|
||||
DoCharacters(PSAXChar(TokenText), 0, Length(TokenText));
|
||||
DoEndElement('', HTMLElementProps[FStack[FNesting-1]].Name, '');
|
||||
namePop;
|
||||
FScriptEndTag := '';
|
||||
end;
|
||||
end;
|
||||
FScannerContext := NewContext;
|
||||
|
Loading…
Reference in New Issue
Block a user