mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-15 17:49:25 +02:00
htmldefs.pp:
+ Define elements which may omit end-tag (except HTML, HEAD and BODY which may also omit the start-tag) + Define which elements may close other elements (modelled after libxml2). * DIV may have #PCDATA content. sax_html.pp: * Improve the parser to report startElement/endElement events properly. Should resolve Mantis #14073 and related element hierarchy issues. git-svn-id: trunk@13357 -
This commit is contained in:
parent
8c7f6fb7ac
commit
fcd96805fa
@ -97,8 +97,9 @@ type
|
|||||||
efPCDATAContent, // may have PCDATA content
|
efPCDATAContent, // may have PCDATA content
|
||||||
efPreserveWhitespace, // preserve all whitespace
|
efPreserveWhitespace, // preserve all whitespace
|
||||||
efDeprecated, // can be dropped in future versions
|
efDeprecated, // can be dropped in future versions
|
||||||
efNoChecks // Checks (attributes,subtags,...) can only be implemented in descendants
|
efNoChecks, // Checks (attributes,subtags,...) can only be implemented in descendants
|
||||||
);
|
efEndTagOptional
|
||||||
|
);
|
||||||
THTMLElementFlags = set of THTMLElementFlag;
|
THTMLElementFlags = set of THTMLElementFlag;
|
||||||
|
|
||||||
PHTMLElementProps = ^THTMLElementProps;
|
PHTMLElementProps = ^THTMLElementProps;
|
||||||
@ -184,10 +185,10 @@ const
|
|||||||
(Name: 'col'; Flags: [];
|
(Name: 'col'; Flags: [];
|
||||||
Attributes: atsattrs+atscellhalign+[atvalign,atspan,atwidth]),
|
Attributes: atsattrs+atscellhalign+[atvalign,atspan,atwidth]),
|
||||||
|
|
||||||
(Name: 'colgroup'; Flags: [efSubelementContent];
|
(Name: 'colgroup'; Flags: [efSubelementContent, efEndTagOptional];
|
||||||
Attributes: atsattrs+atscellhalign+[atvalign,atspan,atwidth]),
|
Attributes: atsattrs+atscellhalign+[atvalign,atspan,atwidth]),
|
||||||
|
|
||||||
(Name: 'dd'; Flags: efSubcontent; Attributes: atsattrs),
|
(Name: 'dd'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),
|
||||||
|
|
||||||
(Name: 'del'; Flags: [efSubelementContent]; Attributes: atsattrs+[atcite,atdatetime]),
|
(Name: 'del'; Flags: [efSubelementContent]; Attributes: atsattrs+[atcite,atdatetime]),
|
||||||
|
|
||||||
@ -195,11 +196,11 @@ const
|
|||||||
|
|
||||||
(Name: 'dir'; Flags: [efSubelementContent,efDeprecated]; Attributes: atsattrs),
|
(Name: 'dir'; Flags: [efSubelementContent,efDeprecated]; Attributes: atsattrs),
|
||||||
|
|
||||||
(Name: 'div'; Flags: [efSubelementContent]; Attributes: atsattrs),
|
(Name: 'div'; Flags: efSubContent; Attributes: atsattrs),
|
||||||
|
|
||||||
(Name: 'dl'; Flags: [efSubelementContent]; Attributes: atsattrs),
|
(Name: 'dl'; Flags: [efSubelementContent]; Attributes: atsattrs),
|
||||||
|
|
||||||
(Name: 'dt'; Flags: [efPCDataContent]; Attributes: atsattrs),
|
(Name: 'dt'; Flags: [efPCDataContent, efEndTagOptional]; Attributes: atsattrs),
|
||||||
|
|
||||||
(Name: 'em'; Flags: efSubcontent; Attributes: atsattrs),
|
(Name: 'em'; Flags: efSubcontent; Attributes: atsattrs),
|
||||||
|
|
||||||
@ -260,7 +261,7 @@ const
|
|||||||
|
|
||||||
(Name: 'legend'; Flags: efSubcontent; Attributes: atsattrs+[ataccesskey]),
|
(Name: 'legend'; Flags: efSubcontent; Attributes: atsattrs+[ataccesskey]),
|
||||||
|
|
||||||
(Name: 'li'; Flags: efSubcontent; Attributes: atsattrs),
|
(Name: 'li'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),
|
||||||
|
|
||||||
(Name: 'link'; Flags: [];
|
(Name: 'link'; Flags: [];
|
||||||
Attributes: atsattrs+[atcharset,athref,athreflang,attype,atrel,atrev,atmedia]),
|
Attributes: atsattrs+[atcharset,athref,athreflang,attype,atrel,atrev,atmedia]),
|
||||||
@ -283,10 +284,10 @@ const
|
|||||||
|
|
||||||
(Name: 'optgroup'; Flags: efSubcontent; Attributes: atsattrs+[atdisabled,atlabel]),
|
(Name: 'optgroup'; Flags: efSubcontent; Attributes: atsattrs+[atdisabled,atlabel]),
|
||||||
|
|
||||||
(Name: 'option'; Flags: efSubcontent;
|
(Name: 'option'; Flags: efSubcontent+[efEndTagOptional];
|
||||||
Attributes: atsattrs+[atselected,atdisabled,atlabel,atvalue]),
|
Attributes: atsattrs+[atselected,atdisabled,atlabel,atvalue]),
|
||||||
|
|
||||||
(Name: 'p'; Flags: efSubcontent; Attributes: atsattrs),
|
(Name: 'p'; Flags: efSubcontent+[efEndTagOptional]; Attributes: atsattrs),
|
||||||
|
|
||||||
(Name: 'param'; Flags: []; Attributes: [atid,atname,atvalue,atvaluetype,attype]),
|
(Name: 'param'; Flags: []; Attributes: [atid,atname,atvalue,atvaluetype,attype]),
|
||||||
|
|
||||||
@ -324,23 +325,23 @@ const
|
|||||||
|
|
||||||
(Name: 'tbody'; Flags: [efSubelementContent]; Attributes: atsattrs+atscellhalign+[atvalign]),
|
(Name: 'tbody'; Flags: [efSubelementContent]; Attributes: atsattrs+atscellhalign+[atvalign]),
|
||||||
|
|
||||||
(Name: 'td'; Flags: efSubcontent;
|
(Name: 'td'; Flags: efSubcontent+[efEndTagOptional];
|
||||||
Attributes: atsattrs+atscellhalign+[atvalign,atabbr,ataxis,atheaders,atscope,atrowspan,atcolspan]),
|
Attributes: atsattrs+atscellhalign+[atvalign,atabbr,ataxis,atheaders,atscope,atrowspan,atcolspan]),
|
||||||
|
|
||||||
(Name: 'textarea'; Flags: [efPCDATAContent];
|
(Name: 'textarea'; Flags: [efPCDATAContent];
|
||||||
Attributes: atsattrs+[atname,atrows,atcols,atdisabled,atreadonly,attabindex,
|
Attributes: atsattrs+[atname,atrows,atcols,atdisabled,atreadonly,attabindex,
|
||||||
ataccesskey,atonfocus,atonblur,atonselect,atonchange]),
|
ataccesskey,atonfocus,atonblur,atonselect,atonchange]),
|
||||||
|
|
||||||
(Name: 'tfoot'; Flags: [efSubelementContent]; Attributes: atsattrs+atscellhalign+[atvalign]),
|
(Name: 'tfoot'; Flags: [efSubelementContent,efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign]),
|
||||||
|
|
||||||
(Name: 'th'; Flags: efSubcontent;
|
(Name: 'th'; Flags: efSubcontent+[efEndTagOptional];
|
||||||
Attributes: atsattrs+atscellhalign+[atvalign,atabbr,ataxis,atheaders,atscope,atrowspan,atcolspan]),
|
Attributes: atsattrs+atscellhalign+[atvalign,atabbr,ataxis,atheaders,atscope,atrowspan,atcolspan]),
|
||||||
|
|
||||||
(Name: 'thead'; Flags: [efSubelementContent]; Attributes: atsattrs+atscellhalign+[atvalign]),
|
(Name: 'thead'; Flags: [efSubelementContent, efEndTagOptional]; Attributes: atsattrs+atscellhalign+[atvalign]),
|
||||||
|
|
||||||
(Name: 'title'; Flags: efSubcontent; Attributes: atsi18n),
|
(Name: 'title'; Flags: efSubcontent; Attributes: atsi18n),
|
||||||
|
|
||||||
(Name: 'tr'; Flags: [efSubelementContent];
|
(Name: 'tr'; Flags: [efSubelementContent, efEndTagOptional];
|
||||||
Attributes: atsattrs+atscellhalign+[atvalign]),
|
Attributes: atsattrs+atscellhalign+[atvalign]),
|
||||||
|
|
||||||
(Name: 'tt'; Flags: efSubcontent; Attributes: atsattrs),
|
(Name: 'tt'; Flags: efSubcontent; Attributes: atsattrs),
|
||||||
@ -559,12 +560,81 @@ const
|
|||||||
function ResolveHTMLEntityReference(const Name: String;
|
function ResolveHTMLEntityReference(const Name: String;
|
||||||
var Entity: WideChar): Boolean;
|
var Entity: WideChar): Boolean;
|
||||||
|
|
||||||
|
function IsAutoClose(NewTag, OldTag: THTMLElementTag): Boolean;
|
||||||
|
|
||||||
|
|
||||||
implementation
|
implementation
|
||||||
|
|
||||||
uses SysUtils;
|
uses SysUtils;
|
||||||
|
|
||||||
|
{ Define which elements auto-close other elements, modelled after libxml2.
|
||||||
|
This is an array of variable-length lists, each terminated by etUnknown.
|
||||||
|
Indices to first element of each list are provided by AutoCloseIndex array,
|
||||||
|
which *must* be updated after any change. }
|
||||||
|
const
|
||||||
|
AutoCloseTab: array[0..277] of THTMLElementTag = (
|
||||||
|
|
||||||
|
etform, etform, etp, ethr, eth1, eth2, eth3, eth4, eth5, eth6,
|
||||||
|
etdl, etul, etol, etmenu, etdir, etaddress, etpre,
|
||||||
|
ethead, etUnknown,
|
||||||
|
ethead, etp, etUnknown,
|
||||||
|
ettitle, etp, etUnknown,
|
||||||
|
etbody, ethead, etstyle, etlink, ettitle, etp, etUnknown,
|
||||||
|
etframeset, ethead, etstyle, etlink, ettitle, etp, etUnknown,
|
||||||
|
etli, etp, eth1, eth2, eth3, eth4, eth5, eth6, etdl, etaddress,
|
||||||
|
etpre, ethead, etli, etUnknown,
|
||||||
|
ethr, etp, ethead, etUnknown,
|
||||||
|
eth1, etp, ethead, etUnknown,
|
||||||
|
eth2, etp, ethead, etUnknown,
|
||||||
|
eth3, etp, ethead, etUnknown,
|
||||||
|
eth4, etp, ethead, etUnknown,
|
||||||
|
eth5, etp, ethead, etUnknown,
|
||||||
|
eth6, etp, ethead, etUnknown,
|
||||||
|
etdir, etp, ethead, etUnknown,
|
||||||
|
etaddress, etp, ethead, etul, etUnknown,
|
||||||
|
etpre, etp, ethead, etul, etUnknown,
|
||||||
|
etblockquote, etp, ethead, etUnknown,
|
||||||
|
etdl, etp, etdt, etmenu, etdir, etaddress, etpre,
|
||||||
|
ethead, etUnknown,
|
||||||
|
etdt, etp, etmenu, etdir, etaddress, etpre,
|
||||||
|
ethead, etdd, etUnknown,
|
||||||
|
etdd, etp, etmenu, etdir, etaddress, etpre,
|
||||||
|
ethead, etdt, etUnknown,
|
||||||
|
etul, etp, ethead, etol, etmenu, etdir, etaddress, etpre, etUnknown,
|
||||||
|
etol, etp, ethead, etul, etUnknown,
|
||||||
|
etmenu, etp, ethead, etul, etUnknown,
|
||||||
|
etp, etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6, etUnknown,
|
||||||
|
etdiv, etp, ethead, etUnknown,
|
||||||
|
etnoscript, etp, ethead, etUnknown,
|
||||||
|
etcenter, etfont, etb, eti, etp, ethead, etUnknown,
|
||||||
|
eta, eta, etUnknown,
|
||||||
|
etcaption, etp, etUnknown,
|
||||||
|
etcolgroup, etcaption, etcolgroup, etcol, etp, etUnknown,
|
||||||
|
etcol, etcaption, etcol, etp, etUnknown,
|
||||||
|
ettable, etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6, etpre,
|
||||||
|
eta, etUnknown,
|
||||||
|
etth, etth, ettd, etp, etspan, etfont, eta, etb, eti, etu, etUnknown,
|
||||||
|
ettd, etth, ettd, etp, etspan, etfont, eta, etb, eti, etu, etUnknown,
|
||||||
|
ettr, etth, ettd, ettr, etcaption, etcol, etcolgroup, etp, etUnknown,
|
||||||
|
etthead, etcaption, etcol, etcolgroup, etUnknown,
|
||||||
|
ettfoot, etth, ettd, ettr, etcaption, etcol, etcolgroup, etthead,
|
||||||
|
ettbody, etp, etUnknown,
|
||||||
|
ettbody, etth, ettd, ettr, etcaption, etcol, etcolgroup, etthead,
|
||||||
|
ettfoot, ettbody, etp, etUnknown,
|
||||||
|
etoptgroup, etoption, etUnknown,
|
||||||
|
etoption, etoption, etUnknown,
|
||||||
|
etfieldset, etlegend, etp, ethead, eth1, eth2, eth3, eth4, eth5, eth6,
|
||||||
|
etpre, eta, etUnknown,
|
||||||
|
etUnknown);
|
||||||
|
|
||||||
|
AutoCloseIndex: array[0..40] of Integer = (
|
||||||
|
0, 19, 22, 25, 32, 39, 53, 57, 61, 65, 69,
|
||||||
|
73, 77, 81, 85, 90, 95, 99, 108, 117, 126,
|
||||||
|
135, 140, 145, 155, 159, 163, 170, 173, 176,
|
||||||
|
182, 187, 199, 210, 221, 230, 235, 246, 258,
|
||||||
|
261, 264
|
||||||
|
);
|
||||||
|
|
||||||
function ResolveHTMLEntityReference(const Name: String;
|
function ResolveHTMLEntityReference(const Name: String;
|
||||||
var Entity: WideChar): Boolean;
|
var Entity: WideChar): Boolean;
|
||||||
var
|
var
|
||||||
@ -639,4 +709,26 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
function IsAutoClose(NewTag, OldTag: THTMLElementTag): Boolean;
|
||||||
|
var
|
||||||
|
i, j: Integer;
|
||||||
|
begin
|
||||||
|
Result := False;
|
||||||
|
for i := 0 to high(AutoCloseIndex) do
|
||||||
|
if NewTag = AutoCloseTab[AutoCloseIndex[i]] then
|
||||||
|
begin
|
||||||
|
j := AutoCloseIndex[i]+1;
|
||||||
|
while AutoCloseTab[j] <> etUnknown do
|
||||||
|
begin
|
||||||
|
if AutoCloseTab[j] = OldTag then
|
||||||
|
begin
|
||||||
|
Result := True;
|
||||||
|
Exit;
|
||||||
|
end;
|
||||||
|
Inc(j);
|
||||||
|
end;
|
||||||
|
Exit;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
end.
|
end.
|
||||||
|
@ -52,6 +52,11 @@ type
|
|||||||
FTokenText: SAXString;
|
FTokenText: SAXString;
|
||||||
FCurStringValueDelimiter: Char;
|
FCurStringValueDelimiter: Char;
|
||||||
FAttrNameRead: Boolean;
|
FAttrNameRead: Boolean;
|
||||||
|
FStack: array of THTMLElementTag;
|
||||||
|
FNesting: Integer;
|
||||||
|
procedure AutoClose(const aName: string);
|
||||||
|
procedure NamePush(const aName: string);
|
||||||
|
procedure NamePop;
|
||||||
protected
|
protected
|
||||||
procedure EnterNewScannerContext(NewContext: THTMLScannerContext);
|
procedure EnterNewScannerContext(NewContext: THTMLScannerContext);
|
||||||
public
|
public
|
||||||
@ -122,6 +127,7 @@ constructor THTMLReader.Create;
|
|||||||
begin
|
begin
|
||||||
inherited Create;
|
inherited Create;
|
||||||
FScannerContext := scUnknown;
|
FScannerContext := scUnknown;
|
||||||
|
SetLength(FStack, 16);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
destructor THTMLReader.Destroy;
|
destructor THTMLReader.Destroy;
|
||||||
@ -265,89 +271,135 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure THTMLReader.EnterNewScannerContext(NewContext: THTMLScannerContext);
|
function LookupTag(const aName: string): THTMLElementTag;
|
||||||
|
var
|
||||||
function SplitTagString(const s: String; var Attr: TSAXAttributes): String;
|
j: THTMLElementTag;
|
||||||
var
|
begin
|
||||||
i, j: Integer;
|
for j := Low(THTMLElementTag) to High(THTMLElementTag) do
|
||||||
AttrName: String;
|
if SameText(HTMLElementProps[j].Name, aName) then
|
||||||
ValueDelimiter: Char;
|
|
||||||
DoIncJ: Boolean;
|
|
||||||
begin
|
|
||||||
Attr := nil;
|
|
||||||
i := Pos(' ', s);
|
|
||||||
if i <= 0 then
|
|
||||||
Result := LowerCase(s)
|
|
||||||
else
|
|
||||||
begin
|
begin
|
||||||
Result := LowerCase(Copy(s, 1, i - 1));
|
Result := j;
|
||||||
Attr := TSAXAttributes.Create;
|
Exit;
|
||||||
|
end;
|
||||||
|
Result := etUnknown;
|
||||||
|
end;
|
||||||
|
|
||||||
|
procedure THTMLReader.AutoClose(const aName: string);
|
||||||
|
var
|
||||||
|
newTag: THTMLElementTag;
|
||||||
|
begin
|
||||||
|
newTag := LookupTag(aName);
|
||||||
|
while (FNesting > 0) and IsAutoClose(newTag, FStack[FNesting-1]) do
|
||||||
|
begin
|
||||||
|
DoEndElement('', HTMLElementProps[FStack[FNesting-1]].Name, '');
|
||||||
|
namePop;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
procedure THTMLReader.NamePush(const aName: string);
|
||||||
|
var
|
||||||
|
tag: THTMLElementTag;
|
||||||
|
begin
|
||||||
|
tag := LookupTag(aName);
|
||||||
|
if FNesting >= Length(FStack) then
|
||||||
|
SetLength(FStack, FNesting * 2);
|
||||||
|
FStack[FNesting] := tag;
|
||||||
|
Inc(FNesting);
|
||||||
|
end;
|
||||||
|
|
||||||
|
procedure THTMLReader.NamePop;
|
||||||
|
begin
|
||||||
|
if FNesting <= 0 then
|
||||||
|
Exit;
|
||||||
|
Dec(FNesting);
|
||||||
|
FStack[FNesting] := etUnknown;
|
||||||
|
end;
|
||||||
|
|
||||||
|
function SplitTagString(const s: String; var Attr: TSAXAttributes): String;
|
||||||
|
var
|
||||||
|
i, j: Integer;
|
||||||
|
AttrName: String;
|
||||||
|
ValueDelimiter: Char;
|
||||||
|
DoIncJ: Boolean;
|
||||||
|
begin
|
||||||
|
Attr := nil;
|
||||||
|
i := Pos(' ', s);
|
||||||
|
if i <= 0 then
|
||||||
|
Result := LowerCase(s)
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
Result := LowerCase(Copy(s, 1, i - 1));
|
||||||
|
Attr := TSAXAttributes.Create;
|
||||||
|
Inc(i);
|
||||||
|
|
||||||
|
while (i <= Length(s)) and (s[i] in WhitespaceChars) do
|
||||||
Inc(i);
|
Inc(i);
|
||||||
|
|
||||||
while (i <= Length(s)) and (s[i] in WhitespaceChars) do
|
SetLength(AttrName, 0);
|
||||||
Inc(i);
|
j := i;
|
||||||
|
|
||||||
SetLength(AttrName, 0);
|
while j <= Length(s) do
|
||||||
j := i;
|
if s[j] = '=' then
|
||||||
|
begin
|
||||||
while j <= Length(s) do
|
AttrName := LowerCase(Copy(s, i, j - i));
|
||||||
if s[j] = '=' then
|
Inc(j);
|
||||||
|
if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
|
||||||
begin
|
begin
|
||||||
AttrName := LowerCase(Copy(s, i, j - i));
|
ValueDelimiter := s[j];
|
||||||
Inc(j);
|
Inc(j);
|
||||||
if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
|
|
||||||
begin
|
|
||||||
ValueDelimiter := s[j];
|
|
||||||
Inc(j);
|
|
||||||
end else
|
|
||||||
ValueDelimiter := #0;
|
|
||||||
i := j;
|
|
||||||
DoIncJ := False;
|
|
||||||
while j <= Length(s) do
|
|
||||||
if ValueDelimiter = #0 then
|
|
||||||
if s[j] in WhitespaceChars then
|
|
||||||
break
|
|
||||||
else
|
|
||||||
Inc(j)
|
|
||||||
else if s[j] = ValueDelimiter then
|
|
||||||
begin
|
|
||||||
DoIncJ := True;
|
|
||||||
break
|
|
||||||
end else
|
|
||||||
Inc(j);
|
|
||||||
|
|
||||||
Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
|
|
||||||
|
|
||||||
if DoIncJ then
|
|
||||||
Inc(j);
|
|
||||||
|
|
||||||
while (j <= Length(s)) and (s[j] in WhitespaceChars) do
|
|
||||||
Inc(j);
|
|
||||||
i := j;
|
|
||||||
end
|
|
||||||
else if s[j] in WhitespaceChars then
|
|
||||||
begin
|
|
||||||
Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
|
|
||||||
Inc(j);
|
|
||||||
while (j <= Length(s)) and (s[j] in WhitespaceChars) do
|
|
||||||
Inc(j);
|
|
||||||
i := j;
|
|
||||||
end else
|
end else
|
||||||
Inc(j);
|
ValueDelimiter := #0;
|
||||||
end;
|
i := j;
|
||||||
end;
|
DoIncJ := False;
|
||||||
|
while j <= Length(s) do
|
||||||
|
if ValueDelimiter = #0 then
|
||||||
|
if s[j] in WhitespaceChars then
|
||||||
|
break
|
||||||
|
else
|
||||||
|
Inc(j)
|
||||||
|
else if s[j] = ValueDelimiter then
|
||||||
|
begin
|
||||||
|
DoIncJ := True;
|
||||||
|
break
|
||||||
|
end else
|
||||||
|
Inc(j);
|
||||||
|
|
||||||
|
Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
|
||||||
|
|
||||||
|
if DoIncJ then
|
||||||
|
Inc(j);
|
||||||
|
|
||||||
|
while (j <= Length(s)) and (s[j] in WhitespaceChars) do
|
||||||
|
Inc(j);
|
||||||
|
i := j;
|
||||||
|
end
|
||||||
|
else if s[j] in WhitespaceChars then
|
||||||
|
begin
|
||||||
|
Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
|
||||||
|
Inc(j);
|
||||||
|
while (j <= Length(s)) and (s[j] in WhitespaceChars) do
|
||||||
|
Inc(j);
|
||||||
|
i := j;
|
||||||
|
end else
|
||||||
|
Inc(j);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
procedure THTMLReader.EnterNewScannerContext(NewContext: THTMLScannerContext);
|
||||||
var
|
var
|
||||||
Attr: TSAXAttributes;
|
Attr: TSAXAttributes;
|
||||||
TagName: String;
|
TagName: String;
|
||||||
Found: Boolean;
|
Found: Boolean;
|
||||||
Ent: SAXChar;
|
Ent: SAXChar;
|
||||||
i: Integer;
|
i: Integer;
|
||||||
|
elTag: THTMLElementTag;
|
||||||
begin
|
begin
|
||||||
case ScannerContext of
|
case ScannerContext of
|
||||||
scWhitespace:
|
scWhitespace:
|
||||||
DoIgnorableWhitespace(PSAXChar(TokenText), 1, Length(TokenText));
|
if (FNesting > 0) and (efPCDataContent in HTMLElementProps[FStack[FNesting-1]].Flags) then
|
||||||
|
DoCharacters(PSAXChar(TokenText), 0, Length(TokenText))
|
||||||
|
else
|
||||||
|
DoIgnorableWhitespace(PSAXChar(TokenText), 0, Length(TokenText));
|
||||||
scText:
|
scText:
|
||||||
DoCharacters(PSAXChar(TokenText), 0, Length(TokenText));
|
DoCharacters(PSAXChar(TokenText), 0, Length(TokenText));
|
||||||
scEntityReference:
|
scEntityReference:
|
||||||
@ -382,18 +434,35 @@ begin
|
|||||||
setlength(fTokenText,length(fTokenText)-1);
|
setlength(fTokenText,length(fTokenText)-1);
|
||||||
// Do NOT combine to a single line, as Attr is an output value!
|
// Do NOT combine to a single line, as Attr is an output value!
|
||||||
TagName := SplitTagString(TokenText, Attr);
|
TagName := SplitTagString(TokenText, Attr);
|
||||||
|
AutoClose(TagName);
|
||||||
DoStartElement('', TagName, '', Attr);
|
DoStartElement('', TagName, '', Attr);
|
||||||
DoEndElement('', TagName, '');
|
DoEndElement('', TagName, '');
|
||||||
end
|
end
|
||||||
else if TokenText[1] = '/' then
|
else if TokenText[1] = '/' then
|
||||||
begin
|
begin
|
||||||
DoEndElement('',
|
Delete(FTokenText, 1, 1);
|
||||||
SplitTagString(Copy(TokenText, 2, Length(TokenText)), Attr), '');
|
TagName := SplitTagString(TokenText, Attr);
|
||||||
|
elTag := LookupTag(TagName);
|
||||||
|
i := FNesting-1;
|
||||||
|
while (i >= 0) and (FStack[i] <> elTag) and
|
||||||
|
(efEndTagOptional in HTMLElementProps[FStack[i]].Flags) do
|
||||||
|
Dec(i);
|
||||||
|
if (i>=0) and (FStack[i] = elTag) then
|
||||||
|
while FStack[FNesting-1] <> elTag do
|
||||||
|
begin
|
||||||
|
DoEndElement('', HTMLElementProps[FStack[FNesting-1]].Name, '');
|
||||||
|
namePop;
|
||||||
|
end;
|
||||||
|
|
||||||
|
DoEndElement('', TagName, '');
|
||||||
|
namePop;
|
||||||
end
|
end
|
||||||
else if TokenText[1] <> '!' then
|
else if TokenText[1] <> '!' then
|
||||||
begin
|
begin
|
||||||
// Do NOT combine to a single line, as Attr is an output value!
|
// Do NOT combine to a single line, as Attr is an output value!
|
||||||
TagName := SplitTagString(TokenText, Attr);
|
TagName := SplitTagString(TokenText, Attr);
|
||||||
|
AutoClose(TagName);
|
||||||
|
namePush(TagName);
|
||||||
DoStartElement('', TagName, '', Attr);
|
DoStartElement('', TagName, '', Attr);
|
||||||
end;
|
end;
|
||||||
if Assigned(Attr) then
|
if Assigned(Attr) then
|
||||||
@ -427,16 +496,7 @@ end;
|
|||||||
constructor THTMLToDOMConverter.CreateFragment(AReader: THTMLReader;
|
constructor THTMLToDOMConverter.CreateFragment(AReader: THTMLReader;
|
||||||
AFragmentRoot: TDOMNode);
|
AFragmentRoot: TDOMNode);
|
||||||
begin
|
begin
|
||||||
inherited Create;
|
Create(AReader, AFragmentRoot.OwnerDocument);
|
||||||
FReader := AReader;
|
|
||||||
FReader.OnCharacters := @ReaderCharacters;
|
|
||||||
FReader.OnIgnorableWhitespace := @ReaderIgnorableWhitespace;
|
|
||||||
FReader.OnSkippedEntity := @ReaderSkippedEntity;
|
|
||||||
FReader.OnStartElement := @ReaderStartElement;
|
|
||||||
FReader.OnEndElement := @ReaderEndElement;
|
|
||||||
FDocument := AFragmentRoot.OwnerDocument;
|
|
||||||
FElementStack := TList.Create;
|
|
||||||
FNodeBuffer := TList.Create;
|
|
||||||
FragmentRoot := AFragmentRoot;
|
FragmentRoot := AFragmentRoot;
|
||||||
IsFragmentMode := True;
|
IsFragmentMode := True;
|
||||||
end;
|
end;
|
||||||
|
Loading…
Reference in New Issue
Block a user