mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-11 10:29:21 +02:00
* HTML parser: in case of malformed input, do not create attributes with invalid names (Mantis #16916).
* Along the way, eliminated one layer of needless string conversion from wide to ANSI and back. git-svn-id: trunk@15564 -
This commit is contained in:
parent
119277166e
commit
7e2f713d09
@ -31,7 +31,7 @@ unit SAX_HTML;
|
|||||||
|
|
||||||
interface
|
interface
|
||||||
|
|
||||||
uses SysUtils, Classes, SAX, DOM, DOM_HTML,htmldefs;
|
uses SysUtils, Classes, SAX, DOM, DOM_HTML,htmldefs,xmlutils;
|
||||||
|
|
||||||
type
|
type
|
||||||
|
|
||||||
@ -54,8 +54,8 @@ type
|
|||||||
FAttrNameRead: Boolean;
|
FAttrNameRead: Boolean;
|
||||||
FStack: array of THTMLElementTag;
|
FStack: array of THTMLElementTag;
|
||||||
FNesting: Integer;
|
FNesting: Integer;
|
||||||
procedure AutoClose(const aName: string);
|
procedure AutoClose(const aName: SAXString);
|
||||||
procedure NamePush(const aName: string);
|
procedure NamePush(const aName: SAXString);
|
||||||
procedure NamePop;
|
procedure NamePop;
|
||||||
protected
|
protected
|
||||||
procedure EnterNewScannerContext(NewContext: THTMLScannerContext);
|
procedure EnterNewScannerContext(NewContext: THTMLScannerContext);
|
||||||
@ -271,12 +271,14 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
function LookupTag(const aName: string): THTMLElementTag;
|
function LookupTag(const aName: SAXString): THTMLElementTag;
|
||||||
var
|
var
|
||||||
j: THTMLElementTag;
|
j: THTMLElementTag;
|
||||||
|
ansiName: string;
|
||||||
begin
|
begin
|
||||||
|
ansiName := aName;
|
||||||
for j := Low(THTMLElementTag) to High(THTMLElementTag) do
|
for j := Low(THTMLElementTag) to High(THTMLElementTag) do
|
||||||
if SameText(HTMLElementProps[j].Name, aName) then
|
if SameText(HTMLElementProps[j].Name, ansiName) then
|
||||||
begin
|
begin
|
||||||
Result := j;
|
Result := j;
|
||||||
Exit;
|
Exit;
|
||||||
@ -284,7 +286,7 @@ begin
|
|||||||
Result := etUnknown;
|
Result := etUnknown;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure THTMLReader.AutoClose(const aName: string);
|
procedure THTMLReader.AutoClose(const aName: SAXString);
|
||||||
var
|
var
|
||||||
newTag: THTMLElementTag;
|
newTag: THTMLElementTag;
|
||||||
begin
|
begin
|
||||||
@ -296,7 +298,7 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure THTMLReader.NamePush(const aName: string);
|
procedure THTMLReader.NamePush(const aName: SAXString);
|
||||||
var
|
var
|
||||||
tag: THTMLElementTag;
|
tag: THTMLElementTag;
|
||||||
begin
|
begin
|
||||||
@ -315,27 +317,27 @@ begin
|
|||||||
FStack[FNesting] := etUnknown;
|
FStack[FNesting] := etUnknown;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
function SplitTagString(const s: String; var Attr: TSAXAttributes): String;
|
function SplitTagString(const s: SAXString; var Attr: TSAXAttributes): SAXString;
|
||||||
var
|
var
|
||||||
i, j: Integer;
|
i, j: Integer;
|
||||||
AttrName: String;
|
AttrName: SAXString;
|
||||||
ValueDelimiter: Char;
|
ValueDelimiter: WideChar;
|
||||||
DoIncJ: Boolean;
|
DoIncJ: Boolean;
|
||||||
begin
|
begin
|
||||||
Attr := nil;
|
Attr := nil;
|
||||||
i := 1;
|
i := 1;
|
||||||
while (i <= Length(s)) and not (s[i] in WhitespaceChars) do
|
while (i <= Length(s)) and not IsXMLWhitespace(s[i]) do
|
||||||
Inc(i);
|
Inc(i);
|
||||||
|
|
||||||
if i = Length(s) then
|
if i = Length(s) then
|
||||||
Result := LowerCase(s)
|
Result := s
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
Result := LowerCase(Copy(s, 1, i - 1));
|
Result := Copy(s, 1, i - 1);
|
||||||
Attr := TSAXAttributes.Create;
|
Attr := TSAXAttributes.Create;
|
||||||
Inc(i);
|
Inc(i);
|
||||||
|
|
||||||
while (i <= Length(s)) and (s[i] in WhitespaceChars) do
|
while (i <= Length(s)) and IsXMLWhitespace(s[i]) do
|
||||||
Inc(i);
|
Inc(i);
|
||||||
|
|
||||||
SetLength(AttrName, 0);
|
SetLength(AttrName, 0);
|
||||||
@ -344,7 +346,8 @@ begin
|
|||||||
while j <= Length(s) do
|
while j <= Length(s) do
|
||||||
if s[j] = '=' then
|
if s[j] = '=' then
|
||||||
begin
|
begin
|
||||||
AttrName := LowerCase(Copy(s, i, j - i));
|
AttrName := Copy(s, i, j - i);
|
||||||
|
WStrLower(AttrName);
|
||||||
Inc(j);
|
Inc(j);
|
||||||
if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
|
if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
|
||||||
begin
|
begin
|
||||||
@ -356,7 +359,7 @@ begin
|
|||||||
DoIncJ := False;
|
DoIncJ := False;
|
||||||
while j <= Length(s) do
|
while j <= Length(s) do
|
||||||
if ValueDelimiter = #0 then
|
if ValueDelimiter = #0 then
|
||||||
if s[j] in WhitespaceChars then
|
if IsXMLWhitespace(s[j]) then
|
||||||
break
|
break
|
||||||
else
|
else
|
||||||
Inc(j)
|
Inc(j)
|
||||||
@ -367,31 +370,34 @@ begin
|
|||||||
end else
|
end else
|
||||||
Inc(j);
|
Inc(j);
|
||||||
|
|
||||||
Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
|
if IsXMLName(AttrName) then
|
||||||
|
Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
|
||||||
|
|
||||||
if DoIncJ then
|
if DoIncJ then
|
||||||
Inc(j);
|
Inc(j);
|
||||||
|
|
||||||
while (j <= Length(s)) and (s[j] in WhitespaceChars) do
|
while (j <= Length(s)) and IsXMLWhitespace(s[j]) do
|
||||||
Inc(j);
|
Inc(j);
|
||||||
i := j;
|
i := j;
|
||||||
end
|
end
|
||||||
else if s[j] in WhitespaceChars then
|
else if IsXMLWhitespace(s[j]) then
|
||||||
begin
|
begin
|
||||||
Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
|
if IsXMLName(@s[i], j-i) then
|
||||||
|
Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
|
||||||
Inc(j);
|
Inc(j);
|
||||||
while (j <= Length(s)) and (s[j] in WhitespaceChars) do
|
while (j <= Length(s)) and IsXMLWhitespace(s[j]) do
|
||||||
Inc(j);
|
Inc(j);
|
||||||
i := j;
|
i := j;
|
||||||
end else
|
end else
|
||||||
Inc(j);
|
Inc(j);
|
||||||
end;
|
end;
|
||||||
|
WStrLower(result);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure THTMLReader.EnterNewScannerContext(NewContext: THTMLScannerContext);
|
procedure THTMLReader.EnterNewScannerContext(NewContext: THTMLScannerContext);
|
||||||
var
|
var
|
||||||
Attr: TSAXAttributes;
|
Attr: TSAXAttributes;
|
||||||
TagName: String;
|
TagName: SAXString;
|
||||||
Ent: SAXChar;
|
Ent: SAXChar;
|
||||||
i: Integer;
|
i: Integer;
|
||||||
elTag: THTMLElementTag;
|
elTag: THTMLElementTag;
|
||||||
@ -502,30 +508,22 @@ end;
|
|||||||
procedure THTMLToDOMConverter.ReaderCharacters(Sender: TObject;
|
procedure THTMLToDOMConverter.ReaderCharacters(Sender: TObject;
|
||||||
const ch: PSAXChar; Start, Count: Integer);
|
const ch: PSAXChar; Start, Count: Integer);
|
||||||
var
|
var
|
||||||
s: SAXString;
|
|
||||||
NodeInfo: THTMLNodeInfo;
|
NodeInfo: THTMLNodeInfo;
|
||||||
begin
|
begin
|
||||||
SetLength(s, Count);
|
|
||||||
Move(ch^, s[1], Count * SizeOf(SAXChar));
|
|
||||||
|
|
||||||
NodeInfo := THTMLNodeInfo.Create;
|
NodeInfo := THTMLNodeInfo.Create;
|
||||||
NodeInfo.NodeType := ntText;
|
NodeInfo.NodeType := ntText;
|
||||||
NodeInfo.DOMNode := FDocument.CreateTextNode(s);
|
NodeInfo.DOMNode := FDocument.CreateTextNodeBuf(ch, Count, False);
|
||||||
FNodeBuffer.Add(NodeInfo);
|
FNodeBuffer.Add(NodeInfo);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure THTMLToDOMConverter.ReaderIgnorableWhitespace(Sender: TObject;
|
procedure THTMLToDOMConverter.ReaderIgnorableWhitespace(Sender: TObject;
|
||||||
const ch: PSAXChar; Start, Count: Integer);
|
const ch: PSAXChar; Start, Count: Integer);
|
||||||
var
|
var
|
||||||
s: SAXString;
|
|
||||||
NodeInfo: THTMLNodeInfo;
|
NodeInfo: THTMLNodeInfo;
|
||||||
begin
|
begin
|
||||||
SetLength(s, Count);
|
|
||||||
Move(ch^, s[1], Count * SizeOf(SAXChar));
|
|
||||||
|
|
||||||
NodeInfo := THTMLNodeInfo.Create;
|
NodeInfo := THTMLNodeInfo.Create;
|
||||||
NodeInfo.NodeType := ntWhitespace;
|
NodeInfo.NodeType := ntWhitespace;
|
||||||
NodeInfo.DOMNode := FDocument.CreateTextNode(s);
|
NodeInfo.DOMNode := FDocument.CreateTextNodeBuf(ch, Count, False);
|
||||||
FNodeBuffer.Add(NodeInfo);
|
FNodeBuffer.Add(NodeInfo);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
@ -35,6 +35,7 @@ function IsXmlWhiteSpace(c: WideChar): Boolean;
|
|||||||
function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
|
function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
|
||||||
{ beware, works in ASCII range only }
|
{ beware, works in ASCII range only }
|
||||||
function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
|
function WStrLIComp(S1, S2: PWideChar; Len: Integer): Integer;
|
||||||
|
procedure WStrLower(var S: WideString);
|
||||||
|
|
||||||
type
|
type
|
||||||
TXMLVersion = (xmlVersionUnknown, xmlVersion10, xmlVersion11);
|
TXMLVersion = (xmlVersionUnknown, xmlVersion10, xmlVersion11);
|
||||||
@ -385,6 +386,15 @@ begin
|
|||||||
result := c1 - c2;
|
result := c1 - c2;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
procedure WStrLower(var S: WideString);
|
||||||
|
var
|
||||||
|
i: Integer;
|
||||||
|
begin
|
||||||
|
for i := 1 to Length(S) do
|
||||||
|
if (S[i] >= 'A') and (S[i] <= 'Z') then
|
||||||
|
Inc(word(S[i]), 32);
|
||||||
|
end;
|
||||||
|
|
||||||
function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
|
function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
|
||||||
begin
|
begin
|
||||||
Result := InitValue;
|
Result := InitValue;
|
||||||
|
Loading…
Reference in New Issue
Block a user