mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-16 15:59:35 +02:00
sax_xml.pp:
* Applied the counterpart of sax_html.pp r15564, eliminating redundant wide-to-ansi conversions; * The AStart parameter of the IgnorableWhitespace event should be zero, not 1; * XML is case-sensitive, so the calls to lowercase() were removed; * Accumulate token characters in FRawTokenText, then convert the whole token at once to SAXString. Without this, handling multi-byte encodings like UTF-8 was impossible, because the reader converted each byte individually, which always resulted in errors. Provides a partial fix for Mantis #16732. Also provides a single location to insert a proper decoding procedure. git-svn-id: trunk@15738 -
This commit is contained in:
parent
f138637678
commit
1c7c97dc93
@ -40,6 +40,7 @@ type
|
|||||||
FEndOfStream: Boolean;
|
FEndOfStream: Boolean;
|
||||||
FScannerContext: TXMLScannerContext;
|
FScannerContext: TXMLScannerContext;
|
||||||
FTokenText: SAXString;
|
FTokenText: SAXString;
|
||||||
|
FRawTokenText: string;
|
||||||
FCurStringValueDelimiter: Char;
|
FCurStringValueDelimiter: Char;
|
||||||
FAttrNameRead: Boolean;
|
FAttrNameRead: Boolean;
|
||||||
protected
|
protected
|
||||||
@ -103,7 +104,9 @@ procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream);
|
|||||||
|
|
||||||
implementation
|
implementation
|
||||||
|
|
||||||
uses htmldefs; // for entities...
|
uses
|
||||||
|
xmlutils,
|
||||||
|
htmldefs; // for entities...
|
||||||
|
|
||||||
const
|
const
|
||||||
WhitespaceChars = [#9, #10, #13, ' '];
|
WhitespaceChars = [#9, #10, #13, ' '];
|
||||||
@ -154,6 +157,7 @@ begin
|
|||||||
|
|
||||||
BufferPos := 0;
|
BufferPos := 0;
|
||||||
while (BufferPos < BufferSize) and not FStopFlag do
|
while (BufferPos < BufferSize) and not FStopFlag do
|
||||||
|
begin
|
||||||
case ScannerContext of
|
case ScannerContext of
|
||||||
scUnknown:
|
scUnknown:
|
||||||
case Buffer[BufferPos] of
|
case Buffer[BufferPos] of
|
||||||
@ -176,7 +180,7 @@ begin
|
|||||||
case Buffer[BufferPos] of
|
case Buffer[BufferPos] of
|
||||||
#9, #10, #13, ' ':
|
#9, #10, #13, ' ':
|
||||||
begin
|
begin
|
||||||
FTokenText := FTokenText + Buffer[BufferPos];
|
FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
||||||
Inc(BufferPos);
|
Inc(BufferPos);
|
||||||
end;
|
end;
|
||||||
'&':
|
'&':
|
||||||
@ -190,7 +194,7 @@ begin
|
|||||||
EnterNewScannerContext(scTag);
|
EnterNewScannerContext(scTag);
|
||||||
end;
|
end;
|
||||||
else
|
else
|
||||||
FScannerContext := scText
|
FScannerContext := scText;
|
||||||
end;
|
end;
|
||||||
scText:
|
scText:
|
||||||
case Buffer[BufferPos] of
|
case Buffer[BufferPos] of
|
||||||
@ -206,7 +210,7 @@ begin
|
|||||||
end;
|
end;
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
FTokenText := FTokenText + Buffer[BufferPos];
|
FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
||||||
Inc(BufferPos);
|
Inc(BufferPos);
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
@ -220,7 +224,7 @@ begin
|
|||||||
EnterNewScannerContext(scUnknown)
|
EnterNewScannerContext(scUnknown)
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
FTokenText := FTokenText + Buffer[BufferPos];
|
FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
||||||
Inc(BufferPos);
|
Inc(BufferPos);
|
||||||
end;
|
end;
|
||||||
scTag:
|
scTag:
|
||||||
@ -237,13 +241,13 @@ begin
|
|||||||
FAttrNameRead := False;
|
FAttrNameRead := False;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
FTokenText := FTokenText + Buffer[BufferPos];
|
FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
||||||
Inc(BufferPos);
|
Inc(BufferPos);
|
||||||
end;
|
end;
|
||||||
'=':
|
'=':
|
||||||
begin
|
begin
|
||||||
FAttrNameRead := True;
|
FAttrNameRead := True;
|
||||||
FTokenText := FTokenText + Buffer[BufferPos];
|
FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
||||||
Inc(BufferPos);
|
Inc(BufferPos);
|
||||||
end;
|
end;
|
||||||
'>':
|
'>':
|
||||||
@ -254,39 +258,37 @@ begin
|
|||||||
end;
|
end;
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
FTokenText := FTokenText + Buffer[BufferPos];
|
FRawTokenText := FRawTokenText + Buffer[BufferPos];
|
||||||
Inc(BufferPos);
|
Inc(BufferPos);
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
end;
|
end; // case ScannerContext of
|
||||||
|
end; // while not endOfBuffer
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TSAXXMLReader.EnterNewScannerContext(NewContext: TXMLScannerContext);
|
function SplitTagString(const s: SAXString; var Attr: TSAXAttributes): SAXString;
|
||||||
|
|
||||||
function SplitTagString(const s: String; var Attr: TSAXAttributes): String;
|
|
||||||
var
|
var
|
||||||
i, j: Integer;
|
i, j: Integer;
|
||||||
AttrName: String;
|
AttrName: SAXString;
|
||||||
ValueDelimiter: Char;
|
ValueDelimiter: WideChar;
|
||||||
DoIncJ: Boolean;
|
DoIncJ: Boolean;
|
||||||
begin
|
begin
|
||||||
Attr := nil;
|
Attr := nil;
|
||||||
i := 0;
|
i := 0;
|
||||||
repeat
|
repeat
|
||||||
Inc(i)
|
Inc(i)
|
||||||
until (i > Length(s)) or (s[i] in WhitespaceChars);
|
until (i > Length(s)) or IsXMLWhitespace(s[i]);
|
||||||
|
|
||||||
if i > Length(s) then
|
if i > Length(s) then
|
||||||
Result := LowerCase(s)
|
Result := s
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
Result := LowerCase(Copy(s, 1, i - 1));
|
Result := Copy(s, 1, i - 1);
|
||||||
Attr := TSAXAttributes.Create;
|
Attr := TSAXAttributes.Create;
|
||||||
|
|
||||||
Inc(i);
|
Inc(i);
|
||||||
|
|
||||||
while (i <= Length(s)) and (s[i] in WhitespaceChars) do
|
while (i <= Length(s)) and IsXMLWhitespace(s[i]) do
|
||||||
Inc(i);
|
Inc(i);
|
||||||
|
|
||||||
SetLength(AttrName, 0);
|
SetLength(AttrName, 0);
|
||||||
@ -295,7 +297,7 @@ procedure TSAXXMLReader.EnterNewScannerContext(NewContext: TXMLScannerContext);
|
|||||||
while j <= Length(s) do
|
while j <= Length(s) do
|
||||||
if s[j] = '=' then
|
if s[j] = '=' then
|
||||||
begin
|
begin
|
||||||
AttrName := LowerCase(Copy(s, i, j - i));
|
AttrName := Copy(s, i, j - i);
|
||||||
Inc(j);
|
Inc(j);
|
||||||
if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
|
if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
|
||||||
begin
|
begin
|
||||||
@ -307,7 +309,7 @@ procedure TSAXXMLReader.EnterNewScannerContext(NewContext: TXMLScannerContext);
|
|||||||
DoIncJ := False;
|
DoIncJ := False;
|
||||||
while j <= Length(s) do
|
while j <= Length(s) do
|
||||||
if ValueDelimiter = #0 then
|
if ValueDelimiter = #0 then
|
||||||
if s[j] in WhitespaceChars then
|
if IsXMLWhitespace(s[j]) then
|
||||||
break
|
break
|
||||||
else
|
else
|
||||||
Inc(j)
|
Inc(j)
|
||||||
@ -318,20 +320,22 @@ procedure TSAXXMLReader.EnterNewScannerContext(NewContext: TXMLScannerContext);
|
|||||||
end else
|
end else
|
||||||
Inc(j);
|
Inc(j);
|
||||||
|
|
||||||
|
if IsXMLName(AttrName) then
|
||||||
Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
|
Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
|
||||||
|
|
||||||
if DoIncJ then
|
if DoIncJ then
|
||||||
Inc(j);
|
Inc(j);
|
||||||
|
|
||||||
while (j <= Length(s)) and (s[j] in WhitespaceChars) do
|
while (j <= Length(s)) and IsXMLWhitespace(s[j]) do
|
||||||
Inc(j);
|
Inc(j);
|
||||||
i := j;
|
i := j;
|
||||||
end
|
end
|
||||||
else if s[j] in WhitespaceChars then
|
else if IsXMLWhitespace(s[j]) then
|
||||||
begin
|
begin
|
||||||
|
if IsXMLName(@s[i], j-i) then
|
||||||
Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
|
Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
|
||||||
Inc(j);
|
Inc(j);
|
||||||
while (j <= Length(s)) and (s[j] in WhitespaceChars) do
|
while (j <= Length(s)) and IsXMLWhitespace(s[j]) do
|
||||||
Inc(j);
|
Inc(j);
|
||||||
i := j;
|
i := j;
|
||||||
end else
|
end else
|
||||||
@ -339,14 +343,16 @@ procedure TSAXXMLReader.EnterNewScannerContext(NewContext: TXMLScannerContext);
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
procedure TSAXXMLReader.EnterNewScannerContext(NewContext: TXMLScannerContext);
|
||||||
var
|
var
|
||||||
Attr: TSAXAttributes;
|
Attr: TSAXAttributes;
|
||||||
TagName: String;
|
TagName: SAXString;
|
||||||
Ent: SAXChar;
|
Ent: SAXChar;
|
||||||
begin
|
begin
|
||||||
|
FTokenText := FRawTokenText; // this is where conversion takes place
|
||||||
case ScannerContext of
|
case ScannerContext of
|
||||||
scWhitespace:
|
scWhitespace:
|
||||||
DoIgnorableWhitespace(PSAXChar(TokenText), 1, Length(TokenText));
|
DoIgnorableWhitespace(PSAXChar(TokenText), 0, Length(TokenText));
|
||||||
scText:
|
scText:
|
||||||
DoCharacters(PSAXChar(TokenText), 0, Length(TokenText));
|
DoCharacters(PSAXChar(TokenText), 0, Length(TokenText));
|
||||||
scEntityReference:
|
scEntityReference:
|
||||||
@ -397,7 +403,8 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
FScannerContext := NewContext;
|
FScannerContext := NewContext;
|
||||||
SetLength(FTokenText, 0);
|
FTokenText := '';
|
||||||
|
FRawTokenText := '';
|
||||||
FCurStringValueDelimiter := #0;
|
FCurStringValueDelimiter := #0;
|
||||||
FAttrNameRead := False;
|
FAttrNameRead := False;
|
||||||
end;
|
end;
|
||||||
|
Loading…
Reference in New Issue
Block a user