mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-16 05:09:17 +02:00
XML writer:
* Moved line ending processing from the encoder to a higher level; without this, implementing/using external encoders is very problematic.
+ Implemented line ending processing for c14n mode.
git-svn-id: trunk@14194 -
This commit is contained in:
parent
966bcbec9f
commit
0589b2a846
@ -59,7 +59,7 @@ type
|
|||||||
FBuffer: PChar;
|
FBuffer: PChar;
|
||||||
FBufPos: PChar;
|
FBufPos: PChar;
|
||||||
FCapacity: Integer;
|
FCapacity: Integer;
|
||||||
FLineBreak: string;
|
FLineBreak: WideString;
|
||||||
FNSHelper: TNSSupport;
|
FNSHelper: TNSSupport;
|
||||||
FAttrFixups: TFPList;
|
FAttrFixups: TFPList;
|
||||||
FScratch: TFPList;
|
FScratch: TFPList;
|
||||||
@ -156,6 +156,16 @@ end;
|
|||||||
TXMLWriter
|
TXMLWriter
|
||||||
---------------------------------------------------------------------}
|
---------------------------------------------------------------------}
|
||||||
|
|
||||||
|
const
|
||||||
|
AttrSpecialChars = ['<', '"', '&', #9, #10, #13];
|
||||||
|
TextSpecialChars = ['<', '>', '&', #10, #13];
|
||||||
|
CDSectSpecialChars = [']'];
|
||||||
|
LineEndingChars = [#13, #10];
|
||||||
|
QuotStr = '&quot;';
|
||||||
|
AmpStr = '&amp;';
|
||||||
|
ltStr = '&lt;';
|
||||||
|
gtStr = '&gt;';
|
||||||
|
|
||||||
constructor TXMLWriter.Create;
|
constructor TXMLWriter.Create;
|
||||||
var
|
var
|
||||||
I: Integer;
|
I: Integer;
|
||||||
@ -165,14 +175,22 @@ begin
|
|||||||
FBuffer := AllocMem(512+32);
|
FBuffer := AllocMem(512+32);
|
||||||
FBufPos := FBuffer;
|
FBufPos := FBuffer;
|
||||||
FCapacity := 512;
|
FCapacity := 512;
|
||||||
// Initialize Indent string
|
|
||||||
SetLength(FIndent, 100);
|
|
||||||
FIndent[1] := #10;
|
|
||||||
for I := 2 to 100 do FIndent[I] := ' ';
|
|
||||||
FIndentCount := 0;
|
|
||||||
// Later on, this may be put under user control
|
// Later on, this may be put under user control
|
||||||
// for now, take OS setting
|
// for now, take OS setting
|
||||||
FLineBreak := sLineBreak;
|
if FCanonical then
|
||||||
|
FLineBreak := #10
|
||||||
|
else
|
||||||
|
FLineBreak := sLineBreak;
|
||||||
|
// Initialize Indent string
|
||||||
|
// TODO: this must be done in setter of FLineBreak
|
||||||
|
SetLength(FIndent, 100);
|
||||||
|
FIndent[1] := FLineBreak[1];
|
||||||
|
if Length(FLineBreak) > 1 then
|
||||||
|
FIndent[2] := FLineBreak[2]
|
||||||
|
else
|
||||||
|
FIndent[2] := ' ';
|
||||||
|
for I := 3 to 100 do FIndent[I] := ' ';
|
||||||
|
FIndentCount := 0;
|
||||||
FNSHelper := TNSSupport.Create;
|
FNSHelper := TNSSupport.Create;
|
||||||
FScratch := TFPList.Create;
|
FScratch := TFPList.Create;
|
||||||
FNSDefs := TFPList.Create;
|
FNSDefs := TFPList.Create;
|
||||||
@ -216,14 +234,7 @@ begin
|
|||||||
|
|
||||||
wc := Cardinal(Src^); Inc(Src);
|
wc := Cardinal(Src^); Inc(Src);
|
||||||
case wc of
|
case wc of
|
||||||
$0A: pb := StrECopy(pb, PChar(FLineBreak));
|
0..$7F: begin
|
||||||
$0D: begin
|
|
||||||
pb := StrECopy(pb, PChar(FLineBreak));
|
|
||||||
if (Src < SrcEnd) and (Src^ = #$0A) then
|
|
||||||
Inc(Src);
|
|
||||||
end;
|
|
||||||
|
|
||||||
0..$09, $0B, $0C, $0E..$7F: begin
|
|
||||||
pb^ := char(wc); Inc(pb);
|
pb^ := char(wc); Inc(pb);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -276,7 +287,7 @@ end;
|
|||||||
|
|
||||||
procedure TXMLWriter.wrtIndent; { inline }
|
procedure TXMLWriter.wrtIndent; { inline }
|
||||||
begin
|
begin
|
||||||
wrtChars(PWideChar(FIndent), FIndentCount*2+1);
|
wrtChars(PWideChar(FIndent), FIndentCount*2+Length(FLineBreak));
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TXMLWriter.IncIndent;
|
procedure TXMLWriter.IncIndent;
|
||||||
@ -299,26 +310,6 @@ begin
|
|||||||
if FIndentCount>0 then dec(FIndentCount);
|
if FIndentCount>0 then dec(FIndentCount);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TXMLWriter.wrtQuotedLiteral(const ws: WideString);
|
|
||||||
var
|
|
||||||
Quote: WideChar;
|
|
||||||
begin
|
|
||||||
// TODO: need to check if the string also contains single quote
|
|
||||||
// both quotes present is a error
|
|
||||||
if Pos('"', ws) > 0 then
|
|
||||||
Quote := ''''
|
|
||||||
else
|
|
||||||
Quote := '"';
|
|
||||||
wrtChr(Quote);
|
|
||||||
wrtStr(ws);
|
|
||||||
wrtChr(Quote);
|
|
||||||
end;
|
|
||||||
|
|
||||||
const
|
|
||||||
AttrSpecialChars = ['<', '"', '&', #9, #10, #13];
|
|
||||||
TextSpecialChars = ['<', '>', '&'];
|
|
||||||
CDSectSpecialChars = [']'];
|
|
||||||
|
|
||||||
procedure TXMLWriter.ConvWrite(const s: WideString; const SpecialChars: TSetOfChar;
|
procedure TXMLWriter.ConvWrite(const s: WideString; const SpecialChars: TSetOfChar;
|
||||||
const SpecialCharCallback: TSpecialCharCallback);
|
const SpecialCharCallback: TSpecialCharCallback);
|
||||||
var
|
var
|
||||||
@ -328,7 +319,7 @@ begin
|
|||||||
EndPos := 1;
|
EndPos := 1;
|
||||||
while EndPos <= Length(s) do
|
while EndPos <= Length(s) do
|
||||||
begin
|
begin
|
||||||
if (s[EndPos] < #255) and (Char(ord(s[EndPos])) in SpecialChars) then
|
if (s[EndPos] < 'A') and (Char(ord(s[EndPos])) in SpecialChars) then
|
||||||
begin
|
begin
|
||||||
wrtChars(@s[StartPos], EndPos - StartPos);
|
wrtChars(@s[StartPos], EndPos - StartPos);
|
||||||
SpecialCharCallback(Self, s, EndPos);
|
SpecialCharCallback(Self, s, EndPos);
|
||||||
@ -340,12 +331,6 @@ begin
|
|||||||
wrtChars(@s[StartPos], EndPos - StartPos);
|
wrtChars(@s[StartPos], EndPos - StartPos);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
const
|
|
||||||
QuotStr = '&quot;';
|
|
||||||
AmpStr = '&amp;';
|
|
||||||
ltStr = '&lt;';
|
|
||||||
gtStr = '&gt;';
|
|
||||||
|
|
||||||
procedure AttrSpecialCharCallback(Sender: TXMLWriter; const s: DOMString;
|
procedure AttrSpecialCharCallback(Sender: TXMLWriter; const s: DOMString;
|
||||||
var idx: Integer);
|
var idx: Integer);
|
||||||
begin
|
begin
|
||||||
@ -362,13 +347,35 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TextnodeSpecialCharCallback(Sender: TXMLWriter; const s: DOMString;
|
procedure TextnodeNormalCallback(Sender: TXMLWriter; const s: DOMString;
|
||||||
var idx: Integer);
|
var idx: Integer);
|
||||||
begin
|
begin
|
||||||
case s[idx] of
|
case s[idx] of
|
||||||
'<': Sender.wrtStr(ltStr);
|
'<': Sender.wrtStr(ltStr);
|
||||||
'>': Sender.wrtStr(gtStr); // Required only in ']]>' literal, otherwise optional
|
'>': Sender.wrtStr(gtStr); // Required only in ']]>' literal, otherwise optional
|
||||||
'&': Sender.wrtStr(AmpStr);
|
'&': Sender.wrtStr(AmpStr);
|
||||||
|
#13:
|
||||||
|
begin
|
||||||
|
// We normalize #13#10 and #13 to FLineBreak, going somewhat
|
||||||
|
// beyond the specs here, see issue #13879.
|
||||||
|
Sender.wrtStr(Sender.FLineBreak);
|
||||||
|
if (idx < Length(s)) and (s[idx+1] = #10) then
|
||||||
|
Inc(idx);
|
||||||
|
end;
|
||||||
|
#10: Sender.wrtStr(Sender.FLineBreak);
|
||||||
|
else
|
||||||
|
Sender.wrtChr(s[idx]);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
procedure TextnodeCanonicalCallback(Sender: TXMLWriter; const s: DOMString;
|
||||||
|
var idx: Integer);
|
||||||
|
begin
|
||||||
|
case s[idx] of
|
||||||
|
'<': Sender.wrtStr(ltStr);
|
||||||
|
'>': Sender.wrtStr(gtStr);
|
||||||
|
'&': Sender.wrtStr(AmpStr);
|
||||||
|
#13: Sender.wrtStr('&#xD;')
|
||||||
else
|
else
|
||||||
Sender.wrtChr(s[idx]);
|
Sender.wrtChr(s[idx]);
|
||||||
end;
|
end;
|
||||||
@ -387,6 +394,27 @@ begin
|
|||||||
Sender.wrtChr(s[idx]);
|
Sender.wrtChr(s[idx]);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
const
|
||||||
|
TextnodeCallbacks: array[boolean] of TSpecialCharCallback = (
|
||||||
|
@TextnodeNormalCallback,
|
||||||
|
@TextnodeCanonicalCallback
|
||||||
|
);
|
||||||
|
|
||||||
|
procedure TXMLWriter.wrtQuotedLiteral(const ws: WideString);
|
||||||
|
var
|
||||||
|
Quote: WideChar;
|
||||||
|
begin
|
||||||
|
// TODO: need to check if the string also contains single quote
|
||||||
|
// both quotes present is a error
|
||||||
|
if Pos('"', ws) > 0 then
|
||||||
|
Quote := ''''
|
||||||
|
else
|
||||||
|
Quote := '"';
|
||||||
|
wrtChr(Quote);
|
||||||
|
ConvWrite(ws, LineEndingChars, @TextnodeNormalCallback);
|
||||||
|
wrtChr(Quote);
|
||||||
|
end;
|
||||||
|
|
||||||
procedure TXMLWriter.WriteNode(node: TDOMNode);
|
procedure TXMLWriter.WriteNode(node: TDOMNode);
|
||||||
begin
|
begin
|
||||||
case node.NodeType of
|
case node.NodeType of
|
||||||
@ -605,7 +633,7 @@ end;
|
|||||||
|
|
||||||
procedure TXMLWriter.VisitText(node: TDOMNode);
|
procedure TXMLWriter.VisitText(node: TDOMNode);
|
||||||
begin
|
begin
|
||||||
ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, @TextnodeSpecialCharCallback);
|
ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, TextnodeCallbacks[FCanonical]);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TXMLWriter.VisitCDATA(node: TDOMNode);
|
procedure TXMLWriter.VisitCDATA(node: TDOMNode);
|
||||||
@ -613,7 +641,7 @@ begin
|
|||||||
if not FInsideTextNode then
|
if not FInsideTextNode then
|
||||||
wrtIndent;
|
wrtIndent;
|
||||||
if FCanonical then
|
if FCanonical then
|
||||||
ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, @TextnodeSpecialCharCallback)
|
ConvWrite(TDOMCharacterData(node).Data, TextSpecialChars, @TextnodeCanonicalCallback)
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
wrtChars('<![CDATA[', 9);
|
wrtChars('<![CDATA[', 9);
|
||||||
@ -637,7 +665,8 @@ begin
|
|||||||
if TDOMProcessingInstruction(node).Data <> '' then
|
if TDOMProcessingInstruction(node).Data <> '' then
|
||||||
begin
|
begin
|
||||||
wrtChr(' ');
|
wrtChr(' ');
|
||||||
wrtStr(TDOMProcessingInstruction(node).Data);
|
// TODO: How does this comply with c14n??
|
||||||
|
ConvWrite(TDOMProcessingInstruction(node).Data, LineEndingChars, @TextnodeNormalCallback);
|
||||||
end;
|
end;
|
||||||
wrtStr('?>');
|
wrtStr('?>');
|
||||||
end;
|
end;
|
||||||
@ -646,7 +675,8 @@ procedure TXMLWriter.VisitComment(node: TDOMNode);
|
|||||||
begin
|
begin
|
||||||
if not FInsideTextNode then wrtIndent;
|
if not FInsideTextNode then wrtIndent;
|
||||||
wrtChars('<!--', 4);
|
wrtChars('<!--', 4);
|
||||||
wrtStr(TDOMCharacterData(node).Data);
|
// TODO: How does this comply with c14n??
|
||||||
|
ConvWrite(TDOMCharacterData(node).Data, LineEndingChars, @TextnodeNormalCallback);
|
||||||
wrtChars('-->', 3);
|
wrtChars('-->', 3);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -677,7 +707,8 @@ begin
|
|||||||
// TODO: now handled as a regular PI, remove this?
|
// TODO: now handled as a regular PI, remove this?
|
||||||
if Length(TXMLDocument(node).StylesheetType) > 0 then
|
if Length(TXMLDocument(node).StylesheetType) > 0 then
|
||||||
begin
|
begin
|
||||||
wrtStr(#10'<?xml-stylesheet type="');
|
wrtStr(FLineBreak);
|
||||||
|
wrtStr('<?xml-stylesheet type="');
|
||||||
wrtStr(TXMLDocument(node).StylesheetType);
|
wrtStr(TXMLDocument(node).StylesheetType);
|
||||||
wrtStr('" href="');
|
wrtStr('" href="');
|
||||||
wrtStr(TXMLDocument(node).StylesheetHRef);
|
wrtStr(TXMLDocument(node).StylesheetHRef);
|
||||||
@ -690,7 +721,7 @@ begin
|
|||||||
WriteNode(Child);
|
WriteNode(Child);
|
||||||
Child := Child.NextSibling;
|
Child := Child.NextSibling;
|
||||||
end;
|
end;
|
||||||
wrtChars(#10, 1);
|
wrtStr(FLineBreak);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TXMLWriter.VisitDocument_Canonical(Node: TDOMNode);
|
procedure TXMLWriter.VisitDocument_Canonical(Node: TDOMNode);
|
||||||
@ -746,7 +777,8 @@ end;
|
|||||||
|
|
||||||
procedure TXMLWriter.VisitDocumentType(Node: TDOMNode);
|
procedure TXMLWriter.VisitDocumentType(Node: TDOMNode);
|
||||||
begin
|
begin
|
||||||
wrtStr(#10'<!DOCTYPE ');
|
wrtStr(FLineBreak);
|
||||||
|
wrtStr('<!DOCTYPE ');
|
||||||
wrtStr(Node.NodeName);
|
wrtStr(Node.NodeName);
|
||||||
wrtChr(' ');
|
wrtChr(' ');
|
||||||
with TDOMDocumentType(Node) do
|
with TDOMDocumentType(Node) do
|
||||||
@ -766,7 +798,7 @@ begin
|
|||||||
if InternalSubset <> '' then
|
if InternalSubset <> '' then
|
||||||
begin
|
begin
|
||||||
wrtChr('[');
|
wrtChr('[');
|
||||||
wrtStr(InternalSubset);
|
ConvWrite(InternalSubset, LineEndingChars, @TextnodeNormalCallback);
|
||||||
wrtChr(']');
|
wrtChr(']');
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
Loading…
Reference in New Issue
Block a user