mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-12-17 00:20:46 +01:00
dom.pp: - Removed unneeded TDOMNode.FNodeName/FNodeValue fields to reduce memory requirements; - finished TDOMElement.GetElementsByTagName; - Implemented TDOMNode.TextContent property; - Implemented attribute value normalization; - Fixed TDOMNode.Normalize and TDOMNode.InsertBefore for fragments. names.inc: - rewritten in cross-platform way, added XML 1.1 data; xmlwrite.pp: - Added support for Unicode characters with code > 65535; - Added writing TDOMDocumentType nodes; - Fixed handling of end-of-lines contained in node content; - Code cleaned up, so this unit is almost complete :) git-svn-id: trunk@4775 -
627 lines
15 KiB
ObjectPascal
627 lines
15 KiB
ObjectPascal
{
|
||
This file is part of the Free Component Library
|
||
|
||
XML writing routines
|
||
Copyright (c) 1999-2000 by Sebastian Guenther, sg@freepascal.org
|
||
Modified in 2006 by Sergei Gorelkin, sergei_gorelkin@mail.ru
|
||
|
||
See the file COPYING.FPC, included in this distribution,
|
||
for details about the copyright.
|
||
|
||
This program is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||
|
||
**********************************************************************}
|
||
|
||
|
||
unit XMLWrite;
|
||
|
||
{$ifdef fpc}
|
||
{$MODE objfpc}
|
||
{$H+}
|
||
{/$DEFINE HAS_INLINE}
|
||
{$INLINE OFF}
|
||
{$endif}
|
||
|
||
interface
|
||
|
||
uses Classes, DOM;
|
||
|
||
procedure WriteXMLFile(doc: TXMLDocument; const AFileName: String); overload;
|
||
procedure WriteXMLFile(doc: TXMLDocument; var AFile: Text); overload;
|
||
procedure WriteXMLFile(doc: TXMLDocument; AStream: TStream); overload;
|
||
|
||
procedure WriteXML(Element: TDOMNode; const AFileName: String); overload;
|
||
procedure WriteXML(Element: TDOMNode; var AFile: Text); overload;
|
||
procedure WriteXML(Element: TDOMNode; AStream: TStream); overload;
|
||
|
||
|
||
// ===================================================================
|
||
|
||
implementation
|
||
|
||
uses SysUtils;
|
||
|
||
type
|
||
TCharacters = set of Char;
|
||
TSpecialCharCallback = procedure(c: WideChar) of object;
|
||
|
||
TXMLWriter = class(TObject)
|
||
private
|
||
FInsideTextNode: Boolean;
|
||
FIndent: WideString;
|
||
FIndentCount: Integer;
|
||
FBuffer: PChar;
|
||
FBufPos: PChar;
|
||
FCapacity: Integer;
|
||
FLineBreak: string;
|
||
procedure wrtChars(Src: PWideChar; Length: Integer);
|
||
procedure IncIndent;
|
||
procedure DecIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
|
||
procedure wrtStr(const ws: WideString); {$IFDEF HAS_INLINE} inline; {$ENDIF}
|
||
procedure wrtChr(c: WideChar); {$IFDEF HAS_INLINE} inline; {$ENDIF}
|
||
procedure wrtLineEnd; {$IFDEF HAS_INLINE} inline; {$ENDIF}
|
||
procedure wrtIndent; {$IFDEF HAS_INLINE} inline; {$ENDIF}
|
||
procedure wrtQuotedLiteral(const ws: WideString);
|
||
procedure ConvWrite(const s: WideString; const SpecialChars: TCharacters;
|
||
const SpecialCharCallback: TSpecialCharCallback);
|
||
procedure AttrSpecialCharCallback(c: WideChar);
|
||
procedure TextNodeSpecialCharCallback(c: WideChar);
|
||
protected
|
||
procedure Write(const Buffer; Count: Longint); virtual; abstract;
|
||
procedure WriteNode(Node: TDOMNode);
|
||
procedure VisitDocument(Node: TDOMNode);
|
||
procedure VisitElement(Node: TDOMNode);
|
||
procedure VisitText(Node: TDOMNode);
|
||
procedure VisitCDATA(Node: TDOMNode);
|
||
procedure VisitComment(Node: TDOMNode);
|
||
procedure VisitFragment(Node: TDOMNode);
|
||
procedure VisitAttribute(Node: TDOMNode);
|
||
procedure VisitEntityRef(Node: TDOMNode);
|
||
procedure VisitDocumentType(Node: TDOMNode);
|
||
procedure VisitPI(Node: TDOMNode);
|
||
public
|
||
constructor Create;
|
||
destructor Destroy; override;
|
||
end;
|
||
|
||
TTextXMLWriter = Class(TXMLWriter)
|
||
Private
|
||
F : ^Text;
|
||
Protected
|
||
Procedure Write(Const Buffer; Count : Longint);override;
|
||
Public
|
||
constructor Create(var AFile: Text);
|
||
end;
|
||
|
||
TStreamXMLWriter = Class(TXMLWriter)
|
||
Private
|
||
F : TStream;
|
||
Protected
|
||
Procedure Write(Const Buffer; Count : Longint);override;
|
||
Public
|
||
constructor Create(AStream: TStream);
|
||
end;
|
||
|
||
{ ---------------------------------------------------------------------
|
||
TTextXMLWriter
|
||
---------------------------------------------------------------------}
|
||
|
||
|
||
constructor TTextXMLWriter.Create(var AFile: Text);
|
||
begin
|
||
inherited Create;
|
||
f := @AFile;
|
||
end;
|
||
|
||
procedure TTextXMLWriter.Write(const Buffer; Count: Longint);
|
||
var
|
||
s: string;
|
||
begin
|
||
if Count>0 then
|
||
begin
|
||
SetString(s, PChar(@Buffer), Count);
|
||
system.Write(f^, s);
|
||
end;
|
||
end;
|
||
|
||
{ ---------------------------------------------------------------------
|
||
TStreamXMLWriter
|
||
---------------------------------------------------------------------}
|
||
|
||
constructor TStreamXMLWriter.Create(AStream: TStream);
|
||
begin
|
||
inherited Create;
|
||
F := AStream;
|
||
end;
|
||
|
||
|
||
procedure TStreamXMLWriter.Write(const Buffer; Count: Longint);
|
||
begin
|
||
if Count > 0 then
|
||
F.Write(Buffer, Count);
|
||
end;
|
||
|
||
|
||
{ ---------------------------------------------------------------------
|
||
TXMLWriter
|
||
---------------------------------------------------------------------}
|
||
|
||
constructor TXMLWriter.Create;
|
||
var
|
||
I: Integer;
|
||
begin
|
||
inherited Create;
|
||
// some overhead - always be able to write at least one extra UCS4
|
||
FBuffer := AllocMem(512+32);
|
||
FBufPos := FBuffer;
|
||
FCapacity := 512;
|
||
// Initialize Indent string
|
||
SetLength(FIndent, 100);
|
||
for I := 1 to 100 do FIndent[I] := ' ';
|
||
FIndentCount := 0;
|
||
// Later on, this may be put under user control
|
||
// for now, take OS setting
|
||
FLineBreak := sLineBreak;
|
||
end;
|
||
|
||
destructor TXMLWriter.Destroy;
|
||
begin
|
||
if FBufPos > FBuffer then
|
||
write(FBuffer^, FBufPos-FBuffer);
|
||
|
||
FreeMem(FBuffer);
|
||
inherited Destroy;
|
||
end;
|
||
|
||
procedure TXMLWriter.wrtChars(Src: PWideChar; Length: Integer);
|
||
var
|
||
pb: PChar;
|
||
wc: Cardinal;
|
||
SrcEnd: PWideChar;
|
||
I: Integer;
|
||
begin
|
||
pb := FBufPos;
|
||
SrcEnd := Src + Length;
|
||
while Src < SrcEnd do
|
||
begin
|
||
if pb >= @FBuffer[FCapacity] then
|
||
begin
|
||
write(FBuffer^, FCapacity);
|
||
Dec(pb, FCapacity);
|
||
if pb > FBuffer then
|
||
Move(FBuffer[FCapacity], FBuffer^, pb - FBuffer);
|
||
end;
|
||
|
||
wc := Cardinal(Src^); Inc(Src);
|
||
case wc of
|
||
$0A: for I := 1 to System.Length(FLineBreak) do
|
||
begin
|
||
pb^ := FLineBreak[I]; Inc(pb);
|
||
end;
|
||
|
||
0..$09, $0B..$7F: begin
|
||
pb^ := char(wc); Inc(pb);
|
||
end;
|
||
|
||
$80..$7FF: begin
|
||
pb^ := Char($C0 or (wc shr 6)); Inc(pb);
|
||
pb^ := Char($80 or (wc and $3F)); Inc(pb);
|
||
end;
|
||
|
||
$D800..$DBFF: begin
|
||
if (Src < SrcEnd) and (Src^ >= #$DC00) and (Src^ <= #$DFFF) then
|
||
begin
|
||
wc := ((wc - $D7C0) shl 10) + (word(Src^) xor $DC00);
|
||
Inc(Src);
|
||
|
||
pb^ := Char($F0 or (wc shr 18)); Inc(pb);
|
||
pb^ := Char($80 or ((wc shr 12) and $3F)); Inc(pb);
|
||
pb^ := Char($80 or ((wc shr 6) and $3F)); Inc(pb);
|
||
pb^ := Char($80 or (wc and $3F)); Inc(pb);
|
||
end
|
||
else
|
||
raise EConvertError.Create('High surrogate without low one');
|
||
end;
|
||
$DC00..$DFFF:
|
||
raise EConvertError.Create('Low surrogate without high one');
|
||
else // $800 >= wc > $FFFF, excluding surrogates
|
||
begin
|
||
pb^ := Char($E0 or (wc shr 12)); Inc(pb);
|
||
pb^ := Char($80 or ((wc shr 6) and $3F)); Inc(pb);
|
||
pb^ := Char($80 or (wc and $3F)); Inc(pb);
|
||
end;
|
||
end;
|
||
end;
|
||
FBufPos := pb;
|
||
end;
|
||
|
||
procedure TXMLWriter.wrtStr(const ws: WideString); { inline }
|
||
begin
|
||
wrtChars(PWideChar(ws), Length(ws));
|
||
end;
|
||
|
||
procedure TXMLWriter.wrtChr(c: WideChar); { inline }
|
||
begin
|
||
wrtChars(@c,1);
|
||
end;
|
||
|
||
procedure TXMLWriter.wrtLineEnd; { inline }
|
||
begin
|
||
// line endings now handled in WrtStr!
|
||
wrtChr(#10);
|
||
end;
|
||
|
||
procedure TXMLWriter.wrtIndent; { inline }
|
||
begin
|
||
wrtChars(PWideChar(FIndent), FIndentCount*2);
|
||
end;
|
||
|
||
procedure TXMLWriter.IncIndent;
|
||
var
|
||
I, NewLen, OldLen: Integer;
|
||
begin
|
||
Inc(FIndentCount);
|
||
if Length(FIndent) < 2 * FIndentCount then
|
||
begin
|
||
OldLen := Length(FIndent);
|
||
NewLen := 4 * FIndentCount;
|
||
SetLength(FIndent, NewLen);
|
||
for I := OldLen to NewLen do
|
||
FIndent[I] := ' ';
|
||
end;
|
||
end;
|
||
|
||
procedure TXMLWriter.DecIndent; { inline }
|
||
begin
|
||
if FIndentCount>0 then dec(FIndentCount);
|
||
end;
|
||
|
||
procedure TXMLWriter.wrtQuotedLiteral(const ws: WideString);
|
||
var
|
||
Quote: WideChar;
|
||
begin
|
||
// TODO: need to check if the string also contains single quote
|
||
// both quotes present is a error
|
||
if Pos('"', ws) > 0 then
|
||
Quote := ''''
|
||
else
|
||
Quote := '"';
|
||
wrtChr(Quote);
|
||
wrtStr(ws);
|
||
wrtChr(Quote);
|
||
end;
|
||
|
||
const
|
||
AttrSpecialChars = ['<', '"', '&', #9, #10, #13];
|
||
TextSpecialChars = ['<', '>', '&'];
|
||
|
||
procedure TXMLWriter.ConvWrite(const s: WideString; const SpecialChars: TCharacters;
|
||
const SpecialCharCallback: TSpecialCharCallback);
|
||
var
|
||
StartPos, EndPos: Integer;
|
||
begin
|
||
StartPos := 1;
|
||
EndPos := 1;
|
||
while EndPos <= Length(s) do
|
||
begin
|
||
if (s[EndPos] < #255) and (Char(s[EndPos]) in SpecialChars) then
|
||
begin
|
||
wrtChars(@s[StartPos], EndPos - StartPos);
|
||
SpecialCharCallback(s[EndPos]);
|
||
StartPos := EndPos + 1;
|
||
end;
|
||
Inc(EndPos);
|
||
end;
|
||
if StartPos <= length(s) then
|
||
wrtChars(@s[StartPos], EndPos - StartPos);
|
||
end;
|
||
|
||
const
|
||
QuotStr = '"';
|
||
AmpStr = '&';
|
||
ltStr = '<';
|
||
gtStr = '>';
|
||
|
||
procedure TXMLWriter.AttrSpecialCharCallback(c: WideChar);
|
||
begin
|
||
case c of
|
||
'"': wrtStr(QuotStr);
|
||
'&': wrtStr(AmpStr);
|
||
'<': wrtStr(ltStr);
|
||
// Escape whitespace using CharRefs to be consistent with W3 spec <20> 3.3.3
|
||
#9: wrtStr('	');
|
||
#10: wrtStr('
');
|
||
#13: wrtStr('
');
|
||
else
|
||
wrtChr(c);
|
||
end;
|
||
end;
|
||
|
||
procedure TXMLWriter.TextnodeSpecialCharCallback(c: WideChar);
|
||
begin
|
||
case c of
|
||
'<': wrtStr(ltStr);
|
||
'>': wrtStr(gtStr); // Required only in ']]>' literal, otherwise optional
|
||
'&': wrtStr(AmpStr);
|
||
else
|
||
wrtChr(c);
|
||
end;
|
||
end;
|
||
|
||
procedure TXMLWriter.WriteNode(node: TDOMNode);
|
||
begin
|
||
case node.NodeType of
|
||
ELEMENT_NODE: VisitElement(node);
|
||
ATTRIBUTE_NODE: VisitAttribute(node);
|
||
TEXT_NODE: VisitText(node);
|
||
CDATA_SECTION_NODE: VisitCDATA(node);
|
||
ENTITY_REFERENCE_NODE: VisitEntityRef(node);
|
||
PROCESSING_INSTRUCTION_NODE: VisitPI(node);
|
||
COMMENT_NODE: VisitComment(node);
|
||
DOCUMENT_NODE: VisitDocument(node);
|
||
DOCUMENT_TYPE_NODE: VisitDocumentType(node);
|
||
DOCUMENT_FRAGMENT_NODE: VisitFragment(node);
|
||
end;
|
||
end;
|
||
|
||
|
||
procedure TXMLWriter.VisitElement(node: TDOMNode);
|
||
var
|
||
i: Integer;
|
||
attr, child: TDOMNode;
|
||
SavedInsideTextNode: Boolean;
|
||
IsLeaf: Boolean;
|
||
MixedContent: Boolean;
|
||
begin
|
||
if not FInsideTextNode then
|
||
wrtIndent;
|
||
wrtChr('<');
|
||
wrtStr(node.NodeName);
|
||
// FIX: Accessing Attributes was causing them to be created for every element :(
|
||
if node.HasAttributes then
|
||
for i := 0 to node.Attributes.Length - 1 do
|
||
begin
|
||
attr := node.Attributes.Item[i];
|
||
VisitAttribute(attr);
|
||
end;
|
||
Child := node.FirstChild;
|
||
if Child = nil then
|
||
wrtStr('/>')
|
||
else
|
||
begin
|
||
SavedInsideTextNode := FInsideTextNode;
|
||
wrtChr('>');
|
||
MixedContent := False;
|
||
repeat
|
||
if Assigned(Child.PreviousSibling) and
|
||
(Child.PreviousSibling.InheritsFrom(TDOMText) <> Child.InheritsFrom(TDOMText)) then
|
||
MixedContent := True;
|
||
Child := Child.NextSibling;
|
||
until Child = nil;
|
||
Child := node.FirstChild; // restore
|
||
|
||
IsLeaf := (Child = node.LastChild) and (Child.FirstChild = nil);
|
||
if not (FInsideTextNode or MixedContent or IsLeaf) then
|
||
wrtLineEnd;
|
||
|
||
FInsideTextNode := {FInsideTextNode or} MixedContent or IsLeaf;
|
||
IncIndent;
|
||
repeat
|
||
WriteNode(Child);
|
||
Child := Child.NextSibling;
|
||
until Child = nil;
|
||
DecIndent;
|
||
if not FInsideTextNode then
|
||
wrtIndent;
|
||
FInsideTextNode := SavedInsideTextNode;
|
||
wrtStr('</');
|
||
wrtStr(Node.NodeName);
|
||
wrtChr('>');
|
||
end;
|
||
if not FInsideTextNode then
|
||
wrtLineEnd;
|
||
end;
|
||
|
||
procedure TXMLWriter.VisitText(node: TDOMNode);
|
||
begin
|
||
ConvWrite(node.NodeValue, TextSpecialChars, {$IFDEF FPC}@{$ENDIF}TextnodeSpecialCharCallback);
|
||
end;
|
||
|
||
procedure TXMLWriter.VisitCDATA(node: TDOMNode);
|
||
begin
|
||
if not FInsideTextNode then
|
||
wrtIndent;
|
||
wrtStr('<![CDATA[');
|
||
wrtStr(node.NodeValue);
|
||
wrtStr(']]>');
|
||
if not FInsideTextNode then
|
||
wrtLineEnd;
|
||
end;
|
||
|
||
procedure TXMLWriter.VisitEntityRef(node: TDOMNode);
|
||
begin
|
||
wrtChr('&');
|
||
wrtStr(node.NodeName);
|
||
wrtChr(';');
|
||
end;
|
||
|
||
procedure TXMLWriter.VisitPI(node: TDOMNode);
|
||
begin
|
||
if not FInsideTextNode then wrtIndent;
|
||
wrtStr('<?');
|
||
wrtStr(TDOMProcessingInstruction(node).Target);
|
||
wrtChr(' ');
|
||
wrtStr(TDOMProcessingInstruction(node).Data);
|
||
wrtStr('?>');
|
||
if not FInsideTextNode then wrtLineEnd;
|
||
end;
|
||
|
||
procedure TXMLWriter.VisitComment(node: TDOMNode);
|
||
begin
|
||
if not FInsideTextNode then wrtIndent;
|
||
wrtStr('<!--');
|
||
wrtStr(node.NodeValue);
|
||
wrtStr('-->');
|
||
if not FInsideTextNode then wrtLineEnd;
|
||
end;
|
||
|
||
procedure TXMLWriter.VisitDocument(node: TDOMNode);
|
||
var
|
||
child: TDOMNode;
|
||
begin
|
||
wrtStr('<?xml version="');
|
||
// Definitely should not escape anything here
|
||
if Length(TXMLDocument(node).XMLVersion) > 0 then
|
||
wrtStr(TXMLDocument(node).XMLVersion)
|
||
else
|
||
wrtStr('1.0');
|
||
wrtChr('"');
|
||
|
||
// DISABLED - we are only able write in UTF-8 which does not require labeling
|
||
// writing incorrect encoding will render xml unreadable...
|
||
(*
|
||
if Length(TXMLDocument(node).Encoding) > 0 then
|
||
begin
|
||
wrtStr(' encoding="');
|
||
wrtStr(TXMLDocument(node).Encoding);
|
||
wrtChr('"');
|
||
end;
|
||
*)
|
||
wrtStr('?>'#10);
|
||
|
||
// TODO: now handled as a regular PI, remove this?
|
||
if Length(TXMLDocument(node).StylesheetType) > 0 then
|
||
begin
|
||
wrtStr('<?xml-stylesheet type="');
|
||
wrtStr(TXMLDocument(node).StylesheetType);
|
||
wrtStr('" href="');
|
||
wrtStr(TXMLDocument(node).StylesheetHRef);
|
||
wrtStr('"?>'#10);
|
||
end;
|
||
|
||
child := node.FirstChild;
|
||
while Assigned(Child) do
|
||
begin
|
||
WriteNode(Child);
|
||
Child := Child.NextSibling;
|
||
end;
|
||
end;
|
||
|
||
procedure TXMLWriter.VisitAttribute(Node: TDOMNode);
|
||
var
|
||
Child: TDOMNode;
|
||
begin
|
||
wrtChr(' ');
|
||
wrtStr(Node.NodeName);
|
||
wrtStr('="');
|
||
Child := Node.FirstChild;
|
||
while Assigned(Child) do
|
||
begin
|
||
if Child.NodeType = ENTITY_REFERENCE_NODE then
|
||
VisitEntityRef(Child)
|
||
else
|
||
ConvWrite(Child.NodeValue, AttrSpecialChars, {$IFDEF FPC}@{$ENDIF}AttrSpecialCharCallback);
|
||
Child := Child.NextSibling;
|
||
end;
|
||
wrtChr('"');
|
||
end;
|
||
|
||
procedure TXMLWriter.VisitDocumentType(Node: TDOMNode);
|
||
begin
|
||
wrtStr('<!DOCTYPE ');
|
||
wrtStr(Node.NodeName);
|
||
with TDOMDocumentType(Node) do
|
||
begin
|
||
if PublicID <> '' then
|
||
begin
|
||
wrtStr(' PUBLIC ');
|
||
wrtQuotedLiteral(PublicID);
|
||
wrtChr(' ');
|
||
wrtQuotedLiteral(SystemID);
|
||
end
|
||
else if SystemID <> '' then
|
||
begin
|
||
wrtStr(' SYSTEM ');
|
||
wrtQuotedLiteral(SystemID);
|
||
end;
|
||
if InternalSubset <> '' then
|
||
begin
|
||
wrtChr('[');
|
||
wrtStr(InternalSubset);
|
||
wrtChr(']');
|
||
end;
|
||
end;
|
||
wrtStr('>'#10);
|
||
end;
|
||
|
||
procedure TXMLWriter.VisitFragment(Node: TDOMNode);
|
||
var
|
||
Child: TDOMNode;
|
||
begin
|
||
// Fragment itself should not be written, only its children should...
|
||
Child := Node.FirstChild;
|
||
while Assigned(Child) do
|
||
begin
|
||
WriteNode(Child);
|
||
Child := Child.NextSibling;
|
||
end;
|
||
end;
|
||
|
||
|
||
// -------------------------------------------------------------------
|
||
// Interface implementation
|
||
// -------------------------------------------------------------------
|
||
|
||
procedure WriteXMLFile(doc: TXMLDocument; const AFileName: String);
|
||
var
|
||
fs: TFileStream;
|
||
begin
|
||
fs := TFileStream.Create(AFileName, fmCreate);
|
||
try
|
||
WriteXMLFile(doc, fs);
|
||
finally
|
||
fs.Free;
|
||
end;
|
||
end;
|
||
|
||
procedure WriteXMLFile(doc: TXMLDocument; var AFile: Text);
|
||
begin
|
||
with TTextXMLWriter.Create(AFile) do
|
||
try
|
||
WriteNode(doc);
|
||
finally
|
||
Free;
|
||
end;
|
||
end;
|
||
|
||
procedure WriteXMLFile(doc: TXMLDocument; AStream: TStream);
|
||
begin
|
||
with TStreamXMLWriter.Create(AStream) do
|
||
try
|
||
WriteNode(doc);
|
||
finally
|
||
Free;
|
||
end;
|
||
end;
|
||
|
||
procedure WriteXML(Element: TDOMNode; const AFileName: String);
|
||
begin
|
||
WriteXMLFile(TXMLDocument(Element), AFileName);
|
||
end;
|
||
|
||
procedure WriteXML(Element: TDOMNode; var AFile: Text);
|
||
begin
|
||
WriteXMLFile(TXMLDocument(Element), AFile);
|
||
end;
|
||
|
||
procedure WriteXML(Element: TDOMNode; AStream: TStream);
|
||
begin
|
||
WriteXMLFile(TXMLDocument(Element), AStream);
|
||
end;
|
||
|
||
|
||
|
||
end.
|