* Small cleanups

* Replaced htmldoc unit with dom_html unit
* Added SAX parser framework and SAX HTML parser
This commit is contained in:
sg 2002-12-11 21:06:07 +00:00
parent 84243edae7
commit d2e9518fbe
6 changed files with 2435 additions and 8 deletions

View File

@ -213,9 +213,9 @@ UNITSDIR:=$(wildcard $(FPCDIR)/units/$(OS_TARGET))
endif
PACKAGESDIR:=$(wildcard $(FPCDIR) $(FPCDIR)/packages/base $(FPCDIR)/packages/extra)
override PACKAGE_NAME=fcl
override TARGET_UNITS+=dom htmldoc xmlcfg xmlread xmlstreaming xmlwrite xhtml htmldefs htmwrite
override TARGET_UNITS+=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmldefs htmwrite
override INSTALL_FPCPACKAGE=y
override COMPILER_OPTIONS+=-S2
override COMPILER_OPTIONS+=-S2h
override COMPILER_TARGETDIR+=../$(OS_TARGET)
ifdef REQUIRE_UNITSDIR
override UNITSDIR+=$(REQUIRE_UNITSDIR)

View File

@ -6,10 +6,10 @@
main=fcl
[target]
units=dom htmldoc xmlcfg xmlread xmlstreaming xmlwrite xhtml htmldefs htmwrite
units=sax dom sax_html dom_html xmlcfg xmlread xmlstreaming xmlwrite xhtml htmldefs htmwrite
[compiler]
options=-S2
options=-S2h
targetdir=../$(OS_TARGET)
[install]

View File

@ -32,9 +32,6 @@
unit DOM;
{$MODE objfpc}
{$H+}
interface
uses SysUtils, Classes;
@ -1504,7 +1501,12 @@ end.
{
$Log$
Revision 1.10 2002-09-07 15:15:29 peter
Revision 1.11 2002-12-11 21:06:07 sg
* Small cleanups
* Replaced htmldoc unit with dom_html unit
* Added SAX parser framework and SAX HTML parser
Revision 1.10 2002/09/07 15:15:29 peter
* old logs removed and tabs fixed
Revision 1.9 2002/03/01 10:02:38 sg

932
fcl/xml/dom_html.pp Normal file
View File

@ -0,0 +1,932 @@
{
$Id$
This file is part of the Free Component Library
Implementation of DOM HTML interfaces
Copyright (c) 2002 by
Areca Systems GmbH / Sebastian Guenther, sg@freepascal.org
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
{ Please note that this is a very early version, most properties and methods
are not implemented yet. }
unit DOM_HTML;
interface
uses DOM;
type
THTMLDocument = class;
THTMLFormElement = class;
THTMLTableCaptionElement = class;
THTMLTableSectionElement = class;
THTMLCollection = class
public
property Length: Cardinal; // !!!: ro
function Item(Index: Cardinal): TDOMNode;
function NamedItem(const Index: DOMString): TDOMNode;
end;
THTMLOptionsCollection = class
public
property Length: Cardinal; // !!!: ro
function Item(Index: Cardinal): TDOMNode;
function NamedItem(const Index: DOMString): TDOMNode;
end;
THTMLElement = class(TDOMElement)
private
function GetID: DOMString;
procedure SetID(const Value: DOMString);
function GetTitle: DOMString;
procedure SetTitle(const Value: DOMString);
function GetLang: DOMString;
procedure SetLang(const Value: DOMString);
function GetDir: DOMString;
procedure SetDir(const Value: DOMString);
function GetClassName: DOMString;
procedure SetClassName(const Value: DOMString);
protected
constructor Create(AOwner: THTMLDocument; const ATagName: DOMString);
public
property ID: DOMString read GetID write SetID;
property Title: DOMString read GetTitle write SetTitle;
property Lang: DOMString read GetLang write SetLang;
property Dir: DOMString read GetDir write SetDir;
property ClassName: DOMString read GetClassName write SetClassName;
end;
THTMLHtmlElement = class(THTMLElement)
private
function GetVersion: DOMString;
procedure SetVersion(const Value: DOMString);
public
property Version: DOMString read GetVersion write SetVersion;
end;
THTMLHeadElement = class(THTMLElement)
private
function GetProfile: DOMString;
procedure SetProfile(const Value: DOMString);
public
property Profile: DOMString read GetProfile write SetProfile;
end;
THTMLLinkElement = class(THTMLElement)
public
property Disabled: Boolean; // !!!: rw
property Charset: DOMString; // !!!: rw
property HRef: DOMString; // !!!: rw
property HRefLang: DOMString; // !!!: rw
property Media: DOMString; // !!!: rw
property Rel: DOMString; // !!!: rw
property Rev: DOMString; // !!!: rw
property Target: DOMString; // !!!: rw
property HTMLType: DOMString; // !!!: rw
end;
THTMLTitleElement = class(THTMLElement)
public
property Text: DOMString; // !!!: rw
end;
THTMLMetaElement = class(THTMLElement)
public
property Content: DOMString; // !!!: rw
property HTTPEqiv: DOMString; // !!!: rw
property Name: DOMString; // !!!: rw
property Scheme: DOMString; // !!!: rw
end;
THTMLBaseElement = class(THTMLElement)
public
property HRef: DOMString; // !!!: rw
property Target: DOMString; // !!!: rw
end;
THTMLIsIndexElement = class(THTMLElement)
public
property Form: THTMLFormElement; // !!!: ro
property Prompt: DOMString; // !!!: rw
end;
THTMLStyleElement = class(THTMLElement)
public
property Disabled: Boolean; // !!!: rw
property Media: DOMString; // !!!: rw
property HTMLType: DOMString; // !!!: rw
end;
THTMLBodyElement = class(THTMLElement)
public
property ALink: DOMString; // !!!: rw
property Background: DOMString; // !!!: rw
property BgColor: DOMString; // !!!: rw
property Link: DOMString; // !!!: rw
property Text: DOMString; // !!!: rw
property VLink: DOMString; // !!!: rw
end;
THTMLFormElement = class(THTMLElement)
public
property Elements: THTMLCollection; // !!!: ro
property Length: Integer; // !!!: ro
property Name: DOMString; // !!!: rw
property AcceptCharset: DOMString; // !!!: rw
property Action: DOMString; // !!!: rw
property EncType: DOMString; // !!!: rw
property Method: DOMString; // !!!: rw
property Target: DOMString; // !!!: rw
procedure Submit; virtual; abstract;
procedure Reset; virtual; abstract;
end;
THTMLSelectElement = class(THTMLElement)
public
property HTMLType: DOMString; // !!!: ro
property SelectedIndex: Integer; // !!!: rw
property Value: DOMString; // !!!: rw
property Length: Cardinal; // !!!: rw
property Form: THTMLFormElement; // !!!: ro
property Options: THTMLOptionsCollection; // !!!: ro
property Disabled: Boolean; // !!!: rw
property Multiple: Boolean; // !!!: rw
property Name: DOMString; // !!!: rw
property Size: Integer; // !!!: rw
property TabIndex: Integer; // !!!: rw
procedure Add(Element, Before: THTMLElement);
procedure Remove(Index: Integer);
procedure Blur; virtual; abstract;
procedure Focus; virtual; abstract;
end;
THTMLOptGroupElement = class(THTMLElement)
public
property Disabled: Boolean; // !!!: rw
property GroupLabel: DOMString; // !!!: rw
end;
THTMLOptionElement = class(THTMLElement)
public
property Form: THTMLFormElement; // !!!: ro
property DefaultSelected: Boolean; // !!!: rw
property Text: DOMString; // !!!: ro
property Index: Integer; // !!!: ro
property Disabled: Boolean; // !!!: rw
property OptionLabel: DOMString; // !!!: rw
property Selected: Boolean; // !!!: rw
property Value: DOMString; // !!!: rw
end;
THTMLInputElement = class(THTMLElement)
public
property DefaultValue: DOMString; // !!!: rw
property DefaultChecked: Boolean; // !!!: rw
property Form: THTMLFormElement; // !!!: ro
property Accept: DOMString; // !!!: rw
property AccessKey: DOMString; // !!!: rw
property Align: DOMString; // !!!: rw
property Alt: DOMString; // !!!: rw
property Checked: Boolean; // !!!: rw
property Disabled: Boolean; // !!!: rw
property MaxLength: Integer; // !!!: rw
property Name: DOMString; // !!!: rw
property ReadOnly: Boolean; // !!!: rw
property Size: Cardinal; // !!!: rw
property Src: DOMString; // !!!: rw
property TabIndex: Integer; // !!!: rw
property HTMLType: DOMString; // !!!: rw
property UseMap: DOMString; // !!!: rw
property Value: DOMString; // !!!: rw
procedure Blur; virtual; abstract;
procedure Focus; virtual; abstract;
procedure Select; virtual; abstract;
procedure Click; virtual; abstract;
end;
THTMLTextAreaElement = class(THTMLElement)
public
property DefaultValue: DOMString; // !!!: rw
property Form: THTMLFormElement; // !!!: ro
property AccessKey: DOMString; // !!!: rw
property Cols: Integer; // !!!: rw
property Disabled: Boolean; // !!!: rw
property Name: DOMString; // !!!: rw
property ReadOnly: Boolean; // !!!: rw
property Rows: Integer; // !!!: rw
property TabIndex: Integer; // !!!: rw
property HTMLType: DOMString; // !!!: rw
property Value: DOMString; // !!!: rw
procedure Blur; virtual; abstract;
procedure Focus; virtual; abstract;
procedure Select; virtual; abstract;
end;
THTMLButtonElement = class(THTMLElement)
public
property Form: THTMLFormElement; // !!!: ro
property AccessKey: DOMString; // !!!: rw
property Disabled: Boolean; // !!!: rw
property Name: DOMString; // !!!: rw
property TabIndex: Integer; // !!!: rw
property HTMLType: DOMString; // !!!: rw
property Value: DOMString; // !!!: rw
end;
THTMLLabelElement = class(THTMLElement)
public
property Form: THTMLFormElement; // !!!: ro
property AccessKey: DOMString; // !!!: rw
property HtmlFor: DOMString; // !!!: rw
end;
THTMLFieldSetElement = class(THTMLElement)
public
property Form: THTMLFormElement; // !!!: ro
end;
THTMLLegendElement = class(THTMLElement)
public
property Form: THTMLFormElement; // !!!: ro
property AccessKey: DOMString; // !!!: rw
property Align: DOMString; // !!!: rw
end;
THTMLUListElement = class(THTMLElement)
public
property Compact: Boolean; // !!!: rw
property HTMLType: DOMString; // !!!: rw
end;
THTMLOListElement = class(THTMLElement)
public
property Compact: Boolean; // !!!: rw
property Start: Integer; // !!!: rw
property HTMLType: DOMString; // !!!: rw
end;
THTMLDListElement = class(THTMLElement)
public
property Compact: Boolean; // !!!: rw
end;
THTMLDirectoryElement = class(THTMLElement)
public
property Compact: Boolean; // !!!: rw
end;
THTMLMenuElement = class(THTMLElement)
public
property Compact: Boolean; // !!!: rw
end;
THTMLLIElement = class(THTMLElement)
public
property HTMLType: DOMString; // !!!: rw
property Value: Integer; // !!!: rw
end;
THTMLDivElement = class(THTMLElement)
public
property Align: DOMString; // !!!: rw
end;
THTMLParagraphElement = class(THTMLElement)
public
property Align: DOMString; // !!!: rw
end;
THTMLHeadingElement = class(THTMLElement)
public
property Align: DOMString; // !!!: rw
end;
THTMLQuoteElement = class(THTMLElement)
public
property Cite: DOMString; // !!!: rw
end;
THTMLPreElement = class(THTMLElement)
public
property Width: Integer; // !!!: rw
end;
THTMLBREElement = class(THTMLElement)
public
property Clear: DOMString; // !!!: rw
end;
THTMLBaseFontElement = class(THTMLElement)
public
property Color: DOMString; // !!!: rw
property Face: DOMString; // !!!: rw
property Size: Integer; // !!!: rw
end;
THTMLFontElement = class(THTMLElement)
public
property Color: DOMString; // !!!: rw
property Face: DOMString; // !!!: rw
property Size: Integer; // !!!: rw
end;
THTMLHRElement = class(THTMLElement)
public
property Align: DOMString; // !!!: rw
property NoShade: Boolean; // !!!: rw
property Size: DOMString; // !!!: rw
property Width: DOMString; // !!!: rw
end;
THTMLModElement = class(THTMLElement)
public
property Cite: DOMString; // !!!: rw
property DateTime: DOMString; // !!!: rw
end;
THTMLAnchorElement = class(THTMLElement)
public
property AccessKey: DOMString; // !!!: rw
property Charset: DOMString; // !!!: rw
property Coords: DOMString; // !!!: rw
property HRef: DOMString; // !!!: rw
property HRefLang: DOMString; // !!!: rw
property Name: DOMString; // !!!: rw
property Rel: DOMString; // !!!: rw
property Rev: DOMString; // !!!: rw
property Shape: DOMString; // !!!: rw
property TabIndex: Integer; // !!!: rw
property Target: DOMString; // !!!: rw
property HTMLType: DOMString; // !!!: rw
procedure Blur; virtual; abstract;
procedure Focus; virtual; abstract;
end;
THTMLImageElement = class(THTMLElement)
public
property Name: DOMString; // !!!: rw
property Align: DOMString; // !!!: rw
property Alt: DOMString; // !!!: rw
property Border: DOMString; // !!!: rw
property Height: Integer; // !!!: rw
property HSpace: Integer; // !!!: rw
property IsMap: Boolean; // !!!: rw
property LongDesc: DOMString; // !!!: rw
property Src: Integer; // !!!: rw
property UseMap: DOMString; // !!!: rw
property VSpace: Integer; // !!!: rw
property Width: Integer; // !!!: rw
end;
THTMLObjectElement = class(THTMLElement)
public
property Form: THTMLFormElement; // !!!: ro
property Code: DOMString; // !!!: rw
property Align: DOMString; // !!!: rw
property Archive: DOMString; // !!!: rw
property Border: DOMString; // !!!: rw
property CodeBase: DOMString; // !!!: rw
property CodeType: DOMString; // !!!: rw
property Data: DOMString; // !!!: rw
property Declare: Boolean; // !!!: rw
property Height: DOMString; // !!!: rw
property HSpace: Integer; // !!!: rw
property Name: DOMString; // !!!: rw
property StandBy: DOMString; // !!!: rw
property TabIndex: Integer; // !!!: rw
property HTMLType: DOMString; // !!!: rw
property UseMap: DOMString; // !!!: rw
property VSpace: Integer; // !!!: rw
property Width: Integer; // !!!: rw
property ContentDocument: TDOMDocument; // !!!: ro
end;
THTMLParamElement = class(THTMLElement)
public
property Name: DOMString; // !!!: rw
property HTMLType: DOMString; // !!!: rw
property Value: DOMString; // !!!: rw
property ValueType: DOMString; // !!!: rw
end;
THTMLAppletElement = class(THTMLElement)
public
property Align: DOMString; // !!!: rw
property Alt: DOMString; // !!!: rw
property Archive: DOMString; // !!!: rw
property Code: DOMString; // !!!: rw
property CodeBase: DOMString; // !!!: rw
property Height: DOMString; // !!!: rw
property HSpace: Integer; // !!!: rw
property Name: DOMString; // !!!: rw
property AppletObject: DOMString; // !!!: rw
property VSpace: Integer; // !!!: rw
property Width: Integer; // !!!: rw
end;
THTMLMapElement = class(THTMLElement)
public
property Areas: THTMLCollection; // !!!: ro
property Name: DOMString; // !!!: rw
end;
THTMLAreaElement = class(THTMLElement)
public
property AccessKey: DOMString; // !!!: rw
property Alt: DOMString; // !!!: rw
property Coords: DOMString; // !!!: rw
property HRef: DOMString; // !!!: rw
property NoHRef: Boolean; // !!!: rw
property Shape: DOMString; // !!!: rw
property TabIndex: Integer; // !!!: rw
property Target: DOMString; // !!!: rw
end;
THTMLScriptElement = class(THTMLElement)
public
property Text: DOMString; // !!!: rw
property HtmlFor: DOMString; // !!!: rw
property Event: DOMString; // !!!: rw
property Charset: DOMString; // !!!: rw
property Defer: Boolean; // !!!: rw
property Src: DOMString; // !!!: rw
property HTMLType: DOMString; // !!!: rw
end;
THTMLTableElement = class(THTMLElement)
public
property Caption: THTMLTableCaptionElement; // !!!: rw
property THead: THTMLTableSectionElement; // !!!: rw
property TFoot: THTMLTableSectionElement; // !!!: rw
property Rows: THTMLCollection; // !!!: ro
property TBodies: THTMLCollection; // !!!: ro
property Align: DOMString; // !!!: rw
property BgColor: DOMString; // !!!: rw
property Border: DOMString; // !!!: rw
property CellPadding: DOMString; // !!!: rw
property CellSpacing: DOMString; // !!!: rw
property Frame: DOMString; // !!!: rw
property Rules: DOMString; // !!!: rw
property Summary: DOMString; // !!!: rw
property Width: DOMString; // !!!: rw
function CreateTHead: THTMLElement;
procedure DeleteTHead;
function CreateTFoot: THTMLElement;
procedure DeleteTFoot;
function CreateCaption: THTMLElement;
procedure DeleteCaption;
function InsertRow(Index: Integer): THTMLElement;
procedure DeleteRow(Index: Integer);
end;
THTMLTableCaptionElement = class(THTMLElement)
public
property Align: DOMString; // !!!: rw
end;
THTMLTableColElement = class(THTMLElement)
public
property Align: DOMString; // !!!: rw
property Ch: DOMString; // !!!: rw
property ChOff: DOMString; // !!!: rw
property Span: Integer; // !!!: rw
property VAlign: DOMString; // !!!: rw
property Width: DOMString; // !!!: rw
end;
THTMLTableSectionElement = class(THTMLElement)
public
property Align: DOMString; // !!!: rw
property Ch: DOMString; // !!!: rw
property ChOff: DOMString; // !!!: rw
property VAlign: DOMString; // !!!: rw
property Rows: THTMLCollection; // !!!: ro
function InsertRow(Index: Integer): THTMLElement;
procedure DeleteRow(Index: Integer);
end;
THTMLTableRowElement = class(THTMLElement)
public
property RowIndex: Integer; // !!!: ro
property SectionRowIndex: Integer; // !!!: ro
property Cells: THTMLCollection; // !!!: ro
property Align: DOMString; // !!!: rw
property BgColor: DOMString; // !!!: rw
property Ch: DOMString; // !!!: rw
property ChOff: DOMString; // !!!: rw
property VAlign: DOMString; // !!!: rw
function InsertCell(Index: Integer): THTMLElement;
procedure DeleteCell(Index: Integer);
end;
THTMLTableCellElement = class(THTMLElement)
public
property CellIndex: Integer; // !!!: ro
property Abbr: DOMString; // !!!: rw
property Align: DOMString; // !!!: rw
property Axis: DOMString; // !!!: rw
property BgColor: DOMString; // !!!: rw
property Ch: DOMString; // !!!: rw
property ChOff: DOMString; // !!!: rw
property ColSpan: Integer; // !!!: rw
property Headers: DOMString; // !!!: rw
property Height: DOMString; // !!!: rw
property NoWrap: Boolean; // !!!: rw
property RowSpan: Integer; // !!!: rw
property Scope: DOMString; // !!!: rw
property VAlign: DOMString; // !!!: rw
property Width: DOMString; // !!!: rw
end;
THTMLFrameSetElement = class(THTMLElement)
public
property Cols: DOMString; // !!!: rw
property Rows: DOMString; // !!!: rw
end;
THTMLFrameElement = class(THTMLElement)
public
property FrameBorder: DOMString; // !!!: rw
property LongDesc: DOMString; // !!!: rw
property MarginHeight: DOMString; // !!!: rw
property MarginWidth: DOMString; // !!!: rw
property Name: DOMString; // !!!: rw
property NoResize: Boolean; // !!!: rw
property Scrolling: DOMString; // !!!: rw
property Src: DOMString; // !!!: rw
property ContentDocument: TDOMDocument; // !!!: ro
end;
THTMLIFrameElement = class(THTMLElement)
public
property Align: DOMString; // !!!: rw
property FrameBorder: DOMString; // !!!: rw
property Height: DOMString; // !!!: rw
property LongDesc: DOMString; // !!!: rw
property MarginHeight: DOMString; // !!!: rw
property MarginWidth: DOMString; // !!!: rw
property Name: DOMString; // !!!: rw
property Scrolling: DOMString; // !!!: rw
property Src: DOMString; // !!!: rw
property Width: DOMString; // !!!: rw
property ContentDocument: TDOMDocument; // !!!: ro
end;
THTMLDocument = class(TXMLDocument)
private
function GetTitle: DOMString;
procedure SetTitle(const Value: DOMString);
public
property Title: DOMString read GetTitle write SetTitle;
property Referrer: DOMString; // !!!: ro
property Domain: DOMString; // !!!: ro
property URL: DOMString; // !!!: ro
property Body: THTMLElement; // !!!: rw
property Images: THTMLCollection; // !!!: ro
property Applets: THTMLCollection; // !!!: ro
property Links: THTMLCollection; // !!!: ro
property Forms: THTMLCollection; // !!!: ro
property Anchors: THTMLCollection; // !!!: ro
property Cookie: DOMString; // !!!: rw
procedure Open; virtual; abstract;
procedure Close; virtual; abstract;
procedure Write(const AText: DOMString);
procedure WriteLn(const AText: DOMString);
function GetElementsByName(const ElementName: DOMString): TDOMNodeList;
// Helper functions (not in DOM standard):
function CreateElement(const tagName: DOMString): THTMLElement;
function CreateSubElement: THTMLElement;
function CreateSupElement: THTMLElement;
function CreateSpanElement: THTMLElement;
function CreateBDOElement: THTMLElement;
function CreateTTElement: THTMLElement;
function CreateIElement: THTMLElement;
function CreateBElement: THTMLElement;
function CreateUElement: THTMLElement;
function CreateSElement: THTMLElement;
function CreateStrikeElement: THTMLElement;
function CreateBigElement: THTMLElement;
function CreateSmallElement: THTMLElement;
function CreateEmElement: THTMLElement;
function CreateStrongElement: THTMLElement;
function CreateDfnElement: THTMLElement;
function CreateCodeElement: THTMLElement;
function CreateSampElement: THTMLElement;
function CreateKbdElement: THTMLElement;
function CreateVarElement: THTMLElement;
function CreateCiteElement: THTMLElement;
function CreateAcronymElement: THTMLElement;
function CreateAbbrElement: THTMLElement;
function CreateDDElement: THTMLElement;
function CreateDTElement: THTMLElement;
function CreateNoFramesElement: THTMLElement;
function CreateNoScriptElement: THTMLElement;
function CreateAddressElement: THTMLElement;
function CreateCenterElement: THTMLElement;
function CreateHtmlElement: THTMLHtmlElement;
function CreateHeadElement: THTMLHeadElement;
function CreateLinkElement: THTMLLinkElement;
{ function CreateTitleElement: THTMLTitleElement;
function CreateMetaElement: THTMLMetaElement;
function CreateBaseElement: THTMLBaseElement;
function CreateIsIndexElement: THTMLIsIndexElement;
function CreateStyleElement: THTMLStyleElement;}
function CreateBodyElement: THTMLBodyElement;
{ function CreateFormElement: THTMLFormElement;
function CreateSelectElement: THTMLSelectElement;
function CreateOptGroupElement: THTMLOptGroupElement;
function CreateOptionElement: THTMLOptionElement;
function CreateInputElement: THTMLInputElement;
function CreateTextAreaElement: THTMLTextAreaElement;
function CreateButtonElement: THTMLButtonElement;
function CreateLabelElement: THTMLLabelElement;
function CreateFieldSetElement: THTMLFieldSetElement;
function CreateLegendElement: THTMLLegendElement;}
function CreateUListElement: THTMLUListElement;
function CreateOListElement: THTMLOListElement;
function CreateDListElement: THTMLDListElement;
{ function CreateDirectoryElement: THTMLDirectoryElement;
function CreateMenuElement: THTMLMenuElement;}
function CreateLIElement: THTMLLIElement;
{ function CreateDivElement: THTMLDivElement;}
function CreateParagraphElement: THTMLParagraphElement;
{ function CreateHeadingElement: THTMLHeadingElement;
function CreateQuoteElement: THTMLQuoteElement;
function CreatePreElement: THTMLPreElement;
function CreateBRElement: THTMLBreElement;
function CreateBaseFontElement: THTMLBaseFontElement;
function CreateFontElement: THTMFontLElement;
function CreateHRElement: THTMLHREElement;
function CreateModElement: THTMLModElement;
function CreateAnchorElement: THTMLAnchorElement;
function CreateImageElement: THTMLImageElement;
function CreateObjectElement: THTMLObjectElement;
function CreateParamElement: THTMLParamElement;
function CreateAppletElement: THTMLAppletElement;
function CreateMapElement: THTMLMapElement;
function CreateAreaElement: THTMLAreaElement;
function CreateScriptElement: THTMLScriptElement;
function CreateTableElement: THTMLTableElement;
function CreateTableCaptionElement: THTMLTableCaptionElement;
function CreateTableColElement: THTMLTableColElement;
function CreateTableSectionElement: THTMLTableSectionElement;
function CreateTableRowElement: THTMLTableRowElement;
function CreateTableCellElement: THTMLTableCellElement;
function CreateFrameSetElement: THTMLFrameSetElement;
function CreateFrameElement: THTMLFrameElement;
function CreateIFrameElement: THTMLIFrameElement;}
end;
implementation
function THTMLCollection.Item(Index: Cardinal): TDOMNode;
begin
Result := nil;
end;
function THTMLCollection.NamedItem(const Index: DOMString): TDOMNode;
begin
Result := nil;
end;
function THTMLOptionsCollection.Item(Index: Cardinal): TDOMNode;
begin
Result := nil;
end;
function THTMLOptionsCollection.NamedItem(const Index: DOMString): TDOMNode;
begin
Result := nil;
end;
constructor THTMLElement.Create(AOwner: THTMLDocument; const ATagName: DOMString);
begin
inherited Create(AOwner);
FNodeName := ATagName;
end;
function THTMLElement.GetID: DOMString; begin Result := GetAttribute('id') end;
procedure THTMLElement.SetID(const Value: DOMString); begin SetAttribute('id', Value) end;
function THTMLElement.GetTitle: DOMString; begin Result := GetAttribute('title') end;
procedure THTMLElement.SetTitle(const Value: DOMString); begin SetAttribute('title', Value) end;
function THTMLElement.GetLang: DOMString; begin Result := GetAttribute('lang') end;
procedure THTMLElement.SetLang(const Value: DOMString); begin SetAttribute('lang', Value) end;
function THTMLElement.GetDir: DOMString; begin Result := GetAttribute('dir') end;
procedure THTMLElement.SetDir(const Value: DOMString); begin SetAttribute('dir', Value) end;
function THTMLElement.GetClassName: DOMString; begin Result := GetAttribute('class') end;
procedure THTMLElement.SetClassName(const Value: DOMString); begin SetAttribute('class', Value) end;
function THTMLHtmlElement.GetVersion: DOMString; begin Result := GetAttribute('version') end;
procedure THTMLHtmlElement.SetVersion(const Value: DOMString); begin SetAttribute('version', Value) end;
function THTMLHeadElement.GetProfile: DOMString; begin Result := GetAttribute('profile') end;
procedure THTMLHeadElement.SetProfile(const Value: DOMString); begin SetAttribute('profile', Value) end;
procedure THTMLSelectElement.Add(Element, Before: THTMLElement);
begin
end;
procedure THTMLSelectElement.Remove(Index: Integer);
begin
end;
function THTMLTableElement.CreateTHead: THTMLElement;
begin
Result := nil;
end;
procedure THTMLTableElement.DeleteTHead;
begin
end;
function THTMLTableElement.CreateTFoot: THTMLElement;
begin
Result := nil;
end;
procedure THTMLTableElement.DeleteTFoot;
begin
end;
function THTMLTableElement.CreateCaption: THTMLElement;
begin
Result := nil;
end;
procedure THTMLTableElement.DeleteCaption;
begin
end;
function THTMLTableElement.InsertRow(Index: Integer): THTMLElement;
begin
Result := nil;
end;
procedure THTMLTableElement.DeleteRow(Index: Integer);
begin
end;
function THTMLTableSectionElement.InsertRow(Index: Integer): THTMLElement;
begin
Result := nil;
end;
procedure THTMLTableSectionElement.DeleteRow(Index: Integer);
begin
end;
function THTMLTableRowElement.InsertCell(Index: Integer): THTMLElement;
begin
Result := nil;
end;
procedure THTMLTableRowElement.DeleteCell(Index: Integer);
begin
end;
function THTMLDocument.GetTitle: DOMString;
var
Node: TDOMNode;
begin
Result := '';
if not Assigned(DocumentElement) then
exit;
Node := DocumentElement.FirstChild;
while Assigned(Node) and (Node.NodeName <> 'head') do
Node := Node.NextSibling;
if not Assigned(Node) then
exit;
Node := Node.FirstChild;
while Assigned(Node) and (Node.NodeName <> 'title') do
Node := Node.NextSibling;
if not Assigned(Node) then
exit;
Node := Node.FirstChild;
if Assigned(Node) and (Node.NodeType = TEXT_NODE) then
Result := Node.NodeValue;
end;
procedure THTMLDocument.SetTitle(const Value: DOMString);
var
Node: TDOMNode;
TitleEl: TDOMElement;
begin
if not Assigned(DocumentElement) then
AppendChild(CreateHtmlElement);
Node := DocumentElement.FirstChild;
while Assigned(Node) and (Node.NodeName <> 'head') do
Node := Node.NextSibling;
if not Assigned(Node) then
begin
Node := CreateHeadElement;
DocumentElement.InsertBefore(Node, DocumentElement.FirstChild);
end;
TitleEl := TDOMElement(Node.FirstChild);
while Assigned(TitleEl) and (TitleEl.NodeName <> 'title') do
TitleEl := TDOMElement(TitleEl.NextSibling);
if not Assigned(TitleEl) then
begin
TitleEl := CreateElement('title');
Node.AppendChild(TitleEl);
end;
while Assigned(TitleEl.FirstChild) do
TitleEl.RemoveChild(TitleEl.FirstChild);
TitleEl.AppendChild(CreateTextNode(Value));
end;
procedure THTMLDocument.Write(const AText: DOMString);
begin
end;
procedure THTMLDocument.WriteLn(const AText: DOMString);
begin
end;
function THTMLDocument.GetElementsByName(const ElementName: DOMString): TDOMNodeList;
begin
Result := nil;
end;
function THTMLDocument.CreateElement(const tagName: DOMString): THTMLElement;
begin
Result := THTMLElement.Create(Self, tagName);
end;
function THTMLDocument.CreateSubElement: THTMLElement; begin Result := CreateElement('sub') end;
function THTMLDocument.CreateSupElement: THTMLElement; begin Result := CreateElement('sup') end;
function THTMLDocument.CreateSpanElement: THTMLElement; begin Result := CreateElement('span') end;
function THTMLDocument.CreateBDOElement: THTMLElement; begin Result := CreateElement('bdo') end;
function THTMLDocument.CreateTTElement: THTMLElement; begin Result := CreateElement('tt') end;
function THTMLDocument.CreateIElement: THTMLElement; begin Result := CreateElement('i') end;
function THTMLDocument.CreateBElement: THTMLElement; begin Result := CreateElement('b') end;
function THTMLDocument.CreateUElement: THTMLElement; begin Result := CreateElement('u') end;
function THTMLDocument.CreateSElement: THTMLElement; begin Result := CreateElement('s') end;
function THTMLDocument.CreateStrikeElement: THTMLElement; begin Result := CreateElement('strike') end;
function THTMLDocument.CreateBigElement: THTMLElement; begin Result := CreateElement('big') end;
function THTMLDocument.CreateSmallElement: THTMLElement; begin Result := CreateElement('small') end;
function THTMLDocument.CreateEmElement: THTMLElement; begin Result := CreateElement('em') end;
function THTMLDocument.CreateStrongElement: THTMLElement; begin Result := CreateElement('strong') end;
function THTMLDocument.CreateDfnElement: THTMLElement; begin Result := CreateElement('dfn') end;
function THTMLDocument.CreateCodeElement: THTMLElement; begin Result := CreateElement('code') end;
function THTMLDocument.CreateSampElement: THTMLElement; begin Result := CreateElement('samp') end;
function THTMLDocument.CreateKbdElement: THTMLElement; begin Result := CreateElement('kbd') end;
function THTMLDocument.CreateVarElement: THTMLElement; begin Result := CreateElement('var') end;
function THTMLDocument.CreateCiteElement: THTMLElement; begin Result := CreateElement('cite') end;
function THTMLDocument.CreateAcronymElement: THTMLElement; begin Result := CreateElement('acronym') end;
function THTMLDocument.CreateAbbrElement: THTMLElement; begin Result := CreateElement('abbr') end;
function THTMLDocument.CreateDDElement: THTMLElement; begin Result := CreateElement('dd') end;
function THTMLDocument.CreateDTElement: THTMLElement; begin Result := CreateElement('dt') end;
function THTMLDocument.CreateNoFramesElement: THTMLElement; begin Result := CreateElement('noframes') end;
function THTMLDocument.CreateNoScriptElement: THTMLElement; begin Result := CreateElement('noscript') end;
function THTMLDocument.CreateAddressElement: THTMLElement; begin Result := CreateElement('address') end;
function THTMLDocument.CreateCenterElement: THTMLElement; begin Result := CreateElement('center') end;
function THTMLDocument.CreateHtmlElement: THTMLHtmlElement; begin Result := THTMLHtmlElement.Create(Self, 'html') end;
function THTMLDocument.CreateHeadElement: THTMLHeadElement; begin Result := THTMLHeadElement.Create(Self, 'head') end;
function THTMLDocument.CreateLinkElement: THTMLLinkElement; begin Result := THTMLLinkElement.Create(Self, 'a') end;
//...
function THTMLDocument.CreateBodyElement: THTMLBodyElement; begin Result := THTMLBodyElement.Create(Self, 'body') end;
//...
function THTMLDocument.CreateUListElement: THTMLUListElement; begin Result := THTMLUListElement.Create(Self, 'ul') end;
function THTMLDocument.CreateOListElement: THTMLOListElement; begin Result := THTMLOListElement.Create(Self, 'ol') end;
function THTMLDocument.CreateDListElement: THTMLDListElement; begin Result := THTMLDListElement.Create(Self, 'dl') end;
// ...
function THTMLDocument.CreateLIElement: THTMLLIElement; begin Result := THTMLLIElement.Create(Self, 'li') end;
//...
function THTMLDocument.CreateParagraphElement: THTMLParagraphElement; begin Result := THTMLParagraphElement.Create(Self, 'p') end;
end.
{
$Log$
Revision 1.1 2002-12-11 21:06:07 sg
* Small cleanups
* Replaced htmldoc unit with dom_html unit
* Added SAX parser framework and SAX HTML parser
}

938
fcl/xml/sax.pp Normal file
View File

@ -0,0 +1,938 @@
{
$Id$
This file is part of the Free Component Library
SAX 2 (Simple API for XML) implementation
Copyright (c) 2000 - 2002 by
Areca Systems GmbH / Sebastian Guenther, sg@freepascal.org
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
unit SAX;
{ $DEFINE UseDynArrays}
{ $IFDEF UseDynArrays}
{$MODE objfpc}
{ $ELSE}
{ $MODE Delphi}
{ $ENDIF}
{$H+}
interface
uses SysUtils, Classes;
resourcestring
SSAXAttributeIndexError = 'Invalid attribute index %d';
const
XMLNS = 'http://www.w3.org/XML/1998/namespace';
type
{$IFDEF ver1_0}
SAXString = String;
SAXChar = Char;
{$ELSE}
SAXString = WideString;
SAXChar = WideChar;
{$ENDIF}
PSAXChar = ^SAXChar;
{ Exceptions }
ESAXError = class(Exception);
ESAXAttributeIndexError = class(ESAXError)
public
constructor Create(Index: Integer);
end;
ESAXParseException = class(ESAXError);
{ TSAXInputSource: A single input source for an XML entity }
TSAXInputSource = class
private
FStream: TStream;
FEncoding: String;
FPublicID, FSystemID: SAXString;
public
constructor Create; overload;
constructor Create(AStream: TStream); overload;
constructor Create(const ASystemID: SAXString); overload;
property Stream: TStream read FStream write FStream;
property Encoding: String read FEncoding write FEncoding;
property PublicID: SAXString read FPublicID write FPublicID;
property SystemID: SAXString read FSystemID write FSystemID;
end;
{ TSAXAttributes: List of XML attributes }
TSAXAttributeData = record
URI, LocalName, QName, Value: SAXString;
AttrType: String;
end;
{$IFNDEF UseDynArrays}
PSAXAttributeData = ^TSAXAttributeData;
{$ENDIF}
TSAXAttributes = class
protected
FLength: Integer;
{$IFDEF UseDynArrays}
Data: array of TSAXAttributeData;
{$ELSE}
FData: TList;
function GetData(Index: Integer): PSAXAttributeData;
property Data[Index:Integer]: PSAXAttributeData read GetData;
{$ENDIF}
procedure BadIndex(Index: Integer);
public
constructor Create; overload;
constructor Create(Atts: TSAXAttributes); overload;
{$IFNDEF UseDynArrays}
destructor Destroy; override;
{$ENDIF}
function GetIndex(const QName: SAXString): Integer; overload;
function GetIndex(const URI, LocalPart: SAXString): Integer; overload;
function GetLength: Integer;
function GetLocalName(Index: Integer): SAXString;
function GetQName(Index: Integer): SAXString;
function GetType(Index: Integer): String; overload;
function GetType(const QName: SAXString): String; overload;
function GetType(const URI, LocalName: SAXString): String; overload;
function GetURI(Index: Integer): SAXString;
function GetValue(Index: Integer): SAXString; overload;
function GetValue(const QName: SAXString): SAXString; overload;
function GetValue(const URI, LocalName: SAXString): SAXString; overload;
// Manipulation methods:
procedure Clear;
procedure SetAttributes(Atts: TSAXAttributes);
procedure AddAttribute(const AURI, ALocalName, AQName: SAXString;
const AType: String; const AValue: SAXString);
procedure SetAttribute(Index: Integer;
const AURI, ALocalName, AQName: SAXString; const AType: String;
const AValue: SAXString);
procedure RemoveAttribute(Index: Integer);
procedure SetURI(Index: Integer; const AURI: SAXString);
procedure SetLocalName(Index: Integer; const ALocalName: SAXString);
procedure SetQName(Index: Integer; const AQName: SAXString);
procedure SetType(Index: Integer; const AType: String);
procedure SetValue(Index: Integer; const AValue: SAXString);
property Length: Integer read GetLength;
property LocalNames[Index: Integer]: SAXString read GetLocalName;
property QNames[Index: Integer]: SAXString read GetQName;
property Types[Index: Integer]: String read GetType;
property URIs[Index: Integer]: SAXString read GetURI;
property Values[Index: Integer]: SAXString read GetValue;
end;
{ TSAXReader: Reading an XML document using callbacks }
TCharactersEvent = procedure(Sender: TObject; const ch: PSAXChar; AStart, ALength: Integer) of object;
TCommentEvent = type TCharactersEvent;
TEndElementEvent = procedure(Sender: TObject; const NamespaceURI, LocalName, QName: SAXString) of object;
TEndPrefixMappingEvent = procedure(Sender: TObject; const Prefix: SAXString) of object;
TIgnorableWhitespaceEvent = procedure(Sender: TObject; const ch: PSAXChar; AStart, ALength: Integer) of object;
TProcessingInstructionEvent = procedure(Sender: TObject; const Target, Data: SAXString) of object;
TSkippedEntityEvent = procedure(Sender: TObject; const Name: SAXString) of object;
TStartElementEvent = procedure(Sender: TObject; const NamespaceURI, LocalName, QName: SAXString; Atts: TSAXAttributes) of object;
TStartPrefixMappingEvent = procedure(Sender: TObject; const Prefix, URI: SAXString) of object;
TNotationDeclEvent = procedure(Sender: TObject; const Name, PublicID, SystemID: SAXString) of object;
TUnparsedEntityDeclEvent = procedure(Sender: TObject; const Name, PublicID, SystemID, NotationName: SAXString) of object;
TResolveEntityEvent = function(Sender: TObject; const PublicID, SystemID: SAXString): TSAXInputSource of object;
TErrorEvent = procedure(Sender: TObject; AException: ESAXParseException) of object;
TFatalErrorEvent = procedure(Sender: TObject; AException: ESAXParseException) of object;
TWarningEvent = procedure(Sender: TObject; AException: ESAXParseException) of object;
TSAXReader = class
private
FOnCharacters: TCharactersEvent;
FOnComment: TCommentEvent;
FOnEndDocument: TNotifyEvent;
FOnEndElement: TEndElementEvent;
FOnEndPrefixMapping: TEndPrefixMappingEvent;
FOnIgnorableWhitespace: TIgnorableWhitespaceEvent;
FOnProcessingInstruction: TProcessingInstructionEvent;
FOnSkippedEntity: TSkippedEntityEvent;
FOnStartDocument: TNotifyEvent;
FOnStartElement: TStartElementEvent;
FOnStartPrefixMapping: TStartPrefixMappingEvent;
FOnNotationDecl: TNotationDeclEvent;
FOnUnparsedEntityDecl: TUnparsedEntityDeclEvent;
FOnResolveEntity: TResolveEntityEvent;
FOnError: TErrorEvent;
FOnFatalError: TFatalErrorEvent;
FOnWarning: TWarningEvent;
protected
FCurColumnNumber, FCurLineNumber: Integer;
FCurPublicID, FCurSystemID: SAXString;
function GetFeature(const Name: String): Boolean; dynamic; abstract;
function GetProperty(const Name: String): TObject; dynamic; abstract;
procedure SetFeature(const Name: String; Value: Boolean); dynamic; abstract;
procedure SetProperty(const Name: String; Value: TObject); dynamic; abstract;
// Notification of the content of a document
procedure DoCharacters(const ch: PSAXChar; AStart, ALength: Integer); dynamic;
procedure DoComment(const ch: PSAXChar; AStart, ALength: Integer); dynamic;
procedure DoEndDocument; dynamic;
procedure DoEndElement(const NamespaceURI, LocalName, QName: SAXString); dynamic;
procedure DoEndPrefixMapping(const Prefix: SAXString); dynamic;
procedure DoIgnorableWhitespace(const ch: PSAXChar; AStart, ALength: Integer); dynamic;
procedure DoProcessingInstruction(const Target, Data: SAXString); dynamic;
procedure DoSkippedEntity(const Name: SAXString); dynamic;
procedure DoStartDocument; dynamic;
procedure DoStartElement(const NamespaceURI, LocalName, QName: SAXString; Atts: TSAXAttributes); dynamic;
procedure DoStartPrefixMapping(const Prefix, URI: SAXString); dynamic;
// Notification of basic DTD-related events
procedure DoNotationDecl(const Name, PublicID, SystemID: SAXString); dynamic;
procedure DoUnparsedEntityDecl(const Name, PublicID,
SystemID, NotationName: SAXString); dynamic;
// Resolving entities
function DoResolveEntity(const PublicID,
SystemID: SAXString): TSAXInputSource; dynamic;
// SAX error handlers
procedure DoError(AException: ESAXParseException); dynamic;
procedure DoFatalError(AException: ESAXParseException); dynamic;
procedure DoWarning(AException: ESAXParseException); dynamic;
public
procedure Parse(AInput: TSAXInputSource); dynamic; abstract; overload;
procedure Parse(const SystemID: SAXString); dynamic; overload;
procedure ParseStream(AStream: TStream);
// Current location
property CurColumnNumber: Integer read FCurColumnNumber;
property CurLineNumber: Integer read FCurLineNumber;
property CurPublicID: SAXString read FCurPublicID;
property CurSystemID: SAXString read FCurSystemID;
property Features[const Name: String]: Boolean read GetFeature write SetFeature;
property Properties[const Name: String]: TObject read GetProperty write SetProperty;
// Content handler callbacks
property OnCharacters: TCharactersEvent read FOnCharacters write FOnCharacters;
property OnComment: TCommentEvent read FOnComment write FOnComment;
property OnEndDocument: TNotifyEvent read FOnEndDocument write FOnEndDocument;
property OnEndElement: TEndElementEvent read FOnEndElement write FOnEndElement;
property OnEndPrefixMapping: TEndPrefixMappingEvent read FOnEndPrefixMapping write FOnEndPrefixMapping;
property OnIgnorableWhitespace: TIgnorableWhitespaceEvent read FOnIgnorableWhitespace write FOnIgnorableWhitespace;
property OnProcessingInstruction: TProcessingInstructionEvent read FOnProcessingInstruction write FOnProcessingInstruction;
property OnSkippedEntity: TSkippedEntityEvent read FOnSkippedEntity write FOnSkippedEntity;
property OnStartDocument: TNotifyEvent read FOnStartDocument write FOnStartDocument;
property OnStartElement: TStartElementEvent read FOnStartElement write FOnStartElement;
property OnStartPrefixMapping: TStartPrefixMappingEvent read FOnStartPrefixMapping write FOnStartPrefixMapping;
// DTD handler callbacks
property OnNotationDecl: TNotationDeclEvent read FOnNotationDecl write FOnNotationDecl;
property OnUnparsedEntityDecl: TUnparsedEntityDeclEvent read FOnUnparsedEntityDecl write FOnUnparsedEntityDecl;
// Entity resolver callbacks
property OnResolveEntity: TResolveEntityEvent read FOnResolveEntity write FOnResolveEntity;
// Error handler callbacks
property OnError: TErrorEvent read FOnError write FOnError;
property OnFatalError: TFatalErrorEvent read FOnFatalError write FOnFatalError;
property OnWarning: TWarningEvent read FOnWarning write FOnWarning;
end;
{ TSAXFilter: XML filter }
TSAXFilter = class(TSAXReader)
private
FParent: TSAXReader;
protected
procedure DoCharacters(const ch: PSAXChar; AStart, ALength: Integer); override;
procedure DoEndDocument; override;
procedure DoEndElement(const NamespaceURI, LocalName, QName: SAXString); override;
procedure DoEndPrefixMapping(const Prefix: SAXString); override;
procedure DoIgnorableWhitespace(const ch: PSAXChar; AStart, ALength: Integer); override;
procedure DoProcessingInstruction(const Target, Data: SAXString); override;
procedure DoSkippedEntity(const Name: SAXString); override;
procedure DoStartDocument; override;
procedure DoStartElement(const NamespaceURI, LocalName, QName: SAXString; Atts: TSAXAttributes); override;
procedure DoStartPrefixMapping(const Prefix, URI: SAXString); override;
procedure DoNotationDecl(const Name, PublicID, SystemID: SAXString); override;
procedure DoUnparsedEntityDecl(const Name, PublicID, SystemID, NotationName: SAXString); override;
function DoResolveEntity(const PublicID, SystemID: SAXString): TSAXInputSource; override;
procedure DoError(AException: ESAXParseException); override;
procedure DoFatalError(AException: ESAXParseException); override;
procedure DoWarning(AException: ESAXParseException); override;
public
property Parent: TSAXReader read FParent write FParent;
end;
// ===================================================================
// ===================================================================
implementation
constructor ESAXAttributeIndexError.Create(Index: Integer);
begin
inherited CreateFmt(SSAXAttributeIndexError, [Index]);
end;
{ TSAXInputSource }
constructor TSAXInputSource.Create;
begin
inherited Create;
end;
constructor TSAXInputSource.Create(AStream: TStream);
begin
inherited Create;
FStream := AStream;
end;
constructor TSAXInputSource.Create(const ASystemID: SAXString);
begin
inherited Create;
FSystemID := ASystemID;
end;
{ TSAXAttributes }
constructor TSAXAttributes.Create;
begin
inherited Create;
{$IFNDEF UseDynArrays}
FData := TList.Create;
{$ENDIF}
end;
constructor TSAXAttributes.Create(Atts: TSAXAttributes);
begin
inherited Create;
{$IFNDEF UseDynArrays}
FData := TList.Create;
{$ENDIF}
SetAttributes(Atts);
end;
{$IFNDEF UseDynArrays}
destructor TSAXAttributes.Destroy;
begin
Clear;
FData.Free;
inherited Destroy;
end;
{$ENDIF}
function TSAXAttributes.GetIndex(const QName: SAXString): Integer;
begin
Result := 0;
while Result < FLength do
begin
if Data[Result]^.QName = QName then
exit;
Inc(Result);
end;
Result := -1;
end;
function TSAXAttributes.GetIndex(const URI, LocalPart: SAXString): Integer;
begin
Result := 0;
while Result < FLength do
begin
if (Data[Result]^.URI = URI) and (Data[Result]^.LocalName = LocalPart) then
exit;
Inc(Result);
end;
Result := -1;
end;
function TSAXAttributes.GetLength: Integer;
begin
Result := FLength;
end;
function TSAXAttributes.GetLocalName(Index: Integer): SAXString;
begin
if (Index >= 0) and (Index < FLength) then
Result := Data[Index]^.LocalName
else
SetLength(Result, 0);
end;
function TSAXAttributes.GetQName(Index: Integer): SAXString;
begin
if (Index >= 0) and (Index < FLength) then
Result := Data[Index]^.QName
else
SetLength(Result, 0);
end;
function TSAXAttributes.GetType(Index: Integer): String;
begin
if (Index >= 0) and (Index < FLength) then
Result := Data[Index]^.AttrType
else
SetLength(Result, 0);
end;
function TSAXAttributes.GetType(const QName: SAXString): String;
var
i: Integer;
begin
for i := 0 to FLength - 1 do
if Data[i]^.QName = QName then
begin
Result := Data[i]^.AttrType;
exit;
end;
SetLength(Result, 0);
end;
function TSAXAttributes.GetType(const URI, LocalName: SAXString): String;
var
i: Integer;
begin
for i := 0 to FLength - 1 do
if (Data[i]^.URI = URI) and (Data[i]^.LocalName = LocalName) then
begin
Result := Data[i]^.AttrType;
exit;
end;
SetLength(Result, 0);
end;
function TSAXAttributes.GetURI(Index: Integer): SAXString;
begin
if (Index >= 0) and (Index < FLength) then
Result := Data[Index * 5]^.URI
else
SetLength(Result, 0);
end;
function TSAXAttributes.GetValue(Index: Integer): SAXString;
begin
if (Index >= 0) and (Index < FLength) then
Result := Data[Index]^.Value
else
SetLength(Result, 0);
end;
function TSAXAttributes.GetValue(const QName: SAXString): SAXString;
var
i: Integer;
begin
for i := 0 to FLength - 1 do
if Data[i]^.QName = QName then
begin
Result := Data[i]^.Value;
exit;
end;
SetLength(Result, 0);
end;
function TSAXAttributes.GetValue(const URI, LocalName: SAXString): SAXString;
var
i: Integer;
begin
for i := 0 to FLength - 1 do
if (Data[i]^.URI = URI) and (Data[i]^.LocalName = LocalName) then
begin
Result := Data[i]^.Value;
exit;
end;
SetLength(Result, 0);
end;
procedure TSAXAttributes.Clear;
{$IFDEF UseDynArrays}
begin
SetLength(Data, 0);
end;
{$ELSE}
var
i: Integer;
p: PSAXAttributeData;
begin
for i := 0 to FData.Count - 1 do
begin
p := PSAXAttributeData(FData[i]);
Dispose(p);
end;
end;
{$ENDIF}
procedure TSAXAttributes.SetAttributes(Atts: TSAXAttributes);
var
i: Integer;
begin
FLength := Atts.Length;
{$IFDEF UseDynArrays}
SetLength(Data, FLength);
{$ELSE}
FData.Count := FLength;
{$ENDIF}
for i := 0 to FLength - 1 do
{$IFDEF UseDynArrays}
with Data[i] do
{$ELSE}
with Data[i]^ do
{$ENDIF}
begin
URI := Atts.URIs[i];
LocalName := Atts.LocalNames[i];
QName := Atts.QNames[i];
AttrType := Atts.Types[i];
Value := Atts.Values[i];
end;
end;
procedure TSAXAttributes.AddAttribute(const AURI, ALocalName, AQName: SAXString;
const AType: String; const AValue: SAXString);
{$IFNDEF UseDynArrays}
var
p: PSAXAttributeData;
{$ENDIF}
begin
Inc(FLength);
{$IFDEF UseDynArrays}
SetLength(Data, FLength);
{$ELSE}
New(p);
FData.Add(p);
{$ENDIF}
{$IFDEF UseDynArrays}
with Data[FLength - 1] do
{$ELSE}
with Data[FLength - 1]^ do
{$ENDIF}
begin
URI := AURI;
LocalName := ALocalName;
QName := AQName;
AttrType := AType;
Value := AValue;
end;
end;
procedure TSAXAttributes.SetAttribute(Index: Integer;
const AURI, ALocalName, AQName: SAXString; const AType: String;
const AValue: SAXString);
begin
if (Index >= 0) and (Index < FLength) then
{$IFDEF UseDynArrays}
with Data[Index] do
{$ELSE}
with Data[Index]^ do
{$ENDIF}
begin
URI := AURI;
LocalName := ALocalName;
QName := AQName;
AttrType := AType;
Value := AValue;
end
else
BadIndex(Index);
end;
procedure TSAXAttributes.RemoveAttribute(Index: Integer);
{$IFDEF UseDynArrays}
var
i: Integer;
{$ENDIF}
begin
if (Index >= 0) and (Index < FLength) then
begin
{$IFDEF UseDynArrays}
for i := Index to FLength - 1 do
Data[i] := Data[i + 1];
Dec(FLength);
SetLength(Data, FLength);
{$ELSE}
FData.Delete(Index);
Dec(FLength);
{$ENDIF}
end else
BadIndex(Index);
end;
procedure TSAXAttributes.SetURI(Index: Integer; const AURI: SAXString);
begin
if (Index >= 0) and (Index < FLength) then
Data[Index]^.URI := AURI
else
BadIndex(Index);
end;
procedure TSAXAttributes.SetLocalName(Index: Integer;
const ALocalName: SAXString);
begin
if (Index >= 0) and (Index < FLength) then
Data[Index]^.LocalName := ALocalName
else
BadIndex(Index);
end;
procedure TSAXAttributes.SetQName(Index: Integer; const AQName: SAXString);
begin
if (Index >= 0) and (Index < FLength) then
Data[Index]^.QName := AQName
else
BadIndex(Index);
end;
procedure TSAXAttributes.SetType(Index: Integer; const AType: String);
begin
if (Index >= 0) and (Index < FLength) then
Data[Index]^.AttrType := AType
else
BadIndex(Index);
end;
procedure TSAXAttributes.SetValue(Index: Integer; const AValue: SAXString);
begin
if (Index >= 0) and (Index < FLength) then
Data[Index]^.Value := AValue
else
BadIndex(Index);
end;
{$IFNDEF UseDynArrays}
function TSAXAttributes.GetData(Index: Integer): PSAXAttributeData;
begin
Result := PSAXAttributeData(FData[Index]);
end;
{$ENDIF}
procedure TSAXAttributes.BadIndex(Index: Integer);
begin
raise ESAXAttributeIndexError.Create(Index) at get_caller_addr(get_frame);
end;
{ TSAXReader }
procedure TSAXReader.Parse(const SystemID: SAXString);
var
Input: TSAXInputSource;
begin
Input := TSAXInputSource.Create(SystemID);
try
Input.Stream := TFileStream.Create(SystemID, fmOpenRead);
try
Parse(Input);
finally
Input.Stream.Free;
end;
finally
Input.Free;
end;
end;
procedure TSAXReader.ParseStream(AStream: TStream);
var
Input: TSAXInputSource;
begin
Input := TSAXInputSource.Create(AStream);
try
Parse(Input);
finally
Input.Free;
end;
end;
function TSAXReader.DoResolveEntity(const PublicID,
SystemID: SAXString): TSAXInputSource;
begin
if Assigned(OnResolveEntity) then
Result := OnResolveEntity(Self, PublicID, SystemID)
else
Result := nil;
end;
procedure TSAXReader.DoNotationDecl(const Name, PublicID, SystemID: SAXString);
begin
if Assigned(OnNotationDecl) then
OnNotationDecl(Self, Name, PublicID, SystemID);
end;
procedure TSAXReader.DoUnparsedEntityDecl(const Name, PublicID,
SystemID, NotationName: SAXString);
begin
if Assigned(OnUnparsedEntityDecl) then
OnUnparsedEntityDecl(Self, Name, PublicID, SystemID, NotationName);
end;
procedure TSAXReader.DoCharacters(const ch: PSAXChar;
AStart, ALength: Integer);
begin
if Assigned(OnCharacters) then
OnCharacters(Self, ch, AStart, ALength);
end;
procedure TSAXReader.DoComment(const ch: PSAXChar;
AStart, ALength: Integer);
begin
if Assigned(OnComment) then
OnComment(Self, ch, AStart, ALength);
end;
procedure TSAXReader.DoEndDocument;
begin
if Assigned(OnEndDocument) then
OnEndDocument(Self);
end;
procedure TSAXReader.DoEndElement(const NamespaceURI,
LocalName, QName: SAXString);
begin
if Assigned(OnEndElement) then
OnEndElement(Self, NamespaceURI, LocalName, QName);
end;
procedure TSAXReader.DoEndPrefixMapping(const Prefix: SAXString);
begin
if Assigned(OnEndPrefixMapping) then
OnEndPrefixMapping(Self, Prefix);
end;
procedure TSAXReader.DoIgnorableWhitespace(const ch: PSAXChar;
AStart, ALength: Integer);
begin
if Assigned(OnIgnorableWhitespace) then
OnIgnorableWhitespace(Self, ch, AStart, ALength);
end;
procedure TSAXReader.DoProcessingInstruction(const Target,
Data: SAXString);
begin
if Assigned(OnProcessingInstruction) then
OnProcessingInstruction(Self, Target, Data);
end;
procedure TSAXReader.DoSkippedEntity(const Name: SAXString);
begin
if Assigned(OnSkippedEntity) then
OnSkippedEntity(Self, Name);
end;
procedure TSAXReader.DoStartDocument;
begin
if Assigned(OnStartDocument) then
OnStartDocument(Self);
end;
procedure TSAXReader.DoStartElement(const NamespaceURI,
LocalName, QName: SAXString; Atts: TSAXAttributes);
begin
if Assigned(OnStartElement) then
OnStartElement(Self, NamespaceURI, LocalName, QName, Atts);
end;
procedure TSAXReader.DoStartPrefixMapping(const Prefix, URI: SAXString);
begin
if Assigned(OnStartPrefixMapping) then
OnStartPrefixMapping(Self, Prefix, URI);
end;
procedure TSAXReader.DoError(AException: ESAXParseException);
begin
if Assigned(OnError) then
OnError(Self, AException);
AException.Free;
end;
procedure TSAXReader.DoFatalError(AException: ESAXParseException);
begin
if Assigned(OnFatalError) then
OnFatalError(Self, AException)
else
raise AException;
AException.Free;
end;
procedure TSAXReader.DoWarning(AException: ESAXParseException);
begin
if Assigned(OnWarning) then
OnWarning(Self, AException);
AException.Free;
end;
{ TSAXFilter }
function TSAXFilter.DoResolveEntity(const PublicID,
SystemID: SAXString): TSAXInputSource;
begin
if Assigned(OnResolveEntity) then
Result := OnResolveEntity(Self, PublicID, SystemID)
else if Assigned(Parent) then
Result := Parent.DoResolveEntity(PublicID, SystemID)
else
Result := nil;
end;
procedure TSAXFilter.DoNotationDecl(const Name, PublicID, SystemID: SAXString);
begin
if Assigned(OnNotationDecl) then
OnNotationDecl(Self, Name, PublicID, SystemID)
else if Assigned(Parent) then
Parent.DoNotationDecl(Name, PublicID, SystemID);
end;
procedure TSAXFilter.DoUnparsedEntityDecl(const Name, PublicID,
SystemID, NotationName: SAXString);
begin
if Assigned(OnUnparsedEntityDecl) then
OnUnparsedEntityDecl(Self, Name, PublicID, SystemID, NotationName)
else if Assigned(Parent) then
Parent.DoUnparsedEntityDecl(Name, PublicID, SystemID, NotationName);
end;
procedure TSAXFilter.DoCharacters(const ch: PSAXChar;
AStart, ALength: Integer);
begin
if Assigned(OnCharacters) then
OnCharacters(Self, ch, AStart, ALength)
else if Assigned(Parent) then
Parent.DoCharacters(ch, AStart, ALength);
end;
procedure TSAXFilter.DoEndDocument;
begin
if Assigned(OnEndDocument) then
OnEndDocument(Self)
else if Assigned(Parent) then
Parent.DoEndDocument;
end;
procedure TSAXFilter.DoEndElement(const NamespaceURI,
LocalName, QName: SAXString);
begin
if Assigned(OnEndElement) then
OnEndElement(Self, NamespaceURI, LocalName, QName)
else if Assigned(Parent) then
Parent.DoEndElement(NamespaceURI, LocalName, QName);
end;
procedure TSAXFilter.DoEndPrefixMapping(const Prefix: SAXString);
begin
if Assigned(OnEndPrefixMapping) then
OnEndPrefixMapping(Self, Prefix)
else if Assigned(Parent) then
Parent.DoEndPrefixMapping(Prefix);
end;
procedure TSAXFilter.DoIgnorableWhitespace(const ch: PSAXChar;
AStart, ALength: Integer);
begin
if Assigned(OnIgnorableWhitespace) then
OnIgnorableWhitespace(Self, ch, AStart, ALength)
else if Assigned(Parent) then
Parent.DoIgnorableWhitespace(ch, AStart, ALength);
end;
procedure TSAXFilter.DoProcessingInstruction(const Target,
Data: SAXString);
begin
if Assigned(OnProcessingInstruction) then
OnProcessingInstruction(Self, Target, Data)
else if Assigned(Parent) then
Parent.DoProcessingInstruction(Target, Data);
end;
procedure TSAXFilter.DoSkippedEntity(const Name: SAXString);
begin
if Assigned(OnSkippedEntity) then
OnSkippedEntity(Self, Name)
else if Assigned(Parent) then
Parent.DoSkippedEntity(Name);
end;
procedure TSAXFilter.DoStartDocument;
begin
if Assigned(OnStartDocument) then
OnStartDocument(Self)
else if Assigned(Parent) then
Parent.DoStartDocument;
end;
procedure TSAXFilter.DoStartElement(const NamespaceURI,
LocalName, QName: SAXString; Atts: TSAXAttributes);
begin
if Assigned(OnStartElement) then
OnStartElement(Self, NamespaceURI, LocalName, QName, Atts)
else if Assigned(Parent) then
Parent.DoStartElement(NamespaceURI, LocalName, QName, Atts);
end;
procedure TSAXFilter.DoStartPrefixMapping(const Prefix, URI: SAXString);
begin
if Assigned(OnStartPrefixMapping) then
OnStartPrefixMapping(Self, Prefix, URI)
else if Assigned(Parent) then
Parent.DoStartPrefixMapping(Prefix, URI);
end;
procedure TSAXFilter.DoError(AException: ESAXParseException);
begin
if Assigned(OnError) then
OnError(Self, AException)
else if Assigned(Parent) then
Parent.DoError(AException);
AException.Free;
end;
procedure TSAXFilter.DoFatalError(AException: ESAXParseException);
begin
if Assigned(OnFatalError) then
OnFatalError(Self, AException)
else if Assigned(Parent) then
Parent.DoFatalError(AException)
else
raise AException;
AException.Free;
end;
procedure TSAXFilter.DoWarning(AException: ESAXParseException);
begin
if Assigned(OnWarning) then
OnWarning(Self, AException)
else if Assigned(Parent) then
Parent.DoWarning(AException);
AException.Free;
end;
end.
{
$Log$
Revision 1.1 2002-12-11 21:06:07 sg
* Small cleanups
* Replaced htmldoc unit with dom_html unit
* Added SAX parser framework and SAX HTML parser
}

555
fcl/xml/sax_html.pp Normal file
View File

@ -0,0 +1,555 @@
{
$Id$
This file is part of the Free Component Library
HTML parser with SAX-like interface
Copyright (c) 2000-2002 by
Areca Systems GmbH / Sebastian Guenther, sg@freepascal.org
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
{
Known problems:
* The whitespace handling does only work for processing the DOM tree.
Storing the DOM tree to a XML file will result in a quite ugly file.
(This probably has got much better with recent versions, which do
decent whitespace converting, but it's not tested really good.)
* Entity references in attribute values don't get parsed.
}
unit SAX_HTML;
interface
uses SysUtils, Classes, SAX, DOM;
type
{ THTMLReader: The HTML reader class }
THTMLScannerContext = (
scUnknown,
scWhitespace, // within whitespace
scText, // within text
scEntityReference, // within entity reference ("&...;")
scTag); // within a start tag or end tag
THTMLReader = class(TSAXReader)
private
FStarted: Boolean;
FEndOfStream: Boolean;
FScannerContext: THTMLScannerContext;
FTokenText: String;
FCurStringValueDelimiter: Char;
FAttrNameRead: Boolean;
protected
procedure EnterNewScannerContext(NewContext: THTMLScannerContext);
public
constructor Create;
destructor Destroy; override;
procedure Parse(AInput: TSAXInputSource); override; overload;
property EndOfStream: Boolean read FEndOfStream;
property ScannerContext: THTMLScannerContext read FScannerContext;
property TokenText: String read FTokenText;
end;
{ THTMLToDOMConverter }
THTMLNodeType = (ntWhitespace, ntText, ntEntityReference, ntTag);
THTMLNodeInfo = class
NodeType: THTMLNodeType;
DOMNode: TDOMNode;
end;
THTMLToDOMConverter = class
private
FReader: THTMLReader;
FDocument: TDOMDocument;
FElementStack: TList;
FNodeBuffer: TList;
procedure ReaderCharacters(Sender: TObject; const ch: PSAXChar;
Start, Count: Integer);
procedure ReaderIgnorableWhitespace(Sender: TObject; const ch: PSAXChar;
Start, Count: Integer);
procedure ReaderSkippedEntity(Sender: TObject; const Name: SAXString);
procedure ReaderStartElement(Sender: TObject;
const NamespaceURI, LocalName, RawName: SAXString; Attr: TSAXAttributes);
procedure ReaderEndElement(Sender: TObject;
const NamespaceURI, LocalName, RawName: SAXString);
public
constructor Create(AReader: THTMLReader; ADocument: TDOMDocument);
destructor Destroy; override;
end;
implementation
uses HTMLDefs;
const
WhitespaceChars = [#9, #10, #13, ' '];
constructor THTMLReader.Create;
begin
inherited Create;
FScannerContext := scUnknown;
end;
destructor THTMLReader.Destroy;
begin
if FStarted then
DoEndDocument;
inherited Destroy;
end;
procedure THTMLReader.Parse(AInput: TSAXInputSource);
const
MaxBufferSize = 1024;
var
Buffer: array[0..MaxBufferSize - 1] of Char;
BufferSize, BufferPos: Integer;
begin
if not FStarted then
begin
FStarted := True;
DoStartDocument;
end;
FEndOfStream := False;
while True do
begin
// Read data into the input buffer
BufferSize := AInput.Stream.Read(Buffer, MaxBufferSize);
if BufferSize = 0 then
begin
FEndOfStream := True;
break;
end;
BufferPos := 0;
while BufferPos < BufferSize do
case ScannerContext of
scUnknown:
case Buffer[BufferPos] of
#9, #10, #13, ' ':
EnterNewScannerContext(scWhitespace);
'&':
begin
Inc(BufferPos);
EnterNewScannerContext(scEntityReference);
end;
'<':
begin
Inc(BufferPos);
EnterNewScannerContext(scTag);
end;
else
EnterNewScannerContext(scText);
end;
scWhitespace:
case Buffer[BufferPos] of
#9, #10, #13, ' ':
begin
FTokenText := FTokenText + Buffer[BufferPos];
Inc(BufferPos);
end;
'&':
begin
Inc(BufferPos);
EnterNewScannerContext(scEntityReference);
end;
'<':
begin
Inc(BufferPos);
EnterNewScannerContext(scTag);
end;
else
EnterNewScannerContext(scText);
end;
scText:
case Buffer[BufferPos] of
#9, #10, #13, ' ':
EnterNewScannerContext(scWhitespace);
'&':
begin
Inc(BufferPos);
EnterNewScannerContext(scEntityReference);
end;
'<':
begin
Inc(BufferPos);
EnterNewScannerContext(scTag);
end;
else
begin
FTokenText := FTokenText + Buffer[BufferPos];
Inc(BufferPos);
end;
end;
scEntityReference:
if Buffer[BufferPos] = ';' then
begin
Inc(BufferPos);
EnterNewScannerContext(scUnknown);
end else if not (Buffer[BufferPos] in
['a'..'z', 'A'..'Z', '0'..'9', '#']) then
EnterNewScannerContext(scUnknown)
else
begin
FTokenText := FTokenText + Buffer[BufferPos];
Inc(BufferPos);
end;
scTag:
case Buffer[BufferPos] of
'''', '"':
begin
if FAttrNameRead then
begin
if FCurStringValueDelimiter = #0 then
FCurStringValueDelimiter := Buffer[BufferPos]
else if FCurStringValueDelimiter = Buffer[BufferPos] then
begin
FCurStringValueDelimiter := #0;
FAttrNameRead := False;
end;
end;
FTokenText := FTokenText + Buffer[BufferPos];
Inc(BufferPos);
end;
'=':
begin
FAttrNameRead := True;
FTokenText := FTokenText + Buffer[BufferPos];
Inc(BufferPos);
end;
'>':
begin
Inc(BufferPos);
if FCurStringValueDelimiter = #0 then
EnterNewScannerContext(scUnknown);
end;
else
begin
FTokenText := FTokenText + Buffer[BufferPos];
Inc(BufferPos);
end;
end;
end;
end;
end;
procedure THTMLReader.EnterNewScannerContext(NewContext: THTMLScannerContext);
function SplitTagString(const s: String; var Attr: TSAXAttributes): String;
var
i, j: Integer;
AttrName: String;
ValueDelimiter: Char;
DoIncJ: Boolean;
begin
Attr := nil;
i := Pos(' ', s);
if i <= 0 then
Result := LowerCase(s)
else
begin
Result := LowerCase(Copy(s, 1, i - 1));
Attr := TSAXAttributes.Create;
Inc(i);
while (i <= Length(s)) and (s[i] in WhitespaceChars) do
Inc(i);
SetLength(AttrName, 0);
j := i;
while j <= Length(s) do
if s[j] = '=' then
begin
AttrName := LowerCase(Copy(s, i, j - i));
Inc(j);
if (j < Length(s)) and ((s[j] = '''') or (s[j] = '"')) then
begin
ValueDelimiter := s[j];
Inc(j);
end else
ValueDelimiter := #0;
i := j;
DoIncJ := False;
while j <= Length(s) do
if ValueDelimiter = #0 then
if s[j] in WhitespaceChars then
break
else
Inc(j)
else if s[j] = ValueDelimiter then
begin
DoIncJ := True;
break
end else
Inc(j);
Attr.AddAttribute('', AttrName, '', '', Copy(s, i, j - i));
if DoIncJ then
Inc(j);
while (j <= Length(s)) and (s[j] in WhitespaceChars) do
Inc(j);
i := j;
end
else if s[j] in WhitespaceChars then
begin
Attr.AddAttribute('', Copy(s, i, j - i), '', '', '');
Inc(j);
while (j <= Length(s)) and (s[j] in WhitespaceChars) do
Inc(j);
i := j;
end else
Inc(j);
end;
end;
var
Attr: TSAXAttributes;
EntString, TagName: String;
Found: Boolean;
Ent: Char;
i: Integer;
begin
case ScannerContext of
scWhitespace:
DoIgnorableWhitespace(PChar(TokenText), 1, Length(TokenText));
scText:
DoCharacters(PChar(TokenText), 0, Length(TokenText));
scEntityReference:
begin
if ResolveHTMLEntityReference(TokenText, Ent) then
begin
EntString := Ent;
DoCharacters(PChar(EntString), 0, 1);
end else
begin
{ Is this a predefined Unicode character entity? We must check this,
as undefined entities must be handled as text, for compatiblity
to popular browsers... }
Found := False;
for i := Low(UnicodeHTMLEntities) to High(UnicodeHTMLEntities) do
if UnicodeHTMLEntities[i] = TokenText then
begin
Found := True;
break;
end;
if Found then
DoSkippedEntity(TokenText)
else
DoCharacters(PChar('&' + TokenText), 0, Length(TokenText) + 1);
end;
end;
scTag:
if Length(TokenText) > 0 then
begin
Attr := nil;
if TokenText[1] = '/' then
begin
DoEndElement('',
SplitTagString(Copy(TokenText, 2, Length(TokenText)), Attr), '');
end else if TokenText[1] <> '!' then
begin
// Do NOT combine to a single line, as Attr is an output value!
TagName := SplitTagString(TokenText, Attr);
DoStartElement('', TagName, '', Attr);
end;
if Assigned(Attr) then
Attr.Free;
end;
end;
FScannerContext := NewContext;
SetLength(FTokenText, 0);
FCurStringValueDelimiter := #0;
FAttrNameRead := False;
end;
{ THTMLToDOMConverter }
constructor THTMLToDOMConverter.Create(AReader: THTMLReader; ADocument: TDOMDocument);
begin
inherited Create;
FReader := AReader;
FReader.OnCharacters := @ReaderCharacters;
FReader.OnIgnorableWhitespace := @ReaderIgnorableWhitespace;
FReader.OnSkippedEntity := @ReaderSkippedEntity;
FReader.OnStartElement := @ReaderStartElement;
FReader.OnEndElement := @ReaderEndElement;
FDocument := ADocument;
FElementStack := TList.Create;
FNodeBuffer := TList.Create;
end;
destructor THTMLToDOMConverter.Destroy;
var
i: Integer;
begin
// Theoretically, always exactly one item will remain - the root element:
for i := 0 to FNodeBuffer.Count - 1 do
THTMLNodeInfo(FNodeBuffer[i]).Free;
FNodeBuffer.Free;
FElementStack.Free;
inherited Destroy;
end;
procedure THTMLToDOMConverter.ReaderCharacters(Sender: TObject;
const ch: PSAXChar; Start, Count: Integer);
var
s: String;
NodeInfo: THTMLNodeInfo;
begin
SetLength(s, Count);
Move(ch^, s[1], Count * SizeOf(SAXChar));
NodeInfo := THTMLNodeInfo.Create;
NodeInfo.NodeType := ntText;
NodeInfo.DOMNode := FDocument.CreateTextNode(s);
FNodeBuffer.Add(NodeInfo);
end;
procedure THTMLToDOMConverter.ReaderIgnorableWhitespace(Sender: TObject;
const ch: PSAXChar; Start, Count: Integer);
var
s: String;
NodeInfo: THTMLNodeInfo;
begin
SetLength(s, Count);
Move(ch^, s[1], Count * SizeOf(SAXChar));
NodeInfo := THTMLNodeInfo.Create;
NodeInfo.NodeType := ntWhitespace;
NodeInfo.DOMNode := FDocument.CreateTextNode(s);
FNodeBuffer.Add(NodeInfo);
end;
procedure THTMLToDOMConverter.ReaderSkippedEntity(Sender: TObject;
const Name: SAXString);
var
NodeInfo: THTMLNodeInfo;
begin
NodeInfo := THTMLNodeInfo.Create;
NodeInfo.NodeType := ntEntityReference;
NodeInfo.DOMNode := FDocument.CreateEntityReference(Name);
FNodeBuffer.Add(NodeInfo);
end;
procedure THTMLToDOMConverter.ReaderStartElement(Sender: TObject;
const NamespaceURI, LocalName, RawName: SAXString; Attr: TSAXAttributes);
var
NodeInfo: THTMLNodeInfo;
Element: TDOMElement;
i: Integer;
begin
Element := FDocument.CreateElement(LocalName);
if Assigned(Attr) then
begin
// WriteLn('Attribute: ', Attr.GetLength);
for i := 0 to Attr.GetLength - 1 do
begin
// WriteLn('#', i, ': LocalName = ', Attr.GetLocalName(i), ', Value = ', Attr.GetValue(i));
Element[Attr.GetLocalName(i)] := Attr.GetValue(i);
end;
end;
NodeInfo := THTMLNodeInfo.Create;
NodeInfo.NodeType := ntTag;
NodeInfo.DOMNode := Element;
if not Assigned(FDocument.DocumentElement) then
FDocument.AppendChild(NodeInfo.DOMNode);
FNodeBuffer.Add(NodeInfo);
end;
procedure THTMLToDOMConverter.ReaderEndElement(Sender: TObject;
const NamespaceURI, LocalName, RawName: SAXString);
var
NodeInfo, NodeInfo2: THTMLNodeInfo;
i, j: Integer;
TagInfo: PHTMLElementProps;
IsFirst: Boolean;
begin
// WriteLn('End: ', LocalName);
// Find the matching start tag
i := FNodeBuffer.Count - 1;
while i >= 0 do
begin
NodeInfo := THTMLNodeInfo(FNodeBuffer.Items[i]);
if (NodeInfo.NodeType = ntTag) and
(CompareText(NodeInfo.DOMNode.NodeName, LocalName) = 0) then
begin
// We found the matching start tag
TagInfo := nil;
for j := Low(HTMLElProps) to High(HTMLElProps) do
if CompareText(HTMLElProps[j].Name, LocalName) = 0 then
begin
TagInfo := @HTMLElProps[j];
break;
end;
Inc(i);
IsFirst := True;
while i < FNodeBuffer.Count do
begin
NodeInfo2 := THTMLNodeInfo(FNodeBuffer.Items[i]);
if (NodeInfo2.NodeType = ntWhitespace) and Assigned(TagInfo) and
(not (efPreserveWhitespace in TagInfo^.Flags)) then
// Handle whitespace, which doesn't need to get preserved...
if not (efPCDATAContent in TagInfo^.Flags) then
// No character data allowed within the current element
NodeInfo2.DOMNode.Free
else
begin
// Character data allowed, so normalize it
NodeInfo2.DOMNode.NodeValue := ' ';
NodeInfo.DOMNode.AppendChild(NodeInfo2.DOMNode)
end;
NodeInfo2.Free;
FNodeBuffer.Delete(i);
IsFirst := False;
end;
break;
end;
Dec(i);
end;
end;
end.
{
$Log$
Revision 1.1 2002-12-11 21:06:07 sg
* Small cleanups
* Replaced htmldoc unit with dom_html unit
* Added SAX parser framework and SAX HTML parser
}