* Patch from Sergei Gorelkin:

* excludes #$FFFE and #$FFFF from allowed XML 1.1 name chars, so
  IsXmlName result is correct when its argument comes not from the
  parser.
xmlread.pp:
+ Two new parsing options, Namespaces and ResolveExternals (not
  functional yet but needed to proceed).
* Fixed checking of WFC [28a], forces fatal error as soon as possible
  and prevents parsing of further (potentially malicious) data.
  Hopefully now it is truly compliant to the specs and not just
  satisfies the tests.
* In entity value literals, nesting is checked by entity, not by the
  input source (consistent to other places).
- Saving FCursor around attribute default value isn't necessary because
  FCursor is always nil while parsing the DTD.
* TList's changed to more lightweight TFPList's.
* Changed once more (probably the last time) recognizing the standalone
  percent sign in parameter entity declarations. Rationale is that
  FCurChar is no more out of sync with FSource.FBuf^, and therefore may
  be removed.

tests/xmlts.pp and tests/README:
+ Added support for the latest XML test suite (by skipping tests
  targeted for the upcoming fifth edition of XML specs).
+ 'Namespaces' option is passed to the parser.
* README updated with the latest testsuite URL.

git-svn-id: trunk@11303 -
This commit is contained in:
michael 2008-07-01 19:14:56 +00:00
parent f3afd6d934
commit d812fa0c92
5 changed files with 81 additions and 72 deletions

View File

@ -58,7 +58,7 @@ const
ns_3000 = [$41..$94, $A1..$FA] + [$07, $21..$29];
namingBitmap: array[0..$2F] of TSetOfByte = (
namingBitmap: array[0..$30] of TSetOfByte = (
[], // 00 - nothing allowed
[0..255], // 01 - all allowed
@ -165,10 +165,11 @@ const
[$70..$7D, $7F..$FF], // 2C $0300 - NameStart
[1..$FF], // 2D $3000 - NameStart
[0..$7D, $7F..$FF], // 2E $0300 - Names
[$0C..$0D, $3F..$40, $70..$FF] // 2F $2000 - Names
[$0C..$0D, $3F..$40, $70..$FF], // 2F $2000 - Names
[$00..$FD] // 30 $FF00 - both Name and NameStart
);
Xml11HighPages: TSetOfByte = [0..$21, $2C..$D7, $F9..$FF];
Xml11HighPages: TSetOfByte = [0..$21, $2C..$D7, $F9..$FE];
NamePages: array[0..511] of Byte = (
$02, $03, $04, $05, $06, $07, $08, $00,

View File

@ -64,12 +64,16 @@ type
FExpandEntities: Boolean;
FIgnoreComments: Boolean;
FCDSectionsAsText: Boolean;
FResolveExternals: Boolean;
FNamespaces: Boolean;
public
property Validate: Boolean read FValidate write FValidate;
property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
property ResolveExternals: Boolean read FResolveExternals write FResolveExternals;
property Namespaces: Boolean read FNamespaces write FNamespaces;
end;
// NOTE: DOM 3 LS ACTION_TYPE enumeration starts at 1
@ -148,6 +152,7 @@ type
FExternallyDeclared: Boolean;
FResolved: Boolean;
FOnStack: Boolean;
FBetweenDecls: Boolean;
FReplacementText: DOMString;
FStartLocation: TLocation;
end;
@ -162,6 +167,7 @@ type
FCursor: TObject; // weak reference
FLocation: TLocation;
LFPos: PWideChar;
FXML11Rules: Boolean;
FSystemID: WideString;
FPublicID: WideString;
FReloadHook: procedure of object;
@ -188,7 +194,6 @@ type
FBufStart: PWideChar;
FDecoder: TDecoder;
FSeenCR: Boolean;
FXML11Rules: Boolean;
FFixedUCS2: string;
FBufSize: Integer;
FSurrogate: WideChar;
@ -243,7 +248,7 @@ type
TContentParticle = class(TObject)
private
FParent: TContentParticle;
FChildren: TList;
FChildren: TFPList;
FIndex: Integer;
function GetChildCount: Integer;
function GetChild(Index: Integer): TContentParticle;
@ -294,6 +299,7 @@ type
FState: TXMLReadState;
FRecognizePE: Boolean;
FHavePERefs: Boolean;
FInsideDecl: Boolean;
FDocNotValid: Boolean;
FValue: TWideCharBuf;
FName: TWideCharBuf;
@ -302,8 +308,8 @@ type
FNamePages: PByteArray;
FDocType: TDOMDocumentTypeEx; // a shortcut
FPEMap: TDOMNamedNodeMap;
FIDRefs: TList;
FNotationRefs: TList;
FIDRefs: TFPList;
FNotationRefs: TFPList;
FCurrContentType: TElementContentType;
FSaViolation: Boolean;
FDTDStartPos: PWideChar;
@ -314,6 +320,8 @@ type
FExpandEntities: Boolean;
FIgnoreComments: Boolean;
FCDSectionsAsText: Boolean;
FResolveExternals: Boolean;
FNamespaces: Boolean;
procedure RaiseExpectedQmark;
procedure GetChar;
@ -328,8 +336,8 @@ type
procedure ParseQuantity(CP: TContentParticle);
procedure StoreLocation(out Loc: TLocation);
function ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: WideString): Boolean;
procedure AddForwardRef(aList: TList; Buf: PWideChar; Length: Integer);
procedure ClearRefs(aList: TList);
procedure AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer);
procedure ClearRefs(aList: TFPList);
procedure ValidateIdRefs;
procedure StandaloneError(LineOffs: Integer = 0);
procedure CallErrorHandler(E: EXMLReadError);
@ -345,7 +353,7 @@ type
procedure FatalError(const descr: String; LineOffs: Integer=0); overload;
procedure FatalError(const descr: string; const args: array of const; LineOffs: Integer=0); overload;
procedure FatalError(Expected: WideChar); overload;
function SkipWhitespace: Boolean;
function SkipWhitespace(PercentAloneIsOk: Boolean = False): Boolean;
function SkipWhitespaceRaw: Boolean;
procedure ExpectWhitespace;
procedure ExpectString(const s: String);
@ -1111,7 +1119,7 @@ begin
E.Free;
end;
function TXMLReader.SkipWhitespace: Boolean;
function TXMLReader.SkipWhitespace(PercentAloneIsOk: Boolean): Boolean;
begin
Result := False;
repeat
@ -1129,19 +1137,24 @@ begin
'%': begin
if not FRecognizePE then
Exit;
GetChar;
if not CheckName then
// This is the only case where look-ahead is needed
if FSource.FBuf > FSource.FBufEnd-2 then
FSource.Reload;
if (not PercentAloneIsOk) or
(Byte(FSource.FBuf[1]) in NamingBitmap[FNamePages^[hi(Word(FSource.FBuf[1]))]]) or
(FXML11 and (FSource.FBuf[1] >= #$D800) and (FSource.FBuf[1] <= #$DB7F)) then
begin
if (FCurChar <> #32) and (FCurChar <> #10) and (FCurChar <> #9) and (FCurChar <> #13) then
FatalError('Expected whitespace');
FCurChar := '%';
Exit;
end;
ExpectChar(';');
StartPE;
Result := True; // report whitespace on both ends of PE
Continue;
end;
Inc(FSource.FBuf); // skip '%'
FCurChar := FSource.FBuf^;
if not CheckName then
RaiseNameNotFound;
ExpectChar(';');
StartPE;
Result := True; // report whitespace upon entering the PE
Continue;
end
else Break;
end
else
Exit;
end;
@ -1219,8 +1232,8 @@ begin
inherited Create;
BufAllocate(FName, 128);
BufAllocate(FValue, 512);
FIDRefs := TList.Create;
FNotationRefs := TList.Create;
FIDRefs := TFPList.Create;
FNotationRefs := TFPList.Create;
// Set char rules to XML 1.0
FNamePages := @NamePages;
@ -1236,6 +1249,8 @@ begin
FExpandEntities := FCtrl.Options.ExpandEntities;
FCDSectionsAsText := FCtrl.Options.CDSectionsAsText;
FIgnoreComments := FCtrl.Options.IgnoreComments;
FResolveExternals := FCtrl.Options.ResolveExternals;
FNamespaces := FCtrl.Options.Namespaces;
end;
destructor TXMLReader.Destroy;
@ -1257,8 +1272,7 @@ procedure TXMLReader.XML11_BuildTables;
begin
FNamePages := Xml11NamePages;
FXML11 := True;
{ switching to xml11 may occur only with DecodingSource }
TXMLDecodingSource(FSource).FXml11Rules := True;
FSource.FXml11Rules := True;
end;
procedure TXMLReader.ProcessXML(ASource: TXMLCharSource);
@ -1504,17 +1518,26 @@ end;
function TXMLReader.ContextPop: Boolean;
var
Src: TXMLCharSource;
Error: Boolean;
begin
Result := Assigned(FSource.FParent) and (FSource.DTDSubsetType = dsNone);
if Result then
begin
Src := FSource.FParent;
Error := False;
if Assigned(FSource.FEntity) then
begin
TDOMEntityEx(FSource.FEntity).FOnStack := False;
// [28a] PE that was started between MarkupDecls may not end inside MarkupDecl
Error := TDOMEntityEx(FSource.FEntity).FBetweenDecls and FInsideDecl;
end;
FCursor := TDOMNode(FSource.FCursor);
FSource.Free;
FSource := Src;
FCurChar := FSource.FBuf^;
// correct position of this error is after PE reference
if Error then
BadPENesting(esFatal);
end;
end;
@ -1600,6 +1623,7 @@ begin
if PEnt.FOnStack then
FatalError('Entity ''%%%s'' recursively references itself', [PEnt.NodeName]);
PEnt.FBetweenDecls := not FInsideDecl;
ContextPush(PEnt);
FHavePERefs := True;
end;
@ -2124,7 +2148,6 @@ end;
procedure TXMLReader.ParseAttlistDecl; // [52]
var
SaveCurNode: TDOMNode;
ValueRequired: Boolean;
Token: WideString;
ElDef: TDOMElementDef;
@ -2233,18 +2256,15 @@ begin
if AttDef.FDataType = dtId then
ValidationError('An attribute of type ID cannot have a default value',[]);
SaveCurNode := FCursor;
FCursor := AttDef;
// TODO: move this to ExpectAttValue?
StoreLocation(FTokenStart);
Inc(FTokenStart.LinePos);
// See comments to valid-sa-094: PE expansion should be disabled in AttDef.
// ExpectAttValue() does not recognize PEs anyway, so setting FRecognizePEs isn't needed
try
ExpectAttValue;
finally
FCursor := SaveCurNode;
end;
// Saving/restoring FCursor is also redundant because it is always nil here.
ExpectAttValue;
FCursor := nil;
if not ValidateAttrSyntax(AttDef, AttDef.NodeValue) then
ValidationError('Default value for attribute ''%s'' has wrong syntax', [AttDef.Name]);
end;
@ -2265,11 +2285,11 @@ end;
function TXMLReader.ParseEntityDeclValue(Delim: WideChar): Boolean; // [9]
var
Src: TXMLCharSource;
CurrentEntity: TObject;
begin
Src := FSource;
CurrentEntity := FSource.FEntity;
// "Included in literal": process until delimiter hit IN SAME context
while not ((FSource = Src) and CheckForChar(Delim)) do
while not ((FSource.FEntity = CurrentEntity) and CheckForChar(Delim)) do
if CheckForChar('%') then
begin
if not CheckName then
@ -2308,16 +2328,13 @@ var
Entity: TDOMEntityEx;
Map: TDOMNamedNodeMap;
begin
ExpectWhitespace;
if not SkipWhitespace(True) then
FatalError('Expected whitespace');
NDataAllowed := True;
Map := FDocType.Entities;
if CheckForChar('%') then // [72]
begin
if FRecognizePE then
SkipWhitespace // we know that there IS whitespace due to the check in
// previous call to SkipWhitespace
else
ExpectWhitespace;
ExpectWhitespace;
NDataAllowed := False;
if FPEMap = nil then
FPEMap := TDOMNamedNodeMap.Create(FDocType, ENTITY_NODE);
@ -2338,10 +2355,7 @@ begin
StoreLocation(Entity.FStartLocation);
FValue.Length := 0;
if not ParseEntityDeclValue(Delim) then
begin
FTokenStart := Entity.FStartLocation;
FatalError('Literal has no closing quote', -1);
end;
DoErrorPos(esFatal, 'Literal has no closing quote', Entity.FStartLocation);
SetString(Entity.FReplacementText, FValue.Buffer, FValue.Length);
end
else
@ -2453,6 +2467,7 @@ begin
else
begin
FRecognizePE := FSource.DTDSubsetType <> dsInternal;
FInsideDecl := True;
Token := GetString(['A'..'Z']);
if Token = 'ELEMENT' then
ParseElementDecl
@ -2467,17 +2482,11 @@ begin
SkipWhitespace;
FRecognizePE := False;
{
MarkupDecl starting in PE and ending in root is a WFC [28a]
MarkupDecl starting in root but ending in PE is a VC (erratum 2e-14)
}
// TODO: what if statrs in PE1 and ends in PE2, and other cases?
if CurrentEntity <> FSource.FEntity then
if Assigned(FSource.FEntity) then { ends in PE }
BadPENesting(esError)
else
BadPENesting(esFatal);
if CurrentEntity <> FSource.FEntity then
BadPENesting;
ExpectChar('>');
FInsideDecl := False;
end;
end;
until False;
@ -2659,7 +2668,7 @@ begin
PopVC;
end;
procedure TXMLReader.AddForwardRef(aList: TList; Buf: PWideChar; Length: Integer);
procedure TXMLReader.AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer);
var
w: PForwardRef;
begin
@ -2671,7 +2680,7 @@ begin
aList.Add(w);
end;
procedure TXMLReader.ClearRefs(aList: TList);
procedure TXMLReader.ClearRefs(aList: TFPList);
var
I: Integer;
begin
@ -3088,7 +3097,7 @@ end;
function TContentParticle.Add: TContentParticle;
begin
if FChildren = nil then
FChildren := TList.Create;
FChildren := TFPList.Create;
Result := TContentParticle.Create;
Result.FParent := Self;
Result.FIndex := FChildren.Add(Result);

View File

@ -55,6 +55,7 @@ begin
p^[$2f] := $29;
p^[$30] := $2d;
p^[$fd] := $28;
p^[$ff] := $30;
Move(p^, p^[256], 256);
p^[$100] := $19;

View File

@ -3,7 +3,7 @@ Test runner for w3.org XML compliance suite
The xmlts is intended to run the XML compliance suite from W3.org.
The suite includes 2500+ tests. It may be downloaded from
http://www.w3.org/XML/Test/xmlts20031210.zip (approx. 1.7 mBytes)
http://www.w3.org/XML/Test/xmlts20080205.zip (approx. 1.7 mBytes)
After compiling xmlts.pp, run it with the following command line:
xmlts <path-to-xmlconf.xml> <report-filename> [-t template.xml] [-v]
@ -23,16 +23,7 @@ Report is produced in xhtml format, use your favourite browser to view it.
As of 10.03.2007, the xml package does not support namespaces yet, so you might wish
to exclude namespace tests. To do this, edit xmlconf/xmlconf.xml file and comment out
two lines at the bottom which reference 'eduni-ns10' and 'eduni-ns11' testsuites.
(The last lines should look like:
&eduni-xml11;
<!-- &eduni-ns10; -->
<!-- &eduni-ns11; -->
</TESTSUITE>
)
the lines that contain references &eduni-ns10; &eduni-ns11; and &eduni-nse;
Testsuite errata

View File

@ -232,6 +232,7 @@ begin
if Child.NodeName = 'run-id' then
begin
newChild := nil;
if Data = 'name' then
newChild := FTemplate.createTextNode(parser)
else if Data = 'description' then
@ -358,6 +359,12 @@ begin
FErrCol := -1;
FTestID := Element['ID'];
TestType := Element['TYPE'];
if Pos(WideChar('5'), Element['EDITION']) > 0 then
begin
Inc(FSkipped);
Exit;
end;
root := GetBaseURI(Element, FRootUri);
ResolveRelativeURI(root, UTF8Encode(Element['URI']), s);
@ -393,7 +400,7 @@ begin
try
try
FParser.Options.Validate := FValidating;
// FParser.Options.Namespaces := (Element['NAMESPACE'] <> 'no');
FParser.Options.Namespaces := (Element['NAMESPACE'] <> 'no');
FParser.OnError := {$IFDEF FPC}@{$ENDIF}ErrorHandler;
FParser.ParseUri(s, TempDoc);
except