mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-01 11:51:01 +02:00
* Patch from Sergei Gorelkin:
* excludes #$FFFE and #$FFFF from allowed XML 1.1 name chars, so IsXmlName result is correct when its argument comes not from the parser. xmlread.pp: + Two new parsing options, Namespaces and ResolveExternals (not functional yet but needed to proceed). * Fixed checking of WFC [28a], forces fatal error as soon as possible and prevents parsing of further (potentially malicious) data. Hopefully now it is truly compliant to the specs and not just satisfies the tests. * In entity value literals, nesting is checked by entity, not by the input source (consistent to other places). - Saving FCursor around attribute default value isn't necessary because FCursor is always nil while parsing the DTD. * TList's changed to more lightweight TFPList's. * Changed once more (probably the last time) recognizing the standalone percent sign in parameter entity declarations. Rationale is that FCurChar is no more out of sync with FSource.FBuf^, and therefore may be removed. tests/xmlts.pp and tests/README: + Added support for the latest XML test suite (by skipping tests targeted for the upcoming fifth edition of XML specs). + 'Namespaces' option is passed to the parser. * README updated with the latest testsuite URL. git-svn-id: trunk@11303 -
This commit is contained in:
parent
f3afd6d934
commit
d812fa0c92
@ -58,7 +58,7 @@ const
|
||||
|
||||
ns_3000 = [$41..$94, $A1..$FA] + [$07, $21..$29];
|
||||
|
||||
namingBitmap: array[0..$2F] of TSetOfByte = (
|
||||
namingBitmap: array[0..$30] of TSetOfByte = (
|
||||
|
||||
[], // 00 - nothing allowed
|
||||
[0..255], // 01 - all allowed
|
||||
@ -165,10 +165,11 @@ const
|
||||
[$70..$7D, $7F..$FF], // 2C $0300 - NameStart
|
||||
[1..$FF], // 2D $3000 - NameStart
|
||||
[0..$7D, $7F..$FF], // 2E $0300 - Names
|
||||
[$0C..$0D, $3F..$40, $70..$FF] // 2F $2000 - Names
|
||||
[$0C..$0D, $3F..$40, $70..$FF], // 2F $2000 - Names
|
||||
[$00..$FD] // 30 $FF00 - both Name and NameStart
|
||||
);
|
||||
|
||||
Xml11HighPages: TSetOfByte = [0..$21, $2C..$D7, $F9..$FF];
|
||||
Xml11HighPages: TSetOfByte = [0..$21, $2C..$D7, $F9..$FE];
|
||||
|
||||
NamePages: array[0..511] of Byte = (
|
||||
$02, $03, $04, $05, $06, $07, $08, $00,
|
||||
|
@ -64,12 +64,16 @@ type
|
||||
FExpandEntities: Boolean;
|
||||
FIgnoreComments: Boolean;
|
||||
FCDSectionsAsText: Boolean;
|
||||
FResolveExternals: Boolean;
|
||||
FNamespaces: Boolean;
|
||||
public
|
||||
property Validate: Boolean read FValidate write FValidate;
|
||||
property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
|
||||
property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
|
||||
property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
|
||||
property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
|
||||
property ResolveExternals: Boolean read FResolveExternals write FResolveExternals;
|
||||
property Namespaces: Boolean read FNamespaces write FNamespaces;
|
||||
end;
|
||||
|
||||
// NOTE: DOM 3 LS ACTION_TYPE enumeration starts at 1
|
||||
@ -148,6 +152,7 @@ type
|
||||
FExternallyDeclared: Boolean;
|
||||
FResolved: Boolean;
|
||||
FOnStack: Boolean;
|
||||
FBetweenDecls: Boolean;
|
||||
FReplacementText: DOMString;
|
||||
FStartLocation: TLocation;
|
||||
end;
|
||||
@ -162,6 +167,7 @@ type
|
||||
FCursor: TObject; // weak reference
|
||||
FLocation: TLocation;
|
||||
LFPos: PWideChar;
|
||||
FXML11Rules: Boolean;
|
||||
FSystemID: WideString;
|
||||
FPublicID: WideString;
|
||||
FReloadHook: procedure of object;
|
||||
@ -188,7 +194,6 @@ type
|
||||
FBufStart: PWideChar;
|
||||
FDecoder: TDecoder;
|
||||
FSeenCR: Boolean;
|
||||
FXML11Rules: Boolean;
|
||||
FFixedUCS2: string;
|
||||
FBufSize: Integer;
|
||||
FSurrogate: WideChar;
|
||||
@ -243,7 +248,7 @@ type
|
||||
TContentParticle = class(TObject)
|
||||
private
|
||||
FParent: TContentParticle;
|
||||
FChildren: TList;
|
||||
FChildren: TFPList;
|
||||
FIndex: Integer;
|
||||
function GetChildCount: Integer;
|
||||
function GetChild(Index: Integer): TContentParticle;
|
||||
@ -294,6 +299,7 @@ type
|
||||
FState: TXMLReadState;
|
||||
FRecognizePE: Boolean;
|
||||
FHavePERefs: Boolean;
|
||||
FInsideDecl: Boolean;
|
||||
FDocNotValid: Boolean;
|
||||
FValue: TWideCharBuf;
|
||||
FName: TWideCharBuf;
|
||||
@ -302,8 +308,8 @@ type
|
||||
FNamePages: PByteArray;
|
||||
FDocType: TDOMDocumentTypeEx; // a shortcut
|
||||
FPEMap: TDOMNamedNodeMap;
|
||||
FIDRefs: TList;
|
||||
FNotationRefs: TList;
|
||||
FIDRefs: TFPList;
|
||||
FNotationRefs: TFPList;
|
||||
FCurrContentType: TElementContentType;
|
||||
FSaViolation: Boolean;
|
||||
FDTDStartPos: PWideChar;
|
||||
@ -314,6 +320,8 @@ type
|
||||
FExpandEntities: Boolean;
|
||||
FIgnoreComments: Boolean;
|
||||
FCDSectionsAsText: Boolean;
|
||||
FResolveExternals: Boolean;
|
||||
FNamespaces: Boolean;
|
||||
|
||||
procedure RaiseExpectedQmark;
|
||||
procedure GetChar;
|
||||
@ -328,8 +336,8 @@ type
|
||||
procedure ParseQuantity(CP: TContentParticle);
|
||||
procedure StoreLocation(out Loc: TLocation);
|
||||
function ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: WideString): Boolean;
|
||||
procedure AddForwardRef(aList: TList; Buf: PWideChar; Length: Integer);
|
||||
procedure ClearRefs(aList: TList);
|
||||
procedure AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer);
|
||||
procedure ClearRefs(aList: TFPList);
|
||||
procedure ValidateIdRefs;
|
||||
procedure StandaloneError(LineOffs: Integer = 0);
|
||||
procedure CallErrorHandler(E: EXMLReadError);
|
||||
@ -345,7 +353,7 @@ type
|
||||
procedure FatalError(const descr: String; LineOffs: Integer=0); overload;
|
||||
procedure FatalError(const descr: string; const args: array of const; LineOffs: Integer=0); overload;
|
||||
procedure FatalError(Expected: WideChar); overload;
|
||||
function SkipWhitespace: Boolean;
|
||||
function SkipWhitespace(PercentAloneIsOk: Boolean = False): Boolean;
|
||||
function SkipWhitespaceRaw: Boolean;
|
||||
procedure ExpectWhitespace;
|
||||
procedure ExpectString(const s: String);
|
||||
@ -1111,7 +1119,7 @@ begin
|
||||
E.Free;
|
||||
end;
|
||||
|
||||
function TXMLReader.SkipWhitespace: Boolean;
|
||||
function TXMLReader.SkipWhitespace(PercentAloneIsOk: Boolean): Boolean;
|
||||
begin
|
||||
Result := False;
|
||||
repeat
|
||||
@ -1129,19 +1137,24 @@ begin
|
||||
'%': begin
|
||||
if not FRecognizePE then
|
||||
Exit;
|
||||
GetChar;
|
||||
if not CheckName then
|
||||
// This is the only case where look-ahead is needed
|
||||
if FSource.FBuf > FSource.FBufEnd-2 then
|
||||
FSource.Reload;
|
||||
if (not PercentAloneIsOk) or
|
||||
(Byte(FSource.FBuf[1]) in NamingBitmap[FNamePages^[hi(Word(FSource.FBuf[1]))]]) or
|
||||
(FXML11 and (FSource.FBuf[1] >= #$D800) and (FSource.FBuf[1] <= #$DB7F)) then
|
||||
begin
|
||||
if (FCurChar <> #32) and (FCurChar <> #10) and (FCurChar <> #9) and (FCurChar <> #13) then
|
||||
FatalError('Expected whitespace');
|
||||
FCurChar := '%';
|
||||
Exit;
|
||||
end;
|
||||
ExpectChar(';');
|
||||
StartPE;
|
||||
Result := True; // report whitespace on both ends of PE
|
||||
Continue;
|
||||
end;
|
||||
Inc(FSource.FBuf); // skip '%'
|
||||
FCurChar := FSource.FBuf^;
|
||||
if not CheckName then
|
||||
RaiseNameNotFound;
|
||||
ExpectChar(';');
|
||||
StartPE;
|
||||
Result := True; // report whitespace upon entering the PE
|
||||
Continue;
|
||||
end
|
||||
else Break;
|
||||
end
|
||||
else
|
||||
Exit;
|
||||
end;
|
||||
@ -1219,8 +1232,8 @@ begin
|
||||
inherited Create;
|
||||
BufAllocate(FName, 128);
|
||||
BufAllocate(FValue, 512);
|
||||
FIDRefs := TList.Create;
|
||||
FNotationRefs := TList.Create;
|
||||
FIDRefs := TFPList.Create;
|
||||
FNotationRefs := TFPList.Create;
|
||||
|
||||
// Set char rules to XML 1.0
|
||||
FNamePages := @NamePages;
|
||||
@ -1236,6 +1249,8 @@ begin
|
||||
FExpandEntities := FCtrl.Options.ExpandEntities;
|
||||
FCDSectionsAsText := FCtrl.Options.CDSectionsAsText;
|
||||
FIgnoreComments := FCtrl.Options.IgnoreComments;
|
||||
FResolveExternals := FCtrl.Options.ResolveExternals;
|
||||
FNamespaces := FCtrl.Options.Namespaces;
|
||||
end;
|
||||
|
||||
destructor TXMLReader.Destroy;
|
||||
@ -1257,8 +1272,7 @@ procedure TXMLReader.XML11_BuildTables;
|
||||
begin
|
||||
FNamePages := Xml11NamePages;
|
||||
FXML11 := True;
|
||||
{ switching to xml11 may occur only with DecodingSource }
|
||||
TXMLDecodingSource(FSource).FXml11Rules := True;
|
||||
FSource.FXml11Rules := True;
|
||||
end;
|
||||
|
||||
procedure TXMLReader.ProcessXML(ASource: TXMLCharSource);
|
||||
@ -1504,17 +1518,26 @@ end;
|
||||
function TXMLReader.ContextPop: Boolean;
|
||||
var
|
||||
Src: TXMLCharSource;
|
||||
Error: Boolean;
|
||||
begin
|
||||
Result := Assigned(FSource.FParent) and (FSource.DTDSubsetType = dsNone);
|
||||
if Result then
|
||||
begin
|
||||
Src := FSource.FParent;
|
||||
Error := False;
|
||||
if Assigned(FSource.FEntity) then
|
||||
begin
|
||||
TDOMEntityEx(FSource.FEntity).FOnStack := False;
|
||||
// [28a] PE that was started between MarkupDecls may not end inside MarkupDecl
|
||||
Error := TDOMEntityEx(FSource.FEntity).FBetweenDecls and FInsideDecl;
|
||||
end;
|
||||
FCursor := TDOMNode(FSource.FCursor);
|
||||
FSource.Free;
|
||||
FSource := Src;
|
||||
FCurChar := FSource.FBuf^;
|
||||
// correct position of this error is after PE reference
|
||||
if Error then
|
||||
BadPENesting(esFatal);
|
||||
end;
|
||||
end;
|
||||
|
||||
@ -1600,6 +1623,7 @@ begin
|
||||
if PEnt.FOnStack then
|
||||
FatalError('Entity ''%%%s'' recursively references itself', [PEnt.NodeName]);
|
||||
|
||||
PEnt.FBetweenDecls := not FInsideDecl;
|
||||
ContextPush(PEnt);
|
||||
FHavePERefs := True;
|
||||
end;
|
||||
@ -2124,7 +2148,6 @@ end;
|
||||
|
||||
procedure TXMLReader.ParseAttlistDecl; // [52]
|
||||
var
|
||||
SaveCurNode: TDOMNode;
|
||||
ValueRequired: Boolean;
|
||||
Token: WideString;
|
||||
ElDef: TDOMElementDef;
|
||||
@ -2233,18 +2256,15 @@ begin
|
||||
if AttDef.FDataType = dtId then
|
||||
ValidationError('An attribute of type ID cannot have a default value',[]);
|
||||
|
||||
SaveCurNode := FCursor;
|
||||
FCursor := AttDef;
|
||||
// TODO: move this to ExpectAttValue?
|
||||
StoreLocation(FTokenStart);
|
||||
Inc(FTokenStart.LinePos);
|
||||
// See comments to valid-sa-094: PE expansion should be disabled in AttDef.
|
||||
// ExpectAttValue() does not recognize PEs anyway, so setting FRecognizePEs isn't needed
|
||||
try
|
||||
ExpectAttValue;
|
||||
finally
|
||||
FCursor := SaveCurNode;
|
||||
end;
|
||||
// Saving/restoring FCursor is also redundant because it is always nil here.
|
||||
ExpectAttValue;
|
||||
FCursor := nil;
|
||||
if not ValidateAttrSyntax(AttDef, AttDef.NodeValue) then
|
||||
ValidationError('Default value for attribute ''%s'' has wrong syntax', [AttDef.Name]);
|
||||
end;
|
||||
@ -2265,11 +2285,11 @@ end;
|
||||
|
||||
function TXMLReader.ParseEntityDeclValue(Delim: WideChar): Boolean; // [9]
|
||||
var
|
||||
Src: TXMLCharSource;
|
||||
CurrentEntity: TObject;
|
||||
begin
|
||||
Src := FSource;
|
||||
CurrentEntity := FSource.FEntity;
|
||||
// "Included in literal": process until delimiter hit IN SAME context
|
||||
while not ((FSource = Src) and CheckForChar(Delim)) do
|
||||
while not ((FSource.FEntity = CurrentEntity) and CheckForChar(Delim)) do
|
||||
if CheckForChar('%') then
|
||||
begin
|
||||
if not CheckName then
|
||||
@ -2308,16 +2328,13 @@ var
|
||||
Entity: TDOMEntityEx;
|
||||
Map: TDOMNamedNodeMap;
|
||||
begin
|
||||
ExpectWhitespace;
|
||||
if not SkipWhitespace(True) then
|
||||
FatalError('Expected whitespace');
|
||||
NDataAllowed := True;
|
||||
Map := FDocType.Entities;
|
||||
if CheckForChar('%') then // [72]
|
||||
begin
|
||||
if FRecognizePE then
|
||||
SkipWhitespace // we know that there IS whitespace due to the check in
|
||||
// previous call to SkipWhitespace
|
||||
else
|
||||
ExpectWhitespace;
|
||||
ExpectWhitespace;
|
||||
NDataAllowed := False;
|
||||
if FPEMap = nil then
|
||||
FPEMap := TDOMNamedNodeMap.Create(FDocType, ENTITY_NODE);
|
||||
@ -2338,10 +2355,7 @@ begin
|
||||
StoreLocation(Entity.FStartLocation);
|
||||
FValue.Length := 0;
|
||||
if not ParseEntityDeclValue(Delim) then
|
||||
begin
|
||||
FTokenStart := Entity.FStartLocation;
|
||||
FatalError('Literal has no closing quote', -1);
|
||||
end;
|
||||
DoErrorPos(esFatal, 'Literal has no closing quote', Entity.FStartLocation);
|
||||
SetString(Entity.FReplacementText, FValue.Buffer, FValue.Length);
|
||||
end
|
||||
else
|
||||
@ -2453,6 +2467,7 @@ begin
|
||||
else
|
||||
begin
|
||||
FRecognizePE := FSource.DTDSubsetType <> dsInternal;
|
||||
FInsideDecl := True;
|
||||
Token := GetString(['A'..'Z']);
|
||||
if Token = 'ELEMENT' then
|
||||
ParseElementDecl
|
||||
@ -2467,17 +2482,11 @@ begin
|
||||
|
||||
SkipWhitespace;
|
||||
FRecognizePE := False;
|
||||
{
|
||||
MarkupDecl starting in PE and ending in root is a WFC [28a]
|
||||
MarkupDecl starting in root but ending in PE is a VC (erratum 2e-14)
|
||||
}
|
||||
// TODO: what if statrs in PE1 and ends in PE2, and other cases?
|
||||
if CurrentEntity <> FSource.FEntity then
|
||||
if Assigned(FSource.FEntity) then { ends in PE }
|
||||
BadPENesting(esError)
|
||||
else
|
||||
BadPENesting(esFatal);
|
||||
|
||||
if CurrentEntity <> FSource.FEntity then
|
||||
BadPENesting;
|
||||
ExpectChar('>');
|
||||
FInsideDecl := False;
|
||||
end;
|
||||
end;
|
||||
until False;
|
||||
@ -2659,7 +2668,7 @@ begin
|
||||
PopVC;
|
||||
end;
|
||||
|
||||
procedure TXMLReader.AddForwardRef(aList: TList; Buf: PWideChar; Length: Integer);
|
||||
procedure TXMLReader.AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer);
|
||||
var
|
||||
w: PForwardRef;
|
||||
begin
|
||||
@ -2671,7 +2680,7 @@ begin
|
||||
aList.Add(w);
|
||||
end;
|
||||
|
||||
procedure TXMLReader.ClearRefs(aList: TList);
|
||||
procedure TXMLReader.ClearRefs(aList: TFPList);
|
||||
var
|
||||
I: Integer;
|
||||
begin
|
||||
@ -3088,7 +3097,7 @@ end;
|
||||
function TContentParticle.Add: TContentParticle;
|
||||
begin
|
||||
if FChildren = nil then
|
||||
FChildren := TList.Create;
|
||||
FChildren := TFPList.Create;
|
||||
Result := TContentParticle.Create;
|
||||
Result.FParent := Self;
|
||||
Result.FIndex := FChildren.Add(Result);
|
||||
|
@ -55,6 +55,7 @@ begin
|
||||
p^[$2f] := $29;
|
||||
p^[$30] := $2d;
|
||||
p^[$fd] := $28;
|
||||
p^[$ff] := $30;
|
||||
|
||||
Move(p^, p^[256], 256);
|
||||
p^[$100] := $19;
|
||||
|
@ -3,7 +3,7 @@ Test runner for w3.org XML compliance suite
|
||||
|
||||
The xmlts is intended to run the XML compliance suite from W3.org.
|
||||
The suite includes 2500+ tests. It may be downloaded from
|
||||
http://www.w3.org/XML/Test/xmlts20031210.zip (approx. 1.7 mBytes)
|
||||
http://www.w3.org/XML/Test/xmlts20080205.zip (approx. 1.7 mBytes)
|
||||
After compiling xmlts.pp, run it with the following command line:
|
||||
|
||||
xmlts <path-to-xmlconf.xml> <report-filename> [-t template.xml] [-v]
|
||||
@ -23,16 +23,7 @@ Report is produced in xhtml format, use your favourite browser to view it.
|
||||
|
||||
As of 10.03.2007, the xml package does not support namespaces yet, so you might wish
|
||||
to exclude namespace tests. To do this, edit xmlconf/xmlconf.xml file and comment out
|
||||
two lines at the bottom which reference 'eduni-ns10' and 'eduni-ns11' testsuites.
|
||||
|
||||
(The last lines should look like:
|
||||
|
||||
&eduni-xml11;
|
||||
<!-- &eduni-ns10; -->
|
||||
<!-- &eduni-ns11; -->
|
||||
|
||||
</TESTSUITE>
|
||||
)
|
||||
the lines that contain references &eduni-ns10; &eduni-ns11; and &eduni-nse;
|
||||
|
||||
|
||||
Testsuite errata
|
||||
|
@ -232,6 +232,7 @@ begin
|
||||
|
||||
if Child.NodeName = 'run-id' then
|
||||
begin
|
||||
newChild := nil;
|
||||
if Data = 'name' then
|
||||
newChild := FTemplate.createTextNode(parser)
|
||||
else if Data = 'description' then
|
||||
@ -358,6 +359,12 @@ begin
|
||||
FErrCol := -1;
|
||||
FTestID := Element['ID'];
|
||||
TestType := Element['TYPE'];
|
||||
if Pos(WideChar('5'), Element['EDITION']) > 0 then
|
||||
begin
|
||||
Inc(FSkipped);
|
||||
Exit;
|
||||
end;
|
||||
|
||||
root := GetBaseURI(Element, FRootUri);
|
||||
ResolveRelativeURI(root, UTF8Encode(Element['URI']), s);
|
||||
|
||||
@ -393,7 +400,7 @@ begin
|
||||
try
|
||||
try
|
||||
FParser.Options.Validate := FValidating;
|
||||
// FParser.Options.Namespaces := (Element['NAMESPACE'] <> 'no');
|
||||
FParser.Options.Namespaces := (Element['NAMESPACE'] <> 'no');
|
||||
FParser.OnError := {$IFDEF FPC}@{$ENDIF}ErrorHandler;
|
||||
FParser.ParseUri(s, TempDoc);
|
||||
except
|
||||
|
Loading…
Reference in New Issue
Block a user