diff --git a/packages/fcl-xml/src/names.inc b/packages/fcl-xml/src/names.inc index 68852d39a7..e23b07992a 100644 --- a/packages/fcl-xml/src/names.inc +++ b/packages/fcl-xml/src/names.inc @@ -58,7 +58,7 @@ const ns_3000 = [$41..$94, $A1..$FA] + [$07, $21..$29]; - namingBitmap: array[0..$2F] of TSetOfByte = ( + namingBitmap: array[0..$30] of TSetOfByte = ( [], // 00 - nothing allowed [0..255], // 01 - all allowed @@ -165,10 +165,11 @@ const [$70..$7D, $7F..$FF], // 2C $0300 - NameStart [1..$FF], // 2D $3000 - NameStart [0..$7D, $7F..$FF], // 2E $0300 - Names - [$0C..$0D, $3F..$40, $70..$FF] // 2F $2000 - Names + [$0C..$0D, $3F..$40, $70..$FF], // 2F $2000 - Names + [$00..$FD] // 30 $FF00 - both Name and NameStart ); - Xml11HighPages: TSetOfByte = [0..$21, $2C..$D7, $F9..$FF]; + Xml11HighPages: TSetOfByte = [0..$21, $2C..$D7, $F9..$FE]; NamePages: array[0..511] of Byte = ( $02, $03, $04, $05, $06, $07, $08, $00, diff --git a/packages/fcl-xml/src/xmlread.pp b/packages/fcl-xml/src/xmlread.pp index ada1dc8b37..1b53bfeec4 100644 --- a/packages/fcl-xml/src/xmlread.pp +++ b/packages/fcl-xml/src/xmlread.pp @@ -64,12 +64,16 @@ type FExpandEntities: Boolean; FIgnoreComments: Boolean; FCDSectionsAsText: Boolean; + FResolveExternals: Boolean; + FNamespaces: Boolean; public property Validate: Boolean read FValidate write FValidate; property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace; property ExpandEntities: Boolean read FExpandEntities write FExpandEntities; property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments; property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText; + property ResolveExternals: Boolean read FResolveExternals write FResolveExternals; + property Namespaces: Boolean read FNamespaces write FNamespaces; end; // NOTE: DOM 3 LS ACTION_TYPE enumeration starts at 1 @@ -148,6 +152,7 @@ type FExternallyDeclared: Boolean; FResolved: Boolean; FOnStack: Boolean; + FBetweenDecls: Boolean; FReplacementText: DOMString; FStartLocation: TLocation; end; @@ -162,6 +167,7 @@ type FCursor: TObject; // weak reference FLocation: TLocation; LFPos: PWideChar; + FXML11Rules: Boolean; FSystemID: WideString; FPublicID: WideString; FReloadHook: procedure of object; @@ -188,7 +194,6 @@ type FBufStart: PWideChar; FDecoder: TDecoder; FSeenCR: Boolean; - FXML11Rules: Boolean; FFixedUCS2: string; FBufSize: Integer; FSurrogate: WideChar; @@ -243,7 +248,7 @@ type TContentParticle = class(TObject) private FParent: TContentParticle; - FChildren: TList; + FChildren: TFPList; FIndex: Integer; function GetChildCount: Integer; function GetChild(Index: Integer): TContentParticle; @@ -294,6 +299,7 @@ type FState: TXMLReadState; FRecognizePE: Boolean; FHavePERefs: Boolean; + FInsideDecl: Boolean; FDocNotValid: Boolean; FValue: TWideCharBuf; FName: TWideCharBuf; @@ -302,8 +308,8 @@ type FNamePages: PByteArray; FDocType: TDOMDocumentTypeEx; // a shortcut FPEMap: TDOMNamedNodeMap; - FIDRefs: TList; - FNotationRefs: TList; + FIDRefs: TFPList; + FNotationRefs: TFPList; FCurrContentType: TElementContentType; FSaViolation: Boolean; FDTDStartPos: PWideChar; @@ -314,6 +320,8 @@ type FExpandEntities: Boolean; FIgnoreComments: Boolean; FCDSectionsAsText: Boolean; + FResolveExternals: Boolean; + FNamespaces: Boolean; procedure RaiseExpectedQmark; procedure GetChar; @@ -328,8 +336,8 @@ type procedure ParseQuantity(CP: TContentParticle); procedure StoreLocation(out Loc: TLocation); function ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: WideString): Boolean; - procedure AddForwardRef(aList: TList; Buf: PWideChar; Length: Integer); - procedure ClearRefs(aList: TList); + procedure AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer); + procedure ClearRefs(aList: TFPList); procedure ValidateIdRefs; procedure StandaloneError(LineOffs: Integer = 0); procedure CallErrorHandler(E: EXMLReadError); @@ -345,7 +353,7 @@ type procedure FatalError(const descr: String; LineOffs: Integer=0); overload; procedure FatalError(const descr: string; const args: array of const; LineOffs: Integer=0); overload; procedure FatalError(Expected: WideChar); overload; - function SkipWhitespace: Boolean; + function SkipWhitespace(PercentAloneIsOk: Boolean = False): Boolean; function SkipWhitespaceRaw: Boolean; procedure ExpectWhitespace; procedure ExpectString(const s: String); @@ -1111,7 +1119,7 @@ begin E.Free; end; -function TXMLReader.SkipWhitespace: Boolean; +function TXMLReader.SkipWhitespace(PercentAloneIsOk: Boolean): Boolean; begin Result := False; repeat @@ -1129,19 +1137,24 @@ begin '%': begin if not FRecognizePE then Exit; - GetChar; - if not CheckName then +// This is the only case where look-ahead is needed + if FSource.FBuf > FSource.FBufEnd-2 then + FSource.Reload; + if (not PercentAloneIsOk) or + (Byte(FSource.FBuf[1]) in NamingBitmap[FNamePages^[hi(Word(FSource.FBuf[1]))]]) or + (FXML11 and (FSource.FBuf[1] >= #$D800) and (FSource.FBuf[1] <= #$DB7F)) then begin - if (FCurChar <> #32) and (FCurChar <> #10) and (FCurChar <> #9) and (FCurChar <> #13) then - FatalError('Expected whitespace'); - FCurChar := '%'; - Exit; - end; - ExpectChar(';'); - StartPE; - Result := True; // report whitespace on both ends of PE - Continue; - end; + Inc(FSource.FBuf); // skip '%' + FCurChar := FSource.FBuf^; + if not CheckName then + RaiseNameNotFound; + ExpectChar(';'); + StartPE; + Result := True; // report whitespace upon entering the PE + Continue; + end + else Break; + end else Exit; end; @@ -1219,8 +1232,8 @@ begin inherited Create; BufAllocate(FName, 128); BufAllocate(FValue, 512); - FIDRefs := TList.Create; - FNotationRefs := TList.Create; + FIDRefs := TFPList.Create; + FNotationRefs := TFPList.Create; // Set char rules to XML 1.0 FNamePages := @NamePages; @@ -1236,6 +1249,8 @@ begin FExpandEntities := FCtrl.Options.ExpandEntities; FCDSectionsAsText := FCtrl.Options.CDSectionsAsText; FIgnoreComments := FCtrl.Options.IgnoreComments; + FResolveExternals := FCtrl.Options.ResolveExternals; + FNamespaces := FCtrl.Options.Namespaces; end; destructor TXMLReader.Destroy; @@ -1257,8 +1272,7 @@ procedure TXMLReader.XML11_BuildTables; begin FNamePages := Xml11NamePages; FXML11 := True; - { switching to xml11 may occur only with DecodingSource } - TXMLDecodingSource(FSource).FXml11Rules := True; + FSource.FXml11Rules := True; end; procedure TXMLReader.ProcessXML(ASource: TXMLCharSource); @@ -1504,17 +1518,26 @@ end; function TXMLReader.ContextPop: Boolean; var Src: TXMLCharSource; + Error: Boolean; begin Result := Assigned(FSource.FParent) and (FSource.DTDSubsetType = dsNone); if Result then begin Src := FSource.FParent; + Error := False; if Assigned(FSource.FEntity) then + begin TDOMEntityEx(FSource.FEntity).FOnStack := False; +// [28a] PE that was started between MarkupDecls may not end inside MarkupDecl + Error := TDOMEntityEx(FSource.FEntity).FBetweenDecls and FInsideDecl; + end; FCursor := TDOMNode(FSource.FCursor); FSource.Free; FSource := Src; FCurChar := FSource.FBuf^; +// correct position of this error is after PE reference + if Error then + BadPENesting(esFatal); end; end; @@ -1600,6 +1623,7 @@ begin if PEnt.FOnStack then FatalError('Entity ''%%%s'' recursively references itself', [PEnt.NodeName]); + PEnt.FBetweenDecls := not FInsideDecl; ContextPush(PEnt); FHavePERefs := True; end; @@ -2124,7 +2148,6 @@ end; procedure TXMLReader.ParseAttlistDecl; // [52] var - SaveCurNode: TDOMNode; ValueRequired: Boolean; Token: WideString; ElDef: TDOMElementDef; @@ -2233,18 +2256,15 @@ begin if AttDef.FDataType = dtId then ValidationError('An attribute of type ID cannot have a default value',[]); - SaveCurNode := FCursor; FCursor := AttDef; // TODO: move this to ExpectAttValue? StoreLocation(FTokenStart); Inc(FTokenStart.LinePos); // See comments to valid-sa-094: PE expansion should be disabled in AttDef. // ExpectAttValue() does not recognize PEs anyway, so setting FRecognizePEs isn't needed - try - ExpectAttValue; - finally - FCursor := SaveCurNode; - end; +// Saving/restoring FCursor is also redundant because it is always nil here. + ExpectAttValue; + FCursor := nil; if not ValidateAttrSyntax(AttDef, AttDef.NodeValue) then ValidationError('Default value for attribute ''%s'' has wrong syntax', [AttDef.Name]); end; @@ -2265,11 +2285,11 @@ end; function TXMLReader.ParseEntityDeclValue(Delim: WideChar): Boolean; // [9] var - Src: TXMLCharSource; + CurrentEntity: TObject; begin - Src := FSource; + CurrentEntity := FSource.FEntity; // "Included in literal": process until delimiter hit IN SAME context - while not ((FSource = Src) and CheckForChar(Delim)) do + while not ((FSource.FEntity = CurrentEntity) and CheckForChar(Delim)) do if CheckForChar('%') then begin if not CheckName then @@ -2308,16 +2328,13 @@ var Entity: TDOMEntityEx; Map: TDOMNamedNodeMap; begin - ExpectWhitespace; + if not SkipWhitespace(True) then + FatalError('Expected whitespace'); NDataAllowed := True; Map := FDocType.Entities; if CheckForChar('%') then // [72] begin - if FRecognizePE then - SkipWhitespace // we know that there IS whitespace due to the check in - // previous call to SkipWhitespace - else - ExpectWhitespace; + ExpectWhitespace; NDataAllowed := False; if FPEMap = nil then FPEMap := TDOMNamedNodeMap.Create(FDocType, ENTITY_NODE); @@ -2338,10 +2355,7 @@ begin StoreLocation(Entity.FStartLocation); FValue.Length := 0; if not ParseEntityDeclValue(Delim) then - begin - FTokenStart := Entity.FStartLocation; - FatalError('Literal has no closing quote', -1); - end; + DoErrorPos(esFatal, 'Literal has no closing quote', Entity.FStartLocation); SetString(Entity.FReplacementText, FValue.Buffer, FValue.Length); end else @@ -2453,6 +2467,7 @@ begin else begin FRecognizePE := FSource.DTDSubsetType <> dsInternal; + FInsideDecl := True; Token := GetString(['A'..'Z']); if Token = 'ELEMENT' then ParseElementDecl @@ -2467,17 +2482,11 @@ begin SkipWhitespace; FRecognizePE := False; -{ - MarkupDecl starting in PE and ending in root is a WFC [28a] - MarkupDecl starting in root but ending in PE is a VC (erratum 2e-14) -} - // TODO: what if statrs in PE1 and ends in PE2, and other cases? - if CurrentEntity <> FSource.FEntity then - if Assigned(FSource.FEntity) then { ends in PE } - BadPENesting(esError) - else - BadPENesting(esFatal); + + if CurrentEntity <> FSource.FEntity then + BadPENesting; ExpectChar('>'); + FInsideDecl := False; end; end; until False; @@ -2659,7 +2668,7 @@ begin PopVC; end; -procedure TXMLReader.AddForwardRef(aList: TList; Buf: PWideChar; Length: Integer); +procedure TXMLReader.AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer); var w: PForwardRef; begin @@ -2671,7 +2680,7 @@ begin aList.Add(w); end; -procedure TXMLReader.ClearRefs(aList: TList); +procedure TXMLReader.ClearRefs(aList: TFPList); var I: Integer; begin @@ -3088,7 +3097,7 @@ end; function TContentParticle.Add: TContentParticle; begin if FChildren = nil then - FChildren := TList.Create; + FChildren := TFPList.Create; Result := TContentParticle.Create; Result.FParent := Self; Result.FIndex := FChildren.Add(Result); diff --git a/packages/fcl-xml/src/xmlutils.pp b/packages/fcl-xml/src/xmlutils.pp index 11c0f3e0cc..e550816958 100644 --- a/packages/fcl-xml/src/xmlutils.pp +++ b/packages/fcl-xml/src/xmlutils.pp @@ -55,6 +55,7 @@ begin p^[$2f] := $29; p^[$30] := $2d; p^[$fd] := $28; + p^[$ff] := $30; Move(p^, p^[256], 256); p^[$100] := $19; diff --git a/packages/fcl-xml/tests/README b/packages/fcl-xml/tests/README index 3376e368a2..61b2d7450d 100644 --- a/packages/fcl-xml/tests/README +++ b/packages/fcl-xml/tests/README @@ -3,7 +3,7 @@ Test runner for w3.org XML compliance suite The xmlts is intended to run the XML compliance suite from W3.org. The suite includes 2500+ tests. It may be downloaded from -http://www.w3.org/XML/Test/xmlts20031210.zip (approx. 1.7 mBytes) +http://www.w3.org/XML/Test/xmlts20080205.zip (approx. 1.7 mBytes) After compiling xmlts.pp, run it with the following command line: xmlts [-t template.xml] [-v] @@ -23,16 +23,7 @@ Report is produced in xhtml format, use your favourite browser to view it. As of 10.03.2007, the xml package does not support namespaces yet, so you might wish to exclude namespace tests. To do this, edit xmlconf/xmlconf.xml file and comment out -two lines at the bottom which reference 'eduni-ns10' and 'eduni-ns11' testsuites. - -(The last lines should look like: - - &eduni-xml11; - - - - -) +the lines that contain references &eduni-ns10; &eduni-ns11; and &eduni-nse; Testsuite errata diff --git a/packages/fcl-xml/tests/xmlts.pp b/packages/fcl-xml/tests/xmlts.pp index 85a878549e..11f9831550 100644 --- a/packages/fcl-xml/tests/xmlts.pp +++ b/packages/fcl-xml/tests/xmlts.pp @@ -232,6 +232,7 @@ begin if Child.NodeName = 'run-id' then begin + newChild := nil; if Data = 'name' then newChild := FTemplate.createTextNode(parser) else if Data = 'description' then @@ -358,6 +359,12 @@ begin FErrCol := -1; FTestID := Element['ID']; TestType := Element['TYPE']; + if Pos(WideChar('5'), Element['EDITION']) > 0 then + begin + Inc(FSkipped); + Exit; + end; + root := GetBaseURI(Element, FRootUri); ResolveRelativeURI(root, UTF8Encode(Element['URI']), s); @@ -393,7 +400,7 @@ begin try try FParser.Options.Validate := FValidating; -// FParser.Options.Namespaces := (Element['NAMESPACE'] <> 'no'); + FParser.Options.Namespaces := (Element['NAMESPACE'] <> 'no'); FParser.OnError := {$IFDEF FPC}@{$ENDIF}ErrorHandler; FParser.ParseUri(s, TempDoc); except