mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-01 15:30:43 +02:00
* xmlread.pp, modified attribute parsing code to use DOM-independent data structures (second part)
* ExpectAttValue() now builds both plain string value and first-level node chain. * Normalize() procedure moved to xmlutils.pp and made publicly available as BufNormalize. * ParseLiteral() cleaned of attribute-specific code; at this point it is clear it won't be used for parsing attributes. git-svn-id: trunk@16186 -
This commit is contained in:
parent
e0d8556cae
commit
7e5ff8ca89
@ -338,7 +338,7 @@ type
|
||||
PrefixLen: Integer; // to avoid recalculation
|
||||
end;
|
||||
|
||||
TLiteralType = (ltPlain, ltAttr, ltTokAttr, ltPubid, ltEntity);
|
||||
TLiteralType = (ltPlain, ltPubid, ltEntity);
|
||||
|
||||
TXMLReader = class
|
||||
private
|
||||
@ -437,8 +437,8 @@ type
|
||||
procedure CheckNCName;
|
||||
function ExpectName: WideString; // [5]
|
||||
function ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
|
||||
Required: Boolean; Normalized: PBoolean = nil): Boolean;
|
||||
procedure ExpectAttValue(attrData: PNodeData); // [10]
|
||||
Required: Boolean): Boolean;
|
||||
function ExpectAttValue(attrData: PNodeData; NonCDATA: Boolean): Boolean; // [10]
|
||||
procedure ParseComment(discard: Boolean); // [15]
|
||||
procedure ParsePI; // [16]
|
||||
procedure CreatePINode;
|
||||
@ -1590,17 +1590,24 @@ const
|
||||
AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
|
||||
GT_Delim: TSetOfChar = [#0, '>'];
|
||||
|
||||
procedure TXMLReader.ExpectAttValue(AttrData: PNodeData);
|
||||
{ Parse attribute literal, producing plain string value in AttrData.FValueStr.
|
||||
If entity references are encountered and FExpandEntities=False, also builds
|
||||
a node chain starting from AttrData.FNext. Node chain is built only for the
|
||||
first level. If NonCDATA=True, additionally normalizes whitespace in string value.
|
||||
Returns True if value actually needed normalization }
|
||||
function TXMLReader.ExpectAttValue(AttrData: PNodeData; NonCDATA: Boolean): Boolean;
|
||||
var
|
||||
wc: WideChar;
|
||||
Delim: WideChar;
|
||||
ent: TDOMEntityEx;
|
||||
start: TObject;
|
||||
curr: PNodeData;
|
||||
StartPos: Integer;
|
||||
begin
|
||||
SkipQuote(Delim);
|
||||
curr := AttrData;
|
||||
FValue.Length := 0;
|
||||
StartPos := 0;
|
||||
start := FSource.FEntity;
|
||||
repeat
|
||||
wc := FSource.SkipUntil(FValue, AttrDelims);
|
||||
@ -1612,14 +1619,14 @@ begin
|
||||
Continue;
|
||||
|
||||
ent := EntityCheck(True);
|
||||
if (ent = nil) or (not FExpandEntities) then
|
||||
if ((ent = nil) or (not FExpandEntities)) and (FSource.FEntity = start) then
|
||||
begin
|
||||
if FValue.Length > 0 then
|
||||
if FValue.Length > StartPos then
|
||||
begin
|
||||
curr := AllocAttributeValueChunk(curr);
|
||||
curr^.FNodeType := ntText;
|
||||
SetString(curr^.FValueStr, FValue.Buffer, FValue.Length);
|
||||
FValue.Length := 0;
|
||||
// without PWideChar typecast and in {$T-}, FPC treats '@' result as PAnsiChar...
|
||||
SetString(curr^.FValueStr, PWideChar(@FValue.Buffer[StartPos]), FValue.Length-StartPos);
|
||||
end;
|
||||
curr := AllocAttributeValueChunk(curr);
|
||||
curr^.FNodeType := ntEntityReference;
|
||||
@ -1628,8 +1635,9 @@ begin
|
||||
SetString(curr^.FValueStr, FName.Buffer, FName.Length)
|
||||
else
|
||||
curr^.FValueStr := ent.FName;
|
||||
end
|
||||
else
|
||||
end;
|
||||
StartPos := FValue.Length;
|
||||
if Assigned(ent) then
|
||||
ContextPush(ent);
|
||||
end
|
||||
else if wc <> #0 then
|
||||
@ -1641,22 +1649,28 @@ begin
|
||||
wc := #32;
|
||||
BufAppend(FValue, wc);
|
||||
end
|
||||
else if (FSource.FEntity = start) or not ContextPop then // #0
|
||||
FatalError('Literal has no closing quote', -1);
|
||||
else
|
||||
begin
|
||||
if (FSource.FEntity = start) or not ContextPop then // #0
|
||||
FatalError('Literal has no closing quote', -1);
|
||||
StartPos := FValue.Length;
|
||||
end;
|
||||
until False;
|
||||
if Assigned(attrData^.FNext) then // complex case
|
||||
if Assigned(attrData^.FNext) then
|
||||
begin
|
||||
FAttrCleanupFlag := True;
|
||||
if FValue.Length > 0 then
|
||||
if FValue.Length > StartPos then
|
||||
begin
|
||||
curr := AllocAttributeValueChunk(curr);
|
||||
curr^.FNodeType := ntText;
|
||||
SetString(curr^.FValueStr, FValue.Buffer, FValue.Length);
|
||||
SetString(curr^.FValueStr, PWideChar(@FValue.Buffer[StartPos]), FValue.Length-StartPos);
|
||||
end;
|
||||
end
|
||||
end;
|
||||
if nonCDATA then
|
||||
BufNormalize(FValue, Result)
|
||||
else
|
||||
SetString(attrData^.FValueStr, FValue.Buffer, FValue.Length);
|
||||
FValue.Length := 0;
|
||||
Result := False;
|
||||
SetString(attrData^.FValueStr, FValue.Buffer, FValue.Length);
|
||||
end;
|
||||
|
||||
const
|
||||
@ -1830,58 +1844,19 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure Normalize(var Buf: TWideCharBuf; Modified: PBoolean);
|
||||
var
|
||||
Dst, Src: Integer;
|
||||
begin
|
||||
Dst := 0;
|
||||
Src := 0;
|
||||
// skip leading space if any
|
||||
while (Src < Buf.Length) and (Buf.Buffer[Src] = ' ') do
|
||||
Inc(Src);
|
||||
|
||||
while Src < Buf.Length do
|
||||
begin
|
||||
if Buf.Buffer[Src] = ' ' then
|
||||
begin
|
||||
// Dst cannot be 0 here, because leading space is already skipped
|
||||
if Buf.Buffer[Dst-1] <> ' ' then
|
||||
begin
|
||||
Buf.Buffer[Dst] := ' ';
|
||||
Inc(Dst);
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Buf.Buffer[Dst] := Buf.Buffer[Src];
|
||||
Inc(Dst);
|
||||
end;
|
||||
Inc(Src);
|
||||
end;
|
||||
// trailing space (only one possible due to compression)
|
||||
if (Dst > 0) and (Buf.Buffer[Dst-1] = ' ') then
|
||||
Dec(Dst);
|
||||
|
||||
if Assigned(Modified) then
|
||||
Modified^ := Dst <> Buf.Length;
|
||||
Buf.Length := Dst;
|
||||
end;
|
||||
|
||||
const
|
||||
LiteralDelims: array[TLiteralType] of TSetOfChar = (
|
||||
[#0, '''', '"'], // ltPlain
|
||||
[#0, '<', '&', '''', '"', #9, #10, #13], // ltAttr
|
||||
[#0, '<', '&', '''', '"', #9, #10, #13], // ltTokAttr
|
||||
[#0, '''', '"', #13, #10], // ltPubid
|
||||
[#0, '%', '&', '''', '"'] // ltEntity
|
||||
);
|
||||
|
||||
function TXMLReader.ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
|
||||
Required: Boolean; Normalized: PBoolean): Boolean;
|
||||
Required: Boolean): Boolean;
|
||||
var
|
||||
start: TObject;
|
||||
wc, Delim: WideChar;
|
||||
ent: TDOMEntityEx;
|
||||
dummy: Boolean;
|
||||
begin
|
||||
SkipQuote(Delim, Required);
|
||||
Result := (Delim <> #0);
|
||||
@ -1900,32 +1875,18 @@ begin
|
||||
FatalError('PE reference not allowed here in internal subset', FName.Length+2);
|
||||
StartPE;
|
||||
end
|
||||
else if wc = '&' then { ltAttr, ltTokAttr, ltEntity }
|
||||
else if wc = '&' then { ltEntity }
|
||||
begin
|
||||
if ParseRef(ToFill) then // charRefs always expanded
|
||||
Continue;
|
||||
if aType = ltEntity then // bypass
|
||||
begin
|
||||
BufAppend(ToFill, '&');
|
||||
BufAppendChunk(ToFill, FName.Buffer, FName.Buffer + FName.Length);
|
||||
BufAppend(ToFill, ';');
|
||||
end
|
||||
else // include
|
||||
begin
|
||||
if ResolvePredefined then
|
||||
Continue;
|
||||
ent := EntityCheck(True);
|
||||
if ent = nil then
|
||||
Continue;
|
||||
ContextPush(ent);
|
||||
end;
|
||||
BufAppend(ToFill, '&');
|
||||
BufAppendChunk(ToFill, FName.Buffer, FName.Buffer + FName.Length);
|
||||
BufAppend(ToFill, ';');
|
||||
end
|
||||
else if wc = '<' then
|
||||
FatalError('Character ''<'' is not allowed in attribute value')
|
||||
else if wc <> #0 then
|
||||
begin
|
||||
FSource.NextChar;
|
||||
if (wc = #10) or (wc = #13) or (wc = #9) then
|
||||
if (wc = #10) or (wc = #13) then
|
||||
wc := #32
|
||||
// terminating delimiter must be in the same context as the starting one
|
||||
else if (wc = Delim) and (start = FSource.FEntity) then
|
||||
@ -1935,8 +1896,8 @@ begin
|
||||
else if (FSource.FEntity = start) or not ContextPop then // #0
|
||||
FatalError('Literal has no closing quote', -1);
|
||||
until False;
|
||||
if aType in [ltTokAttr, ltPubid] then
|
||||
Normalize(ToFill, Normalized);
|
||||
if aType = ltPubid then
|
||||
BufNormalize(ToFill, dummy);
|
||||
end;
|
||||
|
||||
function TXMLReader.SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar; c2: WideChar = #0): Boolean;
|
||||
@ -2527,7 +2488,7 @@ begin
|
||||
|
||||
// See comments to valid-sa-094: PE expansion should be disabled in AttDef.
|
||||
attrData := AllocAttributeData(nil);
|
||||
ExpectAttValue(attrData);
|
||||
ExpectAttValue(attrData, dt <> dtCDATA);
|
||||
|
||||
LoadAttribute(attrData, AttDef); // convert to DOM form
|
||||
CleanupAttributeData;
|
||||
@ -3152,30 +3113,17 @@ var
|
||||
attrData: PNodeData;
|
||||
AttDef: TDOMAttrDef;
|
||||
i: Integer;
|
||||
normalized: Boolean;
|
||||
|
||||
{ still needs a temp to store AttDef.Value }
|
||||
procedure CheckValue;
|
||||
var
|
||||
AttValue, OldValue: WideString;
|
||||
begin
|
||||
if FStandalone and AttDef.ExternallyDeclared then
|
||||
begin
|
||||
OldValue := Attr.Value;
|
||||
Attr.DataType := AttDef.DataType;
|
||||
AttValue := Attr.Value;
|
||||
if AttValue <> OldValue then
|
||||
StandaloneError(-1);
|
||||
end
|
||||
else
|
||||
begin
|
||||
Attr.DataType := AttDef.DataType;
|
||||
AttValue := Attr.Value;
|
||||
end;
|
||||
// TODO: what about normalization of AttDef.Value? (Currently it IS normalized)
|
||||
if (AttDef.Default = adFixed) and (AttDef.Value <> AttValue) then
|
||||
ValidationError('Value of attribute ''%s'' does not match its #FIXED default',[AttDef.Name], -1);
|
||||
if not ValidateAttrSyntax(AttDef, AttValue) then
|
||||
ValidationError('Attribute ''%s'' type mismatch', [AttDef.Name], -1);
|
||||
ValidateAttrValue(Attr, AttValue);
|
||||
if (AttDef.Default = adFixed) and (AttDef.Value <> attrData^.FValueStr) then
|
||||
ValidationError('Value of attribute ''%s'' does not match its #FIXED default',[attrData^.FQName^.Key], -1);
|
||||
if not ValidateAttrSyntax(AttDef, attrData^.FValueStr) then
|
||||
ValidationError('Attribute ''%s'' type mismatch', [attrData^.FQName^.Key], -1);
|
||||
ValidateAttrValue(Attr, attrData^.FValueStr);
|
||||
end;
|
||||
|
||||
begin
|
||||
@ -3201,13 +3149,19 @@ begin
|
||||
FatalError('Duplicate attribute', FName.Length);
|
||||
|
||||
ExpectEq;
|
||||
ExpectAttValue(attrData);
|
||||
normalized := ExpectAttValue(attrData, Assigned(AttDef) and (AttDef.DataType <> dtCDATA));
|
||||
|
||||
LoadAttribute(attrData, attr);
|
||||
|
||||
elem.Attributes.SetNamedItem(attr);
|
||||
if Assigned(AttDef) and ((AttDef.DataType <> dtCdata) or (AttDef.Default = adFixed)) then
|
||||
begin
|
||||
Attr.DataType := AttDef.DataType;
|
||||
if normalized and FStandalone and AttDef.ExternallyDeclared then
|
||||
StandaloneError(-1);
|
||||
|
||||
CheckValue;
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure TXMLReader.AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer);
|
||||
|
@ -159,6 +159,7 @@ procedure BufAllocate(var ABuffer: TWideCharBuf; ALength: Integer);
|
||||
procedure BufAppend(var ABuffer: TWideCharBuf; wc: WideChar);
|
||||
procedure BufAppendChunk(var ABuf: TWideCharBuf; pstart, pend: PWideChar);
|
||||
function BufEquals(const ABuf: TWideCharBuf; const Arg: WideString): Boolean;
|
||||
procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
|
||||
|
||||
{ Built-in decoder functions for UTF-8, UTF-16 and ISO-8859-1 }
|
||||
|
||||
@ -912,6 +913,42 @@ begin
|
||||
CompareMem(ABuf.Buffer, Pointer(Arg), ABuf.Length*sizeof(WideChar));
|
||||
end;
|
||||
|
||||
procedure BufNormalize(var Buf: TWideCharBuf; out Modified: Boolean);
|
||||
var
|
||||
Dst, Src: Integer;
|
||||
begin
|
||||
Dst := 0;
|
||||
Src := 0;
|
||||
// skip leading space if any
|
||||
while (Src < Buf.Length) and (Buf.Buffer[Src] = ' ') do
|
||||
Inc(Src);
|
||||
|
||||
while Src < Buf.Length do
|
||||
begin
|
||||
if Buf.Buffer[Src] = ' ' then
|
||||
begin
|
||||
// Dst cannot be 0 here, because leading space is already skipped
|
||||
if Buf.Buffer[Dst-1] <> ' ' then
|
||||
begin
|
||||
Buf.Buffer[Dst] := ' ';
|
||||
Inc(Dst);
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
Buf.Buffer[Dst] := Buf.Buffer[Src];
|
||||
Inc(Dst);
|
||||
end;
|
||||
Inc(Src);
|
||||
end;
|
||||
// trailing space (only one possible due to compression)
|
||||
if (Dst > 0) and (Buf.Buffer[Dst-1] = ' ') then
|
||||
Dec(Dst);
|
||||
|
||||
Modified := Dst <> Buf.Length;
|
||||
Buf.Length := Dst;
|
||||
end;
|
||||
|
||||
{ standard decoders }
|
||||
|
||||
function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: PWideChar; var OutCnt: Cardinal): Integer; stdcall;
|
||||
|
Loading…
Reference in New Issue
Block a user