Apply patch proposed by J. Gareth Moreton in:

bug report #0036882: [Feature] Class and record definition XML dump extension

  These patches extend the node dump feature (enabled with DEBUG_NODE_XML)
  so it also dumps class and record definitions to the XML file. They are contained within <definition> tags.
  Currently only fields and constants are dumped to the XML file. Methods, constant and variable definitions may be added later.

git-svn-id: trunk@47658 -
This commit is contained in:
pierre 2020-12-01 11:45:14 +00:00
parent 9901a11d22
commit af17e9460b
3 changed files with 350 additions and 329 deletions

View File

@ -394,8 +394,6 @@ interface
procedure XMLPrintNodeInfo(var T: Text); dynamic;
procedure XMLPrintNodeData(var T: Text); virtual;
procedure XMLPrintNodeTree(var T: Text); virtual;
class function SanitiseXMLString(const S: ansistring): ansistring; static;
class function WritePointer(const P: Pointer): ansistring; static;
{$endif DEBUG_NODE_XML}
procedure concattolist(l : tlinkedlist);virtual;
function ischild(p : tnode) : boolean;virtual;
@ -493,14 +491,6 @@ interface
function ppuloadnodetree(ppufile:tcompilerppufile):tnode;
procedure ppuwritenodetree(ppufile:tcompilerppufile;n:tnode);
const
printnodespacing = ' ';
var
{ indention used when writing the tree to the screen }
printnodeindention : string;
procedure printnodeindent;
procedure printnodeunindent;
procedure printnode(var t:text;n:tnode);
procedure printnode(n:tnode);
{$ifdef DEBUG_NODE_XML}
@ -663,18 +653,6 @@ implementation
end;
procedure printnodeindent;
begin
printnodeindention:=printnodeindention+printnodespacing;
end;
procedure printnodeunindent;
begin
delete(printnodeindention,1,length(printnodespacing));
end;
procedure printnode(var t:text;n:tnode);
begin
if assigned(n) then
@ -982,309 +960,6 @@ implementation
PrintNodeUnindent;
WriteLn(T, PrintNodeIndention, '</', nodetype2str[nodetype], '>');
end;
class function TNode.WritePointer(const P: Pointer): ansistring;
begin
case PtrUInt(P) of
0:
WritePointer := 'nil';
1..$FFFF:
WritePointer := '$' + hexstr(PtrUInt(P), 4);
$10000..$FFFFFFFF:
WritePointer := '$' + hexstr(PtrUInt(P), 8);
{$ifdef CPU64}
else
WritePointer := '$' + hexstr(PtrUInt(P), 16);
{$endif CPU64}
end;
end;
class function TNode.SanitiseXMLString(const S: ansistring): ansistring;
var
X, UTF8Len, UTF8Char, CurrentChar: Integer;
needs_quoting, in_quotes, add_end_quote: Boolean;
DoASCII: Boolean;
{ Write the given byte as #xxx }
procedure EncodeControlChar(Value: Byte);
begin
if X = Length(Result) then
add_end_quote := False;
Delete(Result, X, 1);
if in_quotes then
begin
Insert('#' + tostr(Value) + '''', Result, X);
{ If the entire string consists of control characters, it
doesn't need quoting, so only set the flag here }
needs_quoting := True;
in_quotes := False;
end
else
Insert('#' + tostr(Value), Result, X);
end;
{ Write the given byte as either a plain character or an XML keyword }
procedure EncodeStandardChar(Value: Byte);
begin
if not in_quotes then
begin
in_quotes := True;
if (X < Length(Result)) then
begin
needs_quoting := True;
Insert('''', Result, X + 1)
end;
end;
{ Check the character for anything that could be mistaken for an XML element }
case CurrentChar of
Ord('#'):
{ Required to differentiate '#27' from the escape code #27, for example }
needs_quoting:=true;
Ord('<'):
begin
Delete(Result, X, 1);
Insert('&lt;', Result, X);
end;
Ord('>'):
begin
Delete(Result, X, 1);
Insert('&gt;', Result, X);
end;
Ord('&'):
begin
Delete(Result, X, 1);
Insert('&amp;', Result, X);
end;
Ord('"'):
begin
needs_quoting := True;
Delete(Result, X, 1);
Insert('&quot;', Result, X);
end;
Ord(''''):
begin
needs_quoting:=true;
{ Simply double it like in pascal strings }
Insert('''', Result, X);
end;
else
{ Do nothing };
end;
end;
{ Convert character between $80 and $FF to UTF-8 }
procedure EncodeExtendedChar(Value: Byte);
begin
if not in_quotes then
begin
in_quotes := True;
if (X < Length(Result)) then
begin
needs_quoting := True;
Insert('''', Result, X + 1)
end;
end;
case Value of
$80..$BF: { Add $C2 before the value }
Insert(#$C2, Result, X);
$C0..$FF: { Zero the $40 bit and add $C3 before the value }
begin
Result[X] := Char(Byte(Result[X]) and $BF);
Insert(#$C3, Result, X);
end;
else
{ Previous conditions should prevent this procedure from being
called if Value < $80 }
InternalError(2019061901);
end;
end;
begin
needs_quoting := False;
Result := S;
{ Gets set to True if an invalid UTF-8 sequence is found }
DoASCII := False;
{ By setting in_quotes to false here, we can exclude the single
quotation marks surrounding the string if it doesn't contain any
control characters, or consists entirely of control characters. }
in_quotes := False;
add_end_quote := True;
X := Length(Result);
while X > 0 do
begin
CurrentChar := Ord(Result[X]);
{ Control characters and extended characters need special handling }
case CurrentChar of
$00..$1F, $7F:
EncodeControlChar(CurrentChar);
$20..$7E:
EncodeStandardChar(CurrentChar);
{ UTF-8 continuation byte }
$80..$BF:
begin
if not in_quotes then
begin
in_quotes := True;
if (X < Length(Result)) then
begin
needs_quoting := True;
Insert('''', Result, X + 1)
end;
end;
UTF8Char := CurrentChar and $3F; { The data bits of the continuation byte }
UTF8Len := 1; { This variable actually holds 1 less than the length }
{ By setting DoASCII to true, it marks the string as 'invalid UTF-8'
automatically if it reaches the beginning of the string unexpectedly }
DoASCII := True;
Dec(X);
while X > 0 do
begin
CurrentChar := Ord(Result[X]);
case CurrentChar of
{ A standard character here is invalid UTF-8 }
$00..$7F:
Break;
{ Another continuation byte }
$80..$BF:
begin
UTF8Char := UTF8Char or ((CurrentChar and $3F) shl (6 * UTF8Len));
Inc(UTF8Len);
if UTF8Len >= 4 then
{ Sequence too long }
Break;
end;
{ Lead byte for 2-byte sequences }
$C2..$DF:
begin
if UTF8Len <> 1 then Break;
UTF8Char := UTF8Char or ((CurrentChar and $1F) shl 6);
{ Check to see if the code is in range and not part of an 'overlong' sequence }
case UTF8Char of
$0080..$07FF:
DoASCII := False;
else
{ Do nothing - DoASCII is already true }
end;
Break;
end;
{ Lead byte for 3-byte sequences }
$E0..$EF:
begin
if UTF8Len <> 2 then Break;
UTF8Char := UTF8Char or ((CurrentChar and $0F) shl 12);
{ Check to see if the code is in range and not part of an 'overlong' sequence }
case UTF8Char of
$0800..$D7FF, $E000..$FFFF: { $D800..$DFFF is reserved and hence invalid }
DoASCII := False;
else
{ Do nothing - DoASCII is already true }
end;
Break;
end;
{ Lead byte for 4-byte sequences }
$F0..$F4:
begin
if UTF8Len <> 3 then Break;
UTF8Char := UTF8Char or ((CurrentChar and $07) shl 18);
{ Check to see if the code is in range and not part of an 'overlong' sequence }
case UTF8Char of
$010000..$10FFFF:
DoASCII := False;
else
{ Do nothing - DoASCII is already true }
end;
Break;
end;
{ Invalid character }
else
Break;
end;
end;
if DoASCII then
Break;
{ If all is fine, we don't need to encode any more characters }
end;
{ Invalid UTF-8 bytes and lead bytes without continuation bytes }
$C0..$FF:
begin
DoASCII := True;
Break;
end;
end;
Dec(X);
end;
{ UTF-8 failed, so encode the string as plain ASCII }
if DoASCII then
begin
{ Reset the flags and Result }
needs_quoting := False;
Result := S;
in_quotes := False;
add_end_quote := True;
for X := Length(Result) downto 1 do
begin
CurrentChar := Ord(Result[X]);
{ Control characters and extended characters need special handling }
case CurrentChar of
$00..$1F, $7F:
EncodeControlChar(CurrentChar);
$20..$7E:
EncodeStandardChar(CurrentChar);
{ Extended characters }
else
EncodeExtendedChar(CurrentChar);
end;
end;
end;
if needs_quoting then
begin
if in_quotes then
Result := '''' + Result;
if add_end_quote then
Result := Result + '''';
end;
end;
{$endif DEBUG_NODE_XML}
function tnode.isequal(p : tnode) : boolean;

View File

@ -1475,7 +1475,7 @@ implementation
if Assigned(procdef.struct) then
begin
if Assigned(procdef.struct.objrealname) then
Write(T, ' struct="', TNode.SanitiseXMLString(procdef.struct.objrealname^), '"')
Write(T, ' struct="', SanitiseXMLString(procdef.struct.objrealname^), '"')
else
Write(T, ' struct="&lt;NULL&gt;"');
end;
@ -1523,7 +1523,7 @@ implementation
PrintType('package stub');
end;
Write(T, ' name="', TNode.SanitiseXMLString(procdef.customprocname([pno_showhidden, pno_noclassmarker])), '"');
Write(T, ' name="', SanitiseXMLString(procdef.customprocname([pno_showhidden, pno_noclassmarker])), '"');
if po_hascallingconvention in procdef.procoptions then
Write(T, ' convention="', proccalloptionStr[procdef.proccalloption], '"');
@ -1533,7 +1533,7 @@ implementation
PrintNodeIndent;
if Assigned(procdef.returndef) and not is_void(procdef.returndef) then
WriteLn(T, PrintNodeIndention, '<returndef>', TNode.SanitiseXMLString(procdef.returndef.typesymbolprettyname), '</returndef>');
WriteLn(T, PrintNodeIndention, '<returndef>', SanitiseXMLString(procdef.returndef.typesymbolprettyname), '</returndef>');
if po_reintroduce in procdef.procoptions then
PrintOption('reintroduce');
@ -2472,7 +2472,6 @@ implementation
printproc( 'after parsing');
{$ifdef DEBUG_NODE_XML}
printnodeindention := printnodespacing;
XMLPrintProc;
{$endif DEBUG_NODE_XML}
@ -2913,6 +2912,8 @@ implementation
WriteLn(T, '<?xml version="1.0" encoding="utf-8"?>');
WriteLn(T, '<', RootName, ' name="', ModuleName, '">');
Close(T);
printnodeindention := printnodespacing;
end;

View File

@ -119,6 +119,21 @@ interface
procedure DoneVerbose;
const
printnodespacing = ' ';
var
{ indention used when writing a node tree to the screen }
printnodeindention : string;
{ Node dumping support functions }
procedure printnodeindent; inline;
procedure printnodeunindent; inline;
{$ifdef DEBUG_NODE_XML}
function SanitiseXMLString(const S: ansistring): ansistring;
function WritePointer(const P: Pointer): ansistring;
function WriteGUID(const GUID: TGUID): ansistring;
{$endif DEBUG_NODE_XML}
implementation
@ -1019,6 +1034,336 @@ implementation
end;
procedure printnodeindent; inline;
begin
printnodeindention:=printnodeindention+printnodespacing;
end;
procedure printnodeunindent; inline;
begin
delete(printnodeindention,1,length(printnodespacing));
end;
{$ifdef DEBUG_NODE_XML}
function WritePointer(const P: Pointer): ansistring;
begin
case PtrUInt(P) of
0:
WritePointer := 'nil';
1..$FFFF:
WritePointer := '$' + hexstr(PtrUInt(P), 4);
$10000..$FFFFFFFF:
WritePointer := '$' + hexstr(PtrUInt(P), 8);
{$ifdef CPU64}
else
WritePointer := '$' + hexstr(PtrUInt(P), 16);
{$endif CPU64}
end;
end;
function WriteGUID(const GUID: TGUID): ansistring;
var
i: Integer;
begin
Result := '{' + hexstr(GUID.D1, 8) + '-' + hexstr(GUID.D2, 4) + '-' + hexstr(GUID.D3, 4) + '-';
for i := 0 to 7 do
Result := Result + hexstr(GUID.D4[i], 2);
Result := Result + '}';
end;
function SanitiseXMLString(const S: ansistring): ansistring;
var
X, UTF8Len, UTF8Char, CurrentChar: Integer;
needs_quoting, in_quotes, add_end_quote: Boolean;
DoASCII: Boolean;
{ Write the given byte as #xxx }
procedure EncodeControlChar(Value: Byte);
begin
if X = Length(Result) then
add_end_quote := False;
Delete(Result, X, 1);
if in_quotes then
begin
Insert('#' + tostr(Value) + '''', Result, X);
{ If the entire string consists of control characters, it
doesn't need quoting, so only set the flag here }
needs_quoting := True;
in_quotes := False;
end
else
Insert('#' + tostr(Value), Result, X);
end;
{ Write the given byte as either a plain character or an XML keyword }
procedure EncodeStandardChar(Value: Byte);
begin
if not in_quotes then
begin
in_quotes := True;
if (X < Length(Result)) then
begin
needs_quoting := True;
Insert('''', Result, X + 1)
end;
end;
{ Check the character for anything that could be mistaken for an XML element }
case CurrentChar of
Ord('#'):
{ Required to differentiate '#27' from the escape code #27, for example }
needs_quoting:=true;
Ord('<'):
begin
Delete(Result, X, 1);
Insert('&lt;', Result, X);
end;
Ord('>'):
begin
Delete(Result, X, 1);
Insert('&gt;', Result, X);
end;
Ord('&'):
begin
Delete(Result, X, 1);
Insert('&amp;', Result, X);
end;
Ord('"'):
begin
needs_quoting := True;
Delete(Result, X, 1);
Insert('&quot;', Result, X);
end;
Ord(''''):
begin
needs_quoting:=true;
{ Simply double it like in pascal strings }
Insert('''', Result, X);
end;
else
{ Do nothing };
end;
end;
{ Convert character between $80 and $FF to UTF-8 }
procedure EncodeExtendedChar(Value: Byte);
begin
if not in_quotes then
begin
in_quotes := True;
if (X < Length(Result)) then
begin
needs_quoting := True;
Insert('''', Result, X + 1)
end;
end;
case Value of
$80..$BF: { Add $C2 before the value }
Insert(#$C2, Result, X);
$C0..$FF: { Zero the $40 bit and add $C3 before the value }
begin
Result[X] := Char(Byte(Result[X]) and $BF);
Insert(#$C3, Result, X);
end;
else
{ Previous conditions should prevent this procedure from being
called if Value < $80 }
InternalError(2019061901);
end;
end;
begin
needs_quoting := False;
Result := S;
{ Gets set to True if an invalid UTF-8 sequence is found }
DoASCII := False;
{ By setting in_quotes to false here, we can exclude the single
quotation marks surrounding the string if it doesn't contain any
control characters, or consists entirely of control characters. }
in_quotes := False;
add_end_quote := True;
X := Length(Result);
while X > 0 do
begin
CurrentChar := Ord(Result[X]);
{ Control characters and extended characters need special handling }
case CurrentChar of
$00..$1F, $7F:
EncodeControlChar(CurrentChar);
$20..$7E:
EncodeStandardChar(CurrentChar);
{ UTF-8 continuation byte }
$80..$BF:
begin
if not in_quotes then
begin
in_quotes := True;
if (X < Length(Result)) then
begin
needs_quoting := True;
Insert('''', Result, X + 1)
end;
end;
UTF8Char := CurrentChar and $3F; { The data bits of the continuation byte }
UTF8Len := 1; { This variable actually holds 1 less than the length }
{ By setting DoASCII to true, it marks the string as 'invalid UTF-8'
automatically if it reaches the beginning of the string unexpectedly }
DoASCII := True;
Dec(X);
while X > 0 do
begin
CurrentChar := Ord(Result[X]);
case CurrentChar of
{ A standard character here is invalid UTF-8 }
$00..$7F:
Break;
{ Another continuation byte }
$80..$BF:
begin
UTF8Char := UTF8Char or ((CurrentChar and $3F) shl (6 * UTF8Len));
Inc(UTF8Len);
if UTF8Len >= 4 then
{ Sequence too long }
Break;
end;
{ Lead byte for 2-byte sequences }
$C2..$DF:
begin
if UTF8Len <> 1 then Break;
UTF8Char := UTF8Char or ((CurrentChar and $1F) shl 6);
{ Check to see if the code is in range and not part of an 'overlong' sequence }
case UTF8Char of
$0080..$07FF:
DoASCII := False;
else
{ Do nothing - DoASCII is already true }
end;
Break;
end;
{ Lead byte for 3-byte sequences }
$E0..$EF:
begin
if UTF8Len <> 2 then Break;
UTF8Char := UTF8Char or ((CurrentChar and $0F) shl 12);
{ Check to see if the code is in range and not part of an 'overlong' sequence }
case UTF8Char of
$0800..$D7FF, $E000..$FFFF: { $D800..$DFFF is reserved and hence invalid }
DoASCII := False;
else
{ Do nothing - DoASCII is already true }
end;
Break;
end;
{ Lead byte for 4-byte sequences }
$F0..$F4:
begin
if UTF8Len <> 3 then Break;
UTF8Char := UTF8Char or ((CurrentChar and $07) shl 18);
{ Check to see if the code is in range and not part of an 'overlong' sequence }
case UTF8Char of
$010000..$10FFFF:
DoASCII := False;
else
{ Do nothing - DoASCII is already true }
end;
Break;
end;
{ Invalid character }
else
Break;
end;
end;
if DoASCII then
Break;
{ If all is fine, we don't need to encode any more characters }
end;
{ Invalid UTF-8 bytes and lead bytes without continuation bytes }
$C0..$FF:
begin
DoASCII := True;
Break;
end;
end;
Dec(X);
end;
{ UTF-8 failed, so encode the string as plain ASCII }
if DoASCII then
begin
{ Reset the flags and Result }
needs_quoting := False;
Result := S;
in_quotes := False;
add_end_quote := True;
for X := Length(Result) downto 1 do
begin
CurrentChar := Ord(Result[X]);
{ Control characters and extended characters need special handling }
case CurrentChar of
$00..$1F, $7F:
EncodeControlChar(CurrentChar);
$20..$7E:
EncodeStandardChar(CurrentChar);
{ Extended characters }
else
EncodeExtendedChar(CurrentChar);
end;
end;
end;
if needs_quoting then
begin
if in_quotes then
Result := '''' + Result;
if add_end_quote then
Result := Result + '''';
end;
end;
{$endif DEBUG_NODE_XML}
initialization
constexp.internalerrorproc:=@internalerror;
finalization