mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-06 07:47:59 +02:00
Apply patch proposed by J. Gareth Moreton in:
bug report #0036882: [Feature] Class and record definition XML dump extension These patches extend the node dump feature (enabled with DEBUG_NODE_XML) so it also dumps class and record definitions to the XML file. They are contained within <definition> tags. Currently only fields and constants are dumped to the XML file. Methods, constant and variable definitions may be added later. git-svn-id: trunk@47658 -
This commit is contained in:
parent
9901a11d22
commit
af17e9460b
@ -394,8 +394,6 @@ interface
|
||||
procedure XMLPrintNodeInfo(var T: Text); dynamic;
|
||||
procedure XMLPrintNodeData(var T: Text); virtual;
|
||||
procedure XMLPrintNodeTree(var T: Text); virtual;
|
||||
class function SanitiseXMLString(const S: ansistring): ansistring; static;
|
||||
class function WritePointer(const P: Pointer): ansistring; static;
|
||||
{$endif DEBUG_NODE_XML}
|
||||
procedure concattolist(l : tlinkedlist);virtual;
|
||||
function ischild(p : tnode) : boolean;virtual;
|
||||
@ -493,14 +491,6 @@ interface
|
||||
function ppuloadnodetree(ppufile:tcompilerppufile):tnode;
|
||||
procedure ppuwritenodetree(ppufile:tcompilerppufile;n:tnode);
|
||||
|
||||
const
|
||||
printnodespacing = ' ';
|
||||
var
|
||||
{ indention used when writing the tree to the screen }
|
||||
printnodeindention : string;
|
||||
|
||||
procedure printnodeindent;
|
||||
procedure printnodeunindent;
|
||||
procedure printnode(var t:text;n:tnode);
|
||||
procedure printnode(n:tnode);
|
||||
{$ifdef DEBUG_NODE_XML}
|
||||
@ -663,18 +653,6 @@ implementation
|
||||
end;
|
||||
|
||||
|
||||
procedure printnodeindent;
|
||||
begin
|
||||
printnodeindention:=printnodeindention+printnodespacing;
|
||||
end;
|
||||
|
||||
|
||||
procedure printnodeunindent;
|
||||
begin
|
||||
delete(printnodeindention,1,length(printnodespacing));
|
||||
end;
|
||||
|
||||
|
||||
procedure printnode(var t:text;n:tnode);
|
||||
begin
|
||||
if assigned(n) then
|
||||
@ -982,309 +960,6 @@ implementation
|
||||
PrintNodeUnindent;
|
||||
WriteLn(T, PrintNodeIndention, '</', nodetype2str[nodetype], '>');
|
||||
end;
|
||||
|
||||
class function TNode.WritePointer(const P: Pointer): ansistring;
|
||||
begin
|
||||
case PtrUInt(P) of
|
||||
0:
|
||||
WritePointer := 'nil';
|
||||
1..$FFFF:
|
||||
WritePointer := '$' + hexstr(PtrUInt(P), 4);
|
||||
$10000..$FFFFFFFF:
|
||||
WritePointer := '$' + hexstr(PtrUInt(P), 8);
|
||||
{$ifdef CPU64}
|
||||
else
|
||||
WritePointer := '$' + hexstr(PtrUInt(P), 16);
|
||||
{$endif CPU64}
|
||||
end;
|
||||
end;
|
||||
|
||||
class function TNode.SanitiseXMLString(const S: ansistring): ansistring;
|
||||
var
|
||||
X, UTF8Len, UTF8Char, CurrentChar: Integer;
|
||||
needs_quoting, in_quotes, add_end_quote: Boolean;
|
||||
DoASCII: Boolean;
|
||||
|
||||
{ Write the given byte as #xxx }
|
||||
procedure EncodeControlChar(Value: Byte);
|
||||
begin
|
||||
if X = Length(Result) then
|
||||
add_end_quote := False;
|
||||
|
||||
Delete(Result, X, 1);
|
||||
if in_quotes then
|
||||
begin
|
||||
Insert('#' + tostr(Value) + '''', Result, X);
|
||||
|
||||
{ If the entire string consists of control characters, it
|
||||
doesn't need quoting, so only set the flag here }
|
||||
needs_quoting := True;
|
||||
|
||||
in_quotes := False;
|
||||
end
|
||||
else
|
||||
Insert('#' + tostr(Value), Result, X);
|
||||
end;
|
||||
|
||||
{ Write the given byte as either a plain character or an XML keyword }
|
||||
procedure EncodeStandardChar(Value: Byte);
|
||||
begin
|
||||
if not in_quotes then
|
||||
begin
|
||||
in_quotes := True;
|
||||
if (X < Length(Result)) then
|
||||
begin
|
||||
needs_quoting := True;
|
||||
Insert('''', Result, X + 1)
|
||||
end;
|
||||
end;
|
||||
|
||||
{ Check the character for anything that could be mistaken for an XML element }
|
||||
case CurrentChar of
|
||||
Ord('#'):
|
||||
{ Required to differentiate '#27' from the escape code #27, for example }
|
||||
needs_quoting:=true;
|
||||
|
||||
Ord('<'):
|
||||
begin
|
||||
Delete(Result, X, 1);
|
||||
Insert('<', Result, X);
|
||||
end;
|
||||
Ord('>'):
|
||||
begin
|
||||
Delete(Result, X, 1);
|
||||
Insert('>', Result, X);
|
||||
end;
|
||||
Ord('&'):
|
||||
begin
|
||||
Delete(Result, X, 1);
|
||||
Insert('&', Result, X);
|
||||
end;
|
||||
Ord('"'):
|
||||
begin
|
||||
needs_quoting := True;
|
||||
Delete(Result, X, 1);
|
||||
Insert('"', Result, X);
|
||||
end;
|
||||
Ord(''''):
|
||||
begin
|
||||
needs_quoting:=true;
|
||||
{ Simply double it like in pascal strings }
|
||||
Insert('''', Result, X);
|
||||
end;
|
||||
else
|
||||
{ Do nothing };
|
||||
end;
|
||||
end;
|
||||
|
||||
{ Convert character between $80 and $FF to UTF-8 }
|
||||
procedure EncodeExtendedChar(Value: Byte);
|
||||
begin
|
||||
if not in_quotes then
|
||||
begin
|
||||
in_quotes := True;
|
||||
if (X < Length(Result)) then
|
||||
begin
|
||||
needs_quoting := True;
|
||||
Insert('''', Result, X + 1)
|
||||
end;
|
||||
end;
|
||||
|
||||
case Value of
|
||||
$80..$BF: { Add $C2 before the value }
|
||||
Insert(#$C2, Result, X);
|
||||
$C0..$FF: { Zero the $40 bit and add $C3 before the value }
|
||||
begin
|
||||
Result[X] := Char(Byte(Result[X]) and $BF);
|
||||
Insert(#$C3, Result, X);
|
||||
end;
|
||||
else
|
||||
{ Previous conditions should prevent this procedure from being
|
||||
called if Value < $80 }
|
||||
InternalError(2019061901);
|
||||
end;
|
||||
end;
|
||||
|
||||
begin
|
||||
needs_quoting := False;
|
||||
Result := S;
|
||||
|
||||
{ Gets set to True if an invalid UTF-8 sequence is found }
|
||||
DoASCII := False;
|
||||
|
||||
{ By setting in_quotes to false here, we can exclude the single
|
||||
quotation marks surrounding the string if it doesn't contain any
|
||||
control characters, or consists entirely of control characters. }
|
||||
in_quotes := False;
|
||||
|
||||
add_end_quote := True;
|
||||
|
||||
X := Length(Result);
|
||||
while X > 0 do
|
||||
begin
|
||||
CurrentChar := Ord(Result[X]);
|
||||
|
||||
{ Control characters and extended characters need special handling }
|
||||
case CurrentChar of
|
||||
$00..$1F, $7F:
|
||||
EncodeControlChar(CurrentChar);
|
||||
|
||||
$20..$7E:
|
||||
EncodeStandardChar(CurrentChar);
|
||||
|
||||
{ UTF-8 continuation byte }
|
||||
$80..$BF:
|
||||
begin
|
||||
if not in_quotes then
|
||||
begin
|
||||
in_quotes := True;
|
||||
if (X < Length(Result)) then
|
||||
begin
|
||||
needs_quoting := True;
|
||||
Insert('''', Result, X + 1)
|
||||
end;
|
||||
end;
|
||||
|
||||
UTF8Char := CurrentChar and $3F; { The data bits of the continuation byte }
|
||||
UTF8Len := 1; { This variable actually holds 1 less than the length }
|
||||
|
||||
{ By setting DoASCII to true, it marks the string as 'invalid UTF-8'
|
||||
automatically if it reaches the beginning of the string unexpectedly }
|
||||
DoASCII := True;
|
||||
|
||||
Dec(X);
|
||||
while X > 0 do
|
||||
begin
|
||||
CurrentChar := Ord(Result[X]);
|
||||
|
||||
case CurrentChar of
|
||||
{ A standard character here is invalid UTF-8 }
|
||||
$00..$7F:
|
||||
Break;
|
||||
|
||||
{ Another continuation byte }
|
||||
$80..$BF:
|
||||
begin
|
||||
UTF8Char := UTF8Char or ((CurrentChar and $3F) shl (6 * UTF8Len));
|
||||
|
||||
Inc(UTF8Len);
|
||||
if UTF8Len >= 4 then
|
||||
{ Sequence too long }
|
||||
Break;
|
||||
end;
|
||||
|
||||
{ Lead byte for 2-byte sequences }
|
||||
$C2..$DF:
|
||||
begin
|
||||
if UTF8Len <> 1 then Break;
|
||||
|
||||
UTF8Char := UTF8Char or ((CurrentChar and $1F) shl 6);
|
||||
|
||||
{ Check to see if the code is in range and not part of an 'overlong' sequence }
|
||||
case UTF8Char of
|
||||
$0080..$07FF:
|
||||
DoASCII := False;
|
||||
else
|
||||
{ Do nothing - DoASCII is already true }
|
||||
end;
|
||||
Break;
|
||||
end;
|
||||
|
||||
{ Lead byte for 3-byte sequences }
|
||||
$E0..$EF:
|
||||
begin
|
||||
if UTF8Len <> 2 then Break;
|
||||
|
||||
UTF8Char := UTF8Char or ((CurrentChar and $0F) shl 12);
|
||||
|
||||
{ Check to see if the code is in range and not part of an 'overlong' sequence }
|
||||
case UTF8Char of
|
||||
$0800..$D7FF, $E000..$FFFF: { $D800..$DFFF is reserved and hence invalid }
|
||||
DoASCII := False;
|
||||
else
|
||||
{ Do nothing - DoASCII is already true }
|
||||
end;
|
||||
Break;
|
||||
end;
|
||||
|
||||
{ Lead byte for 4-byte sequences }
|
||||
$F0..$F4:
|
||||
begin
|
||||
if UTF8Len <> 3 then Break;
|
||||
|
||||
UTF8Char := UTF8Char or ((CurrentChar and $07) shl 18);
|
||||
|
||||
{ Check to see if the code is in range and not part of an 'overlong' sequence }
|
||||
case UTF8Char of
|
||||
$010000..$10FFFF:
|
||||
DoASCII := False;
|
||||
else
|
||||
{ Do nothing - DoASCII is already true }
|
||||
end;
|
||||
Break;
|
||||
end;
|
||||
|
||||
{ Invalid character }
|
||||
else
|
||||
Break;
|
||||
end;
|
||||
end;
|
||||
|
||||
if DoASCII then
|
||||
Break;
|
||||
|
||||
{ If all is fine, we don't need to encode any more characters }
|
||||
end;
|
||||
|
||||
{ Invalid UTF-8 bytes and lead bytes without continuation bytes }
|
||||
$C0..$FF:
|
||||
begin
|
||||
DoASCII := True;
|
||||
Break;
|
||||
end;
|
||||
end;
|
||||
|
||||
Dec(X);
|
||||
end;
|
||||
|
||||
{ UTF-8 failed, so encode the string as plain ASCII }
|
||||
if DoASCII then
|
||||
begin
|
||||
{ Reset the flags and Result }
|
||||
needs_quoting := False;
|
||||
Result := S;
|
||||
in_quotes := False;
|
||||
add_end_quote := True;
|
||||
|
||||
for X := Length(Result) downto 1 do
|
||||
begin
|
||||
CurrentChar := Ord(Result[X]);
|
||||
|
||||
{ Control characters and extended characters need special handling }
|
||||
case CurrentChar of
|
||||
$00..$1F, $7F:
|
||||
EncodeControlChar(CurrentChar);
|
||||
|
||||
$20..$7E:
|
||||
EncodeStandardChar(CurrentChar);
|
||||
|
||||
{ Extended characters }
|
||||
else
|
||||
EncodeExtendedChar(CurrentChar);
|
||||
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
if needs_quoting then
|
||||
begin
|
||||
if in_quotes then
|
||||
Result := '''' + Result;
|
||||
|
||||
if add_end_quote then
|
||||
Result := Result + '''';
|
||||
end;
|
||||
end;
|
||||
{$endif DEBUG_NODE_XML}
|
||||
|
||||
function tnode.isequal(p : tnode) : boolean;
|
||||
|
@ -1475,7 +1475,7 @@ implementation
|
||||
if Assigned(procdef.struct) then
|
||||
begin
|
||||
if Assigned(procdef.struct.objrealname) then
|
||||
Write(T, ' struct="', TNode.SanitiseXMLString(procdef.struct.objrealname^), '"')
|
||||
Write(T, ' struct="', SanitiseXMLString(procdef.struct.objrealname^), '"')
|
||||
else
|
||||
Write(T, ' struct="<NULL>"');
|
||||
end;
|
||||
@ -1523,7 +1523,7 @@ implementation
|
||||
PrintType('package stub');
|
||||
end;
|
||||
|
||||
Write(T, ' name="', TNode.SanitiseXMLString(procdef.customprocname([pno_showhidden, pno_noclassmarker])), '"');
|
||||
Write(T, ' name="', SanitiseXMLString(procdef.customprocname([pno_showhidden, pno_noclassmarker])), '"');
|
||||
|
||||
if po_hascallingconvention in procdef.procoptions then
|
||||
Write(T, ' convention="', proccalloptionStr[procdef.proccalloption], '"');
|
||||
@ -1533,7 +1533,7 @@ implementation
|
||||
PrintNodeIndent;
|
||||
|
||||
if Assigned(procdef.returndef) and not is_void(procdef.returndef) then
|
||||
WriteLn(T, PrintNodeIndention, '<returndef>', TNode.SanitiseXMLString(procdef.returndef.typesymbolprettyname), '</returndef>');
|
||||
WriteLn(T, PrintNodeIndention, '<returndef>', SanitiseXMLString(procdef.returndef.typesymbolprettyname), '</returndef>');
|
||||
|
||||
if po_reintroduce in procdef.procoptions then
|
||||
PrintOption('reintroduce');
|
||||
@ -2472,7 +2472,6 @@ implementation
|
||||
printproc( 'after parsing');
|
||||
|
||||
{$ifdef DEBUG_NODE_XML}
|
||||
printnodeindention := printnodespacing;
|
||||
XMLPrintProc;
|
||||
{$endif DEBUG_NODE_XML}
|
||||
|
||||
@ -2913,6 +2912,8 @@ implementation
|
||||
WriteLn(T, '<?xml version="1.0" encoding="utf-8"?>');
|
||||
WriteLn(T, '<', RootName, ' name="', ModuleName, '">');
|
||||
Close(T);
|
||||
|
||||
printnodeindention := printnodespacing;
|
||||
end;
|
||||
|
||||
|
||||
|
@ -119,6 +119,21 @@ interface
|
||||
procedure DoneVerbose;
|
||||
|
||||
|
||||
const
|
||||
printnodespacing = ' ';
|
||||
var
|
||||
{ indention used when writing a node tree to the screen }
|
||||
printnodeindention : string;
|
||||
|
||||
|
||||
{ Node dumping support functions }
|
||||
procedure printnodeindent; inline;
|
||||
procedure printnodeunindent; inline;
|
||||
{$ifdef DEBUG_NODE_XML}
|
||||
function SanitiseXMLString(const S: ansistring): ansistring;
|
||||
function WritePointer(const P: Pointer): ansistring;
|
||||
function WriteGUID(const GUID: TGUID): ansistring;
|
||||
{$endif DEBUG_NODE_XML}
|
||||
|
||||
implementation
|
||||
|
||||
@ -1019,6 +1034,336 @@ implementation
|
||||
end;
|
||||
|
||||
|
||||
procedure printnodeindent; inline;
|
||||
begin
|
||||
printnodeindention:=printnodeindention+printnodespacing;
|
||||
end;
|
||||
|
||||
|
||||
procedure printnodeunindent; inline;
|
||||
begin
|
||||
delete(printnodeindention,1,length(printnodespacing));
|
||||
end;
|
||||
|
||||
{$ifdef DEBUG_NODE_XML}
|
||||
function WritePointer(const P: Pointer): ansistring;
|
||||
begin
|
||||
case PtrUInt(P) of
|
||||
0:
|
||||
WritePointer := 'nil';
|
||||
1..$FFFF:
|
||||
WritePointer := '$' + hexstr(PtrUInt(P), 4);
|
||||
$10000..$FFFFFFFF:
|
||||
WritePointer := '$' + hexstr(PtrUInt(P), 8);
|
||||
{$ifdef CPU64}
|
||||
else
|
||||
WritePointer := '$' + hexstr(PtrUInt(P), 16);
|
||||
{$endif CPU64}
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
function WriteGUID(const GUID: TGUID): ansistring;
|
||||
var
|
||||
i: Integer;
|
||||
begin
|
||||
Result := '{' + hexstr(GUID.D1, 8) + '-' + hexstr(GUID.D2, 4) + '-' + hexstr(GUID.D3, 4) + '-';
|
||||
for i := 0 to 7 do
|
||||
Result := Result + hexstr(GUID.D4[i], 2);
|
||||
|
||||
Result := Result + '}';
|
||||
end;
|
||||
|
||||
|
||||
function SanitiseXMLString(const S: ansistring): ansistring;
|
||||
var
|
||||
X, UTF8Len, UTF8Char, CurrentChar: Integer;
|
||||
needs_quoting, in_quotes, add_end_quote: Boolean;
|
||||
DoASCII: Boolean;
|
||||
|
||||
{ Write the given byte as #xxx }
|
||||
procedure EncodeControlChar(Value: Byte);
|
||||
begin
|
||||
if X = Length(Result) then
|
||||
add_end_quote := False;
|
||||
|
||||
Delete(Result, X, 1);
|
||||
if in_quotes then
|
||||
begin
|
||||
Insert('#' + tostr(Value) + '''', Result, X);
|
||||
|
||||
{ If the entire string consists of control characters, it
|
||||
doesn't need quoting, so only set the flag here }
|
||||
needs_quoting := True;
|
||||
|
||||
in_quotes := False;
|
||||
end
|
||||
else
|
||||
Insert('#' + tostr(Value), Result, X);
|
||||
end;
|
||||
|
||||
{ Write the given byte as either a plain character or an XML keyword }
|
||||
procedure EncodeStandardChar(Value: Byte);
|
||||
begin
|
||||
if not in_quotes then
|
||||
begin
|
||||
in_quotes := True;
|
||||
if (X < Length(Result)) then
|
||||
begin
|
||||
needs_quoting := True;
|
||||
Insert('''', Result, X + 1)
|
||||
end;
|
||||
end;
|
||||
|
||||
{ Check the character for anything that could be mistaken for an XML element }
|
||||
case CurrentChar of
|
||||
Ord('#'):
|
||||
{ Required to differentiate '#27' from the escape code #27, for example }
|
||||
needs_quoting:=true;
|
||||
|
||||
Ord('<'):
|
||||
begin
|
||||
Delete(Result, X, 1);
|
||||
Insert('<', Result, X);
|
||||
end;
|
||||
Ord('>'):
|
||||
begin
|
||||
Delete(Result, X, 1);
|
||||
Insert('>', Result, X);
|
||||
end;
|
||||
Ord('&'):
|
||||
begin
|
||||
Delete(Result, X, 1);
|
||||
Insert('&', Result, X);
|
||||
end;
|
||||
Ord('"'):
|
||||
begin
|
||||
needs_quoting := True;
|
||||
Delete(Result, X, 1);
|
||||
Insert('"', Result, X);
|
||||
end;
|
||||
Ord(''''):
|
||||
begin
|
||||
needs_quoting:=true;
|
||||
{ Simply double it like in pascal strings }
|
||||
Insert('''', Result, X);
|
||||
end;
|
||||
else
|
||||
{ Do nothing };
|
||||
end;
|
||||
end;
|
||||
|
||||
{ Convert character between $80 and $FF to UTF-8 }
|
||||
procedure EncodeExtendedChar(Value: Byte);
|
||||
begin
|
||||
if not in_quotes then
|
||||
begin
|
||||
in_quotes := True;
|
||||
if (X < Length(Result)) then
|
||||
begin
|
||||
needs_quoting := True;
|
||||
Insert('''', Result, X + 1)
|
||||
end;
|
||||
end;
|
||||
|
||||
case Value of
|
||||
$80..$BF: { Add $C2 before the value }
|
||||
Insert(#$C2, Result, X);
|
||||
$C0..$FF: { Zero the $40 bit and add $C3 before the value }
|
||||
begin
|
||||
Result[X] := Char(Byte(Result[X]) and $BF);
|
||||
Insert(#$C3, Result, X);
|
||||
end;
|
||||
else
|
||||
{ Previous conditions should prevent this procedure from being
|
||||
called if Value < $80 }
|
||||
InternalError(2019061901);
|
||||
end;
|
||||
end;
|
||||
|
||||
begin
|
||||
needs_quoting := False;
|
||||
Result := S;
|
||||
|
||||
{ Gets set to True if an invalid UTF-8 sequence is found }
|
||||
DoASCII := False;
|
||||
|
||||
{ By setting in_quotes to false here, we can exclude the single
|
||||
quotation marks surrounding the string if it doesn't contain any
|
||||
control characters, or consists entirely of control characters. }
|
||||
in_quotes := False;
|
||||
|
||||
add_end_quote := True;
|
||||
|
||||
X := Length(Result);
|
||||
while X > 0 do
|
||||
begin
|
||||
CurrentChar := Ord(Result[X]);
|
||||
|
||||
{ Control characters and extended characters need special handling }
|
||||
case CurrentChar of
|
||||
$00..$1F, $7F:
|
||||
EncodeControlChar(CurrentChar);
|
||||
|
||||
$20..$7E:
|
||||
EncodeStandardChar(CurrentChar);
|
||||
|
||||
{ UTF-8 continuation byte }
|
||||
$80..$BF:
|
||||
begin
|
||||
if not in_quotes then
|
||||
begin
|
||||
in_quotes := True;
|
||||
if (X < Length(Result)) then
|
||||
begin
|
||||
needs_quoting := True;
|
||||
Insert('''', Result, X + 1)
|
||||
end;
|
||||
end;
|
||||
|
||||
UTF8Char := CurrentChar and $3F; { The data bits of the continuation byte }
|
||||
UTF8Len := 1; { This variable actually holds 1 less than the length }
|
||||
|
||||
{ By setting DoASCII to true, it marks the string as 'invalid UTF-8'
|
||||
automatically if it reaches the beginning of the string unexpectedly }
|
||||
DoASCII := True;
|
||||
|
||||
Dec(X);
|
||||
while X > 0 do
|
||||
begin
|
||||
CurrentChar := Ord(Result[X]);
|
||||
|
||||
case CurrentChar of
|
||||
{ A standard character here is invalid UTF-8 }
|
||||
$00..$7F:
|
||||
Break;
|
||||
|
||||
{ Another continuation byte }
|
||||
$80..$BF:
|
||||
begin
|
||||
UTF8Char := UTF8Char or ((CurrentChar and $3F) shl (6 * UTF8Len));
|
||||
|
||||
Inc(UTF8Len);
|
||||
if UTF8Len >= 4 then
|
||||
{ Sequence too long }
|
||||
Break;
|
||||
end;
|
||||
|
||||
{ Lead byte for 2-byte sequences }
|
||||
$C2..$DF:
|
||||
begin
|
||||
if UTF8Len <> 1 then Break;
|
||||
|
||||
UTF8Char := UTF8Char or ((CurrentChar and $1F) shl 6);
|
||||
|
||||
{ Check to see if the code is in range and not part of an 'overlong' sequence }
|
||||
case UTF8Char of
|
||||
$0080..$07FF:
|
||||
DoASCII := False;
|
||||
else
|
||||
{ Do nothing - DoASCII is already true }
|
||||
end;
|
||||
Break;
|
||||
end;
|
||||
|
||||
{ Lead byte for 3-byte sequences }
|
||||
$E0..$EF:
|
||||
begin
|
||||
if UTF8Len <> 2 then Break;
|
||||
|
||||
UTF8Char := UTF8Char or ((CurrentChar and $0F) shl 12);
|
||||
|
||||
{ Check to see if the code is in range and not part of an 'overlong' sequence }
|
||||
case UTF8Char of
|
||||
$0800..$D7FF, $E000..$FFFF: { $D800..$DFFF is reserved and hence invalid }
|
||||
DoASCII := False;
|
||||
else
|
||||
{ Do nothing - DoASCII is already true }
|
||||
end;
|
||||
Break;
|
||||
end;
|
||||
|
||||
{ Lead byte for 4-byte sequences }
|
||||
$F0..$F4:
|
||||
begin
|
||||
if UTF8Len <> 3 then Break;
|
||||
|
||||
UTF8Char := UTF8Char or ((CurrentChar and $07) shl 18);
|
||||
|
||||
{ Check to see if the code is in range and not part of an 'overlong' sequence }
|
||||
case UTF8Char of
|
||||
$010000..$10FFFF:
|
||||
DoASCII := False;
|
||||
else
|
||||
{ Do nothing - DoASCII is already true }
|
||||
end;
|
||||
Break;
|
||||
end;
|
||||
|
||||
{ Invalid character }
|
||||
else
|
||||
Break;
|
||||
end;
|
||||
end;
|
||||
|
||||
if DoASCII then
|
||||
Break;
|
||||
|
||||
{ If all is fine, we don't need to encode any more characters }
|
||||
end;
|
||||
|
||||
{ Invalid UTF-8 bytes and lead bytes without continuation bytes }
|
||||
$C0..$FF:
|
||||
begin
|
||||
DoASCII := True;
|
||||
Break;
|
||||
end;
|
||||
end;
|
||||
|
||||
Dec(X);
|
||||
end;
|
||||
|
||||
{ UTF-8 failed, so encode the string as plain ASCII }
|
||||
if DoASCII then
|
||||
begin
|
||||
{ Reset the flags and Result }
|
||||
needs_quoting := False;
|
||||
Result := S;
|
||||
in_quotes := False;
|
||||
add_end_quote := True;
|
||||
|
||||
for X := Length(Result) downto 1 do
|
||||
begin
|
||||
CurrentChar := Ord(Result[X]);
|
||||
|
||||
{ Control characters and extended characters need special handling }
|
||||
case CurrentChar of
|
||||
$00..$1F, $7F:
|
||||
EncodeControlChar(CurrentChar);
|
||||
|
||||
$20..$7E:
|
||||
EncodeStandardChar(CurrentChar);
|
||||
|
||||
{ Extended characters }
|
||||
else
|
||||
EncodeExtendedChar(CurrentChar);
|
||||
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
if needs_quoting then
|
||||
begin
|
||||
if in_quotes then
|
||||
Result := '''' + Result;
|
||||
|
||||
if add_end_quote then
|
||||
Result := Result + '''';
|
||||
end;
|
||||
end;
|
||||
{$endif DEBUG_NODE_XML}
|
||||
|
||||
|
||||
initialization
|
||||
constexp.internalerrorproc:=@internalerror;
|
||||
finalization
|
||||
|
Loading…
Reference in New Issue
Block a user