mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-03 14:10:24 +02:00
utils: patch to unicode utils from Inoussa:
The CLDR parser now fully parses only the selected collation type; the other collation types are read in header-only mode. This should significantly reduce memory consumption.
git-svn-id: trunk@23883 -
This commit is contained in:
parent
be1fcc8e3d
commit
f12a0b7ece
@ -144,6 +144,8 @@ type
|
|||||||
|
|
||||||
TCldrCollation = class;
|
TCldrCollation = class;
|
||||||
|
|
||||||
|
{ TCldrCollationItem }
|
||||||
|
|
||||||
TCldrCollationItem = class
|
TCldrCollationItem = class
|
||||||
private
|
private
|
||||||
FBackwards: Boolean;
|
FBackwards: Boolean;
|
||||||
@ -153,6 +155,7 @@ type
|
|||||||
FRules: TReorderSequenceArray;
|
FRules: TReorderSequenceArray;
|
||||||
FTypeName: string;
|
FTypeName: string;
|
||||||
public
|
public
|
||||||
|
procedure Clear();
|
||||||
property Parent : TCldrCollation read FParent;
|
property Parent : TCldrCollation read FParent;
|
||||||
property TypeName : string read FTypeName write FTypeName;
|
property TypeName : string read FTypeName write FTypeName;
|
||||||
property Base : string read FBase write FBase;
|
property Base : string read FBase write FBase;
|
||||||
@ -187,6 +190,8 @@ type
|
|||||||
property Items[Index : Integer] : TCldrCollationItem read GetItem;
|
property Items[Index : Integer] : TCldrCollationItem read GetItem;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Selects how much of a collation entry the CLDR parser reads:
    HeaderParsing - ParseCollationItem leaves the item's Rules empty (nil);
    FullParsing   - ParseCollationItem also parses the rules and stores them
                    in the item's Rules. }
TCldrParserMode = (HeaderParsing, FullParsing);
|
||||||
|
|
||||||
function ComputeWeigths(
|
function ComputeWeigths(
|
||||||
const AData : PReorderUnit;
|
const AData : PReorderUnit;
|
||||||
const ADataLen : Integer;
|
const ADataLen : Integer;
|
||||||
@ -1104,6 +1109,17 @@ begin
|
|||||||
Result := locNotFound;
|
Result := locNotFound;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ TCldrCollationItem }
|
||||||
|
|
||||||
|
{ Resets the item's parsed state (type name, base, backwards flag, changed
  fields and rules) to empty values. FParent is not modified here. }
procedure TCldrCollationItem.Clear();
begin
  // Textual attributes.
  FTypeName := '';
  FBase := '';
  // Settings and the record of which settings were changed.
  FChangedFields := [];
  FBackwards := False;
  // Assigning nil to a dynamic array empties it, like SetLength(FRules,0).
  FRules := nil;
end;
|
||||||
|
|
||||||
{ TCldrCollation }
|
{ TCldrCollation }
|
||||||
|
|
||||||
function TCldrCollation.GetItem(Index : Integer): TCldrCollationItem;
|
function TCldrCollation.GetItem(Index : Integer): TCldrCollationItem;
|
||||||
|
@ -102,6 +102,7 @@ var
|
|||||||
i , c: Integer;
|
i , c: Integer;
|
||||||
collation : TCldrCollation;
|
collation : TCldrCollation;
|
||||||
dataPath, outputPath : string;
|
dataPath, outputPath : string;
|
||||||
|
collationItem : TCldrCollationItem;
|
||||||
begin
|
begin
|
||||||
{$ifdef test_suite}
|
{$ifdef test_suite}
|
||||||
exec_tests();
|
exec_tests();
|
||||||
@ -150,17 +151,20 @@ begin
|
|||||||
endianStream := nil;
|
endianStream := nil;
|
||||||
collation := TCldrCollation.Create();
|
collation := TCldrCollation.Create();
|
||||||
try
|
try
|
||||||
ParseCollationDocument(collationFileName,collation);
|
ParseCollationDocument(collationFileName,collation,TCldrParserMode.HeaderParsing);
|
||||||
WriteLn(Format(' Collation Count = %d',[collation.ItemCount]));
|
WriteLn(Format(' Collation Count = %d',[collation.ItemCount]));
|
||||||
if (collation.ItemCount = 0) then begin
|
if (collation.ItemCount = 0) then begin
|
||||||
WriteLn('No collation in this file.');
|
WriteLn('No collation in this file.');
|
||||||
end else begin
|
end else begin
|
||||||
for i := 0 to collation.ItemCount - 1 do
|
for i := 0 to collation.ItemCount - 1 do
|
||||||
WriteLn(Format(' Item[%d] = %d "resets"; Type = %s',[i, Length(collation.Items[i].Rules),collation.Items[i].TypeName]));
|
WriteLn(Format(' Item[%d] = (Type = %s)',[i, collation.Items[i].TypeName]));
|
||||||
if (collation.Find(collationTypeName) = nil) then
|
collationItem := collation.Find(collationTypeName);
|
||||||
|
if (collationItem = nil) then begin
|
||||||
collationTypeName := FindCollationDefaultItemName(collation);
|
collationTypeName := FindCollationDefaultItemName(collation);
|
||||||
WriteLn('Collation Item Name : ',collationTypeName);
|
collationItem := collation.Find(collationTypeName);
|
||||||
|
end;
|
||||||
|
WriteLn(Format('Parsing Collation Item "%s" ...',[collationTypeName]));
|
||||||
|
ParseCollationDocument(collationFileName,collationItem,collationTypeName);
|
||||||
|
|
||||||
s := dataPath + 'UCA_Rules_SHORT.xml';
|
s := dataPath + 'UCA_Rules_SHORT.xml';
|
||||||
WriteLn;
|
WriteLn;
|
||||||
|
@ -32,13 +32,33 @@ uses
|
|||||||
procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload;
|
procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload;
|
||||||
procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);overload;
|
procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);overload;
|
||||||
|
|
||||||
procedure ParseCollationDocument(ADoc : TDOMDocument; ACollation : TCldrCollation);
|
procedure ParseCollationDocument(
|
||||||
procedure ParseCollationDocument(const AFileName : string; ACollation : TCldrCollation);
|
ADoc : TDOMDocument;
|
||||||
|
ACollation : TCldrCollation;
|
||||||
|
AMode : TCldrParserMode
|
||||||
|
);overload;
|
||||||
|
procedure ParseCollationDocument(
|
||||||
|
const AFileName : string;
|
||||||
|
ACollation : TCldrCollation;
|
||||||
|
AMode : TCldrParserMode
|
||||||
|
);overload;
|
||||||
|
|
||||||
|
procedure ParseCollationDocument(
|
||||||
|
const AFileName : string;
|
||||||
|
ACollation : TCldrCollationItem;
|
||||||
|
AType : string
|
||||||
|
);overload;
|
||||||
|
procedure ParseCollationDocument(
|
||||||
|
ADoc : TDOMDocument;
|
||||||
|
ACollation : TCldrCollationItem;
|
||||||
|
AType : string
|
||||||
|
);overload;
|
||||||
|
|
||||||
resourcestring
|
resourcestring
|
||||||
sCaseNothandled = 'This case is not handled : "%s", Position = %d.';
|
sCaseNothandled = 'This case is not handled : "%s", Position = %d.';
|
||||||
sCodePointExpected = 'Code Point node expected as child at this position "%d".';
|
sCodePointExpected = 'Code Point node expected as child at this position "%d".';
|
||||||
sCollationsNodeNotFound = '"collations" node not found.';
|
sCollationsNodeNotFound = '"collations" node not found.';
|
||||||
|
sCollationTypeNotFound = 'collation "Type" not found : "%s".';
|
||||||
sHexAttributeExpected = '"hex" attribute expected at this position "%d".';
|
sHexAttributeExpected = '"hex" attribute expected at this position "%d".';
|
||||||
sInvalidResetClause = 'Invalid "Reset" clause.';
|
sInvalidResetClause = 'Invalid "Reset" clause.';
|
||||||
sNodeNameAssertMessage = 'Expected NodeName "%s", got "%s".';
|
sNodeNameAssertMessage = 'Expected NodeName "%s", got "%s".';
|
||||||
@ -500,7 +520,11 @@ begin
|
|||||||
SetLength(r,0);
|
SetLength(r,0);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure ParseCollationItem(ACollationNode : TDOMElement; AItem : TCldrCollationItem);
|
procedure ParseCollationItem(
|
||||||
|
ACollationNode : TDOMElement;
|
||||||
|
AItem : TCldrCollationItem;
|
||||||
|
AMode : TCldrParserMode
|
||||||
|
);
|
||||||
var
|
var
|
||||||
n : TDOMNode;
|
n : TDOMNode;
|
||||||
rulesElement : TDOMElement;
|
rulesElement : TDOMElement;
|
||||||
@ -515,7 +539,8 @@ begin
|
|||||||
AItem.Backwards := (EvaluateXPathStr('settings/@backwards',ACollationNode) = 'on');
|
AItem.Backwards := (EvaluateXPathStr('settings/@backwards',ACollationNode) = 'on');
|
||||||
if AItem.Backwards then
|
if AItem.Backwards then
|
||||||
AItem.ChangedFields := AItem.ChangedFields + [TCollationField.BackWard];
|
AItem.ChangedFields := AItem.ChangedFields + [TCollationField.BackWard];
|
||||||
|
AItem.Rules := nil;
|
||||||
|
if (AMode = TCldrParserMode.FullParsing) then begin
|
||||||
SetLength(statementList,15);
|
SetLength(statementList,15);
|
||||||
sal := 0;
|
sal := 0;
|
||||||
statement := @statementList[0];
|
statement := @statementList[0];
|
||||||
@ -550,8 +575,13 @@ begin
|
|||||||
SetLength(statementList,sal);
|
SetLength(statementList,sal);
|
||||||
AItem.Rules := statementList;
|
AItem.Rules := statementList;
|
||||||
end;
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
procedure ParseCollationDocument(ADoc : TDOMDocument; ACollation : TCldrCollation);
|
procedure ParseCollationDocument(
|
||||||
|
ADoc : TDOMDocument;
|
||||||
|
ACollation : TCldrCollation;
|
||||||
|
AMode : TCldrParserMode
|
||||||
|
);
|
||||||
var
|
var
|
||||||
rulesNodes, n : TDOMNode;
|
rulesNodes, n : TDOMNode;
|
||||||
collationsElement, rulesElement : TDOMElement;
|
collationsElement, rulesElement : TDOMElement;
|
||||||
@ -576,7 +606,7 @@ begin
|
|||||||
n := nl[i];
|
n := nl[i];
|
||||||
if (n.NodeName = s_COLLATION) then begin
|
if (n.NodeName = s_COLLATION) then begin
|
||||||
item := TCldrCollationItem.Create();
|
item := TCldrCollationItem.Create();
|
||||||
ParseCollationItem((n as TDOMElement),item);
|
ParseCollationItem((n as TDOMElement),item,AMode);
|
||||||
ACollation.Add(item);
|
ACollation.Add(item);
|
||||||
item := nil;
|
item := nil;
|
||||||
end
|
end
|
||||||
@ -588,6 +618,25 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ Fully parses a single collation entry of an already loaded document.

  ADoc       : document whose root element holds the "collations" node.
  ACollation : item that receives the result; it is cleared before parsing.
  AType      : value of the "type" attribute of the collation to parse.

  Raises Exception (sCollationTypeNotFound) when no collation node with the
  requested type exists; in that case ACollation is left untouched. }
procedure ParseCollationDocument(
  ADoc       : TDOMDocument;
  ACollation : TCldrCollationItem;
  AType      : string
);
var
  query      : string;
  matches    : TXPathVariable;
  targetNode : TDOMNode;
begin
  query := Format('collations/collation[@type=%s]',[QuotedStr(AType)]);
  matches := EvaluateXPathExpression(query,ADoc.DocumentElement);
  try
    if (matches.AsNodeSet.Count = 0) then
      raise Exception.CreateFmt(sCollationTypeNotFound,[AType]);
    // Only the first matching node is used.
    targetNode := TDOMNode(matches.AsNodeSet[0]);
    ACollation.Clear();
    ParseCollationItem(
      (targetNode as TDOMElement),
      ACollation,
      TCldrParserMode.FullParsing
    );
  finally
    // The XPath result is owned by this procedure.
    matches.Free();
  end;
end;
|
||||||
|
|
||||||
function ReadXMLFile(f: TStream) : TXMLDocument;
|
function ReadXMLFile(f: TStream) : TXMLDocument;
|
||||||
var
|
var
|
||||||
src : TXMLInputSource;
|
src : TXMLInputSource;
|
||||||
@ -618,17 +667,37 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure ParseCollationDocument(const AFileName : string; ACollation : TCldrCollation);
|
procedure ParseCollationDocument(
|
||||||
|
const AFileName : string;
|
||||||
|
ACollation : TCldrCollation;
|
||||||
|
AMode : TCldrParserMode
|
||||||
|
);
|
||||||
var
|
var
|
||||||
doc : TXMLDocument;
|
doc : TXMLDocument;
|
||||||
begin
|
begin
|
||||||
doc := ReadXMLFile(AFileName);
|
doc := ReadXMLFile(AFileName);
|
||||||
try
|
try
|
||||||
ParseCollationDocument(doc,ACollation);
|
ParseCollationDocument(doc,ACollation,AMode);
|
||||||
ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
|
ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
|
||||||
finally
|
finally
|
||||||
doc.Free();
|
doc.Free();
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{ File-based convenience overload: loads the XML file AFileName and passes
  the resulting document to the TDOMDocument overload, which parses the
  collation whose type is AType into ACollation.

  The document is freed before returning, also when parsing raises. }
procedure ParseCollationDocument(
  const AFileName  : string;
        ACollation : TCldrCollationItem;
        AType      : string
);
var
  xmlDoc : TXMLDocument;
begin
  xmlDoc := ReadXMLFile(AFileName);
  try
    ParseCollationDocument(xmlDoc,ACollation,AType);
  finally
    xmlDoc.Free();
  end;
end;
|
||||||
|
|
||||||
end.
|
end.
|
||||||
|
Loading…
Reference in New Issue
Block a user