mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-09 07:28:26 +02:00
utils: patch to unicode utils from Inoussa:
The CLDR parser now fully parses only the selected collation type; the other collation items are read in header-only mode (their rules are not loaded). This should significantly reduce memory consumption. git-svn-id: trunk@23883 -
This commit is contained in:
parent
be1fcc8e3d
commit
f12a0b7ece
@ -144,6 +144,8 @@ type
|
||||
|
||||
TCldrCollation = class;
|
||||
|
||||
{ TCldrCollationItem }
|
||||
|
||||
TCldrCollationItem = class
|
||||
private
|
||||
FBackwards: Boolean;
|
||||
@ -153,6 +155,7 @@ type
|
||||
FRules: TReorderSequenceArray;
|
||||
FTypeName: string;
|
||||
public
|
||||
procedure Clear();
|
||||
property Parent : TCldrCollation read FParent;
|
||||
property TypeName : string read FTypeName write FTypeName;
|
||||
property Base : string read FBase write FBase;
|
||||
@ -187,6 +190,8 @@ type
|
||||
property Items[Index : Integer] : TCldrCollationItem read GetItem;
|
||||
end;
|
||||
|
||||
TCldrParserMode = (HeaderParsing, FullParsing);
|
||||
|
||||
function ComputeWeigths(
|
||||
const AData : PReorderUnit;
|
||||
const ADataLen : Integer;
|
||||
@ -1104,6 +1109,17 @@ begin
|
||||
Result := locNotFound;
|
||||
end;
|
||||
|
||||
{ TCldrCollationItem }
|
||||
|
||||
{ Resets every parsed field of the item: type name, base, rules, the set of
  changed fields and the Backwards flag. FParent is not modified. }
procedure TCldrCollationItem.Clear();
begin
  FTypeName := '';
  FBase := '';
  // Assigning nil releases the dynamic array, same as a zero-length SetLength.
  FRules := nil;
  FChangedFields := [];
  FBackwards := False;
end;
|
||||
|
||||
{ TCldrCollation }
|
||||
|
||||
function TCldrCollation.GetItem(Index : Integer): TCldrCollationItem;
|
||||
|
@ -102,6 +102,7 @@ var
|
||||
i , c: Integer;
|
||||
collation : TCldrCollation;
|
||||
dataPath, outputPath : string;
|
||||
collationItem : TCldrCollationItem;
|
||||
begin
|
||||
{$ifdef test_suite}
|
||||
exec_tests();
|
||||
@ -150,17 +151,20 @@ begin
|
||||
endianStream := nil;
|
||||
collation := TCldrCollation.Create();
|
||||
try
|
||||
ParseCollationDocument(collationFileName,collation);
|
||||
ParseCollationDocument(collationFileName,collation,TCldrParserMode.HeaderParsing);
|
||||
WriteLn(Format(' Collation Count = %d',[collation.ItemCount]));
|
||||
if (collation.ItemCount = 0) then begin
|
||||
WriteLn('No collation in this file.');
|
||||
end else begin
|
||||
for i := 0 to collation.ItemCount - 1 do
|
||||
WriteLn(Format(' Item[%d] = %d "resets"; Type = %s',[i, Length(collation.Items[i].Rules),collation.Items[i].TypeName]));
|
||||
if (collation.Find(collationTypeName) = nil) then
|
||||
WriteLn(Format(' Item[%d] = (Type = %s)',[i, collation.Items[i].TypeName]));
|
||||
collationItem := collation.Find(collationTypeName);
|
||||
if (collationItem = nil) then begin
|
||||
collationTypeName := FindCollationDefaultItemName(collation);
|
||||
WriteLn('Collation Item Name : ',collationTypeName);
|
||||
|
||||
collationItem := collation.Find(collationTypeName);
|
||||
end;
|
||||
WriteLn(Format('Parsing Collation Item "%s" ...',[collationTypeName]));
|
||||
ParseCollationDocument(collationFileName,collationItem,collationTypeName);
|
||||
|
||||
s := dataPath + 'UCA_Rules_SHORT.xml';
|
||||
WriteLn;
|
||||
|
@ -32,13 +32,33 @@ uses
|
||||
procedure ParseInitialDocument(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload;
|
||||
procedure ParseInitialDocument(ASequence : POrderedCharacters; AFileName : string);overload;
|
||||
|
||||
procedure ParseCollationDocument(ADoc : TDOMDocument; ACollation : TCldrCollation);
|
||||
procedure ParseCollationDocument(const AFileName : string; ACollation : TCldrCollation);
|
||||
procedure ParseCollationDocument(
|
||||
ADoc : TDOMDocument;
|
||||
ACollation : TCldrCollation;
|
||||
AMode : TCldrParserMode
|
||||
);overload;
|
||||
procedure ParseCollationDocument(
|
||||
const AFileName : string;
|
||||
ACollation : TCldrCollation;
|
||||
AMode : TCldrParserMode
|
||||
);overload;
|
||||
|
||||
procedure ParseCollationDocument(
|
||||
const AFileName : string;
|
||||
ACollation : TCldrCollationItem;
|
||||
AType : string
|
||||
);overload;
|
||||
procedure ParseCollationDocument(
|
||||
ADoc : TDOMDocument;
|
||||
ACollation : TCldrCollationItem;
|
||||
AType : string
|
||||
);overload;
|
||||
|
||||
resourcestring
|
||||
sCaseNothandled = 'This case is not handled : "%s", Position = %d.';
|
||||
sCodePointExpected = 'Code Point node expected as child at this position "%d".';
|
||||
sCollationsNodeNotFound = '"collations" node not found.';
|
||||
sCollationTypeNotFound = 'collation "Type" not found : "%s".';
|
||||
sHexAttributeExpected = '"hex" attribute expected at this position "%d".';
|
||||
sInvalidResetClause = 'Invalid "Reset" clause.';
|
||||
sNodeNameAssertMessage = 'Expected NodeName "%s", got "%s".';
|
||||
@ -500,7 +520,11 @@ begin
|
||||
SetLength(r,0);
|
||||
end;
|
||||
|
||||
procedure ParseCollationItem(ACollationNode : TDOMElement; AItem : TCldrCollationItem);
|
||||
procedure ParseCollationItem(
|
||||
ACollationNode : TDOMElement;
|
||||
AItem : TCldrCollationItem;
|
||||
AMode : TCldrParserMode
|
||||
);
|
||||
var
|
||||
n : TDOMNode;
|
||||
rulesElement : TDOMElement;
|
||||
@ -515,43 +539,49 @@ begin
|
||||
AItem.Backwards := (EvaluateXPathStr('settings/@backwards',ACollationNode) = 'on');
|
||||
if AItem.Backwards then
|
||||
AItem.ChangedFields := AItem.ChangedFields + [TCollationField.BackWard];
|
||||
|
||||
SetLength(statementList,15);
|
||||
sal := 0;
|
||||
statement := @statementList[0];
|
||||
s := EvaluateXPathStr('suppress_contractions',ACollationNode);
|
||||
if (s <> '') then begin
|
||||
if (ParseDeletion(s,statement) > 0) then begin
|
||||
Inc(sal);
|
||||
Inc(statement);
|
||||
end else begin
|
||||
statement^.Clear();
|
||||
end;
|
||||
end;
|
||||
n := ACollationNode.FindNode(s_RULES);
|
||||
if (n <> nil) then begin
|
||||
rulesElement := n as TDOMElement;
|
||||
c := rulesElement.ChildNodes.Count;
|
||||
nextPos := 0;
|
||||
i := 0;
|
||||
while (i < c) do begin
|
||||
statement^.Clear();
|
||||
if not ParseStatement(rulesElement,i,statement,nextPos) then
|
||||
Break;
|
||||
i := nextPos;
|
||||
Inc(statement);
|
||||
Inc(sal);
|
||||
if (sal >= Length(statementList)) then begin
|
||||
SetLength(statementList,(sal*2));
|
||||
statement := @statementList[(sal-1)];
|
||||
AItem.Rules := nil;
|
||||
if (AMode = TCldrParserMode.FullParsing) then begin
|
||||
SetLength(statementList,15);
|
||||
sal := 0;
|
||||
statement := @statementList[0];
|
||||
s := EvaluateXPathStr('suppress_contractions',ACollationNode);
|
||||
if (s <> '') then begin
|
||||
if (ParseDeletion(s,statement) > 0) then begin
|
||||
Inc(sal);
|
||||
Inc(statement);
|
||||
end else begin
|
||||
statement^.Clear();
|
||||
end;
|
||||
end;
|
||||
n := ACollationNode.FindNode(s_RULES);
|
||||
if (n <> nil) then begin
|
||||
rulesElement := n as TDOMElement;
|
||||
c := rulesElement.ChildNodes.Count;
|
||||
nextPos := 0;
|
||||
i := 0;
|
||||
while (i < c) do begin
|
||||
statement^.Clear();
|
||||
if not ParseStatement(rulesElement,i,statement,nextPos) then
|
||||
Break;
|
||||
i := nextPos;
|
||||
Inc(statement);
|
||||
Inc(sal);
|
||||
if (sal >= Length(statementList)) then begin
|
||||
SetLength(statementList,(sal*2));
|
||||
statement := @statementList[(sal-1)];
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
SetLength(statementList,sal);
|
||||
AItem.Rules := statementList;
|
||||
end;
|
||||
SetLength(statementList,sal);
|
||||
AItem.Rules := statementList;
|
||||
end;
|
||||
|
||||
procedure ParseCollationDocument(ADoc : TDOMDocument; ACollation : TCldrCollation);
|
||||
procedure ParseCollationDocument(
|
||||
ADoc : TDOMDocument;
|
||||
ACollation : TCldrCollation;
|
||||
AMode : TCldrParserMode
|
||||
);
|
||||
var
|
||||
rulesNodes, n : TDOMNode;
|
||||
collationsElement, rulesElement : TDOMElement;
|
||||
@ -576,7 +606,7 @@ begin
|
||||
n := nl[i];
|
||||
if (n.NodeName = s_COLLATION) then begin
|
||||
item := TCldrCollationItem.Create();
|
||||
ParseCollationItem((n as TDOMElement),item);
|
||||
ParseCollationItem((n as TDOMElement),item,AMode);
|
||||
ACollation.Add(item);
|
||||
item := nil;
|
||||
end
|
||||
@ -588,6 +618,25 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
{ Parses a single collation of the given type from an already loaded document.

  ADoc       : XML document whose root element contains "collations/collation".
  ACollation : item that receives the result; it is cleared before parsing.
  AType      : value of the "type" attribute of the collation to parse.

  Raises Exception (sCollationTypeNotFound) when no collation element with
  that type exists. The first matching element is parsed in FullParsing mode. }
procedure ParseCollationDocument(
  ADoc       : TDOMDocument;
  ACollation : TCldrCollationItem;
  AType      : string
);
var
  xpathQuery : string;
  queryResult : TXPathVariable;
begin
  xpathQuery := Format('collations/collation[@type=%s]',[QuotedStr(AType)]);
  queryResult := EvaluateXPathExpression(xpathQuery,ADoc.DocumentElement);
  try
    if (queryResult.AsNodeSet.Count < 1) then
      raise Exception.CreateFmt(sCollationTypeNotFound,[AType]);
    // Only wipe the target once we know there is something to parse into it.
    ACollation.Clear();
    ParseCollationItem(
      (TDOMNode(queryResult.AsNodeSet[0]) as TDOMElement),
      ACollation,
      TCldrParserMode.FullParsing
    );
  finally
    // The XPath result is owned by this routine.
    queryResult.Free();
  end;
end;
|
||||
|
||||
function ReadXMLFile(f: TStream) : TXMLDocument;
|
||||
var
|
||||
src : TXMLInputSource;
|
||||
@ -618,17 +667,37 @@ begin
|
||||
end;
|
||||
end;
|
||||
|
||||
procedure ParseCollationDocument(const AFileName : string; ACollation : TCldrCollation);
|
||||
procedure ParseCollationDocument(
|
||||
const AFileName : string;
|
||||
ACollation : TCldrCollation;
|
||||
AMode : TCldrParserMode
|
||||
);
|
||||
var
|
||||
doc : TXMLDocument;
|
||||
begin
|
||||
doc := ReadXMLFile(AFileName);
|
||||
try
|
||||
ParseCollationDocument(doc,ACollation);
|
||||
ParseCollationDocument(doc,ACollation,AMode);
|
||||
ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
|
||||
finally
|
||||
doc.Free();
|
||||
end;
|
||||
end;
|
||||
|
||||
{ File-based convenience overload: loads the XML file AFileName, parses the
  collation whose type is AType into ACollation through the document-based
  overload, and frees the document afterwards, even if parsing raises. }
procedure ParseCollationDocument(
  const AFileName  : string;
        ACollation : TCldrCollationItem;
        AType      : string
);
var
  xmlDoc : TXMLDocument;
begin
  xmlDoc := ReadXMLFile(AFileName);
  try
    ParseCollationDocument(xmlDoc,ACollation,AType);
  finally
    xmlDoc.Free();
  end;
end;
|
||||
|
||||
end.
|
||||
|
Loading…
Reference in New Issue
Block a user