fpc/utils/unicode/cldrxml.pas

1368 lines
37 KiB
ObjectPascal

{ Parser of the CLDR collation xml files.
Copyright (c) 2013, 2014, 2015 by Inoussa OUEDRAOGO
The source code is distributed under the Library GNU
General Public License with the following modification:
- object files and libraries linked into an application may be
distributed without source code.
If you didn't receive a copy of the file COPYING, contact:
Free Software Foundation
675 Mass Ave
Cambridge, MA 02139
USA
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
}
{ The procedure whoses names lasted by 'XML' (ParseInitialDocumentXML,
ParseCollationDocumentXML, ...) are for older CLDR versions (CDLR <= 23); The
old version was unsing a XML syntax for collation's rules specifications.
The new versions (and going forward) will be using the text syntax.
}
unit cldrxml;
{$mode delphi}{$H+}
{$TypedAddress on}
interface
uses
Classes, SysUtils, DOM,
cldrhelper;
type
{ TCldrCollationFileLoader }
TCldrCollationFileLoader = class(TInterfacedObject,ICldrCollationLoader)
private
FPath : string;
private
procedure SetPath(APath : string);
function BuildFileName(ALanguage : string) : string;
procedure CheckFile(AFileName : string);
protected
procedure LoadCollation(
const ALanguage : string;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);
procedure LoadCollationType(
const ALanguage,
ATypeName : string;
AType : TCldrCollationItem
);
public
constructor Create(APath : string);
end;
{ TCldrCollationStreamLoader }
TCldrCollationStreamLoader = class(TInterfacedObject,ICldrCollationLoader)
private
FLanguages : array of string;
FStreams : array of TStream;
private
procedure CheckContent(ALanguage : string);
function IndexOf(ALanguage : string) : Integer;
protected
procedure LoadCollation(
const ALanguage : string;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);
procedure LoadCollationType(
const ALanguage,
ATypeName : string;
AType : TCldrCollationItem
);
public
constructor Create(
const ALanguages : array of string;
const AStreams : array of TStream
);
destructor Destroy();override;
end;
procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; ADoc : TDOMDocument);overload;
procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; AFileName : string);overload;
procedure ParseCollationDocumentXML(
ADoc : TDOMDocument;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);overload;
procedure ParseCollationDocumentXML(
ADoc : TDOMDocument;
ACollation : TCldrCollationItem;
AType : string
);overload;
procedure ParseCollationDocumentXML(
const AFileName : string;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);overload;
procedure ParseCollationDocumentXML(
const AFileName : string;
ACollation : TCldrCollationItem;
AType : string
);overload;
//-----------------------------------------------------
procedure ParseCollationDocument2(
ADoc : TDOMDocument;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);overload;
procedure ParseCollationDocument2(
const AFileName : string;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);overload;
procedure ParseCollationDocument2(
AStream : TStream;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);overload;
procedure ParseCollationDocument2(
const AFileName : string;
ACollation : TCldrCollationItem;
AType : string
);overload;
procedure ParseCollationDocument2(
ADoc : TDOMDocument;
ACollation : TCldrCollationItem;
AType : string
);overload;
procedure ParseCollationDocument2(
AStream : TStream;
ACollation : TCldrCollationItem;
AType : string
);overload;
implementation
uses
typinfo, RtlConsts, XMLRead, XPath, Helper, unicodeset, cldrtxt;
const
s_ALT = 'alt';
s_AT = 'at';
//s_BEFORE = 'before';
s_CODEPOINT = 'codepoint';
s_COLLATION = 'collation';
s_COLLATIONS = 'collations';
s_CONTEXT = 'context';
//s_DEFAULT = 'default';
s_EXTEND = 'extend';
s_HEX = 'hex';
s_POSITION = 'position';
s_RESET = 'reset';
s_RULES = 'rules';
//s_STANDART = 'standard';
s_TYPE = 'type';
s_CR = 'cr';
procedure CheckNodeName(ANode : TDOMNode; const AExpectedName : DOMString);
begin
if (ANode.NodeName <> AExpectedName) then
raise Exception.CreateFmt(sNodeNameAssertMessage,[AExpectedName,ANode.NodeName]);
end;
function CharToReorderWeigthKind(const AChar : Char) : TReorderWeigthKind;inline;
begin
case AChar of
'p' : Result := TReorderWeigthKind.PriMary;
's' : Result := TReorderWeigthKind.Secondary;
't' : Result := TReorderWeigthKind.Tertiary;
'i' : Result := TReorderWeigthKind.Identity;
else
Result := TReorderWeigthKind.Identity;
end;
end;
function DomString2UnicodeCodePointArray(const AValue : DOMString): TUnicodeCodePointArray;
var
u4str : UCS4String;
k : Integer;
begin
if (Length(AValue) = 0) then
exit(nil);
if (Length(AValue) = 1) then begin
SetLength(Result,1);
Result[0] := Ord(AValue[1])
end else begin
u4str := WideStringToUCS4String(AValue);
k := Length(u4str) - 1; // remove the last #0
SetLength(Result,k);
for k := 0 to k - 1 do
Result[k] := u4str[k];
end;
end;
function ParseStatementXML(
ARules : TDOMElement;
AStartPosition : Integer;
AStatement : PReorderSequence;
var ANextPos : Integer
) : Boolean;
var
startPosition : Integer;
statement : PReorderSequence;
elementActualCount : Integer;
list : TDOMNodeList;
inBlock : Boolean;
procedure SkipComments();
begin
while (startPosition < list.Count) do begin
if (list[startPosition].NodeType <> COMMENT_NODE) then
Break;
Inc(startPosition);
end;
end;
function parse_reset() : Integer;
var
n, t : TDOMNode;
s : string;
logicalPos : TReorderLogicalReset;
begin
SkipComments();
n := list[startPosition];
CheckNodeName(n,s_RESET);
if n.HasChildNodes() then begin
n := n.FirstChild;
if (n.NodeType = TEXT_NODE) then begin
statement^.Reset := DomString2UnicodeCodePointArray(Trim(TDOMText(n).Data));
Result := startPosition+1;
end else begin
if not TryStrToLogicalReorder(n.NodeName,logicalPos) then
raise Exception.CreateFmt(sUnknownResetLogicalPosition,[n.NodeName]);
statement^.LogicalPosition := logicalPos;
Result := startPosition+1;
end;
end else if not n.HasChildNodes() then begin
if (list[startPosition+1].NodeName = s_POSITION) then begin
s := list[startPosition+1].Attributes.GetNamedItem(s_AT).NodeValue;
if not TryStrToLogicalReorder(s,logicalPos) then
raise Exception.CreateFmt(sUnknownResetLogicalPosition,[s]);
statement^.LogicalPosition := logicalPos;
Result := startPosition+2;
end else begin
t := list[startPosition+1];
{if (t.NodeType <> TEXT_NODE) then
raise Exception.CreateFmt(sTextNodeChildExpected,[(startPosition+1),(t.NodeName+'('+t.ClassName+')')]);}
if (t.NodeType = TEXT_NODE) then
statement^.Reset := DomString2UnicodeCodePointArray(Trim(TDOMText(t).Data))
else
statement^.Reset := DomString2UnicodeCodePointArray(' ');
Result := startPosition+2;
end;
end;
if (statement^.LogicalPosition = TReorderLogicalReset.None) and
(Length(statement^.Reset) = 0)
then
raise Exception.Create(sInvalidResetClause);
end;
procedure EnsureElementLength(const ALength : Integer);
var
k, d : Integer;
begin
k := Length(statement^.Elements);
if (k < ALength) then begin
k := ALength;
if (k = 0) then begin
k := 50;
end else begin
if (k < 10) then
d := 10
else
d := 2;
k := k * d;
end;
SetLength(statement^.Elements,k);
end;
end;
procedure AddElement(
const AChars : array of UCS4Char;
const AWeigthKind : TReorderWeigthKind;
const AContext : DOMString
);overload;
var
kp : PReorderUnit;
k : Integer;
begin
EnsureElementLength(elementActualCount+1);
kp := @statement^.Elements[elementActualCount];
SetLength(kp^.Characters,Length(AChars));
for k := 0 to Length(AChars) - 1 do
kp^.Characters[k] := AChars[k];
kp^.WeigthKind := AWeigthKind;
elementActualCount := elementActualCount + 1;
if (AContext <> '') then
kp^.Context := DomString2UnicodeCodePointArray(AContext);
end;
procedure ReadChars(
ANode : TDOMNode;
APos : Integer;
var AChars : UCS4String
);
var
t : TDOMNode;
u4str : UCS4String;
s : DOMString;
begin
if not ANode.HasChildNodes() then begin
SetLength(AChars,1);
AChars[0] := Ord(UnicodeChar(' '));
exit;
//raise Exception.CreateFmt(sCodePointExpected + ANode.ClassName,[APos]);
end;
t := ANode.FindNode(s_CODEPOINT);
if (t = nil) then begin
if (ANode.ChildNodes.Count <> 1) then
raise Exception.CreateFmt(sUniqueChildNodeExpected,[APos]);
t := ANode.ChildNodes[0];
if not t.InheritsFrom(TDOMText) then
raise Exception.CreateFmt(sTextNodeChildExpected,[APos,(t.NodeName+'('+t.ClassName+')')]);
s := TDOMText(t).Data;
if (Length(s) = 1) then begin
SetLength(AChars,1);
AChars[0] := Ord(s[1]);
end else begin
u4str := WideStringToUCS4String(s);
AChars := u4str;
SetLength(AChars,Length(AChars)-1);
end;
end else begin
t := t.Attributes.GetNamedItem(s_HEX);
if (t = nil) then
raise Exception.CreateFmt(sHexAttributeExpected,[APos]);
SetLength(AChars,1);
AChars[0] := StrToInt('$'+t.NodeValue);
end
end;
procedure AddPrefixChars(const APrefix : array of UCS4Char; var ADest : TUnicodeCodePointArray);
var
k : Integer;
begin
k := Length(ADest);
SetLength(ADest,(k+Length(APrefix)));
Move(ADest[0],ADest[k+1],(SizeOf(k*ADest[0])));
for k := 0 to k - 1 do
ADest[k] := APrefix[k];
end;
function ReadNextItem(const APos : Integer) : Integer;
var
n, t : TDOMNode;
contextStr : DOMString;
w : TReorderWeigthKind;
isSimpleCharTag : Boolean;
simpleCharTag : AnsiChar;
last : PReorderUnit;
u4str : UCS4String;
k : Integer;
begin
contextStr := '';
Result := APos;
n := list[APos];
isSimpleCharTag := (Length(n.NodeName) = 1) and (Ord(n.NodeName[1])<=127);
if isSimpleCharTag then begin
simpleCharTag := AnsiChar(n.NodeName[1]);
if (simpleCharTag = 'x') then begin
inBlock := True;
n := n.FirstChild;
if (n.NodeName = s_CONTEXT) then begin
if n.HasChildNodes() then begin
t := n.FirstChild;
if (t.NodeType = TEXT_NODE) then
contextStr := TDOMText(t).Data;
end;
n := n.NextSibling;
end;
isSimpleCharTag := (Length(n.NodeName) = 1) and (Ord(n.NodeName[1])<=127);
if isSimpleCharTag then
simpleCharTag := AnsiChar(n.NodeName[1]);
end;
end;
if isSimpleCharTag and (simpleCharTag in ['p','s','t','i']) then begin
w := CharToReorderWeigthKind(AnsiChar(n.NodeName[1]));
ReadChars(n,APos,u4str);
AddElement(u4str,w,contextStr);
Result := Result + 1;
if not inBlock then
exit;
last := @statement^.Elements[elementActualCount-1];
n := n.NextSibling;
if (n <> nil) and (n.NodeName = s_EXTEND) then begin
ReadChars(n,APos,u4str);
SetLength(last^.ExpansionChars,Length(u4str));
for k := 0 to Length(u4str) - 1 do
last^.ExpansionChars[k] := u4str[k];
end;
exit;
end;
if (Length(n.NodeName) = 2) and (n.NodeName[2] = 'c') and
(Ord(n.NodeName[1])<=127) and (AnsiChar(n.NodeName[1]) in ['p','s','t','i'])
then begin
w := CharToReorderWeigthKind(AnsiChar(n.NodeName[1]));
ReadChars(n,APos,u4str);
for k := Low(u4str) to High(u4str) do
AddElement(u4str[k],w,contextStr);
Result := Result + 1;
exit;
end;
raise Exception.CreateFmt(sCaseNothandled,[n.NodeName,APos]);
end;
var
i, c : Integer;
n : TDOMNode;
begin
Result := False;
inBlock := False;
elementActualCount := 0;
if (AStartPosition <= 0) then
startPosition := 0
else
startPosition := AStartPosition;
i := startPosition;
list := ARules.ChildNodes;
c := list.Count;
if (c <= i) then
exit;
statement := AStatement;
statement^.Clear();
n := list[i];
i := parse_reset();
while (i < c) do begin
n := list[i];
if (n.NodeName = s_RESET) then
Break;
i := ReadNextItem(i);
end;
SetLength(statement^.Elements,elementActualCount);
Result := (i > startPosition);
if Result then
ANextPos := i;
end;
procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; ADoc : TDOMDocument);
var
n : TDOMNode;
rulesElement : TDOMElement;
i, c, nextPost : Integer;
statement : TReorderSequence;
p : PReorderUnit;
begin
n := ADoc.DocumentElement.FindNode(s_RULES);
if (n = nil) then
raise Exception.Create(sRulesNodeNotFound);
rulesElement := n as TDOMElement;
c := rulesElement.ChildNodes.Count;
ASequence^.Clear();
SetLength(ASequence^.Data,c+100);
nextPost := 0;
i := 0;
while (i < c) do begin
statement.Clear();
if not ParseStatementXML(rulesElement,i,@statement,nextPost) then
Break;
i := nextPost;
try
ASequence^.ApplyStatement(@statement);
except
on e : Exception do begin
e.Message := Format('%s Position = %d',[e.Message,i]);
raise;
end;
end;
end;
if (ASequence^.ActualLength > 0) then begin
p := @ASequence^.Data[0];
for i := 0 to ASequence^.ActualLength - 1 do begin
p^.Changed := False;
Inc(p);
end;
end;
end;
procedure ParseInitialDocumentXML(ASequence : POrderedCharacters; AFileName : string);
var
doc : TXMLDocument;
begin
ReadXMLFile(doc,AFileName);
try
ParseInitialDocumentXML(ASequence,doc);
finally
doc.Free();
end;
end;
function EvaluateXPathStr(const AExpression : string; AContextNode : TDOMNode): DOMString;
var
xv : TXPathVariable;
begin
xv := EvaluateXPathExpression(AExpression,AContextNode);
try
if (xv <> nil) then
Result := xv.AsText
else
Result := '';
finally
xv.Free();
end;
end;
function ParseDeletion(
const APattern : DOMString;
ASequence : PReorderSequence
) : Integer;
var
r : array of TReorderUnit;
c : Integer;
uset : TUnicodeSet;
it : TUnicodeSet.TIterator;
p : PReorderUnit;
begin
if (APattern = '') then
exit(0);
it := nil;
uset := TUnicodeSet.Create();
try
uset.AddPattern(APattern);
it := uset.CreateIterator();
c := 0;
it.Reset();
while it.MoveNext() do begin
Inc(c);
end;
SetLength(r,c);
p := @r[0];
it.Reset();
while it.MoveNext() do begin
p^.Clear();
p^.WeigthKind := TReorderWeigthKind.Deletion;
p^.Characters := Copy(it.GetCurrent());
Inc(p);
end;
ASequence^.Clear();
ASequence^.Elements := r;
finally
it.Free();
uset.Free();
end;
r := nil;
Result := c;
end;
procedure ParseCollationItemXML(
ACollationNode : TDOMElement;
AItem : TCldrCollationItem;
AMode : TCldrParserMode
);
var
n : TDOMNode;
rulesElement : TDOMElement;
i, c, nextPos : Integer;
statementList : TReorderSequenceArray;
sal : Integer;//statement actual length
statement : PReorderSequence;
s : DOMString;
begin
AItem.TypeName := ACollationNode.GetAttribute(s_TYPE);
AItem.Base := EvaluateXPathStr('base',ACollationNode);
AItem.Backwards := (EvaluateXPathStr('settings/@backwards',ACollationNode) = 'on');
if AItem.Backwards then
AItem.ChangedFields := AItem.ChangedFields + [TCollationField.BackWards];
AItem.Rules := nil;
if (AMode = TCldrParserMode.FullParsing) then begin
SetLength(statementList,15);
sal := 0;
statement := @statementList[0];
s := EvaluateXPathStr('suppress_contractions',ACollationNode);
if (s <> '') then begin
if (ParseDeletion(s,statement) > 0) then begin
Inc(sal);
Inc(statement);
end else begin
statement^.Clear();
end;
end;
n := ACollationNode.FindNode(s_RULES);
if (n <> nil) then begin
rulesElement := n as TDOMElement;
c := rulesElement.ChildNodes.Count;
nextPos := 0;
i := 0;
while (i < c) do begin
statement^.Clear();
if not ParseStatementXML(rulesElement,i,statement,nextPos) then
Break;
i := nextPos;
Inc(statement);
Inc(sal);
if (sal >= Length(statementList)) then begin
SetLength(statementList,(sal*2));
statement := @statementList[(sal-1)];
end;
end;
end;
SetLength(statementList,sal);
AItem.Rules := statementList;
end;
end;
function NextPart(
const ABuffer : string;
const AStartPos : Integer;
const ASeparator : Char;
out ANextStart : Integer
) : string;
var
c, sp, i : Integer;
begin
c := Length(ABuffer);
if (c < 1) or (AStartPos > c) then begin
ANextStart := c+1;
Result := '';
exit;
end;
if (AStartPos > 0) then
sp := AStartPos
else
sp := 1;
i := sp;
while (i <= c) do begin
if (ABuffer[i] = ASeparator) then
break;
i := i+1;
end;
Result := Copy(ABuffer,sp,(i-sp));
if (i <= c) then
i := i+1;
ANextStart := i;
end;
procedure HandleSetting_Import(
AItem : TCldrCollationItem;
ASetting : PSettingRec
);
var
buffer, lang, col, s : UTF8String;
i, ns : Integer;
begin
if (Length(ASetting^.Values) <> 1) then begin
buffer := '';
if (Length(ASetting^.Values) > 0) then begin
for i := 0 to Length(ASetting^.Values)-1 do
buffer := Format('%s + "%s"',[ASetting^.Values[i]]);
end;
raise Exception.CreateFmt(sInvalidImportStatement,[buffer]);
end;
buffer := ASetting^.Values[0];
lang := NextPart(buffer,1,'-',ns);
i := ns;
col := '';
s := NextPart(buffer,i,'-',ns);
if (s <> '') then begin
if (s <> 'u') then
raise Exception.CreateFmt(sInvalidImportStatement,[buffer]);
i := ns;
s := NextPart(buffer,i,'-',ns);
if (s <> 'co') then
raise Exception.CreateFmt(sInvalidImportStatement,[buffer]);
s := Trim(Copy(buffer,ns,(Length(buffer)-ns+1)));
if (s = '') then
raise Exception.CreateFmt(sInvalidImportStatement,[buffer]);
col := s;
end;
if (col = '') then
col := COLLATION_ITEM_DEFAULT;
if (LowerCase(lang) = 'und') then
lang := 'root';
AItem.Imports.Add(lang,col);
ASetting^.Understood := True;
end;
procedure HandleSetting_Backwards(
AItem : TCldrCollationItem;
ASetting : PSettingRec
);
var
buffer : UTF8String;
i : Integer;
begin
if (Length(ASetting^.Values) <> 1) then begin
buffer := '';
if (Length(ASetting^.Values) > 0) then begin
for i := 0 to Length(ASetting^.Values)-1 do
buffer := Format('%s + "%s"',[ASetting^.Values[i]]);
end;
raise Exception.CreateFmt(sInvalidBackwardsStatement,[buffer]);
end;
if (ASetting^.Values[0] = '2') then
AItem.Backwards := True
else
raise Exception.CreateFmt(
sInvalidSettingValue,
[SETTING_OPTION_STRINGS[ASetting^.OptionValue],ASetting^.Values[0]]
);
AItem.ChangedFields := AItem.ChangedFields+[TCollationField.BackWards];
ASetting^.Understood := True;
end;
procedure HandleSetting_Alternate(
AItem : TCldrCollationItem;
ASetting : PSettingRec
);
var
buffer : UTF8String;
i : Integer;
begin
if (Length(ASetting^.Values) <> 1) then begin
buffer := '';
if (Length(ASetting^.Values) > 0) then begin
for i := 0 to Length(ASetting^.Values)-1 do
buffer := Format('%s + "%s"',[ASetting^.Values[i]]);
end;
raise Exception.CreateFmt(sInvalidAlternateStatement,[buffer]);
end;
buffer := UTF8String(LowerCase(UnicodeString(ASetting^.Values[0])));
if (buffer = 'non-ignorable') then
AItem.VariableWeight := ucaNonIgnorable
else if (buffer = 'shifted') then
AItem.VariableWeight := ucaShifted
else
raise Exception.CreateFmt(
sInvalidSettingValue,
[SETTING_OPTION_STRINGS[ASetting^.OptionValue],ASetting^.Values[0]]
);
AItem.ChangedFields := AItem.ChangedFields+[TCollationField.Alternate];
ASetting^.Understood := True;
end;
procedure HandleSetting_Normalization(
AItem : TCldrCollationItem;
ASetting : PSettingRec
);
var
buffer : UTF8String;
i : Integer;
begin
if (Length(ASetting^.Values) <> 1) then begin
buffer := '';
if (Length(ASetting^.Values) > 0) then begin
for i := 0 to Length(ASetting^.Values)-1 do
buffer := Format('%s + "%s"',[ASetting^.Values[i]]);
end;
raise Exception.CreateFmt(sInvalidNormalizationStatement,[buffer]);
end;
buffer := UTF8String(LowerCase(UnicodeString(ASetting^.Values[0])));
if (buffer = 'off') then
AItem.Normalization := False
else if (buffer = 'on') then
AItem.Normalization := True
else
raise Exception.CreateFmt(
sInvalidSettingValue,
[SETTING_OPTION_STRINGS[ASetting^.OptionValue],ASetting^.Values[0]]
);
AItem.ChangedFields := AItem.ChangedFields+[TCollationField.Normalization];
ASetting^.Understood := True;
end;
procedure HandleSetting_Strength(
AItem : TCldrCollationItem;
ASetting : PSettingRec
);
var
buffer : UTF8String;
i : Integer;
begin
if (Length(ASetting^.Values) <> 1) then begin
buffer := '';
if (Length(ASetting^.Values) > 0) then begin
for i := 0 to Length(ASetting^.Values)-1 do
buffer := Format('%s + "%s"',[ASetting^.Values[i]]);
end;
raise Exception.CreateFmt(sInvalidStrengthStatement,[buffer]);
end;
buffer := UTF8String(LowerCase(UnicodeString(ASetting^.Values[0])));
if (buffer = '1') then
AItem.Strength := TComparisonStrength.Primary
else if (buffer = '2') then
AItem.Strength := TComparisonStrength.Secondary
else if (buffer = '3') then
AItem.Strength := TComparisonStrength.Tertiary
else if (buffer = '4') then
AItem.Strength := TComparisonStrength.Quaternary
else if (buffer = 'i') then
AItem.Strength := TComparisonStrength.Identity
else
raise Exception.CreateFmt(
sInvalidSettingValue,
[SETTING_OPTION_STRINGS[ASetting^.OptionValue],ASetting^.Values[0]]
);
AItem.ChangedFields := AItem.ChangedFields+[TCollationField.Strength];
ASetting^.Understood := True;
end;
procedure HandleSetting_EMPTY_PROC(
AItem : TCldrCollationItem;
ASetting : PSettingRec
);
begin
//
end;
type
TSettingHandlerProc = procedure (
AItem : TCldrCollationItem;
ASetting : PSettingRec
);
const
SETTING_HANDLERS : array[TSettingOption] of TSettingHandlerProc =(
HandleSetting_EMPTY_PROC, HandleSetting_Strength, HandleSetting_Alternate,
//Unknown, Strength, Alternate,
HandleSetting_Backwards, HandleSetting_Normalization, HandleSetting_EMPTY_PROC,
//Backwards, Normalization, CaseLevel,
HandleSetting_EMPTY_PROC, HandleSetting_EMPTY_PROC, HandleSetting_EMPTY_PROC,
//CaseFirst, HiraganaQ, NumericOrdering,
HandleSetting_EMPTY_PROC, HandleSetting_EMPTY_PROC, HandleSetting_Import,
//Reorder, MaxVariable Import
HandleSetting_EMPTY_PROC,
//SuppressContractions has a special handling see Process_SuppressContractions
HandleSetting_EMPTY_PROC
//Optimize
);
procedure HandleSettings(AItem : TCldrCollationItem);
var
i, c : Integer;
p : PSettingRec;
begin
c := Length(AItem.Settings);
if (c < 1) then
exit;
p := @AItem.Settings[0];
for i := 0 to c-1 do begin
SETTING_HANDLERS[p^.OptionValue](AItem,p);
Inc(p);
end;
end;
function Process_SuppressContractions(
ASetting : PSettingRec;
AStatement : PReorderSequence
) : Boolean;
var
buffer : UTF8String;
i : Integer;
begin
if (Length(ASetting^.Values) <> 1) then begin
buffer := '';
if (Length(ASetting^.Values) > 0) then begin
for i := 0 to Length(ASetting^.Values)-1 do
buffer := Format('%s + "%s"',[ASetting^.Values[i]]);
end;
raise Exception.CreateFmt(sInvalidSuppressContractionsStatement,[buffer]);
end;
Result := (ParseDeletion(DOMString(ASetting^.Values[0]),AStatement) > 0);
ASetting.Understood := Result;
end;
procedure ParseCollationItem2(
ACollationNode : TDOMElement;
AItem : TCldrCollationItem;
AMode : TCldrParserMode
);
var
statementList : TReorderSequenceArray;
sal : Integer;//statement actual length
statement : PReorderSequence;
procedure AddStatementToArray();
begin
Inc(statement);
Inc(sal);
if (sal >= Length(statementList)) then begin
SetLength(statementList,(sal*2));
statement := @statementList[(sal-1)];
end;
end;
var
n : TDOMNode;
rulesElement : TDOMCDATASection;
i, c, nextPos : Integer;
parsedStatement : TParsedStatement;
s : DOMString;
u8 : UTF8String;
buffer : PAnsiChar;
lineCount : Integer;
settingArray : TSettingRecArray;
begin
AItem.TypeName := ACollationNode.GetAttribute(s_TYPE);
AItem.Alt := ACollationNode.GetAttribute(s_ALT);
AItem.Settings := nil;
AItem.Rules := nil;
AItem.Mode := AMode;
if (AMode = TCldrParserMode.FullParsing) then begin
SetLength(statementList,15);
sal := 0;
statement := @statementList[0];
n := ACollationNode.FindNode(s_CR);
if (n <> nil) then begin
n := (n as TDOMElement).FirstChild;
rulesElement := n as TDOMCDATASection;
s := rulesElement.Data;
u8 := UTF8Encode(s);
c := Length(u8);
buffer := @u8[1];
nextPos := 0;
i := 0;
lineCount := 0;
Clear(parsedStatement);
settingArray := AItem.Settings;
while (i < c) do begin
statement^.Clear();
if not ParseStatement(buffer,i,c,@parsedStatement,nextPos,lineCount) then
Break;
if (parsedStatement.Kind = TStatementKind.Sequence) then begin
statement^.Assign(@parsedStatement.ReorderSequence);
AddStatementToArray();
end else if (parsedStatement.Kind = TStatementKind.Setting) then begin
if (parsedStatement.Setting.OptionValue = TSettingOption.SuppressContractions) then begin
if Process_SuppressContractions(@parsedStatement.Setting,statement) then
AddStatementToArray()
else
statement^.Clear();
end;
AddItem(settingArray,@parsedStatement.Setting);
end;
i := nextPos;
end;
AItem.Settings := settingArray;
if (Length(AItem.Settings) > 0) then
HandleSettings(AItem);
end;
SetLength(statementList,sal);
AItem.Rules := statementList;
end;
end;
procedure ParseCollationDocumentXML(
ADoc : TDOMDocument;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);
var
n : TDOMNode;
collationsElement : TDOMElement;
i, c : Integer;
item : TCldrCollationItem;
nl : TDOMNodeList;
begin
n := ADoc.DocumentElement.FindNode(s_COLLATIONS);
if (n = nil) then
raise Exception.Create(sCollationsNodeNotFound);
collationsElement := n as TDOMElement;
ACollation.Clear();
ACollation.Mode := AMode;
ACollation.Language := EvaluateXPathStr('identity/language/@type',ADoc.DocumentElement);
ACollation.Version := EvaluateXPathStr('identity/version/@number',ADoc.DocumentElement);
ACollation.DefaultType := EvaluateXPathStr('collations/default/@type',ADoc.DocumentElement);
if collationsElement.HasChildNodes() then begin
nl := collationsElement.ChildNodes;
c := nl.Count;
item := nil;
try
for i := 0 to c - 1 do begin
n := nl[i];
if (n.NodeName = s_COLLATION) then begin
item := TCldrCollationItem.Create();
ParseCollationItemXML((n as TDOMElement),item,AMode);
ACollation.Add(item);
item := nil;
end
end;
except
FreeAndNil(item);
raise;
end;
end;
end;
procedure ParseCollationDocumentXML(
ADoc : TDOMDocument;
ACollation : TCldrCollationItem;
AType : string
);
var
xv : TXPathVariable;
begin
xv := EvaluateXPathExpression(Format('collations/collation[@type=%s]',[QuotedStr(AType)]),ADoc.DocumentElement);
try
if (xv.AsNodeSet.Count = 0) then
raise Exception.CreateFmt(sCollationTypeNotFound,[AType]);
ACollation.Clear();
ParseCollationItemXML((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing);
finally
xv.Free();
end
end;
procedure ParseCollationDocument2(
ADoc : TDOMDocument;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);
var
n : TDOMNode;
collationsElement : TDOMElement;
i, c : Integer;
item, tempItem : TCldrCollationItem;
nl : TDOMNodeList;
isnew : boolean;
begin
n := ADoc.DocumentElement.FindNode(s_COLLATIONS);
if (n = nil) then
raise Exception.Create(sCollationsNodeNotFound);
collationsElement := n as TDOMElement;
//ACollation.Clear();
ACollation.Mode := AMode;
ACollation.Language := EvaluateXPathStr('identity/language/@type',ADoc.DocumentElement);
ACollation.Version := EvaluateXPathStr('identity/version/@number',ADoc.DocumentElement);
ACollation.DefaultType := EvaluateXPathStr('collations/defaultCollation',ADoc.DocumentElement);
if collationsElement.HasChildNodes() then begin
nl := collationsElement.ChildNodes;
c := nl.Count;
tempItem := TCldrCollationItem.Create();
try
item := nil;
try
for i := 0 to c - 1 do begin
n := nl[i];
if (n.NodeName = s_COLLATION) then begin
tempItem.Clear();
ParseCollationItem2((n as TDOMElement),tempItem,TCldrParserMode.HeaderParsing);
item := ACollation.Find(tempItem.TypeName);
isnew := (item = nil);
if isnew then
item := TCldrCollationItem.Create();
if isnew or (item.Mode < AMode) then
ParseCollationItem2((n as TDOMElement),item,AMode);
if isnew then
ACollation.Add(item);
item := nil;
end
end;
except
FreeAndNil(item);
raise;
end;
finally
tempItem.Free();
end;
end;
end;
procedure ParseCollationDocument2(
ADoc : TDOMDocument;
ACollation : TCldrCollationItem;
AType : string
);
var
xv : TXPathVariable;
begin
xv := EvaluateXPathExpression(Format('collations/collation[@type=%s]',[QuotedStr(AType)]),ADoc.DocumentElement);
try
if (xv.AsNodeSet.Count = 0) then
raise Exception.CreateFmt(sCollationTypeNotFound,[AType]);
ACollation.Clear();
ParseCollationItem2((TDOMNode(xv.AsNodeSet[0]) as TDOMElement),ACollation,TCldrParserMode.FullParsing);
finally
xv.Free();
end
end;
function ReadXMLFile(f: TStream) : TXMLDocument;overload;
var
src : TXMLInputSource;
parser: TDOMParser;
begin
src := TXMLInputSource.Create(f);
parser := TDOMParser.Create();
try
parser.Options.IgnoreComments := True;
parser.Parse(src, Result);
finally
src.Free();
parser.Free;
end;
end;
function ReadXMLFile(const AFilename: String) : TXMLDocument;overload;
var
FileStream: TStream;
begin
Result := nil;
FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
try
Result := ReadXMLFile(FileStream);
finally
FileStream.Free;
end;
end;
procedure ParseCollationDocumentXML(
const AFileName : string;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);
var
doc : TXMLDocument;
begin
doc := ReadXMLFile(AFileName);
try
ParseCollationDocumentXML(doc,ACollation,AMode);
ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
finally
doc.Free();
end;
end;
procedure ParseCollationDocumentXML(
const AFileName : string;
ACollation : TCldrCollationItem;
AType : string
);
var
doc : TXMLDocument;
begin
doc := ReadXMLFile(AFileName);
try
ParseCollationDocumentXML(doc,ACollation,AType);
finally
doc.Free();
end;
end;
procedure ParseCollationDocument2(
const AFileName : string;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);
var
doc : TXMLDocument;
begin
doc := ReadXMLFile(AFileName);
try
ParseCollationDocument2(doc,ACollation,AMode);
ACollation.LocalID := ExtractFileName(ChangeFileExt(AFileName,''));
finally
doc.Free();
end;
end;
procedure ParseCollationDocument2(
AStream : TStream;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);
var
doc : TXMLDocument;
begin
doc := ReadXMLFile(AStream);
try
ParseCollationDocument2(doc,ACollation,AMode);
finally
doc.Free();
end;
end;
procedure ParseCollationDocument2(
const AFileName : string;
ACollation : TCldrCollationItem;
AType : string
);
var
doc : TXMLDocument;
begin
doc := ReadXMLFile(AFileName);
try
ParseCollationDocument2(doc,ACollation,AType);
finally
doc.Free();
end;
end;
procedure ParseCollationDocument2(
AStream : TStream;
ACollation : TCldrCollationItem;
AType : string
);
var
doc : TXMLDocument;
begin
doc := ReadXMLFile(AStream);
try
ParseCollationDocument2(doc,ACollation,AType);
finally
doc.Free();
end;
end;
{ TCldrCollationStreamLoader }
procedure TCldrCollationStreamLoader.CheckContent(ALanguage: string);
begin
if not FileExists(ALanguage) then
raise EFOpenError.CreateFmt(SFOpenError,[ALanguage]);
end;
function TCldrCollationStreamLoader.IndexOf(ALanguage: string): Integer;
var
i : Integer;
begin
for i := Low(FLanguages) to High(FLanguages) do begin
if (FLanguages[i] = ALanguage) then begin
Result := i;
exit;
end;
end;
Result := -1;
end;
procedure TCldrCollationStreamLoader.LoadCollation(
const ALanguage : string;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);
var
i : Integer;
locStream : TStream;
begin
i := IndexOf(ALanguage);
if (i < 0) then
CheckContent(ALanguage);
locStream := FStreams[i];
locStream.Position := 0;
ParseCollationDocument2(locStream,ACollation,AMode);
end;
procedure TCldrCollationStreamLoader.LoadCollationType(
const ALanguage,
ATypeName : string;
AType : TCldrCollationItem
);
var
i : Integer;
locStream : TStream;
begin
i := IndexOf(ALanguage);
if (i < 0) then
CheckContent(ALanguage);
locStream := FStreams[i];
locStream.Position := 0;
ParseCollationDocument2(locStream,AType,ATypeName);
end;
constructor TCldrCollationStreamLoader.Create(
const ALanguages : array of string;
const AStreams : array of TStream
);
var
c, i : Integer;
begin
c := Length(ALanguages);
if (Length(AStreams) < c) then
c := Length(AStreams);
SetLength(FLanguages,c);
SetLength(FStreams,c);
for i := Low(ALanguages) to High(ALanguages) do begin
FLanguages[i] := ALanguages[i];
FStreams[i] := AStreams[i];
end;
end;
destructor TCldrCollationStreamLoader.Destroy();
var
i : Integer;
begin
for i := Low(FStreams) to High(FStreams) do
FreeAndNil(FStreams[i]);
end;
{ TCldrCollationFileLoader }
procedure TCldrCollationFileLoader.SetPath(APath: string);
var
s : string;
begin
if (APath = '') then
s := ''
else
s := IncludeTrailingPathDelimiter(APath);
if (s <> FPath) then
FPath := s;
end;
function TCldrCollationFileLoader.BuildFileName(ALanguage: string): string;
begin
Result := Format('%s%s.xml',[FPath,ALanguage]);
end;
procedure TCldrCollationFileLoader.CheckFile(AFileName: string);
begin
if not FileExists(AFileName) then
raise EFOpenError.CreateFmt(SFOpenError,[AFileName]);
end;
procedure TCldrCollationFileLoader.LoadCollation(
const ALanguage : string;
ACollation : TCldrCollation;
AMode : TCldrParserMode
);
var
locFileName : string;
begin
locFileName := BuildFileName(ALanguage);
CheckFile(locFileName);
//ACollation.Clear();
ParseCollationDocument2(locFileName,ACollation,AMode);
end;
procedure TCldrCollationFileLoader.LoadCollationType(
const ALanguage,
ATypeName : string;
AType : TCldrCollationItem
);
var
locFileName : string;
begin
locFileName := BuildFileName(ALanguage);
CheckFile(locFileName);
//AType.Clear();
ParseCollationDocument2(locFileName,AType,ATypeName);
end;
constructor TCldrCollationFileLoader.Create(APath: string);
begin
SetPath(APath);
end;
end.