mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-11 16:28:30 +02:00
871 lines
24 KiB
ObjectPascal
871 lines
24 KiB
ObjectPascal
{ Parser of the CLDR collation tailoring files.
|
|
This parser handle the textual syntax for CLDR version > 23
|
|
|
|
Copyright (c) 2014,2015 by Inoussa OUEDRAOGO
|
|
|
|
The source code is distributed under the Library GNU
|
|
General Public License with the following modification:
|
|
|
|
- object files and libraries linked into an application may be
|
|
distributed without source code.
|
|
|
|
If you didn't receive a copy of the file COPYING, contact:
|
|
Free Software Foundation
|
|
675 Mass Ave
|
|
Cambridge, MA 02139
|
|
USA
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
}
|
|
unit cldrtxt;
|
|
|
|
{$mode delphi}
|
|
{$H+}
|
|
{$SCOPEDENUMS ON}
|
|
{$TypedAddress on}
|
|
|
|
interface
|
|
|
|
uses
|
|
Classes, SysUtils,
|
|
cldrhelper, helper;
|
|
|
|
procedure ParseInitialDocument(
|
|
ASequence : POrderedCharacters;
|
|
ADoc : TCustomMemoryStream;
|
|
ASettings : TSettingRecArray
|
|
);overload;
|
|
procedure ParseInitialDocument(
|
|
ASequence : POrderedCharacters;
|
|
AFileName : string;
|
|
ASettings : TSettingRecArray
|
|
);overload;
|
|
|
|
const
|
|
SETTING_WITH_UNICODESET = [
|
|
TSettingOption.SuppressContractions, TSettingOption.Optimize
|
|
];
|
|
|
|
SETTING_OPTION_STRINGS : // Lower case !
|
|
array[Succ(TSettingOption.Unknown)..High(TSettingOption)] of UTF8String = (
|
|
'strength', 'alternate', 'backwards', 'normalization', 'caselevel', 'casefirst',
|
|
'hiraganaq', 'numericordering', 'reorder', 'maxvariable', 'import',
|
|
'suppresscontractions', 'optimize'
|
|
);
|
|
|
|
type
|
|
|
|
TStatementKind = (Sequence, Setting);
|
|
TParsedStatement = record
|
|
Kind : TStatementKind;
|
|
ReorderSequence : TReorderSequence;
|
|
Setting : TSettingRec;
|
|
end;
|
|
PParsedStatement = ^TParsedStatement;
|
|
|
|
function ParseStatement(
|
|
AData : PAnsiChar;
|
|
AStartPosition,
|
|
AMaxLen : Integer;
|
|
AStatement : PParsedStatement;
|
|
var ANextPos,
|
|
ALineCount : Integer
|
|
) : Boolean;
|
|
|
|
procedure Clear(var AItem : TParsedStatement);
|
|
procedure AddItem(var AList : TSettingRecArray; const AItem : PSettingRec);
|
|
|
|
implementation
|
|
uses
|
|
unicodedata;
|
|
|
|
const
|
|
s_BEFORE = 'before';
|
|
|
|
function String2UnicodeCodePointArray(const AValue : UTF8String): TUnicodeCodePointArray;
|
|
var
|
|
u4str : UCS4String;
|
|
k : Integer;
|
|
begin
|
|
if (Length(AValue) = 0) then
|
|
exit(nil);
|
|
if (Length(AValue) = 1) then begin
|
|
SetLength(Result,1);
|
|
Result[0] := Ord(AValue[1])
|
|
end else begin
|
|
u4str := UnicodeStringToUCS4String(UTF8Decode(AValue));
|
|
k := Length(u4str) - 1; // remove the last #0
|
|
SetLength(Result,k);
|
|
for k := 0 to k - 1 do
|
|
Result[k] := u4str[k];
|
|
end;
|
|
end;
|
|
|
|
function TryStringToReorderWeigthKind(
|
|
const AStr : UTF8String;
|
|
out AResult : TReorderWeigthKind
|
|
) : Boolean;
|
|
begin
|
|
Result := True;
|
|
if (AStr = '=') then
|
|
AResult := TReorderWeigthKind.Identity
|
|
else if (AStr = '<') or (AStr = '>') then
|
|
AResult := TReorderWeigthKind.Primary
|
|
else if (AStr = '<<') or (AStr = '>>') then
|
|
AResult := TReorderWeigthKind.Secondary
|
|
else if (AStr = '<<<') or (AStr = '>>>') then
|
|
AResult := TReorderWeigthKind.Tertiary
|
|
else if (AStr = '<<<<') or (AStr = '>>>>') then
|
|
{Quaternary level is treated as Identity !}
|
|
AResult := TReorderWeigthKind.Identity
|
|
else begin
|
|
AResult := TReorderWeigthKind.Identity;
|
|
Result := False;
|
|
end;
|
|
end;
|
|
|
|
function StringToSettingOption(const AStr : UTF8String) : TSettingOption;
|
|
var
|
|
e : TSettingOption;
|
|
s : UTF8String;
|
|
begin
|
|
s := LowerCase(AStr);
|
|
for e := Succ(TSettingOption.Unknown) to High(TSettingOption) do begin
|
|
if (s = SETTING_OPTION_STRINGS[e]) then
|
|
exit(e);
|
|
end;
|
|
Result := TSettingOption.Unknown;
|
|
end;
|
|
|
|
procedure Clear(var AItem : TParsedStatement);
|
|
begin
|
|
AItem.Setting.Clear();
|
|
AItem.ReorderSequence.Clear();
|
|
AItem.Kind := TStatementKind(0);
|
|
end;
|
|
|
|
procedure AddItem(var AList : TSettingRecArray; const AItem : PSettingRec);
|
|
var
|
|
c : Integer;
|
|
begin
|
|
c := Length(AList);
|
|
SetLength(AList,(c+1));
|
|
AList[c].Assign(AItem);
|
|
end;
|
|
|
|
procedure FromUCS4(const AValue : UCS4Char; var AHighS, ALowS : UnicodeChar);
|
|
begin
|
|
AHighS := UnicodeChar((AValue - $10000) shr 10 + $d800);
|
|
ALowS := UnicodeChar((AValue - $10000) and $3ff + $dc00);
|
|
end;
|
|
|
|
function ParseStatement(
|
|
AData : PAnsiChar;
|
|
AStartPosition,
|
|
AMaxLen : Integer;
|
|
AStatement : PParsedStatement;
|
|
var ANextPos,
|
|
ALineCount : Integer
|
|
) : Boolean;
|
|
const
|
|
LINE_LENGTH = 1024;
|
|
var
|
|
p : PAnsiChar;
|
|
bufferLength, bufferPos, lineLength, linePos, lineIndex : Integer;
|
|
line : UTF8String;
|
|
statement : PReorderSequence;
|
|
elementActualCount : Integer;
|
|
specialChararter : Boolean;
|
|
historyItemIndex : Integer;
|
|
historyItems : array[0..31] of record
|
|
p : PAnsiChar;
|
|
bufferLength,
|
|
bufferPos,
|
|
lineLength,
|
|
linePos,
|
|
lineIndex : Integer;
|
|
line : UTF8String;
|
|
end;
|
|
|
|
procedure SaveState();
|
|
begin
|
|
if (historyItemIndex >= High(historyItems)) then
|
|
raise Exception.Create('History buffer is full.');
|
|
historyItemIndex := historyItemIndex+1;
|
|
historyItems[historyItemIndex].p := p;
|
|
historyItems[historyItemIndex].bufferLength := bufferLength;
|
|
historyItems[historyItemIndex].bufferPos := bufferPos;
|
|
historyItems[historyItemIndex].lineLength := lineLength;
|
|
historyItems[historyItemIndex].linePos := linePos;
|
|
historyItems[historyItemIndex].lineIndex := lineIndex;
|
|
historyItems[historyItemIndex].line := line;
|
|
end;
|
|
|
|
procedure RestoreState();
|
|
begin
|
|
if (historyItemIndex < 0) then
|
|
raise Exception.Create('History buffer is empty.');
|
|
p := historyItems[historyItemIndex].p;
|
|
bufferLength := historyItems[historyItemIndex].bufferLength;
|
|
bufferPos := historyItems[historyItemIndex].bufferPos;
|
|
lineLength := historyItems[historyItemIndex].lineLength;
|
|
linePos := historyItems[historyItemIndex].linePos;
|
|
lineIndex := historyItems[historyItemIndex].lineIndex;
|
|
line := historyItems[historyItemIndex].line;
|
|
historyItemIndex := historyItemIndex-1;
|
|
end;
|
|
|
|
procedure DiscardState();
|
|
begin
|
|
if (historyItemIndex < 0) then
|
|
raise Exception.Create('History buffer is empty.');
|
|
historyItemIndex := historyItemIndex-1;
|
|
end;
|
|
|
|
function CurrentLine() : UTF8String; inline;
|
|
begin
|
|
Result := Copy(line,1,lineLength);
|
|
end;
|
|
|
|
function NextLine() : Boolean;
|
|
var
|
|
locOldPos : Integer;
|
|
locOldPointer : PAnsiChar;
|
|
begin
|
|
Result := False;
|
|
if (p^ = #10) then begin
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
end;
|
|
locOldPos := bufferPos;
|
|
locOldPointer := p;
|
|
while (bufferPos < bufferLength) and (p^ <> #10) do begin
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
end;
|
|
if (locOldPos = bufferPos) and (p^ = #10) then begin
|
|
lineLength := 0;
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
linePos := 1;
|
|
Result := True;
|
|
end else if (locOldPos < bufferPos) then begin
|
|
lineLength := (bufferPos - locOldPos);
|
|
if (lineLength >= Length(line)) then
|
|
SetLength(line,(2*lineLength));
|
|
Move(locOldPointer^,line[1],lineLength);
|
|
{if (p^ = #10) then begin
|
|
//Dec(lineLength);
|
|
Inc(p);
|
|
Inc(bufferPos);
|
|
end;}
|
|
linePos := 1;
|
|
Result := True;
|
|
end;
|
|
if Result and (locOldPos < bufferPos) then
|
|
lineIndex := lineIndex+1;
|
|
end;
|
|
|
|
procedure CheckLineLength(const ALength : Integer);
|
|
begin
|
|
if (ALength > lineLength) then
|
|
raise Exception.CreateFmt('Unexpected end of line : "%s".',[CurrentLine()]);
|
|
end;
|
|
|
|
function ReadChar(out AResult : UTF8String) : Boolean;
|
|
var
|
|
k : Integer;
|
|
us : UnicodeString;
|
|
begin
|
|
AResult := '';
|
|
Result := False;
|
|
if (linePos > lineLength) then
|
|
exit;
|
|
{if CharInSet(line[linePos],['#','=','&','[',']']) then begin
|
|
AResult := line[linePos];
|
|
Inc(linePos);
|
|
exit(True);
|
|
end;}
|
|
if (line[linePos] <> '\') then begin
|
|
AResult := line[linePos];
|
|
Inc(linePos);
|
|
exit(True);
|
|
end;
|
|
CheckLineLength(linePos+1);
|
|
Inc(linePos);
|
|
case line[linePos] of
|
|
'''': begin
|
|
AResult := '\';
|
|
exit(True);
|
|
end;
|
|
'\' : begin
|
|
AResult := '\';
|
|
Inc(linePos);
|
|
exit(True);
|
|
end;
|
|
'u' : begin
|
|
CheckLineLength(linePos+4);
|
|
AResult := '$'+Copy(line,(linePos+1),4);
|
|
if not TryStrToInt(AResult,k) then
|
|
raise Exception.CreateFmt('Hexadecimal Integer expected but found "%s", line = "%s".',[AResult,CurrentLine()]);
|
|
SetLength(us,1);
|
|
us[1] := UnicodeChar(k);
|
|
AResult := UTF8Encode(us);
|
|
Inc(linePos,5);
|
|
exit(True);
|
|
end;
|
|
'U' : begin
|
|
CheckLineLength(linePos+8);
|
|
AResult := '$'+Copy(line,(linePos+1),8);
|
|
if not TryStrToInt(AResult,k) then
|
|
raise Exception.CreateFmt('Hexadecimal Integer expected but found "%s".',[AResult]);
|
|
if (k > High(Word)) then begin
|
|
SetLength(us,2);
|
|
FromUCS4(k,us[1],us[2]);
|
|
if (Ord(us[2]) = 0) then
|
|
SetLength(us,1);
|
|
end else begin
|
|
SetLength(us,1);
|
|
us[1] := UnicodeChar(k);
|
|
end;
|
|
AResult := UTF8Encode(us);
|
|
Inc(linePos,9);
|
|
exit(True);
|
|
end;
|
|
else
|
|
raise Exception.CreateFmt('Invalide escaped string "%s", at %d position.',[CurrentLine(),linePos]);
|
|
end;
|
|
end;
|
|
|
|
function ReadQuotedString() : UTF8String;
|
|
var
|
|
ks : UTF8String;
|
|
begin
|
|
if (line[linePos] <> '''') then
|
|
raise Exception.CreateFmt('Unexpected character found "%s", a quote expected: "%s".',[line[linePos],CurrentLine()]);
|
|
Inc(linePos);
|
|
if (linePos > lineLength) then
|
|
raise Exception.CreateFmt('Unexpected end of line, a quote expected: "%s".',[CurrentLine()]);
|
|
if (line[linePos] = '''') then begin
|
|
Inc(linePos);
|
|
Result := '''';
|
|
exit;
|
|
end;
|
|
Result := '';
|
|
while (linePos <= lineLength) and ReadChar(ks) do begin
|
|
Result := Result + ks;
|
|
if (line[linePos] = '''') then
|
|
break;
|
|
end;
|
|
if (line[linePos] = '''') then begin
|
|
Inc(linePos);
|
|
exit;
|
|
end;
|
|
raise Exception.CreateFmt('Unexpected end of line, a quote expected: "%s".',[line]);
|
|
end;
|
|
|
|
function ReadUnQuotedString() : UTF8String;
|
|
var
|
|
k : Integer;
|
|
begin
|
|
k := linePos;
|
|
while (linePos <= lineLength) and
|
|
not(CharInSet(line[linePos],[' ',#9,'#', '=','&','[',']','<','>','''','/','|']))
|
|
do begin
|
|
Inc(linePos);
|
|
end;
|
|
if (linePos > k) then begin
|
|
if (line[linePos] in [' ',#9,'#', '=','&','[',']','<','>','''','/','|']) then
|
|
Result := Copy(line,k,(linePos-k))
|
|
else
|
|
Result := Copy(line,k,(linePos-k)); //Result := Copy(line,k,(linePos-k+1));
|
|
end else begin
|
|
Result := '';
|
|
end;
|
|
end;
|
|
|
|
function NextToken() : UTF8String; overload;
|
|
var
|
|
k : Integer;
|
|
ks : UTF8String;
|
|
begin
|
|
specialChararter := False;
|
|
while True do begin
|
|
while (linePos <= lineLength) and CharInSet(line[linePos],[' ', #9, #13]) do begin
|
|
Inc(linePos);
|
|
end;
|
|
if (linePos > lineLength) or (line[linePos] = '#') then begin
|
|
if not NextLine() then begin
|
|
if (line[linePos] = '#') then
|
|
linePos := lineLength+1; // A comment terminates a line !
|
|
exit('');
|
|
end;
|
|
Continue;
|
|
end ;
|
|
Break;
|
|
end;
|
|
if (linePos > lineLength) then
|
|
exit('');
|
|
|
|
if (line[linePos] = '*') then begin
|
|
linePos := linePos+1;
|
|
specialChararter := True;
|
|
exit('*');
|
|
end;
|
|
k := linePos;
|
|
if (linePos <= lineLength) and CharInSet(line[linePos],['<','>']) then begin
|
|
ks := line[linePos];
|
|
while (linePos <= lineLength) and (line[linePos] = ks) do begin
|
|
Inc(linePos);
|
|
end;
|
|
Result := Copy(line,k,(linePos-k));
|
|
exit;
|
|
end;
|
|
if (linePos <= lineLength) and
|
|
CharInSet(line[linePos],['=','&','[',']','<','>','/','|'])
|
|
then begin
|
|
Inc(linePos);
|
|
Result := Copy(line,k,(linePos-k));
|
|
specialChararter := True;
|
|
exit;
|
|
end;
|
|
{if (line[linePos] = '''') then
|
|
exit(ReadQuotedString()); }
|
|
Result := '';
|
|
while (linePos <= lineLength) do begin
|
|
if CharInSet(line[linePos],[' ',#9,#13,'#', '=','&','[',']','<','>','/','|']) then
|
|
Break;
|
|
if (line[linePos] <> '''') then
|
|
ks := ReadUnQuotedString()
|
|
else
|
|
ks := ReadQuotedString();
|
|
if (ks = '') then
|
|
Break;
|
|
Result := Result + ks;
|
|
end;
|
|
end;
|
|
|
|
function NextToken(const AMustSucceed : Boolean) : UTF8String; overload;
|
|
begin
|
|
Result := NextToken();
|
|
if (Result = '') and AMustSucceed then
|
|
raise Exception.CreateFmt('Unexpected end of line(%d) : "%s".',[lineIndex,CurrentLine()]);
|
|
end;
|
|
|
|
procedure CheckToken(const AActual, AExpectedToken : UTF8String);
|
|
begin
|
|
if (AActual <> AExpectedToken) then
|
|
raise Exception.CreateFmt(
|
|
'"%s" expected but "%s" found at position %d, BufferPosition(%d), line(%d) = "%s".',
|
|
[AExpectedToken,AActual,linePos,bufferPos,lineIndex,CurrentLine()]
|
|
);
|
|
end;
|
|
|
|
function parse_reset() : Boolean;
|
|
var
|
|
s, s1 : UTF8String;
|
|
logicalPos : TReorderLogicalReset;
|
|
k : Integer;
|
|
begin
|
|
s := NextToken(True);
|
|
if (s = '[') and specialChararter then begin
|
|
s := NextToken();
|
|
if (s = s_BEFORE) then begin
|
|
s := NextToken();
|
|
if not(TryStrToInt(s,k)) or (k < 1) or (k > 3) then
|
|
CheckToken(s,'"1" or "2" or "3"');
|
|
CheckToken(NextToken(True),']');
|
|
statement^.Reset := String2UnicodeCodePointArray(NextToken(True));
|
|
statement^.Before := True;
|
|
end else begin
|
|
while True do begin
|
|
s1 := NextToken();
|
|
if (s1 = '') or (s1 = ']') then
|
|
break;
|
|
s := s + Trim(s1)
|
|
end;
|
|
CheckToken(s1,']');
|
|
if (s = '') then
|
|
raise Exception.CreateFmt('Unexpected end of line : "%s".',[CurrentLine()]);
|
|
if not TryStrToLogicalReorder(s,logicalPos) then
|
|
raise Exception.CreateFmt(sUnknownResetLogicalPosition,[s]);
|
|
statement^.LogicalPosition := logicalPos;
|
|
end;
|
|
end else begin
|
|
statement^.Reset := String2UnicodeCodePointArray(s);
|
|
end;
|
|
if (statement^.LogicalPosition = TReorderLogicalReset.None) and
|
|
(Length(statement^.Reset) = 0)
|
|
then
|
|
raise Exception.Create(sInvalidResetClause);
|
|
Result := True;
|
|
end;
|
|
|
|
procedure EnsureElementLength(const ALength : Integer);
|
|
var
|
|
k, d : Integer;
|
|
begin
|
|
k := Length(statement^.Elements);
|
|
if (k < ALength) then begin
|
|
k := ALength;
|
|
if (k = 0) then begin
|
|
k := 50;
|
|
end else begin
|
|
if (k < 10) then
|
|
d := 10
|
|
else
|
|
d := 2;
|
|
k := k * d;
|
|
end;
|
|
statement^.SetElementCount(k);
|
|
end;
|
|
end;
|
|
|
|
procedure AddElement(
|
|
const AChars : array of UCS4Char;
|
|
const AWeigthKind : TReorderWeigthKind;
|
|
const AContext : UTF8String
|
|
);overload;
|
|
var
|
|
kp : PReorderUnit;
|
|
kc, k : Integer;
|
|
begin
|
|
EnsureElementLength(elementActualCount+1);
|
|
kp := @statement^.Elements[elementActualCount];
|
|
kc := Length(AChars)-1;
|
|
if (kc < 0) then
|
|
kc := 0;
|
|
SetLength(kp^.Characters,kc);
|
|
for k := 0 to kc - 1 do
|
|
kp^.Characters[k] := AChars[k];
|
|
kp^.WeigthKind := AWeigthKind;
|
|
elementActualCount := elementActualCount + 1;
|
|
if (AContext <> '') then
|
|
kp^.Context := String2UnicodeCodePointArray(AContext);
|
|
end;
|
|
|
|
procedure AddElement(
|
|
const AChar : UCS4Char;
|
|
const AWeigthKind : TReorderWeigthKind;
|
|
const AContext : UTF8String
|
|
);overload;
|
|
var
|
|
kp : PReorderUnit;
|
|
kc, k : Integer;
|
|
begin
|
|
EnsureElementLength(elementActualCount+1);
|
|
kp := @statement^.Elements[elementActualCount];
|
|
SetLength(kp^.Characters,1);
|
|
kp^.Characters[0] := AChar;
|
|
kp^.WeigthKind := AWeigthKind;
|
|
elementActualCount := elementActualCount + 1;
|
|
if (AContext <> '') then
|
|
kp^.Context := String2UnicodeCodePointArray(AContext);
|
|
end;
|
|
|
|
function ReadNextItem() : Boolean;
|
|
var
|
|
contextStr : UTF8String;
|
|
w : TReorderWeigthKind;
|
|
last : PReorderUnit;
|
|
u4str : UCS4String;
|
|
s, ts : UTF8String;
|
|
expandStr : TUnicodeCodePointArray;
|
|
k, kc, x : Integer;
|
|
us : UnicodeString;
|
|
begin
|
|
contextStr := '';
|
|
expandStr := nil;
|
|
Result := False;
|
|
SaveState();
|
|
s := NextToken();
|
|
if (s = '') then begin
|
|
DiscardState();
|
|
exit;
|
|
end;
|
|
if specialChararter and CharInSet(s[1],['&','[']) then begin
|
|
RestoreState();
|
|
exit;
|
|
end;
|
|
DiscardState();
|
|
if not TryStringToReorderWeigthKind(s,w) then
|
|
CheckToken(s,'Reorder Weigth');
|
|
s := NextToken(True);
|
|
if specialChararter then begin
|
|
if (s = '[') then begin
|
|
k := 1;
|
|
while True do begin
|
|
ts := NextToken(True);
|
|
s := s + ts;
|
|
if specialChararter then begin
|
|
if (ts = '[') then
|
|
k := k+1
|
|
else if (ts = ']') then begin
|
|
k := k-1;
|
|
if (k = 0) then
|
|
Break;
|
|
end;
|
|
end;
|
|
end;
|
|
if (Pos('variable',s) > 0) then
|
|
exit(True);
|
|
end else if (s = '*') then begin
|
|
s := NextToken(True);
|
|
us := UTF8Decode(s);
|
|
u4str := UnicodeStringToUCS4String(us);
|
|
kc := Length(u4str)-1;
|
|
k := 0;
|
|
while (k <= (kc-1)) do begin
|
|
if (k > 0) and (u4str[k] = Ord('-')) then begin
|
|
if (k = (kc-1)) then begin
|
|
AddElement(u4str[k],w,contextStr);
|
|
end else begin
|
|
for x := (u4str[k-1]+1) to u4str[k+1] do
|
|
AddElement(x,w,contextStr);
|
|
k := k+1;
|
|
end;
|
|
end else begin
|
|
AddElement(u4str[k],w,contextStr);
|
|
end;
|
|
k := k+1;
|
|
end;
|
|
exit(True);
|
|
end;
|
|
end;
|
|
SaveState();
|
|
ts := NextToken();
|
|
if (ts = '') or not(specialChararter) then begin
|
|
RestoreState();
|
|
us := UTF8Decode(s);
|
|
u4str := UnicodeStringToUCS4String(us);
|
|
end else begin
|
|
if (ts = '|') then begin
|
|
DiscardState();
|
|
contextStr := s;
|
|
s := NextToken(True);
|
|
SaveState();
|
|
ts := NextToken();
|
|
end;
|
|
if specialChararter and (ts = '/') then begin
|
|
expandStr := String2UnicodeCodePointArray(NextToken(True));
|
|
DiscardState();
|
|
end else begin
|
|
RestoreState();
|
|
end;
|
|
u4str := UnicodeStringToUCS4String(UTF8Decode(s));
|
|
end;
|
|
AddElement(u4str,w,contextStr);
|
|
if (Length(expandStr) > 0) then begin
|
|
last := @statement^.Elements[elementActualCount-1];
|
|
last^.ExpansionChars := expandStr;
|
|
end;
|
|
Result := True;
|
|
end;
|
|
|
|
function ReadUnicodeSet() : UTF8String;
|
|
var
|
|
k, c : Integer;
|
|
ks : UTF8String;
|
|
begin
|
|
while True do begin
|
|
while (linePos <= lineLength) and CharInSet(line[linePos],[' ', #9, #13]) do begin
|
|
Inc(linePos);
|
|
end;
|
|
if (linePos > lineLength) or (line[linePos] = '#') then begin
|
|
if not NextLine() then begin
|
|
if (line[linePos] = '#') then
|
|
linePos := lineLength+1; // A comment terminates a line !
|
|
exit('');
|
|
end;
|
|
Continue;
|
|
end ;
|
|
Break;
|
|
end;
|
|
if (linePos > lineLength) then
|
|
exit('');
|
|
if (line[linePos] <> '[') then
|
|
exit;
|
|
k := linePos;
|
|
c := 1;
|
|
ks := '';
|
|
linePos := linePos+1;
|
|
while (linePos <= lineLength) do begin
|
|
if (line[linePos] = '[') then
|
|
c := c+1
|
|
else if (line[linePos] = ']') then
|
|
c := c-1;
|
|
if (c = 0) then
|
|
break;
|
|
linePos := linePos+1;
|
|
if (linePos > lineLength) then begin
|
|
ks := ks+Copy(line,k,linePos);
|
|
if not NextLine() then
|
|
raise Exception.CreateFmt(sInvalidUnicodeSetExpression,[line]);
|
|
k := linePos;
|
|
end;
|
|
end;
|
|
if (line[linePos] <> ']') then
|
|
raise Exception.CreateFmt(sInvalidUnicodeSetExpression,[line]);
|
|
linePos := linePos+1;
|
|
ks := ks+Copy(line,k,(linePos-k));
|
|
Result := ks;
|
|
end;
|
|
|
|
function ParseSetting() : Boolean;
|
|
var
|
|
name, value : UTF8String;
|
|
c, k : Integer;
|
|
begin
|
|
name := NextToken(True);
|
|
if (name = ']') then
|
|
raise Exception.CreateFmt(sInvalidSettingExpression,[line]);
|
|
AStatement^.Setting.Name := name;
|
|
AStatement^.Setting.OptionValue := StringToSettingOption(AStatement^.Setting.Name);
|
|
if (AStatement^.Setting.OptionValue in SETTING_WITH_UNICODESET) then begin
|
|
value := ReadUnicodeSet();
|
|
if (value = '') then
|
|
raise Exception.CreateFmt(sInvalidSettingExpression,[line]);
|
|
CheckToken(NextToken(True),']');
|
|
SetLength(AStatement^.Setting.Values,1);
|
|
AStatement^.Setting.Values[0] := value;
|
|
Result := True;
|
|
end else begin
|
|
c := 0;
|
|
while True do begin
|
|
value := NextToken((c = 0));
|
|
if (value = '') or (specialChararter and (value = ']')) then begin
|
|
if (c = 0) then
|
|
raise Exception.CreateFmt(sInvalidSettingExpression,[line]);
|
|
break;
|
|
end;
|
|
k := Length(AStatement^.Setting.Values);
|
|
SetLength(AStatement^.Setting.Values,(k+1));
|
|
AStatement^.Setting.Values[k] := value;
|
|
c := c+1;
|
|
end;
|
|
Result := (c > 0);
|
|
end;
|
|
end;
|
|
|
|
var
|
|
locToken : UTF8String;
|
|
begin
|
|
Result := False;
|
|
elementActualCount := 0;
|
|
if (AStartPosition >= AMaxLen) then
|
|
exit;
|
|
historyItemIndex := -1;
|
|
lineIndex := ALineCount;
|
|
bufferLength := AMaxLen;
|
|
bufferPos := AStartPosition;
|
|
p := AData+AStartPosition;
|
|
SetLength(line,LINE_LENGTH);
|
|
Clear(AStatement^);
|
|
if not NextLine() then
|
|
exit;
|
|
locToken := NextToken();
|
|
if (locToken = '') then
|
|
exit;
|
|
if not specialChararter then
|
|
raise Exception.CreateFmt(sSpecialCharacterExpected,[locToken,CurrentLine()]);
|
|
if (locToken = '&') then begin
|
|
AStatement.Kind := TStatementKind.Sequence;
|
|
statement := @AStatement.ReorderSequence;
|
|
if not parse_reset() then
|
|
exit;
|
|
while ReadNextItem() do begin
|
|
// All done in the condition
|
|
end;
|
|
statement^.SetElementCount(elementActualCount);
|
|
end else if (locToken = '[') then begin
|
|
if not ParseSetting() then
|
|
exit;
|
|
AStatement.Kind := TStatementKind.Setting;
|
|
end;
|
|
if (linePos > lineLength) then
|
|
linePos := lineLength+1;
|
|
ANextPos := bufferPos-lineLength+linePos-1;
|
|
Result := (ANextPos > AStartPosition);
|
|
ALineCount := lineIndex;
|
|
end;
|
|
|
|
procedure ParseInitialDocument(
|
|
ASequence : POrderedCharacters;
|
|
ADoc : TCustomMemoryStream;
|
|
ASettings : TSettingRecArray
|
|
);
|
|
var
|
|
buffer : PAnsiChar;
|
|
bufferLength : Integer;
|
|
i, nextPost : Integer;
|
|
statement : TParsedStatement;
|
|
p : PReorderUnit;
|
|
lineCount : Integer;
|
|
begin
|
|
if (ADoc.Size < 1) then
|
|
exit;
|
|
buffer := ADoc.Memory; //0xEF,0xBB,0xBF
|
|
bufferLength := ADoc.Size;
|
|
if (bufferLength >= 3) and
|
|
(Byte(buffer[0]) = $EF) and
|
|
(Byte(buffer[1]) = $BB) and
|
|
(Byte(buffer[2]) = $BF)
|
|
then begin
|
|
Inc(buffer,3);
|
|
Dec(bufferLength,3);
|
|
end;
|
|
lineCount := 0;
|
|
ASequence^.Clear();
|
|
SetLength(ASequence^.Data,50000);
|
|
nextPost := 0;
|
|
i := 0;
|
|
while (i < bufferLength) do begin
|
|
Clear(statement);
|
|
if not ParseStatement(buffer,i,bufferLength,@statement,nextPost,lineCount) then
|
|
Break;
|
|
i := nextPost;
|
|
try
|
|
if (statement.Kind = TStatementKind.Sequence) then
|
|
ASequence^.ApplyStatement(@statement.ReorderSequence)
|
|
else
|
|
AddItem(ASettings,@statement.Setting);
|
|
except
|
|
on e : Exception do begin
|
|
e.Message := Format('%s Position = %d',[e.Message,i]);
|
|
raise;
|
|
end;
|
|
end;
|
|
end;
|
|
if (ASequence^.ActualLength > 0) then begin
|
|
p := @ASequence^.Data[0];
|
|
for i := 0 to ASequence^.ActualLength - 1 do begin
|
|
p^.Changed := False;
|
|
Inc(p);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure ParseInitialDocument(
|
|
ASequence : POrderedCharacters;
|
|
AFileName : string;
|
|
ASettings : TSettingRecArray
|
|
);
|
|
var
|
|
doc : TMemoryStream;
|
|
begin
|
|
doc := TMemoryStream.Create();
|
|
try
|
|
doc.LoadFromFile(AFileName);
|
|
doc.Position := 0;
|
|
ParseInitialDocument(ASequence,doc,ASettings);
|
|
finally
|
|
doc.Free();
|
|
end;
|
|
end;
|
|
|
|
|
|
end.
|
|
|