fpc/compiler/ldscript.pas
2023-07-25 16:06:53 +02:00

312 lines
7.3 KiB
ObjectPascal

{
Copyright (c) 2012 by Sergei Gorelkin
A basic lexer for GNU ld scripts
This program is free software; you can redistribute it and/or modify
iu under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge- MA 02139, USA.
****************************************************************************
}
unit ldscript;
{$i fpcdefs.inc}
interface
uses
owbase;
type
TldScriptToken=char;
TScriptLexer=class(TObject)
data: ansistring;
curtoken: TldScriptToken;
curtokenstr: string;
curpos: longint;
line: longint;
linestart: longint;
public
constructor Create(aReader:TObjectReader);
procedure nextToken;
function CheckForIdent(const s:string):boolean;
function CheckFor(c:TldScriptToken):boolean;
procedure Expect(c:TldScriptToken);
property token:TldScriptToken read curtoken;
property tokenstr:string read curtokenstr;
end;
const
tkEOF = #0;
tkINVALID = #1;
tkIDENT = #2;
tkNUMBER = #3;
tkLITERAL = #4;
tkLSHIFT = #5; { << }
tkLE = #6; { <= }
tkRSHIFT = #7; { >> }
tkGE = #8; { >= }
tkANDAND = #9; { && }
tkANDEQ = #10; { &= }
tkOROR = #11; { || }
tkOREQ = #12; { |= }
tkDIVEQ = #13; { /= }
tkMULTEQ = #14; { *= }
tkMINUSEQ = #15; { -= }
tkPLUSEQ = #16; { += }
tkNE = #17; { != }
tkEQ = #18; { == }
tkRSHIFTEQ = #19; { >>= }
tkLSHIFTEQ = #20; { <<= }
implementation
uses
sysutils;
const
NameChars=['A'..'Z','a'..'z','_','.','$','0'..'9','+','-','=',',','*','?','/','~','\','[',']'];
{*****************************************************************************
TSCRIPTLEXER
*****************************************************************************}
constructor TScriptLexer.Create(AReader:TObjectReader);
begin
{ Expected data size is few hundred bytes, }
SetLength(data,AReader.size);
AReader.Read(data[1],AReader.size);
curpos:=1;
end;
procedure TScriptLexer.nextToken;
var
p,start: longint;
begin
p:=curpos;
repeat
{ skip whitespace }
while (data[p] in [#32,#9,#13]) do
inc(p);
start:=p;
{ C-style comment }
if (data[p]='/') and (data[p+1]='*') then
begin
inc(p,2);
while (data[p]<>'*') and (data[p+1]<>'/') do
begin
if (data[p]=#0) then
begin
curtoken:=tkINVALID;
exit;
end;
if (data[p]=#10) then
begin
inc(line);
linestart:=p+1;
end;
inc(p);
end;
inc(p,2);
continue;
end
else if (data[p]=#10) then
begin
inc(p);
inc(line);
linestart:=p;
continue;
end
else if (data[p]='#') then { line comment }
begin
inc(p);
while (data[p]<>#0) and (data[p]<>#10) do
inc(p);
continue;
end;
case data[p] of
#0: curtoken:=tkEOF;
'/':
if (data[p+1] in NameChars) then
begin
inc(p);
while (data[p] in NameChars) do
inc(p);
curtoken:=tkIDENT;
end
else if (data[p+1]='=') then
curtoken:=tkDIVEQ
else
curtoken:='/';
'A'..'Z','a'..'z','_','.','$','\':
begin
inc(p);
while (data[p] in NameChars) do
inc(p);
curtoken:=tkIDENT;
end;
'0'..'9':
begin
if (data[p]='0') and (data[p+1] in ['x','X']) then
begin
inc(p,2);
while data[p] in ['0'..'9','a'..'f','A'..'F'] do
inc(p);
end
else
while (data[p] in ['0'..'9']) do
inc(p);
curtoken:=tkNUMBER;
end;
'"':
begin
inc(p);
while (data[p]<>'"') and (data[p]<>#10) do
inc(p);
if data[p]=#10 then
begin
curtoken:=tkINVALID;
exit;
end;
inc(p);
curtoken:=tkLITERAL;
end;
'<':
if (data[p+1]='<') then
begin
if (data[p+2]='=') then
curtoken:=tkLSHIFTEQ
else
curtoken:=tkLSHIFT;
end
else if (data[p+1]='=') then
curtoken:=tkLE
else
curtoken:='<';
'>':
if (data[p+1]='>') then
begin
if (data[p+2]='=') then
curtoken:=tkRSHIFTEQ
else
curtoken:=tkRSHIFT;
end
else if (data[p+1]='=') then
curtoken:=tkGE
else
curtoken:='>';
'!':
if (data[p+1]='=') then
curtoken:=tkNE
else
curtoken:='!';
'&':
if (data[p+1]='&') then
curtoken:=tkANDAND
else if (data[p+1]='=') then
curtoken:=tkANDEQ
else
curtoken:='&';
'|':
if (data[p+1]='|') then
curtoken:=tkOROR
else if (data[p+1]='=') then
curtoken:=tkOREQ
else
curtoken:='|';
'*':
if (data[p+1]='=') then
curtoken:=tkMULTEQ
else
curtoken:='*';
'+':
if (data[p+1]='=') then
curtoken:=tkPLUSEQ
else
curtoken:='+';
'-':
if (data[p+1]='=') then
curtoken:=tkMINUSEQ
else
curtoken:='-';
'=':
if (data[p+1]='=') then
curtoken:=tkEQ
else
curtoken:='=';
'(',')','{','}','[',']',';','?',':':
curtoken:=data[p];
else
curtoken:=tkINVALID;
exit;
end;
break;
until false;
case curtoken of
tkRSHIFTEQ,tkLSHIFTEQ: inc(p,3);
tkLSHIFT..tkEQ: inc(p,2);
#32..#255: inc(p);
tkIDENT,tkNUMBER:
setstring(curtokenstr,PChar(@data[start]),p-start);
tkLITERAL:
setstring(curtokenstr,PChar(@data[start+1]),p-start-2);
end;
curpos:=p;
end;
procedure TScriptLexer.Expect(c:TldScriptToken);
begin
if (curtoken=c) then
nextToken
else
{error};
end;
function TScriptLexer.CheckForIdent(const s:string):boolean;
begin
result:=(curtoken=tkIDENT) and (curtokenstr=s);
if result then
nextToken;
end;
function TScriptLexer.CheckFor(c:TldScriptToken):boolean;
begin
result:=(curtoken=c);
if result then
nextToken;
end;
end.