fpc/compiler/llvm/llvmdef.pas
Jonas Maebe 39074edf10 - removed the globalsymbolmangleprefix/suffix constants that were added for
llvm
  * instead, add LLVM-specific name mangling based on the asmsymbol's bind
    and typ whenever we write out its name

git-svn-id: branches/hlcgllvm@28166 -
2014-07-05 15:44:16 +00:00

722 lines
27 KiB
ObjectPascal
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
Copyright (c) 2013 by Jonas Maebe
This unit implements some LLVM type helper routines.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
{$i fpcdefs.inc}
unit llvmdef;
interface
uses
cclasses,globtype,
aasmbase,
parabase,
symbase,symtype,symdef,
llvmbase;
type
{ there are three different circumstances in which procdefs are used:
a) definition of a procdef that's implemented in the current module or
declaration of an external routine that's called in the current one
b) alias declaration of a procdef implemented in the current module
c) defining a procvar type
The main differences between the contexts are:
a) information about sign extension of result type, proc name, parameter names & types
b) no information about sign extension of result type, proc name, no parameter names, parameter types
c) information about sign extension of result type, no proc name, no parameter names, parameter types
}
tllvmprocdefdecltype = (lpd_decl,lpd_alias,lpd_procvar);
{ Encode a type into the internal format used by LLVM. }
function llvmencodetype(def: tdef): TSymStr;
{ incremental version of llvmencodetype(). "inaggregate" indicates whether
this was a recursive call to get the type of an entity part of an
aggregate type (array, record, ...) }
procedure llvmaddencodedtype(def: tdef; inaggregate: boolean; var encodedstr: TSymStr);
{ encode a procdef/procvardef into the internal format used by LLVM }
function llvmencodeproctype(def: tabstractprocdef; const customname: TSymStr; pddecltype: tllvmprocdefdecltype): TSymStr;
{ incremental version of the above }
procedure llvmaddencodedproctype(def: tabstractprocdef; const customname: TSymStr; pddecltype: tllvmprocdefdecltype; var encodedstr: TSymStr);
{ function result types may have to be represented differently, e.g. a
record consisting of 4 longints must be returned as a record consisting of
two int64's on x86-64. This function is used to create (and reuse)
temporary recorddefs for such purposes.}
function llvmgettemprecorddef(fieldtypes: tfplist; packrecords: shortint): trecorddef;
{ get the llvm type corresponding to a parameter, e.g. a record containing
two integer int64 for an arbitrary record split over two individual int64
parameters, or an int32 for an int16 parameter on a platform that requires
such parameters to be zero/sign extended. The second parameter can be used
to get the type before zero/sign extension, as e.g. required to generate
function declarations. }
function llvmgetcgparadef(const cgpara: tcgpara; beforevalueext: boolean): tdef;
{ can be used to extract the value extension info from acgpara. Pass in
the def of the cgpara as first parameter and a local variable holding
a copy of the def of the location (value extension only makes sense for
ordinal parameters that are smaller than a single location). The routine
will return the def of the location without sign extension (if applicable)
and the kind of sign extension that was originally performed in the
signext parameter }
procedure llvmextractvalueextinfo(paradef: tdef; var paralocdef: tdef; out signext: tllvmvalueextension);
{ returns whether a paraloc should be translated into an llvm "byval"
parameter. These are declared as pointers to a particular type, but
usually turned into copies onto the stack. The exact behaviour for
parameters that should be passed in registers is undefined and depends on
the platform, and furthermore this modifier sometimes inhibits
optimizations. As a result,we only use it for aggregate parameters of
which we know that they should be passed on the stack }
function llvmbyvalparaloc(paraloc: pcgparalocation): boolean;
{ returns whether a def is representated by an aggregate type in llvm
(struct, array) }
function llvmaggregatetype(def: tdef): boolean;
function llvmconvop(fromsize, tosize: tdef): tllvmop;
{ mangle a global identifier so that it's recognised by LLVM as a global
(in the sense of module-global) label and so that it won't mangle the
name further according to platform conventions (we already did that) }
function llvmmangledname(const s: TSymStr): TSymStr;
function llvmasmsymname(const sym: TAsmSymbol): TSymStr;
implementation
uses
cutils,constexp,
verbose,systems,
fmodule,
symtable,symconst,symsym,
llvmsym,hlcgobj,
defutil,cgbase,paramgr;
{******************************************************************
Type encoding
*******************************************************************}
function llvmaggregatetype(def: tdef): boolean;
begin
result:=
(def.typ in [recorddef,filedef,variantdef]) or
((def.typ=arraydef) and
not is_dynamic_array(def)) or
((def.typ=setdef) and
not is_smallset(def)) or
is_shortstring(def) or
is_object(def) or
((def.typ=procvardef) and
not tprocvardef(def).is_addressonly)
end;
function llvmconvop(fromsize, tosize: tdef): tllvmop;
var
fromregtyp,
toregtyp: tregistertype;
frombytesize,
tobytesize: asizeint;
begin
fromregtyp:=chlcgobj.def2regtyp(fromsize);
toregtyp:=chlcgobj.def2regtyp(tosize);
{ int to pointer or vice versa }
if fromregtyp=R_ADDRESSREGISTER then
begin
case toregtyp of
R_INTREGISTER:
result:=la_ptrtoint;
R_ADDRESSREGISTER:
result:=la_bitcast;
else
result:=la_ptrtoint_to_x;
end;
end
else if toregtyp=R_ADDRESSREGISTER then
begin
case fromregtyp of
R_INTREGISTER:
result:=la_inttoptr;
R_ADDRESSREGISTER:
result:=la_bitcast;
else
result:=la_x_to_inttoptr;
end;
end
else
begin
frombytesize:=fromsize.size;
tobytesize:=tosize.size;
{ need zero/sign extension, float truncation or plain bitcast? }
if tobytesize<>frombytesize then
begin
case fromregtyp of
R_FPUREGISTER,
R_MMREGISTER:
begin
{ todo: update once we support vectors }
if not(toregtyp in [R_FPUREGISTER,R_MMREGISTER]) then
internalerror(2014062203);
if tobytesize<frombytesize then
result:=la_fptrunc
else
result:=la_fpext
end;
else
begin
if tobytesize<frombytesize then
result:=la_trunc
else if is_signed(fromsize) then
{ fromsize is signed -> sign extension }
result:=la_sext
else
result:=la_zext;
end;
end;
end
else
result:=la_bitcast;
end;
end;
function llvmmangledname(const s: TSymStr): TSymStr;
begin
result:='@"\01'+s+'"';
end;
function llvmasmsymname(const sym: TAsmSymbol): TSymStr;
begin
{ AT_ADDR and AT_LABEL represent labels in the code, which have
a different type in llvm compared to (global) data labels }
if sym.bind=AB_TEMP then
result:='%'+sym.name
else if not(sym.typ in [AT_LABEL,AT_ADDR]) then
result:=llvmmangledname(sym.name)
else
result:='label %'+sym.name;
end;
function llvmbyvalparaloc(paraloc: pcgparalocation): boolean;
begin
{ "byval" is broken for register paras on several platforms in llvm
(search for "byval" in llvm's bug tracker). Additionally, it should only
be used to pass aggregate parameters on the stack, because it reportedly
inhibits llvm's midlevel optimizers.
Exception (for now?): parameters that have special shifting
requirements, because modelling those in llvm is not easy (and clang
nor llvm-gcc seem to do so either) }
result:=
((paraloc^.loc=LOC_REFERENCE) and
llvmaggregatetype(paraloc^.def)) or
(paraloc^.shiftval<>0)
end;
procedure llvmaddencodedabstractrecordtype(def: tabstractrecorddef; var encodedstr: TSymStr); forward;
procedure llvmaddencodedtype_intern(def: tdef; inaggregate, noimplicitderef: boolean; var encodedstr: TSymStr);
begin
case def.typ of
stringdef :
begin
case tstringdef(def).stringtype of
st_widestring,
st_unicodestring:
{ the variable does not point to the header, but to a
null-terminated string/array with undefined bounds }
encodedstr:=encodedstr+'i16*';
st_ansistring:
encodedstr:=encodedstr+'i8*';
st_shortstring:
{ length byte followed by string bytes }
if tstringdef(def).len>0 then
encodedstr:=encodedstr+'['+tostr(tstringdef(def).len+1)+' x i8]'
else
encodedstr:=encodedstr+'[0 x i8]';
else
internalerror(2013100201);
end;
end;
enumdef:
begin
encodedstr:=encodedstr+'i'+tostr(def.size*8);
end;
orddef :
begin
if is_void(def) then
encodedstr:=encodedstr+'void'
{ mainly required because comparison operations return i1, and
otherwise we always have to immediatel extend them to i8 for
no good reason; besides, Pascal booleans can only contain 0
or 1 in valid code anyway (famous last words...) }
else if torddef(def).ordtype=pasbool8 then
encodedstr:=encodedstr+'i1'
else
encodedstr:=encodedstr+'i'+tostr(def.size*8);
end;
pointerdef :
begin
if is_voidpointer(def) then
encodedstr:=encodedstr+'i8*'
else
begin
llvmaddencodedtype_intern(tpointerdef(def).pointeddef,inaggregate,false,encodedstr);
encodedstr:=encodedstr+'*';
end;
end;
floatdef :
begin
case tfloatdef(def).floattype of
s32real:
encodedstr:=encodedstr+'float';
s64real:
encodedstr:=encodedstr+'double';
{ necessary to be able to force our own size/alignment }
s80real:
{ prevent llvm from allocating the standard ABI size for
extended }
if inaggregate then
encodedstr:=encodedstr+'[10 x i8]'
else
encodedstr:=encodedstr+'x86_fp80';
sc80real:
encodedstr:=encodedstr+'x86_fp80';
s64comp,
s64currency:
encodedstr:=encodedstr+'i64';
s128real:
{$if defined(powerpc) or defined(powerpc128)}
encodedstr:=encodedstr+'ppc_fp128';
{$else}
encodedstr:=encodedstr+'fp128';
{$endif}
else
internalerror(2013100202);
end;
end;
filedef :
begin
case tfiledef(def).filetyp of
ft_text :
llvmaddencodedtype_intern(search_system_type('TEXTREC').typedef,inaggregate,false,encodedstr);
ft_typed,
ft_untyped :
llvmaddencodedtype_intern(search_system_type('FILEREC').typedef,inaggregate,false,encodedstr);
else
internalerror(2013100203);
end;
end;
recorddef :
begin
llvmaddencodedabstractrecordtype(trecorddef(def),encodedstr);
end;
variantdef :
begin
llvmaddencodedtype_intern(search_system_type('TVARDATA').typedef,inaggregate,false,encodedstr);
end;
classrefdef :
begin
{ todo: define proper type for VMT and use that }
encodedstr:=encodedstr+'i8*';
end;
setdef :
begin
{ just an array as far as llvm is concerned; don't use a "packed
array of i1" or so, this requires special support in backends
and guarantees nothing about the internal format }
if is_smallset(def) then
llvmaddencodedtype_intern(cgsize_orddef(def_cgsize(def)),inaggregate,false,encodedstr)
else
encodedstr:=encodedstr+'['+tostr(tsetdef(def).size)+' x i8]';
end;
formaldef :
begin
{ var/const/out x }
encodedstr:=encodedstr+'i8*';
end;
arraydef :
begin
if is_array_of_const(def) then
begin
encodedstr:=encodedstr+'[0 x ';
llvmaddencodedtype_intern(search_system_type('TVARREC').typedef,true,false,encodedstr);
encodedstr:=encodedstr+']';
end
else if is_open_array(def) then
begin
encodedstr:=encodedstr+'[0 x ';
llvmaddencodedtype_intern(tarraydef(def).elementdef,true,false,encodedstr);
encodedstr:=encodedstr+']';
end
else if is_dynamic_array(def) then
begin
llvmaddencodedtype_intern(tarraydef(def).elementdef,inaggregate,false,encodedstr);
encodedstr:=encodedstr+'*';
end
else if is_packed_array(def) then
begin
encodedstr:=encodedstr+'['+tostr(tarraydef(def).size div tarraydef(def).elementdef.packedbitsize)+' x ';
{ encode as an array of integers with the size on which we
perform the packedbits operations }
llvmaddencodedtype_intern(cgsize_orddef(int_cgsize(packedbitsloadsize(tarraydef(def).elementdef.packedbitsize))),true,false,encodedstr);
encodedstr:=encodedstr+']';
end
else
begin
encodedstr:=encodedstr+'['+tostr(tarraydef(def).elecount)+' x ';
llvmaddencodedtype_intern(tarraydef(def).elementdef,true,false,encodedstr);
encodedstr:=encodedstr+']';
end;
end;
procdef,
procvardef :
begin
if (def.typ=procdef) or
tprocvardef(def).is_addressonly then
begin
llvmaddencodedproctype(tabstractprocdef(def),'',lpd_procvar,encodedstr);
encodedstr:=encodedstr+'*';
end
else
begin
encodedstr:=encodedstr+'{';
{ code pointer }
llvmaddencodedproctype(tabstractprocdef(def),'',lpd_procvar,encodedstr);
{ data pointer (maybe todo: generate actual layout if
available) }
encodedstr:=encodedstr+'*, i8*}';
end;
end;
objectdef :
case tobjectdef(def).objecttype of
odt_class,
odt_objcclass,
odt_object,
odt_cppclass:
begin
{ for now don't handle fields yet }
encodedstr:=encodedstr+'{[i8 x '+tostr(def.size)+']}';
if not noimplicitderef and
is_implicit_pointer_object_type(def) then
encodedstr:=encodedstr+'*'
end;
odt_interfacecom,
odt_interfacecom_function,
odt_interfacecom_property,
odt_interfacecorba,
odt_dispinterface,
odt_objcprotocol:
begin
{ opaque for now }
encodedstr:=encodedstr+'i8*'
end;
else
internalerror(2013100601);
end;
undefineddef,
errordef :
internalerror(2013100604);
else
internalerror(2013100603);
end;
end;
procedure llvmaddencodedtype(def: tdef; inaggregate: boolean; var encodedstr: TSymStr);
begin
llvmaddencodedtype_intern(def,inaggregate,false,encodedstr);
end;
procedure llvmaddencodedabstractrecordtype(def: tabstractrecorddef; var encodedstr: TSymStr);
var
st: tllvmshadowsymtable;
symdeflist: tfpobjectlist;
i: longint;
begin
st:=tabstractrecordsymtable(def.symtable).llvmst;
symdeflist:=st.symdeflist;
if tabstractrecordsymtable(def.symtable).usefieldalignment<>C_alignment then
encodedstr:=encodedstr+'<';
encodedstr:=encodedstr+'{ ';
if symdeflist.count>0 then
begin
i:=0;
if (def.typ=objectdef) and
assigned(tobjectdef(def).childof) and
is_class_or_interface_or_dispinterface(tllvmshadowsymtableentry(symdeflist[0]).def) then
begin
{ insert the struct for the class rather than a pointer to the struct }
if (tllvmshadowsymtableentry(symdeflist[0]).def.typ<>objectdef) then
internalerror(2008070601);
llvmaddencodedtype_intern(tllvmshadowsymtableentry(symdeflist[0]).def,true,true,encodedstr);
inc(i);
end;
while i<symdeflist.count do
begin
if i<>0 then
encodedstr:=encodedstr+', ';
llvmaddencodedtype_intern(tllvmshadowsymtableentry(symdeflist[i]).def,true,false,encodedstr);
inc(i);
end;
end;
encodedstr:=encodedstr+' }';
if tabstractrecordsymtable(def.symtable).usefieldalignment<>C_alignment then
encodedstr:=encodedstr+'>';
end;
procedure llvmextractvalueextinfo(paradef: tdef; var paralocdef: tdef; out signext: tllvmvalueextension);
begin
{ implicit zero/sign extension for ABI compliance? (yes, if the size
of a paraloc is larger than the size of the entire parameter) }
if is_ordinal(paradef) and
is_ordinal(paralocdef) and
(paradef.size<paralocdef.size) then
begin
paralocdef:=paradef;
if is_signed(paradef) then
signext:=lve_signext
else
signext:=lve_zeroext
end
else
signext:=lve_none;
end;
procedure llvmaddencodedparaloctype(hp: tparavarsym; proccalloption: tproccalloption; withparaname: boolean; var first: boolean; var encodedstr: TSymStr);
var
paraloc: PCGParaLocation;
signext: tllvmvalueextension;
usedef: tdef;
begin
paraloc:=hp.paraloc[calleeside].location;
repeat
usedef:=paraloc^.def;
llvmextractvalueextinfo(hp.vardef,usedef,signext);
{ implicit zero/sign extension for ABI compliance? }
if not first then
encodedstr:=encodedstr+', '
else
first:=false;
llvmaddencodedtype(usedef,false,encodedstr);
{ in case signextstr<>'', there should be only one paraloc -> no need
to clear (reason: it means that the paraloc is larger than the
original parameter) }
encodedstr:=encodedstr+llvmvalueextension2str[signext];
{ sret: hidden pointer for structured function result }
if vo_is_funcret in hp.varoptions then
encodedstr:=encodedstr+' sret'
else if not paramanager.push_addr_param(hp.varspez,hp.vardef,proccalloption) and
llvmbyvalparaloc(paraloc) then
encodedstr:=encodedstr+'* byval';
if withparaname then
begin
if paraloc^.llvmloc.loc<>LOC_REFERENCE then
internalerror(2014010803);
encodedstr:=encodedstr+' '+llvmasmsymname(paraloc^.llvmloc.sym);
end;
paraloc:=paraloc^.next;
until not assigned(paraloc);
end;
function llvmencodeproctype(def: tabstractprocdef; const customname: TSymStr; pddecltype: tllvmprocdefdecltype): TSymStr;
begin
result:='';
llvmaddencodedproctype(def,customname,pddecltype,result);
end;
procedure llvmaddencodedproctype(def: tabstractprocdef; const customname: TSymStr; pddecltype: tllvmprocdefdecltype; var encodedstr: TSymStr);
var
usedef: tdef;
paranr: longint;
hp: tparavarsym;
signext: tllvmvalueextension;
first: boolean;
begin
def.init_paraloc_info(calleeside);
first:=true;
{ function result (return-by-ref is handled explicitly) }
if not paramanager.ret_in_param(def.returndef,def) then
begin
usedef:=llvmgetcgparadef(def.funcretloc[calleeside],false);
llvmextractvalueextinfo(def.returndef,usedef,signext);
{ specifying result sign extention information for an alias causes
an error for some reason }
if pddecltype in [lpd_decl,lpd_procvar] then
encodedstr:=encodedstr+llvmvalueextension2str[signext];
encodedstr:=encodedstr+' ';
llvmaddencodedtype_intern(usedef,false,false,encodedstr);
end
else
begin
encodedstr:=encodedstr+' ';
llvmaddencodedtype(voidtype,false,encodedstr);
end;
encodedstr:=encodedstr+' ';
{ add procname? }
if (pddecltype in [lpd_decl]) and
(def.typ=procdef) then
if customname='' then
encodedstr:=encodedstr+llvmmangledname(tprocdef(def).mangledname)
else
encodedstr:=encodedstr+llvmmangledname(customname);
encodedstr:=encodedstr+'(';
{ parameters }
first:=true;
for paranr:=0 to def.paras.count-1 do
begin
hp:=tparavarsym(def.paras[paranr]);
llvmaddencodedparaloctype(hp,def.proccalloption,pddecltype in [lpd_decl],first,encodedstr);
end;
encodedstr:=encodedstr+')'
end;
function llvmgettemprecorddef(fieldtypes: tfplist; packrecords: shortint): trecorddef;
var
i: longint;
res: PHashSetItem;
oldsymtablestack: tsymtablestack;
hrecst: trecordsymtable;
hdef: tdef;
hrecdef: trecorddef;
sym: tfieldvarsym;
typename: string;
begin
typename:='$llvmstruct_';
for i:=0 to fieldtypes.count-1 do
begin
hdef:=tdef(fieldtypes[i]);
case hdef.typ of
orddef:
case torddef(hdef).ordtype of
s8bit,
u8bit:
typename:=typename+'i8';
s16bit,
u16bit:
typename:=typename+'i16';
s32bit,
u32bit:
typename:=typename+'i32';
s64bit,
u64bit:
typename:=typename+'i64';
else
{ other types should not appear currently, add as needed }
internalerror(2014012001);
end;
floatdef:
case tfloatdef(hdef).floattype of
s32real:
typename:=typename+'f32';
s64real:
typename:=typename+'f64';
else
{ other types should not appear currently, add as needed }
internalerror(2014012008);
end;
else
{ other types should not appear currently, add as needed }
internalerror(2014012009);
end;
end;
if not assigned(current_module) then
internalerror(2014012002);
res:=current_module.llvmdefs.FindOrAdd(@typename[1],length(typename));
if not assigned(res^.Data) then
begin
oldsymtablestack:=symtablestack;
{ do not simply push/pop current_module.localsymtable, because
that can have side-effects (e.g., it removes helpers) }
symtablestack:=nil;
hrecst:=trecordsymtable.create(typename,packrecords);
hrecdef:=trecorddef.create(typename,hrecst);
for i:=0 to fieldtypes.count-1 do
begin
sym:=tfieldvarsym.create('$f'+tostr(i),vs_value,tdef(fieldtypes[i]),[]);
hrecst.insert(sym);
hrecst.addfield(sym,vis_hidden);
end;
res^.Data:=hrecdef;
if assigned(current_module.localsymtable) then
current_module.localsymtable.insertdef(tdef(res^.Data))
else
current_module.globalsymtable.insertdef(tdef(res^.Data));
symtablestack:=oldsymtablestack;
end;
result:=trecorddef(res^.Data);
end;
function llvmgetcgparadef(const cgpara: tcgpara; beforevalueext: boolean): tdef;
var
retdeflist: tfplist;
retloc: pcgparalocation;
usedef: tdef;
valueext: tllvmvalueextension;
begin
{ single location }
if not assigned(cgpara.location^.next) then
begin
{ def of the location, except in case of zero/sign-extension }
usedef:=cgpara.location^.def;
if beforevalueext then
llvmextractvalueextinfo(cgpara.def,usedef,valueext);
result:=usedef;
exit
end;
{ multiple locations -> create temp record }
retdeflist:=tfplist.create;
retloc:=cgpara.location;
repeat
retdeflist.add(retloc^.def);
retloc:=retloc^.next;
until not assigned(retloc);
result:=llvmgettemprecorddef(retdeflist,C_alignment);
end;
function llvmencodetype(def: tdef): TSymStr;
begin
result:='';
llvmaddencodedtype(def,false,result);
end;
end.