mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-05 13:38:31 +02:00
2711 lines
109 KiB
ObjectPascal
2711 lines
109 KiB
ObjectPascal
{
|
||
Copyright (c) 2014 by Jonas Maebe
|
||
|
||
This unit implements the code generator for AArch64
|
||
|
||
This program is free software; you can redistribute it and/or modify
|
||
it under the terms of the GNU General Public License as published by
|
||
the Free Software Foundation; either version 2 of the License, or
|
||
(at your option) any later version.
|
||
|
||
This program is distributed in the hope that it will be useful,
|
||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
GNU General Public License for more details.
|
||
|
||
You should have received a copy of the GNU General Public License
|
||
along with this program; if not, write to the Free Software
|
||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||
|
||
****************************************************************************
|
||
}
|
||
unit cgcpu;
|
||
|
||
{$i fpcdefs.inc}
|
||
|
||
interface
|
||
|
||
uses
|
||
globtype,parabase,
|
||
cgbase,cgutils,cgobj,
|
||
aasmbase,aasmtai,aasmdata,aasmcpu,
|
||
cpubase,cpuinfo,
|
||
node,symconst,SymType,symdef,
|
||
rgcpu;
|
||
|
||
type
|
||
|
||
{ tcgaarch64 }
|
||
|
||
tcgaarch64=class(tcg)
protected
  { returns "reg" resized to "size"; no register allocation info is added }
  function makeregsize(reg: tregister; size: tcgsize): tregister; overload;
public
  { --- reference / immediate legalisation helpers --- }

  { rewrites "ref" so that it can be encoded with "op". When a different
    load/store opcode with the same meaning accepts the reference, "op" is
    replaced by that alternative }
  procedure make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
  { not supported on this target: the implementation raises an internal error }
  function getfpuregister(list: TAsmList; size: Tcgsize): Tregister; override;
  { emits "op" with the constant "a" as immediate operand if it can be
    encoded, otherwise loads "a" into a (temporary) register first }
  procedure handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);

  { --- register allocator set-up --- }
  procedure init_register_allocators;override;
  procedure done_register_allocators;override;
  function getmmregister(list:TAsmList;size:tcgsize):tregister;override;

  { legalises "ref" for "op", emits the load/store and returns the
    reference that was actually used }
  function handle_load_store(list:TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;

  { --- calls --- }
  procedure a_call_name(list:TAsmList;const s:string; weak: boolean);override;
  procedure a_call_reg(list:TAsmList;Reg:tregister);override;

  { --- general purpose (integer) operations --- }
  procedure maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
  procedure a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);override;
  procedure a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);override;
  procedure a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);override;
  procedure a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);override;
  procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
  procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);override;

  { --- integer moves between constants, registers and memory --- }
  procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
  procedure a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); override;
  procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister;const ref: TReference);override;
  procedure a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference); override;
  procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister);override;
  procedure a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister); override;
  procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
  procedure a_loadaddr_ref_reg(list: TAsmList; const ref: TReference; r: tregister);override;

  { --- fpu moves (unused: all floating point goes through the vector unit) --- }
  procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
  procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
  procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;

  { --- vector (mm) register moves and operations --- }
  procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister;shuffle : pmmshuffle);override;
  procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister; shuffle: pmmshuffle);override;
  procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: TReference; shuffle: pmmshuffle);override;

  procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
  procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;

  procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;

  procedure a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;

  { --- comparisons, jumps and flag handling --- }
  procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
  procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
  procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
  procedure a_jmp_name(list: TAsmList; const s: string);override;
  procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);{ override;}
  procedure a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);override;
  procedure g_flags2reg(list: TAsmList; size: tcgsize; const f:tresflags; reg: tregister);override;

  { --- overflow checking --- }
  procedure g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);override;
  procedure g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc: tlocation);override;

  { --- routine entry/exit and register saving --- }
  procedure g_stackpointer_alloc(list: TAsmList; localsize: longint);override;
  procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
  procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);override;
  procedure g_maybe_got_init(list: TAsmList); override;
  procedure g_restore_registers(list: TAsmList);override;
  procedure g_save_registers(list: TAsmList);override;

  { --- miscellaneous helpers --- }
  procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
  procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
  procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean);override;
  procedure g_profilecode(list: TAsmList);override;
private
  { helpers operating on the superregister range lowsr..highsr of type "rt" }
  function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
  procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
end;
|
||
|
||
procedure create_codegen;
|
||
|
||
const
  { opcode to use for each topcg when the second operand is a register;
    entries are positional, i.e. they follow the declaration order of topcg
    (declared outside this unit -- keep both tables in sync with it) }
  TOpCG2AsmOpReg: array[topcg] of TAsmOp = (
    A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,
    A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,
    A_ASRV,A_LSLV,A_LSRV,
    A_SUB,A_EOR,A_NONE,A_RORV
  );
  { same table for an immediate second operand: it differs from the register
    table only in the shift/rotate entries (ASR/LSL/LSR/ROR instead of the
    ...V register forms) }
  TOpCG2AsmOpImm: array[topcg] of TAsmOp = (
    A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,
    A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,
    A_ASR,A_LSL,A_LSR,
    A_SUB,A_EOR,A_NONE,A_ROR
  );
  { condition code for each topcmp, again in declaration order of topcmp }
  TOpCmp2AsmCond: array[topcmp] of TAsmCond = (
    C_NONE,C_EQ,C_GT,C_LT,C_GE,C_LE,C_NE,
    C_LS,C_CC,C_CS,C_HI
  );

  { NOTE(review): presumably the stack page size in bytes used for stack
    probing on Windows targets -- confirm against its users }
  winstackpagesize = 4096;
|
||
|
||
implementation
|
||
|
||
uses
|
||
globals,verbose,systems,cutils,cclasses,
|
||
paramgr,fmodule,
|
||
symtable,symsym,
|
||
tgobj,
|
||
ncgutil,
|
||
procinfo,cpupi;
|
||
|
||
|
||
procedure tcgaarch64.make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);

  { makes sure preferred_newbasereg holds a usable register: when the caller
    passed NR_NO a fresh address register is allocated }
  procedure ensure_newbasereg;
    begin
      if preferred_newbasereg=NR_NO then
        preferred_newbasereg:=getaddressregister(list);
    end;

  { emits "add preferred_newbasereg, ref.base, ref.index, <shift of ref>";
    "add" supports a superset of the shift modes supported by load/store
    instructions }
  procedure add_shifted_index_to_newbase;
    var
      shiftop: tshifterop;
    begin
      shifterop_reset(shiftop);
      shiftop.shiftmode:=ref.shiftmode;
      shiftop.shiftimm:=ref.shiftimm;
      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,shiftop));
    end;

  var
    href: treference;
    accsize: longint;
    { the symbol can be addressed directly (page + page offset) instead of
      through the GOT }
    directsym: boolean;
    { ref.offset fits the signed 9 bit immediate range -256..255 }
    fits_simm9: boolean;
  begin
    { Turns "ref" into something "op" can encode, emitting helper
      instructions into "list" where needed. "op" may be swapped between
      LDR/STR and LDUR/STUR. "preferred_newbasereg" (or a freshly allocated
      address register if it is NR_NO or clashes with a register that must
      be kept) receives any intermediate address. }

    { an index without a base simply becomes the base }
    if ref.base=NR_NO then
      begin
        if ref.shiftmode<>SM_None then
          internalerror(2014110701);
        ref.base:=ref.index;
        ref.index:=NR_NO;
      end;

    { no arbitrary scale factor support (the generic code doesn't set it,
      AArch64-specific code shouldn't either) }
    if not(ref.scalefactor in [0,1]) then
      internalerror(2014111002);

    case simple_ref_type(op,size,oppostfix,ref) of
      sr_simple:
        { already encodable, nothing to do }
        exit;
      sr_internal_illegal:
        internalerror(2014121702);
      sr_complex:
        { handled by the code below } ;
    end;

    { ---- step 1: get rid of the symbol ---- }
    if assigned(ref.symbol) then
      begin
        { internal "load symbol" instructions should already be valid }
        if assigned(ref.symboldata) or
           (ref.refaddr in [addr_pic,addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset]) then
          internalerror(2014110802);
        { no relative symbol support (needed) yet }
        if assigned(ref.relsymbol) then
          internalerror(2014111001);
        { Loading a symbol address (whether it's in the GOT or not) takes two
          steps: first load the page on which it is located, then either add
          the offset in the page or load the value at that offset in the
          page. The final GOT load can be relaxed by the linker in case the
          variable itself can be stored directly in the GOT. }

        { the new base must not alias a register of the reference that is
          still needed afterwards }
        if (preferred_newbasereg=NR_NO) or
           (ref.base=preferred_newbasereg) or
           (ref.index=preferred_newbasereg) then
          preferred_newbasereg:=getaddressregister(list);

        { code symbols defined in the current compilation unit do not have
          to be accessed via the GOT }
        directsym:=
          ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL,AT_DATA]) and
           (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
          ((ref.symbol.typ=AT_DATA) and
           (ref.symbol.bind=AB_LOCAL)) or
          (target_info.system=system_aarch64_win64);

        { load the (GOT) page }
        reference_reset_symbol(href,ref.symbol,0,8,[]);
        if directsym then
          href.refaddr:=addr_page
        else
          href.refaddr:=addr_gotpage;
        list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));

        { add the page offset, or load the GOT entry (= address of the
          variable) }
        reference_reset_base(href,preferred_newbasereg,0,ctempposinvalid,sizeof(pint),[]);
        href.symbol:=ref.symbol;
        if directsym then
          begin
            href.base:=NR_NO;
            href.refaddr:=addr_pageoffset;
            list.concat(taicpu.op_reg_reg_ref(A_ADD,preferred_newbasereg,preferred_newbasereg,href));
          end
        else
          begin
            href.refaddr:=addr_gotpageoffset;
            { go through a_load_ref_reg() rather than encoding the LDR
              directly, so that the validity of the reference is checked }
            a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,preferred_newbasereg);
          end;

        { merge the symbol address into the reference }
        if ref.base=NR_NO then
          ref.base:=preferred_newbasereg
        else if ref.index=NR_NO then
          ref.index:=preferred_newbasereg
        else
          begin
            { ref.base goes second so that the add stays valid in case
              ref.base is SP }
            a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,preferred_newbasereg,ref.base,preferred_newbasereg);
            ref.base:=preferred_newbasereg
          end;
        ref.symbol:=nil;
      end;

    { ---- step 2: base & index ---- }
    if (ref.base<>NR_NO) and
       (ref.index<>NR_NO) then
      begin
        case op of
          A_LDR, A_STR:
            begin
              if (ref.shiftmode=SM_None) and
                 (ref.shiftimm<>0) then
                internalerror(2014110805);
              { wrong shift? (possible in case of something like
                array_of_2byte_rec[x].bytefield -> shift will be set 1, but
                the final load is a 1 byte -> can't use shift after all)

                NOTE(review): tcgsizep2size[size] is used as a log2 further
                down in this routine (accsize:=1 shl tcgsizep2size[size]);
                taking BsfDWord of it once more looks suspicious -- confirm
                whether the comparison was meant to be against
                tcgsizep2size[size] itself. Behaviour kept unchanged. }
              if (ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
                 ((ref.shiftimm<>BsfDWord(tcgsizep2size[size])) or
                  (ref.offset<>0)) then
                begin
                  ensure_newbasereg;
                  add_shifted_index_to_newbase;
                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                  { the offset may still be invalid -> continue with the
                    base + offset handling below }
                end
              else if ref.offset<>0 then
                begin
                  { ref.index is kept, so it must not be overwritten }
                  if (preferred_newbasereg=NR_NO) or
                     (ref.index=preferred_newbasereg) then
                    preferred_newbasereg:=getaddressregister(list);
                  { add to the base and not to the index, because the index
                    may be scaled; this works even if the base is SP }
                  a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                  ref.offset:=0;
                  ref.base:=preferred_newbasereg;
                  { finished }
                  exit;
                end
              else
                { valid as is }
                exit;
            end;
          { todo }
          A_LD1,A_LD2,A_LD3,A_LD4,
          A_ST1,A_ST2,A_ST3,A_ST4:
            internalerror(2014110702);
          { these don't support base+index }
          A_LDUR,A_STUR,
          A_LDP,A_STP:
            begin
              { these either don't support pre-/post-indexing, or don't
                support it with base+index }
              if ref.addressmode<>AM_OFFSET then
                internalerror(2014110911);
              ensure_newbasereg;
              if ref.shiftmode<>SM_None then
                add_shifted_index_to_newbase
              else
                a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
              reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
              { continue with the base + offset handling below, since the
                offset may still be too big }
            end;
          else
            internalerror(2014110903);
        end;
      end;

    { ---- step 3: base + offset ---- }
    if ref.base<>NR_NO then
      begin
        if ref.offset=0 then
          exit;

        fits_simm9:=(ref.offset>=-256) and (ref.offset<=255);

        if (ref.addressmode=AM_OFFSET) and
           (op in [A_LDR,A_STR]) and
           fits_simm9 then
          begin
            { valid offset for LDUR/STUR -> use that }
            if op=A_LDR then
              op:=A_LDUR
            else
              op:=A_STUR
          end
        else if (op in [A_LDUR,A_STUR]) and
           (not fits_simm9 or
            (ref.addressmode<>AM_OFFSET)) then
          begin
            { not a valid LDUR/STUR -> use LDR/STR }
            if op=A_LDUR then
              op:=A_LDR
            else
              op:=A_STR
          end;

        case op of
          A_LDR,A_STR:
            begin
              case ref.addressmode of
                AM_PREINDEXED:
                  begin
                    { since the loaded/stored register cannot be the same
                      as the base register, the offset can safely be added
                      to the base if it doesn't fit }
                    if not fits_simm9 then
                      begin
                        a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base);
                        ref.offset:=0;
                      end;
                  end;
                AM_POSTINDEXED:
                  begin
                    { post-indexing cannot be emulated if the offset has to
                      be folded into the base register }
                    if not fits_simm9 then
                      internalerror(2014110909);
                    { ok }
                  end;
                AM_OFFSET:
                  begin
                    { unsupported offset -> fold into base register }
                    accsize:=1 shl tcgsizep2size[size];
                    if (ref.offset<0) or
                       (ref.offset>(((1 shl 12)-1)*accsize)) or
                       ((ref.offset mod accsize)<>0) then
                      begin
                        ensure_newbasereg;
                        { can the offset be split between an
                          "add/sub (imm12 shl 12)" and the load (also an
                          imm12)?
                          -- the offset from the load will always be added,
                             that's why the lower bound has a smaller range
                             than the upper bound; it must also be a
                             multiple of the access size }
                        if (ref.offset>=-(((1 shl 12)-1) shl 12)) and
                           (ref.offset<=((1 shl 12)-1) shl 12 + ((1 shl 12)-1)) and
                           ((ref.offset mod accsize)=0) then
                          begin
                            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,(ref.offset shr 12) shl 12,ref.base,preferred_newbasereg);
                            ref.offset:=ref.offset-(ref.offset shr 12) shl 12;
                          end
                        else
                          begin
                            a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                            ref.offset:=0;
                          end;
                        reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                      end;
                  end
              end;
            end;
          A_LDP,A_STP:
            begin
              { unsupported offset -> fold into base register (these
                instructions support all addressmodes) }
              if (ref.offset<-(1 shl (6+tcgsizep2size[size]))) or
                 (ref.offset>(1 shl (6+tcgsizep2size[size]))-1) then
                begin
                  case ref.addressmode of
                    AM_POSTINDEXED:
                      { don't emulate post-indexing if the offset has to be
                        folded into the base register }
                      internalerror(2014110910);
                    AM_PREINDEXED:
                      { the offset must be added to the current base
                        register itself }
                      preferred_newbasereg:=ref.base;
                    AM_OFFSET:
                      ensure_newbasereg;
                  end;
                  a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
                  reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,ref.alignment,ref.volatility);
                end
            end;
          A_LDUR,A_STUR:
            begin
              { valid, checked above }
            end;
          { todo }
          A_LD1,A_LD2,A_LD3,A_LD4,
          A_ST1,A_ST2,A_ST3,A_ST4:
            internalerror(2014110908);
          else
            internalerror(2014110708);
        end;
        { done }
        exit;
      end;

    { ---- step 4: only an offset -> change to base (+ offset 0) ---- }
    ensure_newbasereg;
    a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
    reference_reset_base(ref,preferred_newbasereg,0,ref.temppos,newalignment(8,ref.offset),ref.volatility);
  end;
|
||
|
||
|
||
function tcgaarch64.makeregsize(reg: tregister; size: tcgsize): tregister;
  begin
    { Returns a copy of "reg" whose subregister is the one that
      cgsize2subreg() selects for "size" and the register type of "reg".
      Nothing is emitted and no allocation info is recorded. }
    result:=reg;
    setsubreg(result,cgsize2subreg(getregtype(reg),size));
  end;
|
||
|
||
|
||
function tcgaarch64.getfpuregister(list: TAsmList; size: Tcgsize): Tregister;
  begin
    { Requesting an FPU register is never valid in this code generator, so
      this always ends in an internal error. The result is assigned only so
      that it is formally initialised. }
    result:=NR_NO;
    internalerror(2014122110);
  end;
|
||
|
||
|
||
function tcgaarch64.handle_load_store(list: TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
  var
    memop: taicpu;
  begin
    { "op" and "ref" are local copies here: make_simple_ref() may replace
      the opcode and rewrite the reference (emitting helper instructions
      into "list") before the actual load/store is appended }
    make_simple_ref(list,op,size,oppostfix,ref,NR_NO);
    memop:=taicpu.op_reg_ref(op,reg,ref);
    list.concat(setoppostfix(memop,oppostfix));
    { hand the reference that was really used back to the caller }
    result:=ref;
  end;
|
||
|
||
|
||
procedure tcgaarch64.handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
  var
    instr: taicpu;
    so: tshifterop;
    { true if "tmpreg" was supplied by the caller and reserved here with
      getcpuregister(), so it has to be released again once the
      instruction using it has been emitted }
    release_tmpreg: boolean;
  begin
    { Emits "op" with "src" and the constant "a" as operands:
        usedest=true  -> op dst,src,<a>
        usedest=false -> op src,<a>
      "a" is encoded as an imm12 or as an "imm12 lsl 12" immediate if
      possible; otherwise it is first loaded into "tmpreg" (a new integer
      register is allocated if tmpreg=NR_NO). If "setflags" is set, the
      instruction gets the PF_S postfix. }
    release_tmpreg:=false;

    { imm12 }
    if (a>=0) and
       (a<=((1 shl 12)-1)) then
      begin
        if usedest then
          instr:=taicpu.op_reg_reg_const(op,dst,src,a)
        else
          instr:=taicpu.op_reg_const(op,src,a)
      end
    { imm12 lsl 12 }
    else if (a and not(((tcgint(1) shl 12)-1) shl 12))=0 then
      begin
        { start from a cleared shifter operand, like every other place in
          this unit that fills in a tshifterop }
        shifterop_reset(so);
        so.shiftmode:=SM_LSL;
        so.shiftimm:=12;
        if usedest then
          instr:=taicpu.op_reg_reg_const_shifterop(op,dst,src,a shr 12,so)
        else
          instr:=taicpu.op_reg_const_shifterop(op,src,a shr 12,so)
      end
    else
      begin
        { todo: other possible optimizations (e.g. load 16 bit constant in
          register and then add/sub/cmp/cmn shifted the rest) }
        if tmpreg=NR_NO then
          tmpreg:=getintregister(list,size)
        else
          begin
            release_tmpreg:=true;
            getcpuregister(list,tmpreg);
          end;
        a_load_const_reg(list,size,a,tmpreg);
        if usedest then
          instr:=taicpu.op_reg_reg_reg(op,dst,src,tmpreg)
        else
          instr:=taicpu.op_reg_reg(op,src,tmpreg);
      end;

    if setflags then
      setoppostfix(instr,PF_S);
    list.concat(instr);

    { release the caller-supplied temp only after the instruction reading it
      is in the list, so that the deallocation marker follows its last use }
    if release_tmpreg then
      ungetcpuregister(list,tmpreg);
  end;
|
||
|
||
|
||
{****************************************************************************
|
||
Assembler code
|
||
****************************************************************************}
|
||
|
||
procedure tcgaarch64.init_register_allocators;
  begin
    inherited;

    { integer allocator: X0..X17 and X19..X28 are handed out.
      X29 is left out on purpose; maybe it can be enabled in the future for
      leaf functions (it's the frame pointer). }
    rg[R_INTREGISTER]:=trgintcpu.create(
      R_INTREGISTER,R_SUBWHOLE,
      [RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
       RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
       RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28],
      first_int_imreg,[]);

    { vector allocator: all of Q0..Q31 }
    rg[R_MMREGISTER]:=trgcpu.create(
      R_MMREGISTER,R_SUBMMD,
      [RS_Q0,RS_Q1,RS_Q2,RS_Q3,RS_Q4,RS_Q5,RS_Q6,RS_Q7,
       RS_Q8,RS_Q9,RS_Q10,RS_Q11,RS_Q12,RS_Q13,RS_Q14,RS_Q15,
       RS_Q16,RS_Q17,RS_Q18,RS_Q19,RS_Q20,RS_Q21,RS_Q22,RS_Q23,
       RS_Q24,RS_Q25,RS_Q26,RS_Q27,RS_Q28,RS_Q29,RS_Q30,RS_Q31],
      first_mm_imreg,[]);
  end;
|
||
|
||
|
||
procedure tcgaarch64.done_register_allocators;
  begin
    { release the allocators owned by this code generator before handing
      over to the inherited clean-up }
    rg[R_INTREGISTER].free;
    { init_register_allocators in this unit creates no FPU allocator;
      calling free on an unassigned instance is harmless }
    rg[R_FPUREGISTER].free;
    rg[R_MMREGISTER].free;
    inherited;
  end;
|
||
|
||
|
||
function tcgaarch64.getmmregister(list: TAsmList; size: tcgsize):tregister;
  var
    mmsub: tsubregister;
  begin
    { Only single and double precision float sizes are accepted; any other
      size is an internal error. }
    case size of
      OS_F32:
        mmsub:=R_SUBMMS;
      OS_F64:
        mmsub:=R_SUBMMD;
      else
        internalerror(2014102701);
    end;
    result:=rg[R_MMREGISTER].getregister(list,mmsub);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_call_name(list: TAsmList; const s: string; weak: boolean);
  var
    callee: tasmsymbol;
  begin
    { Emits "bl <s>"; the only difference between the two variants is
      whether the symbol is referenced weakly. }
    if weak then
      callee:=current_asmdata.WeakRefAsmSymbol(s,AT_FUNCTION)
    else
      callee:=current_asmdata.RefAsmSymbol(s,AT_FUNCTION);
    list.concat(taicpu.op_sym(A_BL,callee));
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_call_reg(list:TAsmList;Reg:tregister);
  var
    callinstr: taicpu;
  begin
    { indirect call: "blr <Reg>" }
    callinstr:=taicpu.op_reg(A_BLR,reg);
    list.concat(callinstr);
  end;
|
||
|
||
|
||
{********************** load instructions ********************}
|
||
|
||
procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg : tregister);

  { appends "<mnemonic> reg,#imm16,lsl #amount" }
  procedure emit_imm16_lsl(mnemonic: tasmop; imm16: word; amount: byte);
    var
      shiftop: tshifterop;
    begin
      shifterop_reset(shiftop);
      shiftop.shiftmode:=SM_LSL;
      shiftop.shiftimm:=amount;
      list.concat(taicpu.op_reg_const_shifterop(mnemonic,reg,imm16,shiftop));
    end;

  var
    opc: tasmop;
    shift: byte;
    reginited,doinverted,extendedsize: boolean;
    manipulated_a: tcgint;
    leftover_a: word;
    orrsrc: tregister;
  begin
    { Loads the constant "a" into "reg" using as few instructions as this
      routine can find: a single MOVZ/MOVN, a single logical-immediate MOV,
      an ORR+MOVK pair, or as a last resort a MOVZ/MOVN followed by MOVKs
      for every non-trivial 16 bit part. }
{$ifdef extdebug}
    list.concat(tai_comment.Create(strpnew('Generating constant ' + tostr(a) + ' / $' + hexstr(a, 16))));
{$endif extdebug}
    extendedsize := (size in [OS_64,OS_S64]);

    case a of
      { small positive number: one MOVZ }
      $0..$FFFF:
        begin
          list.concat(taicpu.op_reg_const(A_MOVZ, reg, a));
          Exit;
        end;
      { small negative number: one MOVN }
      -65536..-1:
        begin
          list.concat(taicpu.op_reg_const(A_MOVN, reg, Word(not a)));
          Exit;
        end;
      { can be represented more compactly as a negative 32 bit number }
      $FFFF0000..$FFFFFFFF:
        begin
          { a value loaded into a 32 bit register is automatically
            zero-extended to 64 bit }
          list.concat(taicpu.op_reg_const(A_MOVN, makeregsize(reg,OS_32), Word(not a)));
          Exit;
        end;
      else
        begin
          if not extendedsize then
            { Mostly so programmers don't get confused when they view the
              disassembly and 'a' is sign-extended to 64-bit, say, but it
              also avoids potential problems with third-party assemblers if
              the number is out of bounds for a given size }
            a := Cardinal(a);

          { a valid shifter constant can be encoded in ORR as is }
          if is_shifter_const(a,size) then
            begin
              { Use the synthetic "MOV" instruction instead of
                "ORR reg,wzr,#a" (an alias), since AArch64 conventions
                prefer this and it's clearer in the disassembly }
              list.concat(taicpu.op_reg_const(A_MOV,reg,a));
              Exit;
            end;

          { If the value of a fits into 32 bits, it's fastest to use
            movz/movk regardless }
          if extendedsize and ((a shr 32) <> 0) then
            begin
              { Try ORR followed by MOVK: copy the 3rd word to the 1st word
                to form the ORR constant, then overwrite the 1st word. The
                alternative would require 4 instructions. This sequence is
                common when division reciprocals are calculated (e.g. 3
                produces AAAAAAAAAAAAAAAB). }
              leftover_a := word(a and $FFFF);
              manipulated_a := (a and $FFFFFFFFFFFF0000) or ((a shr 32) and $FFFF);
              { if manipulated_a = a, don't check, because is_shifter_const
                was already called for a and it returned False. Reduces
                processing time. [Kit] }
              if (manipulated_a <> a) and is_shifter_const(manipulated_a, OS_64) then
                begin
                  { Encode value as:
                      orr  reg,xzr,manipulated_a
                      movk reg,#(leftover_a)

                    "orr" rather than "mov" is used so that the assembly
                    dump better implies that something special is happening
                    with the number arrangement. }
                  list.concat(taicpu.op_reg_reg_const(A_ORR, reg, NR_XZR, manipulated_a));
                  list.concat(taicpu.op_reg_const(A_MOVK, reg, leftover_a));
                  Exit;
                end;

              { Try ORR followed by MOVK the other way round: copy the 2nd
                word to the 4th word to form the ORR constant, then
                overwrite the 4th word. The alternative would require 3
                instructions }
              leftover_a := word(a shr 48);
              manipulated_a := (a and $0000FFFFFFFFFFFF);

              if manipulated_a = $0000FFFFFFFFFFFF then
                begin
                  { even better: a single MOVN on the last word suffices }
                  emit_imm16_lsl(A_MOVN, word(not leftover_a), 48);
                  Exit;
                end;

              manipulated_a := manipulated_a or (((a shr 16) and $FFFF) shl 48);
              { same shortcut as above: skip the check if nothing changed }
              if (manipulated_a <> a) and is_shifter_const(manipulated_a, OS_64) then
                begin
                  { Encode value as:
                      orr  reg,xzr,manipulated_a
                      movk reg,#(leftover_a),lsl #48 }
                  list.concat(taicpu.op_reg_reg_const(A_ORR, reg, NR_XZR, manipulated_a));
                  emit_imm16_lsl(A_MOVK, leftover_a, 48);
                  Exit;
                end;

              { Values in the negative range $FFFF000000000000..-65537 can
                be stored more efficiently inverted -- except for
                $FFFF0000FFFF0000..$FFFF0000FFFFFFFF, which is stored more
                efficiently as the first 16 bits followed by a shifter
                constant }
              doinverted :=
                (a >= TCgInt($FFFF000000000000)) and (a <= -65537) and
                not ((a >= TCgInt($FFFF0000FFFF0000)) and
                     (a <= TCgInt($FFFF0000FFFFFFFF)));
              if doinverted then
                a := not a;
            end
          else
            doinverted:=False;
        end;
    end;

    reginited:=false;
    shift:=0;

    { the first instruction defines the whole register; MOVN is fed the
      (already inverted) value }
    if doinverted then
      opc:=A_MOVN
    else
      opc:=A_MOVZ;

    { walk over "a" in 16 bit parts, lowest part first }
    repeat
      if (word(a)<>0) then
        begin
          { is the leftover a shifter constant? (not checked if it can be
            represented just as effectively with movz/movk, as this check
            is expensive) }
          if not doinverted and
             ((shift<tcgsize2size[size]*(8 div 2)) and
              ((a shr 16)<>0)) and
             is_shifter_const(a shl shift,size) then
            begin
              { OR it into what was built so far, or into zero if nothing
                has been written to reg yet }
              if reginited then
                orrsrc:=reg
              else
                orrsrc:=makeregsize(NR_XZR,size);
              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,orrsrc,a shl shift));
              exit;
            end;

          { set all 16 bit parts <> 0 }
          if shift=0 then
            begin
              list.concat(taicpu.op_reg_const(opc,reg,word(a)));
              reginited:=true;
            end
          else if not reginited then
            begin
              emit_imm16_lsl(opc,word(a),shift);
              reginited:=true;
            end
          else if doinverted then
            { "a" holds the inverted value, MOVK needs the real bits }
            emit_imm16_lsl(A_MOVK,word(not a),shift)
          else
            emit_imm16_lsl(A_MOVK,word(a),shift);
        end;

      a:=a shr 16;
      inc(shift,16);
    until a = 0;

    if not reginited then
      internalerror(2014102702);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
  var
    zeroreg: tregister;
    lastbyte, curbyte: treference;
    i: Integer;
  begin
    { only storing zero gets special treatment (the zero register can be
      used); everything else is left to the inherited implementation }
    if a<>0 then
      begin
        inherited;
        exit;
      end;

    { reference to the last byte that will be written }
    lastbyte:=ref;
    inc(lastbyte.offset,tcgsize2size[size]-1);

    if (tcgsize2size[size]>1) and (ref.alignment=1) and
       (simple_ref_type(A_STUR,OS_8,PF_None,ref)=sr_simple) and
       (simple_ref_type(A_STUR,OS_8,PF_None,lastbyte)=sr_simple) then
      begin
        { byte-aligned multi-byte store whose first and last byte are both
          directly addressable: clear it one byte at a time }
        curbyte:=ref;
        for i:=0 to tcgsize2size[size]-1 do
          begin
            a_load_const_ref(list,OS_8,0,curbyte);
            inc(curbyte.offset);
          end;
      end
    else
      begin
        { store the zero register of the matching width }
        if size in [OS_64,OS_S64] then
          zeroreg:=NR_XZR
        else
          zeroreg:=NR_WZR;
        a_load_reg_ref(list,size,size,zeroreg,ref);
      end;
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  var
    postfix: toppostfix;
    extended: tregister;
  begin
    { Stores "reg" (holding a value of "fromsize") to "ref" as a value of
      "tosize". }
    if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
      begin
        { same size or truncation: simply store the low part }
        fromsize:=tosize;
        reg:=makeregsize(list,reg,fromsize);
      end
    { have a 32 bit register but need a 64 bit one? }
    else if tosize in [OS_64,OS_S64] then
      begin
        if fromsize in [OS_S8,OS_S16,OS_S32] then
          begin
            { sign extend into a new register; reg itself can't be
              overwritten, it may be a constant reg }
            extended:=getintregister(list,tosize);
            a_load_reg_reg(list,fromsize,tosize,reg,extended);
            reg:=extended;
          end
        else
          { top 32 bit are zero by default }
          reg:=makeregsize(reg,OS_64);
        fromsize:=tosize;
      end;

    { insufficiently aligned destination (not applied on Darwin): use the
      unaligned store helper }
    if (target_info.system<>system_aarch64_darwin) and (ref.alignment<>0) and
       (ref.alignment<tcgsize2size[tosize]) then
      begin
        a_load_reg_ref_unaligned(list,fromsize,tosize,reg,ref);
        exit;
      end;

    { stores only need a postfix for byte and halfword accesses;
      signedness plays no role }
    case tosize of
      OS_8,OS_S8:
        postfix:=PF_B;
      OS_16,OS_S16:
        postfix:=PF_H;
      OS_32,OS_S32,
      OS_64,OS_S64:
        postfix:=PF_None;
      else
        InternalError(200308299);
    end;
    handle_load_store(list,A_STR,tosize,postfix,reg,ref);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
  var
    postfix: toppostfix;
  begin
    { Loads a value of "fromsize" from "ref" into "reg" as a value of
      "tosize". }
    if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
      fromsize:=tosize;

    { Make sure all bits of the 32/64 bit register are always set correctly:
       * the default behaviour is to zero-extend to the entire (64 bit)
         register -> unsigned 8/16/32 bit loads only exist with a 32 bit
         target register, as the upper 32 bit will be zeroed implicitly
         -> always make the target register 32 bit
       * signed loads exist both with 32 and 64 bit target registers,
         depending on whether the value should be sign extended to 32 or
         to 64 bit (if sign extended to 32 bit, the upper 32 bits of the
         corresponding 64 bit register are again zeroed) -> nothing has to
         be changed (there are only 32 and 64 bit registers), except that
         when loading an OS_S32 into a 32 bit register, sign extension is
         not needed/cannot be used }
    if fromsize in [OS_8,OS_16,OS_32] then
      reg:=makeregsize(reg,OS_32);

    { insufficiently aligned source (not applied on Darwin): use the
      unaligned load helper }
    if (target_info.system<>system_aarch64_darwin) and (ref.alignment<>0) and
       (ref.alignment<tcgsize2size[fromsize]) then
      begin
        a_load_ref_reg_unaligned(list,fromsize,tosize,ref,reg);
        exit;
      end;

    { pick the load postfix from the size and signedness of the source }
    case fromsize of
      OS_8:
        postfix:=PF_B;
      OS_S8:
        postfix:=PF_SB;
      OS_16:
        postfix:=PF_H;
      OS_S16:
        postfix:=PF_SH;
      OS_S32:
        begin
          { no sign extension for an R_SUBD target register, see above }
          if getsubreg(reg)=R_SUBD then
            postfix:=PF_NONE
          else
            postfix:=PF_SW;
        end;
      OS_32,
      OS_64,
      OS_S64:
        postfix:=PF_None;
      else
        InternalError(200308297);
    end;
    handle_load_store(list,A_LDR,fromsize,postfix,reg,ref);

    { clear upper 16 bits if the value was negative }
    if (fromsize=OS_S8) and (tosize=OS_16) then
      a_load_reg_reg(list,fromsize,tosize,reg,reg);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
  { Loads a "fromsize" value from the insufficiently aligned reference
    "ref" into "register" (which is to hold a "tosize" value) by combining
    several smaller loads:
      * 64 bit values are assembled from two 32 bit loads (a single LDP if
        the reference is 4 byte aligned and simple enough for LDP)
      * 16/32 bit values are assembled from 16 bit loads when the
        reference is 2 byte aligned, and from byte loads otherwise
      * every other size is left to the inherited implementation }
  var
    href: treference;
    hreg1, hreg2, tmpreg,tmpreg2: tregister;
    i : Integer;
  begin
    case fromsize of
      OS_64,OS_S64:
        begin
          { split into two 32 bit loads: hreg1 receives the word at the
            lower address, hreg2 the word 4 bytes above it }
          hreg1:=getintregister(list,OS_32);
          hreg2:=getintregister(list,OS_32);
          { can we use LDP? }
          if (ref.alignment=4) and
             (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
            list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
          else
            begin
              a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
              href:=ref;
              inc(href.offset,4);
              a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
            end;
          { On big endian the word at the lower address is the most
            significant one, so the roles of the two registers have to be
            exchanged. This must happen after the loads: swapping two
            freshly allocated registers before they are loaded (as was
            done previously) has no effect on the generated code. }
          if target_info.endian=endian_big then
            begin
              tmpreg:=hreg1;
              hreg1:=hreg2;
              hreg2:=tmpreg;
            end;
          { hreg1 = low word, hreg2 = high word: move the low word into
            the target and insert the high word into bits 32..63 }
          a_load_reg_reg(list,OS_32,OS_64,hreg1,register);
          list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
        end;
      OS_16,OS_S16,
      OS_32,OS_S32:
        begin
          if ref.alignment=2 then
            begin
              { assemble the value from 16 bit pieces, starting with the
                least significant one (located at the end on big endian) }
              href:=ref;
              if target_info.endian=endian_big then
                inc(href.offset,tcgsize2size[fromsize]-2);
              tmpreg:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_16,OS_32,href,tmpreg);
              tmpreg2:=getintregister(list,OS_32);
              for i:=1 to (tcgsize2size[fromsize]-1) div 2 do
                begin
                  if target_info.endian=endian_big then
                    dec(href.offset,2)
                  else
                    inc(href.offset,2);
                  a_load_ref_reg(list,OS_16,OS_32,href,tmpreg2);
                  list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*16,16));
                end;
              a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
            end
          else
            begin
              { assemble the value byte by byte, starting with the least
                significant byte (located at the end on big endian) }
              href:=ref;
              if target_info.endian=endian_big then
                inc(href.offset,tcgsize2size[fromsize]-1);
              tmpreg:=getintregister(list,OS_32);
              a_load_ref_reg(list,OS_8,OS_32,href,tmpreg);
              tmpreg2:=getintregister(list,OS_32);
              for i:=1 to tcgsize2size[fromsize]-1 do
                begin
                  if target_info.endian=endian_big then
                    dec(href.offset)
                  else
                    inc(href.offset);
                  a_load_ref_reg(list,OS_8,OS_32,href,tmpreg2);
                  list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*8,8));
                end;
              { sign extend the low 16 bits first when the destination is
                a signed 8/16 bit value }
              if (tosize in [OS_S8,OS_S16]) then
                list.concat(taicpu.op_reg_reg(A_SXTH,tmpreg,tmpreg));
              a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
            end;
        end;
      else
        inherited;
    end;
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_load_reg_reg(list:TAsmList;fromsize,tosize:tcgsize;reg1,reg2:tregister);
  { Copies reg1 (holding a "fromsize" value) to reg2 (which is to hold a
    "tosize" value). A plain MOV is emitted and reported to the register
    allocator when no conversion is required; otherwise an explicit
    extension/truncation instruction is generated. }
  var
    moveinstr: taicpu;
    needs_conversion: boolean;
  begin
    { we use both 32 and 64 bit registers -> a conversion is inserted when
      we have to truncate/sign extend inside the (32 or 64 bit) register
      holding the value, and when we sign extend from a 32 to a 64 bit
      register }
    needs_conversion:=
      (tcgsize2size[fromsize]>tcgsize2size[tosize]) or
      ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
       (fromsize<>tosize) and
       not(fromsize in [OS_32,OS_S32,OS_64,OS_S64])) or
      ((fromsize in [OS_S8,OS_S16,OS_S32]) and
       (tosize in [OS_64,OS_S64])) or
      { needs to mask out the sign in the top 16 bits }
      ((fromsize=OS_S8) and
       (tosize=OS_16));

    if not needs_conversion then
      begin
        { a 32 -> 32 bit move implies zero extension (sign extensions are
          handled by the conversion code below) -> also use it for
          32 <-> 64 bit moves; only a 64 -> 64 bit move keeps the
          registers as they are }
        if (fromsize in [OS_64,OS_S64]) and
           (tosize in [OS_64,OS_S64]) then
          moveinstr:=taicpu.op_reg_reg(A_MOV,reg2,reg1)
        else
          moveinstr:=taicpu.op_reg_reg(A_MOV,makeregsize(reg2,OS_32),makeregsize(reg1,OS_32));
        list.Concat(moveinstr);
        { Notify the register allocator that we have written a move
          instruction so it can try to eliminate it. }
        add_move_instruction(moveinstr);
        exit;
      end;

    case tosize of
      OS_8:
        list.concat(taicpu.op_reg_reg(A_UXTB,reg2,makeregsize(reg1,OS_32)));
      OS_16:
        list.concat(taicpu.op_reg_reg(A_UXTH,reg2,makeregsize(reg1,OS_32)));
      OS_S8:
        list.concat(taicpu.op_reg_reg(A_SXTB,reg2,makeregsize(reg1,OS_32)));
      OS_S16:
        list.concat(taicpu.op_reg_reg(A_SXTH,reg2,makeregsize(reg1,OS_32)));
      { while "mov wN, wM" automatically inserts a zero-extension and
        hence we could encode a 64->32 bit move like that, the problem
        is that we then can't distinguish 64->32 from 32->32 moves, and
        the 64->32 truncation could be removed altogether... So use a
        different instruction }
      OS_32,
      OS_S32:
        { in theory, reg1 should be 64 bit here (since fromsize>tosize),
          but because of the way location_force_register() tries to
          avoid superfluous zero/sign extensions, it's not always the
          case -> also force reg1 to 64 bit }
        list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
      OS_64,
      OS_S64:
        begin
          { extension to 64 bit: the instruction depends on the source }
          case fromsize of
            OS_8:
              list.concat(taicpu.op_reg_reg(A_UXTB,reg2,makeregsize(reg1,OS_64)));
            OS_S8:
              list.concat(taicpu.op_reg_reg(A_SXTB,reg2,makeregsize(reg1,OS_32)));
            OS_16:
              list.concat(taicpu.op_reg_reg(A_UXTH,reg2,makeregsize(reg1,OS_64)));
            OS_S16:
              list.concat(taicpu.op_reg_reg(A_SXTH,reg2,makeregsize(reg1,OS_32)));
            OS_32:
              list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
            OS_S32:
              list.concat(taicpu.op_reg_reg(A_SXTW,reg2,makeregsize(reg1,OS_32)));
            else
              internalerror(2024070701);
          end;
        end;
      else
        internalerror(2002090901);
    end;
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister);
  { Computes the address described by "ref" into register "r". }
  var
    addrref: treference;
    shiftop: tshifterop;
    loadop: tasmop;
  begin
    addrref:=ref;
    loadop:=A_LDR;
    { simplify as if we're going to perform a regular 64 bit load, using
      "r" as the new base register if possible/necessary }
    make_simple_ref(list,loadop,OS_ADDR,PF_None,addrref,r);
    if assigned(addrref.symbol) then
      begin
        { load literal: only a bare symbol (plus offset) with symbol data
          is accepted here }
        if (addrref.base<>NR_NO) or
           (addrref.index<>NR_NO) or
           not assigned(addrref.symboldata) then
          internalerror(2014110912);
        list.concat(taicpu.op_reg_sym_ofs(A_ADR,r,addrref.symbol,addrref.offset));
      end
    else if addrref.index=NR_NO then
      begin
        { base register only, possibly with a constant offset }
        if addrref.offset<>0 then
          a_op_const_reg_reg(list,OP_ADD,OS_ADDR,addrref.offset,addrref.base,r)
        else
          a_load_reg_reg(list,OS_ADDR,OS_ADDR,addrref.base,r);
      end
    else if addrref.shiftmode=SM_None then
      { base + unshifted index }
      a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,addrref.index,addrref.base,r)
    else
      begin
        { base + shifted index: "add" supports a superset of the shift
          modes supported by load/store instructions }
        shifterop_reset(shiftop);
        shiftop.shiftmode:=addrref.shiftmode;
        shiftop.shiftimm:=addrref.shiftimm;
        list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,r,addrref.base,addrref.index,shiftop));
      end;
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
  { Not implemented by this code generator: any call is reported as an
    internal error. }
  begin
    internalerror(2014122107);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
  { Not implemented by this code generator: any call is reported as an
    internal error. }
  begin
    internalerror(2014122108);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
  { Not implemented by this code generator: any call is reported as an
    internal error. }
  begin
    internalerror(2014122109);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
  { Copies the scalar value in mm register reg1 to mm register reg2.
    Equal sizes result in an FMOV that is reported to the register
    allocator as a move; different sizes result in an FCVT followed by
    the (optional) floating point exception check. Only scalar shuffles
    (or no shuffle at all) are accepted. }
  var
    instr: taicpu;
  begin
    if assigned(shuffle) and
       not shufflescalar(shuffle) then
      internalerror(2014122104);
    if fromsize=tosize then
      begin
        instr:=taicpu.op_reg_reg(A_FMOV,reg2,reg1);
        { Notify the register allocator that we have written a move
          instruction so it can try to eliminate it. }
        add_move_instruction(instr);
        { FMOV cannot generate a floating point exception -> no check }
        list.Concat(instr);
      end
    else
      begin
        { the registers must already have the sizes we convert between }
        if (reg_cgsize(reg1)<>fromsize) or
           (reg_cgsize(reg2)<>tosize) then
          internalerror(2014110913);
        { The conversion has to be added to the list *before* the
          exception check is generated: the check emits its code into the
          same list, so generating it first would place it in front of
          the FCVT it is supposed to guard. }
        instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
        list.Concat(instr);
        maybe_check_for_fpu_exception(list);
      end;
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
  { Loads a scalar "fromsize" value from "ref" into mm register "reg",
    converting it to "tosize" through a temporary mm register when the
    two sizes differ. Only scalar shuffles (or none) are accepted. }
  var
    loadedreg: tregister;
  begin
    if assigned(shuffle) and
       not shufflescalar(shuffle) then
      internalerror(2014122105);
    if fromsize=tosize then
      { same size: load straight into the destination }
      handle_load_store(list,A_LDR,fromsize,PF_None,reg,ref)
    else
      begin
        { load in the source size first, then convert into "reg" }
        loadedreg:=getmmregister(list,fromsize);
        handle_load_store(list,A_LDR,fromsize,PF_None,loadedreg,ref);
        a_loadmm_reg_reg(list,fromsize,tosize,loadedreg,reg,nil);
      end;
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
  { Stores the scalar value in mm register "reg" to "ref" as a "tosize"
    value, converting it through a temporary mm register first when
    "fromsize" differs. Only scalar shuffles (or none) are accepted. }
  var
    storedreg: tregister;
  begin
    if assigned(shuffle) and
       not shufflescalar(shuffle) then
      internalerror(2014122106);
    storedreg:=reg;
    if fromsize<>tosize then
      begin
        { convert into a fresh register of the destination size }
        storedreg:=getmmregister(list,tosize);
        a_loadmm_reg_reg(list,fromsize,tosize,reg,storedreg,nil);
      end;
    handle_load_store(list,A_STR,tosize,PF_NONE,storedreg,ref);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
  { Transfers the contents of integer register "intreg" into element 0 of
    mm register "mmreg" using INS. Source and destination must have the
    same byte size, which has to be 4 or 8. }
  begin
    if not shufflescalar(shuffle) then
      internalerror(2014122801);
    if tcgsize2size[fromsize]<>tcgsize2size[tosize] then
      internalerror(2014122803);
    { select the single/double sub-register that matches the value size }
    if tcgsize2size[tosize]=4 then
      setsubreg(mmreg,R_SUBMMS)
    else if tcgsize2size[tosize]=8 then
      setsubreg(mmreg,R_SUBMMD)
    else
      internalerror(2020101310);
    list.concat(taicpu.op_indexedreg_reg(A_INS,mmreg,0,intreg));
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
  { Transfers the contents of mm register "mmreg" into integer register
    "intreg" using FMOV. The source must be 4 or 8 bytes and may not be
    larger than the destination. }
  var
    targetreg : tregister;
  begin
    if not shufflescalar(shuffle) then
      internalerror(2014122802);
    if tcgsize2size[fromsize]>tcgsize2size[tosize] then
      internalerror(2014122804);
    { select the single/double sub-register that matches the source size }
    if tcgsize2size[fromsize]=4 then
      setsubreg(mmreg,R_SUBMMS)
    else if tcgsize2size[fromsize]=8 then
      setsubreg(mmreg,R_SUBMMD)
    else
      internalerror(2020101311);
    { when the destination is larger than the source, the integer
      register is resized to the source size for the FMOV }
    targetreg:=intreg;
    if tcgsize2size[fromsize]<tcgsize2size[tosize] then
      targetreg:=makeregsize(intreg,fromsize);
    list.concat(taicpu.op_reg_reg(A_FMOV,targetreg,mmreg));
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
  { Performs "dst := dst op src" on mm registers. Only OP_XOR is
    supported ("xor Vx,Vx" is used to initialize global regvars to 0);
    every other operation is reported as an internal error. }
  begin
    if op=OP_XOR then
      begin
        if not assigned(shuffle) then
          begin
            { no shuffle: EOR on the complete 16 byte vector registers }
            dst:=newreg(R_MMREGISTER,getsupreg(dst),R_SUBMM16B);
            src:=newreg(R_MMREGISTER,getsupreg(src),R_SUBMM16B);
            list.concat(taicpu.op_reg_reg_reg(A_EOR,dst,dst,src));
          end
        { NOTE(review): "shuffle" is always assigned at this point, so
          this condition looks like it is always true and the MOVI code
          below appears to be unreachable -- confirm the intent }
        else if (src<>dst) or
                (reg_cgsize(src)<>size) or
                assigned(shuffle) then
          internalerror(2015011401)
        else
          begin
            case size of
              OS_F32,
              OS_F64:
                list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
              else
                internalerror(2015011402);
            end;
          end;
      end
    else
      internalerror(2015011403);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse,not_zero: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
  { Emits a bit scan of "src" into "dst": CLZ followed by an EOR with
    31/63 when "reverse" is set, RBIT followed by CLZ otherwise. Unless
    the caller guarantees a non-zero source via "not_zero", extra code
    turns the result into 255 for a zero source. }
  var
    topbit: longint;
  begin
    { index of the most significant bit of the source operand }
    if srcsize in [OS_64,OS_S64] then
      topbit:=63
    else
      topbit:=31;
    { source is 0 -> dst will have to become 255; compare now, the result
      is consumed by the CSINV at the end }
    if not not_zero then
      list.concat(taicpu.op_reg_const(A_CMP,src,0));
    if not reverse then
      begin
        { reverse the bits, then count the leading zeroes }
        list.Concat(taicpu.op_reg_reg(A_RBIT,makeregsize(dst,srcsize),src));
        list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
      end
    else
      begin
        list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src));
        { xor 31/63 is the same as setting the lower 5/6 bits to
          "31/63-(lower 5/6 bits of dst)" }
        list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,topbit));
      end;
    if not not_zero then
      begin
        { set dst to -1 if src was 0 }
        list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
        { mask the -1 to 255 if src was 0 (anyone find a two-instruction
          branch-free version? All of mine are 3...) }
        list.Concat(taicpu.op_reg_reg(A_UXTB,makeregsize(dst,OS_32),makeregsize(dst,OS_32)));
      end;
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference);
  { Stores "register" to the insufficiently aligned reference "ref".
    64 bit values are written as two 32 bit halves (a single STP if the
    reference is 4 byte aligned and simple enough for STP); every other
    size is left to the inherited implementation. }
  var
    upperref: treference;
    firstreg, secondreg, swapreg: tregister;
  begin
    if not(fromsize in [OS_64,OS_S64]) then
      begin
        inherited;
        exit;
      end;
    { split into two 32 bit stores: firstreg starts out with the low
      half, secondreg with the high half }
    firstreg:=getintregister(list,OS_32);
    secondreg:=getintregister(list,OS_32);
    a_load_reg_reg(list,OS_32,OS_32,makeregsize(register,OS_32),firstreg);
    a_op_const_reg_reg(list,OP_SHR,OS_64,32,register,makeregsize(secondreg,OS_64));
    { on big endian the high half goes to the lower address }
    if target_info.endian=endian_big then
      begin
        swapreg:=firstreg;
        firstreg:=secondreg;
        secondreg:=swapreg;
      end;
    { can we use STP? }
    if (ref.alignment=4) and
       (simple_ref_type(A_STP,OS_32,PF_None,ref)=sr_simple) then
      list.concat(taicpu.op_reg_reg_ref(A_STP,firstreg,secondreg,ref))
    else
      begin
        a_load_reg_ref(list,OS_32,OS_32,firstreg,ref);
        upperref:=ref;
        inc(upperref.offset,4);
        a_load_reg_ref(list,OS_32,OS_32,secondreg,upperref);
      end;
  end;
|
||
|
||
|
||
procedure tcgaarch64.maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
  { After one of the operations listed below has been performed on an
    8 or 16 bit value, converts the 32 bit view of "dst" back to "size"
    via a_load_reg_reg. Does nothing in all other cases. }
  const
    ops_needing_adjustment = [OP_MUL,OP_IMUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
  begin
    if not(op in ops_needing_adjustment) then
      exit;
    if not(size in [OS_8,OS_S8,OS_16,OS_S16]) then
      exit;
    a_load_reg_reg(list,OS_32,size,makeregsize(dst,OS_32),makeregsize(dst,OS_32));
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);
  { Performs "reg := reg op a". The operation/constant pair is first
    passed through optimize_op_const, which may turn it into a no-op or a
    plain constant load; OP_NEG/OP_NOT are not valid here. }
  begin
    optimize_op_const(size,op,a);
    if op=OP_NONE then
      exit;
    if op=OP_MOVE then
      a_load_const_reg(list,size,a,reg)
    else if op in [OP_NEG,OP_NOT] then
      internalerror(200306011)
    else
      a_op_const_reg_reg(list,op,size,a,reg,reg);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_op_reg_reg(list:TAsmList;op:topcg;size:tcgsize;src,dst:tregister);
  { Performs "dst := dst op src" for binary operations (forwarded to
    a_op_reg_reg_reg), and "dst := op src" for OP_NEG/OP_NOT. }
  begin
    if not(op in [OP_NEG,OP_NOT]) then
      begin
        a_op_reg_reg_reg(list,op,size,src,dst,dst);
        exit;
      end;
    if (op=OP_NOT) and (size in [OS_8,OS_S8]) then
      { an 8 bit "not" is performed by flipping the low 8 bits }
      list.concat(taicpu.op_reg_reg_const(A_EOR,dst,src,255))
    else
      begin
        list.concat(taicpu.op_reg_reg(TOpCG2AsmOpReg[op],dst,src));
        maybeadjustresult(list,op,size,dst);
      end;
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);
  { Performs "dst := src op a" without requesting overflow information;
    the overflow location filled in by the checking variant is ignored. }
  var
    unused_ovloc: tlocation;
  begin
    a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,unused_ovloc);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);
  { Performs "dst := src2 op src1". Rotate-left is expressed as a
    rotate-right by "bit size - src1"; rotates are only supported for 32
    and 64 bit operands. }
  var
    rotcount: tregister;
  begin
    if op=OP_ROL then
      begin
        { no ROLV opcode... }
        if size in [OS_32,OS_S32,OS_64,OS_S64] then
          begin
            { rotcount := (size in bits) - src1, then rotate right }
            rotcount:=getintregister(list,size);
            a_load_const_reg(list,size,tcgsize2size[size]*8,rotcount);
            a_op_reg_reg(list,OP_SUB,size,src1,rotcount);
            a_op_reg_reg_reg(list,OP_ROR,size,rotcount,src2,dst);
            exit;
          end
        else
          internalerror(2014111005);
      end
    else if (op=OP_ROR) and
            not(size in [OS_32,OS_S32,OS_64,OS_S64]) then
      internalerror(2014111006);
    { the operation must map onto a register-register instruction }
    if TOpCG2AsmOpReg[op]=A_NONE then
      internalerror(2014111007);
    list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1));
    maybeadjustresult(list,op,size,dst);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);
  { Performs "dst := src op a". When "setflags" is set, "ovloc" describes
    where the overflow state of 64 bit add/sub operations can be found;
    multiplications and divisions are delegated to the register variant
    after loading the constant. In all remaining cases ovloc.loc is left
    at LOC_VOID. }
  var
    shiftmask: longint;
    immreg: tregister;
  begin
    { add/sub instructions have only positive immediate operands }
    if (op in [OP_ADD,OP_SUB]) and
       (a<0) and
       { this might result in a false positive overflow in case of a+0 }
       (a<>$8000000000000000) then
      begin
        { flip the operation and negate the constant }
        if op=OP_ADD then
          op:=OP_SUB
        else
          op:=OP_ADD;
        { avoid range/overflow error in case a = low(tcgint) }
{$push}{$r-}{$q-}
        a:=-a;
{$pop}
      end;
    ovloc.loc:=LOC_VOID;
    optimize_op_const(size,op,a);
    { the optimization may have reduced the operation to a move }
    if op=OP_NONE then
      begin
        a_load_reg_reg(list,size,size,src,dst);
        exit;
      end;
    if op=OP_MOVE then
      begin
        a_load_const_reg(list,size,a,dst);
        exit;
      end;
    case op of
      OP_ADD,
      OP_SUB:
        begin
          handle_reg_imm12_reg(list,TOpCG2AsmOpImm[op],size,src,a,dst,NR_NO,setflags,true);
          { on a 64 bit target, overflows with smaller data types
            are handled via range errors }
          if setflags and
             (size in [OS_64,OS_S64]) then
            begin
              location_reset(ovloc,LOC_FLAGS,OS_8);
              if size<>OS_64 then
                ovloc.resflags:=F_VS
              else if op=OP_ADD then
                ovloc.resflags:=F_CS
              else
                ovloc.resflags:=F_CC;
            end;
        end;
      OP_OR,
      OP_AND,
      OP_XOR:
        begin
          if not(size in [OS_64,OS_S64]) then
            a:=cardinal(a);
          if not is_shifter_const(a,size) then
            begin
              { not encodable as an immediate -> go through a register }
              immreg:=getintregister(list,size);
              a_load_const_reg(list,size,a,immreg);
              a_op_reg_reg_reg(list,op,size,immreg,src,dst);
            end
          else
            list.concat(taicpu.op_reg_reg_const(TOpCG2AsmOpReg[op],dst,src,a));
        end;
      OP_SHL,
      OP_SHR,
      OP_SAR:
        begin
          if size in [OS_64,OS_S64] then
            shiftmask:=63
          else
            shiftmask:=31;
          { a shift by 0 becomes a plain move }
          if (a and shiftmask)=0 then
            a_load_reg_reg(list,size,size,src,dst)
          else
            list.concat(taicpu.op_reg_reg_const(
              TOpCG2AsmOpImm[Op],dst,src,a and shiftmask));
          { shift counts outside of the valid range are not accepted }
          if (a and not(tcgint(shiftmask)))<>0 then
            internalError(2014112101);
        end;
      OP_ROL,
      OP_ROR:
        begin
          case size of
            OS_32,OS_S32:
              if (a and not(tcgint(31)))<>0 then
                internalError(2014112102);
            OS_64,OS_S64:
              if (a and not(tcgint(63)))<>0 then
                internalError(2014112103);
            else
              internalError(2014112104);
          end;
          { there's only a ror opcode }
          if op=OP_ROL then
            a:=(tcgsize2size[size]*8)-a;
          list.concat(taicpu.op_reg_reg_const(A_ROR,dst,src,a));
        end;
      OP_MUL,
      OP_IMUL,
      OP_DIV,
      OP_IDIV:
        begin
          { load the constant and use the register-register variant }
          immreg:=getintregister(list,size);
          a_load_const_reg(list,size,a,immreg);
          a_op_reg_reg_reg_checkoverflow(list,op,size,immreg,src,dst,setflags,ovloc);
        end;
      else
        internalerror(2014111403);
    end;
    maybeadjustresult(list,op,size,dst);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);
  { Performs "dst := src2 op src1". When "setflags" is set and the
    operation is 64 bit, additionally generates the code needed to detect
    an overflow and describes in "ovloc" which flags signal it. In all
    other cases ovloc.loc is LOC_VOID and the plain operation is
    emitted. }
  var
    highpart, signbits: tregister;
    overflow_checked: boolean;
  begin
    ovloc.loc:=LOC_VOID;
    { overflow can only occur with 64 bit calculations on 64 bit cpus }
    overflow_checked:=setflags and
                      (size in [OS_64,OS_S64]);
    if overflow_checked then
      begin
        case op of
          OP_ADD,
          OP_SUB:
            begin
              { flag-setting variant of the instruction }
              list.concat(setoppostfix(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1),PF_S));
              ovloc.loc:=LOC_FLAGS;
              if size<>OS_64 then
                ovloc.resflags:=F_VS
              else if op=OP_ADD then
                ovloc.resflags:=F_CS
              else
                ovloc.resflags:=F_CC;
              { finished }
              exit;
            end;
          OP_MUL:
            begin
              { check whether the upper 64 bit of the 128 bit product is 0 }
              highpart:=getintregister(list,OS_64);
              list.concat(taicpu.op_reg_reg_reg(A_UMULH,highpart,src2,src1));
              list.concat(taicpu.op_reg_const(A_CMP,highpart,0));
              ovloc.loc:=LOC_FLAGS;
              ovloc.resflags:=F_NE;
              { still have to perform the actual multiplication, which is
                done by the common code after the case statement }
            end;
          OP_IMUL:
            begin
              { check whether the upper 64 bits of the 128 bit multiplication
                result have the same value as the replicated sign bit of the
                lower 64 bits }
              highpart:=getintregister(list,OS_64);
              list.concat(taicpu.op_reg_reg_reg(A_SMULH,highpart,src2,src1));
              { calculate lower 64 bits (afterwards, because dst may be
                equal to src1 or src2) }
              a_op_reg_reg_reg(list,op,size,src1,src2,dst);
              { replicate sign bit }
              signbits:=getintregister(list,OS_64);
              a_op_const_reg_reg(list,OP_SAR,OS_S64,63,dst,signbits);
              list.concat(taicpu.op_reg_reg(A_CMP,highpart,signbits));
              ovloc.loc:=LOC_FLAGS;
              ovloc.resflags:=F_NE;
              { finished }
              exit;
            end;
          OP_IDIV,
          OP_DIV:
            begin
              { not handled here, needs div-by-zero check (dividing by zero
                just gives a 0 result on aarch64), and low(int64) div -1
                check for overflow) }
              internalerror(2014122101);
            end;
          else
            internalerror(2019050936);
        end;
      end;
    a_op_reg_reg_reg(list,op,size,src1,src2,dst);
  end;
|
||
|
||
|
||
|
||
{*************** compare instructions ****************}
|
||
|
||
procedure tcgaarch64.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
  { Compares "reg" with the constant "a" and jumps to "l" if "cmp_op"
    holds. Negative constants are compared via CMN with their absolute
    value, non-negative ones via CMP. }
  var
    compareop: tasmop;
  begin
    if a<0 then
      compareop:=A_CMN
    else
      compareop:=A_CMP;
    { avoid range/overflow error in case a=low(tcgint) }
{$push}{$r-}{$q-}
    handle_reg_imm12_reg(list,compareop,size,reg,abs(a),NR_XZR,NR_NO,false,false);
{$pop}
    a_jmp_cond(list,cmp_op,l);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1,reg2: tregister; l: tasmlabel);
  { Emits "CMP reg2,reg1" followed by a conditional jump to "l" for
    "cmp_op". }
  begin
    list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
    a_jmp_cond(list,cmp_op,l);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_jmp_always(list: TAsmList; l: TAsmLabel);
  { Emits an unconditional branch to label "l"; the target is referenced
    through its name as an AT_FUNCTION symbol. }
  var
    branch: taicpu;
  begin
    branch:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(l.name,AT_FUNCTION));
    branch.is_jmp:=true;
    list.Concat(branch);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_jmp_name(list: TAsmList; const s: string);
  { Emits an unconditional branch to the symbol named "s", referenced as
    an AT_FUNCTION symbol. }
  var
    branch: taicpu;
  begin
    branch:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s,AT_FUNCTION));
    branch.is_jmp:=true;
    list.Concat(branch);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: TAsmLabel);
  { Emits a branch to "l" that is taken when comparison "cond" holds; the
    condition code is looked up in TOpCmp2AsmCond. }
  var
    branch: taicpu;
  begin
    branch:=TAiCpu.op_sym(A_B,l);
    branch.is_jmp:=true;
    branch.SetCondition(TOpCmp2AsmCond[cond]);
    list.Concat(branch);
  end;
|
||
|
||
|
||
procedure tcgaarch64.a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);
  { Emits a branch to "l" that is taken when the flags state "f" holds;
    the condition code is obtained via flags_to_cond. }
  var
    branch : taicpu;
  begin
    branch:=Taicpu.op_sym(A_B,l);
    branch.is_jmp:=true;
    branch.SetCondition(flags_to_cond(f));
    list.Concat(branch);
  end;
|
||
|
||
|
||
procedure tcgaarch64.g_flags2reg(list: TAsmList; size: tcgsize; const f: tresflags; reg: tregister);
  { Materializes the flags state "f" in "reg" with a single CSET using
    the condition obtained from flags_to_cond. }
  begin
    list.concat(
      taicpu.op_reg_cond(A_CSET,reg,flags_to_cond(f)));
  end;
|
||
|
||
|
||
procedure tcgaarch64.g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);
  { Not usable on this target: an explicit overflow location is required
    (see g_overflowcheck_loc), because there are many possibilities and
    not just the overflow flag, which is only used for signed add/sub.
    Any call is reported as an internal error. }
  begin
    internalerror(2014112303);
  end;
|
||
|
||
|
||
procedure tcgaarch64.g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc : tlocation);
  { When overflow checking is enabled in the current local switches,
    emits a call to FPC_OVERFLOW that is skipped (by a jump on the
    inverted flags of "ovloc") when no overflow occurred. Only
    LOC_FLAGS overflow locations are supported. }
  var
    skiplabel : tasmlabel;
    no_overflow_flags : tresflags;
  begin
    if not(cs_check_overflow in current_settings.localswitches) then
      exit;
    current_asmdata.getjumplabel(skiplabel);
    if ovloc.loc=LOC_FLAGS then
      begin
        { jump over the error call when the overflow condition is false }
        no_overflow_flags:=ovloc.resflags;
        inverse_flags(no_overflow_flags);
        cg.a_jmp_flags(list,no_overflow_flags,skiplabel);
      end
    else
      internalerror(2014112304);
    a_call_name(list,'FPC_OVERFLOW',false);
    a_label(list,skiplabel);
  end;
|
||
|
||
{ *********** entry/exit code and address loading ************ }
|
||
|
||
{ Pushes every register of type "rt" in the range lowsr..highsr that is used
  in the current procedure, using pre-indexed stores that lower SP by 16
  bytes per store instruction.
  Returns the number of stack bytes occupied by the emitted stores. }
function tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
  var
    pushref: treference;
    supreg: tsuperregister;
    pending: tregister;
    sehsingle,sehpair : TAsmSehDirective;
  begin
    result:=0;
    pending:=NR_NO;
    reference_reset_base(pushref,NR_SP,-16,ctempposinvalid,16,[]);
    pushref.addressmode:=AM_PREINDEXED;

    if target_info.system<>system_aarch64_win64 then
      begin
        { generic targets: store the used registers two at a time, in
          ascending order }
        for supreg:=lowsr to highsr do
          begin
            if not(supreg in rg[rt].used_in_proc) then
              continue;
            if pending=NR_NO then
              pending:=newreg(rt,supreg,sub)
            else
              begin
                inc(result,16);
                list.concat(taicpu.op_reg_reg_ref(A_STP,pending,newreg(rt,supreg,sub),pushref));
                pending:=NR_NO;
              end;
          end;
        { a single register is left over: store it twice, since the stack
          has to remain 16-byte aligned }
        if pending<>NR_NO then
          begin
            list.concat(taicpu.op_reg_reg_ref(A_STP,pending,pending,pushref));
            inc(result,16);
          end;
        exit;
      end;

    { Win64: SEH can only describe pairs of consecutive registers; every
      other register has to be stored on its own with STR }
    case rt of
      R_INTREGISTER:
        begin
          sehsingle:=ash_savereg_x;
          sehpair:=ash_saveregp_x;
        end;
      R_MMREGISTER:
        begin
          sehsingle:=ash_savefreg_x;
          sehpair:=ash_savefregp_x;
        end;
      else
        internalerror(2020041304);
    end;
    for supreg:=lowsr to highsr do
      begin
        if not(supreg in rg[rt].used_in_proc) then
          continue;
        if pending=NR_NO then
          begin
            pending:=newreg(rt,supreg,sub);
            continue;
          end;
        inc(result,16);
        if getsupreg(pending)=supreg-1 then
          begin
            { consecutive registers -> store them as a pair }
            list.concat(taicpu.op_reg_reg_ref(A_STP,pending,newreg(rt,supreg,sub),pushref));
            list.concat(cai_seh_directive.create_reg_offset(sehpair,pending,16));
            pending:=NR_NO;
          end
        else
          begin
            { gap -> store the earlier register alone, keep the current one
              as the next pairing candidate }
            list.concat(taicpu.op_reg_ref(A_STR,pending,pushref));
            list.concat(cai_seh_directive.create_reg_offset(sehsingle,pending,16));
            pending:=newreg(rt,supreg,sub);
          end;
      end;
    { last register without a partner }
    if pending<>NR_NO then
      begin
        inc(result,16);
        list.concat(taicpu.op_reg_ref(A_STR,pending,pushref));
        list.concat(cai_seh_directive.create_reg_offset(sehsingle,pending,16));
      end;
  end;
|
||
|
||
|
||
{ Symbol list callback: turns an SP-relative parameter or local variable
  into an FP-relative one. "arg" points to a longint that is subtracted
  from the symbol's offset. Symbols of any other kind or location are left
  untouched. }
procedure FixupOffsets(p:TObject;arg:pointer);
  var
    varsym: tabstractnormalvarsym absolute p;
  begin
    if not(tsym(p).typ in [paravarsym,localvarsym]) then
      exit;
    if varsym.localloc.loc<>LOC_REFERENCE then
      exit;
    if varsym.localloc.reference.base<>NR_STACK_POINTER_REG then
      exit;
    varsym.localloc.reference.base:=NR_FRAME_POINTER_REG;
    dec(varsym.localloc.reference.offset,PLongint(arg)^);
  end;
|
||
|
||
|
||
{ Lowers the stack pointer by "localsize" bytes (nothing is emitted when
  localsize<=0). On aarch64-win64, allocations of at least one stack page
  additionally write to every newly allocated page. }
procedure tcgaarch64.g_stackpointer_alloc(list : TAsmList;localsize : longint);
  var
    proberef : treference;
    page : integer;
    pagecount : longint;
    probeloop : tasmlabel;
  begin
    if localsize<=0 then
      exit;

    if (target_info.system<>system_aarch64_win64) or
       (localsize<winstackpagesize) then
      begin
        { plain allocation }
        handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
        if target_info.system=system_aarch64_win64 then
          list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
        exit;
      end;

    { Windows only guards a few pages for stack growth, so each page has to
      be accessed first }
    pagecount:=localsize div winstackpagesize;
    if pagecount<=4 then
      begin
        { few pages: allocate everything at once and emit one store per page }
        handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
        for page:=1 to pagecount do
          begin
            reference_reset_base(proberef,NR_SP,localsize-page*winstackpagesize+4,ctempposinvalid,4,[]);
            list.concat(taicpu.op_reg_ref(A_STR,NR_WZR,proberef));
          end;
        reference_reset_base(proberef,NR_SP,0,ctempposinvalid,4,[]);
        list.concat(taicpu.op_reg_ref(A_STR,NR_WZR,proberef));
      end
    else
      begin
        { many pages: loop that allocates and touches one page per
          iteration, with IP0 as the page counter ... }
        current_asmdata.getjumplabel(probeloop);
        getcpuregister(list,NR_IP0);
        a_load_const_reg(list,OS_ADDR,pagecount,NR_IP0);
        a_label(list,probeloop);
        handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,winstackpagesize,NR_SP,NR_IP1,false,true);
        reference_reset_base(proberef,NR_SP,0,ctempposinvalid,4,[]);
        list.concat(taicpu.op_reg_ref(A_STR,NR_WZR,proberef));
        list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,NR_IP0,NR_IP0,1),PF_S));
        a_jmp_cond(list,OC_NE,probeloop);
        { ... followed by the part that does not fill a whole page }
        handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize mod winstackpagesize,NR_SP,NR_IP1,false,true);
        ungetcpuregister(list,NR_IP0);
      end;
  end;
|
||
|
||
|
||
{ Generates the procedure prologue: saves FP/LR and sets up the frame
  pointer (unless pi_no_framepointer_needed is set), saves the used
  callee-saved integer and vector registers, allocates the local stack
  space and, for aarch64-win64, emits the SEH unwind directives.
  When "nostackframe" is true no frame code is generated at all. }
procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
  var
    insertpos: tlinkedlistitem;
    procdirective: tai_seh_directive;
    endprologuelist: TAsmList;
    iswin64,
    deferframeload,
    skipendprologue: boolean;
    fplrref: treference;
    framesize: longint;
  begin
    { on aarch64, we need to store the link register and then generate a
      frame pointer if the subroutine either
        - receives parameters on the stack
        - is not a leaf procedure
        - has nested procedures
        - helpers retrieve the stack pointer
    }
    iswin64:=target_info.system=system_aarch64_win64;
    { remember the current end of the list: the SEH proc directive is
      inserted right after it further down }
    insertpos:=list.last;
    { pi_has_unwind_info may already be set at this point if the assembler
      body contains SEH directives. In that case .seh_endprologue is
      expected to be among them and must not be generated here. }
    skipendprologue:=pi_has_unwind_info in current_procinfo.flags;
    deferframeload:=false;

    if not nostackframe then
      begin
        { the stack pointer must be 16-byte aligned at all times }
        localsize:=align(localsize,16);

        if iswin64 then
          include(current_procinfo.flags,pi_has_unwind_info);

        if not(pi_no_framepointer_needed in current_procinfo.flags) then
          begin
            { push frame pointer and return address }
            reference_reset_base(fplrref,NR_SP,-16,ctempposinvalid,16,[]);
            fplrref.addressmode:=AM_PREINDEXED;
            list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,fplrref));
            current_asmdata.asmcfi.cfa_def_cfa_offset(list,2*sizeof(pint));
            current_asmdata.asmcfi.cfa_offset(list,NR_FP,-16);
            current_asmdata.asmcfi.cfa_offset(list,NR_LR,-8);
            if iswin64 then
              list.concat(cai_seh_directive.create_offset(ash_savefplr_x,16));

            if current_procinfo.procdef.proctypeoption=potype_exceptfilter then
              begin
                { on aarch64-win64 this is postponed until the prologue is
                  complete, because there is no SEH directive for setting
                  FP to a register }
                if iswin64 then
                  deferframeload:=true
                else
                  gen_load_frame_for_exceptfilter(list);
                localsize:=current_procinfo.maxpushedparasize;
              end
            else
              begin
                { set up the frame pointer }
                a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
                current_asmdata.asmcfi.cfa_def_cfa_register(list,NR_FP);
                if iswin64 then
                  list.concat(cai_seh_directive.create(ash_setfp));
              end;
          end;

        framesize:=localsize;
        { save the modified integer registers }
        inc(framesize,
          save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
        { only the lower 64 bits of the modified vector registers have to
          be saved; a caller that needs the upper 64 bits has to save them
          itself }
        inc(framesize,
          save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));

        { allocate the stack space }
        if localsize<>0 then
          begin
            localsize:=align(localsize,16);
            current_procinfo.final_localsize:=localsize;
            g_stackpointer_alloc(list,localsize);
          end;

        { By default, parameters passed on the stack are accessed via the
          frame pointer, and locals and temps via the stack pointer, because
            a) positive offsets can be bigger than negative ones (so
               addressing locals with negative FP offsets would be less
               efficient)
            b) the local size is unknown while the code is generated, so
               parameters cannot be addressed via the stack pointer without
               copying them
          This is a problem for the get_frame() intrinsic:
            a) it must return the same value as what is passed as parentfp
               parameter, since that is how the TP-style objects unit uses it
            b) its result must be usable to access all local data of a
               routine (locals and parameters), since it is all that nested
               routines have access to
            c) its result must be usable to construct a backtrace, as the
               exception handling routines use it as well

          The solution, based on something similar in the MIPS port: all
          accesses to locals inside the routine itself are generated
          SP-relative, and once the code has been generated and the local
          size is known (i.e. here), all SP-relative variables/parameters
          are turned into FP-relative ones. Nested routines therefore access
          them less efficiently, but those accesses are indirect anyway and
          this way they are possible at all. }
        if current_procinfo.has_nestedprocs or
           (
             iswin64 and
             (current_procinfo.flags*[pi_has_implicit_finally,pi_needs_implicit_finally,pi_uses_exceptions]<>[])
           ) then
          begin
            current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@framesize);
            current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@framesize);
          end;
      end;

    if not(pi_has_unwind_info in current_procinfo.flags) then
      begin
        if deferframeload then
          gen_load_frame_for_exceptfilter(list);
        exit;
      end;

    { generate the unwind data for aarch64-win64 }
    procdirective:=cai_seh_directive.create_name(ash_proc,current_procinfo.procdef.mangledname);
    if assigned(insertpos) then
      list.insertafter(procdirective,insertpos)
    else
      list.insert(procdirective);
    { the directive creates another section }
    inc(list.section_count);

    endprologuelist:=TAsmList.Create;
    if not skipendprologue then
      endprologuelist.concat(cai_seh_directive.create(ash_endprologue));
    if assigned(current_procinfo.endprologue_ai) then
      current_procinfo.aktproccode.insertlistafter(current_procinfo.endprologue_ai,endprologuelist)
    else
      list.concatlist(endprologuelist);
    endprologuelist.free;

    if deferframeload then
      gen_load_frame_for_exceptfilter(list);
  end;
|
||
|
||
|
||
{ GOT initialisation hook; intentionally empty for this target. }
procedure tcgaarch64.g_maybe_got_init(list : TAsmList);
  begin
    { Darwin and Linux do not require anything here }
  end;
|
||
|
||
|
||
{ Intentionally empty: the saved registers are restored by g_proc_exit. }
procedure tcgaarch64.g_restore_registers(list:TAsmList);
  begin
    { handled in g_proc_exit }
  end;
|
||
|
||
|
||
{ Counterpart of save_regs: pops the registers of type "rt" in the range
  lowsr..highsr that are used in the current procedure, using post-indexed
  loads that raise SP by 16 bytes per load instruction. }
procedure tcgaarch64.load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
  var
    popref: treference;
    supreg, topsupreg: tsuperregister;
    pending: tregister;
    idx,
    count: longint;
    loads : array of tai;
  begin
    reference_reset_base(popref,NR_SP,16,ctempposinvalid,16,[]);
    popref.addressmode:=AM_POSTINDEXED;
    count:=0;
    pending:=NR_NO;
    if target_info.system=system_aarch64_win64 then
      begin
        { Because of SEH, Win64 only pairs consecutive registers and uses
          LDR for single ones. The loads are collected in ascending register
          order first and then emitted in reverse order. }
        setlength(loads,highsr-lowsr+1);
        for supreg:=lowsr to highsr do
          begin
            if not(supreg in rg[rt].used_in_proc) then
              continue;
            if pending=NR_NO then
              begin
                pending:=newreg(rt,supreg,sub);
                continue;
              end;
            if getsupreg(pending)=supreg-1 then
              begin
                loads[count]:=taicpu.op_reg_reg_ref(A_LDP,pending,newreg(rt,supreg,sub),popref);
                inc(count);
                pending:=NR_NO;
              end
            else
              begin
                loads[count]:=taicpu.op_reg_ref(A_LDR,pending,popref);
                inc(count);
                pending:=newreg(rt,supreg,sub);
              end;
          end;
        { last register without a partner }
        if pending<>NR_NO then
          begin
            loads[count]:=taicpu.op_reg_ref(A_LDR,pending,popref);
            inc(count);
            pending:=NR_NO;
          end;
        for idx:=count-1 downto 0 do
          list.concat(loads[idx]);
      end
    else
      begin
        { count the used registers and remember the highest one: with an
          odd count, the highest register was stored twice }
        topsupreg:=RS_NO;
        for supreg:=lowsr to highsr do
          if supreg in rg[rt].used_in_proc then
            begin
              inc(count);
              topsupreg:=supreg;
            end;
        if odd(count) then
          begin
            list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,topsupreg,sub),popref));
            topsupreg:=pred(topsupreg);
          end;
        { load all (remaining) used registers in pairs, in descending order }
        for supreg:=topsupreg downto lowsr do
          begin
            if not(supreg in rg[rt].used_in_proc) then
              continue;
            if pending=NR_NO then
              pending:=newreg(rt,supreg,sub)
            else
              begin
                list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,supreg,sub),pending,popref));
                pending:=NR_NO;
              end;
          end;
      end;
    { no register may be left over at this point }
    if pending<>NR_NO then
      internalerror(2014112602);
  end;
|
||
|
||
|
||
{ Generates the procedure epilogue: releases the local stack space, restores
  the saved callee-saved registers and the FP/LR pair, emits RET and, when
  unwind info is in use, dumps the scopes and closes the SEH procedure.
  The "parasize" parameter is not used by this implementation. }
procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
  var
    fplrref: treference;
    regssaved,
    iswin64,
    noframepointer: boolean;
    supreg: tsuperregister;
  begin
    { No exit stack frame is needed when the routine never returns:
        * the final ret is kept so the peephole optimizer can easily do
          call/ret -> jmp or call conversions
        * the entry stack frame must still be generated normally, because
          the subroutine can be left through an exception and the unwinding
          code may then have to restore the registers saved by the entry
          code }
    if not(nostackframe) and
       not(po_noreturn in current_procinfo.procdef.procoptions) then
      begin
        iswin64:=target_info.system=system_aarch64_win64;
        noframepointer:=pi_no_framepointer_needed in current_procinfo.flags;

        { find out whether the prologue saved any callee-saved register }
        regssaved:=false;
        for supreg:=RS_X19 to RS_X28 do
          if supreg in rg[R_INTREGISTER].used_in_proc then
            begin
              regssaved:=true;
              break;
            end;
        if not regssaved then
          for supreg:=RS_D8 to RS_D15 do
            if supreg in rg[R_MMREGISTER].used_in_proc then
              begin
                regssaved:=true;
                break;
              end;

        if regssaved then
          begin
            { release the locals, then restore the registers (and with them
              the stack pointer) }
            if current_procinfo.final_localsize<>0 then
              handle_reg_imm12_reg(list,A_ADD,OS_ADDR,NR_SP,current_procinfo.final_localsize,NR_SP,NR_IP0,false,true);
            load_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
            load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
            { on Windows, SP is additionally restored from FP, even though
              the add should be enough, so that the exit sequence matches
              the entry sequence }
            if iswin64 then
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
          end
        else if current_procinfo.final_localsize<>0 then
          begin
            { Nothing but the locals has to be released. Windows always
              needs the explicit ADD; the other targets only use it when
              there is no frame pointer to restore SP from. }
            if iswin64 or noframepointer then
              handle_reg_imm12_reg(list,A_ADD,OS_ADDR,current_procinfo.framepointer,current_procinfo.final_localsize,
                current_procinfo.framepointer,NR_IP0,false,true);
            { with a frame pointer, SP is (also) set back from FP }
            if not noframepointer then
              a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
          end;

        if not noframepointer then
          begin
            { pop frame pointer and return address }
            reference_reset_base(fplrref,NR_SP,16,ctempposinvalid,16,[]);
            fplrref.addressmode:=AM_POSTINDEXED;
            list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,fplrref));
          end;
      end;

    { return }
    list.concat(taicpu.op_none(A_RET));
    if pi_has_unwind_info in current_procinfo.flags then
      begin
        tcpuprocinfo(current_procinfo).dump_scopes(list);
        list.concat(cai_seh_directive.create(ash_endproc));
      end;
  end;
|
||
|
||
|
||
{ Intentionally empty: the registers are saved by g_proc_entry. }
procedure tcgaarch64.g_save_registers(list : TAsmList);
  begin
    { handled in g_proc_entry }
  end;
|
||
|
||
|
||
procedure tcgaarch64.g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);
|
||
|
||
var
|
||
sourcebasereplaced, destbasereplaced: boolean;
|
||
|
||
{ get optimal memory operation to use for loading/storing data
|
||
in an unrolled loop }
|
||
procedure getmemop(scaledop, unscaledop: tasmop; const startref, endref: treference; opsize: tcgsize; postfix: toppostfix; out memop: tasmop; out needsimplify: boolean);
  { Selects the instruction for a run of accesses whose first access uses
    "startref" and whose last access uses "endref".
      scaledop     - preferred instruction
      unscaledop   - alternative instruction, or A_NONE if there is none
      memop        - receives the instruction to use
      needsimplify - receives false if "memop" can be used with both
                     references as they are, true if the reference has to
                     be simplified before use }
  begin
    if (simple_ref_type(scaledop,opsize,postfix,startref)=sr_simple) and
       (simple_ref_type(scaledop,opsize,postfix,endref)=sr_simple) then
      begin
        { the scaled instruction can encode both ends directly }
        memop:=scaledop;
        needsimplify:=false;
      end
    else if (unscaledop<>A_NONE) and
       (simple_ref_type(unscaledop,opsize,postfix,startref)=sr_simple) and
       (simple_ref_type(unscaledop,opsize,postfix,endref)=sr_simple) then
      begin
        { the unscaled instruction can encode both ends directly }
        memop:=unscaledop;
        needsimplify:=false;
      end
    else
      begin
        { neither form fits -> use the scaled instruction with a simplified
          reference }
        memop:=scaledop;
        needsimplify:=true;
      end;
  end;
|
||
|
||
{ adjust the offset and/or addressing mode after a load/store so it's
|
||
correct for the next one of the same size }
|
||
procedure updaterefafterloadstore(var ref: treference; oplen: longint);
  { "oplen" is the number of bytes transferred by the load/store that was
    just generated with "ref" }
  begin
    if ref.addressmode=AM_OFFSET then
      { plain offset addressing: step to the next location }
      inc(ref.offset,oplen)
    else if ref.addressmode=AM_PREINDEXED then
      begin
        { the instruction has updated the base register, so the following
          accesses address relative to the new base, starting at offset 0 }
        ref.offset:=0;
        ref.addressmode:=AM_OFFSET;
      end;
    { AM_POSTINDEXED: the instruction updates the base register itself, the
      offset can stay as it is }
  end;
|
||
|
||
{ generate a load/store and adjust the reference offset to the next
|
||
memory location if necessary }
|
||
procedure genloadstore(list: TAsmList; op: tasmop; reg: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
  var
    memins: taicpu;
  begin
    memins:=taicpu.op_reg_ref(op,reg,ref);
    list.concat(setoppostfix(memins,postfix));
    { advance by the size of one transferred register }
    updaterefafterloadstore(ref,tcgsize2size[opsize]);
  end;
|
||
|
||
{ generate a dual load/store (ldp/stp) and adjust the reference offset to
|
||
the next memory location if necessary }
|
||
procedure gendualloadstore(list: TAsmList; op: tasmop; reg1, reg2: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
  var
    memins: taicpu;
  begin
    memins:=taicpu.op_reg_reg_ref(op,reg1,reg2,ref);
    list.concat(setoppostfix(memins,postfix));
    { two registers were transferred -> advance by twice the operand size }
    updaterefafterloadstore(ref,tcgsize2size[opsize]*2);
  end;
|
||
|
||
{ turn a reference into a pre- or post-indexed reference for use in a
|
||
load/store of a particular size }
|
||
procedure makesimpleforcopy(list: TAsmList; var scaledop: tasmop; opsize: tcgsize; postfix: toppostfix; forcepostindexing: boolean; var ref: treference; var basereplaced: boolean);
  { With forcepostindexing, "ref" becomes a post-indexed reference whose
    offset equals the access size; otherwise it becomes a plain offset
    reference. "basereplaced" is set to true whenever the base register of
    "ref" is replaced by a freshly allocated address register. }
  var
    newbase: tregister;
    accesssize: longint;
    savedmode: taddressmode;
  begin
    { bytes transferred per instruction (ldp/stp move two registers) }
    accesssize:=tcgsize2size[opsize];
    if scaledop in [A_LDP,A_STP] then
      accesssize:=accesssize*2;
    { is the reference usable as post-indexed one without changes? }
    if forcepostindexing then
      begin
        savedmode:=ref.addressmode;
        ref.addressmode:=AM_POSTINDEXED;
        if (savedmode=AM_POSTINDEXED) or
           ((ref.offset=0) and
            (simple_ref_type(scaledop,opsize,postfix,ref)=sr_simple)) then
          begin
            { only the post-index offset has to become the access size }
            ref.offset:=accesssize;
            { and the base register has to be replaced if that has not been
              done yet (it could be sp or a regvar) }
            if not basereplaced then
              begin
                newbase:=getaddressregister(list);
                a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,newbase);
                ref.base:=newbase;
                basereplaced:=true;
              end;
            exit;
          end;
        ref.addressmode:=savedmode;
      end;
{$ifdef dummy}
    This could in theory be useful in case you have a concatcopy from
    e.g. x1+255 to x1+267 *and* the reference is aligned, but this seems
    very unlikely. Disabled because it still needs fixes, as it
    also generates pre-indexed loads right now at the very end for the
    left-over gencopies

    { can we turn it into a pre-indexed reference for free? (after the
      first operation, it will be turned into an offset one) }
    if not forcepostindexing and
       (ref.offset<>0) then
      begin
        savedmode:=ref.addressmode;
        ref.addressmode:=AM_PREINDEXED;
        newbase:=ref.base;
        if not basereplaced and
           (ref.base=newbase) then
          begin
            newbase:=getaddressregister(list);
            a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,newbase);
            ref.base:=newbase;
            basereplaced:=true;
          end;
        if simple_ref_type(scaledop,opsize,postfix,ref)<>sr_simple then
          make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
        exit;
      end;
{$endif dummy}
    if not forcepostindexing then
      begin
        ref.addressmode:=AM_OFFSET;
        make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
        { This can still go wrong if the final offset is no longer a simple
          ref. Passing all the information needed to check that through
          every call site is a bit complicated, so play safe: unrolled
          copies are currently never generated for more than 64 bytes (32
          with non-double-register copies) }
        if (ref.index=NR_NO) and
           (
             ((scaledop in [A_LDP,A_STP]) and
              (ref.offset<((64-8)*tcgsize2size[opsize]))) or
             ((scaledop in [A_LDUR,A_STUR]) and
              (ref.offset<(255-8*tcgsize2size[opsize]))) or
             ((scaledop in [A_LDR,A_STR]) and
              (ref.offset<((4096-8)*tcgsize2size[opsize])))
           ) then
          exit;
      end;
    { fall back to loading the address into a new base register }
    newbase:=getaddressregister(list);
    a_loadaddr_ref_reg(list,ref,newbase);
    basereplaced:=true;
    if forcepostindexing then
      begin
        reference_reset_base(ref,newbase,accesssize,ref.temppos,ref.alignment,ref.volatility);
        ref.addressmode:=AM_POSTINDEXED;
      end
    else
      begin
        reference_reset_base(ref,newbase,0,ref.temppos,ref.alignment,ref.volatility);
        ref.addressmode:=AM_OFFSET;
      end;
  end;
|
||
|
||
{ prepare a reference for use by gencopy. This is done both after the
|
||
unrolled and regular copy loop -> get rid of post-indexing mode, make
|
||
sure ref is valid }
|
||
procedure preparecopy(list: tasmlist; scaledop, unscaledop: tasmop; var ref: treference; opsize: tcgsize; postfix: toppostfix; out op: tasmop; var basereplaced: boolean);
  { "op" receives the instruction to use with "ref" for an access of size
    "opsize" }
  var
    mustsimplify: boolean;
  begin
    { a post-indexed reference has to step by the size of this access }
    if ref.addressmode=AM_POSTINDEXED then
      ref.offset:=tcgsize2size[opsize];
    { the scaled instruction is passed for both variants here }
    getmemop(scaledop,scaledop,ref,ref,opsize,postfix,op,mustsimplify);
    if not mustsimplify then
      exit;
    makesimpleforcopy(list,scaledop,opsize,postfix,false,ref,basereplaced);
    op:=scaledop;
  end;
|
||
|
||
{ generate a copy from source to dest of size opsize/postfix }
|
||
procedure gencopy(list: TAsmList; var source, dest: treference; postfix: toppostfix; opsize: tcgsize);
  { copies a single value of size "opsize" through a temporary integer
    register; both references are advanced by genloadstore }
  var
    tmp: tregister;
    ldop, stop: tasmop;
  begin
    preparecopy(list,A_LDR,A_LDUR,source,opsize,postfix,ldop,sourcebasereplaced);
    preparecopy(list,A_STR,A_STUR,dest,opsize,postfix,stop,destbasereplaced);
    tmp:=getintregister(list,opsize);
    genloadstore(list,ldop,tmp,source,postfix,opsize);
    genloadstore(list,stop,tmp,dest,postfix,opsize);
  end;
|
||
|
||
|
||
{ copy the leftovers after an unrolled or regular copy loop }
|
||
procedure gencopyleftovers(list: TAsmList; var source, dest: treference; len: longint);
  { copies the remaining "len" bytes with at most one 8, 4, 2 and 1 byte
    copy each, in that order }
  begin
    { if the loop used post-indexing, switch it off: from here on all
      offsets can definitely be represented }
    if source.addressmode=AM_POSTINDEXED then
      begin
        source.offset:=0;
        source.addressmode:=AM_OFFSET;
      end;
    if dest.addressmode=AM_POSTINDEXED then
      begin
        dest.offset:=0;
        dest.addressmode:=AM_OFFSET;
      end;
    { copy what is left, largest chunk first }
    if len>=8 then
      begin
        gencopy(list,source,dest,PF_NONE,OS_64);
        dec(len,8);
      end;
    if len>=4 then
      begin
        gencopy(list,source,dest,PF_NONE,OS_32);
        dec(len,4);
      end;
    if len>=2 then
      begin
        gencopy(list,source,dest,PF_H,OS_16);
        dec(len,2);
      end;
    if len>=1 then
      begin
        gencopy(list,source,dest,PF_B,OS_8);
        dec(len,1);
      end;
  end;
|
||
|
||
|
||
const
  { load_length + loop dec + cbnz }
  loopoverhead=12;
  { loop overhead + load + store }
  totallooplen=loopoverhead + 8;
var
  commonalign: longint;
  opbytes: longint;
  unrolllimit: tcgint;
  loadop, storeop: tasmop;
  opsize: tcgsize;
  postfix: toppostfix;
  cursrc, curdst, lastref: treference;
  scaledstoreop, unscaledstoreop,
  scaledloadop, unscaledloadop: tasmop;
  regs: array[1..8] of tregister;
  loopcounter: tregister;
  idx, regcount: longint;
  looplabel: tasmlabel;
  srcneedssimplify, dstneedssimplify: boolean;
begin
  { Main body of g_concatcopy: copies "len" bytes from "source" to "dest"
    with a single load/store, an unrolled sequence, a call to the move
    helper or a copy loop, depending on length and alignment. }
  if len=0 then
    exit;
  sourcebasereplaced:=false;
  destbasereplaced:=false;
  { largest alignment shared by both references }
  commonalign:=max(1,newalignment(source.alignment,dest.alignment));
  { does one plain load/store do the job? }
  if (len in [1,2,4,8]) and
     ((commonalign>=(len div 2)) or
      (source.alignment=len) or
      (dest.alignment=len)) then
    begin
      opsize:=int_cgsize(len);
      a_load_ref_ref(list,opsize,opsize,source,dest);
      exit;
    end;

  { an alignment larger than the length is of no use, and would break some
    of the checks below }
  while commonalign>len do
    commonalign:=commonalign div 2;

  { pick the operation size that matches the common alignment }
  case commonalign of
    1:
      begin
        opsize:=OS_8;
        postfix:=PF_B;
      end;
    2:
      begin
        opsize:=OS_16;
        postfix:=PF_H;
      end;
    4:
      begin
        opsize:=OS_32;
        postfix:=PF_None;
      end
    else
      begin
        commonalign:=8;
        opsize:=OS_64;
        postfix:=PF_None;
      end;
  end;
  opbytes:=tcgsize2size[opsize];

  { longest copy that is still unrolled (4 loads + 4 stores) }
  unrolllimit:=min(commonalign,8)*4;
  { single-register instructions unless ldp/stp can be used }
  scaledloadop:=A_LDR;
  scaledstoreop:=A_STR;
  unscaledloadop:=A_LDUR;
  unscaledstoreop:=A_STUR;
  if (commonalign>=4) and
     (len>=commonalign*2) then
    begin
      { ldp/stp -> 2 registers per instruction }
      unrolllimit:=unrolllimit*2;
      scaledloadop:=A_LDP;
      scaledstoreop:=A_STP;
      unscaledloadop:=A_NONE;
      unscaledstoreop:=A_NONE;
    end;
  { calling FPC_MOVE only takes 4 extra instructions }
  if cs_opt_size in current_settings.optimizerswitches then
    unrolllimit:=unrolllimit div 2;
  if (len>unrolllimit) and
     (len>commonalign*8) and
     (pi_do_call in current_procinfo.flags) then
    begin
      g_concatcopy_move(list,source,dest,len);
      exit;
    end;

  srcneedssimplify:=true;
  dstneedssimplify:=true;
  cursrc:=source;
  curdst:=dest;
  if len<=unrolllimit then
    begin
      { unrolled copy: check whether all offsets can be encoded directly }
{$ifdef extdebug}
      list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop; len/opsize/align: '+tostr(len)+'/'+tostr(tcgsize2size[opsize])+'/'+tostr(totalalign))));
{$endif extdebug}
      { The leftovers are handled separately, hence -(len mod opsize). If
        that moved the offset at all, the last regular load/store is
        located one operand size further back. }
      lastref:=source;
      inc(lastref.offset,len-(len mod opbytes));
      if lastref.offset>source.offset then
        dec(lastref.offset,opbytes);
      getmemop(scaledloadop,unscaledloadop,source,lastref,opsize,postfix,loadop,srcneedssimplify);
      lastref:=dest;
      inc(lastref.offset,len-(len mod opbytes));
      if lastref.offset>dest.offset then
        dec(lastref.offset,opbytes);
      getmemop(scaledstoreop,unscaledstoreop,dest,lastref,opsize,postfix,storeop,dstneedssimplify);
      { simplify whatever cannot be encoded directly }
      if srcneedssimplify then
        begin
          loadop:=scaledloadop;
          makesimpleforcopy(list,loadop,opsize,postfix,false,cursrc,sourcebasereplaced);
        end;
      if dstneedssimplify then
        begin
          storeop:=scaledstoreop;
          makesimpleforcopy(list,storeop,opsize,postfix,false,curdst,destbasereplaced);
        end;
      regcount:=len div opbytes;
      { when two registers are transferred at a time, an even number of
        registers is copied }
      if loadop=A_LDP then
        regcount:=regcount and not(1);
      { initialise for dfa }
      regs[low(regs)]:=NR_NO;
      { max 4 loads/stores -> max 8 registers (in case of ldp/stp) }
      for idx:=1 to regcount do
        regs[idx]:=getintregister(list,opsize);
      if loadop=A_LDP then
        begin
          { all loads first ... }
          for idx:=1 to (regcount div 2) do
            gendualloadstore(list,loadop,regs[idx*2-1],regs[idx*2],cursrc,postfix,opsize);
          { ... then all stores }
          for idx:=1 to (regcount div 2) do
            gendualloadstore(list,storeop,regs[idx*2-1],regs[idx*2],curdst,postfix,opsize);
        end
      else
        begin
          for idx:=1 to regcount do
            genloadstore(list,loadop,regs[idx],cursrc,postfix,opsize);
          for idx:=1 to regcount do
            genloadstore(list,storeop,regs[idx],curdst,postfix,opsize);
        end;
      { what remains to be copied }
      len:=len-regcount*opbytes;
{$ifdef extdebug}
      list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop leftover: '+tostr(len))));
{$endif extdebug}
    end
  else
    begin
{$ifdef extdebug}
      list.concat(tai_comment.Create(strpnew('concatcopy regular loop; len/align: '+tostr(len)+'/'+tostr(totalalign))));
{$endif extdebug}
      { regular loop -> post-indexing is always used }
      loadop:=scaledloadop;
      makesimpleforcopy(list,loadop,opsize,postfix,true,cursrc,sourcebasereplaced);
      storeop:=scaledstoreop;
      makesimpleforcopy(list,storeop,opsize,postfix,true,curdst,destbasereplaced);
      current_asmdata.getjumplabel(looplabel);
      loopcounter:=getintregister(list,OS_32);
      { number of iterations }
      if loadop=A_LDP then
        a_load_const_reg(list,OS_32,len div (opbytes*2),loopcounter)
      else
        a_load_const_reg(list,OS_32,len div opbytes,loopcounter);
      a_label(list,looplabel);
      a_op_const_reg(list,OP_SUB,OS_32,1,loopcounter);
      regs[1]:=getintregister(list,opsize);
      if loadop=A_LDP then
        begin
          regs[2]:=getintregister(list,opsize);
          gendualloadstore(list,loadop,regs[1],regs[2],cursrc,postfix,opsize);
          gendualloadstore(list,storeop,regs[1],regs[2],curdst,postfix,opsize);
        end
      else
        begin
          genloadstore(list,loadop,regs[1],cursrc,postfix,opsize);
          genloadstore(list,storeop,regs[1],curdst,postfix,opsize);
        end;
      list.concat(taicpu.op_reg_sym_ofs(A_CBNZ,loopcounter,looplabel,0));
      { what remains to be copied }
      if loadop=A_LDP then
        len:=len mod (opbytes*2)
      else
        len:=len mod opbytes;
    end;
  gencopyleftovers(list,cursrc,curdst,len);
end;
|
||
|
||
|
||
{ Must never be called on this target; always raises an internal error. }
procedure tcgaarch64.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
  begin
    { this functionality is part of g_intf_wrapper and is not meant to be
      invoked on its own }
    InternalError(2013020102);
  end;
|
||
|
||
|
||
{ Emits an explicit floating point exception check: reads FPSR and calls
  FPC_THROWFPUEXCEPTION if any of the bits in $1f or the bit $80 is set.
  Code is only generated when cs_check_fpu_exceptions is active and either
  "force" is true or the current procedure is marked as needing a check.
  If "clear" is true, that mark is reset afterwards. }
procedure tcgaarch64.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
  var
    fpsrreg, maskedreg: TRegister;
    branch: taicpu;
    raiselabel,donelabel: TAsmLabel;
  begin
    { so far, all flavours of AArch64 are assumed to need explicit floating
      point exception checking }
    if not(cs_check_fpu_exceptions in current_settings.localswitches) then
      exit;
    if not(force or current_procinfo.FPUExceptionCheckNeeded) then
      exit;

    fpsrreg:=getintregister(list,OS_INT);
    maskedreg:=getintregister(list,OS_INT);
    list.concat(taicpu.op_reg_reg(A_MRS,fpsrreg,NR_FPSR));
    { any of the low five status bits set -> raise }
    list.concat(taicpu.op_reg_reg_const(A_AND,maskedreg,fpsrreg,$1f));
    current_asmdata.getjumplabel(raiselabel);
    current_asmdata.getjumplabel(donelabel);
    branch:=taicpu.op_reg_sym_ofs(A_CBNZ,maskedreg,raiselabel,0);
    branch.is_jmp:=true;
    list.concat(branch);
    { bit $80 clear as well -> nothing to report }
    list.concat(taicpu.op_reg_reg_const(A_AND,maskedreg,fpsrreg,$80));
    branch:=taicpu.op_reg_sym_ofs(A_CBZ,maskedreg,donelabel,0);
    branch.is_jmp:=true;
    list.concat(branch);
    a_label(list,raiselabel);
    { the helper call is bracketed by allocating/releasing the volatile
      integer registers }
    alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
    cg.a_call_name(list,'FPC_THROWFPUEXCEPTION',false);
    dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
    a_label(list,donelabel);
    if clear then
      current_procinfo.FPUExceptionCheckNeeded:=false;
  end;
|
||
|
||
|
||
{ Emits the profiling hook: copies X30 into X0 and calls _mcount.
  Only implemented for aarch64-linux; any other target triggers an
  internal error. }
procedure tcgaarch64.g_profilecode(list : TAsmList);
  begin
    if target_info.system<>system_aarch64_linux then
      internalerror(2020021901);
    list.concat(taicpu.op_reg_reg(A_MOV,NR_X0,NR_X30));
    a_call_name(list,'_mcount',false);
  end;
|
||
|
||
|
||
{ Instantiates the code generator objects: "cg" becomes a tcgaarch64 and
  "cg128" a tcg128. }
procedure create_codegen;
  begin
    cg:=tcgaarch64.create;
    cg128:=tcg128.create;
  end;
|
||
|
||
end.
|