mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-10-27 02:51:40 +01:00
In r21686 I've introduced optimized 64bit shifts for ARM. But the methods did not check for which machine it has to generate the code. This patch disables the optimized code for now if the target is in cpu_thumb2 and falls back to the generic code. There are 2 problems with the current code: 1.) Thumb-2 does not support shift by register on all data instruction as ARM does. 2.) The code does not generate the required IT-block for the conditional executed code. git-svn-id: trunk@21997 -
505 lines
20 KiB
ObjectPascal
505 lines
20 KiB
ObjectPascal
{
|
|
Copyright (c) 1998-2002 by Florian Klaempfl
|
|
|
|
Generate ARM assembler for math nodes
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
****************************************************************************
|
|
}
|
|
unit narmmat;
|
|
|
|
{$i fpcdefs.inc}
|
|
|
|
interface
|
|
|
|
uses
|
|
node,nmat,ncgmat;
|
|
|
|
type
|
|
tarmmoddivnode = class(tmoddivnode)
|
|
function first_moddivint: tnode;override;
|
|
procedure pass_generate_code;override;
|
|
end;
|
|
|
|
tarmnotnode = class(tcgnotnode)
|
|
procedure second_boolean;override;
|
|
end;
|
|
|
|
tarmunaryminusnode = class(tcgunaryminusnode)
|
|
procedure second_float;override;
|
|
end;
|
|
|
|
tarmshlshrnode = class(tcgshlshrnode)
|
|
procedure second_64bit;override;
|
|
function first_shlshr64bitint: tnode; override;
|
|
end;
|
|
|
|
implementation
|
|
|
|
uses
|
|
globtype,systems,
|
|
cutils,verbose,globals,constexp,
|
|
aasmbase,aasmcpu,aasmtai,aasmdata,
|
|
defutil,
|
|
cgbase,cgobj,hlcgobj,cgutils,
|
|
pass_2,procinfo,
|
|
ncon,
|
|
cpubase,cpuinfo,
|
|
ncgutil,cgcpu,
|
|
nadd,pass_1,symdef;
|
|
|
|
{*****************************************************************************
|
|
TARMMODDIVNODE
|
|
*****************************************************************************}
|
|
|
|
function tarmmoddivnode.first_moddivint: tnode;
|
|
var
|
|
power : longint;
|
|
begin
|
|
if (right.nodetype=ordconstn) and
|
|
(nodetype=divn) and
|
|
(ispowerof2(tordconstnode(right).value,power) or
|
|
(tordconstnode(right).value=1) or
|
|
(tordconstnode(right).value=int64(-1))
|
|
) and
|
|
not(is_64bitint(resultdef)) then
|
|
result:=nil
|
|
else if (current_settings.cputype in [cpu_armv7m]) and
|
|
(nodetype=divn) and
|
|
not(is_64bitint(resultdef)) then
|
|
result:=nil
|
|
else if (current_settings.cputype in [cpu_armv7m]) and
|
|
(nodetype=modn) and
|
|
not(is_64bitint(resultdef)) then
|
|
begin
|
|
if (right.nodetype=ordconstn) and
|
|
ispowerof2(tordconstnode(right).value,power) and
|
|
(tordconstnode(right).value<=256) and
|
|
(tordconstnode(right).value>0) then
|
|
result:=caddnode.create(andn,left,cordconstnode.create(tordconstnode(right).value-1,sinttype,false))
|
|
else
|
|
begin
|
|
result:=caddnode.create(subn,left,caddnode.create(muln,right.getcopy, cmoddivnode.Create(divn,left.getcopy,right.getcopy)));
|
|
right:=nil;
|
|
end;
|
|
left:=nil;
|
|
end
|
|
else
|
|
result:=inherited first_moddivint;
|
|
end;
|
|
|
|
|
|
procedure tarmmoddivnode.pass_generate_code;
|
|
var
|
|
power : longint;
|
|
numerator,
|
|
helper1,
|
|
helper2,
|
|
resultreg : tregister;
|
|
size : Tcgsize;
|
|
so : tshifterop;
|
|
|
|
procedure genOrdConstNodeDiv;
|
|
begin
|
|
if tordconstnode(right).value=0 then
|
|
internalerror(2005061701)
|
|
else if tordconstnode(right).value=1 then
|
|
cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, numerator, resultreg)
|
|
else if (tordconstnode(right).value = int64(-1)) then
|
|
begin
|
|
// note: only in the signed case possible..., may overflow
|
|
current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_MVN,
|
|
resultreg,numerator),toppostfix(ord(cs_check_overflow in current_settings.localswitches)*ord(PF_S))));
|
|
end
|
|
else if ispowerof2(tordconstnode(right).value,power) then
|
|
begin
|
|
if (is_signed(right.resultdef)) then
|
|
begin
|
|
helper1:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
|
|
helper2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
|
|
shifterop_reset(so);
|
|
so.shiftmode:=SM_ASR;
|
|
so.shiftimm:=31;
|
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_shifterop(A_MOV,helper1,numerator,so));
|
|
shifterop_reset(so);
|
|
so.shiftmode:=SM_LSR;
|
|
so.shiftimm:=32-power;
|
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,helper2,numerator,helper1,so));
|
|
shifterop_reset(so);
|
|
so.shiftmode:=SM_ASR;
|
|
so.shiftimm:=power;
|
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_shifterop(A_MOV,resultreg,helper2,so));
|
|
end
|
|
else
|
|
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,power,numerator,resultreg)
|
|
end;
|
|
end;
|
|
|
|
{
|
|
procedure genOrdConstNodeMod;
|
|
var
|
|
modreg, maskreg, tempreg : tregister;
|
|
begin
|
|
if (tordconstnode(right).value = 0) then begin
|
|
internalerror(2005061702);
|
|
end
|
|
else if (abs(tordconstnode(right).value.svalue) = 1) then
|
|
begin
|
|
// x mod +/-1 is always zero
|
|
cg.a_load_const_reg(current_asmdata.CurrAsmList, OS_INT, 0, resultreg);
|
|
end
|
|
else if (ispowerof2(tordconstnode(right).value, power)) then
|
|
begin
|
|
if (is_signed(right.resultdef)) then begin
|
|
|
|
tempreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
|
|
maskreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
|
|
modreg := cg.getintregister(current_asmdata.CurrAsmList, OS_INT);
|
|
|
|
cg.a_load_const_reg(current_asmdata.CurrAsmList, OS_INT, abs(tordconstnode(right).value.svalue)-1, modreg);
|
|
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SAR, OS_INT, 31, numerator, maskreg);
|
|
cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, numerator, modreg, tempreg);
|
|
|
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ANDC, maskreg, maskreg, modreg));
|
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_const(A_SUBFIC, modreg, tempreg, 0));
|
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SUBFE, modreg, modreg, modreg));
|
|
cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, modreg, maskreg, maskreg);
|
|
cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_OR, OS_INT, maskreg, tempreg, resultreg);
|
|
end else begin
|
|
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_AND, OS_INT, tordconstnode(right).value.svalue-1, numerator, resultreg);
|
|
end;
|
|
end else begin
|
|
genOrdConstNodeDiv();
|
|
cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_MUL, OS_INT, tordconstnode(right).value.svalue, resultreg, resultreg);
|
|
cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_SUB, OS_INT, resultreg, numerator, resultreg);
|
|
end;
|
|
end;
|
|
}
|
|
|
|
begin
|
|
secondpass(left);
|
|
secondpass(right);
|
|
|
|
if (current_settings.cputype in [cpu_armv7m]) and
|
|
(nodetype=divn) and
|
|
not(is_64bitint(resultdef)) then
|
|
begin
|
|
size:=def_cgsize(left.resultdef);
|
|
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
|
|
|
|
location_copy(location,left.location);
|
|
location.loc := LOC_REGISTER;
|
|
location.register := cg.getintregister(current_asmdata.CurrAsmList,size);
|
|
resultreg:=location.register;
|
|
|
|
if (right.nodetype=ordconstn) and
|
|
((tordconstnode(right).value=1) or
|
|
(tordconstnode(right).value=int64(-1)) or
|
|
(tordconstnode(right).value=0) or
|
|
ispowerof2(tordconstnode(right).value,power)) then
|
|
begin
|
|
numerator:=left.location.register;
|
|
|
|
genOrdConstNodeDiv;
|
|
end
|
|
else
|
|
begin
|
|
hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);
|
|
|
|
if is_signed(left.resultdef) or
|
|
is_signed(right.resultdef) then
|
|
cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_IDIV,OS_INT,right.location.register,left.location.register,location.register)
|
|
else
|
|
cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_DIV,OS_INT,right.location.register,left.location.register,location.register);
|
|
end;
|
|
end
|
|
else
|
|
begin
|
|
location_copy(location,left.location);
|
|
|
|
{ put numerator in register }
|
|
size:=def_cgsize(left.resultdef);
|
|
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,
|
|
left.resultdef,left.resultdef,true);
|
|
location_copy(location,left.location);
|
|
numerator:=location.register;
|
|
resultreg:=location.register;
|
|
if location.loc=LOC_CREGISTER then
|
|
begin
|
|
location.loc := LOC_REGISTER;
|
|
location.register := cg.getintregister(current_asmdata.CurrAsmList,size);
|
|
resultreg:=location.register;
|
|
end
|
|
else if (nodetype=modn) or (right.nodetype=ordconstn) then
|
|
begin
|
|
// for a modulus op, and for const nodes we need the result register
|
|
// to be an extra register
|
|
resultreg:=cg.getintregister(current_asmdata.CurrAsmList,size);
|
|
end;
|
|
|
|
if right.nodetype=ordconstn then
|
|
begin
|
|
if nodetype=divn then
|
|
genOrdConstNodeDiv
|
|
else
|
|
// genOrdConstNodeMod;
|
|
end;
|
|
|
|
location.register:=resultreg;
|
|
end;
|
|
|
|
{ unsigned division/module can only overflow in case of division by zero }
|
|
{ (but checking this overflow flag is more convoluted than performing a }
|
|
{ simple comparison with 0) }
|
|
if is_signed(right.resultdef) then
|
|
cg.g_overflowcheck(current_asmdata.CurrAsmList,location,resultdef);
|
|
end;
|
|
|
|
{*****************************************************************************
|
|
TARMNOTNODE
|
|
*****************************************************************************}
|
|
|
|
procedure tarmnotnode.second_boolean;
|
|
var
|
|
hl : tasmlabel;
|
|
begin
|
|
{ if the location is LOC_JUMP, we do the secondpass after the
|
|
labels are allocated
|
|
}
|
|
if left.expectloc=LOC_JUMP then
|
|
begin
|
|
hl:=current_procinfo.CurrTrueLabel;
|
|
current_procinfo.CurrTrueLabel:=current_procinfo.CurrFalseLabel;
|
|
current_procinfo.CurrFalseLabel:=hl;
|
|
secondpass(left);
|
|
maketojumpbool(current_asmdata.CurrAsmList,left,lr_load_regvars);
|
|
hl:=current_procinfo.CurrTrueLabel;
|
|
current_procinfo.CurrTrueLabel:=current_procinfo.CurrFalseLabel;
|
|
current_procinfo.CurrFalseLabel:=hl;
|
|
location.loc:=LOC_JUMP;
|
|
end
|
|
else
|
|
begin
|
|
secondpass(left);
|
|
case left.location.loc of
|
|
LOC_FLAGS :
|
|
begin
|
|
location_copy(location,left.location);
|
|
inverse_flags(location.resflags);
|
|
end;
|
|
LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE,
|
|
LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF :
|
|
begin
|
|
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
|
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,left.location.register,0));
|
|
location_reset(location,LOC_FLAGS,OS_NO);
|
|
location.resflags:=F_EQ;
|
|
end;
|
|
else
|
|
internalerror(2003042401);
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
{*****************************************************************************
|
|
TARMUNARYMINUSNODE
|
|
*****************************************************************************}
|
|
|
|
procedure tarmunaryminusnode.second_float;
|
|
var
|
|
op: tasmop;
|
|
begin
|
|
secondpass(left);
|
|
case current_settings.fputype of
|
|
fpu_fpa,
|
|
fpu_fpa10,
|
|
fpu_fpa11:
|
|
begin
|
|
location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
|
|
location:=left.location;
|
|
current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSF,
|
|
location.register,left.location.register,0),
|
|
cgsize2fpuoppostfix[def_cgsize(resultdef)]));
|
|
end;
|
|
fpu_vfpv2,
|
|
fpu_vfpv3,
|
|
fpu_vfpv3_d16:
|
|
begin
|
|
location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
|
|
location:=left.location;
|
|
if (left.location.loc=LOC_CMMREGISTER) then
|
|
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
|
|
if (location.size=OS_F32) then
|
|
op:=A_FNEGS
|
|
else
|
|
op:=A_FNEGD;
|
|
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,
|
|
location.register,left.location.register));
|
|
end;
|
|
else
|
|
internalerror(2009112602);
|
|
end;
|
|
end;
|
|
|
|
function tarmshlshrnode.first_shlshr64bitint: tnode;
|
|
begin
|
|
if (current_settings.cputype in cpu_thumb2) then
|
|
result:=inherited
|
|
else
|
|
result := nil;
|
|
end;
|
|
|
|
procedure tarmshlshrnode.second_64bit;
|
|
var
|
|
hreg64hi,hreg64lo,shiftreg:Tregister;
|
|
v : TConstExprInt;
|
|
l1,l2,l3:Tasmlabel;
|
|
so: tshifterop;
|
|
|
|
procedure emit_instr(p: tai);
|
|
begin
|
|
current_asmdata.CurrAsmList.concat(p);
|
|
end;
|
|
|
|
{Reg1 gets shifted and moved into reg2, and is set to zero afterwards}
|
|
procedure shift_more_than_32(reg1, reg2: TRegister; shiftval: Byte ; sm: TShiftMode);
|
|
begin
|
|
shifterop_reset(so); so.shiftimm:=shiftval - 32; so.shiftmode:=sm;
|
|
emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, reg2, reg1, so));
|
|
emit_instr(taicpu.op_reg_const(A_MOV, reg1, 0));
|
|
end;
|
|
|
|
procedure shift_less_than_32(reg1, reg2: TRegister; shiftval: Byte; shiftright: boolean);
|
|
begin
|
|
shifterop_reset(so); so.shiftimm:=shiftval;
|
|
if shiftright then so.shiftmode:=SM_LSR else so.shiftmode:=SM_LSL;
|
|
emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, reg1, reg1, so));
|
|
|
|
if shiftright then so.shiftmode:=SM_LSL else so.shiftmode:=SM_LSR;
|
|
so.shiftimm:=32-shiftval;
|
|
emit_instr(taicpu.op_reg_reg_reg_shifterop(A_ORR, reg1, reg1, reg2, so));
|
|
|
|
if shiftright then so.shiftmode:=SM_LSR else so.shiftmode:=SM_LSL;
|
|
so.shiftimm:=shiftval;
|
|
emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, reg2, reg2, so));
|
|
end;
|
|
|
|
procedure shift_by_variable(reg1, reg2, shiftval: TRegister; shiftright: boolean);
|
|
var
|
|
shiftval2:TRegister;
|
|
begin
|
|
shifterop_reset(so);
|
|
shiftval2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
|
|
{Do we shift more than 32 bits?}
|
|
emit_instr(setoppostfix(taicpu.op_reg_reg_const(A_RSB, shiftval2, shiftval, 32), PF_S));
|
|
|
|
{This part cares for 32 bits and more}
|
|
emit_instr(setcondition(taicpu.op_reg_reg_const(A_SUB, shiftval2, shiftval, 32), C_MI));
|
|
if shiftright then so.shiftmode:=SM_LSR else so.shiftmode:=SM_LSL;
|
|
so.rs:=shiftval2;
|
|
emit_instr(setcondition(taicpu.op_reg_reg_shifterop(A_MOV, reg2, reg1, so), C_MI));
|
|
|
|
{Less than 32 bits}
|
|
so.rs:=shiftval;
|
|
emit_instr(setcondition(taicpu.op_reg_reg_shifterop(A_MOV, reg2, reg2, so), C_PL));
|
|
if shiftright then so.shiftmode:=SM_LSL else so.shiftmode:=SM_LSR;
|
|
so.rs:=shiftval2;
|
|
emit_instr(setcondition(taicpu.op_reg_reg_reg_shifterop(A_ORR, reg2, reg2, reg1, so), C_PL));
|
|
|
|
{Final adjustments}
|
|
if shiftright then so.shiftmode:=SM_LSR else so.shiftmode:=SM_LSL;
|
|
so.rs:=shiftval;
|
|
emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, reg1, reg1, so));
|
|
end;
|
|
|
|
begin
|
|
if (current_settings.cputype in cpu_thumb2) then
|
|
begin
|
|
inherited;
|
|
exit;
|
|
end;
|
|
|
|
location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
|
|
|
|
{ load left operator in a register }
|
|
hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
|
|
hreg64hi:=left.location.register64.reghi;
|
|
hreg64lo:=left.location.register64.reglo;
|
|
location.register64.reghi:=hreg64hi;
|
|
location.register64.reglo:=hreg64lo;
|
|
|
|
{ shifting by a constant directly coded: }
|
|
if (right.nodetype=ordconstn) then
|
|
begin
|
|
v:=Tordconstnode(right).value and 63;
|
|
{Single bit shift}
|
|
if v = 1 then
|
|
if nodetype=shln then
|
|
begin
|
|
{Shift left by one by 2 simple 32bit additions}
|
|
emit_instr(setoppostfix(taicpu.op_reg_reg_reg(A_ADD, hreg64lo, hreg64lo, hreg64lo), PF_S));
|
|
emit_instr(taicpu.op_reg_reg_reg(A_ADC, hreg64hi, hreg64hi, hreg64hi));
|
|
end
|
|
else
|
|
begin
|
|
{Shift right by first shifting hi by one and then using RRX (rotate right extended), which rotates through the carry}
|
|
shifterop_reset(so); so.shiftmode:=SM_LSR; so.shiftimm:=1;
|
|
emit_instr(setoppostfix(taicpu.op_reg_reg_shifterop(A_MOV, hreg64hi, hreg64hi, so), PF_S));
|
|
so.shiftmode:=SM_RRX; so.shiftimm:=0; {RRX does NOT have a shift amount}
|
|
emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, hreg64lo, hreg64lo, so));
|
|
end
|
|
{A 32bit shift just replaces a register and clears the other}
|
|
else if v = 32 then
|
|
begin
|
|
if nodetype=shln then
|
|
emit_instr(taicpu.op_reg_const(A_MOV, hreg64hi, 0))
|
|
else
|
|
emit_instr(taicpu.op_reg_const(A_MOV, hreg64lo, 0));
|
|
location.register64.reghi:=hreg64lo;
|
|
location.register64.reglo:=hreg64hi;
|
|
end
|
|
{Shift LESS than 32}
|
|
else if (v < 32) and (v > 1) then
|
|
if nodetype=shln then
|
|
shift_less_than_32(hreg64hi, hreg64lo, v.uvalue, false)
|
|
else
|
|
shift_less_than_32(hreg64lo, hreg64hi, v.uvalue, true)
|
|
{More than 32}
|
|
else if v > 32 then
|
|
if nodetype=shln then
|
|
shift_more_than_32(hreg64lo, hreg64hi, v.uvalue, SM_LSL)
|
|
else
|
|
shift_more_than_32(hreg64hi, hreg64lo, v.uvalue, SM_LSR);
|
|
end
|
|
else
|
|
begin
|
|
{ force right operators in a register }
|
|
hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,resultdef,false);
|
|
if nodetype = shln then
|
|
shift_by_variable(hreg64lo,hreg64hi,right.location.register, false)
|
|
else
|
|
shift_by_variable(hreg64hi,hreg64lo,right.location.register, true);
|
|
end;
|
|
end;
|
|
|
|
|
|
begin
|
|
cmoddivnode:=tarmmoddivnode;
|
|
cnotnode:=tarmnotnode;
|
|
cunaryminusnode:=tarmunaryminusnode;
|
|
cshlshrnode:=tarmshlshrnode;
|
|
end.
|