fpc/compiler/powerpc/nppcmat.pas
2002-01-03 14:57:52 +00:00

625 lines
24 KiB
ObjectPascal

{
$Id$
Copyright (c) 1998-2000 by Florian Klaempfl
Generate PowerPC assembler for math nodes
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
unit nppcmat;
{$i defines.inc}
interface
uses
node,nmat;
type
tppcmoddivnode = class(tmoddivnode)
procedure pass_2;override;
end;
tppcshlshrnode = class(tshlshrnode)
procedure pass_2;override;
end;
tppcunaryminusnode = class(tunaryminusnode)
procedure pass_2;override;
end;
tppcnotnode = class(tnotnode)
procedure pass_2;override;
end;
implementation
uses
globtype,systems,
cutils,verbose,globals,
symconst,symdef,aasm,types,
cgbase,cgobj,temp_gen,pass_1,pass_2,
ncon,
cpubase,
cga,tgcpu,nppcutil,cgcpu,cg64f32;
{*****************************************************************************
TPPCMODDIVNODE
*****************************************************************************}
procedure tppcmoddivnode.pass_2;
const
{ signed overflow }
divops: array[boolean, boolean] of tasmop =
((A_DIVWU,A_DIVWUO),(A_DIVW,A_DIVWO));
var
power,
l1, l2 : longint;
op : tasmop;
numerator,
divider,
resultreg : tregister;
saved : boolean;
begin
secondpass(left);
saved:=maybe_savetotemp(right.registers32,left,is_64bitint(left.resulttype.def));
secondpass(right);
if saved then
restorefromtemp(left,is_64bitint(left.resulttype.def));
set_location(location,left.location);
resultreg := R_NO;
{ put numerator in register }
if (left.location.loc in [LOC_REFERENCE,LOC_MEM]) then
begin
del_reference(left.location.reference);
numerator := getregisterint;
{ OS_32 because everything is always converted to longint/ }
{ cardinal in the resulttype pass (JM) }
cg.a_load_ref_reg(expraslist,OS_32,left.location.reference,
numerator);
resultreg := numerator;
end
else
begin
numerator := left.location.register;
if left.location.loc = LOC_CREGISTER then
resultreg := getregisterint
else
resultreg := numerator;
end;
if (nodetype = divn) and
(right.nodetype = ordconstn) and
ispowerof2(tordconstnode(right).value,power) then
begin
{ From 'The PowerPC Compiler Writer's Guide": }
{ This code uses the fact that, in the PowerPC architecture, }
{ the shift right algebraic instructions set the Carry bit if }
{ the source register contains a negative number and one or }
{ more 1-bits are shifted out. Otherwise, the carry bit is }
{ cleared. The addze instruction corrects the quotient, if }
{ necessary, when the dividend is negative. For example, if }
{ n = -13, (0xFFFF_FFF3), and k = 2, after executing the srawi }
{ instruction, q = -4 (0xFFFF_FFFC) and CA = 1. After executing }
{ the addze instruction, q = -3, the correct quotient. }
cg.a_op_const_reg_reg(list,OP_SAR,OS_32,aword(power),numerator,resultreg);
exprasmlist.concat(taicpu.op_reg_reg(A_ADDZE,resultreg,resultreg));
end
else
begin
{ load divider in a register if necessary }
case right.location.loc of
LOC_CREGISTER, LOC_REGISTER:
divider := right.location.register;
LOC_REFERENCE, LOC_MEM:
begin
divider := cg.get_scratch_reg(exprasmlist);
cg.a_load_ref_reg(exprasmlist,OS_32,
right.location.reference,divider);
del_reference(right.location.reference);
end;
end;
{ needs overflow checking, (-maxlongint-1) div (-1) overflows! }
{ And on PPC, the only way to catch a div-by-0 is by checking }
{ the overflow flag (JM) }
op := divops[is_signed(right.resulttype.def),
cs_check_overflow in aktlocalswitches];
exprasmlist(taicpu.op_reg_reg_reg(op,resultreg,numerator,
divider))
end;
{ free used registers }
if right.location.loc in [LOC_REFERENCE,LOC_MEM] then
cg.free_scratch_reg(exprasmlist,divider)
else
ungetregister(divider);
if numerator <> resultreg then
ungetregisterint(numerator);
{ set result location }
location.loc:=LOC_REGISTER;
location.register:=resultreg;
emitoverflowcheck(self);
end;
{*****************************************************************************
TPPCSHLRSHRNODE
*****************************************************************************}
procedure tppcshlshrnode.pass_2;
var
resultreg, hregister1,hregister2,
hregisterhigh,hregisterlow : tregister;
op : topcg;
asmop1, asmop2: tasmop;
shiftval: aword;
saved : boolean;
begin
secondpass(left);
saved:=maybe_savetotemp(right.registers32,left,is_64bitint(left.resulttype.def));
secondpass(right);
if saved then
restorefromtemp(left,is_64bitint(left.resulttype.def));
if is_64bitint(left.resulttype.def) then
begin
case left.location.loc of
LOC_REGISTER, LOC_CREGISTER:
begin
hregisterhigh := left.location.registerhigh;
hregisterlow := left.location.registerlow;
if left.location.loc = LOC_REGISTER then
begin
location.registerhigh := hregisterhigh;
location.registerlow := hregisterlow
end
else
begin
location.registerhigh := getregisterint;
location.registerlow := getregisterint;
end;
end;
LOC_REFERENCE,LOC_MEM:
begin
{ !!!!!!!! not good, registers are release too soon this way !!!! (JM) }
del_reference(left.location.reference);
hregisterhigh := getregisterint;
location.registerhigh := hregisterhigh;
hregisterlow := getregisterint;
location.registerlow := hregisterlow;
tcg64f32(cg).a_load64_ref_reg(list,left.location.reference,
hregisterlow,hregisterhigh);
end;
end;
if (right.nodetype = ordconstn) then
begin
if tordconstnode(right).value > 31 then
begin
if nodetype = shln then
begin
if (value and 31) <> 0 then
cg.a_op_const_reg_reg(exprasmlist,OP_SHL,OS_32,value and 31,
hregisterlow,location.registerhigh)
cg.a_load_const_reg(exprasmlist,OS_32,0,location.registerlow);
end
else
begin
if (value and 31) <> 0 then
cg.a_op_const_reg_reg(exprasmlist,OP_SHR,OS_32,value and 31,
hregisterhigh,location.registerlow);
cg.a_load_const_reg(exprasmlist,OS_32,0,location.registerhigh);
end;
end
else
begin
shiftval := aword(tordconstnode(right).value;
if nodetype = shln then
begin
exprasmlist.concat(taicpu.op_reg_reg_const_const_const(
A_RLWINM,location.registerhigh,hregisterhigh,shiftval,
0,31-shiftval));
exprasmlist.concat(taicpu.op_reg_reg_const_const_const(
A_RLWIMI,location.registerhigh,hregisterlow,shiftval,
32-shiftval,31));
exprasmlist.concat(taicpu.op_reg_reg_const_const_const(
A_RLWINM,location.registerlow,hregisterlow,shiftval,
0,31-shiftval));
end
else
begin
exprasmlist.concat(taicpu.op_reg_reg_const_const_const(
A_RLWINM,location.registerlow,hregisterlow,32-shiftval,
shiftval,31));
exprasmlist.concat(taicpu.op_reg_reg_const_const_const(
A_RLWIMI,location.registerlow,hregisterhigh,32-shiftval,
0,shiftval-1));
exprasmlist.concat(taicpu.op_reg_reg_const_const_const(
A_RLWINM,location.registerhigh,hregisterhigh,32-shiftval,
shiftval,31));
end;
end;
end
else
{ no constant shiftcount }
begin
case right.location.loc of
LOC_REGISTER,LOC_CREGISTER:
begin
hregister1 := right.location.register;
end;
LOC_REFERENCE,LOC_MEM:
begin
hregister1 := get_scratch_reg(exprasmlist);
cg.a_load_ref_reg(exprasmlist,OS_S32,
right.location.reference,hregister1);
end;
end;
if nodetype = shln then
begin
asmop1 := A_SLW;
asmop2 := A_SRW;
end
else
begin
asmop1 := A_SRW;
asmop2 := A_SLW;
resultreg := location.registerhigh;
location.registerhigh := location.registerlow;
location.registerlow := resultreg;
end;
getexplicitregisterint(R_0);
exprasmlist.concat(taicpu.op_reg_reg_const(A_SUBFIC,
R_0,hregister1,32));
exprasmlist.concat(taicpu.op_reg_reg_reg(asmop1,
location.registerhigh,hregisterhigh,hregister1));
exprasmlist.concat(taicpu.op_reg_reg_reg(asmop2,
R_0,hregisterlow,R_0));
exprasmlist.concat(taicpu.op_reg_reg_reg(A_OR,
location.registerhigh,location.registerhigh,R_0));
exprasmlist.concat(taicpu.op_reg_reg_const(A_SUBI,
R_0,hregister1,32));
exprasmlist.concat(taicpu.op_reg_reg_reg(asmop1,
R_0,hregisterlow,R_0));
exprasmlist.concat(taicpu.op_reg_reg_reg(A_OR,
location.registerhigh,location.registerhigh,R_0));
exprasmlist.concat(taicpu.op_reg_reg_reg(asmop1,
location.registerlow,hregisterlow,hregister1));
ungetregister(R_0);
if right.location.loc in [LOC_MEM,LOC_REFERENCE] then
free_scratch_reg(exprasmlist,hregister1)
else
ungetregister(hregister1);
end
end
else
begin
{ load left operators in a register }
if (left.location.loc in [LOC_REFERENCE,LOC_MEM]) then
begin
del_reference(left.location.reference);
hregister1 := getregisterint;
{ OS_32 because everything is always converted to longint/ }
{ cardinal in the resulttype pass (JM) }
cg.a_load_ref_reg(expraslist,OS_32,left.location.reference,
hregister1);
resultreg := hregister1;
end
else
begin
hregister1 := left.location.register;
if left.location.loc = LOC_CREGISTER then
resultreg := getregisterint
else
resultreg := hregister1;
end;
{ determine operator }
if nodetype=shln then
op:=OP_SHL
else
op:=OP_SHR;
{ shifting by a constant directly coded: }
if (right.nodetype=ordconstn) then
cg.a_op_reg_reg_const(exprasmlist,op,OS_32,resultreg,
hregister1,tordconstnode(right).value and 31)
else
begin
{ load shift count in a register if necessary }
case right.location.loc of
LOC_CREGISTER, LOC_REGISTER:
hregister2 := right.location.register;
LOC_REFERENCE, LOC_MEM:
begin
hregister2 := cg.get_scratch_reg(exprasmlist);
cg.a_load_ref_reg(exprasmlist,OS_32,
right.location.reference,hregister2);
del_reference(right.location.reference);
end;
end;
tcgppc(cg).a_op_reg_reg_reg(exprasmlist,op,hregister1,
hregister2,resultreg);
if right.location.loc in [LOC_REFERENCE,LOC_MEM] then
cg.free_scratch_reg(exprasmlist,hregister2)
else
ungetregister(hregister2);
end;
{ set result location }
location.loc:=LOC_REGISTER;
location.register:=resultreg;
end;
end;
{*****************************************************************************
TPPCUNARYMINUSNODE
*****************************************************************************}
procedure tppcunaryminusnode.pass_2;
var
src1, src2, tmp: tregister;
op: tasmop;
begin
secondpass(left);
if is_64bitint(left.resulttype.def) then
begin
clear_location(location);
location.loc:=LOC_REGISTER;
case left.location.loc of
LOC_REGISTER, LOC_CREGISTER :
begin
src1 := left.location.registerlow;
src2 := left.location.registerhigh;
if left.location.loc = LOC_REGISTER then
begin
location.registerlow:=src1;
location.registerhigh:=src2;
end
else
begin
location.registerlow := getregisterint;
location.registerhigh := getregisterint;
end;
end;
LOC_REFERENCE,LOC_MEM :
begin
del_reference(left.location.reference);
location.registerlow:=getregisterint;
src1 := location.registerlow;
location.registerhigh:=getregisterint;
src2 := location.registerhigh;
tcg64f32(cg).a_load64_ref_reg(exprasmlist,left.location.reference,
location.registerlow,
location.registerhigh);
end;
end;
exprasmlist.concat(taicpu.op_reg_reg(A_NEG,location.registerlow,
src1));
cg.a_op_reg_reg(OP_NOT,OS_32,src2,location.registerhigh);
tmp := cg.get_scratch_reg(exprasmlist);
tcgppc(cg).a_op_const_reg_reg(OP_SAR,31,location.registerlow,tmp);
if not(cs_check_overflow in aktlocalswitches) then
cg.a_op_reg_reg(OP_ADD,OS_32,location.registerhigh,tmp)
else
exprasmlist.concat(taicpu.op_reg_reg_reg(A_ADDO,tmp,
location.registerhigh,tmp));
cg.free_scratch_reg(exprasmlist,tmp);
end
else
begin
location.loc:=LOC_REGISTER;
case left.location.loc of
LOC_FPU, LOC_REGISTER:
begin
src1 := left.location.register;
location.register := src1;
end;
LOC_CFPUREGISTER, LOC_CREGISTER:
begin
src1 := left.location.register;
if left.location.loc = LOC_CREGISTER then
location.register := getregisterint
else
location.register := getregisterfpu;
end;
LOC_REFERENCE,LOC_MEM:
begin
del_reference(left.location.reference);
if (left.resulttype.def.deftype=floatdef) then
begin
src1 := getregisterfpu;
location.register := src1;
floatload(tfloatdef(left.resulttype.def).typ,
left.location.reference,src1);
end
else
begin
src1 := getregisterint;
location.register:= src1;
cg.a_load_ref_reg(exprasmlist,OS_32,
left.location.reference,src1);
end;
end;
end;
{ choose appropriate operand }
if left.resulttype.def <> floatdef then
if not(cs_check_overflow in aktlocalswitches) then
op := A_NEG
else
op := A_NEGO
else
op := A_FNEG;
{ emit operation }
eprasmlist.concat(taicpu.op_reg_reg(op,location.register,src1));
end;
{ Here was a problem... }
{ Operand to be negated always }
{ seems to be converted to signed }
{ 32-bit before doing neg!! }
{ So this is useless... }
{ that's not true: -2^31 gives an overflow error if it is negated (FK) }
emitoverflowcheck(self);
end;
{*****************************************************************************
TPPCNOTNODE
*****************************************************************************}
procedure tppcnotnode.pass_2;
var
hl : tasmlabel;
regl, regh: tregister;
begin
if is_boolean(resulttype.def) then
begin
{ the second pass could change the location of left }
{ if it is a register variable, so we've to do }
{ this before the case statement }
if left.location.loc in [LOC_REFERENCE,LOC_MEM,
LOC_FLAGS,LOC_REGISTER,LOC_CREGISTER] then
secondpass(left);
case left.location.loc of
LOC_JUMP :
begin
hl:=truelabel;
truelabel:=falselabel;
falselabel:=hl;
secondpass(left);
maketojumpbool(left,lr_load_regvars);
hl:=truelabel;
truelabel:=falselabel;
falselabel:=hl;
end;
LOC_FLAGS :
location.resflags:=inverse_flags(left.location.resflags);
LOC_REGISTER, LOC_CREGISTER, LOC_REFERENCE, LOC_MEM :
begin
if left.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
regl := left.location.register
else
begin
regl := getregisterint;
cg.a_load_ref_reg(exprasmlist,def_cgsize(left.resulttype.def),
left.location.reference,regl);
end;
location.loc:=LOC_FLAGS;
location.resflags.cr:=0;
location.resflags.flag:=F_EQ;
exprasmlist.concat(taicpu.op_reg_const(A_CMPWI,regl,0));
ungetregister(regl);
end;
end;
end
else if is_64bitint(left.resulttype.def) then
begin
secondpass(left);
clear_location(location);
location.loc:=LOC_REGISTER;
{ make sure left is in a register and set the dest register }
case left.location.loc of
LOC_REFERENCE, LOC_MEM, LOC_CREGISTER:
begin
location.registerlow := getregisterint;
location.registerhigh := getregisterint;
if left.location.loc <> LOC_CREGISTER then
begin
tcg64f32(cg).a_load64_ref_reg(exprasmlist,
left.location.reference,location.registerlow,
location.registerhigh);
regl := location.registerlow;
regh := location.registerhigh;
end
else
begin
regl := left.location.registerlow;
regh := left.location.registerhigh;
end;
end;
LOC_REGISTER:
begin
regl := left.location.registerlow;
location.registerlow := regl;
regh := left.location.registerhigh;
location.registerhigh := regh;
end;
end;
{ perform the NOT operation }
exprasmlist.concat(taicpu.op_reg_reg(A_NOT,location.registerhigh,
regh);
exprasmlist.concat(taicpu.op_reg_reg(A_NOT,location.registerlow,
regl);
end
else
begin
secondpass(left);
clear_location(location);
location.loc:=LOC_REGISTER;
{ make sure left is in a register and set the dest register }
case left.location.loc of
LOC_REFERENCE, LOC_MEM, LOC_CREGISTER:
begin
location.register := getregisterint;
if left.location.loc <> LOC_CREGISTER then
begin
cg.a_load_ref_reg(exprasmlist,left.location.reference,
location.register);
regl := location.register;
end
else
regl := left.location.register;
end;
LOC_REGISTER:
regl := left.location.register;
end;
{ perform the NOT operation }
exprasmlist.concat(taicpu.op_reg_reg(A_NOT,location.register,
regl);
{ release the source reg if it wasn't reused }
if regl <> location.register then
ungetregisterint(regl);
end;
end;
begin
cmoddivnode:=tppcmoddivnode;
cshlshrnode:=tppcshlshrnode;
cunaryminusnode:=tppcunaryminusnode;
cnotnode:=tppcnotnode;
end.
{
$Log$
Revision 1.2 2002-01-03 14:57:52 jonas
* completed (not compilale yet though)
Revision 1.1 2001/12/29 15:28:58 jonas
* powerpc/cgcpu.pas compiles :)
* several powerpc-related fixes
* cpuasm unit is now based on common tainst unit
+ nppcmat unit for powerpc (almost complete)
}