Added initial support for the Cortex-M4F FPv4_S16 FPU

git-svn-id: branches/laksen/arm-embedded@22597 -
This commit is contained in:
Jeppe Johansen 2012-10-08 20:10:45 +00:00
parent 0087661fb5
commit a8f9b0dac4
15 changed files with 461 additions and 36 deletions

View File

@ -106,6 +106,8 @@ unit agarmgas;
result:='-mfpu=vfpv3 '+result;
if (current_settings.fputype = fpu_vfpv3_d16) then
result:='-mfpu=vfpv3-d16 '+result;
if (current_settings.fputype = fpu_fpv4_s16) then
result:='-mfpu=fpv4-sp-d16 '+result;
if current_settings.cputype=cpu_armv7m then
result:='-march=armv7m -mthumb -mthumb-interwork '+result
@ -292,8 +294,10 @@ unit agarmgas;
if taicpu(hp).ops = 0 then
s:=#9+gas_op2str[op]+' '+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix]
else if (taicpu(hp).opcode>=A_VABS) and (taicpu(hp).opcode<=A_VSUB) then
s:=#9+gas_op2str[op]+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix]
else
s:=#9+gas_op2str[op]+oppostfix2str[taicpu(hp).oppostfix]+postfix+cond2str[taicpu(hp).condition]; // Conditional infixes are deprecated in unified syntax
s:=#9+gas_op2str[op]+oppostfix2str[taicpu(hp).oppostfix]+cond2str[taicpu(hp).condition]+postfix; // Conditional infixes are deprecated in unified syntax
end
else
s:=#9+gas_op2str[op]+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix];

View File

@ -161,6 +161,12 @@ unit cgcpu;
procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
end;
tthumb2cg64farm = class(tcg64farm)
@ -3120,10 +3126,17 @@ unit cgcpu;
rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
[RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
RS_R10,RS_R12,RS_R14],first_int_imreg,[]);
rg[R_FPUREGISTER]:=trgcputhumb2.create(R_FPUREGISTER,R_SUBNONE,
rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
[RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);
rg[R_MMREGISTER]:=trgcputhumb2.create(R_MMREGISTER,R_SUBNONE,
[RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
if current_settings.fputype=fpu_fpv4_s16 then
rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
[RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
],first_mm_imreg,[])
else
rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
[RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
end;
@ -3959,6 +3972,127 @@ unit cgcpu;
Result := ref;
end;
procedure Tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
  { Copies the mm register reg1 into reg2.

    Only the OS_F32 -> OS_F32 combination emits code: a VMOV with the .f32
    postfix, which is appended to list and then handed to
    add_move_instruction. Every other size combination emits nothing, and
    the shuffle argument is never inspected. }
  var
    moveinstr: taicpu;
  begin
    { NOTE(review): OS_F64 -> OS_F64 and OS_F32 -> OS_F64 were empty
      placeholders in the initial implementation (their VMOV/VCVT sequences
      were commented out), so they still generate no code here. Presumably
      doubles are expected to be handled by software helpers on this FPU -
      confirm before relying on it. }
    if (fromsize<>OS_F32) or
       (tosize<>OS_F32) then
      exit;

    moveinstr:=taicpu.op_reg_reg(A_VMOV,reg2,reg1);
    moveinstr:=setoppostfix(moveinstr,PF_F32);
    list.Concat(moveinstr);
    add_move_instruction(moveinstr);
  end;
procedure Tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
  { Loads the mm register reg from the memory location ref using VLDR.

    The reference is first reduced to a single base register (plus offset):
    a base+index pair is added into a scratch integer register, and a
    symbolic reference is turned into an address held in a scratch register.
    The postfix of the VLDR is picked from fromsize only (.f32 for OS_F32,
    .f64 otherwise); tosize and shuffle are not inspected. }
  var
    addr: treference;
    scratch: TRegister;
    shift: tshifterop;
    postfix: TOpPostfix;
  begin
    addr:=ref;

    { fold base+index (optionally shifted) into one register; presumably
      needed because VLDR has no register-offset addressing form }
    if (addr.base<>NR_NO) and
       (addr.index<>NR_NO) then
      begin
        scratch:=getintregister(list,OS_INT);
        if addr.shiftmode=SM_None then
          a_op_reg_reg_reg(list,OP_ADD,OS_INT,addr.index,addr.base,scratch)
        else
          begin
            shift.rs:=addr.index;
            shift.shiftimm:=addr.shiftimm;
            shift.shiftmode:=addr.shiftmode;
            list.concat(taicpu.op_reg_reg_shifterop(A_ADD,scratch,addr.base,shift));
          end;
        reference_reset_base(addr,scratch,addr.offset,0);
      end;

    { a symbolic address is materialised into a register first }
    if assigned(addr.symbol) then
      begin
        scratch:=getintregister(list,OS_INT);
        a_loadaddr_ref_reg(list,addr,scratch);
        reference_reset_base(addr,scratch,0,0);
      end;

    if fromsize=OS_F32 then
      postfix:=PF_F32
    else
      postfix:=PF_F64;
    list.Concat(setoppostfix(taicpu.op_reg_ref(A_VLDR,reg,addr),postfix));
  end;
procedure Tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
  { Stores the mm register reg to the memory location ref using VSTR.

    The reference is first reduced to a single base register (plus offset):
    a base+index pair is added into a scratch integer register, and a
    symbolic reference is turned into an address held in a scratch register.
    The postfix of the VSTR is picked from fromsize only (.32 for OS_F32,
    .64 otherwise); tosize and shuffle are not inspected. }
  var
    addr: treference;
    shift: tshifterop;
    scratch: TRegister;
    postfix: TOpPostfix;
  begin
    addr:=ref;

    { fold base+index (optionally shifted) into one register; presumably
      needed because VSTR has no register-offset addressing form }
    if (addr.base<>NR_NO) and
       (addr.index<>NR_NO) then
      begin
        scratch:=getintregister(list,OS_INT);
        if addr.shiftmode=SM_None then
          a_op_reg_reg_reg(list,OP_ADD,OS_INT,addr.index,addr.base,scratch)
        else
          begin
            shift.rs:=addr.index;
            shift.shiftimm:=addr.shiftimm;
            shift.shiftmode:=addr.shiftmode;
            list.concat(taicpu.op_reg_reg_shifterop(A_ADD,scratch,addr.base,shift));
          end;
        reference_reset_base(addr,scratch,addr.offset,0);
      end;

    { a symbolic address is materialised into a register first }
    if assigned(addr.symbol) then
      begin
        scratch:=getintregister(list,OS_INT);
        a_loadaddr_ref_reg(list,addr,scratch);
        reference_reset_base(addr,scratch,0,0);
      end;

    { note: the store uses the untyped size postfixes (.32/.64) whereas the
      matching load uses .f32/.f64 }
    if fromsize=OS_F32 then
      postfix:=PF_32
    else
      postfix:=PF_64;
    list.Concat(setoppostfix(taicpu.op_reg_ref(A_VSTR,reg,addr),postfix));
  end;
procedure Tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
  { Transfers the integer register intreg into the mm register mmreg with a
    VMOV. Only a destination size of OS_F32 is accepted; anything else
    triggers internal error 2012100813. fromsize and shuffle are not
    checked (a shuffle=nil test was left disabled by the author). }
  begin
    if tosize<>OS_F32 then
      internalerror(2012100813)
    else
      list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
  end;
procedure Tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
  { Transfers the mm register mmreg into the integer register intreg with a
    VMOV. Only a source size of OS_F32 is accepted; anything else triggers
    internal error 2012100814. tosize and shuffle are not checked (a
    shuffle=nil test was left disabled by the author). }
  begin
    if fromsize<>OS_F32 then
      internalerror(2012100814)
    else
      list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
  end;
procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
var tmpreg: tregister;

View File

@ -139,7 +139,11 @@ unit cpubase;
{ multiple load/store vfp address modes }
PF_IAD,PF_DBD,PF_FDD,PF_EAD,
PF_IAS,PF_DBS,PF_FDS,PF_EAS,
PF_IAX,PF_DBX,PF_FDX,PF_EAX
PF_IAX,PF_DBX,PF_FDX,PF_EAX,
{ FPv4 postfixes }
PF_32,PF_64,PF_F32,PF_F64,
PF_F32S32,PF_F32U32,
PF_S32F32,PF_U32F32
);
TOpPostfixes = set of TOpPostfix;
@ -152,14 +156,17 @@ unit cpubase;
PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,
PF_S,PF_D,PF_E,PF_None,PF_None);
oppostfix2str : array[TOpPostfix] of string[3] = ('',
oppostfix2str : array[TOpPostfix] of string[8] = ('',
's',
'd','e','p','ep',
'b','sb','bt','h','sh','t',
'ia','ib','da','db','fd','fa','ed','ea',
'iad','dbd','fdd','ead',
'ias','dbs','fds','eas',
'iax','dbx','fdx','eax');
'iax','dbx','fdx','eax',
'.32','.64','.f32','.f64',
'.f32.s32','.f32.u32',
'.s32.f32','.u32.f32');
roundingmode2str : array[TRoundingMode] of string[1] = ('',
'p','m','z');
@ -371,7 +378,7 @@ unit cpubase;
const
std_regname_table : array[tregisterindex] of string[7] = (
std_regname_table : array[tregisterindex] of string[10] = (
{$i rarmstd.inc}
);

View File

@ -65,7 +65,8 @@ Type
fpu_fpa11,
fpu_vfpv2,
fpu_vfpv3,
fpu_vfpv3_d16
fpu_vfpv3_d16,
fpu_fpv4_s16
);
tcontrollertype =
@ -227,7 +228,8 @@ Const
'FPA11',
'VFPV2',
'VFPV3',
'VFPV3_D16'
'VFPV3_D16',
'FPV4_S16'
);
@ -1004,7 +1006,7 @@ Const
)
);
vfp_scalar = [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16];
vfp_scalar = [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16];
{ Supported optimizations, only used for information }
supported_optimizerswitches = genericlevel1optimizerswitches+

View File

@ -124,7 +124,7 @@ unit cpupara;
getparaloc:=LOC_MMREGISTER
else if (calloption in [pocall_cdecl,pocall_cppdecl,pocall_softfloat]) or
(cs_fp_emulation in current_settings.moduleswitches) or
(current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
(current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16]) then
{ the ARM eabi also allows passing VFP values via VFP registers,
but Mac OS X doesn't seem to do that and linux only does it if
built with the "-mfloat-abi=hard" option }
@ -608,7 +608,7 @@ unit cpupara;
end
else if (p.proccalloption in [pocall_softfloat]) or
(cs_fp_emulation in current_settings.moduleswitches) or
(current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
(current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16]) then
begin
case retcgsize of
OS_64,

View File

@ -118,6 +118,14 @@ unit cpupi;
if r in regs then
inc(floatsavesize,8);
end;
fpu_fpv4_s16:
begin
floatsavesize:=0;
regs:=cg.rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
for r:=RS_D0 to RS_D15 do
if r in regs then
inc(floatsavesize,8);
end;
end;
floatsavesize:=align(floatsavesize,max(current_settings.alignment.localalignmin,4));
result:=Align(tg.direction*tg.lasttemp,max(current_settings.alignment.localalignmin,4))+maxpushedparasize+aint(floatsavesize);

View File

@ -46,7 +46,7 @@ implementation
cutils,verbose;
const
gas_regname_table : array[tregisterindex] of string[7] = (
gas_regname_table : array[tregisterindex] of string[10] = (
{$i rarmstd.inc}
);

View File

@ -35,6 +35,7 @@ interface
public
function pass_1 : tnode;override;
protected
function first_addfloat: tnode; override;
procedure second_addfloat;override;
procedure second_cmpfloat;override;
procedure second_cmpordinal;override;
@ -48,12 +49,12 @@ interface
globtype,systems,
cutils,verbose,globals,
constexp,
symconst,symdef,paramgr,
symconst,symdef,paramgr,symtable,symtype,
aasmbase,aasmtai,aasmdata,aasmcpu,defutil,htypechk,
cgbase,cgutils,cgcpu,
cpuinfo,pass_1,pass_2,regvars,procinfo,
cpupara,
ncon,nset,nadd,
ncon,nset,nadd,ncnv,ncal,nmat,
ncgutil,tgobj,rgobj,rgcpu,cgobj,cg64f32,
hlcgobj
;
@ -212,6 +213,36 @@ interface
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
location.register,left.location.register,right.location.register));
end;
fpu_fpv4_s16:
begin
{ force mmreg as location, left right doesn't matter
as both will be in a fpureg }
location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
if left.location.loc<>LOC_CMMREGISTER then
location.register:=left.location.register
else if right.location.loc<>LOC_CMMREGISTER then
location.register:=right.location.register
else
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
case nodetype of
addn :
op:=A_VADD;
muln :
op:=A_VMUL;
subn :
op:=A_VSUB;
slashn :
op:=A_VDIV;
else
internalerror(2009111401);
end;
current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op, location.register,left.location.register,right.location.register), PF_F32));
end;
fpu_soft:
{ this case should be handled already by pass1 }
internalerror(200308252);
@ -273,6 +304,21 @@ interface
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
current_asmdata.CurrAsmList.concat(taicpu.op_none(A_FMSTAT));
end;
fpu_fpv4_s16:
begin
location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
if nodetype in [equaln,unequaln] then
op:=A_VCMP
else
op:=A_VCMPE;
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,
left.location.register,right.location.register));
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg(A_VMRS, NR_APSR_nzcv, NR_FPSCR));
end;
fpu_soft:
{ this case should be handled already by pass1 }
internalerror(2009112404);
@ -464,6 +510,83 @@ interface
end;
end;
function tarmaddnode.first_addfloat: tnode;
  { First-pass handling of floating point add/sub/mul/div and comparisons.

    For every FPU type other than fpu_fpv4_s16 the inherited implementation
    decides. With fpu_fpv4_s16:
      - s64real operands are rewritten into a call to the matching
        float64_* helper (operands converted to the FLOAT64 system type,
        right operand first in the parameter list as constructed here); the
        returned node replaces this one,
      - every other float type returns nil, i.e. the node is kept and
        handled by the code generator. }
  var
    helper: string[31];
    { true when the helper result has to be inverted with a not node }
    negate: boolean;
    recdef: tdef;
  begin
    if current_settings.fputype<>fpu_fpv4_s16 then
      exit(inherited first_addfloat);

    result:=nil;
    if tfloatdef(left.resultdef).floattype<>s64real then
      exit;

    recdef:=search_system_type('FLOAT64').typedef;
    helper:='float64';
    case nodetype of
      addn:
        helper:=helper+'_add';
      muln:
        helper:=helper+'_mul';
      subn:
        helper:=helper+'_sub';
      slashn:
        helper:=helper+'_div';
      { a>=b is computed as not(a<b) }
      ltn,gten:
        helper:=helper+'_lt';
      { a>b is computed as not(a<=b) }
      lten,gtn:
        helper:=helper+'_le';
      { a<>b is computed as not(a=b) }
      equaln,unequaln:
        helper:=helper+'_eq';
      else
        CGMessage3(type_e_operator_not_supported_for_types,node2opstr(nodetype),left.resultdef.typename,right.resultdef.typename);
    end;
    negate:=nodetype in [gtn,gten,unequaln];

    { comparisons yield a boolean }
    if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
      resultdef:=pasbool8type;

    result:=ctypeconvnode.create_internal(
      ccallnode.createintern(helper,
        ccallparanode.create(ctypeconvnode.create_internal(right,recdef),
        ccallparanode.create(ctypeconvnode.create_internal(left,recdef),nil))),
      resultdef);
    { the operands are now owned by the call node }
    left:=nil;
    right:=nil;

    if negate then
      result:=cnotnode.create(result);
  end;
procedure tarmaddnode.second_cmpordinal;
var

View File

@ -49,7 +49,7 @@ implementation
if (realresdef.typ=floatdef) and
(target_info.abi <> abi_eabihf) and
((cs_fp_emulation in current_settings.moduleswitches) or
(current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16])) then
(current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16])) then
begin
{ keep the fpu values in integer registers for now, the code
generator will move them to memory or an mmregister when necessary

View File

@ -32,6 +32,7 @@ interface
tarmtypeconvnode = class(tcgtypeconvnode)
protected
function first_int_to_real: tnode;override;
function first_real_to_real: tnode; override;
{ procedure second_int_to_int;override; }
{ procedure second_string_to_string;override; }
{ procedure second_cstring_to_pchar;override; }
@ -58,7 +59,7 @@ implementation
uses
verbose,globtype,globals,systems,
symconst,symdef,aasmbase,aasmtai,aasmdata,
symconst,symdef,aasmbase,aasmtai,aasmdata,symtable,
defutil,
cgbase,cgutils,
pass_1,pass_2,procinfo,
@ -76,7 +77,8 @@ implementation
var
fname: string[19];
begin
if cs_fp_emulation in current_settings.moduleswitches then
if (cs_fp_emulation in current_settings.moduleswitches) or
(current_settings.fputype=fpu_fpv4_s16) then
result:=inherited first_int_to_real
else
begin
@ -117,7 +119,8 @@ implementation
expectloc:=LOC_FPUREGISTER;
fpu_vfpv2,
fpu_vfpv3,
fpu_vfpv3_d16:
fpu_vfpv3_d16,
fpu_fpv4_s16:
expectloc:=LOC_MMREGISTER;
else
internalerror(2009112702);
@ -125,6 +128,48 @@ implementation
end;
end;
function tarmtypeconvnode.first_real_to_real: tnode;
  { First-pass handling of float to float conversions.

    For every FPU type other than fpu_fpv4_s16 the inherited implementation
    is used. With fpu_fpv4_s16:
      - s32real <-> s64real is rewritten into a call to
        float32_to_float64 / float64_to_float32,
      - a conversion between identical types simply yields the operand,
      - any other source or target float type is an internal error.
    The replacement node is first-passed before it is returned. }
  begin
    if current_settings.fputype<>fpu_fpv4_s16 then
      begin
        Result:=inherited first_real_to_real;
        exit;
      end;

    case tfloatdef(left.resultdef).floattype of
      s32real:
        case tfloatdef(resultdef).floattype of
          s32real:
            result:=left;
          s64real:
            result:=ctypeconvnode.create_explicit(
              ccallnode.createintern('float32_to_float64',
                ccallparanode.create(
                  ctypeconvnode.create_internal(left,search_system_type('FLOAT32REC').typedef),nil)),
              resultdef);
          else
            internalerror(200610151);
        end;
      s64real:
        case tfloatdef(resultdef).floattype of
          s64real:
            result:=left;
          s32real:
            result:=ctypeconvnode.create_explicit(
              ccallnode.createintern('float64_to_float32',
                ccallparanode.create(
                  ctypeconvnode.create_internal(left,search_system_type('FLOAT64').typedef),nil)),
              resultdef);
          else
            internalerror(200610152);
        end;
      else
        internalerror(200610153);
    end;

    { the operand has been moved into the replacement node in every case }
    left:=nil;
    firstpass(result);
  end;
procedure tarmtypeconvnode.second_int_to_real;
const
@ -214,6 +259,22 @@ implementation
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(
signedprec2vfpop[signed,location.size],location.register,left.location.register));
end;
fpu_fpv4_s16:
begin
location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
signed:=left.location.size=OS_S32;
location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
if (left.location.size<>OS_F32) then
internalerror(2009112703);
if left.location.size<>location.size then
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size)
else
location.register:=left.location.register;
if signed then
current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,location.register,left.location.register), PF_F32S32))
else
current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,location.register,left.location.register), PF_F32U32));
end;
end;
end;

View File

@ -91,7 +91,8 @@ implementation
end;
fpu_vfpv2,
fpu_vfpv3,
fpu_vfpv3_d16:
fpu_vfpv3_d16,
fpu_fpv4_s16:
begin
location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
location_copy(location,left.location);
@ -123,6 +124,13 @@ implementation
fpu_vfpv3,
fpu_vfpv3_d16:
expectloc:=LOC_MMREGISTER;
fpu_fpv4_s16:
begin
if tfloatdef(left.resultdef).floattype=s32real then
expectloc:=LOC_MMREGISTER
else
exit(inherited first_abs_real);
end;
else
internalerror(2009112401);
end;
@ -146,6 +154,13 @@ implementation
fpu_vfpv3,
fpu_vfpv3_d16:
expectloc:=LOC_MMREGISTER;
fpu_fpv4_s16:
begin
if tfloatdef(left.resultdef).floattype=s32real then
expectloc:=LOC_MMREGISTER
else
exit(inherited first_sqr_real);
end;
else
internalerror(2009112402);
end;
@ -169,6 +184,13 @@ implementation
fpu_vfpv3,
fpu_vfpv3_d16:
expectloc:=LOC_MMREGISTER;
fpu_fpv4_s16:
begin
if tfloatdef(left.resultdef).floattype=s32real then
expectloc:=LOC_MMREGISTER
else
exit(inherited first_sqrt_real);
end;
else
internalerror(2009112403);
end;
@ -227,6 +249,8 @@ implementation
op:=A_FABSD;
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
end;
fpu_fpv4_s16:
current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register), PF_F32));
else
internalerror(2009111402);
end;
@ -254,6 +278,8 @@ implementation
op:=A_FMULD;
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,left.location.register,left.location.register));
end;
fpu_fpv4_s16:
current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register), PF_F32));
else
internalerror(2009111403);
end;
@ -281,6 +307,8 @@ implementation
op:=A_FSQRTD;
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
end;
fpu_fpv4_s16:
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register));
else
internalerror(2009111402);
end;

View File

@ -39,6 +39,7 @@ interface
end;
tarmunaryminusnode = class(tcgunaryminusnode)
function pass_1: tnode; override;
procedure second_float;override;
end;
@ -54,9 +55,10 @@ implementation
cutils,verbose,globals,constexp,
aasmbase,aasmcpu,aasmtai,aasmdata,
defutil,
symtype,symconst,symtable,
cgbase,cgobj,hlcgobj,cgutils,
pass_2,procinfo,
ncon,
ncon,ncnv,ncal,
cpubase,cpuinfo,
ncgutil,cgcpu,
nadd,pass_1,symdef;
@ -326,6 +328,46 @@ implementation
TARMUNARYMINUSNODE
*****************************************************************************}
function tarmunaryminusnode.pass_1: tnode;
  { First pass of the unary minus node.

    Only the negation of an s64real value with the fpu_fpv4_s16 FPU is
    handled here: it is rewritten into a call to float64_sub with a 0
    constant and the operand as arguments (both converted to the FLOAT64
    system type), and that call node is returned as the replacement.
    Everything else (other FPU types, non-float operands, s32real) is
    delegated to the inherited implementation. }
  var
    procname: string[31];
    fdef : tdef;
  begin
    { resultdef must be known to be a floatdef before it is cast to
      tfloatdef: for an integer negation the cast would read a field of an
      unrelated def class. Boolean short-circuit evaluation guarantees the
      cast is only evaluated for float defs. }
    if (current_settings.fputype<>fpu_fpv4_s16) or
       (resultdef.typ<>floatdef) or
       (tfloatdef(resultdef).floattype=s32real) then
      exit(inherited pass_1);

    result:=nil;
    firstpass(left);
    if codegenerror then
      exit;

    if (left.resultdef.typ=floatdef) then
      begin
        case tfloatdef(resultdef).floattype of
          s64real:
            begin
              procname:='float64_sub';
              fdef:=search_system_type('FLOAT64').typedef;
            end;
          else
            internalerror(2005082801);
        end;
        result:=ctypeconvnode.create_internal(ccallnode.createintern(procname,ccallparanode.create(
          ctypeconvnode.create_internal(left,fDef),
          ccallparanode.create(ctypeconvnode.create_internal(crealconstnode.create(0,resultdef),fdef),nil))),resultdef);
        { the operand is now owned by the call node }
        left:=nil;
      end
    else
      begin
        { left is not a float here, so only the ordinal case can apply }
        if (left.resultdef.typ=orddef) then
          expectloc:=LOC_REGISTER;
      end;
  end;
procedure tarmunaryminusnode.second_float;
var
op: tasmop;
@ -357,6 +399,15 @@ implementation
current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,
location.register,left.location.register));
end;
fpu_fpv4_s16:
begin
location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
location:=left.location;
if (left.location.loc=LOC_CMMREGISTER) then
location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VNEG,
location.register,left.location.register), PF_F32));
end
else
internalerror(2009112602);
end;

View File

@ -2608,7 +2608,11 @@ implementation
{ In non-emulation mode, real opcodes are
emitted for floating point values.
}
if not (cs_fp_emulation in current_settings.moduleswitches) then
if not ((cs_fp_emulation in current_settings.moduleswitches)
{$ifdef cpufpemu}
or (current_settings.fputype=fpu_soft)
{$endif cpufpemu}
) then
exit;
if not(target_info.system in systems_wince) then
@ -2768,12 +2772,9 @@ implementation
if nodetype=slashn then
begin
{$ifdef cpufpemu}
if (current_settings.fputype=fpu_soft) or (cs_fp_emulation in current_settings.moduleswitches) then
begin
result:=first_addfloat;
if assigned(result) then
exit;
end;
result:=first_addfloat;
if assigned(result) then
exit;
{$endif cpufpemu}
expectloc:=LOC_FPUREGISTER;
end
@ -2984,12 +2985,9 @@ implementation
else if (rd.typ=floatdef) or (ld.typ=floatdef) then
begin
{$ifdef cpufpemu}
if (current_settings.fputype=fpu_soft) or (cs_fp_emulation in current_settings.moduleswitches) then
begin
result:=first_addfloat;
if assigned(result) then
exit;
end;
result:=first_addfloat;
if assigned(result) then
exit;
{$endif cpufpemu}
if nodetype in [addn,subn,muln,andn,orn,xorn] then
expectloc:=LOC_FPUREGISTER

View File

@ -29,7 +29,7 @@ interface
cpuBase,cgBase;
type
TRegNameTable = array[tregisterindex] of string[7];
TRegNameTable = array[tregisterindex] of string[10];
TRegisterIndexTable = array[tregisterindex] of tregisterindex;
function findreg_by_number_table(r:Tregister;const regnumber_index:TRegisterIndexTable):tregisterindex;

View File

@ -33,10 +33,19 @@ Procedure SysInitFPU;{$ifdef SYSTEMINLINE}inline;{$endif}
begin
  { FPUFPV4_S16: sets bits 20..23 of the word at 0xE000ED88 (the Cortex-M
    Coprocessor Access Control Register), i.e. grants full access to
    coprocessors CP10/CP11 so that FPU instructions can be executed.
    Otherwise (FPA): enable FPU exceptions, but disable INEXACT, UNDERFLOW,
    DENORMAL.

    No explicit "bx lr" is used in the FPv4 path: this is a regular Pascal
    procedure, so the compiler-generated exit code must run after the
    assembler block (and an explicit return would be wrong if the body
    were ever inlined). }
  asm
{$IFDEF FPUFPV4_S16}
    movw r0, #(0xed88)
    movt r0, #(0xe000)
    ldr r1, [r0]
    orr r1, r1, #(0xF << 20)
    str r1, [r0]
{$ELSE FPUFPV4_S16}
    rfs r0
    and r0,r0,#0xffe0ffff
    orr r0,r0,#0x00070000
    wfs r0
{$endif FPUFPV4_S16}
  end;
end;
{$endif}