From fa4cbc89a5256a5964460afe6a105e1390599a17 Mon Sep 17 00:00:00 2001
From: florian
Date: Fri, 3 Apr 2020 20:15:23 +0000
Subject: [PATCH] + Xtensa: hard float support, i.e. make use of floating point
 extension if available

git-svn-id: trunk@44539 -
---
Review notes (this section is not part of the commit message):
 * defutil.pas, symdef.pas: the boolean operator that joins the Xtensa-only
   condition is now placed inside the {$ifdef xtensa} block, so targets other
   than Xtensa no longer see a dangling "or" / "and".
 * ncpuadd.pas: "<>" now sets inv, so the flag produced by the equality
   compare is negated; the compare writes location.resflags.register; the
   unused local singleprec is removed.
 * ncpumat.pas: the 64 bit software negation also copies the low dword into
   the newly allocated result register.

 compiler/defutil.pas        |  10 ++-
 compiler/nld.pas            |   4 ++
 compiler/symdef.pas         |   6 +-
 compiler/xtensa/cgcpu.pas   |  47 ++++++++++++++
 compiler/xtensa/cpuinfo.pas |  20 +++---
 compiler/xtensa/ncpuadd.pas | 127 ++++++++++++++++++++++++++++++++++++
 compiler/xtensa/ncpucnv.pas |  71 +++++++++++++-------
 compiler/xtensa/ncpumat.pas |  50 +++++++++-----
 8 files changed, 285 insertions(+), 50 deletions(-)

diff --git a/compiler/defutil.pas b/compiler/defutil.pas
index 036e8c9e37..e6b3150b25 100644
--- a/compiler/defutil.pas
+++ b/compiler/defutil.pas
@@ -372,7 +372,8 @@ interface
 implementation
 
     uses
-       verbose,cutils;
+       verbose,cutils,
+       cpuinfo;
 
     { returns true, if def uses FPU }
     function is_fpu(def : tdef) : boolean;
@@ -1518,7 +1519,12 @@ implementation
           objectdef :
             result:=int_cgsize(def.size);
           floatdef:
-            if cs_fp_emulation in current_settings.moduleswitches then
+            if (cs_fp_emulation in current_settings.moduleswitches)
+{$ifdef xtensa}
+               or not(tfloatdef(def).floattype=s32real)
+               or not(FPUXTENSA_SINGLE in fpu_capabilities[current_settings.fputype])
+{$endif xtensa}
+            then
               result:=int_cgsize(def.size)
             else
               result:=tfloat2tcgsize[tfloatdef(def).floattype];
diff --git a/compiler/nld.pas b/compiler/nld.pas
index 6224e3d546..d5f91c5373 100644
--- a/compiler/nld.pas
+++ b/compiler/nld.pas
@@ -751,6 +751,10 @@ implementation
             and (use_vectorfpu(left.resultdef) and
                  use_vectorfpu(right.resultdef) and
                  (tfloatdef(left.resultdef).floattype=tfloatdef(right.resultdef).floattype))
+{$endif arm}
+{$ifdef xtensa}
+            and not((FPUXTENSA_SINGLE in fpu_capabilities[current_settings.fputype]) xor
+              (FPUXTENSA_DOUBLE in fpu_capabilities[current_settings.fputype]))
 {$endif}
          then
            begin
diff --git a/compiler/symdef.pas b/compiler/symdef.pas
index ac5ee8a135..84852006f2 100644
--- a/compiler/symdef.pas
+++ b/compiler/symdef.pas
@@ -2357,7 +2357,11 @@ implementation
 {$ifdef x86}
         result:=use_vectorfpu(self);
 {$else x86}
-        result:=(typ=floatdef) and not(cs_fp_emulation in current_settings.moduleswitches);
+        result:=(typ=floatdef) and not(cs_fp_emulation in current_settings.moduleswitches)
+{$ifdef xtensa}
+          and (FPUXTENSA_SINGLE in fpu_capabilities[init_settings.fputype]) and (tfloatdef(self).floattype=s32real)
+{$endif xtensa}
+        ;
 {$endif x86}
       end;
 
diff --git a/compiler/xtensa/cgcpu.pas b/compiler/xtensa/cgcpu.pas
index b18759a111..b44255557c 100644
--- a/compiler/xtensa/cgcpu.pas
+++ b/compiler/xtensa/cgcpu.pas
@@ -73,6 +73,10 @@ interface
 
         procedure g_concatcopy(list : TAsmList; const source,dest : treference; len : tcgint);override;
 
+        procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
+        procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);override;
+        procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
+
         procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
       end;
 
@@ -945,6 +949,49 @@ implementation
       end;
 
 
+    procedure tcgcpu.a_loadfpu_reg_reg(list: TAsmList; fromsize,tosize: tcgsize; reg1, reg2: tregister);
+      begin
+        if not(fromsize in [OS_32,OS_F32]) then
+          InternalError(2020032603);
+        list.concat(taicpu.op_reg_reg(A_MOV_S,reg2,reg1));
+      end;
+
+
+    procedure tcgcpu.a_loadfpu_ref_reg(list: TAsmList; fromsize,tosize: tcgsize; const ref: treference; reg: tregister);
+      var
+        href: treference;
+      begin
+        if not(fromsize in [OS_32,OS_F32]) then
+          InternalError(2020032602);
+        href:=ref;
+        if assigned(href.symbol) or
+          (href.index<>NR_NO) or
+          (((href.offset<0) or (href.offset>1020) or (href.offset mod 4<>0))) then
+          fixref(list,href);
+
+        list.concat(taicpu.op_reg_ref(A_LSI,reg,href));
+
+        if fromsize<>tosize then
+          a_loadfpu_reg_reg(list,fromsize,tosize,reg,reg);
+      end;
+
+
+    procedure tcgcpu.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
+      var
+        href: treference;
+      begin
+        if not(fromsize in [OS_32,OS_F32]) then
+          InternalError(2020032604);
+        href:=ref;
+        if assigned(href.symbol) or
+          (href.index<>NR_NO) or
+          (((href.offset<0) or (href.offset>1020) or (href.offset mod 4<>0))) then
+          fixref(list,href);
+
+        list.concat(taicpu.op_reg_ref(A_SSI,reg,href));
+      end;
+
+
     procedure tcgcpu.maybeadjustresult(list : TAsmList; op : TOpCg; size : tcgsize; dst : tregister);
       const
         overflowops = [OP_MUL,OP_SHL,OP_ADD,OP_SUB,OP_NEG];
diff --git a/compiler/xtensa/cpuinfo.pas b/compiler/xtensa/cpuinfo.pas
index a64dade915..c5aabd72f3 100644
--- a/compiler/xtensa/cpuinfo.pas
+++ b/compiler/xtensa/cpuinfo.pas
@@ -45,7 +45,8 @@ Type
    tfputype =
      (fpu_none,
       fpu_soft,
-      fpu_libgcc
+      fpu_libgcc,
+      fpu_hard
      );
 
 Type
@@ -97,7 +98,8 @@ Const
    fputypestr : array[tfputype] of string[10] = (
      'NONE',
      'SOFT',
-     'LIBGCC'
+     'LIBGCC',
+     'HARD'
    );
 
 
@@ -109,10 +111,10 @@ Const
    (
       (controllertypestr:''; controllerunitstr:''; cputype:cpu_none; fputype:fpu_none; abi: abi_default; flashbase:0),
       (controllertypestr:'ESP8266'; controllerunitstr:'ESP8266'; cputype:cpu_lx106; fputype:fpu_none; abi: abi_xtensa_call0; { flashbase:$40000000; flashsize:448*1024; srambase:$40070000; sramsize: 520*1024 }),
-      (controllertypestr:'ESP32'; controllerunitstr:'ESP32'; cputype:cpu_lx6; fputype:fpu_none; abi: abi_xtensa_windowed; flashbase:$40000000; flashsize:2*1024*1024),
-      (controllertypestr:'ESP32_D0WD'; controllerunitstr:'ESP32_D0WD'; cputype:cpu_lx6; fputype:fpu_none; abi: abi_xtensa_windowed; flashbase:$40000000; flashsize:448*1024; srambase:$40070000; sramsize: 520*1024),
-      (controllertypestr:'ESP32_D2WD'; controllerunitstr:'ESP32_D2WD'; cputype:cpu_lx6; fputype:fpu_none; abi: abi_xtensa_windowed; flashbase:$40000000; flashsize:448*1024; srambase:$40070000; sramsize: 520*1024),
-      (controllertypestr:'ESP32_S0WD'; controllerunitstr:'ESP32_S0WD'; cputype:cpu_lx6; fputype:fpu_none; abi: abi_xtensa_windowed; flashbase:$40000000; flashsize:448*1024; srambase:$40070000; sramsize: 520*1024)
+      (controllertypestr:'ESP32'; controllerunitstr:'ESP32'; cputype:cpu_lx6; fputype:fpu_hard; abi: abi_xtensa_windowed; flashbase:$40000000; flashsize:2*1024*1024),
+      (controllertypestr:'ESP32_D0WD'; controllerunitstr:'ESP32_D0WD'; cputype:cpu_lx6; fputype:fpu_hard; abi: abi_xtensa_windowed; flashbase:$40000000; flashsize:448*1024; srambase:$40070000; sramsize: 520*1024),
+      (controllertypestr:'ESP32_D2WD'; controllerunitstr:'ESP32_D2WD'; cputype:cpu_lx6; fputype:fpu_hard; abi: abi_xtensa_windowed; flashbase:$40000000; flashsize:448*1024; srambase:$40070000; sramsize: 520*1024),
+      (controllertypestr:'ESP32_S0WD'; controllerunitstr:'ESP32_S0WD'; cputype:cpu_lx6; fputype:fpu_hard; abi: abi_xtensa_windowed; flashbase:$40000000; flashsize:448*1024; srambase:$40070000; sramsize: 520*1024)
    );
 
    { Supported optimizations, only used for information }
@@ -138,7 +140,8 @@ Const
 
    tfpuflags =
       (
-        FPUXTENSA_DUMMY
+        FPUXTENSA_SINGLE, { FPU has single support }
+        FPUXTENSA_DOUBLE  { FPU has double support, this is a dummy so far for easier checking what code to generate }
       );
 
  const
@@ -153,7 +156,8 @@ Const
      (
        { fpu_none } [],
        { fpu_soft } [],
-       { fpu_libgcc } []
+       { fpu_libgcc } [],
+       { fpu_hard } [FPUXTENSA_SINGLE]
      );
 
 Implementation
diff --git a/compiler/xtensa/ncpuadd.pas b/compiler/xtensa/ncpuadd.pas
index 51cb828298..bce1ba1a8f 100644
--- a/compiler/xtensa/ncpuadd.pas
+++ b/compiler/xtensa/ncpuadd.pas
@@ -30,11 +30,16 @@ interface
 
     type
        TCPUAddNode = class(tcgaddnode)
+       private
+         procedure pass_left_and_right;
        protected
          function pass_1 : tnode;override;
+         function first_addfloat: tnode;override;
          procedure second_cmpordinal;override;
          procedure second_cmpsmallset;override;
          procedure second_cmp64bit;override;
+         procedure second_cmpfloat;override;
+         procedure second_addfloat;override;
          procedure second_cmp;
        end;
 
@@ -176,6 +181,128 @@ interface
         second_cmp;
       end;
 
+
+    procedure TCPUAddNode.pass_left_and_right;
+      begin
+        { calculate the operator which is more difficult }
+        firstcomplex(self);
+
+        { in case of constant put it to the left }
+        if (left.nodetype=ordconstn) then
+          swapleftright;
+
+        secondpass(left);
+        secondpass(right);
+      end;
+
+
+    function TCPUAddNode.first_addfloat: tnode;
+      begin
+        result := nil;
+
+        if (FPUXTENSA_SINGLE in fpu_capabilities[current_settings.fputype]) and
+          (tfloatdef(left.resultdef).floattype=s32real) and (nodetype<>slashn) then
+          begin
+            if nodetype in [equaln,unequaln,lten,ltn,gten,gtn] then
+              expectloc:=LOC_FLAGS
+            else
+              expectloc:=LOC_FPUREGISTER;
+          end
+        else
+          result:=first_addfloat_soft;
+      end;
+
+
+    procedure TCPUAddNode.second_addfloat;
+      var
+        op    : TAsmOp;
+        cmpop,
+        inv   : boolean;
+      begin
+        pass_left_and_right;
+        if (nf_swapped in flags) then
+          swapleftright;
+
+        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
+
+        cmpop:=false;
+        inv:=false;
+        case nodetype of
+          addn :
+            op:=A_ADD_S;
+          muln :
+            op:=A_MUL_S;
+          subn :
+            op:=A_SUB_S;
+          unequaln,
+          equaln:
+            begin
+              op:=A_OEQ_S;
+              cmpop:=true;
+              inv:=nodetype=unequaln; { "<>" uses the same compare, the resulting flag is negated below }
+            end;
+          ltn:
+            begin
+              op:=A_OLT_S;
+              cmpop:=true;
+            end;
+          lten:
+            begin
+              op:=A_OLE_S;
+              cmpop:=true;
+            end;
+          gtn:
+            begin
+              op:=A_OLT_S;
+              swapleftright;
+              cmpop:=true;
+            end;
+          gten:
+            begin
+              op:=A_OLE_S;
+              swapleftright;
+              cmpop:=true;
+            end;
+          else
+            internalerror(2020032601);
+        end;
+
+        { initialize the result }
+        if cmpop then
+          begin
+            location_reset(location,LOC_FLAGS,OS_NO);
+            location.resflags.register:=NR_B0;
+            location.resflags.flag:=F_NZ;
+          end
+        else
+          begin
+            location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
+            location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+          end;
+
+        { emit the actual operation }
+        if cmpop then
+          begin
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.resflags.register,left.location.register,right.location.register));
+            cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+
+            if inv then
+              location.resflags.flag:=F_Z;
+          end
+        else
+          begin
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,left.location.register,right.location.register));
+            cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+          end;
+      end;
+
+
+    procedure TCPUAddNode.second_cmpfloat;
+      begin
+        second_addfloat;
+      end;
+
 begin
   caddnode:=tcpuaddnode;
 end.
diff --git a/compiler/xtensa/ncpucnv.pas b/compiler/xtensa/ncpucnv.pas
index 12948bea51..b491723f9f 100644
--- a/compiler/xtensa/ncpucnv.pas
+++ b/compiler/xtensa/ncpucnv.pas
@@ -30,29 +30,9 @@ interface
 
     type
        tcputypeconvnode = class(tcgtypeconvnode)
-         protected
-         // function first_int_to_real: tnode;override;
-         // function first_real_to_real: tnode; override;
-         { procedure second_int_to_int;override; }
-         { procedure second_string_to_string;override; }
-         { procedure second_cstring_to_pchar;override; }
-         { procedure second_string_to_chararray;override; }
-         { procedure second_array_to_pointer;override; }
-         // function first_int_to_real: tnode; override;
-         { procedure second_pointer_to_array;override; }
-         { procedure second_chararray_to_string;override; }
-         { procedure second_char_to_string;override; }
-         // procedure second_int_to_real;override;
-         // procedure second_real_to_real;override;
-         { procedure second_cord_to_pointer;override; }
-         { procedure second_proc_to_procvar;override; }
-         { procedure second_bool_to_int;override; }
-           procedure second_int_to_bool;override;
-         { procedure second_load_smallset;override; }
-         { procedure second_ansistring_to_pchar;override; }
-         { procedure second_pchar_to_string;override; }
-         { procedure second_class_to_intf;override; }
-         { procedure second_char_to_char;override; }
+       protected
+         function first_real_to_real: tnode;override;
+         procedure second_int_to_bool;override;
        end;
 
 implementation
@@ -70,6 +50,51 @@ implementation
                              tcputypeconvnode
 *****************************************************************************}
 
+    function tcputypeconvnode.first_real_to_real: tnode;
+      begin
+        if (FPUXTENSA_SINGLE in fpu_capabilities[current_settings.fputype]) and
+          not(FPUXTENSA_DOUBLE in fpu_capabilities[current_settings.fputype]) then
+          begin
+            case tfloatdef(left.resultdef).floattype of
+              s32real:
+                case tfloatdef(resultdef).floattype of
+                  s64real:
+                    result:=ctypeconvnode.create_explicit(ccallnode.createintern('float32_to_float64',ccallparanode.create(
+                      ctypeconvnode.create_internal(left,search_system_type('FLOAT32REC').typedef),nil)),resultdef);
+                  s32real:
+                    begin
+                      result:=left;
+                      left:=nil;
+                    end;
+                  else
+                    internalerror(200610151);
+                end;
+              s64real:
+                case tfloatdef(resultdef).floattype of
+                  s32real:
+                    result:=ctypeconvnode.create_explicit(ccallnode.createintern('float64_to_float32',ccallparanode.create(
+                      ctypeconvnode.create_internal(left,search_system_type('FLOAT64').typedef),nil)),resultdef);
+                  s64real:
+                    begin
+                      result:=left;
+                      left:=nil;
+                    end;
+                  else
+                    internalerror(200610152);
+                end;
+              else
+                internalerror(200610153);
+            end;
+            left:=nil;
+            firstpass(result);
+            exit;
+          end
+        else
+          Result := inherited first_real_to_real;
+      end;
+
+
+
     procedure tcputypeconvnode.second_int_to_bool;
       var
         hreg1, onereg: tregister;
diff --git a/compiler/xtensa/ncpumat.pas b/compiler/xtensa/ncpumat.pas
index 0c8456c7eb..c96614e40e 100644
--- a/compiler/xtensa/ncpumat.pas
+++ b/compiler/xtensa/ncpumat.pas
@@ -121,25 +121,43 @@ implementation
     procedure tcpuunaryminusnode.second_float;
       begin
         secondpass(left);
-        case current_settings.fputype of
-          fpu_soft:
-            begin
+        if (current_settings.fputype=fpu_soft) or (tfloatdef(left.resultdef).floattype<>s32real) or
+          not(FPUXTENSA_SINGLE in fpu_capabilities[current_settings.fputype]) then
+          begin
+            if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER]) then
               hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
-              location:=left.location;
-              case location.size of
-                OS_32:
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_XOR,OS_32,tcgint($80000000),location.register);
-                OS_64:
-                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_XOR,OS_32,tcgint($80000000),location.registerhi);
-              else
-                internalerror(2014033101);
-              end;
-            end
-          else
-            internalerror(2009112602);
-        end;
+            location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+            if location.size in [OS_64,OS_S64,OS_F64] then
+              begin
+                location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
+                location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
+              end
+            else
+              location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+
+            case location.size of
+              OS_32:
+                cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_32,tcgint($80000000),left.location.register,location.register);
+              OS_64:
+                begin
+                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_XOR,OS_32,tcgint($80000000),left.location.registerhi,location.registerhi);
+                  cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.register64.reglo,location.register64.reglo); { only the high dword is xor-ed, the low dword is copied as is }
+                end;
+            else
+              internalerror(2014033101);
+            end;
+          end
+        else
+          begin
+            if not(left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER]) then
+              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
+            location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
+            location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+            current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg(A_NEG_S,location.register,left.location.register));
+          end;
       end;
 
+
     procedure tcpushlshrnode.second_64bit;
       var
         v : TConstExprInt;