From 054bf32f1f80746f9ffe6cddd2c392fab2061ff0 Mon Sep 17 00:00:00 2001 From: Jeppe Johansen Date: Sat, 21 Jul 2018 22:34:42 +0000 Subject: [PATCH] Add RV64GC cpu type. Fix float loading. Fix a number of small issues with wrong operand sizes. Fixed concatcopy code generation. Align jump table for case statements. git-svn-id: branches/laksen/riscv_new@39481 - --- compiler/aoptobj.pas | 5 +++ compiler/globals.pas | 4 +- compiler/riscv/cgrv.pas | 85 ++++++++++------------------------- compiler/riscv/nrvset.pas | 1 + compiler/riscv64/aoptcpu.pas | 4 -- compiler/riscv64/cgcpu.pas | 63 +++++++++++++++----------- compiler/riscv64/cpubase.pas | 5 ++- compiler/riscv64/cpuinfo.pas | 13 +++--- compiler/riscv64/nrv64cnv.pas | 6 +-- compiler/systems/i_linux.pas | 2 +- 10 files changed, 80 insertions(+), 108 deletions(-) diff --git a/compiler/aoptobj.pas b/compiler/aoptobj.pas index 885b9ee64c..dbf9dac333 100644 --- a/compiler/aoptobj.pas +++ b/compiler/aoptobj.pas @@ -1342,7 +1342,12 @@ Unit AoptObj; {$if defined(arm) or defined(aarch64)} (hp.condition=c_None) and {$endif arm or aarch64} +{$if defined(riscv32) or defined(riscv64)} (hp.ops>0) and + (hp.oper[0]^.reg=NR_X0) and +{$else riscv} + (hp.ops>0) and +{$endif riscv} (JumpTargetOp(hp)^.typ = top_ref) and (JumpTargetOp(hp)^.ref^.symbol is TAsmLabel); end; diff --git a/compiler/globals.pas b/compiler/globals.pas index d5dddbbc4d..d4f9af7be4 100644 --- a/compiler/globals.pas +++ b/compiler/globals.pas @@ -536,8 +536,8 @@ interface fputype : fpu_fd; {$endif riscv32} {$ifdef riscv64} - cputype : cpu_rv64imafd; - optimizecputype : cpu_rv64imafd; + cputype : cpu_rv64imafdc; + optimizecputype : cpu_rv64imafdc; asmcputype : cpu_none; fputype : fpu_fd; {$endif riscv64} diff --git a/compiler/riscv/cgrv.pas b/compiler/riscv/cgrv.pas index 5fb781150b..e340bebd93 100644 --- a/compiler/riscv/cgrv.pas +++ b/compiler/riscv/cgrv.pas @@ -32,6 +32,9 @@ unit cgrv; parabase; type + + { tcgrv } + tcgrv = class(tcg) procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : tcgpara); override; @@ -40,6 +43,7 @@ unit cgrv; procedure a_call_reg(list : TAsmList;reg: tregister); override; procedure a_call_name(list : TAsmList;const s : string; weak: boolean); override; + procedure a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); override; procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: TCGSize; reg: tregister; const ref: treference); override; procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override; procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; register: tregister); override; @@ -125,6 +129,15 @@ unit cgrv; end; + procedure tcgrv.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); + begin + if a=0 then + a_load_reg_ref(list,size,size,NR_X0,ref) + else + inherited a_load_const_ref(list, size, a, ref); + end; + + procedure tcgrv.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : tcgpara); var ref: treference; @@ -226,20 +239,6 @@ unit cgrv; if (not assigned(href.symbol)) and (href.offset=0) then a_load_reg_reg(list,OS_ADDR,OS_ADDR,href.base,r) - {else if (assigned(href.symbol) or - (not is_imm12(href.offset))) and - (href.base<>NR_NO) then - begin - b:= href.base; - - href.base:=NR_NO; - href.refaddr:=addr_hi20; - list.concat(taicpu.op_reg_ref(A_LUI,r,href)); - href.refaddr:=addr_lo12; - list.concat(taicpu.op_reg_reg_ref(A_ADDI,r,r,href)); - - list.concat(taicpu.op_reg_reg_reg(A_ADD,r,r,b)); - end} else if (assigned(href.symbol) or (not is_imm12(href.offset))) and (href.base<>NR_NO) then @@ -253,7 +252,7 @@ unit cgrv; href.refaddr:=addr_pcrel_hi20; list.concat(taicpu.op_reg_ref(A_AUIPC,r,href)); - reference_reset_symbol(href,l,0,0,[]); + reference_reset_symbol(href,l,0,0,ref.volatility); href.refaddr:=addr_pcrel_lo12; list.concat(taicpu.op_reg_reg_ref(A_ADDI,r,r,href)); @@ -264,26 +263,6 @@ unit cgrv; begin list.concat(taicpu.op_reg_reg_const(A_ADDI,r,href.base,href.offset)); end - {else if (href.refaddr=addr_pcrel) then - begin - tmpreg:=getintregister(list,OS_ADDR); - - current_asmdata.getaddrlabel(l); - - a_label(list,l); - - b:=href.base; - href.base:=NR_NO; - - href.refaddr:=addr_hi20; - href.relsymbol:=l; - list.concat(taicpu.op_reg_ref(A_LUI,tmpreg,href)); - href.refaddr:=addr_lo12; - list.concat(taicpu.op_reg_reg_ref(A_ADDI,r,tmpreg,href)); - - if b<>NR_NO then - list.concat(taicpu.op_reg_reg_reg(A_ADD,r,r,b)); - end} else if (href.refaddr=addr_pcrel) then begin tmpreg:=getintregister(list,OS_ADDR); @@ -297,7 +276,7 @@ unit cgrv; href.refaddr:=addr_pcrel_hi20; list.concat(taicpu.op_reg_ref(A_AUIPC,tmpreg,href)); - reference_reset_symbol(href,l,0,0,[]); + reference_reset_symbol(href,l,0,0,ref.volatility); href.refaddr:=addr_pcrel_lo12; list.concat(taicpu.op_reg_reg_ref(A_ADDI,r,tmpreg,href)); @@ -310,25 +289,9 @@ unit cgrv; procedure tcgrv.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel); - var - reg1: TRegister; - ai: taicpu; begin if a=0 then - begin - reg1:=NR_X0; - if TOpCmp2AsmCond[cmp_op]=C_None then - begin - cmp_op:=swap_opcmp(cmp_op); - reg1:=reg; - reg:=NR_X0; - end; - - ai:=taicpu.op_reg_reg_sym_ofs(A_Bxx,reg,reg1,l,0); - ai.is_jmp:=true; - ai.condition:=TOpCmp2AsmCond[cmp_op]; - list.concat(ai); - end + a_cmp_reg_reg_label(list,size,cmp_op,NR_X0,reg,l) else inherited; end; @@ -469,7 +432,7 @@ unit cgrv; begin tmpreg:=getintregister(list,OS_ADDR); a_loadaddr_ref_reg(list,href,tmpreg); - reference_reset_base(href,tmpreg,0,ctempposinvalid,0,[]); + reference_reset_base(href,tmpreg,0,ctempposinvalid,0,ref.volatility); end; case fromsize of @@ -558,7 +521,7 @@ unit cgrv; begin tmpreg:=getintregister(list,OS_ADDR); a_loadaddr_ref_reg(list,href,tmpreg); - reference_reset_base(href,tmpreg,0,ctempposinvalid,0,[]); + reference_reset_base(href,tmpreg,0,ctempposinvalid,0,ref.volatility); end; if fromsize=OS_F32 then @@ -586,7 +549,7 @@ unit cgrv; begin tmpreg:=getintregister(list,OS_ADDR); a_loadaddr_ref_reg(list,href,tmpreg); - reference_reset_base(href,tmpreg,0,ctempposinvalid,0,[]); + reference_reset_base(href,tmpreg,0,ctempposinvalid,0,ref.volatility); end; if fromsize<>tosize then @@ -614,13 +577,11 @@ unit cgrv; result:=true; if ref.refaddr=addr_pcrel then - begin - exit; - end; + exit; if assigned(ref.symbol) then begin - reference_reset_symbol(href,ref.symbol,ref.offset,ref.alignment,[]); + reference_reset_symbol(href,ref.symbol,ref.offset,ref.alignment,ref.volatility); ref.symbol:=nil; ref.offset:=0; @@ -631,7 +592,7 @@ unit cgrv; href.refaddr:=addr_pcrel_hi20; list.concat(taicpu.op_reg_ref(A_AUIPC,tmpreg,href)); - reference_reset_symbol(href,l,0,0,[]); + reference_reset_symbol(href,l,0,0,ref.volatility); href.refaddr:=addr_pcrel_lo12; list.concat(taicpu.op_reg_reg_ref(A_ADDI,tmpreg,tmpreg,href)); @@ -654,7 +615,7 @@ unit cgrv; a_load_const_reg(list, OS_ADDR,ref.offset,tmpreg); - reference_reset_base(ref,tmpreg,0,ctempposinvalid,ref.alignment,[]); + reference_reset_base(ref,tmpreg,0,ctempposinvalid,ref.alignment,ref.volatility); end; if (ref.index<>NR_NO) and diff --git a/compiler/riscv/nrvset.pas b/compiler/riscv/nrvset.pas index e0a51771b6..5a78175480 100644 --- a/compiler/riscv/nrvset.pas +++ b/compiler/riscv/nrvset.pas @@ -143,6 +143,7 @@ implementation current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_JALR,NR_X0, indexreg)); { generate jump table } + current_asmdata.CurrAsmList.concat(cai_align.Create(4)); current_asmdata.CurrAsmList.concat(Tai_label.Create(table)); genitem(current_asmdata.CurrAsmList,hp); end; diff --git a/compiler/riscv64/aoptcpu.pas b/compiler/riscv64/aoptcpu.pas index cebdc53420..34ae3695ad 100644 --- a/compiler/riscv64/aoptcpu.pas +++ b/compiler/riscv64/aoptcpu.pas @@ -164,10 +164,6 @@ implementation result:=true; end; end; - A_ANDI: - begin - - end; end; end; end; diff --git a/compiler/riscv64/cgcpu.pas b/compiler/riscv64/cgcpu.pas index 0cb243c670..3e58547145 100644 --- a/compiler/riscv64/cgcpu.pas +++ b/compiler/riscv64/cgcpu.pas @@ -104,10 +104,10 @@ implementation list.concat(ai); rg[R_INTREGISTER].add_move_instruction(ai); end - else if (fromsize=OS_S32) then + {else if (fromsize=OS_S32) then list.Concat(taicpu.op_reg_reg_const(A_ADDIW,reg2,reg1,0)) else if (fromsize=OS_8) then - list.Concat(taicpu.op_reg_reg_const(A_ANDI,reg2,reg1,$FF)) + list.Concat(taicpu.op_reg_reg_const(A_ANDI,reg2,reg1,$FF))} else begin if tcgsize2size[tosize]0 then list.concat(taicpu.op_reg_const(A_LUI,register,((a shr 12)+1) and $FFFFF)) @@ -149,7 +150,7 @@ implementation end else begin - reference_reset(hr,4,[]); + reference_reset(hr,8,[]); current_asmdata.getjumplabel(l); current_procinfo.aktlocaldata.Concat(cai_align.Create(8)); @@ -305,7 +306,7 @@ implementation regs, fregs: tcpuregisterset; r: TSuperRegister; href: treference; - stackcount: longint; + stackcount, stackAdjust: longint; begin if not(nostackframe) then begin @@ -333,19 +334,29 @@ implementation fregs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall); for r:=RS_F0 to RS_F31 do if r in fregs then - inc(stackcount,8); + inc(stackcount,8); inc(localsize,stackcount); - if not is_imm12(-localsize) then + if not is_imm12(-(localsize-stackcount)) then begin if not (RS_RETURN_ADDRESS_REG in regs) then begin include(regs,RS_RETURN_ADDRESS_REG); inc(localsize,8); + inc(stackcount,8); end; end; - stackcount:=0; + stackAdjust:=0; + if (CPURV_HAS_COMPACT in cpu_capabilities[current_settings.cputype]) and + (stackcount>0) then + begin + list.concat(taicpu.op_reg_reg_const(A_ADDI,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,-stackcount)); + inc(href.offset,stackcount); + stackAdjust:=stackcount; + dec(localsize,stackcount); + end; + for r:=RS_X0 to RS_X31 do if r in regs then begin @@ -362,7 +373,7 @@ implementation end; if current_procinfo.framepointer<>NR_STACK_POINTER_REG then - list.concat(taicpu.op_reg_reg_const(A_ADDI,NR_FRAME_POINTER_REG,NR_STACK_POINTER_REG,0)); + list.concat(taicpu.op_reg_reg_const(A_ADDI,NR_FRAME_POINTER_REG,NR_STACK_POINTER_REG,stackAdjust)); if localsize>0 then begin @@ -384,7 +395,7 @@ implementation var r: tsuperregister; regs, fregs: tcpuregisterset; - stackcount, localsize: longint; + localsize: longint; href: treference; begin if not(nostackframe) then @@ -397,7 +408,6 @@ implementation if (pi_do_call in current_procinfo.flags) then regs:=regs+[RS_RETURN_ADDRESS_REG]; - stackcount:=0; reference_reset_base(href,NR_STACK_POINTER_REG,-8,ctempposinvalid,0,[]); for r:=RS_X31 downto RS_X0 do if r in regs then @@ -407,7 +417,7 @@ implementation fregs:=rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall); for r:=RS_F0 to RS_F31 do if r in fregs then - dec(stackcount,8); + dec(href.offset,8); localsize:=current_procinfo.calc_stackframe_size+(-href.offset-8); if current_procinfo.framepointer<>NR_STACK_POINTER_REG then @@ -445,7 +455,6 @@ implementation begin inc(href.offset,8); list.concat(taicpu.op_reg_ref(A_LD,newreg(R_INTREGISTER,r,R_SUBWHOLE),href)); - inc(stackcount); end; end; @@ -517,8 +526,8 @@ implementation g_concatcopy_move(list, src2, dst2, len) else begin - Count := len div 4; - if (count<=4) and reference_is_reusable(src2) then + Count := len div 8; + if (count<=8) and reference_is_reusable(src2) then src:=src2 else begin @@ -527,7 +536,7 @@ implementation src.base := GetAddressRegister(list); a_loadaddr_ref_reg(list, src2, src.base); end; - if (count<=4) and reference_is_reusable(dst2) then + if (count<=8) and reference_is_reusable(dst2) then dst:=dst2 else begin @@ -544,27 +553,27 @@ implementation a_load_const_reg(list, OS_INT, Count, countreg); current_asmdata.getjumplabel(lab); a_label(list, lab); - list.concat(taicpu.op_reg_ref(A_LW, tmpreg1, src)); - list.concat(taicpu.op_reg_ref(A_SW, tmpreg1, dst)); - list.concat(taicpu.op_reg_reg_const(A_ADDI, src.base, src.base, 4)); - list.concat(taicpu.op_reg_reg_const(A_ADDI, dst.base, dst.base, 4)); + list.concat(taicpu.op_reg_ref(A_LD, tmpreg1, src)); + list.concat(taicpu.op_reg_ref(A_SD, tmpreg1, dst)); + list.concat(taicpu.op_reg_reg_const(A_ADDI, src.base, src.base, 8)); + list.concat(taicpu.op_reg_reg_const(A_ADDI, dst.base, dst.base, 8)); list.concat(taicpu.op_reg_reg_const(A_ADDI, countreg, countreg, -1)); a_cmp_reg_reg_label(list,OS_INT,OC_GT,NR_X0,countreg,lab); - len := len mod 4; + len := len mod 8; end; { unrolled loop } - Count := len div 4; + Count := len div 8; if Count > 0 then begin tmpreg1 := GetIntRegister(list, OS_INT); for count2 := 1 to Count do begin - list.concat(taicpu.op_reg_ref(A_LW, tmpreg1, src)); - list.concat(taicpu.op_reg_ref(A_SW, tmpreg1, dst)); - Inc(src.offset, 4); - Inc(dst.offset, 4); + list.concat(taicpu.op_reg_ref(A_LD, tmpreg1, src)); + list.concat(taicpu.op_reg_ref(A_SD, tmpreg1, dst)); + Inc(src.offset, 8); + Inc(dst.offset, 8); end; - len := len mod 4; + len := len mod 8; end; if (len and 4) <> 0 then begin diff --git a/compiler/riscv64/cpubase.pas b/compiler/riscv64/cpubase.pas index 6bc19044de..1038fde44f 100644 --- a/compiler/riscv64/cpubase.pas +++ b/compiler/riscv64/cpubase.pas @@ -37,6 +37,7 @@ uses type TAsmOp=(A_None, + { Pseudo instructions } A_NOP, { normal opcodes } A_LUI,A_AUIPC,A_JAL,A_JALR, @@ -290,7 +291,7 @@ const The value of this constant is equal to the constant PARM_BOUNDARY / BITS_PER_UNIT in the GCC source. } - std_param_align = 4; { for 32-bit version only } + std_param_align = 8; { for 32-bit version only } {***************************************************************************** @@ -379,7 +380,7 @@ implementation begin case getregtype(reg) of R_INTREGISTER : - result:=OS_32; + result:=OS_64; R_MMREGISTER: result:=OS_M128; R_FPUREGISTER: diff --git a/compiler/riscv64/cpuinfo.pas b/compiler/riscv64/cpuinfo.pas index 0c3f2b1124..91879a0b48 100644 --- a/compiler/riscv64/cpuinfo.pas +++ b/compiler/riscv64/cpuinfo.pas @@ -34,6 +34,7 @@ type { possible supported processors for this target } tcputype = (cpu_none, + cpu_rv64imafdc, cpu_rv64imafd, cpu_rv64ima, cpu_rv64im, @@ -86,6 +87,7 @@ Const ]; cputypestr: array[tcputype] of string[10] = ('', + 'RV64IMAFDC', 'RV64IMAFD', 'RV64IMA', 'RV64IM', @@ -123,11 +125,12 @@ Const const cpu_capabilities : array[tcputype] of set of tcpuflags = - ( { cpu_none } [], - { cpu_rv64imafd } [CPURV_HAS_MUL,CPURV_HAS_ATOMIC], - { cpu_rv64ima } [CPURV_HAS_MUL,CPURV_HAS_ATOMIC], - { cpu_rv64im } [CPURV_HAS_MUL], - { cpu_rv64i } [] + ( { cpu_none } [], + { cpu_rv64imafdc } [CPURV_HAS_MUL,CPURV_HAS_ATOMIC,CPURV_HAS_COMPACT], + { cpu_rv64imafd } [CPURV_HAS_MUL,CPURV_HAS_ATOMIC], + { cpu_rv64ima } [CPURV_HAS_MUL,CPURV_HAS_ATOMIC], + { cpu_rv64im } [CPURV_HAS_MUL], + { cpu_rv64i } [] ); implementation diff --git a/compiler/riscv64/nrv64cnv.pas b/compiler/riscv64/nrv64cnv.pas index 531b6d49f1..271fec5ebf 100644 --- a/compiler/riscv64/nrv64cnv.pas +++ b/compiler/riscv64/nrv64cnv.pas @@ -121,12 +121,8 @@ unit nrv64cnv; begin { Load memory in fpu register } hlcg.location_force_mem(current_asmdata.CurrAsmList, left.location, left.resultdef); - cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList, OS_F32, OS_F32, left.location.reference, location.Register); + cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList, location.size, location.size, left.location.reference, location.Register); tg.ungetiftemp(current_asmdata.CurrAsmList, left.location.reference); - - case restype of - s64real: cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList, OS_F32, OS_F64, location.register, location.Register); - end; end; end; diff --git a/compiler/systems/i_linux.pas b/compiler/systems/i_linux.pas index 5ccdb25e90..16fd5b8652 100644 --- a/compiler/systems/i_linux.pas +++ b/compiler/systems/i_linux.pas @@ -1154,7 +1154,7 @@ unit i_linux; recordalignmax : 16; maxCrecordalign : 16 ); - first_parm_offset : 8; + first_parm_offset : 16; stacksize : 10*1024*1024; stackalign : 16; abi : abi_default;