diff --git a/compiler/nflw.pas b/compiler/nflw.pas
index 5cd718ddd6..b64671e261 100644
--- a/compiler/nflw.pas
+++ b/compiler/nflw.pas
@@ -304,9 +304,9 @@ implementation
     {$ifdef i8086}
       cpuinfo,
     {$endif i8086}
-    {$if defined(xtensa) or defined(i386)}
+    {$if defined(xtensa) or defined(i386) or defined(riscv)}
       cpuinfo,
-    {$endif defined(xtensa) or defined(i386)}
+    {$endif defined(xtensa) or defined(i386) or defined(riscv)}
       cgbase,procinfo
       ;
 
@@ -1654,6 +1654,15 @@ implementation
               (is_single(tassignmentnode(thenstmnt).left.resultdef) or is_double(tassignmentnode(thenstmnt).left.resultdef) or
                is_32bitint(tassignmentnode(thenstmnt).left.resultdef) or is_64bitint(tassignmentnode(thenstmnt).left.resultdef)) and
 {$endif defined(aarch64)}
+{$if defined(riscv)}
+              { RiscV fmin/fmax/fminm/fmaxm use the IEEE semantics (2008 or 201x) of min/max regarding NaN (using either
+                always the NaN or non-NaN operand instead of the second one in case one is NaN), so
+                we can use them only when fast math is on }
+              ((cs_opt_fastmath in current_settings.optimizerswitches) and
+               ((is_single(tassignmentnode(thenstmnt).left.resultdef) and (CPURV_HAS_F in cpu_capabilities[current_settings.cputype])) or
+                (is_double(tassignmentnode(thenstmnt).left.resultdef) and (CPURV_HAS_D in cpu_capabilities[current_settings.cputype])) or
+                (is_quad(tassignmentnode(thenstmnt).left.resultdef) and (CPURV_HAS_Q in cpu_capabilities[current_settings.cputype])))) and
+{$endif defined(riscv)}
               (
                 { the right size of the assignment in the then clause must either }
 
diff --git a/compiler/riscv/nrvinl.pas b/compiler/riscv/nrvinl.pas
index 8c44213ee9..0200126c2f 100644
--- a/compiler/riscv/nrvinl.pas
+++ b/compiler/riscv/nrvinl.pas
@@ -30,38 +30,37 @@ interface
        node,ninl,ncginl;
 
     type
+      trvinlinenode = class(tcginlinenode)
+        { first pass override
+          so that the code generator will actually generate
+          these nodes.
+        }
+        function first_sqrt_real: tnode; override;
+        function first_abs_real: tnode; override;
+        function first_sqr_real: tnode; override;
+        function first_round_real: tnode; override;
+        function first_trunc_real: tnode; override;
+        function first_fma: tnode; override;
+        function first_minmax: tnode; override;
 
-      { trvinlinenode }
+        procedure second_sqrt_real; override;
+        procedure second_abs_real; override;
+        procedure second_sqr_real; override;
+        procedure second_round_real; override;
+        procedure second_trunc_real; override;
 
-      trvinlinenode = class(tcginlinenode)
-        { first pass override
-          so that the code generator will actually generate
-          these nodes.
-        }
-        function first_sqrt_real: tnode; override;
-        function first_abs_real: tnode; override;
-        function first_sqr_real: tnode; override;
-        function first_round_real: tnode; override;
-        function first_trunc_real: tnode; override;
-
-        function first_fma: tnode; override;
-
-        procedure second_sqrt_real; override;
-        procedure second_abs_real; override;
-        procedure second_sqr_real; override;
-        procedure second_round_real; override;
-        procedure second_trunc_real; override;
-
-        procedure second_fma; override;
-      protected
-        procedure load_fpu_location;
-      end;
+        procedure second_fma; override;
+        procedure second_minmax; override;
+      protected
+        procedure load_fpu_location;
+      end;
 
 implementation
 
     uses
       ncal,
       cutils,globals,verbose,globtype,
+      compinnr,
       aasmtai,aasmdata,aasmcpu,
       symconst,symdef,
       defutil,
@@ -159,6 +158,23 @@ implementation
       end;
 
 
+    { first pass for min/max: single, double and quad results are expected
+      in an FPU register and handled by this node; every other result type
+      is delegated to the inherited implementation }
+    function trvinlinenode.first_minmax : tnode;
+      begin
+        if is_single(resultdef) or is_double(resultdef) or is_quad(resultdef) then
+          begin
+            expectloc:=LOC_FPUREGISTER;
+            Result:=nil;
+            if needs_check_for_fpu_exceptions then
+              Include(current_procinfo.flags,pi_do_call);
+          end
+        else
+          Result:=inherited first_minmax;
+      end;
+
+
     { load the FPU into the an fpu register }
     procedure trvinlinenode.load_fpu_location;
       begin
@@ -376,6 +392,60 @@ implementation
       end;
 
 
+    { second pass for min/max: forces both operands into FPU registers and
+      emits one FMIN/FMAX instruction into a newly allocated FPU register;
+      result types other than single, double and quad are an internal error }
+    procedure trvinlinenode.second_minmax;
+      var
+        paraarray : array[1..2] of tnode;
+        i: Integer;
+        opcode: TAsmOp;
+      begin
+        { paraarray[1] is the value of the second entry in the parameter list, paraarray[2] that of the first }
+        paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
+        paraarray[2]:=tcallparanode(parameters).paravalue;
+
+        for i:=low(paraarray) to high(paraarray) do
+           secondpass(paraarray[i]);
+
+        if is_single(resultdef) or is_double(resultdef) or is_quad(resultdef) then
+          begin
+            { no memory operand is allowed }
+            for i:=low(paraarray) to high(paraarray) do
+              begin
+                if not(paraarray[i].location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
+                  hlcg.location_force_fpureg(current_asmdata.CurrAsmList,paraarray[i].location,
+                    paraarray[i].resultdef,true);
+              end;
+
+            location_reset(location,LOC_FPUREGISTER,paraarray[1].location.size);
+            location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+
+            case inlinenumber of
+              in_min_single:
+                opcode:=A_FMIN_S;
+              in_min_double:
+                opcode:=A_FMIN_D;
+              in_min_quad:
+                opcode:=A_FMIN_Q;
+              in_max_single:
+                opcode:=A_FMAX_S;
+              in_max_double:
+                opcode:=A_FMAX_D;
+              in_max_quad:
+                opcode:=A_FMAX_Q;
+              else
+                Internalerror(2025010502);
+            end;
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(opcode,
+              location.register,paraarray[1].location.register,paraarray[2].location.register));
+
+            cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
+          end
+        else
+          internalerror(2025010501);
+      end;
+
 begin
   cinlinenode:=trvinlinenode;
 end.