+ min/max optimization support for RiscV

This commit is contained in:
florian 2025-01-06 15:21:09 +01:00
parent b5eaa8555a
commit 7aae7a8d51
2 changed files with 100 additions and 26 deletions

View File

@ -304,9 +304,9 @@ implementation
{$ifdef i8086} {$ifdef i8086}
cpuinfo, cpuinfo,
{$endif i8086} {$endif i8086}
{$if defined(xtensa) or defined(i386)} {$if defined(xtensa) or defined(i386) or defined(riscv)}
cpuinfo, cpuinfo,
{$endif defined(xtensa) or defined(i386)} {$endif defined(xtensa) or defined(i386) or defined(riscv)}
cgbase,procinfo cgbase,procinfo
; ;
@ -1654,6 +1654,15 @@ implementation
(is_single(tassignmentnode(thenstmnt).left.resultdef) or is_double(tassignmentnode(thenstmnt).left.resultdef) or (is_single(tassignmentnode(thenstmnt).left.resultdef) or is_double(tassignmentnode(thenstmnt).left.resultdef) or
is_32bitint(tassignmentnode(thenstmnt).left.resultdef) or is_64bitint(tassignmentnode(thenstmnt).left.resultdef)) and is_32bitint(tassignmentnode(thenstmnt).left.resultdef) or is_64bitint(tassignmentnode(thenstmnt).left.resultdef)) and
{$endif defined(aarch64)} {$endif defined(aarch64)}
{$if defined(riscv)}
{ RiscV fmin/fmax/fminm/fmaxm use the IEEE semantics (2008 or 201x) of min/max regarding NaN (always using either
the NaN or the non-NaN operand instead of the second one in case one is NaN), so
we can use them only when fast math is on }
((cs_opt_fastmath in current_settings.optimizerswitches) and
((is_single(tassignmentnode(thenstmnt).left.resultdef) and (CPURV_HAS_F in cpu_capabilities[current_settings.cputype])) or
(is_double(tassignmentnode(thenstmnt).left.resultdef) and (CPURV_HAS_D in cpu_capabilities[current_settings.cputype])) or
(is_quad(tassignmentnode(thenstmnt).left.resultdef) and (CPURV_HAS_Q in cpu_capabilities[current_settings.cputype])))) and
{$endif defined(riscv)}
( (
{ the right size of the assignment in the then clause must either } { the right size of the assignment in the then clause must either }

View File

@ -30,38 +30,37 @@ interface
node,ninl,ncginl; node,ninl,ncginl;
type type
trvinlinenode = class(tcginlinenode)
{ first pass override
so that the code generator will actually generate
these nodes.
}
function first_sqrt_real: tnode; override;
function first_abs_real: tnode; override;
function first_sqr_real: tnode; override;
function first_round_real: tnode; override;
function first_trunc_real: tnode; override;
function first_fma: tnode; override;
function first_minmax: tnode; override;
{ trvinlinenode } procedure second_sqrt_real; override;
procedure second_abs_real; override;
procedure second_sqr_real; override;
procedure second_round_real; override;
procedure second_trunc_real; override;
trvinlinenode = class(tcginlinenode) procedure second_fma; override;
{ first pass override procedure second_minmax; override;
so that the code generator will actually generate protected
these nodes. procedure load_fpu_location;
} end;
function first_sqrt_real: tnode; override;
function first_abs_real: tnode; override;
function first_sqr_real: tnode; override;
function first_round_real: tnode; override;
function first_trunc_real: tnode; override;
function first_fma: tnode; override;
procedure second_sqrt_real; override;
procedure second_abs_real; override;
procedure second_sqr_real; override;
procedure second_round_real; override;
procedure second_trunc_real; override;
procedure second_fma; override;
protected
procedure load_fpu_location;
end;
implementation implementation
uses uses
ncal, ncal,
cutils,globals,verbose,globtype, cutils,globals,verbose,globtype,
compinnr,
aasmtai,aasmdata,aasmcpu, aasmtai,aasmdata,aasmcpu,
symconst,symdef, symconst,symdef,
defutil, defutil,
@ -159,6 +158,20 @@ implementation
end; end;
function trvinlinenode.first_minmax : tnode;
  var
    float_result: boolean;
  begin
    { First-pass handler for the min/max inline nodes.

      For single, double and quad results the node is kept (nil is returned)
      and its expected location is set to an FPU register; every other result
      type is delegated to the inherited implementation. }
    float_result:=is_single(resultdef) or
                  is_double(resultdef) or
                  is_quad(resultdef);

    { not a floating point result: nothing RiscV specific to do here }
    if not float_result then
      begin
        Result:=inherited first_minmax;
        exit;
      end;

    Result:=nil;
    expectloc:=LOC_FPUREGISTER;

    { when FPU exception checks are required, flag the current procedure
      with pi_do_call }
    if needs_check_for_fpu_exceptions then
      Include(current_procinfo.flags,pi_do_call);
  end;
{ load the FPU into an fpu register } { load the FPU into an fpu register }
procedure trvinlinenode.load_fpu_location; procedure trvinlinenode.load_fpu_location;
begin begin
@ -376,6 +389,58 @@ implementation
end; end;
procedure trvinlinenode.second_minmax;
  var
    paraarray : array[1..2] of tnode;
    i: Integer;
    opcode: TAsmOp;
  begin
    { Second-pass (code generation) handler for the floating point min/max
      inline nodes: emits a single three-register fmin/fmax instruction.

      Accepts single, double and quad results, matching what first_minmax
      claims for the FPU; any other result type is an internal error. }

    { paraarray[1] is taken from the second node of the parameter list and
      paraarray[2] from its head, so paraarray[1] becomes the first source
      operand of the instruction
      (NOTE(review): presumably the list is stored in reverse order - confirm) }
    paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
    paraarray[2]:=tcallparanode(parameters).paravalue;

    for i:=low(paraarray) to high(paraarray) do
      secondpass(paraarray[i]);

    { is_quad is included so that the quad case arms below are reachable and
      this routine agrees with first_minmax }
    if is_single(resultdef) or is_double(resultdef) or is_quad(resultdef) then
      begin
        { no memory operand is allowed }
        for i:=low(paraarray) to high(paraarray) do
          begin
            if not(paraarray[i].location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,paraarray[i].location,
                paraarray[i].resultdef,true);
          end;

        { the result lives in a fresh FPU register of the operand's size }
        location_reset(location,LOC_FPUREGISTER,paraarray[1].location.size);
        location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);

        case inlinenumber of
          in_min_single:
            opcode:=A_FMIN_S;
          in_min_double:
            opcode:=A_FMIN_D;
          in_min_quad:
            { was A_FMAX_Q: min on quad operands must not compute the maximum }
            opcode:=A_FMIN_Q;
          in_max_single:
            opcode:=A_FMAX_S;
          in_max_double:
            opcode:=A_FMAX_D;
          in_max_quad:
            opcode:=A_FMAX_Q;
          else
            Internalerror(2025010502);
        end;

        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(opcode,
          location.register,paraarray[1].location.register,paraarray[2].location.register));

        cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
      end
    else
      internalerror(2025010501);
  end;
begin begin
cinlinenode:=trvinlinenode; cinlinenode:=trvinlinenode;
end. end.