+ min/max optimization support for RiscV

2025-04-05 18:47:54 +02:00 · 2025-01-06 15:21:09 +01:00 · 2025-01-06 15:21:09 +01:00 · 7aae7a8d51
commit 7aae7a8d51
parent b5eaa8555a
2 changed files with 100 additions and 26 deletions
--- a/compiler/nflw.pas
+++ b/compiler/nflw.pas
@ -304,9 +304,9 @@ implementation
    {$ifdef i8086}
      cpuinfo,
    {$endif i8086}
-    {$if defined(xtensa) or defined(i386)}
+    {$if defined(xtensa) or defined(i386) or defined(riscv)}
      cpuinfo,
-    {$endif defined(xtensa) or defined(i386)}
+    {$endif defined(xtensa) or defined(i386) or defined(riscv)}
      cgbase,procinfo
      ;
@ -1654,6 +1654,15 @@ implementation
          (is_single(tassignmentnode(thenstmnt).left.resultdef) or is_double(tassignmentnode(thenstmnt).left.resultdef) or
           is_32bitint(tassignmentnode(thenstmnt).left.resultdef) or is_64bitint(tassignmentnode(thenstmnt).left.resultdef)) and
 {$endif defined(aarch64)}
 {$if defined(riscv)}
          { RiscV fmin/fmax/fminm/fmaxm use the IEEE semantics (2008 or 201x) of min/max regarding NaN (always
            returning either the NaN or the non-NaN operand instead of the second one in case one is NaN), so
            we can use them only when fast math is on }
          ((cs_opt_fastmath in current_settings.optimizerswitches) and
           ((is_single(tassignmentnode(thenstmnt).left.resultdef) and (CPURV_HAS_F in cpu_capabilities[current_settings.cputype])) or
            (is_double(tassignmentnode(thenstmnt).left.resultdef) and (CPURV_HAS_D in cpu_capabilities[current_settings.cputype])) or
            (is_quad(tassignmentnode(thenstmnt).left.resultdef) and (CPURV_HAS_Q in cpu_capabilities[current_settings.cputype])))) and
 {$endif defined(riscv)}
          (
          { the right size of the assignment in the then clause must either }
--- a/compiler/riscv/nrvinl.pas
+++ b/compiler/riscv/nrvinl.pas
@ -30,38 +30,37 @@ interface
       node,ninl,ncginl;
    type
      trvinlinenode = class(tcginlinenode)
        { first pass override
          so that the code generator will actually generate
          these nodes.
        }
        function first_sqrt_real: tnode; override;
        function first_abs_real: tnode; override;
        function first_sqr_real: tnode; override;
        function first_round_real: tnode; override;
        function first_trunc_real: tnode; override;
        function first_fma: tnode; override;
        function first_minmax: tnode; override;
-       { trvinlinenode }
+        procedure second_sqrt_real; override;
        procedure second_abs_real; override;
        procedure second_sqr_real; override;
        procedure second_round_real; override;
        procedure second_trunc_real; override;
-       trvinlinenode = class(tcginlinenode)
+        procedure second_fma; override;
-          { first pass override
+        procedure second_minmax; override;
-            so that the code generator will actually generate
+      protected
-            these nodes.
+        procedure load_fpu_location;
-          }
+      end;
          function first_sqrt_real: tnode; override;
          function first_abs_real: tnode; override;
          function first_sqr_real: tnode; override;
          function first_round_real: tnode; override;
          function first_trunc_real: tnode; override;
          function first_fma: tnode; override;
          procedure second_sqrt_real; override;
          procedure second_abs_real; override;
          procedure second_sqr_real; override;
          procedure second_round_real; override;
          procedure second_trunc_real; override;
          procedure second_fma; override;
       protected
          procedure load_fpu_location;
       end;
 implementation
    uses
      ncal,
      cutils,globals,verbose,globtype,
      compinnr,
      aasmtai,aasmdata,aasmcpu,
      symconst,symdef,
      defutil,
@ -159,6 +158,20 @@ implementation
       end;
    { First pass for the min/max inline nodes.

      For single, double and quad results the node is kept (nil is returned),
      its expected location is an FPU register, and pi_do_call is set on the
      current procedure when FPU exception checks are needed. Every other
      result type is delegated to the inherited implementation. }
    function trvinlinenode.first_minmax : tnode;
      var
        is_float_result: boolean;
      begin
        is_float_result:=is_single(resultdef) or
                         is_double(resultdef) or
                         is_quad(resultdef);

        { anything that is not a floating point type is handled by the ancestor }
        if not is_float_result then
          begin
            Result:=inherited first_minmax;
            exit;
          end;

        Result:=nil;
        expectloc:=LOC_FPUREGISTER;
        if needs_check_for_fpu_exceptions then
          Include(current_procinfo.flags,pi_do_call);
      end;
     { load the FPU location into an fpu register }
     procedure trvinlinenode.load_fpu_location;
       begin
@ -376,6 +389,58 @@ implementation
       end;
    { Second pass for the min/max inline nodes.

      Generates code for both operands, forces them into FPU registers and
      emits one three-register fmin/fmax instruction selected by inlinenumber
      into a freshly allocated FPU result register. Single, double and quad
      results are handled, matching what first_minmax accepts; any other
      result type raises internal error 2025010501, and an inlinenumber that
      is not one of the min/max variants raises internal error 2025010502. }
    procedure trvinlinenode.second_minmax;
      var
        paraarray : array[1..2] of tnode;
        i: Integer;
        opcode: TAsmOp;
      begin
        { paraarray[1] comes from the next node in the parameter list,
          paraarray[2] from the head of the list }
        paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
        paraarray[2]:=tcallparanode(parameters).paravalue;

        for i:=low(paraarray) to high(paraarray) do
          secondpass(paraarray[i]);

        { is_quad is included so that the quad case arms below are reachable
          and consistent with first_minmax, which also expects an FPU register
          for quad results }
        if is_single(resultdef) or is_double(resultdef) or is_quad(resultdef) then
          begin
            { no memory operand is allowed }
            for i:=low(paraarray) to high(paraarray) do
              begin
                if not(paraarray[i].location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
                  hlcg.location_force_fpureg(current_asmdata.CurrAsmList,paraarray[i].location,
                    paraarray[i].resultdef,true);
              end;

            { the result takes its size from the first operand }
            location_reset(location,LOC_FPUREGISTER,paraarray[1].location.size);
            location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);

            case inlinenumber of
              in_min_single:
                opcode:=A_FMIN_S;
              in_min_double:
                opcode:=A_FMIN_D;
              in_min_quad:
                { was A_FMAX_Q, which would have computed max for a min request }
                opcode:=A_FMIN_Q;
              in_max_single:
                opcode:=A_FMAX_S;
              in_max_double:
                opcode:=A_FMAX_D;
              in_max_quad:
                opcode:=A_FMAX_Q;
              else
                Internalerror(2025010502);
            end;

            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(opcode,
              location.register,paraarray[1].location.register,paraarray[2].location.register));
            cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
          end
        else
          internalerror(2025010501);
      end;
 begin
   cinlinenode:=trvinlinenode;
 end.