+ sqr uses sse on x86 if possible

* the result type of sqr is equal to the argument type in case of usual floats

git-svn-id: trunk@1265 -
2025-09-07 13:10:34 +02:00 · 2005-10-03 12:31:31 +00:00 · 2005-10-03 12:31:31 +00:00 · 6371333361
commit 6371333361
parent c15e0911e5
2 changed files with 27 additions and 4 deletions
--- a/compiler/ninl.pas
+++ b/compiler/ninl.pas
@ -1206,6 +1206,20 @@ implementation
          end;


+      { Determines the result type of the node from its operand (left):
+        an operand that already has one of the float types listed below
+        keeps its own type; any other operand is converted to
+        pbestrealtype^, which then also becomes the result type. }
+      procedure setfloatresulttype;
+        begin
+          { operand is a floatdef of type s32real/s64real/s80real/s128real:
+            the result type is simply the operand's type, no conversion }
+          if (left.resulttype.def.deftype=floatdef) and
+            (tfloatdef(left.resulttype.def).typ in [s32real,s64real,s80real,s128real]) then
+            resulttype:=left.resulttype
+          else
+            begin
+              { every other operand type: insert a type conversion of left
+                to pbestrealtype^ and use that type for the result as well }
+              inserttypeconv(left,pbestrealtype^);
+              resulttype:=pbestrealtype^;
+            end;
+        end;
+
+
+
      var
         vl,vl2    : TConstExprInt;
         vr        : bestreal;
@ -1935,8 +1949,7 @@ implementation
                  else
                   begin
                     set_varstate(left,vs_used,[vsf_must_be_valid]);
-                     inserttypeconv(left,pbestrealtype^);
-                     resulttype:=pbestrealtype^;
+                     setfloatresulttype;
                   end;
                end;

--- a/compiler/x86/nx86inl.pas
+++ b/compiler/x86/nx86inl.pas
@ -246,8 +246,18 @@ implementation
     procedure tx86inlinenode.second_sqr_real;

       begin
-         load_fpu_location;
-         emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
+         { sqr(x) is generated as x*x; use_sse(resulttype.def) selects
+           between the mm-register path and the x87 path }
+         if use_sse(resulttype.def) then
+           begin
+             secondpass(left);
+             location_force_mmregscalar(exprasmlist,left.location,false);
+             { the operand's location is reused as the location of this node }
+             location:=left.location;
+             { OP_MUL with left.location as source and its own register as
+               destination, i.e. the register is multiplied by itself }
+             cg.a_opmm_loc_reg(exprasmlist,OP_MUL,left.location.size,left.location,left.location.register,mms_movescalar);
+           end
+         else
+           begin
+             { x87 path, same two statements as before this change:
+               FMUL with ST0 as both operands.
+               NOTE(review): load_fpu_location presumably evaluates left and
+               leaves it in ST0 - confirm against its definition }
+             load_fpu_location;
+             emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
+           end;
       end;

     procedure tx86inlinenode.second_sqrt_real;