+ use sse for sqrt if possible

* same optimization for sqrt as for sqr

git-svn-id: trunk@1266 -
2025-10-20 23:01:28 +02:00 · 2005-10-03 12:47:05 +00:00 · 2005-10-03 12:47:05 +00:00 · 19e97e4da2
commit 19e97e4da2
parent 6371333361
2 changed files with 20 additions and 4 deletions
--- a/compiler/ninl.pas
+++ b/compiler/ninl.pas
@ -1966,8 +1966,7 @@ implementation
                  else
                   begin
                     set_varstate(left,vs_used,[vsf_must_be_valid]);
-                     inserttypeconv(left,pbestrealtype^);
-                     resulttype:=pbestrealtype^;
+                     setfloatresulttype;
                   end;
                end;

--- a/compiler/x86/nx86inl.pas
+++ b/compiler/x86/nx86inl.pas
@ -262,8 +262,25 @@ implementation

     procedure tx86inlinenode.second_sqrt_real;  { emits the square-root instruction for this inline node: SSE form when use_sse allows it, x87 FSQRT otherwise }
       begin
-         load_fpu_location;
-         emit_none(A_FSQRT,S_NO);
+         if use_sse(resulttype.def) then  { SSE path, chosen by use_sse for the result type }
+           begin
+             secondpass(left);  { process the operand node first }
+             location_force_mmregscalar(exprasmlist,left.location,false);  { presumably moves the operand into a scalar MM register - confirm against the helper }
+             location:=left.location;  { the result reuses the operand's location }
+             case tfloatdef(resulttype.def).typ of  { pick the opcode matching the float type of the result }
+               s32real:
+                 exprasmlist.concat(taicpu.op_reg_reg(A_SQRTSS,S_XMM,location.register,location.register));  { SQRTSS with the same register as both operands }
+               s64real:
+                 exprasmlist.concat(taicpu.op_reg_reg(A_SQRTSD,S_XMM,location.register,location.register));  { SQRTSD with the same register as both operands }
+               else
+                 internalerror(200510031);  { any other float type is not handled on this path }
+             end;
+           end
+         else  { non-SSE path: same two statements as before this change }
+           begin
+             load_fpu_location;
+             emit_none(A_FSQRT,S_NO);  { FSQRT emitted without operands }
+           end;
       end;

     procedure tx86inlinenode.second_ln_real;