* sqrt(real): for targets with emulated floating point, invoke float64_sqrt or float32_sqrt from the softfpu code. Testing on an ARM CPU shows that float64_sqrt runs about twice as fast as fpc_sqrt_real from genmath.inc and offers better accuracy.

* softfpu.pp: changed float64_sqrt into a function, so it is consistent with other compiler-invoked routines. git-svn-id: trunk@27809 -
2025-09-02 12:50:27 +02:00 · 2014-05-21 15:07:13 +00:00 · 2014-05-21 15:07:13 +00:00 · 22e099d000
commit 22e099d000
parent fc5f45f65c
2 changed files with 44 additions and 14 deletions
--- a/compiler/ninl.pas
+++ b/compiler/ninl.pas
@ -3748,11 +3748,43 @@ implementation
      end;

     function tinlinenode.first_sqrt_real : tnode;
+      var
+        fdef: tdef;             { softfpu type the operand is converted to before the call }
+        procname: string[31];   { name of the softfpu helper to call }
      begin
-        { create the call to the helper }
-        { on entry left node contains the parameter }
-        first_sqrt_real := ctypeconvnode.create(ccallnode.createintern('fpc_sqrt_real',
+        if ((cs_fp_emulation in current_settings.moduleswitches)
+{$ifdef cpufpemu}
+            or (current_settings.fputype=fpu_soft)
+{$endif cpufpemu}
+            ) and not (target_info.system in systems_wince) then { parenthesised: "and" binds tighter than "or", so the WinCE exclusion must cover both emulation tests }
+          begin
+            case tfloatdef(left.resultdef).floattype of
+              s32real:
+                begin
+                  fdef:=search_system_type('FLOAT32REC').typedef;
+                  procname:='float32_sqrt';
+                end;
+              s64real:
+                begin
+                  fdef:=search_system_type('FLOAT64').typedef;
+                  procname:='float64_sqrt';
+                end;
+              {!!! not yet implemented
+              s128real:
+              }
+            else
+              internalerror(2014052101);
+            end;
+            first_sqrt_real:=ctypeconvnode.create_internal(ccallnode.createintern(procname,ccallparanode.create(
+               ctypeconvnode.create_internal(left,fdef),nil)),resultdef); { operand -> fdef, call helper, result -> resultdef }
+          end
+        else
+          begin
+            { no softfpu path: create the call to the generic helper }
+            { on entry left node contains the parameter }
+            first_sqrt_real := ctypeconvnode.create(ccallnode.createintern('fpc_sqrt_real',
                 ccallparanode.create(left,nil)),resultdef);
+          end;
        left := nil;
      end;

--- a/rtl/inc/softfpu.pp
+++ b/rtl/inc/softfpu.pp
@ -230,7 +230,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary
 Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 *}
-Procedure float64_sqrt( a: float64; var out: float64 ); compilerproc;
+function float64_sqrt( a: float64 ): float64; compilerproc;
 {*
 -------------------------------------------------------------------------------
 Returns the remainder of the double-precision floating-point value `a'
@ -5389,14 +5389,13 @@ The operation is performed according to the IEC/IEEE Standard for Binary
 Floating-Point Arithmetic.
 -------------------------------------------------------------------------------
 *}
-Procedure float64_sqrt( a: float64; var out: float64 );
+function float64_sqrt( a: float64 ): float64;
 {$ifdef fpc}[public,Alias:'FLOAT64_SQRT'];compilerproc;{$endif}
 Var
    aSign: flag;
    aExp, zExp: int16;
    aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0: bits32;
    rem0, rem1, rem2, rem3, term0, term1, term2, term3: bits32;
-    z: float64;
    label invalid;
 Begin
    aSig1 := extractFloat64Frac1( a );
@ -5407,12 +5406,12 @@ Begin
    Begin
        if ( aSig0 OR  aSig1 ) <> 0 then
        Begin
-           propagateFloat64NaN( a, a, out );
+           propagateFloat64NaN( a, a, result );
           exit;
        End;
        if ( aSign = 0) then
        Begin
-          out := a;
+          result := a;
          exit;
        End;
        goto invalid;
@ -5421,21 +5420,20 @@ Begin
    Begin
        if ( ( aExp OR  aSig0 OR  aSig1 ) = 0 ) then
        Begin
-           out := a;
+           result := a;
           exit;
        End;
 invalid:
        float_raise( float_flag_invalid );
-        z.low := float64_default_nan_low;
-        z.high := float64_default_nan_high;
-        out := z;
+        result.low := float64_default_nan_low;
+        result.high := float64_default_nan_high;
        exit;
    End;
    if ( aExp = 0 ) then
    Begin
        if ( ( aSig0 OR  aSig1 ) = 0 ) then
        Begin
-           packFloat64( 0, 0, 0, 0, out );
+           packFloat64( 0, 0, 0, 0, result );
           exit;
        End;
        normalizeFloat64Subnormal( aSig0, aSig1, aExp, aSig0, aSig1 );
@ -5476,7 +5474,7 @@ Begin
        zSig1 := zSig1 or bits32( ( rem1 OR  rem2 OR  rem3 ) <> 0 );
    End;
    shift64ExtraRightJamming( zSig0, zSig1, 0, 10, zSig0, zSig1, zSig2 );
-    roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2, out );
+    roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2, result );
 End;

 {*