* continued to work on float128 stuff

git-svn-id: trunk@9117 -
2025-10-30 21:51:29 +01:00 · 2007-11-03 21:41:54 +00:00 · 2007-11-03 21:41:54 +00:00 · e3085015f7
commit e3085015f7
parent 6577a39bab
3 changed files with 92 additions and 5 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -4659,7 +4659,6 @@ rtl/inc/except.inc svneol=native#text/plain
 rtl/inc/fexpand.inc svneol=native#text/plain
 rtl/inc/file.inc svneol=native#text/plain
 rtl/inc/filerec.inc svneol=native#text/plain
 rtl/inc/float128.pp svneol=native#text/plain
 rtl/inc/generic.inc svneol=native#text/plain
 rtl/inc/genmath.inc svneol=native#text/plain
 rtl/inc/genset.inc svneol=native#text/plain
@ -4714,6 +4713,7 @@ rtl/inc/threadh.inc svneol=native#text/plain
 rtl/inc/threadvr.inc svneol=native#text/plain
 rtl/inc/typefile.inc svneol=native#text/plain
 rtl/inc/ucomplex.pp svneol=native#text/plain
 rtl/inc/ufloat128.pp svneol=native#text/plain
 rtl/inc/varerror.inc svneol=native#text/plain
 rtl/inc/variant.inc svneol=native#text/plain
 rtl/inc/varianth.inc svneol=native#text/plain
--- a/rtl/inc/softfpu.pp
+++ b/rtl/inc/softfpu.pp
@ -70,7 +70,7 @@ these four paragraphs for those parts of this code that are retained.
 *}
 { $define FPC_SOFTFLOAT_FLOATX80}
-{ $define FPC_SOFTFLOAT_FLOAT128}
+{$define FPC_SOFTFLOAT_FLOAT128}
 { the softfpu unit can be also embedded directly into the system unit }
@ -436,6 +436,7 @@ function float128_to_int64(a: float128): int64;
 function float128_to_int64_round_to_zero(a: float128): int64;
 function float128_to_float32(a: float128): float32;
 function float128_to_float64(a: float128): float64;
 function float64_to_float128( a : float64) : float128;
 {$ifdef FPC_SOFTFLOAT_FLOAT80}
 function float128_to_floatx80(a: float128): floatx80;
 {$endif FPC_SOFTFLOAT_FLOAT80}
@ -1892,6 +1893,17 @@ Begin
 End;
 function float64ToCommonNaN( a : float64 ) : commonNaNT;
 Var
    z : commonNaNT;
 Begin
    if ( float64_is_signaling_nan( a )<>0 ) then
        float_raise( float_flag_invalid );
    z.sign := a.high shr 31;
    shortShift64Left( a.high, a.low, 12, z.high, z.low );
    result := z;
 End;
 {*
 -------------------------------------------------------------------------------
 Returns the result of converting the canonical NaN `a' to the double-
@ -2473,6 +2485,13 @@ Function extractFloat64Frac1(a: float64): bits32;
    extractFloat64Frac1 := a.low;
  End;
 {$define FPC_SYSTEM_HAS_extractFloat64Frac}
 Function extractFloat64Frac(a: float64): bits64;
  Begin
    extractFloat64Frac := bits64(a) and $000FFFFFFFFFFFFF;
  End;
 {*
 -------------------------------------------------------------------------------
 Returns the exponent bits of the double-precision floating-point value `a'.
@ -2537,6 +2556,16 @@ Procedure normalizeFloat64Subnormal(
      End;
  End;
 procedure normalizeFloat64Subnormal(aSig : bits64;var zExpPtr : int16; var zSigPtr : bits64);
 var
  shiftCount : int8;
 begin
    shiftCount := countLeadingZeros64( aSig ) - 11;
    zSigPtr := aSig shl shiftCount;
    zExpPtr := 1 - shiftCount;
 end;
 {*
 -------------------------------------------------------------------------------
 Packs the sign `zSign', the exponent `zExp', and the significand formed by
@ -8397,6 +8426,43 @@ begin
 end;
 {----------------------------------------------------------------------------
 | Returns the result of converting the double-precision floating-point value
 | `a' to the quadruple-precision floating-point format.  The conversion is
 | performed according to the IEC/IEEE Standard for Binary Floating-Point
 | Arithmetic.
 *----------------------------------------------------------------------------}
 function float64_to_float128( a : float64) : float128;
 var
    aSign : flag;
    aExp : int16;
    aSig, zSig0, zSig1 : bits64;
 begin
    aSig := extractFloat64Frac( a );
    aExp := extractFloat64Exp( a );
    aSign := extractFloat64Sign( a );
    if ( aExp = $7FF ) then begin
        if ( aSig<>0 ) then
          result:=commonNaNToFloat128( float64ToCommonNaN( a ) );
        result:=packFloat128( aSign, $7FFF, 0, 0 );
        exit;
    end;
    if ( aExp = 0 ) then begin
        if ( aSig = 0 ) then
          begin
            result:=packFloat128( aSign, 0, 0, 0 );
            exit;
          end;
        normalizeFloat64Subnormal( aSig, aExp, aSig );
        dec(aExp);
    end;
    shift128Right( aSig, 0, 4, zSig0, zSig1 );
    result:=packFloat128( aSign, aExp + $3C00, zSig0, zSig1 );
 end;
 {$endif FPC_SOFTFLOAT_FLOAT128}
 {$endif not(defined(fpc_softfpu_interface))}
--- a/rtl/inc/ufloat128.pp
+++ b/rtl/inc/ufloat128.pp
@ -13,13 +13,17 @@
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 **********************************************************************}
-unit float128;
+{$inline on}
 unit ufloat128;
  interface
    uses
      softfpu;
    type
      float128 = softfpu.float128;
    operator+ (const f1,f2 : float128) result : float128;inline;
    operator* (const f1,f2 : float128) result : float128;inline;
    operator- (const f1,f2 : float128) result : float128;inline;
@ -29,9 +33,25 @@ unit float128;
    operator :=(const source : float128) dest : double;inline;
    procedure DumpFloat128(const f : float128);
  implementation
    procedure DumpFloat128(const f : float128);
      type
        ta = packed array[0..15] of byte;
      var
        i : longint;
      begin
        for i:=15 downto 0 do
          begin
            write(hexstr(ta(f)[i],2));
            if i<15 then
              write(' ');
          end;
      end;
    operator+ (const f1,f2 : float128) result : float128;inline;
      begin
        result:=float128_add(f1,f2);
@ -58,13 +78,14 @@ unit float128;
    operator :=(const source : double) dest : float128;inline;
      begin
-        dest:=float64_to_float128(source);
+        dest:=float64_to_float128(float64(source));
      end;
    operator :=(const source : float128) dest : double;inline;
      begin
-        dest:=float128_to_float64(source);
+        dest:=double(float128_to_float64(source));
      end;
 end.