From e3085015f73fae150eddf223dfe28d489652db3d Mon Sep 17 00:00:00 2001 From: florian Date: Sat, 3 Nov 2007 21:41:54 +0000 Subject: [PATCH] * continued to work on float128 stuff git-svn-id: trunk@9117 - --- .gitattributes | 2 +- rtl/inc/softfpu.pp | 68 ++++++++++++++++++++++++++- rtl/inc/{float128.pp => ufloat128.pp} | 27 +++++++++-- 3 files changed, 92 insertions(+), 5 deletions(-) rename rtl/inc/{float128.pp => ufloat128.pp} (76%) diff --git a/.gitattributes b/.gitattributes index 5060c33e33..bf4144a570 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4659,7 +4659,6 @@ rtl/inc/except.inc svneol=native#text/plain rtl/inc/fexpand.inc svneol=native#text/plain rtl/inc/file.inc svneol=native#text/plain rtl/inc/filerec.inc svneol=native#text/plain -rtl/inc/float128.pp svneol=native#text/plain rtl/inc/generic.inc svneol=native#text/plain rtl/inc/genmath.inc svneol=native#text/plain rtl/inc/genset.inc svneol=native#text/plain @@ -4714,6 +4713,7 @@ rtl/inc/threadh.inc svneol=native#text/plain rtl/inc/threadvr.inc svneol=native#text/plain rtl/inc/typefile.inc svneol=native#text/plain rtl/inc/ucomplex.pp svneol=native#text/plain +rtl/inc/ufloat128.pp svneol=native#text/plain rtl/inc/varerror.inc svneol=native#text/plain rtl/inc/variant.inc svneol=native#text/plain rtl/inc/varianth.inc svneol=native#text/plain diff --git a/rtl/inc/softfpu.pp b/rtl/inc/softfpu.pp index bb9366236a..a8718e917c 100644 --- a/rtl/inc/softfpu.pp +++ b/rtl/inc/softfpu.pp @@ -70,7 +70,7 @@ these four paragraphs for those parts of this code that are retained. *} { $define FPC_SOFTFLOAT_FLOATX80} -{ $define FPC_SOFTFLOAT_FLOAT128} +{$define FPC_SOFTFLOAT_FLOAT128} { the softfpu unit can be also embedded directly into the system unit } @@ -436,6 +436,7 @@ function float128_to_int64(a: float128): int64; function float128_to_int64_round_to_zero(a: float128): int64; function float128_to_float32(a: float128): float32; function float128_to_float64(a: float128): float64; +function float64_to_float128( a : float64) : float128; {$ifdef FPC_SOFTFLOAT_FLOAT80} function float128_to_floatx80(a: float128): floatx80; {$endif FPC_SOFTFLOAT_FLOAT80} @@ -1892,6 +1893,17 @@ Begin End; +function float64ToCommonNaN( a : float64 ) : commonNaNT; +Var + z : commonNaNT; +Begin + if ( float64_is_signaling_nan( a )<>0 ) then + float_raise( float_flag_invalid ); + z.sign := a.high shr 31; + shortShift64Left( a.high, a.low, 12, z.high, z.low ); + result := z; + +End; {* ------------------------------------------------------------------------------- Returns the result of converting the canonical NaN `a' to the double- @@ -2473,6 +2485,13 @@ Function extractFloat64Frac1(a: float64): bits32; extractFloat64Frac1 := a.low; End; + +{$define FPC_SYSTEM_HAS_extractFloat64Frac} +Function extractFloat64Frac(a: float64): bits64; + Begin + extractFloat64Frac := bits64(a) and $000FFFFFFFFFFFFF; + End; + {* ------------------------------------------------------------------------------- Returns the exponent bits of the double-precision floating-point value `a'. @@ -2537,6 +2556,16 @@ Procedure normalizeFloat64Subnormal( End; End; +procedure normalizeFloat64Subnormal(aSig : bits64;var zExpPtr : int16; var zSigPtr : bits64); +var + shiftCount : int8; +begin + shiftCount := countLeadingZeros64( aSig ) - 11; + zSigPtr := aSig shl shiftCount; + zExpPtr := 1 - shiftCount; +end; + + {* ------------------------------------------------------------------------------- Packs the sign `zSign', the exponent `zExp', and the significand formed by @@ -8397,6 +8426,43 @@ begin end; +{---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the quadruple-precision floating-point format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. +*----------------------------------------------------------------------------} + +function float64_to_float128( a : float64) : float128; +var + aSign : flag; + aExp : int16; + aSig, zSig0, zSig1 : bits64; +begin + aSig := extractFloat64Frac( a ); + aExp := extractFloat64Exp( a ); + aSign := extractFloat64Sign( a ); + if ( aExp = $7FF ) then begin + if ( aSig<>0 ) then + result:=commonNaNToFloat128( float64ToCommonNaN( a ) ); + result:=packFloat128( aSign, $7FFF, 0, 0 ); + exit; + end; + if ( aExp = 0 ) then begin + if ( aSig = 0 ) then + begin + result:=packFloat128( aSign, 0, 0, 0 ); + exit; + end; + + normalizeFloat64Subnormal( aSig, aExp, aSig ); + dec(aExp); + end; + shift128Right( aSig, 0, 4, zSig0, zSig1 ); + result:=packFloat128( aSign, aExp + $3C00, zSig0, zSig1 ); + +end; + {$endif FPC_SOFTFLOAT_FLOAT128} {$endif not(defined(fpc_softfpu_interface))} diff --git a/rtl/inc/float128.pp b/rtl/inc/ufloat128.pp similarity index 76% rename from rtl/inc/float128.pp rename to rtl/inc/ufloat128.pp index bd2ac0637d..28eec51f60 100644 --- a/rtl/inc/float128.pp +++ b/rtl/inc/ufloat128.pp @@ -13,13 +13,17 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. **********************************************************************} -unit float128; +{$inline on} +unit ufloat128; interface uses softfpu; + type + float128 = softfpu.float128; + operator+ (const f1,f2 : float128) result : float128;inline; operator* (const f1,f2 : float128) result : float128;inline; operator- (const f1,f2 : float128) result : float128;inline; @@ -29,9 +33,25 @@ unit float128; operator :=(const source : float128) dest : double;inline; + procedure DumpFloat128(const f : float128); implementation + procedure DumpFloat128(const f : float128); + type + ta = packed array[0..15] of byte; + var + i : longint; + begin + for i:=15 downto 0 do + begin + write(hexstr(ta(f)[i],2)); + if i<15 then + write(' '); + end; + end; + + operator+ (const f1,f2 : float128) result : float128;inline; begin result:=float128_add(f1,f2); @@ -58,13 +78,14 @@ unit float128; operator :=(const source : double) dest : float128;inline; begin - dest:=float64_to_float128(source); + dest:=float64_to_float128(float64(source)); end; operator :=(const source : float128) dest : double;inline; begin - dest:=float128_to_float64(source); + dest:=double(float128_to_float64(source)); end; + end.