mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-06-05 08:18:47 +02:00
* continued to work on float128 stuff
git-svn-id: trunk@9117 -
This commit is contained in:
parent
6577a39bab
commit
e3085015f7
2
.gitattributes
vendored
2
.gitattributes
vendored
@ -4659,7 +4659,6 @@ rtl/inc/except.inc svneol=native#text/plain
|
|||||||
rtl/inc/fexpand.inc svneol=native#text/plain
|
rtl/inc/fexpand.inc svneol=native#text/plain
|
||||||
rtl/inc/file.inc svneol=native#text/plain
|
rtl/inc/file.inc svneol=native#text/plain
|
||||||
rtl/inc/filerec.inc svneol=native#text/plain
|
rtl/inc/filerec.inc svneol=native#text/plain
|
||||||
rtl/inc/float128.pp svneol=native#text/plain
|
|
||||||
rtl/inc/generic.inc svneol=native#text/plain
|
rtl/inc/generic.inc svneol=native#text/plain
|
||||||
rtl/inc/genmath.inc svneol=native#text/plain
|
rtl/inc/genmath.inc svneol=native#text/plain
|
||||||
rtl/inc/genset.inc svneol=native#text/plain
|
rtl/inc/genset.inc svneol=native#text/plain
|
||||||
@ -4714,6 +4713,7 @@ rtl/inc/threadh.inc svneol=native#text/plain
|
|||||||
rtl/inc/threadvr.inc svneol=native#text/plain
|
rtl/inc/threadvr.inc svneol=native#text/plain
|
||||||
rtl/inc/typefile.inc svneol=native#text/plain
|
rtl/inc/typefile.inc svneol=native#text/plain
|
||||||
rtl/inc/ucomplex.pp svneol=native#text/plain
|
rtl/inc/ucomplex.pp svneol=native#text/plain
|
||||||
|
rtl/inc/ufloat128.pp svneol=native#text/plain
|
||||||
rtl/inc/varerror.inc svneol=native#text/plain
|
rtl/inc/varerror.inc svneol=native#text/plain
|
||||||
rtl/inc/variant.inc svneol=native#text/plain
|
rtl/inc/variant.inc svneol=native#text/plain
|
||||||
rtl/inc/varianth.inc svneol=native#text/plain
|
rtl/inc/varianth.inc svneol=native#text/plain
|
||||||
|
@ -70,7 +70,7 @@ these four paragraphs for those parts of this code that are retained.
|
|||||||
*}
|
*}
|
||||||
|
|
||||||
{ $define FPC_SOFTFLOAT_FLOATX80}
|
{ $define FPC_SOFTFLOAT_FLOATX80}
|
||||||
{ $define FPC_SOFTFLOAT_FLOAT128}
|
{$define FPC_SOFTFLOAT_FLOAT128}
|
||||||
|
|
||||||
{ the softfpu unit can be also embedded directly into the system unit }
|
{ the softfpu unit can be also embedded directly into the system unit }
|
||||||
|
|
||||||
@ -436,6 +436,7 @@ function float128_to_int64(a: float128): int64;
|
|||||||
function float128_to_int64_round_to_zero(a: float128): int64;
|
function float128_to_int64_round_to_zero(a: float128): int64;
|
||||||
function float128_to_float32(a: float128): float32;
|
function float128_to_float32(a: float128): float32;
|
||||||
function float128_to_float64(a: float128): float64;
|
function float128_to_float64(a: float128): float64;
|
||||||
|
function float64_to_float128( a : float64) : float128;
|
||||||
{$ifdef FPC_SOFTFLOAT_FLOAT80}
|
{$ifdef FPC_SOFTFLOAT_FLOAT80}
|
||||||
function float128_to_floatx80(a: float128): floatx80;
|
function float128_to_floatx80(a: float128): floatx80;
|
||||||
{$endif FPC_SOFTFLOAT_FLOAT80}
|
{$endif FPC_SOFTFLOAT_FLOAT80}
|
||||||
@ -1892,6 +1893,17 @@ Begin
|
|||||||
|
|
||||||
End;
|
End;
|
||||||
|
|
||||||
|
function float64ToCommonNaN( a : float64 ) : commonNaNT;
|
||||||
|
Var
|
||||||
|
z : commonNaNT;
|
||||||
|
Begin
|
||||||
|
if ( float64_is_signaling_nan( a )<>0 ) then
|
||||||
|
float_raise( float_flag_invalid );
|
||||||
|
z.sign := a.high shr 31;
|
||||||
|
shortShift64Left( a.high, a.low, 12, z.high, z.low );
|
||||||
|
result := z;
|
||||||
|
|
||||||
|
End;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of converting the canonical NaN `a' to the double-
|
Returns the result of converting the canonical NaN `a' to the double-
|
||||||
@ -2473,6 +2485,13 @@ Function extractFloat64Frac1(a: float64): bits32;
|
|||||||
extractFloat64Frac1 := a.low;
|
extractFloat64Frac1 := a.low;
|
||||||
End;
|
End;
|
||||||
|
|
||||||
|
|
||||||
|
{$define FPC_SYSTEM_HAS_extractFloat64Frac}
|
||||||
|
Function extractFloat64Frac(a: float64): bits64;
|
||||||
|
Begin
|
||||||
|
extractFloat64Frac := bits64(a) and $000FFFFFFFFFFFFF;
|
||||||
|
End;
|
||||||
|
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the exponent bits of the double-precision floating-point value `a'.
|
Returns the exponent bits of the double-precision floating-point value `a'.
|
||||||
@ -2537,6 +2556,16 @@ Procedure normalizeFloat64Subnormal(
|
|||||||
End;
|
End;
|
||||||
End;
|
End;
|
||||||
|
|
||||||
|
procedure normalizeFloat64Subnormal(aSig : bits64;var zExpPtr : int16; var zSigPtr : bits64);
|
||||||
|
var
|
||||||
|
shiftCount : int8;
|
||||||
|
begin
|
||||||
|
shiftCount := countLeadingZeros64( aSig ) - 11;
|
||||||
|
zSigPtr := aSig shl shiftCount;
|
||||||
|
zExpPtr := 1 - shiftCount;
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Packs the sign `zSign', the exponent `zExp', and the significand formed by
|
Packs the sign `zSign', the exponent `zExp', and the significand formed by
|
||||||
@ -8397,6 +8426,43 @@ begin
|
|||||||
|
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
{----------------------------------------------------------------------------
|
||||||
|
| Returns the result of converting the double-precision floating-point value
|
||||||
|
| `a' to the quadruple-precision floating-point format. The conversion is
|
||||||
|
| performed according to the IEC/IEEE Standard for Binary Floating-Point
|
||||||
|
| Arithmetic.
|
||||||
|
*----------------------------------------------------------------------------}
|
||||||
|
|
||||||
|
function float64_to_float128( a : float64) : float128;
|
||||||
|
var
|
||||||
|
aSign : flag;
|
||||||
|
aExp : int16;
|
||||||
|
aSig, zSig0, zSig1 : bits64;
|
||||||
|
begin
|
||||||
|
aSig := extractFloat64Frac( a );
|
||||||
|
aExp := extractFloat64Exp( a );
|
||||||
|
aSign := extractFloat64Sign( a );
|
||||||
|
if ( aExp = $7FF ) then begin
|
||||||
|
if ( aSig<>0 ) then
|
||||||
|
result:=commonNaNToFloat128( float64ToCommonNaN( a ) );
|
||||||
|
result:=packFloat128( aSign, $7FFF, 0, 0 );
|
||||||
|
exit;
|
||||||
|
end;
|
||||||
|
if ( aExp = 0 ) then begin
|
||||||
|
if ( aSig = 0 ) then
|
||||||
|
begin
|
||||||
|
result:=packFloat128( aSign, 0, 0, 0 );
|
||||||
|
exit;
|
||||||
|
end;
|
||||||
|
|
||||||
|
normalizeFloat64Subnormal( aSig, aExp, aSig );
|
||||||
|
dec(aExp);
|
||||||
|
end;
|
||||||
|
shift128Right( aSig, 0, 4, zSig0, zSig1 );
|
||||||
|
result:=packFloat128( aSign, aExp + $3C00, zSig0, zSig1 );
|
||||||
|
|
||||||
|
end;
|
||||||
|
|
||||||
{$endif FPC_SOFTFLOAT_FLOAT128}
|
{$endif FPC_SOFTFLOAT_FLOAT128}
|
||||||
|
|
||||||
{$endif not(defined(fpc_softfpu_interface))}
|
{$endif not(defined(fpc_softfpu_interface))}
|
||||||
|
@ -13,13 +13,17 @@
|
|||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
|
||||||
**********************************************************************}
|
**********************************************************************}
|
||||||
unit float128;
|
{$inline on}
|
||||||
|
unit ufloat128;
|
||||||
|
|
||||||
interface
|
interface
|
||||||
|
|
||||||
uses
|
uses
|
||||||
softfpu;
|
softfpu;
|
||||||
|
|
||||||
|
type
|
||||||
|
float128 = softfpu.float128;
|
||||||
|
|
||||||
operator+ (const f1,f2 : float128) result : float128;inline;
|
operator+ (const f1,f2 : float128) result : float128;inline;
|
||||||
operator* (const f1,f2 : float128) result : float128;inline;
|
operator* (const f1,f2 : float128) result : float128;inline;
|
||||||
operator- (const f1,f2 : float128) result : float128;inline;
|
operator- (const f1,f2 : float128) result : float128;inline;
|
||||||
@ -29,9 +33,25 @@ unit float128;
|
|||||||
|
|
||||||
operator :=(const source : float128) dest : double;inline;
|
operator :=(const source : float128) dest : double;inline;
|
||||||
|
|
||||||
|
procedure DumpFloat128(const f : float128);
|
||||||
|
|
||||||
implementation
|
implementation
|
||||||
|
|
||||||
|
procedure DumpFloat128(const f : float128);
|
||||||
|
type
|
||||||
|
ta = packed array[0..15] of byte;
|
||||||
|
var
|
||||||
|
i : longint;
|
||||||
|
begin
|
||||||
|
for i:=15 downto 0 do
|
||||||
|
begin
|
||||||
|
write(hexstr(ta(f)[i],2));
|
||||||
|
if i<15 then
|
||||||
|
write(' ');
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
operator+ (const f1,f2 : float128) result : float128;inline;
|
operator+ (const f1,f2 : float128) result : float128;inline;
|
||||||
begin
|
begin
|
||||||
result:=float128_add(f1,f2);
|
result:=float128_add(f1,f2);
|
||||||
@ -58,13 +78,14 @@ unit float128;
|
|||||||
|
|
||||||
operator :=(const source : double) dest : float128;inline;
|
operator :=(const source : double) dest : float128;inline;
|
||||||
begin
|
begin
|
||||||
dest:=float64_to_float128(source);
|
dest:=float64_to_float128(float64(source));
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
operator :=(const source : float128) dest : double;inline;
|
operator :=(const source : float128) dest : double;inline;
|
||||||
begin
|
begin
|
||||||
dest:=float128_to_float64(source);
|
dest:=double(float128_to_float64(source));
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
end.
|
end.
|
Loading…
Reference in New Issue
Block a user