mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-05-31 14:22:44 +02:00
* continued to work on float128 stuff
git-svn-id: trunk@9117 -
This commit is contained in:
parent
6577a39bab
commit
e3085015f7
2
.gitattributes
vendored
2
.gitattributes
vendored
@ -4659,7 +4659,6 @@ rtl/inc/except.inc svneol=native#text/plain
|
||||
rtl/inc/fexpand.inc svneol=native#text/plain
|
||||
rtl/inc/file.inc svneol=native#text/plain
|
||||
rtl/inc/filerec.inc svneol=native#text/plain
|
||||
rtl/inc/float128.pp svneol=native#text/plain
|
||||
rtl/inc/generic.inc svneol=native#text/plain
|
||||
rtl/inc/genmath.inc svneol=native#text/plain
|
||||
rtl/inc/genset.inc svneol=native#text/plain
|
||||
@ -4714,6 +4713,7 @@ rtl/inc/threadh.inc svneol=native#text/plain
|
||||
rtl/inc/threadvr.inc svneol=native#text/plain
|
||||
rtl/inc/typefile.inc svneol=native#text/plain
|
||||
rtl/inc/ucomplex.pp svneol=native#text/plain
|
||||
rtl/inc/ufloat128.pp svneol=native#text/plain
|
||||
rtl/inc/varerror.inc svneol=native#text/plain
|
||||
rtl/inc/variant.inc svneol=native#text/plain
|
||||
rtl/inc/varianth.inc svneol=native#text/plain
|
||||
|
@ -70,7 +70,7 @@ these four paragraphs for those parts of this code that are retained.
|
||||
*}
|
||||
|
||||
{ $define FPC_SOFTFLOAT_FLOATX80}
|
||||
{ $define FPC_SOFTFLOAT_FLOAT128}
|
||||
{$define FPC_SOFTFLOAT_FLOAT128}
|
||||
|
||||
{ the softfpu unit can be also embedded directly into the system unit }
|
||||
|
||||
@ -436,6 +436,7 @@ function float128_to_int64(a: float128): int64;
|
||||
function float128_to_int64_round_to_zero(a: float128): int64;
|
||||
function float128_to_float32(a: float128): float32;
|
||||
function float128_to_float64(a: float128): float64;
|
||||
function float64_to_float128( a : float64) : float128;
|
||||
{$ifdef FPC_SOFTFLOAT_FLOAT80}
|
||||
function float128_to_floatx80(a: float128): floatx80;
|
||||
{$endif FPC_SOFTFLOAT_FLOAT80}
|
||||
@ -1892,6 +1893,17 @@ Begin
|
||||
|
||||
End;
|
||||
|
||||
function float64ToCommonNaN( a : float64 ) : commonNaNT;
|
||||
Var
|
||||
z : commonNaNT;
|
||||
Begin
|
||||
if ( float64_is_signaling_nan( a )<>0 ) then
|
||||
float_raise( float_flag_invalid );
|
||||
z.sign := a.high shr 31;
|
||||
shortShift64Left( a.high, a.low, 12, z.high, z.low );
|
||||
result := z;
|
||||
|
||||
End;
|
||||
{*
|
||||
-------------------------------------------------------------------------------
|
||||
Returns the result of converting the canonical NaN `a' to the double-
|
||||
@ -2473,6 +2485,13 @@ Function extractFloat64Frac1(a: float64): bits32;
|
||||
extractFloat64Frac1 := a.low;
|
||||
End;
|
||||
|
||||
|
||||
{$define FPC_SYSTEM_HAS_extractFloat64Frac}
|
||||
Function extractFloat64Frac(a: float64): bits64;
|
||||
Begin
|
||||
extractFloat64Frac := bits64(a) and $000FFFFFFFFFFFFF;
|
||||
End;
|
||||
|
||||
{*
|
||||
-------------------------------------------------------------------------------
|
||||
Returns the exponent bits of the double-precision floating-point value `a'.
|
||||
@ -2537,6 +2556,16 @@ Procedure normalizeFloat64Subnormal(
|
||||
End;
|
||||
End;
|
||||
|
||||
procedure normalizeFloat64Subnormal(aSig : bits64;var zExpPtr : int16; var zSigPtr : bits64);
|
||||
var
|
||||
shiftCount : int8;
|
||||
begin
|
||||
shiftCount := countLeadingZeros64( aSig ) - 11;
|
||||
zSigPtr := aSig shl shiftCount;
|
||||
zExpPtr := 1 - shiftCount;
|
||||
end;
|
||||
|
||||
|
||||
{*
|
||||
-------------------------------------------------------------------------------
|
||||
Packs the sign `zSign', the exponent `zExp', and the significand formed by
|
||||
@ -8397,6 +8426,43 @@ begin
|
||||
|
||||
end;
|
||||
|
||||
{----------------------------------------------------------------------------
|
||||
| Returns the result of converting the double-precision floating-point value
|
||||
| `a' to the quadruple-precision floating-point format. The conversion is
|
||||
| performed according to the IEC/IEEE Standard for Binary Floating-Point
|
||||
| Arithmetic.
|
||||
*----------------------------------------------------------------------------}
|
||||
|
||||
function float64_to_float128( a : float64) : float128;
|
||||
var
|
||||
aSign : flag;
|
||||
aExp : int16;
|
||||
aSig, zSig0, zSig1 : bits64;
|
||||
begin
|
||||
aSig := extractFloat64Frac( a );
|
||||
aExp := extractFloat64Exp( a );
|
||||
aSign := extractFloat64Sign( a );
|
||||
if ( aExp = $7FF ) then begin
|
||||
if ( aSig<>0 ) then
|
||||
result:=commonNaNToFloat128( float64ToCommonNaN( a ) );
|
||||
result:=packFloat128( aSign, $7FFF, 0, 0 );
|
||||
exit;
|
||||
end;
|
||||
if ( aExp = 0 ) then begin
|
||||
if ( aSig = 0 ) then
|
||||
begin
|
||||
result:=packFloat128( aSign, 0, 0, 0 );
|
||||
exit;
|
||||
end;
|
||||
|
||||
normalizeFloat64Subnormal( aSig, aExp, aSig );
|
||||
dec(aExp);
|
||||
end;
|
||||
shift128Right( aSig, 0, 4, zSig0, zSig1 );
|
||||
result:=packFloat128( aSign, aExp + $3C00, zSig0, zSig1 );
|
||||
|
||||
end;
|
||||
|
||||
{$endif FPC_SOFTFLOAT_FLOAT128}
|
||||
|
||||
{$endif not(defined(fpc_softfpu_interface))}
|
||||
|
@ -13,13 +13,17 @@
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
**********************************************************************}
|
||||
unit float128;
|
||||
{$inline on}
|
||||
unit ufloat128;
|
||||
|
||||
interface
|
||||
|
||||
uses
|
||||
softfpu;
|
||||
|
||||
type
|
||||
float128 = softfpu.float128;
|
||||
|
||||
operator+ (const f1,f2 : float128) result : float128;inline;
|
||||
operator* (const f1,f2 : float128) result : float128;inline;
|
||||
operator- (const f1,f2 : float128) result : float128;inline;
|
||||
@ -29,9 +33,25 @@ unit float128;
|
||||
|
||||
operator :=(const source : float128) dest : double;inline;
|
||||
|
||||
procedure DumpFloat128(const f : float128);
|
||||
|
||||
implementation
|
||||
|
||||
procedure DumpFloat128(const f : float128);
|
||||
type
|
||||
ta = packed array[0..15] of byte;
|
||||
var
|
||||
i : longint;
|
||||
begin
|
||||
for i:=15 downto 0 do
|
||||
begin
|
||||
write(hexstr(ta(f)[i],2));
|
||||
if i<15 then
|
||||
write(' ');
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
operator+ (const f1,f2 : float128) result : float128;inline;
|
||||
begin
|
||||
result:=float128_add(f1,f2);
|
||||
@ -58,13 +78,14 @@ unit float128;
|
||||
|
||||
operator :=(const source : double) dest : float128;inline;
|
||||
begin
|
||||
dest:=float64_to_float128(source);
|
||||
dest:=float64_to_float128(float64(source));
|
||||
end;
|
||||
|
||||
|
||||
operator :=(const source : float128) dest : double;inline;
|
||||
begin
|
||||
dest:=float128_to_float64(source);
|
||||
dest:=double(float128_to_float64(source));
|
||||
end;
|
||||
|
||||
|
||||
end.
|
Loading…
Reference in New Issue
Block a user