mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-18 19:49:12 +02:00
* bugfix for int64 to float conversion
This commit is contained in:
parent
87d0fb3cda
commit
c0a2149c38
@ -6,7 +6,7 @@ to pascal was done by Carl Eric Codere in 2002 (ccodere@ieee.org).
|
|||||||
===============================================================================
|
===============================================================================
|
||||||
|
|
||||||
This C source file is part of the SoftFloat IEC/IEEE Floating-Point
|
This C source file is part of the SoftFloat IEC/IEEE Floating-Point
|
||||||
Arithmetic Package, Release 2a.
|
Arithmetic Package, Release 2a.
|
||||||
|
|
||||||
Written by John R. Hauser. This work was made possible in part by the
|
Written by John R. Hauser. This work was made possible in part by the
|
||||||
International Computer Science Institute, located at Suite 600, 1947 Center
|
International Computer Science Institute, located at Suite 600, 1947 Center
|
||||||
@ -15,7 +15,7 @@ National Science Foundation under grant MIP-9311980. The original version
|
|||||||
of this code was written as part of a project to build a fixed-point vector
|
of this code was written as part of a project to build a fixed-point vector
|
||||||
processor in collaboration with the University of California at Berkeley,
|
processor in collaboration with the University of California at Berkeley,
|
||||||
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
|
overseen by Profs. Nelson Morgan and John Wawrzynek. More information
|
||||||
is available through the Web page
|
is available through the Web page
|
||||||
`http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/SoftFloat.html'.
|
`http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/SoftFloat.html'.
|
||||||
|
|
||||||
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
|
THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
|
||||||
@ -36,7 +36,7 @@ unit softfpu;
|
|||||||
{ Overflow checking must be disabled,
|
{ Overflow checking must be disabled,
|
||||||
since some operations expect overflow!
|
since some operations expect overflow!
|
||||||
}
|
}
|
||||||
{$Q-}
|
{$Q-}
|
||||||
|
|
||||||
interface
|
interface
|
||||||
|
|
||||||
@ -69,7 +69,7 @@ TYPE
|
|||||||
uint64 = qword;
|
uint64 = qword;
|
||||||
bits64 = qword;
|
bits64 = qword;
|
||||||
sbits64 = int64;
|
sbits64 = int64;
|
||||||
|
|
||||||
{$ifdef ENDIAN_LITTLE}
|
{$ifdef ENDIAN_LITTLE}
|
||||||
float64 = packed record
|
float64 = packed record
|
||||||
low: bits32;
|
low: bits32;
|
||||||
@ -98,7 +98,7 @@ the corresponding value `b', and 0 otherwise. The comparison is performed
|
|||||||
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float64_lt(a: float64;b: float64): flag;
|
Function float64_lt(a: float64;b: float64): flag;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns 1 if the double-precision floating-point value `a' is less than
|
Returns 1 if the double-precision floating-point value `a' is less than
|
||||||
@ -107,7 +107,7 @@ is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|||||||
Arithmetic.
|
Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float64_le(a: float64;b: float64): flag;
|
Function float64_le(a: float64;b: float64): flag;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns 1 if the double-precision floating-point value `a' is equal to
|
Returns 1 if the double-precision floating-point value `a' is equal to
|
||||||
@ -115,7 +115,7 @@ the corresponding value `b', and 0 otherwise. The comparison is performed
|
|||||||
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float64_eq(a: float64;b: float64): flag;
|
Function float64_eq(a: float64;b: float64): flag;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the square root of the double-precision floating-point value `a'.
|
Returns the square root of the double-precision floating-point value `a'.
|
||||||
@ -123,7 +123,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary
|
|||||||
Floating-Point Arithmetic.
|
Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Procedure float64_sqrt( a: float64; var out: float64 );
|
Procedure float64_sqrt( a: float64; var out: float64 );
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the remainder of the double-precision floating-point value `a'
|
Returns the remainder of the double-precision floating-point value `a'
|
||||||
@ -131,7 +131,7 @@ with respect to the corresponding value `b'. The operation is performed
|
|||||||
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Procedure float64_rem(a: float64; b : float64; var out: float64);
|
Procedure float64_rem(a: float64; b : float64; var out: float64);
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of dividing the double-precision floating-point value `a'
|
Returns the result of dividing the double-precision floating-point value `a'
|
||||||
@ -139,7 +139,7 @@ by the corresponding value `b'. The operation is performed according to the
|
|||||||
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Procedure float64_div(a: float64; b : float64 ; var out: float64 );
|
Procedure float64_div(a: float64; b : float64 ; var out: float64 );
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of multiplying the double-precision floating-point values
|
Returns the result of multiplying the double-precision floating-point values
|
||||||
@ -147,7 +147,7 @@ Returns the result of multiplying the double-precision floating-point values
|
|||||||
for Binary Floating-Point Arithmetic.
|
for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Procedure float64_mul( a: float64; b:float64; Var out: float64);
|
Procedure float64_mul( a: float64; b:float64; Var out: float64);
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of subtracting the double-precision floating-point values
|
Returns the result of subtracting the double-precision floating-point values
|
||||||
@ -155,7 +155,7 @@ Returns the result of subtracting the double-precision floating-point values
|
|||||||
for Binary Floating-Point Arithmetic.
|
for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Procedure float64_sub(a: float64; b : float64; var out: float64);
|
Procedure float64_sub(a: float64; b : float64; var out: float64);
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of adding the double-precision floating-point values `a'
|
Returns the result of adding the double-precision floating-point values `a'
|
||||||
@ -163,7 +163,7 @@ and `b'. The operation is performed according to the IEC/IEEE Standard for
|
|||||||
Binary Floating-Point Arithmetic.
|
Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Procedure float64_add( a: float64; b : float64; Var out : float64);
|
Procedure float64_add( a: float64; b : float64; Var out : float64);
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Rounds the double-precision floating-point value `a' to an integer,
|
Rounds the double-precision floating-point value `a' to an integer,
|
||||||
@ -172,7 +172,7 @@ operation is performed according to the IEC/IEEE Standard for Binary
|
|||||||
Floating-Point Arithmetic.
|
Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Procedure float64_round_to_int(a: float64; var out: float64 );
|
Procedure float64_round_to_int(a: float64; var out: float64 );
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of converting the double-precision floating-point value
|
Returns the result of converting the double-precision floating-point value
|
||||||
@ -181,7 +181,7 @@ performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|||||||
Arithmetic.
|
Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float64_to_float32(a: float64 ): float32;
|
Function float64_to_float32(a: float64 ): float32;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of converting the double-precision floating-point value
|
Returns the result of converting the double-precision floating-point value
|
||||||
@ -193,7 +193,7 @@ the conversion overflows, the largest integer with the same sign as `a' is
|
|||||||
returned.
|
returned.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float64_to_int32_round_to_zero(a: float64 ): int32;
|
Function float64_to_int32_round_to_zero(a: float64 ): int32;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of converting the double-precision floating-point value
|
Returns the result of converting the double-precision floating-point value
|
||||||
@ -205,7 +205,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the
|
|||||||
largest integer with the same sign as `a' is returned.
|
largest integer with the same sign as `a' is returned.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float64_to_int32(a: float64): int32;
|
Function float64_to_int32(a: float64): int32;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns 1 if the single-precision floating-point value `a' is less than
|
Returns 1 if the single-precision floating-point value `a' is less than
|
||||||
@ -213,7 +213,7 @@ the corresponding value `b', and 0 otherwise. The comparison is performed
|
|||||||
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float32_lt( a:float32 ; b : float32): flag;
|
Function float32_lt( a:float32 ; b : float32): flag;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns 1 if the single-precision floating-point value `a' is less than
|
Returns 1 if the single-precision floating-point value `a' is less than
|
||||||
@ -222,7 +222,7 @@ is performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|||||||
Arithmetic.
|
Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float32_le( a: float32; b : float32 ):flag;
|
Function float32_le( a: float32; b : float32 ):flag;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns 1 if the single-precision floating-point value `a' is equal to
|
Returns 1 if the single-precision floating-point value `a' is equal to
|
||||||
@ -230,7 +230,7 @@ the corresponding value `b', and 0 otherwise. The comparison is performed
|
|||||||
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float32_eq( a:float32; b:float32): flag;
|
Function float32_eq( a:float32; b:float32): flag;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the square root of the single-precision floating-point value `a'.
|
Returns the square root of the single-precision floating-point value `a'.
|
||||||
@ -238,7 +238,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary
|
|||||||
Floating-Point Arithmetic.
|
Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float32_sqrt(a: float32 ): float32;
|
Function float32_sqrt(a: float32 ): float32;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the remainder of the single-precision floating-point value `a'
|
Returns the remainder of the single-precision floating-point value `a'
|
||||||
@ -246,7 +246,7 @@ with respect to the corresponding value `b'. The operation is performed
|
|||||||
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float32_rem(a: float32; b: float32 ):float32;
|
Function float32_rem(a: float32; b: float32 ):float32;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of dividing the single-precision floating-point value `a'
|
Returns the result of dividing the single-precision floating-point value `a'
|
||||||
@ -254,7 +254,7 @@ by the corresponding value `b'. The operation is performed according to the
|
|||||||
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float32_div(a: float32;b: float32 ): float32;
|
Function float32_div(a: float32;b: float32 ): float32;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of multiplying the single-precision floating-point values
|
Returns the result of multiplying the single-precision floating-point values
|
||||||
@ -262,7 +262,7 @@ Returns the result of multiplying the single-precision floating-point values
|
|||||||
for Binary Floating-Point Arithmetic.
|
for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float32_mul(a: float32; b: float32 ) : float32;
|
Function float32_mul(a: float32; b: float32 ) : float32;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of subtracting the single-precision floating-point values
|
Returns the result of subtracting the single-precision floating-point values
|
||||||
@ -270,7 +270,7 @@ Returns the result of subtracting the single-precision floating-point values
|
|||||||
for Binary Floating-Point Arithmetic.
|
for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float32_sub( a: float32 ; b:float32 ): float32;
|
Function float32_sub( a: float32 ; b:float32 ): float32;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of adding the single-precision floating-point values `a'
|
Returns the result of adding the single-precision floating-point values `a'
|
||||||
@ -278,7 +278,7 @@ and `b'. The operation is performed according to the IEC/IEEE Standard for
|
|||||||
Binary Floating-Point Arithmetic.
|
Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float32_add( a: float32; b:float32 ): float32;
|
Function float32_add( a: float32; b:float32 ): float32;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Rounds the single-precision floating-point value `a' to an integer,
|
Rounds the single-precision floating-point value `a' to an integer,
|
||||||
@ -287,7 +287,7 @@ operation is performed according to the IEC/IEEE Standard for Binary
|
|||||||
Floating-Point Arithmetic.
|
Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float32_round_to_int( a: float32): float32;
|
Function float32_round_to_int( a: float32): float32;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of converting the single-precision floating-point value
|
Returns the result of converting the single-precision floating-point value
|
||||||
@ -296,7 +296,7 @@ performed according to the IEC/IEEE Standard for Binary Floating-Point
|
|||||||
Arithmetic.
|
Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Procedure float32_to_float64( a : float32; var out: Float64);
|
Procedure float32_to_float64( a : float32; var out: Float64);
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of converting the single-precision floating-point value
|
Returns the result of converting the single-precision floating-point value
|
||||||
@ -308,7 +308,7 @@ the conversion overflows, the largest integer with the same sign as `a' is
|
|||||||
returned.
|
returned.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float32_to_int32_round_to_zero( a: Float32 ): int32;
|
Function float32_to_int32_round_to_zero( a: Float32 ): int32;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of converting the single-precision floating-point value
|
Returns the result of converting the single-precision floating-point value
|
||||||
@ -320,7 +320,7 @@ positive integer is returned. Otherwise, if the conversion overflows, the
|
|||||||
largest integer with the same sign as `a' is returned.
|
largest integer with the same sign as `a' is returned.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function float32_to_int32( a : float32) : int32;
|
Function float32_to_int32( a : float32) : int32;
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of converting the 32-bit two's complement integer `a' to
|
Returns the result of converting the 32-bit two's complement integer `a' to
|
||||||
@ -328,7 +328,7 @@ the double-precision floating-point format. The conversion is performed
|
|||||||
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Procedure int32_to_float64( a: int32; var c: float64 );
|
Procedure int32_to_float64( a: int32; var c: float64 );
|
||||||
{*
|
{*
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
Returns the result of converting the 32-bit two's complement integer `a' to
|
Returns the result of converting the 32-bit two's complement integer `a' to
|
||||||
@ -336,7 +336,7 @@ the single-precision floating-point format. The conversion is performed
|
|||||||
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
Function int32_to_float32( a: int32): float32;
|
Function int32_to_float32( a: int32): float32;
|
||||||
|
|
||||||
{*----------------------------------------------------------------------------
|
{*----------------------------------------------------------------------------
|
||||||
| Returns the result of converting the 64-bit two's complement integer `a'
|
| Returns the result of converting the 64-bit two's complement integer `a'
|
||||||
@ -367,28 +367,28 @@ Software IEC/IEEE floating-point rounding mode.
|
|||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
*}
|
*}
|
||||||
{
|
{
|
||||||
Round to nearest.
|
Round to nearest.
|
||||||
This is the default mode. It should be used unless there is a specific
|
This is the default mode. It should be used unless there is a specific
|
||||||
need for one of the others. In this mode results are rounded to the
|
need for one of the others. In this mode results are rounded to the
|
||||||
nearest representable value. If the result is midway between two
|
nearest representable value. If the result is midway between two
|
||||||
representable values, the even representable is chosen. Even here
|
representable values, the even representable is chosen. Even here
|
||||||
means the lowest-order bit is zero. This rounding mode prevents
|
means the lowest-order bit is zero. This rounding mode prevents
|
||||||
statistical bias and guarantees numeric stability: round-off errors
|
statistical bias and guarantees numeric stability: round-off errors
|
||||||
in a lengthy calculation will remain smaller than half of FLT_EPSILON.
|
in a lengthy calculation will remain smaller than half of FLT_EPSILON.
|
||||||
|
|
||||||
Round toward plus Infinity.
|
Round toward plus Infinity.
|
||||||
All results are rounded to the smallest representable value which is
|
All results are rounded to the smallest representable value which is
|
||||||
greater than or equal to the result.
|
greater than or equal to the result.
|
||||||
|
|
||||||
Round toward minus Infinity.
|
Round toward minus Infinity.
|
||||||
All results are rounded to the largest representable value which is
|
All results are rounded to the largest representable value which is
|
||||||
less than or equal to the result.
|
less than or equal to the result.
|
||||||
|
|
||||||
Round toward zero.
|
Round toward zero.
|
||||||
All results are rounded to the largest representable value whose
|
All results are rounded to the largest representable value whose
|
||||||
magnitude is less than or equal to that of the result. In other words, if the
|
magnitude is less than or equal to that of the result. In other words, if the
|
||||||
result is negative it is rounded up; if it is positive, it is
|
result is negative it is rounded up; if it is positive, it is
|
||||||
rounded down.
|
rounded down.
|
||||||
}
|
}
|
||||||
float_round_nearest_even = 0;
|
float_round_nearest_even = 0;
|
||||||
float_round_down = 1;
|
float_round_down = 1;
|
||||||
@ -443,7 +443,7 @@ Begin
|
|||||||
float_exception_flags := float_exception_flags or i;
|
float_exception_flags := float_exception_flags or i;
|
||||||
if (float_exception_flags and float_flag_invalid) <> 0 then
|
if (float_exception_flags and float_flag_invalid) <> 0 then
|
||||||
RunError(207)
|
RunError(207)
|
||||||
else
|
else
|
||||||
if (float_exception_flags and float_flag_divbyzero) <> 0 then
|
if (float_exception_flags and float_flag_divbyzero) <> 0 then
|
||||||
RunError(200)
|
RunError(200)
|
||||||
else
|
else
|
||||||
@ -479,7 +479,7 @@ var
|
|||||||
Begin
|
Begin
|
||||||
if ( count = 0 ) then
|
if ( count = 0 ) then
|
||||||
z := a
|
z := a
|
||||||
else
|
else
|
||||||
if ( count < 32 ) then
|
if ( count < 32 ) then
|
||||||
Begin
|
Begin
|
||||||
z := ( a shr count ) or bits32( (( a shl ( ( - count ) AND 31 )) ) <> 0);
|
z := ( a shr count ) or bits32( (( a shl ( ( - count ) AND 31 )) ) <> 0);
|
||||||
@ -557,7 +557,7 @@ Begin
|
|||||||
z1 := a1;
|
z1 := a1;
|
||||||
z0 := a0;
|
z0 := a0;
|
||||||
End
|
End
|
||||||
else
|
else
|
||||||
if ( count < 32 ) then
|
if ( count < 32 ) then
|
||||||
Begin
|
Begin
|
||||||
z1 := ( a0 shl negCount ) OR ( a1 shr count ) OR bits32( ( a1 shl negCount ) <> 0 );
|
z1 := ( a0 shl negCount ) OR ( a1 shr count ) OR bits32( ( a1 shl negCount ) <> 0 );
|
||||||
@ -569,7 +569,7 @@ Begin
|
|||||||
Begin
|
Begin
|
||||||
z1 := a0 OR bits32( a1 <> 0 );
|
z1 := a0 OR bits32( a1 <> 0 );
|
||||||
End
|
End
|
||||||
else
|
else
|
||||||
if ( count < 64 ) Then
|
if ( count < 64 ) Then
|
||||||
Begin
|
Begin
|
||||||
z1 := ( a0 shr ( count AND 31 ) ) OR bits32( ( ( a0 shl negCount ) OR a1 ) <> 0 );
|
z1 := ( a0 shr ( count AND 31 ) ) OR bits32( ( ( a0 shl negCount ) OR a1 ) <> 0 );
|
||||||
@ -1081,7 +1081,7 @@ End;
|
|||||||
function countLeadingZeros64( a : bits64): int8;
|
function countLeadingZeros64( a : bits64): int8;
|
||||||
var
|
var
|
||||||
shiftcount : int8;
|
shiftcount : int8;
|
||||||
Begin
|
Begin
|
||||||
shiftCount := 0;
|
shiftCount := 0;
|
||||||
if ( a < (bits64(1) shl 32 )) then
|
if ( a < (bits64(1) shl 32 )) then
|
||||||
shiftCount := shiftcount + 32
|
shiftCount := shiftcount + 32
|
||||||
@ -1441,7 +1441,7 @@ End;
|
|||||||
sign : flag;
|
sign : flag;
|
||||||
high, low : bits32;
|
high, low : bits32;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
(*----------------------------------------------------------------------------
|
(*----------------------------------------------------------------------------
|
||||||
| The pattern for a default generated single-precision NaN.
|
| The pattern for a default generated single-precision NaN.
|
||||||
*----------------------------------------------------------------------------*)
|
*----------------------------------------------------------------------------*)
|
||||||
@ -1464,7 +1464,7 @@ function float32_is_signaling_nan(a: float32):flag;
|
|||||||
begin
|
begin
|
||||||
float32_is_signaling_nan := flag( ( ( a shr 22 ) and $1FF ) = $1FE ) and ( (a and $003FFFFF)<>0 );
|
float32_is_signaling_nan := flag( ( ( a shr 22 ) and $1FF ) = $1FE ) and ( (a and $003FFFFF)<>0 );
|
||||||
end;
|
end;
|
||||||
|
|
||||||
(*----------------------------------------------------------------------------
|
(*----------------------------------------------------------------------------
|
||||||
| Returns the result of converting the single-precision floating-point NaN
|
| Returns the result of converting the single-precision floating-point NaN
|
||||||
| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
|
| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
|
||||||
@ -1490,7 +1490,7 @@ function CommonNanToFloat32(a : CommonNaNT): float32;
|
|||||||
begin
|
begin
|
||||||
CommonNanToFloat32:= ( ( (bits32) a.sign ) shl 31 ) OR $7FC00000 OR ( a.high shr 9 );
|
CommonNanToFloat32:= ( ( (bits32) a.sign ) shl 31 ) OR $7FC00000 OR ( a.high shr 9 );
|
||||||
end;
|
end;
|
||||||
|
|
||||||
(*----------------------------------------------------------------------------
|
(*----------------------------------------------------------------------------
|
||||||
| Takes two single-precision floating-point values `a' and `b', one of which
|
| Takes two single-precision floating-point values `a' and `b', one of which
|
||||||
| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
|
| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
|
||||||
@ -1607,7 +1607,7 @@ var
|
|||||||
c := a;
|
c := a;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
{$ENDIF}
|
{$ENDIF}
|
||||||
|
|
||||||
(****************************************************************************)
|
(****************************************************************************)
|
||||||
(* END ENDIAN SPECIFIC CODE *)
|
(* END ENDIAN SPECIFIC CODE *)
|
||||||
@ -4573,14 +4573,14 @@ Begin
|
|||||||
begin
|
begin
|
||||||
int64_to_float32:= packFloat32( zSign, $95 - shiftCount, absA shl shiftCount );
|
int64_to_float32:= packFloat32( zSign, $95 - shiftCount, absA shl shiftCount );
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
shiftCount := shiftCount + 7;
|
shiftCount := shiftCount + 7;
|
||||||
if ( shiftCount < 0 ) then
|
if ( shiftCount < 0 ) then
|
||||||
begin
|
begin
|
||||||
intval.low := int64rec(AbsA).low;
|
intval.low := int64rec(AbsA).low;
|
||||||
intval.high := int64rec(AbsA).high;
|
intval.high := int64rec(AbsA).high;
|
||||||
shift64RightJamming( intval.low, intval.high, - shiftCount,
|
shift64RightJamming( intval.low, intval.high, - shiftCount,
|
||||||
intval.low, intval.high);
|
intval.low, intval.high);
|
||||||
int64rec(absA).low := intval.low;
|
int64rec(absA).low := intval.low;
|
||||||
int64rec(absA).high := intval.high;
|
int64rec(absA).high := intval.high;
|
||||||
@ -4597,51 +4597,47 @@ End;
|
|||||||
| to the double-precision floating-point format. The conversion is performed
|
| to the double-precision floating-point format. The conversion is performed
|
||||||
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
|
||||||
*----------------------------------------------------------------------------*}
|
*----------------------------------------------------------------------------*}
|
||||||
|
|
||||||
function int64_to_float64( a: int64 ): float64;
{
  Converts the 64-bit two's complement integer `a' to a float64.

  The magnitude of `a' is shifted so that its most significant set bit lands
  on bit 52 (i.e. exactly 11 leading zero bits remain); the result is then
  split into two 32-bit words and handed to packFloat64 together with the
  sign and the exponent `$432 - shiftCount'.

  NOTE(review): when the magnitude needs more than 53 bits (shiftCount < 0)
  the low bits are shifted out by shift64Right and no rounding step follows,
  so this path looks like it truncates instead of rounding - confirm against
  the rounding behaviour expected from this routine.
  NOTE(review): for a = low(int64) the negation below relies on wrap-around
  arithmetic (overflow checking disabled) - confirm the unit's compiler
  switches.
}
var
 zSign : flag;
 float_result : float64;
 AbsA : bits64;
 shiftCount : int8;
 zSig0, zSig1 : bits32;
Begin
  if ( a = 0 ) then
   Begin
     packFloat64( 0, 0, 0, 0, float_result );
     { The function result must be set before leaving; without this
       assignment the value returned for zero is undefined. }
     int64_to_float64 := float_result;
     exit;
   End;
  { Work on the magnitude; the sign is passed to packFloat64 separately. }
  zSign := flag( a < 0 );
  if zSign <> 0 then
    AbsA := -a
  else
    AbsA := a;
  { Left shift needed to leave exactly 11 leading zero bits. A negative
    value means the magnitude is too wide and must be shifted right. }
  shiftCount := countLeadingZeros64( AbsA ) - 11;
  if ( 0 <= shiftCount ) then
   Begin
     AbsA := AbsA shl shiftCount;
     zSig0 := int64rec(AbsA).high;
     zSig1 := int64rec(AbsA).low;
   End
  else
   Begin
     { Pass the value as its two 32-bit halves (high word first), matching
       the zSig0/zSig1 split used in the branch above. Passing the whole
       64-bit value as the first word would drop its upper half.
       Assumes shift64Right takes (high, low : bits32) like the SoftFloat
       original - TODO confirm against its declaration. }
     shift64Right( int64rec(AbsA).high, int64rec(AbsA).low, - shiftCount,
                   zSig0, zSig1 );
   End;
  packFloat64( zSign, $432 - shiftCount, zSig0, zSig1, float_result );
  int64_to_float64 := float_result;
End;
|
||||||
|
|
||||||
end.
|
end.
|
||||||
{
|
{
|
||||||
$Log$
|
$Log$
|
||||||
Revision 1.3 2002-10-12 20:24:22 carl
|
Revision 1.4 2002-10-13 15:47:39 carl
|
||||||
|
* bugfix for int64 to float conversion
|
||||||
|
|
||||||
|
Revision 1.3 2002/10/12 20:24:22 carl
|
||||||
+ int64_to_float conversion routines
|
+ int64_to_float conversion routines
|
||||||
|
|
||||||
Revision 1.2 2002/10/08 20:07:08 carl
|
Revision 1.2 2002/10/08 20:07:08 carl
|
||||||
|
Loading…
Reference in New Issue
Block a user