mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-06 14:47:55 +02:00
* split fpc_mul_<64 bit> into separate procedures with and without overflow checking
git-svn-id: trunk@35454 -
This commit is contained in:
parent
3900ccf260
commit
7213a13081
@ -2944,9 +2944,7 @@ implementation
|
||||
end;
|
||||
|
||||
{ otherwise, create the parameters for the helper }
|
||||
right := ccallparanode.create(
|
||||
cordconstnode.create(ord(cs_check_overflow in current_settings.localswitches),pasbool8type,true),
|
||||
ccallparanode.create(right,ccallparanode.create(left,nil)));
|
||||
right := ccallparanode.create(right,ccallparanode.create(left,nil));
|
||||
left := nil;
|
||||
{ only qword needs the unsigned code, the
|
||||
signed code is also used for currency }
|
||||
@ -2954,6 +2952,9 @@ implementation
|
||||
procname := 'fpc_mul_int64'
|
||||
else
|
||||
procname := 'fpc_mul_qword';
|
||||
if cs_check_overflow in current_settings.localswitches then
|
||||
procname := procname + '_checkoverflow';
|
||||
|
||||
result := ccallnode.createintern(procname,right);
|
||||
right := nil;
|
||||
end;
|
||||
|
@ -13,9 +13,38 @@
|
||||
|
||||
**********************************************************************}
|
||||
|
||||
{$ifndef VER3_0}
|
||||
{$if (not defined(CPUTHUMB)) and defined(CPUARM_HAS_UMULL)}
|
||||
{$define FPC_SYSTEM_HAS_MUL_QWORD}
|
||||
function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
|
||||
function fpc_mul_qword(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
|
||||
asm
|
||||
stmfd sp!,{r4,r5,r6,r14}
|
||||
mov r6,#0
|
||||
// r4 = result lo, r5 = result hi
|
||||
{$ifdef ENDIAN_LITTLE}
|
||||
// lo(f1)*lo(f2)
|
||||
umull r4,r5,r0,r2
|
||||
// lo(f1)*hi(f2)
|
||||
umlal r5,r6,r0,r3
|
||||
// hi(f1)*lo(f2)
|
||||
umlal r5,r6,r1,r2
|
||||
mov r0,r4
|
||||
mov r1,r5
|
||||
{$else}
|
||||
// lo(f1)*lo(f2)
|
||||
umull r4,r5,r1,r3
|
||||
// lo(f1)*hi(f2)
|
||||
umlal r5,r6,r1,r2
|
||||
// hi(f1)*lo(f2)
|
||||
umlal r5,r6,r0,r3
|
||||
mov r1,r4
|
||||
mov r0,r5
|
||||
{$endif}
|
||||
ldmfd sp!,{r4,r5,r6,r15}
|
||||
end;
|
||||
|
||||
|
||||
function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
|
||||
asm
|
||||
stmfd sp!,{r4,r5,r6,r14}
|
||||
mov r6,#0
|
||||
@ -51,10 +80,6 @@ asm
|
||||
{$endif}
|
||||
// no overflow?
|
||||
beq .Lexit
|
||||
// checkoverflow?
|
||||
ldr r2,[sp,#16]
|
||||
cmp r2,#0
|
||||
beq .Lexit
|
||||
|
||||
mov r0,#215
|
||||
mov r1,fp
|
||||
@ -63,4 +88,4 @@ asm
|
||||
ldmfd sp!,{r4,r5,r6,r15}
|
||||
end;
|
||||
{$endif (not defined(CPUTHUMB)) and defined(CPUARM_HAS_UMULL)}
|
||||
|
||||
{$endif VER3_0}
|
||||
|
@ -361,15 +361,10 @@
|
||||
movl saveedi,%edi
|
||||
end;
|
||||
|
||||
{$ifndef VER3_0}
|
||||
{$define FPC_SYSTEM_HAS_MUL_QWORD}
|
||||
{ multiplies two qwords
|
||||
the longbool for checkoverflow avoids a misaligned stack
|
||||
}
|
||||
function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
|
||||
var
|
||||
overflowed : boolean;
|
||||
function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
|
||||
begin
|
||||
overflowed:=false;
|
||||
{ the following piece of code is taken from the
|
||||
AMD Athlon Processor x86 Code Optimization manual }
|
||||
asm
|
||||
@ -383,8 +378,6 @@
|
||||
mull %edx
|
||||
jmp .Lqwordmulready
|
||||
.Lqwordmultwomul:
|
||||
cmpl $0,checkoverflow
|
||||
jnz .Loverflowchecked
|
||||
imul f1+4,%edx
|
||||
imul %eax,%ecx
|
||||
addl %edx,%ecx
|
||||
@ -393,6 +386,29 @@
|
||||
.Lqwordmulready:
|
||||
movl %eax,__RESULT
|
||||
movl %edx,__RESULT+4
|
||||
.Lend:
|
||||
end [ 'eax','edx','ecx'];
|
||||
end;
|
||||
|
||||
|
||||
function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
|
||||
var
|
||||
overflowed : boolean;
|
||||
begin
|
||||
overflowed:=false;
|
||||
{ the following piece of code is taken from the
|
||||
AMD Athlon Processor x86 Code Optimization manual }
|
||||
asm
|
||||
movl f1+4,%edx
|
||||
movl f2+4,%ecx
|
||||
orl %ecx,%edx
|
||||
movl f2,%edx
|
||||
movl f1,%eax
|
||||
jnz .Loverflowchecked
|
||||
{ if both upper dwords are =0 then it cannot overflow }
|
||||
mull %edx
|
||||
movl %eax,__RESULT
|
||||
movl %edx,__RESULT+4
|
||||
jmp .Lend
|
||||
|
||||
.Loverflowchecked:
|
||||
@ -431,4 +447,4 @@
|
||||
if overflowed then
|
||||
HandleErrorFrame(215,get_frame);
|
||||
end;
|
||||
|
||||
{$endif VER3_0}
|
@ -1,3 +1,4 @@
|
||||
|
||||
{
|
||||
This file is part of the Free Pascal run time library.
|
||||
Copyright (c) 2013 by the Free Pascal development team
|
||||
@ -13,8 +14,9 @@
|
||||
|
||||
**********************************************************************}
|
||||
|
||||
{$ifndef VER3_0}
|
||||
{$define FPC_SYSTEM_HAS_MUL_QWORD}
|
||||
function fpc_mul_qword( f1, f2: qword; checkoverflow: longbool ): qword; [public,alias: 'FPC_MUL_QWORD']; compilerproc;
|
||||
function fpc_mul_qword( f1, f2: qword): qword; [public,alias: 'FPC_MUL_QWORD']; compilerproc;
|
||||
begin
|
||||
{ routine contributed by Max Nazhalov
|
||||
|
||||
@ -97,9 +99,6 @@ begin
|
||||
mov word[result+6],dx
|
||||
mov si,word[f1+4]
|
||||
mov ax,word[f1+6]
|
||||
mov bx,word[checkoverflow]
|
||||
or bx,word[checkoverflow+2]
|
||||
jnz @@checked
|
||||
mov di,word[f2]
|
||||
mul di
|
||||
mov cx,ax
|
||||
@ -124,74 +123,156 @@ begin
|
||||
adc cx,dx
|
||||
add word[result+4],bx
|
||||
adc word[result+6],cx
|
||||
jmp @@done
|
||||
@@checked:
|
||||
end [ 'ax','bx','cx','dx','si','di' ];
|
||||
end;
|
||||
|
||||
|
||||
function fpc_mul_qword_checkoverflow( f1, f2: qword): qword; [public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
|
||||
begin
|
||||
{ routine contributed by Max Nazhalov
|
||||
|
||||
64-bit multiplication via 16-bit digits: (A3:A2:A1:A0)*(B3:B2:B1:B0)
|
||||
|
||||
//////// STEP 1; break-down to 32-bit multiplications, each of them generates 64-bit result:
|
||||
(A3:A2*B3:B2)<<64 + (A3:A2*B1:B0)<<32 + (A1:A0*B3:B2)<<32 + (A1:A0*B1:B0)
|
||||
|
||||
(A1:A0*B1:B0) = (A1*B1)<<32 + (A1*B0)<<16 + (A0*B1)<<16 + (A0:B0)
|
||||
-- never overflows, forms the base of the final result, name it as "R64"
|
||||
|
||||
(A3:A2*B3:B2) is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width.
|
||||
-- always overflows if "<>0", so can be checked as "((A2|A3)<>0)&&(B2|B3)<>0)"
|
||||
|
||||
(A3:A2*B1:B0) and (A1:A0*B3:B2) are partially required for the final result
|
||||
-- to be calculated on steps 2 and 3 as a correction for the "R64"
|
||||
|
||||
//////// STEP 2; calculate "R64+=(A3:A2*B1:B0)<<32" (16-bit multiplications, each of them generates 32-bit result):
|
||||
(A3*B1)<<32 + (A3*B0)<<16 + (A2*B1)<<16 + (A2*B0)
|
||||
|
||||
((A3*B1)<<32)<<32 is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width.
|
||||
-- always overflows if "<>0", so can be checked as "(A3<>0)&&(B1<>0)"
|
||||
|
||||
((A3*B0)<<16)<<32: only low word of "A3*B0" contributes to the final result if overflow is not checked.
|
||||
-- overflows if the hi_word "<>0"
|
||||
-- overflows if R64+(lo_word<<48) produces C-flag
|
||||
|
||||
((A2*B1)<<16)<<32: only low word of "A2*B1" contributes to the final result if overflow is not checked.
|
||||
-- overflows if the hi_word "<>0"
|
||||
-- overflows if R64+(lo_word<<48) produces C-flag
|
||||
|
||||
(A2*B0)<<32: the whole dword is significand, name it as "X"
|
||||
-- overflows if R64+(X<<32) produces C-flag
|
||||
|
||||
//////// STEP 3; calculate "R64+=(A1:A0*B3:B2)<<32" (16-bit multiplications, each of them generates 32-bit result):
|
||||
(A1*B3)<<32 + (A1*B2)<<16 + (A0*B3)<<16 + (A0*B2)
|
||||
|
||||
((A1*B3)<<32)<<32 is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width.
|
||||
-- always overflows if "<>0", so can be checked as "(A1<>0)&&(B3<>0)"
|
||||
|
||||
((A1*B2)<<16)<<32: only low word of "A1*B2" contributes to the final result if overflow is not checked.
|
||||
-- overflows if the hi_word "<>0"
|
||||
-- overflows if R64+(lo_word<<48) produces C-flag
|
||||
|
||||
((A0*B3)<<16)<<32: only low word "A0*B3" contributes to the final result if overflow is not checked.
|
||||
-- overflows if the hi_word "<>0"
|
||||
-- overflows if R64+(lo_word<<48) produces C-flag
|
||||
|
||||
(A0*B2)<<32: the whole dword is significand, name it as "Y"
|
||||
-- overflows if R64+(Y<<32) produces C-flag
|
||||
}
|
||||
asm
|
||||
mov di,word[f1]
|
||||
mov bx,word[f1+2]
|
||||
mov si,word[f2]
|
||||
mov ax,word[f2+2]
|
||||
push bp
|
||||
mov cx,ax
|
||||
mul bx
|
||||
xchg ax,bx
|
||||
mov bp,dx
|
||||
mul si
|
||||
xchg ax,cx
|
||||
add bx,dx
|
||||
adc bp,0
|
||||
mul di
|
||||
add cx,ax
|
||||
adc bx,dx
|
||||
adc bp,0
|
||||
mov ax,di
|
||||
mul si
|
||||
add cx,dx
|
||||
adc bx,0
|
||||
adc bp,0
|
||||
mov dx,bp
|
||||
pop bp
|
||||
mov word[result],ax
|
||||
mov word[result+2],cx
|
||||
mov word[result+4],bx
|
||||
mov word[result+6],dx
|
||||
mov si,word[f1+4]
|
||||
mov ax,word[f1+6]
|
||||
mov bx,word[f2+6]
|
||||
mov cx,ax
|
||||
or cx,si
|
||||
jz @@nover1
|
||||
mov cx,word[f2+4]
|
||||
or cx,bx
|
||||
jnz @@done
|
||||
jnz @@overflow
|
||||
@@nover1:
|
||||
test bx,bx
|
||||
jz @@nover2
|
||||
mov bx,word[f1+2]
|
||||
test bx,bx
|
||||
jnz @@done
|
||||
jnz @@overflow
|
||||
@@nover2:
|
||||
test ax,ax
|
||||
jz @@nover3
|
||||
or bx,word[f2+2]
|
||||
jnz @@done
|
||||
jnz @@overflow
|
||||
@@nover3:
|
||||
mov di,word[f2]
|
||||
mul di
|
||||
test dx,dx
|
||||
jnz @@done
|
||||
jnz @@overflow
|
||||
mov cx,ax
|
||||
mov ax,word[f2+2]
|
||||
mul si
|
||||
test dx,dx
|
||||
jnz @@done
|
||||
jnz @@overflow
|
||||
add cx,ax
|
||||
jc @@done
|
||||
jc @@overflow
|
||||
mov ax,di
|
||||
mul si
|
||||
mov bx,ax
|
||||
add cx,dx
|
||||
jc @@done
|
||||
jc @@overflow
|
||||
mov si,word[f2+4]
|
||||
mov ax,word[f2+6]
|
||||
mov di,word[f1]
|
||||
mul di
|
||||
test dx,dx
|
||||
jnz @@done
|
||||
jnz @@overflow
|
||||
add cx,ax
|
||||
jc @@done
|
||||
jc @@overflow
|
||||
mov ax,word[f1+2]
|
||||
mul si
|
||||
test dx,dx
|
||||
jnz @@done
|
||||
jnz @@overflow
|
||||
add cx,ax
|
||||
jc @@done
|
||||
jc @@overflow
|
||||
mov ax,di
|
||||
mul si
|
||||
add bx,ax
|
||||
adc cx,dx
|
||||
jc @@done
|
||||
jc @@overflow
|
||||
add word[result+4],bx
|
||||
adc word[result+6],cx
|
||||
jc @@done
|
||||
// checked and succeed
|
||||
xor ax,ax
|
||||
mov word[checkoverflow],ax
|
||||
mov word[checkoverflow+2],ax
|
||||
jnc @@done
|
||||
@@overflow:
|
||||
call FPC_OVERFLOW
|
||||
@@done:
|
||||
end [ 'ax','bx','cx','dx','si','di' ];
|
||||
if checkoverflow then
|
||||
HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
|
||||
end;
|
||||
{$endif VER3_0}
|
||||
|
||||
|
||||
{$define FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}
|
||||
|
@ -602,8 +602,15 @@ function fpc_div_qword(n,z : qword) : qword; compilerproc;
|
||||
function fpc_mod_qword(n,z : qword) : qword; compilerproc;
|
||||
function fpc_div_int64(n,z : int64) : int64; compilerproc;
|
||||
function fpc_mod_int64(n,z : int64) : int64; compilerproc;
|
||||
{$ifdef VER3_0}
|
||||
function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword; compilerproc;
|
||||
function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64; compilerproc;
|
||||
{$else VER3_0}
|
||||
function fpc_mul_qword(f1,f2 : qword) : qword; compilerproc;
|
||||
function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword; compilerproc;
|
||||
function fpc_mul_int64(f1,f2 : int64) : int64; compilerproc;
|
||||
function fpc_mul_int64_checkoverflow(f1,f2 : int64) : int64; compilerproc;
|
||||
{$endif VER3_0}
|
||||
function fpc_mul_dword_to_qword(f1,f2 : dword) : qword; compilerproc;
|
||||
function fpc_mul_longint_to_int64(f1,f2 : longint) : int64; compilerproc;
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
|
||||
{
|
||||
This file is part of the Free Pascal run time library.
|
||||
Copyright (c) 1999-2000 by the Free Pascal development team
|
||||
@ -267,6 +268,7 @@
|
||||
end;
|
||||
{$endif FPC_SYSTEM_HAS_MOD_INT64}
|
||||
|
||||
{$ifdef VER3_0}
|
||||
|
||||
{$ifndef FPC_SYSTEM_HAS_MUL_QWORD}
|
||||
{ multiplies two qwords
|
||||
@ -304,24 +306,77 @@
|
||||
end;
|
||||
{$endif FPC_SYSTEM_HAS_MUL_QWORD}
|
||||
|
||||
{$else VER3_0}
|
||||
|
||||
{$ifndef FPC_SYSTEM_HAS_MUL_QWORD}
|
||||
function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
|
||||
var
|
||||
bitpos : qword;
|
||||
l : longint;
|
||||
begin
|
||||
result:=0;
|
||||
bitpos:=1;
|
||||
|
||||
for l:=0 to 63 do
|
||||
begin
|
||||
if (f2 and bitpos)<>0 then
|
||||
result:=result+f1;
|
||||
f1:=f1 shl 1;
|
||||
bitpos:=bitpos shl 1;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
|
||||
var
|
||||
_f1,bitpos : qword;
|
||||
l : longint;
|
||||
f1overflowed : boolean;
|
||||
begin
|
||||
result:=0;
|
||||
bitpos:=1;
|
||||
f1overflowed:=false;
|
||||
|
||||
for l:=0 to 63 do
|
||||
begin
|
||||
if (f2 and bitpos)<>0 then
|
||||
begin
|
||||
_f1:=result;
|
||||
result:=result+f1;
|
||||
|
||||
{ if one of the operands is greater than the result an
|
||||
overflow occurs }
|
||||
if (f1overflowed or ((_f1<>0) and (f1<>0) and
|
||||
((_f1>result) or (f1>result)))) then
|
||||
HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
|
||||
end;
|
||||
{ when bootstrapping, we forget about overflow checking for qword :) }
|
||||
f1overflowed:=f1overflowed or ((f1 and (qword(1) shl 63))<>0);
|
||||
f1:=f1 shl 1;
|
||||
bitpos:=bitpos shl 1;
|
||||
end;
|
||||
end;
|
||||
{$endif FPC_SYSTEM_HAS_MUL_QWORD}
|
||||
|
||||
{$endif VER3_0}
|
||||
|
||||
{$ifndef FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}
|
||||
function fpc_mul_qword_compilerproc(f1,f2 : qword;checkoverflow : longbool) : qword; external name 'FPC_MUL_QWORD';
|
||||
function fpc_mul_qword_compilerproc(f1,f2 : qword) : qword; external name 'FPC_MUL_QWORD';
|
||||
|
||||
function fpc_mul_dword_to_qword(f1,f2 : dword) : qword;[public,alias: 'FPC_MUL_DWORD_TO_QWORD']; compilerproc;
|
||||
begin
|
||||
fpc_mul_dword_to_qword:=fpc_mul_qword_compilerproc(f1,f2,false);
|
||||
fpc_mul_dword_to_qword:=fpc_mul_qword_compilerproc(f1,f2);
|
||||
end;
|
||||
{$endif FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}
|
||||
|
||||
|
||||
{$ifdef VER3_0}
|
||||
|
||||
{$ifndef FPC_SYSTEM_HAS_MUL_INT64}
|
||||
function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64;[public,alias: 'FPC_MUL_INT64']; compilerproc;
|
||||
|
||||
var
|
||||
sign : boolean;
|
||||
q1,q2,q3 : qword;
|
||||
|
||||
begin
|
||||
{$ifdef EXCLUDE_COMPLEX_PROCS}
|
||||
runerror(219);
|
||||
@ -370,9 +425,68 @@
|
||||
end;
|
||||
{$endif FPC_SYSTEM_HAS_MUL_INT64}
|
||||
|
||||
{$else VER3_0}
|
||||
|
||||
{$ifndef FPC_SYSTEM_HAS_MUL_INT64}
|
||||
function fpc_mul_int64(f1,f2 : int64) : int64;[public,alias: 'FPC_MUL_INT64']; compilerproc;
|
||||
begin
|
||||
{ there's no difference between signed and unsigned multiplication,
|
||||
when the destination size is equal to the source size and overflow
|
||||
checking is off }
|
||||
{ qword(f1)*qword(f2) is coded as a call to mulqword }
|
||||
result:=int64(qword(f1)*qword(f2));
|
||||
end;
|
||||
|
||||
|
||||
function fpc_mul_int64_checkoverflow(f1,f2 : int64) : int64;[public,alias: 'FPC_MUL_INT64_CHECKOVERFLOW']; compilerproc;
|
||||
{$ifdef EXCLUDE_COMPLEX_PROCS}
|
||||
begin
|
||||
runerror(217);
|
||||
end;
|
||||
{$else EXCLUDE_COMPLEX_PROCS}
|
||||
var
|
||||
sign : boolean;
|
||||
q1,q2,q3 : qword;
|
||||
begin
|
||||
sign:=false;
|
||||
if f1<0 then
|
||||
begin
|
||||
sign:=not(sign);
|
||||
q1:=qword(-f1);
|
||||
end
|
||||
else
|
||||
q1:=f1;
|
||||
if f2<0 then
|
||||
begin
|
||||
sign:=not(sign);
|
||||
q2:=qword(-f2);
|
||||
end
|
||||
else
|
||||
q2:=f2;
|
||||
{ the q1*q2 is coded as call to mulqword }
|
||||
q3:=q1*q2;
|
||||
|
||||
if (q1 <> 0) and (q2 <>0) and
|
||||
((q1>q3) or (q2>q3) or
|
||||
{ the bit 63 can be only set if we have $80000000 00000000 }
|
||||
{ and sign is true }
|
||||
(q3 shr 63<>0) and
|
||||
((q3<>qword(qword(1) shl 63)) or not(sign))
|
||||
) then
|
||||
HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
|
||||
|
||||
if sign then
|
||||
result:=-q3
|
||||
else
|
||||
result:=q3;
|
||||
end;
|
||||
{$endif EXCLUDE_COMPLEX_PROCS}
|
||||
{$endif FPC_SYSTEM_HAS_MUL_INT64}
|
||||
|
||||
{$endif VER3_0}
|
||||
|
||||
{$ifndef FPC_SYSTEM_HAS_MUL_LONGINT_TO_INT64}
|
||||
function fpc_mul_int64_compilerproc(f1,f2 : int64;checkoverflow : longbool) : int64; external name 'FPC_MUL_INT64';
|
||||
function fpc_mul_int64_compilerproc(f1,f2 : int64) : int64; external name 'FPC_MUL_INT64';
|
||||
|
||||
function fpc_mul_longint_to_int64(f1,f2 : longint) : int64;[public,alias: 'FPC_MUL_LONGINT_TO_INT64']; compilerproc;
|
||||
{$ifdef EXCLUDE_COMPLEX_PROCS}
|
||||
@ -381,7 +495,7 @@
|
||||
end;
|
||||
{$else EXCLUDE_COMPLEX_PROCS}
|
||||
begin
|
||||
fpc_mul_longint_to_int64:=fpc_mul_int64_compilerproc(f1,f2,false);
|
||||
fpc_mul_longint_to_int64:=fpc_mul_int64_compilerproc(f1,f2);
|
||||
end;
|
||||
{$endif EXCLUDE_COMPLEX_PROCS}
|
||||
|
||||
|
@ -140,11 +140,9 @@
|
||||
mr R4,R6
|
||||
end;
|
||||
|
||||
{$ifndef VER3_0}
|
||||
{$define FPC_SYSTEM_HAS_MUL_QWORD}
|
||||
{ multiplies two qwords
|
||||
the longbool for checkoverflow avoids a misaligned stack
|
||||
}
|
||||
function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
|
||||
function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
|
||||
assembler; nostackframe;
|
||||
asm
|
||||
// (r3:r4) = (r3:r4) * (r5:r6), checkoverflow is in r7
|
||||
@ -185,6 +183,54 @@
|
||||
add r9,r9,r12
|
||||
cmplwi cr7,r9,64 // is the sum now >= 64?
|
||||
cmplwi cr1,r9,62 // or <= 62?
|
||||
|
||||
.LDone:
|
||||
mullw r4,r4,r6 // lsw of product of lsw's
|
||||
mr r3,r8 // get msw of product in correct register
|
||||
end;
|
||||
|
||||
|
||||
function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
|
||||
assembler; nostackframe;
|
||||
asm
|
||||
// (r3:r4) = (r3:r4) * (r5:r6), checkoverflow is in r7
|
||||
// res f1 f2
|
||||
|
||||
or. r10,r3,r5 // are both msw's 0?
|
||||
mulhwu r8,r4,r6 // msw of product of lsw's
|
||||
xor r0,r0,r0 // r0 := 0 for overflow checking
|
||||
beq .LDone // if both msw's are zero, skip cross products
|
||||
mullw r9,r4,r5 // lsw of first cross-product
|
||||
cntlzw r11,r3 // count leading zeroes of msw1
|
||||
cntlzw r12,r5 // count leading zeroes of msw2
|
||||
mullw r7,r3,r6 // lsw of second cross-product
|
||||
add r12,r11,r12 // sum of leading zeroes
|
||||
mr r10,r8
|
||||
or r0,r12,r0 // maximise sum if no overflow checking, otherwise it remains
|
||||
add r8,r8,r9 // add
|
||||
cmplwi cr1,r0,64 // >= 64 leading zero bits in total? If so, no overflow
|
||||
add r8,r8,r7 // add
|
||||
bge+ cr1,.LDone // if the sum of leading zero's >= 64 (or checkoverflow was 0)
|
||||
// there's no overflow, otherwise more thorough check
|
||||
add r7,r7,r9
|
||||
mulhwu r3,r6,r3
|
||||
addc r7,r7,r10 // add the msw of the product of the lsw's, record carry
|
||||
cntlzw r9,r5
|
||||
cntlzw r10,r4 // get leading zeroes count of lsw f1
|
||||
mulhwu r5,r4,r5
|
||||
addze r3,r3
|
||||
subfic r0,r11,31 // if msw f1 = 0, then r0 := -1, else r0 >= 0
|
||||
cntlzw r7,r6
|
||||
subfic r11,r9,31 // same for f2
|
||||
srawi r0,r0,31 // if msw f1 = 0, then r0 := 1, else r0 := 0
|
||||
srawi r11,r11,31
|
||||
and r10,r10,r0 // if msw f1 <> 0, the leading zero count lsw f1 := 0
|
||||
and r9,r7,r11 // same for f2
|
||||
or. r5,r5,r3
|
||||
add r9,r9,r10 // add leading zero counts of lsw's to sum if appropriate
|
||||
add r9,r9,r12
|
||||
cmplwi cr7,r9,64 // is the sum now >= 64?
|
||||
cmplwi cr1,r9,62 // or <= 62?
|
||||
bge+ cr7,.LDone // >= 64 leading zeroes -> no overflow
|
||||
ble+ cr1,.LOverflow // <= 62 leading zeroes -> overflow
|
||||
// for 63 zeroes, we need additional checks
|
||||
@ -198,5 +244,4 @@
|
||||
mullw r4,r4,r6 // lsw of product of lsw's
|
||||
mr r3,r8 // get msw of product in correct register
|
||||
end;
|
||||
|
||||
|
||||
{$endif VER3_0}
|
||||
|
Loading…
Reference in New Issue
Block a user