* split fpc_mul_<64 bit> into separate procedures with and without overflow checking

git-svn-id: trunk@35454 -
This commit is contained in:
florian 2017-02-19 19:15:14 +00:00
parent 3900ccf260
commit 7213a13081
7 changed files with 345 additions and 56 deletions

View File

@ -2944,9 +2944,7 @@ implementation
end;
{ otherwise, create the parameters for the helper }
right := ccallparanode.create(
cordconstnode.create(ord(cs_check_overflow in current_settings.localswitches),pasbool8type,true),
ccallparanode.create(right,ccallparanode.create(left,nil)));
right := ccallparanode.create(right,ccallparanode.create(left,nil));
left := nil;
{ only qword needs the unsigned code, the
signed code is also used for currency }
@ -2954,6 +2952,9 @@ implementation
procname := 'fpc_mul_int64'
else
procname := 'fpc_mul_qword';
if cs_check_overflow in current_settings.localswitches then
procname := procname + '_checkoverflow';
result := ccallnode.createintern(procname,right);
right := nil;
end;

View File

@ -13,9 +13,38 @@
**********************************************************************}
{$ifndef VER3_0}
{$if (not defined(CPUTHUMB)) and defined(CPUARM_HAS_UMULL)}
{$define FPC_SYSTEM_HAS_MUL_QWORD}
function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
function fpc_mul_qword(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
asm
stmfd sp!,{r4,r5,r6,r14}
mov r6,#0
// r4 = result lo, r5 = result hi
{$ifdef ENDIAN_LITTLE}
// lo(f1)*lo(f2)
umull r4,r5,r0,r2
// lo(f1)*hi(f2)
umlal r5,r6,r0,r3
// hi(f1)*lo(f2)
umlal r5,r6,r1,r2
mov r0,r4
mov r1,r5
{$else}
// lo(f1)*lo(f2)
umull r4,r5,r1,r3
// lo(f1)*hi(f2)
umlal r5,r6,r1,r2
// hi(f1)*lo(f2)
umlal r5,r6,r0,r3
mov r1,r4
mov r0,r5
{$endif}
ldmfd sp!,{r4,r5,r6,r15}
end;
function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
asm
stmfd sp!,{r4,r5,r6,r14}
mov r6,#0
@ -51,10 +80,6 @@ asm
{$endif}
// no overflow?
beq .Lexit
// checkoverflow?
ldr r2,[sp,#16]
cmp r2,#0
beq .Lexit
mov r0,#215
mov r1,fp
@ -63,4 +88,4 @@ asm
ldmfd sp!,{r4,r5,r6,r15}
end;
{$endif (not defined(CPUTHUMB)) and defined(CPUARM_HAS_UMULL)}
{$endif VER3_0}

View File

@ -361,15 +361,10 @@
movl saveedi,%edi
end;
{$ifndef VER3_0}
{$define FPC_SYSTEM_HAS_MUL_QWORD}
{ multiplies two qwords
the longbool for checkoverflow avoids a misaligned stack
}
function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
var
overflowed : boolean;
function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
begin
overflowed:=false;
{ the following piece of code is taken from the
AMD Athlon Processor x86 Code Optimization manual }
asm
@ -383,8 +378,6 @@
mull %edx
jmp .Lqwordmulready
.Lqwordmultwomul:
cmpl $0,checkoverflow
jnz .Loverflowchecked
imul f1+4,%edx
imul %eax,%ecx
addl %edx,%ecx
@ -393,6 +386,29 @@
.Lqwordmulready:
movl %eax,__RESULT
movl %edx,__RESULT+4
.Lend:
end [ 'eax','edx','ecx'];
end;
function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
var
overflowed : boolean;
begin
overflowed:=false;
{ the following piece of code is taken from the
AMD Athlon Processor x86 Code Optimization manual }
asm
movl f1+4,%edx
movl f2+4,%ecx
orl %ecx,%edx
movl f2,%edx
movl f1,%eax
jnz .Loverflowchecked
{ if both upper dwords are =0 then it cannot overflow }
mull %edx
movl %eax,__RESULT
movl %edx,__RESULT+4
jmp .Lend
.Loverflowchecked:
@ -431,4 +447,4 @@
if overflowed then
HandleErrorFrame(215,get_frame);
end;
{$endif VER3_0}

View File

@ -1,3 +1,4 @@
{
This file is part of the Free Pascal run time library.
Copyright (c) 2013 by the Free Pascal development team
@ -13,8 +14,9 @@
**********************************************************************}
{$ifndef VER3_0}
{$define FPC_SYSTEM_HAS_MUL_QWORD}
function fpc_mul_qword( f1, f2: qword; checkoverflow: longbool ): qword; [public,alias: 'FPC_MUL_QWORD']; compilerproc;
function fpc_mul_qword( f1, f2: qword): qword; [public,alias: 'FPC_MUL_QWORD']; compilerproc;
begin
{ routine contributed by Max Nazhalov
@ -97,9 +99,6 @@ begin
mov word[result+6],dx
mov si,word[f1+4]
mov ax,word[f1+6]
mov bx,word[checkoverflow]
or bx,word[checkoverflow+2]
jnz @@checked
mov di,word[f2]
mul di
mov cx,ax
@ -124,74 +123,156 @@ begin
adc cx,dx
add word[result+4],bx
adc word[result+6],cx
jmp @@done
@@checked:
end [ 'ax','bx','cx','dx','si','di' ];
end;
function fpc_mul_qword_checkoverflow( f1, f2: qword): qword; [public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
begin
{ routine contributed by Max Nazhalov
64-bit multiplication via 16-bit digits: (A3:A2:A1:A0)*(B3:B2:B1:B0)
//////// STEP 1; break-down to 32-bit multiplications, each of them generates 64-bit result:
(A3:A2*B3:B2)<<64 + (A3:A2*B1:B0)<<32 + (A1:A0*B3:B2)<<32 + (A1:A0*B1:B0)
(A1:A0*B1:B0) = (A1*B1)<<32 + (A1*B0)<<16 + (A0*B1)<<16 + (A0:B0)
-- never overflows, forms the base of the final result, name it as "R64"
(A3:A2*B3:B2) is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width.
-- always overflows if "<>0", so can be checked as "((A2|A3)<>0)&&(B2|B3)<>0)"
(A3:A2*B1:B0) and (A1:A0*B3:B2) are partially required for the final result
-- to be calculated on steps 2 and 3 as a correction for the "R64"
//////// STEP 2; calculate "R64+=(A3:A2*B1:B0)<<32" (16-bit multiplications, each of them generates 32-bit result):
(A3*B1)<<32 + (A3*B0)<<16 + (A2*B1)<<16 + (A2*B0)
((A3*B1)<<32)<<32 is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width.
-- always overflows if "<>0", so can be checked as "(A3<>0)&&(B1<>0)"
((A3*B0)<<16)<<32: only low word of "A3*B0" contributes to the final result if overflow is not checked.
-- overflows if the hi_word "<>0"
-- overflows if R64+(lo_word<<48) produces C-flag
((A2*B1)<<16)<<32: only low word of "A2*B1" contributes to the final result if overflow is not checked.
-- overflows if the hi_word "<>0"
-- overflows if R64+(lo_word<<48) produces C-flag
(A2*B0)<<32: the whole dword is significand, name it as "X"
-- overflows if R64+(X<<32) produces C-flag
//////// STEP 3; calculate "R64+=(A1:A0*B3:B2)<<32" (16-bit multiplications, each of them generates 32-bit result):
(A1*B3)<<32 + (A1*B2)<<16 + (A0*B3)<<16 + (A0*B2)
((A1*B3)<<32)<<32 is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width.
-- always overflows if "<>0", so can be checked as "(A1<>0)&&(B3<>0)"
((A1*B2)<<16)<<32: only low word of "A1*B2" contributes to the final result if overflow is not checked.
-- overflows if the hi_word "<>0"
-- overflows if R64+(lo_word<<48) produces C-flag
((A0*B3)<<16)<<32: only low word "A0*B3" contributes to the final result if overflow is not checked.
-- overflows if the hi_word "<>0"
-- overflows if R64+(lo_word<<48) produces C-flag
(A0*B2)<<32: the whole dword is significand, name it as "Y"
-- overflows if R64+(Y<<32) produces C-flag
}
asm
mov di,word[f1]
mov bx,word[f1+2]
mov si,word[f2]
mov ax,word[f2+2]
push bp
mov cx,ax
mul bx
xchg ax,bx
mov bp,dx
mul si
xchg ax,cx
add bx,dx
adc bp,0
mul di
add cx,ax
adc bx,dx
adc bp,0
mov ax,di
mul si
add cx,dx
adc bx,0
adc bp,0
mov dx,bp
pop bp
mov word[result],ax
mov word[result+2],cx
mov word[result+4],bx
mov word[result+6],dx
mov si,word[f1+4]
mov ax,word[f1+6]
mov bx,word[f2+6]
mov cx,ax
or cx,si
jz @@nover1
mov cx,word[f2+4]
or cx,bx
jnz @@done
jnz @@overflow
@@nover1:
test bx,bx
jz @@nover2
mov bx,word[f1+2]
test bx,bx
jnz @@done
jnz @@overflow
@@nover2:
test ax,ax
jz @@nover3
or bx,word[f2+2]
jnz @@done
jnz @@overflow
@@nover3:
mov di,word[f2]
mul di
test dx,dx
jnz @@done
jnz @@overflow
mov cx,ax
mov ax,word[f2+2]
mul si
test dx,dx
jnz @@done
jnz @@overflow
add cx,ax
jc @@done
jc @@overflow
mov ax,di
mul si
mov bx,ax
add cx,dx
jc @@done
jc @@overflow
mov si,word[f2+4]
mov ax,word[f2+6]
mov di,word[f1]
mul di
test dx,dx
jnz @@done
jnz @@overflow
add cx,ax
jc @@done
jc @@overflow
mov ax,word[f1+2]
mul si
test dx,dx
jnz @@done
jnz @@overflow
add cx,ax
jc @@done
jc @@overflow
mov ax,di
mul si
add bx,ax
adc cx,dx
jc @@done
jc @@overflow
add word[result+4],bx
adc word[result+6],cx
jc @@done
// checked and succeed
xor ax,ax
mov word[checkoverflow],ax
mov word[checkoverflow+2],ax
jnc @@done
@@overflow:
call FPC_OVERFLOW
@@done:
end [ 'ax','bx','cx','dx','si','di' ];
if checkoverflow then
HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
end;
{$endif VER3_0}
{$define FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}

View File

@ -602,8 +602,15 @@ function fpc_div_qword(n,z : qword) : qword; compilerproc;
function fpc_mod_qword(n,z : qword) : qword; compilerproc;
function fpc_div_int64(n,z : int64) : int64; compilerproc;
function fpc_mod_int64(n,z : int64) : int64; compilerproc;
{$ifdef VER3_0}
function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword; compilerproc;
function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64; compilerproc;
{$else VER3_0}
function fpc_mul_qword(f1,f2 : qword) : qword; compilerproc;
function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword; compilerproc;
function fpc_mul_int64(f1,f2 : int64) : int64; compilerproc;
function fpc_mul_int64_checkoverflow(f1,f2 : int64) : int64; compilerproc;
{$endif VER3_0}
function fpc_mul_dword_to_qword(f1,f2 : dword) : qword; compilerproc;
function fpc_mul_longint_to_int64(f1,f2 : longint) : int64; compilerproc;

View File

@ -1,3 +1,4 @@
{
This file is part of the Free Pascal run time library.
Copyright (c) 1999-2000 by the Free Pascal development team
@ -267,6 +268,7 @@
end;
{$endif FPC_SYSTEM_HAS_MOD_INT64}
{$ifdef VER3_0}
{$ifndef FPC_SYSTEM_HAS_MUL_QWORD}
{ multiplies two qwords
@ -304,24 +306,77 @@
end;
{$endif FPC_SYSTEM_HAS_MUL_QWORD}
{$else VER3_0}
{$ifndef FPC_SYSTEM_HAS_MUL_QWORD}
function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
var
bitpos : qword;
l : longint;
begin
result:=0;
bitpos:=1;
for l:=0 to 63 do
begin
if (f2 and bitpos)<>0 then
result:=result+f1;
f1:=f1 shl 1;
bitpos:=bitpos shl 1;
end;
end;
function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
var
_f1,bitpos : qword;
l : longint;
f1overflowed : boolean;
begin
result:=0;
bitpos:=1;
f1overflowed:=false;
for l:=0 to 63 do
begin
if (f2 and bitpos)<>0 then
begin
_f1:=result;
result:=result+f1;
{ if one of the operands is greater than the result an
overflow occurs }
if (f1overflowed or ((_f1<>0) and (f1<>0) and
((_f1>result) or (f1>result)))) then
HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
end;
{ when bootstrapping, we forget about overflow checking for qword :) }
f1overflowed:=f1overflowed or ((f1 and (qword(1) shl 63))<>0);
f1:=f1 shl 1;
bitpos:=bitpos shl 1;
end;
end;
{$endif FPC_SYSTEM_HAS_MUL_QWORD}
{$endif VER3_0}
{$ifndef FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}
function fpc_mul_qword_compilerproc(f1,f2 : qword;checkoverflow : longbool) : qword; external name 'FPC_MUL_QWORD';
function fpc_mul_qword_compilerproc(f1,f2 : qword) : qword; external name 'FPC_MUL_QWORD';
function fpc_mul_dword_to_qword(f1,f2 : dword) : qword;[public,alias: 'FPC_MUL_DWORD_TO_QWORD']; compilerproc;
begin
fpc_mul_dword_to_qword:=fpc_mul_qword_compilerproc(f1,f2,false);
fpc_mul_dword_to_qword:=fpc_mul_qword_compilerproc(f1,f2);
end;
{$endif FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD}
{$ifdef VER3_0}
{$ifndef FPC_SYSTEM_HAS_MUL_INT64}
function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64;[public,alias: 'FPC_MUL_INT64']; compilerproc;
var
sign : boolean;
q1,q2,q3 : qword;
begin
{$ifdef EXCLUDE_COMPLEX_PROCS}
runerror(219);
@ -370,9 +425,68 @@
end;
{$endif FPC_SYSTEM_HAS_MUL_INT64}
{$else VER3_0}
{$ifndef FPC_SYSTEM_HAS_MUL_INT64}
function fpc_mul_int64(f1,f2 : int64) : int64;[public,alias: 'FPC_MUL_INT64']; compilerproc;
begin
{ there's no difference between signed and unsigned multiplication,
when the destination size is equal to the source size and overflow
checking is off }
{ qword(f1)*qword(f2) is coded as a call to mulqword }
result:=int64(qword(f1)*qword(f2));
end;
function fpc_mul_int64_checkoverflow(f1,f2 : int64) : int64;[public,alias: 'FPC_MUL_INT64_CHECKOVERFLOW']; compilerproc;
{$ifdef EXCLUDE_COMPLEX_PROCS}
begin
runerror(217);
end;
{$else EXCLUDE_COMPLEX_PROCS}
var
sign : boolean;
q1,q2,q3 : qword;
begin
sign:=false;
if f1<0 then
begin
sign:=not(sign);
q1:=qword(-f1);
end
else
q1:=f1;
if f2<0 then
begin
sign:=not(sign);
q2:=qword(-f2);
end
else
q2:=f2;
{ the q1*q2 is coded as call to mulqword }
q3:=q1*q2;
if (q1 <> 0) and (q2 <>0) and
((q1>q3) or (q2>q3) or
{ the bit 63 can be only set if we have $80000000 00000000 }
{ and sign is true }
(q3 shr 63<>0) and
((q3<>qword(qword(1) shl 63)) or not(sign))
) then
HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
if sign then
result:=-q3
else
result:=q3;
end;
{$endif EXCLUDE_COMPLEX_PROCS}
{$endif FPC_SYSTEM_HAS_MUL_INT64}
{$endif VER3_0}
{$ifndef FPC_SYSTEM_HAS_MUL_LONGINT_TO_INT64}
function fpc_mul_int64_compilerproc(f1,f2 : int64;checkoverflow : longbool) : int64; external name 'FPC_MUL_INT64';
function fpc_mul_int64_compilerproc(f1,f2 : int64) : int64; external name 'FPC_MUL_INT64';
function fpc_mul_longint_to_int64(f1,f2 : longint) : int64;[public,alias: 'FPC_MUL_LONGINT_TO_INT64']; compilerproc;
{$ifdef EXCLUDE_COMPLEX_PROCS}
@ -381,7 +495,7 @@
end;
{$else EXCLUDE_COMPLEX_PROCS}
begin
fpc_mul_longint_to_int64:=fpc_mul_int64_compilerproc(f1,f2,false);
fpc_mul_longint_to_int64:=fpc_mul_int64_compilerproc(f1,f2);
end;
{$endif EXCLUDE_COMPLEX_PROCS}

View File

@ -140,11 +140,9 @@
mr R4,R6
end;
{$ifndef VER3_0}
{$define FPC_SYSTEM_HAS_MUL_QWORD}
{ multiplies two qwords
the longbool for checkoverflow avoids a misaligned stack
}
function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
assembler; nostackframe;
asm
// (r3:r4) = (r3:r4) * (r5:r6), checkoverflow is in r7
@ -185,6 +183,54 @@
add r9,r9,r12
cmplwi cr7,r9,64 // is the sum now >= 64?
cmplwi cr1,r9,62 // or <= 62?
.LDone:
mullw r4,r4,r6 // lsw of product of lsw's
mr r3,r8 // get msw of product in correct register
end;
function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
assembler; nostackframe;
asm
// (r3:r4) = (r3:r4) * (r5:r6), checkoverflow is in r7
// res f1 f2
or. r10,r3,r5 // are both msw's 0?
mulhwu r8,r4,r6 // msw of product of lsw's
xor r0,r0,r0 // r0 := 0 for overflow checking
beq .LDone // if both msw's are zero, skip cross products
mullw r9,r4,r5 // lsw of first cross-product
cntlzw r11,r3 // count leading zeroes of msw1
cntlzw r12,r5 // count leading zeroes of msw2
mullw r7,r3,r6 // lsw of second cross-product
add r12,r11,r12 // sum of leading zeroes
mr r10,r8
or r0,r12,r0 // maximise sum if no overflow checking, otherwise it remains
add r8,r8,r9 // add
cmplwi cr1,r0,64 // >= 64 leading zero bits in total? If so, no overflow
add r8,r8,r7 // add
bge+ cr1,.LDone // if the sum of leading zero's >= 64 (or checkoverflow was 0)
// there's no overflow, otherwise more thorough check
add r7,r7,r9
mulhwu r3,r6,r3
addc r7,r7,r10 // add the msw of the product of the lsw's, record carry
cntlzw r9,r5
cntlzw r10,r4 // get leading zeroes count of lsw f1
mulhwu r5,r4,r5
addze r3,r3
subfic r0,r11,31 // if msw f1 = 0, then r0 := -1, else r0 >= 0
cntlzw r7,r6
subfic r11,r9,31 // same for f2
srawi r0,r0,31 // if msw f1 = 0, then r0 := 1, else r0 := 0
srawi r11,r11,31
and r10,r10,r0 // if msw f1 <> 0, the leading zero count lsw f1 := 0
and r9,r7,r11 // same for f2
or. r5,r5,r3
add r9,r9,r10 // add leading zero counts of lsw's to sum if appropriate
add r9,r9,r12
cmplwi cr7,r9,64 // is the sum now >= 64?
cmplwi cr1,r9,62 // or <= 62?
bge+ cr7,.LDone // >= 64 leading zeroes -> no overflow
ble+ cr1,.LOverflow // <= 62 leading zeroes -> overflow
// for 63 zeroes, we need additional checks
@ -198,5 +244,4 @@
mullw r4,r4,r6 // lsw of product of lsw's
mr r3,r8 // get msw of product in correct register
end;
{$endif VER3_0}