diff --git a/compiler/nadd.pas b/compiler/nadd.pas index db6f2ee13c..cedd39205c 100644 --- a/compiler/nadd.pas +++ b/compiler/nadd.pas @@ -2944,9 +2944,7 @@ implementation end; { otherwise, create the parameters for the helper } - right := ccallparanode.create( - cordconstnode.create(ord(cs_check_overflow in current_settings.localswitches),pasbool8type,true), - ccallparanode.create(right,ccallparanode.create(left,nil))); + right := ccallparanode.create(right,ccallparanode.create(left,nil)); left := nil; { only qword needs the unsigned code, the signed code is also used for currency } @@ -2954,6 +2952,9 @@ implementation procname := 'fpc_mul_int64' else procname := 'fpc_mul_qword'; + if cs_check_overflow in current_settings.localswitches then + procname := procname + '_checkoverflow'; + result := ccallnode.createintern(procname,right); right := nil; end; diff --git a/rtl/arm/int64p.inc b/rtl/arm/int64p.inc index 0190884b9a..9c3655d27f 100644 --- a/rtl/arm/int64p.inc +++ b/rtl/arm/int64p.inc @@ -13,9 +13,38 @@ **********************************************************************} +{$ifndef VER3_0} {$if (not defined(CPUTHUMB)) and defined(CPUARM_HAS_UMULL)} {$define FPC_SYSTEM_HAS_MUL_QWORD} -function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD']; compilerproc; +function fpc_mul_qword(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD']; compilerproc; +asm + stmfd sp!,{r4,r5,r6,r14} + mov r6,#0 + // r4 = result lo, r5 = result hi +{$ifdef ENDIAN_LITTLE} + // lo(f1)*lo(f2) + umull r4,r5,r0,r2 + // lo(f1)*hi(f2) + umlal r5,r6,r0,r3 + // hi(f1)*lo(f2) + umlal r5,r6,r1,r2 + mov r0,r4 + mov r1,r5 +{$else} + // lo(f1)*lo(f2) + umull r4,r5,r1,r3 + // lo(f1)*hi(f2) + umlal r5,r6,r1,r2 + // hi(f1)*lo(f2) + umlal r5,r6,r0,r3 + mov r1,r4 + mov r0,r5 +{$endif} + ldmfd sp!,{r4,r5,r6,r15} +end; + + +function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc; asm stmfd sp!,{r4,r5,r6,r14} mov r6,#0 @@ -51,10 +80,6 @@ asm {$endif} // no overflow? beq .Lexit - // checkoverflow? - ldr r2,[sp,#16] - cmp r2,#0 - beq .Lexit mov r0,#215 mov r1,fp @@ -63,4 +88,4 @@ asm ldmfd sp!,{r4,r5,r6,r15} end; {$endif (not defined(CPUTHUMB)) and defined(CPUARM_HAS_UMULL)} - +{$endif VER3_0} diff --git a/rtl/i386/int64p.inc b/rtl/i386/int64p.inc index e38927c4f3..35b57ee0e1 100644 --- a/rtl/i386/int64p.inc +++ b/rtl/i386/int64p.inc @@ -361,15 +361,10 @@ movl saveedi,%edi end; +{$ifndef VER3_0} {$define FPC_SYSTEM_HAS_MUL_QWORD} - { multiplies two qwords - the longbool for checkoverflow avoids a misaligned stack - } - function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc; - var - overflowed : boolean; + function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc; begin - overflowed:=false; { the following piece of code is taken from the AMD Athlon Processor x86 Code Optimization manual } asm @@ -383,8 +378,6 @@ mull %edx jmp .Lqwordmulready .Lqwordmultwomul: - cmpl $0,checkoverflow - jnz .Loverflowchecked imul f1+4,%edx imul %eax,%ecx addl %edx,%ecx @@ -393,6 +386,29 @@ .Lqwordmulready: movl %eax,__RESULT movl %edx,__RESULT+4 + .Lend: + end [ 'eax','edx','ecx']; + end; + + + function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc; + var + overflowed : boolean; + begin + overflowed:=false; + { the following piece of code is taken from the + AMD Athlon Processor x86 Code Optimization manual } + asm + movl f1+4,%edx + movl f2+4,%ecx + orl %ecx,%edx + movl f2,%edx + movl f1,%eax + jnz .Loverflowchecked + { if both upper dwords are =0 then it cannot overflow } + mull %edx + movl %eax,__RESULT + movl %edx,__RESULT+4 jmp .Lend .Loverflowchecked: @@ -431,4 +447,4 @@ if overflowed then HandleErrorFrame(215,get_frame); end; - +{$endif VER3_0} \ No newline at end of file diff --git a/rtl/i8086/int64p.inc b/rtl/i8086/int64p.inc index 92400d3640..65abfc5a58 100644 --- a/rtl/i8086/int64p.inc +++ b/rtl/i8086/int64p.inc @@ -1,3 +1,4 @@ + { This file is part of the Free Pascal run time library. Copyright (c) 2013 by the Free Pascal development team @@ -13,8 +14,9 @@ **********************************************************************} +{$ifndef VER3_0} {$define FPC_SYSTEM_HAS_MUL_QWORD} -function fpc_mul_qword( f1, f2: qword; checkoverflow: longbool ): qword; [public,alias: 'FPC_MUL_QWORD']; compilerproc; +function fpc_mul_qword( f1, f2: qword): qword; [public,alias: 'FPC_MUL_QWORD']; compilerproc; begin { routine contributed by Max Nazhalov @@ -97,9 +99,6 @@ begin mov word[result+6],dx mov si,word[f1+4] mov ax,word[f1+6] - mov bx,word[checkoverflow] - or bx,word[checkoverflow+2] - jnz @@checked mov di,word[f2] mul di mov cx,ax @@ -124,74 +123,156 @@ begin adc cx,dx add word[result+4],bx adc word[result+6],cx - jmp @@done -@@checked: + end [ 'ax','bx','cx','dx','si','di' ]; +end; + + +function fpc_mul_qword_checkoverflow( f1, f2: qword): qword; [public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc; +begin +{ routine contributed by Max Nazhalov + +64-bit multiplication via 16-bit digits: (A3:A2:A1:A0)*(B3:B2:B1:B0) + +//////// STEP 1; break-down to 32-bit multiplications, each of them generates 64-bit result: + (A3:A2*B3:B2)<<64 + (A3:A2*B1:B0)<<32 + (A1:A0*B3:B2)<<32 + (A1:A0*B1:B0) + +(A1:A0*B1:B0) = (A1*B1)<<32 + (A1*B0)<<16 + (A0*B1)<<16 + (A0:B0) + -- never overflows, forms the base of the final result, name it as "R64" + +(A3:A2*B3:B2) is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width. + -- always overflows if "<>0", so can be checked as "((A2|A3)<>0)&&(B2|B3)<>0)" + +(A3:A2*B1:B0) and (A1:A0*B3:B2) are partially required for the final result + -- to be calculated on steps 2 and 3 as a correction for the "R64" + +//////// STEP 2; calculate "R64+=(A3:A2*B1:B0)<<32" (16-bit multiplications, each of them generates 32-bit result): + (A3*B1)<<32 + (A3*B0)<<16 + (A2*B1)<<16 + (A2*B0) + +((A3*B1)<<32)<<32 is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width. + -- always overflows if "<>0", so can be checked as "(A3<>0)&&(B1<>0)" + +((A3*B0)<<16)<<32: only low word of "A3*B0" contributes to the final result if overflow is not checked. + -- overflows if the hi_word "<>0" + -- overflows if R64+(lo_word<<48) produces C-flag + +((A2*B1)<<16)<<32: only low word of "A2*B1" contributes to the final result if overflow is not checked. + -- overflows if the hi_word "<>0" + -- overflows if R64+(lo_word<<48) produces C-flag + +(A2*B0)<<32: the whole dword is significand, name it as "X" + -- overflows if R64+(X<<32) produces C-flag + +//////// STEP 3; calculate "R64+=(A1:A0*B3:B2)<<32" (16-bit multiplications, each of them generates 32-bit result): + (A1*B3)<<32 + (A1*B2)<<16 + (A0*B3)<<16 + (A0*B2) + +((A1*B3)<<32)<<32 is not required for the 64-bit result if overflow is not checked, since it is completely beyond the resulting width. + -- always overflows if "<>0", so can be checked as "(A1<>0)&&(B3<>0)" + +((A1*B2)<<16)<<32: only low word of "A1*B2" contributes to the final result if overflow is not checked. + -- overflows if the hi_word "<>0" + -- overflows if R64+(lo_word<<48) produces C-flag + +((A0*B3)<<16)<<32: only low word "A0*B3" contributes to the final result if overflow is not checked. + -- overflows if the hi_word "<>0" + -- overflows if R64+(lo_word<<48) produces C-flag + +(A0*B2)<<32: the whole dword is significand, name it as "Y" + -- overflows if R64+(Y<<32) produces C-flag +} + asm + mov di,word[f1] + mov bx,word[f1+2] + mov si,word[f2] + mov ax,word[f2+2] + push bp + mov cx,ax + mul bx + xchg ax,bx + mov bp,dx + mul si + xchg ax,cx + add bx,dx + adc bp,0 + mul di + add cx,ax + adc bx,dx + adc bp,0 + mov ax,di + mul si + add cx,dx + adc bx,0 + adc bp,0 + mov dx,bp + pop bp + mov word[result],ax + mov word[result+2],cx + mov word[result+4],bx + mov word[result+6],dx + mov si,word[f1+4] + mov ax,word[f1+6] mov bx,word[f2+6] mov cx,ax or cx,si jz @@nover1 mov cx,word[f2+4] or cx,bx - jnz @@done + jnz @@overflow @@nover1: test bx,bx jz @@nover2 mov bx,word[f1+2] test bx,bx - jnz @@done + jnz @@overflow @@nover2: test ax,ax jz @@nover3 or bx,word[f2+2] - jnz @@done + jnz @@overflow @@nover3: mov di,word[f2] mul di test dx,dx - jnz @@done + jnz @@overflow mov cx,ax mov ax,word[f2+2] mul si test dx,dx - jnz @@done + jnz @@overflow add cx,ax - jc @@done + jc @@overflow mov ax,di mul si mov bx,ax add cx,dx - jc @@done + jc @@overflow mov si,word[f2+4] mov ax,word[f2+6] mov di,word[f1] mul di test dx,dx - jnz @@done + jnz @@overflow add cx,ax - jc @@done + jc @@overflow mov ax,word[f1+2] mul si test dx,dx - jnz @@done + jnz @@overflow add cx,ax - jc @@done + jc @@overflow mov ax,di mul si add bx,ax adc cx,dx - jc @@done + jc @@overflow add word[result+4],bx adc word[result+6],cx - jc @@done - // checked and succeed - xor ax,ax - mov word[checkoverflow],ax - mov word[checkoverflow+2],ax + jnc @@done +@@overflow: + call FPC_OVERFLOW @@done: end [ 'ax','bx','cx','dx','si','di' ]; - if checkoverflow then - HandleErrorAddrFrameInd(215,get_pc_addr,get_frame); end; +{$endif VER3_0} {$define FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD} diff --git a/rtl/inc/compproc.inc b/rtl/inc/compproc.inc index 6d88da66ef..8178eeccbc 100644 --- a/rtl/inc/compproc.inc +++ b/rtl/inc/compproc.inc @@ -602,8 +602,15 @@ function fpc_div_qword(n,z : qword) : qword; compilerproc; function fpc_mod_qword(n,z : qword) : qword; compilerproc; function fpc_div_int64(n,z : int64) : int64; compilerproc; function fpc_mod_int64(n,z : int64) : int64; compilerproc; +{$ifdef VER3_0} function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword; compilerproc; function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64; compilerproc; +{$else VER3_0} +function fpc_mul_qword(f1,f2 : qword) : qword; compilerproc; +function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword; compilerproc; +function fpc_mul_int64(f1,f2 : int64) : int64; compilerproc; +function fpc_mul_int64_checkoverflow(f1,f2 : int64) : int64; compilerproc; +{$endif VER3_0} function fpc_mul_dword_to_qword(f1,f2 : dword) : qword; compilerproc; function fpc_mul_longint_to_int64(f1,f2 : longint) : int64; compilerproc; diff --git a/rtl/inc/int64.inc b/rtl/inc/int64.inc index 384aaa0ae2..066d43ab5f 100644 --- a/rtl/inc/int64.inc +++ b/rtl/inc/int64.inc @@ -1,3 +1,4 @@ + { This file is part of the Free Pascal run time library. Copyright (c) 1999-2000 by the Free Pascal development team @@ -267,6 +268,7 @@ end; {$endif FPC_SYSTEM_HAS_MOD_INT64} +{$ifdef VER3_0} {$ifndef FPC_SYSTEM_HAS_MUL_QWORD} { multiplies two qwords @@ -304,24 +306,77 @@ end; {$endif FPC_SYSTEM_HAS_MUL_QWORD} +{$else VER3_0} + +{$ifndef FPC_SYSTEM_HAS_MUL_QWORD} + function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc; + var + bitpos : qword; + l : longint; + begin + result:=0; + bitpos:=1; + + for l:=0 to 63 do + begin + if (f2 and bitpos)<>0 then + result:=result+f1; + f1:=f1 shl 1; + bitpos:=bitpos shl 1; + end; + end; + + + function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc; + var + _f1,bitpos : qword; + l : longint; + f1overflowed : boolean; + begin + result:=0; + bitpos:=1; + f1overflowed:=false; + + for l:=0 to 63 do + begin + if (f2 and bitpos)<>0 then + begin + _f1:=result; + result:=result+f1; + + { if one of the operands is greater than the result an + overflow occurs } + if (f1overflowed or ((_f1<>0) and (f1<>0) and + ((_f1>result) or (f1>result)))) then + HandleErrorAddrFrameInd(215,get_pc_addr,get_frame); + end; + { when bootstrapping, we forget about overflow checking for qword :) } + f1overflowed:=f1overflowed or ((f1 and (qword(1) shl 63))<>0); + f1:=f1 shl 1; + bitpos:=bitpos shl 1; + end; + end; +{$endif FPC_SYSTEM_HAS_MUL_QWORD} + +{$endif VER3_0} {$ifndef FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD} - function fpc_mul_qword_compilerproc(f1,f2 : qword;checkoverflow : longbool) : qword; external name 'FPC_MUL_QWORD'; + function fpc_mul_qword_compilerproc(f1,f2 : qword) : qword; external name 'FPC_MUL_QWORD'; function fpc_mul_dword_to_qword(f1,f2 : dword) : qword;[public,alias: 'FPC_MUL_DWORD_TO_QWORD']; compilerproc; begin - fpc_mul_dword_to_qword:=fpc_mul_qword_compilerproc(f1,f2,false); + fpc_mul_dword_to_qword:=fpc_mul_qword_compilerproc(f1,f2); end; {$endif FPC_SYSTEM_HAS_MUL_DWORD_TO_QWORD} +{$ifdef VER3_0} + {$ifndef FPC_SYSTEM_HAS_MUL_INT64} function fpc_mul_int64(f1,f2 : int64;checkoverflow : longbool) : int64;[public,alias: 'FPC_MUL_INT64']; compilerproc; - var sign : boolean; q1,q2,q3 : qword; - begin {$ifdef EXCLUDE_COMPLEX_PROCS} runerror(219); @@ -370,9 +425,68 @@ end; {$endif FPC_SYSTEM_HAS_MUL_INT64} +{$else VER3_0} + +{$ifndef FPC_SYSTEM_HAS_MUL_INT64} + function fpc_mul_int64(f1,f2 : int64) : int64;[public,alias: 'FPC_MUL_INT64']; compilerproc; + begin + { there's no difference between signed and unsigned multiplication, + when the destination size is equal to the source size and overflow + checking is off } + { qword(f1)*qword(f2) is coded as a call to mulqword } + result:=int64(qword(f1)*qword(f2)); + end; + + + function fpc_mul_int64_checkoverflow(f1,f2 : int64) : int64;[public,alias: 'FPC_MUL_INT64_CHECKOVERFLOW']; compilerproc; +{$ifdef EXCLUDE_COMPLEX_PROCS} + begin + runerror(217); + end; +{$else EXCLUDE_COMPLEX_PROCS} + var + sign : boolean; + q1,q2,q3 : qword; + begin + sign:=false; + if f1<0 then + begin + sign:=not(sign); + q1:=qword(-f1); + end + else + q1:=f1; + if f2<0 then + begin + sign:=not(sign); + q2:=qword(-f2); + end + else + q2:=f2; + { the q1*q2 is coded as call to mulqword } + q3:=q1*q2; + + if (q1 <> 0) and (q2 <>0) and + ((q1>q3) or (q2>q3) or + { the bit 63 can be only set if we have $80000000 00000000 } + { and sign is true } + (q3 shr 63<>0) and + ((q3<>qword(qword(1) shl 63)) or not(sign)) + ) then + HandleErrorAddrFrameInd(215,get_pc_addr,get_frame); + + if sign then + result:=-q3 + else + result:=q3; + end; +{$endif EXCLUDE_COMPLEX_PROCS} +{$endif FPC_SYSTEM_HAS_MUL_INT64} + +{$endif VER3_0} {$ifndef FPC_SYSTEM_HAS_MUL_LONGINT_TO_INT64} - function fpc_mul_int64_compilerproc(f1,f2 : int64;checkoverflow : longbool) : int64; external name 'FPC_MUL_INT64'; + function fpc_mul_int64_compilerproc(f1,f2 : int64) : int64; external name 'FPC_MUL_INT64'; function fpc_mul_longint_to_int64(f1,f2 : longint) : int64;[public,alias: 'FPC_MUL_LONGINT_TO_INT64']; compilerproc; {$ifdef EXCLUDE_COMPLEX_PROCS} @@ -381,7 +495,7 @@ end; {$else EXCLUDE_COMPLEX_PROCS} begin - fpc_mul_longint_to_int64:=fpc_mul_int64_compilerproc(f1,f2,false); + fpc_mul_longint_to_int64:=fpc_mul_int64_compilerproc(f1,f2); end; {$endif EXCLUDE_COMPLEX_PROCS} diff --git a/rtl/powerpc/int64p.inc b/rtl/powerpc/int64p.inc index 02f809bdb4..1dba7374b4 100644 --- a/rtl/powerpc/int64p.inc +++ b/rtl/powerpc/int64p.inc @@ -140,11 +140,9 @@ mr R4,R6 end; +{$ifndef VER3_0} {$define FPC_SYSTEM_HAS_MUL_QWORD} - { multiplies two qwords - the longbool for checkoverflow avoids a misaligned stack - } - function fpc_mul_qword(f1,f2 : qword;checkoverflow : longbool) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc; + function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc; assembler; nostackframe; asm // (r3:r4) = (r3:r4) * (r5:r6), checkoverflow is in r7 @@ -185,6 +183,54 @@ add r9,r9,r12 cmplwi cr7,r9,64 // is the sum now >= 64? cmplwi cr1,r9,62 // or <= 62? + + .LDone: + mullw r4,r4,r6 // lsw of product of lsw's + mr r3,r8 // get msw of product in correct register + end; + + + function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc; + assembler; nostackframe; + asm + // (r3:r4) = (r3:r4) * (r5:r6), checkoverflow is in r7 + // res f1 f2 + + or. r10,r3,r5 // are both msw's 0? + mulhwu r8,r4,r6 // msw of product of lsw's + xor r0,r0,r0 // r0 := 0 for overflow checking + beq .LDone // if both msw's are zero, skip cross products + mullw r9,r4,r5 // lsw of first cross-product + cntlzw r11,r3 // count leading zeroes of msw1 + cntlzw r12,r5 // count leading zeroes of msw2 + mullw r7,r3,r6 // lsw of second cross-product + add r12,r11,r12 // sum of leading zeroes + mr r10,r8 + or r0,r12,r0 // maximise sum if no overflow checking, otherwise it remains + add r8,r8,r9 // add + cmplwi cr1,r0,64 // >= 64 leading zero bits in total? If so, no overflow + add r8,r8,r7 // add + bge+ cr1,.LDone // if the sum of leading zero's >= 64 (or checkoverflow was 0) + // there's no overflow, otherwise more thorough check + add r7,r7,r9 + mulhwu r3,r6,r3 + addc r7,r7,r10 // add the msw of the product of the lsw's, record carry + cntlzw r9,r5 + cntlzw r10,r4 // get leading zeroes count of lsw f1 + mulhwu r5,r4,r5 + addze r3,r3 + subfic r0,r11,31 // if msw f1 = 0, then r0 := -1, else r0 >= 0 + cntlzw r7,r6 + subfic r11,r9,31 // same for f2 + srawi r0,r0,31 // if msw f1 = 0, then r0 := 1, else r0 := 0 + srawi r11,r11,31 + and r10,r10,r0 // if msw f1 <> 0, the leading zero count lsw f1 := 0 + and r9,r7,r11 // same for f2 + or. r5,r5,r3 + add r9,r9,r10 // add leading zero counts of lsw's to sum if appropriate + add r9,r9,r12 + cmplwi cr7,r9,64 // is the sum now >= 64? + cmplwi cr1,r9,62 // or <= 62? bge+ cr7,.LDone // >= 64 leading zeroes -> no overflow ble+ cr1,.LOverflow // <= 62 leading zeroes -> overflow // for 63 zeroes, we need additional checks @@ -198,5 +244,4 @@ mullw r4,r4,r6 // lsw of product of lsw's mr r3,r8 // get msw of product in correct register end; - - +{$endif VER3_0}