mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-12 01:06:02 +02:00
+ added 32-bit and 64-bit unsigned asm optimized multiplication routines for
i8086, contributed by Max Nazhalov git-svn-id: trunk@26306 -
This commit is contained in:
parent
880201e56c
commit
f2e73b5e6f
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -7945,6 +7945,7 @@ rtl/i386/strings.inc svneol=native#text/plain
|
|||||||
rtl/i386/stringss.inc svneol=native#text/plain
|
rtl/i386/stringss.inc svneol=native#text/plain
|
||||||
rtl/i386/strpas.inc svneol=native#text/plain
|
rtl/i386/strpas.inc svneol=native#text/plain
|
||||||
rtl/i8086/i8086.inc svneol=native#text/plain
|
rtl/i8086/i8086.inc svneol=native#text/plain
|
||||||
|
rtl/i8086/int32p.inc svneol=native#text/plain
|
||||||
rtl/i8086/int64p.inc svneol=native#text/plain
|
rtl/i8086/int64p.inc svneol=native#text/plain
|
||||||
rtl/i8086/makefile.cpu svneol=native#text/plain
|
rtl/i8086/makefile.cpu svneol=native#text/plain
|
||||||
rtl/i8086/math.inc svneol=native#text/plain
|
rtl/i8086/math.inc svneol=native#text/plain
|
||||||
|
78
rtl/i8086/int32p.inc
Normal file
78
rtl/i8086/int32p.inc
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
{
|
||||||
|
This file is part of the Free Pascal run time library.
|
||||||
|
Copyright (c) 2013 by the Free Pascal development team
|
||||||
|
|
||||||
|
This file contains some helper routines for longint and dword
|
||||||
|
|
||||||
|
See the file COPYING.FPC, included in this distribution,
|
||||||
|
for details about the copyright.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
|
||||||
|
**********************************************************************}
|
||||||
|
|
||||||
|
{$define FPC_SYSTEM_HAS_MUL_DWORD}
function fpc_mul_dword( f1, f2: dword; checkoverflow: boolean ): dword; [public,alias: 'FPC_MUL_DWORD']; compilerproc;
begin
{ Routine contributed by Max Nazhalov.

  Unsigned 32-bit multiplication built from 16-bit digits.
  With f1 = A1:A0 and f2 = B1:B0 (high word : low word):

    (A1:A0)*(B1:B0) = (A1*B1)<<32 + (A1*B0)<<16 + (A0*B1)<<16 + (A0*B0)

  How each 16x16 partial product is used:
    A1*B1  lies completely above bit 31, so it is never multiplied out;
           the checked path reports overflow when both A1 and B1 are <>0
    A1*B0  only lo_word reaches the result; overflow if hi_word<>0
    A0*B1  only lo_word reaches the result; overflow if hi_word<>0
    A0*B0  the whole dword is significant

  Register roles after the initial loads:
    AX = A1, CX = A0, SI = B1, DI = B0; the product is built in DX:AX.

  Overflow reporting: the checked path clears "checkoverflow" only when it
  runs to completion without detecting overflow.  If the flag is still set
  after the asm block, HandleErrorAddrFrameInd is called with code 215.
  On the overflow exits DX:AX holds an intermediate value, so the stored
  result is not a meaningful product in that case.
}
  asm
    // fetch the four 16-bit digits
    mov     ax,word[f1+2]           // AX = A1
    mov     cx,word[f1]             // CX = A0
    mov     si,word[f2+2]           // SI = B1
    mov     di,word[f2]             // DI = B0
    cmp     checkoverflow,0
    jnz     @@with_check

    // ---- unchecked: keep only the low 32 bits of the product ----
    mul     di                      // DX:AX = A1*B0
    xchg    ax,si                   // SI = lo(A1*B0), AX = B1
    mul     cx                      // DX:AX = A0*B1
    add     si,ax                   // SI = lo(A1*B0) + lo(A0*B1)
    mov     ax,di
    mul     cx                      // DX:AX = A0*B0
    add     dx,si                   // fold both cross terms into the high word
    jmp     @@store

    // ---- checked: leave for @@store at the first sign of overflow ----
@@with_check:
    test    ax,ax
    jz      @@cross_a0b1            // A1 = 0: the A1*B0 term vanishes
    test    si,si
    jnz     @@store                 // A1<>0 and B1<>0: A1*B1 overflows
    mul     di                      // DX:AX = A1*B0
    test    dx,dx
    jnz     @@store                 // hi_word(A1*B0)<>0: overflow
@@cross_a0b1:
    // on both routes here: after the exchange AX = B1 and SI = lo(A1*B0)
    xchg    ax,si
    mul     cx                      // DX:AX = A0*B1
    test    dx,dx
    jnz     @@store                 // hi_word(A0*B1)<>0: overflow
    add     si,ax                   // SI = lo(A1*B0) + lo(A0*B1)
    jc      @@store                 // the cross terms carried out of 16 bits
    mov     ax,di
    mul     cx                      // DX:AX = A0*B0
    add     dx,si
    jc      @@store                 // carry out of bit 31: overflow
    // every check passed: report success by clearing the flag
    mov     checkoverflow,0

@@store:
    mov     word[result],ax
    mov     word[result+2],dx
  end [ 'ax','cx','dx','si','di' ];
  if checkoverflow then
    HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
end;
|
||||||
|
|
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
This file is part of the Free Pascal run time library.
|
This file is part of the Free Pascal run time library.
|
||||||
Copyright (c) 1999-2000 by the Free Pascal development team
|
Copyright (c) 2013 by the Free Pascal development team
|
||||||
|
|
||||||
This file contains some helper routines for int64 and qword
|
This file contains some helper routines for int64 and qword
|
||||||
|
|
||||||
@ -12,3 +12,185 @@
|
|||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
|
||||||
**********************************************************************}
|
**********************************************************************}
|
||||||
|
|
||||||
|
{$I int32p.inc}
|
||||||
|
|
||||||
|
{$define FPC_SYSTEM_HAS_MUL_QWORD}
function fpc_mul_qword( f1, f2: qword; checkoverflow: longbool ): qword; [public,alias: 'FPC_MUL_QWORD']; compilerproc;
begin
{ Routine contributed by Max Nazhalov.

  Unsigned 64-bit multiplication via 16-bit digits: (A3:A2:A1:A0)*(B3:B2:B1:B0),
  where f1 = A3:A2:A1:A0 and f2 = B3:B2:B1:B0 (A0/B0 are the lowest words).

//////// STEP 1; break down into 32-bit multiplications, each producing a 64-bit result:
  (A3:A2*B3:B2)<<64 + (A3:A2*B1:B0)<<32 + (A1:A0*B3:B2)<<32 + (A1:A0*B1:B0)

  (A1:A0*B1:B0) = (A1*B1)<<32 + (A1*B0)<<16 + (A0*B1)<<16 + (A0*B0)
    -- never overflows; it forms the base of the final result, called "R64"

  (A3:A2*B3:B2) is not needed for the 64-bit result when overflow is not
  checked, because it lies completely beyond the resulting width.
    -- always overflows if "<>0", so it is checked as "((A2|A3)<>0)&&((B2|B3)<>0)"

  (A3:A2*B1:B0) and (A1:A0*B3:B2) are partially needed for the final result
    -- computed in steps 2 and 3 as corrections added to "R64"

//////// STEP 2; compute "R64+=(A3:A2*B1:B0)<<32" (16-bit multiplications, each producing a 32-bit result):
  (A3*B1)<<32 + (A3*B0)<<16 + (A2*B1)<<16 + (A2*B0)

  ((A3*B1)<<32)<<32 is not needed when overflow is not checked (completely beyond the resulting width).
    -- always overflows if "<>0", so it is checked as "(A3<>0)&&(B1<>0)"

  ((A3*B0)<<16)<<32: only the low word of "A3*B0" contributes to the result.
    -- overflows if the hi_word "<>0"
    -- overflows if R64+(lo_word<<48) produces C-flag

  ((A2*B1)<<16)<<32: only the low word of "A2*B1" contributes to the result.
    -- overflows if the hi_word "<>0"
    -- overflows if R64+(lo_word<<48) produces C-flag

  (A2*B0)<<32: the whole dword is significant, called "X"
    -- overflows if R64+(X<<32) produces C-flag

//////// STEP 3; compute "R64+=(A1:A0*B3:B2)<<32" (16-bit multiplications, each producing a 32-bit result):
  (A1*B3)<<32 + (A1*B2)<<16 + (A0*B3)<<16 + (A0*B2)

  ((A1*B3)<<32)<<32 is not needed when overflow is not checked (completely beyond the resulting width).
    -- always overflows if "<>0", so it is checked as "(A1<>0)&&(B3<>0)"

  ((A1*B2)<<16)<<32: only the low word of "A1*B2" contributes to the result.
    -- overflows if the hi_word "<>0"
    -- overflows if R64+(lo_word<<48) produces C-flag

  ((A0*B3)<<16)<<32: only the low word of "A0*B3" contributes to the result.
    -- overflows if the hi_word "<>0"
    -- overflows if R64+(lo_word<<48) produces C-flag

  (A0*B2)<<32: the whole dword is significant, called "Y"
    -- overflows if R64+(Y<<32) produces C-flag

  Overflow reporting: the checked path zeroes "checkoverflow" only after all
  tests have passed.  If it is still non-zero after the asm block,
  HandleErrorAddrFrameInd is called with code 215.  On the overflow exits
  "result" holds R64 without the complete step 2/3 corrections.
}
  asm
    // ================= R64 = (A1:A0)*(B1:B0) =================
    mov     di,word[f1]             // DI = A0
    mov     bx,word[f1+2]           // BX = A1
    mov     si,word[f2]             // SI = B0
    mov     ax,word[f2+2]           // AX = B1
    // BP is borrowed as a fourth accumulator word; no parameter or result is
    // touched until the matching pop (presumably because they are addressed
    // relative to BP -- confirm against the generated frame)
    push    bp
    mov     cx,ax                   // CX = B1
    mul     bx                      // DX:AX = A1*B1
    xchg    ax,bx                   // BX = lo(A1*B1), AX = A1
    mov     bp,dx                   // BP = hi(A1*B1)
    mul     si                      // DX:AX = A1*B0
    xchg    ax,cx                   // CX = lo(A1*B0), AX = B1
    add     bx,dx                   // BX += hi(A1*B0)
    adc     bp,0
    mul     di                      // DX:AX = A0*B1
    add     cx,ax                   // CX += lo(A0*B1)
    adc     bx,dx                   // BX += hi(A0*B1) + carry
    adc     bp,0
    mov     ax,di
    mul     si                      // DX:AX = A0*B0
    add     cx,dx                   // CX += hi(A0*B0)
    adc     bx,0
    adc     bp,0
    mov     dx,bp                   // R64 is now DX:BX:CX:AX
    pop     bp
    mov     word[result],ax
    mov     word[result+2],cx
    mov     word[result+4],bx
    mov     word[result+6],dx

    // ================= corrections from steps 2 and 3 =================
    mov     si,word[f1+4]           // SI = A2
    mov     ax,word[f1+6]           // AX = A3
    mov     bx,word[checkoverflow]
    or      bx,word[checkoverflow+2]
    jnz     @@with_check            // any bit of the longbool set: checked path

    // ---- unchecked: accumulate the correction in CX:BX (high:low) ----
    mov     di,word[f2]             // DI = B0
    mul     di                      // DX:AX = A3*B0
    mov     cx,ax                   // CX = lo(A3*B0)
    mov     ax,word[f2+2]           // AX = B1
    mul     si                      // DX:AX = A2*B1
    add     cx,ax                   // CX += lo(A2*B1)
    mov     ax,di
    mul     si                      // DX:AX = A2*B0 = X
    mov     bx,ax                   // BX = lo(X)
    add     cx,dx                   // CX += hi(X)
    mov     si,word[f2+4]           // SI = B2
    mov     ax,word[f2+6]           // AX = B3
    mov     di,word[f1]             // DI = A0
    mul     di                      // DX:AX = A0*B3
    add     cx,ax                   // CX += lo(A0*B3)
    mov     ax,word[f1+2]           // AX = A1
    mul     si                      // DX:AX = A1*B2
    add     cx,ax                   // CX += lo(A1*B2)
    mov     ax,di
    mul     si                      // DX:AX = A0*B2 = Y
    add     bx,ax                   // BX += lo(Y)
    adc     cx,dx                   // CX += hi(Y) + carry
    add     word[result+4],bx
    adc     word[result+6],cx
    jmp     @@finish

    // ---- checked: leave for @@finish at the first sign of overflow ----
@@with_check:
    // (A3:A2*B3:B2)<<64 overflows when both 32-bit high halves are non-zero
    mov     bx,word[f2+6]           // BX = B3
    mov     cx,ax
    or      cx,si                   // CX = A3 or A2
    jz      @@high_halves_ok
    mov     cx,word[f2+4]
    or      cx,bx                   // CX = B2 or B3
    jnz     @@finish
@@high_halves_ok:
    // A1*B3 overflows when both factors are non-zero
    test    bx,bx
    jz      @@a1b3_ok
    mov     bx,word[f1+2]           // BX = A1
    test    bx,bx
    jnz     @@finish
@@a1b3_ok:
    // BX is zero on both routes to this label, so the OR below tests B1 alone;
    // A3*B1 overflows when both factors are non-zero
    test    ax,ax
    jz      @@a3b1_ok
    or      bx,word[f2+2]
    jnz     @@finish
@@a3b1_ok:
    // step 2: correction from (A3:A2)*(B1:B0)
    mov     di,word[f2]             // DI = B0
    mul     di                      // DX:AX = A3*B0
    test    dx,dx
    jnz     @@finish                // hi_word(A3*B0)<>0
    mov     cx,ax                   // CX = lo(A3*B0)
    mov     ax,word[f2+2]           // AX = B1
    mul     si                      // DX:AX = A2*B1
    test    dx,dx
    jnz     @@finish                // hi_word(A2*B1)<>0
    add     cx,ax                   // CX += lo(A2*B1)
    jc      @@finish
    mov     ax,di
    mul     si                      // DX:AX = A2*B0 = X
    mov     bx,ax                   // BX = lo(X)
    add     cx,dx                   // CX += hi(X)
    jc      @@finish
    // step 3: correction from (A1:A0)*(B3:B2)
    mov     si,word[f2+4]           // SI = B2
    mov     ax,word[f2+6]           // AX = B3
    mov     di,word[f1]             // DI = A0
    mul     di                      // DX:AX = A0*B3
    test    dx,dx
    jnz     @@finish                // hi_word(A0*B3)<>0
    add     cx,ax                   // CX += lo(A0*B3)
    jc      @@finish
    mov     ax,word[f1+2]           // AX = A1
    mul     si                      // DX:AX = A1*B2
    test    dx,dx
    jnz     @@finish                // hi_word(A1*B2)<>0
    add     cx,ax                   // CX += lo(A1*B2)
    jc      @@finish
    mov     ax,di
    mul     si                      // DX:AX = A0*B2 = Y
    add     bx,ax                   // BX += lo(Y)
    adc     cx,dx                   // CX += hi(Y) + carry
    jc      @@finish
    // fold the correction into the upper half of R64
    add     word[result+4],bx
    adc     word[result+6],cx
    jc      @@finish
    // every check passed: report success by zeroing the longbool
    xor     ax,ax
    mov     word[checkoverflow],ax
    mov     word[checkoverflow+2],ax
@@finish:
  end [ 'ax','bx','cx','dx','si','di' ];
  if checkoverflow then
    HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
end;
|
||||||
|
Loading…
Reference in New Issue
Block a user