fpc/rtl/i386/int64p.inc
2024-08-25 09:44:11 +00:00

427 lines
12 KiB
PHP

{
This file is part of the Free Pascal run time library.
Copyright (c) 1999-2000 by the Free Pascal development team
This file contains some helper routines for int64 and qword
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
{$Q- no overflow checking }
{$R- no range checking }
function div_qword_throwdivbyzero(n,z : qword) : qword;
{ Error exit used by the 64-bit div/mod routines of this file when the
  divisor is zero: reports error 200 through HandleErrorFrame, passing the
  frame pointer of this function. The arguments are not read and no
  function result is assigned. }
const
  DivByZeroErrorCode = 200;
begin
  HandleErrorFrame(DivByZeroErrorCode, get_frame);
end;
{$define FPC_SYSTEM_HAS_DIV_INT64}
function fpc_div_int64(n,z : int64) : int64;assembler;nostackframe;[public,alias: 'FPC_DIV_INT64']; compilerproc;
{ Signed 64-bit division: computes z div n (z is the dividend, n the divisor).
  The quotient of the absolute values is computed with unsigned arithmetic and
  negated afterwards when n and z have different signs. The result is left in
  edx:eax. A zero divisor is forwarded to div_qword_throwdivbyzero.

  Stack on entry: n = [esp + 12], z = [esp + 4]. After the three pushes below
  every argument offset is written as 12 + entry offset. }
asm
push %ebx
push %esi
push %edi
{ the following piece of code is taken from the }
{ AMD Athlon Processor x86 Code Optimization manual }
movl 12+16(%esp),%ecx { ecx = hi(n) }
movl 12+12(%esp),%ebx { ebx = lo(n) }
movl 12+8(%esp),%edx { edx = hi(z) }
movl 12+4(%esp),%eax { eax = lo(z) }
{ esi = sign mask of the result: all ones if n and z differ in sign, else 0 }
movl %ecx,%esi
xorl %edx,%esi
sarl $31,%esi
{ edx:eax = abs(z), computed as (z xor mask) - mask with mask = sign bits of z }
movl %edx,%edi
sarl $31,%edi
xorl %edi,%eax
xorl %edi,%edx
subl %edi,%eax
sbbl %edi,%edx
{ ecx:ebx = abs(n), same technique; the final sbbl sets ZF from the new ecx }
movl %ecx,%edi
sarl $31,%edi
xorl %edi,%ebx
xorl %edi,%ecx
subl %edi,%ebx
sbbl %edi,%ecx
jnz .Lbigdivisor { high dword of abs(n) is non-zero }
cmpl %ebx,%edx { divisor fits in 32 bits: compare hi(abs(z)) with lo(abs(n)) }
jae .Ltwo_divs { hi >= divisor: a single divl would overflow (also taken when ebx = 0) }
divl %ebx { eax = edx:eax div ebx }
.Lmake_sign_zero_hi:
xorl %edx,%edx { quotient fits in 32 bits: clear its high dword }
.Lmake_sign:
{ negate edx:eax when esi is all ones: (q xor esi) - esi }
xorl %esi,%eax
xorl %esi,%edx
subl %esi,%eax
sbbl %esi,%edx
pop %edi
pop %esi
pop %ebx
ret $16 { return and remove the 16 bytes of arguments }
.Ltwo_divs:
test %ebx,%ebx { Zero division ends up here with ebx = 0. }
jz .Ldivzero
{ 64 by 32 bit division in two steps: high dword first, then the low dword
  together with the remainder of the first step }
movl %eax,%ecx { ecx = lo(abs(z)) }
movl %edx,%eax { eax = hi(abs(z)) }
xorl %edx,%edx
divl %ebx { eax = high dword of the quotient, edx = remainder }
xchgl %ecx,%eax { eax = lo(abs(z)), ecx = high dword of the quotient }
divl %ebx { eax = low dword of the quotient }
movl %ecx,%edx { edx:eax = unsigned quotient }
jmp .Lmake_sign
.Lbigdivisor:
{ Divisor needs more than 32 bits. The absolute values are kept in the
  argument slots, which are no longer needed in their original form. }
movl %eax,12+4(%esp) { Reuse n~z stack space: [12+4] = lo(abs(z)) }
movl %ebx,12+8(%esp) { [12+8] = lo(abs(n)) }
movl %edx,12+12(%esp) { [12+12] = hi(abs(z)) }
movl %ecx,%edi { edi = hi(abs(n)) }
{ Shift dividend and divisor right by bsr(hi(abs(n))) + 1 bits so that the
  scaled divisor fits in ebx }
shrl $1,%edx
rcrl $1,%eax
rorl $1,%edi
rcrl $1,%ebx
bsrl %ecx,%ecx { ecx = index of the highest set bit of hi(abs(n)) }
shrdl %cl,%edi,%ebx
shrdl %cl,%edx,%eax
shrl %cl,%edx
roll $1,%edi { undo the rorl: edi = hi(abs(n)) again }
divl %ebx { eax = quotient estimate from the scaled operands }
movl 12+4(%esp),%ebx { ebx = lo(abs(z)) }
movl %eax,%ecx { keep the estimate while eax is used by mull }
imull %eax,%edi { edi = low dword of estimate * hi(abs(n)) }
mull 12+8(%esp) { edx:eax = estimate * lo(abs(n)) }
addl %edi,%edx { edx:eax = low 64 bits of estimate * abs(n) }
subl %eax,%ebx { abs(z) - product, low dword; only the borrow is used later }
movl %ecx,%eax { eax = estimate; mov does not change the borrow }
movl 12+12(%esp),%ecx { ecx = hi(abs(z)) }
sbbl %edx,%ecx { CF = 1 if the product exceeds abs(z) }
sbbl $0,%eax { subtract that borrow from the estimate }
jmp .Lmake_sign_zero_hi
.Ldivzero:
{ Restore the saved registers so the stack looks as on entry, then jump to
  the error routine, which receives the same arguments and return address. }
pop %edi
pop %esi
pop %ebx
jmp div_qword_throwdivbyzero
end;
{$define FPC_SYSTEM_HAS_MOD_INT64}
function fpc_mod_int64(n,z : int64) : int64;assembler;nostackframe;[public,alias: 'FPC_MOD_INT64']; compilerproc;
{ Signed 64-bit remainder: computes z mod n (z is the dividend, n the divisor).
  The remainder of the absolute values is computed with unsigned arithmetic
  and then given the sign of z. The result is left in edx:eax. A zero divisor
  is forwarded to div_qword_throwdivbyzero.

  Stack on entry: n = [esp + 12], z = [esp + 4]. After the three pushes below
  every argument offset is written as 12 + entry offset. }
asm
push %ebx
push %esi
push %edi
{ the following piece of code is taken from the }
{ AMD Athlon Processor x86 Code Optimization manual }
movl 12+16(%esp),%ecx { ecx = hi(n) }
movl 12+12(%esp),%ebx { ebx = lo(n) }
movl 12+8(%esp),%edx { edx = hi(z) }
movl 12+4(%esp),%eax { eax = lo(z) }
{ esi = sign mask of the result, taken from the dividend z only }
movl %edx,%esi
sarl $31,%esi
{ edx:eax = abs(z), computed as (z xor mask) - mask with mask = sign bits of z }
movl %edx,%edi
sarl $31,%edi
xorl %edi,%eax
xorl %edi,%edx
subl %edi,%eax
sbbl %edi,%edx
{ ecx:ebx = abs(n), same technique; the final sbbl sets ZF from the new ecx }
movl %ecx,%edi
sarl $31,%edi
xorl %edi,%ebx
xorl %edi,%ecx
subl %edi,%ebx
sbbl %edi,%ecx
jnz .Lbig_divisor { high dword of abs(n) is non-zero }
cmpl %ebx,%edx { divisor fits in 32 bits: compare hi(abs(z)) with lo(abs(n)) }
jae .Ltwo_divs { hi >= divisor: a single divl would overflow (also taken when ebx = 0) }
divl %ebx { edx = edx:eax mod ebx }
movl %edx,%eax { eax = remainder }
movl %ecx,%edx { ecx is 0 on this path, so this clears the high dword }
.Lmake_sign:
{ negate edx:eax when esi is all ones: (r xor esi) - esi }
xorl %esi,%eax
xorl %esi,%edx
subl %esi,%eax
sbbl %esi,%edx
pop %edi
pop %esi
pop %ebx
ret $16 { return and remove the 16 bytes of arguments }
.Ltwo_divs:
test %ebx,%ebx { Zero division ends up here with ebx = 0. }
jz .Ldivzero
{ 64 by 32 bit remainder in two steps; the quotients are discarded }
movl %eax,%ecx { ecx = lo(abs(z)) }
movl %edx,%eax { eax = hi(abs(z)) }
xorl %edx,%edx
divl %ebx { edx = hi(abs(z)) mod divisor }
movl %ecx,%eax { eax = lo(abs(z)) }
divl %ebx { edx = remainder of the whole 64-bit dividend }
movl %edx,%eax
xorl %edx,%edx { unsigned remainder = 0:eax }
jmp .Lmake_sign
.Lbig_divisor:
{ Divisor needs more than 32 bits. The absolute values are kept in the
  argument slots, which are no longer needed in their original form. }
movl %eax,12+4(%esp) { Reuse n~z stack space: [12+4] = lo(abs(z)) }
movl %ebx,12+8(%esp) { [12+8] = lo(abs(n)) }
movl %edx,12+12(%esp) { [12+12] = hi(abs(z)) }
movl %ecx,12+16(%esp) { [12+16] = hi(abs(n)) }
movl %ecx,%edi { edi = hi(abs(n)) }
{ Shift dividend and divisor right by bsr(hi(abs(n))) + 1 bits so that the
  scaled divisor fits in ebx }
shrl $1,%edx
rcrl $1,%eax
rorl $1,%edi
rcrl $1,%ebx
bsrl %ecx,%ecx { ecx = index of the highest set bit of hi(abs(n)) }
shrdl %cl,%edi,%ebx
shrdl %cl,%edx,%eax
shrl %cl,%edx
roll $1,%edi { undo the rorl: edi = hi(abs(n)) again }
divl %ebx { eax = quotient estimate from the scaled operands }
movl 12+4(%esp),%ebx { ebx = lo(abs(z)) }
movl %eax,%ecx { copy of the estimate; ecx is reloaded below before it is read }
imull %eax,%edi { edi = low dword of estimate * hi(abs(n)) }
mull 12+8(%esp) { edx:eax = estimate * lo(abs(n)) }
addl %edi,%edx { edx:eax = low 64 bits of estimate * abs(n) }
subl %eax,%ebx { ebx = low dword of abs(z) - product }
movl 12+12(%esp),%ecx { ecx = hi(abs(z)); mov does not change the borrow }
sbbl %edx,%ecx { ecx = high dword of abs(z) - product, CF = 1 if it went negative }
sbbl %eax,%eax { eax = all ones if the difference is negative, else 0 }
movl 12+16(%esp),%edx { edx = hi(abs(n)) }
andl %eax,%edx { edx:eax = abs(n) if the difference is negative, else 0 }
andl 12+8(%esp),%eax
addl %ebx,%eax { add that correction to the difference: }
adcl %ecx,%edx { edx:eax = unsigned remainder }
jmp .Lmake_sign
.Ldivzero:
{ Restore the saved registers so the stack looks as on entry, then jump to
  the error routine, which receives the same arguments and return address. }
pop %edi
pop %esi
pop %ebx
jmp div_qword_throwdivbyzero
end;
{$define FPC_SYSTEM_HAS_DIV_QWORD}
function fpc_div_qword(n,z : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_DIV_QWORD']; compilerproc;
{ Unsigned 64-bit division: computes z div n (z is the dividend, n the
  divisor) and leaves the quotient in edx:eax. A zero divisor is forwarded to
  div_qword_throwdivbyzero.

  Stack on entry: n = [esp + 12], z = [esp + 4]. Offsets written as 4+... or
  12+... account for values pushed inside this routine. }
asm
{ the following piece of code is taken from the }
{ AMD Athlon Processor x86 Code Optimization manual }
movl 16(%esp),%ecx { ecx = hi(n) }
test %ecx,%ecx
jnz .Lqworddivbigdivisor { divisor does not fit in 32 bits }
movl 12(%esp),%ecx { ecx = lo(n) }
movl 8(%esp),%edx { edx = hi(z) }
cmpl %ecx,%edx
jae .Lqworddivtwo_divs { hi(z) >= lo(n): a single divl would overflow }
movl 4(%esp),%eax { eax = lo(z) }
divl %ecx { eax = z div lo(n) }
xorl %edx,%edx { quotient fits in 32 bits: clear its high dword }
ret $16 { return and remove the 16 bytes of arguments }
.Lqworddivtwo_divs:
test %ecx,%ecx { Zero division ends up here with ecx = 0. }
jz div_qword_throwdivbyzero { nothing has been pushed, so the stack is still as on entry }
movl %edx,%eax { eax = hi(z) }
xorl %edx,%edx
divl %ecx { eax = high dword of the quotient, edx = remainder }
push %eax { eax = future hi(result); remember }
movl 4+4(%esp),%eax { eax = lo(z) }
divl %ecx { eax = low dword of the quotient }
pop %edx { edx = high dword of the quotient }
ret $16
.Lqworddivbigdivisor:
push %ebx
push %esi
push %edi
movl 12+12(%esp),%ebx { ebx = lo(n) }
movl 12+8(%esp),%edx { edx = hi(z) }
movl 12+4(%esp),%eax { eax = lo(z) }
movl %ecx,%edi { edi = hi(n) }
{ Shift dividend and divisor right by bsr(hi(n)) + 1 bits so that the scaled
  divisor fits in ebx }
shrl $1,%edx
rcrl $1,%eax
rorl $1,%edi
rcrl $1,%ebx
bsrl %ecx,%ecx { ecx = index of the highest set bit of hi(n) }
shrdl %cl,%edi,%ebx
shrdl %cl,%edx,%eax
shrl %cl,%edx
roll $1,%edi { undo the rorl: edi = hi(n) again }
divl %ebx { eax = quotient estimate from the scaled operands }
movl 12+4(%esp),%ebx { ebx = lo(z) }
movl %eax,%esi // save quotient to esi
imull %eax,%edi { edi = low dword of quotient * hi(n) }
mull 12+12(%esp) { edx:eax = quotient * lo(n) }
addl %edi,%edx
setcb %cl // cl:edx:eax = 65 bits quotient*divisor
movl 12+8(%esp),%edi // edi:ebx = dividend
subl %eax,%ebx { dividend - product, low dword; only the borrow chain matters }
movb $0,%al { mov does not change the borrow from subl }
sbbl %edx,%edi { continue the subtraction through the high dword }
sbbb %cl,%al { and through bit 64: CF = 1 if the product exceeds the dividend }
sbbl $0,%esi { subtract that borrow from the saved quotient }
xorl %edx,%edx { result = 0:quotient }
movl %esi,%eax
pop %edi
pop %esi
pop %ebx
{ NOTE(review): no explicit ret on this path; presumably the compiler appends
  the return sequence at the end of the asm body - confirm }
end;
{$define FPC_SYSTEM_HAS_MOD_QWORD}
function fpc_mod_qword(n,z : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MOD_QWORD']; compilerproc;
{ Unsigned 64-bit remainder: computes z mod n (z is the dividend, n the
  divisor) and leaves the remainder in edx:eax. A zero divisor is forwarded
  to div_qword_throwdivbyzero.

  Stack on entry: n = [esp + 12], z = [esp + 4]. Offsets written as 8+...
  account for the two registers pushed on the big divisor path. }
asm
{ the following piece of code is taken from the }
{ AMD Athlon Processor x86 Code Optimization manual }
movl 16(%esp),%ecx { ecx = hi(n) }
movl 8(%esp),%edx { edx = hi(z) }
test %ecx,%ecx
jnz .Lqwordmodr_big_divisior { divisor does not fit in 32 bits }
movl 12(%esp),%ecx { ecx = lo(n) }
movl 4(%esp),%eax { eax = lo(z) }
cmpl %ecx,%edx
jae .Lqwordmodr_two_divs { hi(z) >= lo(n): a single divl would overflow }
divl %ecx { edx = z mod lo(n) }
movl %edx,%eax { result = 0:remainder }
xorl %edx,%edx
ret $16 { return and remove the 16 bytes of arguments }
.Lqwordmodr_two_divs:
test %ecx,%ecx { Zero division ends up here with ecx = 0. }
jz div_qword_throwdivbyzero { nothing has been pushed, so the stack is still as on entry }
movl %edx,%eax { eax = hi(z) }
xorl %edx,%edx
divl %ecx { edx = hi(z) mod lo(n); the quotient is discarded }
movl 4(%esp),%eax { eax = lo(z) }
divl %ecx { edx = remainder of the whole 64-bit dividend }
movl %edx,%eax { result = 0:remainder }
xorl %edx,%edx
ret $16
.Lqwordmodr_big_divisior:
{ here ecx = hi(n) and edx = hi(z), as loaded at the top }
push %ebx
push %edi
movl 8+12(%esp),%ebx { ebx = lo(n) }
movl 8+4(%esp),%eax { eax = lo(z) }
movl %ecx,%edi { edi = hi(n) }
{ Shift dividend and divisor right by bsr(hi(n)) + 1 bits so that the scaled
  divisor fits in ebx }
shrl $1,%edx
rcrl $1,%eax
rorl $1,%edi
rcrl $1,%ebx
bsrl %ecx,%ecx { ecx = index of the highest set bit of hi(n) }
shrdl %cl,%edi,%ebx
shrdl %cl,%edx,%eax
shrl %cl,%edx
roll $1,%edi { undo the rorl: edi = hi(n) again }
divl %ebx { eax = quotient estimate from the scaled operands }
movl 8+4(%esp),%ebx { lo(z) }
imull %eax,%edi { edi = low dword of quotient * hi(n) }
mull 8+12(%esp) { lo(n) }
addl %edi,%edx
setcb %cl // cl:edx:eax = 65 bits quotient*divisor
movl 8+8(%esp),%edi { hi(z) }
subl %eax,%ebx // subtract (quotient*divisor) from dividend
movb $0,%al { mov does not change the borrow from subl }
sbbl %edx,%edi { edi:ebx = dividend - product, low 64 bits }
sbbb %cl,%al // if carry is set now, the quotient was off by 1,
// and we need to add divisor to result
movl 8+12(%esp),%eax { lo(n) }
sbbl %edx,%edx { edx = all ones if carry is set, else 0 }
andl %edx,%eax { edx:eax = n if carry was set, else 0 }
andl 8+16(%esp),%edx { hi(n) }
addl %ebx,%eax { add that correction to the difference: }
adcl %edi,%edx { edx:eax = remainder }
pop %edi
pop %ebx
{ NOTE(review): no explicit ret on this path; presumably the compiler appends
  the return sequence at the end of the asm body - confirm }
end;
{$define FPC_SYSTEM_HAS_MUL_QWORD}
function fpc_mul_qword(f1,f2 : qword) : qword;[public,alias: 'FPC_MUL_QWORD']; compilerproc;
{ Unsigned 64-bit multiplication without overflow detection: stores the low
  64 bits of f1*f2 in the function result. Unlike the div/mod routines in
  this file, this is a regular function whose body is one asm statement, so
  the parameters and the result are addressed by name. }
begin
{ the following piece of code is taken from the
AMD Athlon Processor x86 Code Optimization manual }
asm
movl f1+4,%edx { edx = hi(f1) }
movl f2+4,%ecx { ecx = hi(f2) }
orl %ecx,%edx { ZF = 1 only if both high dwords are zero }
movl f2,%edx { edx = lo(f2); mov keeps the flags set by orl }
movl f1,%eax { eax = lo(f1) }
jnz .Lqwordmultwomul
{ if both upper dwords are =0 then it cannot overflow }
mull %edx { edx:eax = lo(f1) * lo(f2) }
jmp .Lqwordmulready
.Lqwordmultwomul:
imul f1+4,%edx { edx = low dword of lo(f2) * hi(f1) }
imul %eax,%ecx { ecx = low dword of hi(f2) * lo(f1) }
addl %edx,%ecx { ecx = sum of both cross products }
mull f2 { edx:eax = lo(f1) * lo(f2) }
add %ecx,%edx { add the cross products to the high dword }
.Lqwordmulready:
movl %eax,__RESULT { low dword of the result }
movl %edx,__RESULT+4 { high dword of the result }
{ NOTE(review): .Lend is not referenced anywhere in this block }
.Lend:
{ the list after end names the registers modified by the asm statement }
end [ 'eax','edx','ecx'];
end;
function mul_qword_throwoverflow(f1,f2 : qword) : qword;
{ Error exit used by fpc_mul_qword_checkoverflow when the product does not
  fit in 64 bits: reports error 215 through HandleErrorFrame, passing the
  frame pointer of this function. The arguments are not read and no
  function result is assigned. }
const
  MulOverflowErrorCode = 215;
begin
  HandleErrorFrame(MulOverflowErrorCode, get_frame);
end;
function fpc_mul_qword_checkoverflow(f1,f2 : qword) : qword;assembler;nostackframe;[public,alias: 'FPC_MUL_QWORD_CHECKOVERFLOW']; compilerproc;
{ Unsigned 64-bit multiplication with overflow detection: leaves f1*f2 in
  edx:eax, or jumps to mul_qword_throwoverflow when the product does not fit
  in 64 bits. Nothing is pushed in this routine, so the arguments stay at
  their entry offsets.

  Stack on entry: f1 = [esp + 12], f2 = [esp + 4]. }
asm
{ the following piece of code is taken from the
AMD Athlon Processor x86 Code Optimization manual }
movl 16(%esp),%edx { edx = hi(f1) }
movl 8(%esp),%ecx { ecx = hi(f2) }
orl %ecx,%edx { ZF = 1 only if both high dwords are zero }
movl 4(%esp),%edx { edx = lo(f2) }
movl 12(%esp),%eax { eax = lo(f1) }
jnz .Loverflowchecked { mov keeps the flags set by orl }
{ if both upper dwords are =0 then it cannot overflow }
mull %edx { edx:eax = lo(f1) * lo(f2) }
ret $16 { return and remove the 16 bytes of arguments }
.Loverflowed:
{ the stack is still as on entry, so the error routine receives the same
  arguments and return address }
jmp mul_qword_throwoverflow
.Loverflowchecked:
{ if both upper dwords are <>0 then it overflows always }
test %ecx,%ecx
jz .Loverok1 { hi(f2) = 0 }
cmpl $0,16(%esp) { hi(f2) <> 0: overflow unless hi(f1) = 0 }
jnz .Loverflowed
.Loverok1:
{ overflow checked code }
movl 16(%esp),%eax { eax = hi(f1) }
mull 4(%esp) { edx:eax = hi(f1) * lo(f2); CF = 1 if edx <> 0 }
movl %eax,%ecx { ecx = first cross product; mov keeps CF }
jc .Loverflowed { cross product needs more than 32 bits }
movl 12(%esp),%eax { eax = lo(f1) }
mull 8(%esp) { edx:eax = lo(f1) * hi(f2) }
jc .Loverflowed { cross product needs more than 32 bits }
addl %eax,%ecx { ecx = sum of both cross products }
jc .Loverflowed
movl 4(%esp),%eax { eax = lo(f2) }
mull 12(%esp) { edx:eax = lo(f2) * lo(f1) }
addl %ecx,%edx { add the cross products to the high dword }
jc .Loverflowed { carry out of bit 63 }
{ NOTE(review): no explicit ret on this path; presumably the compiler appends
  the return sequence at the end of the asm body - confirm }
end;