mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-11 18:08:15 +02:00
+ added 32-bit asm optimized division helpers for i8086 by Max Nazhalov
git-svn-id: trunk@26512 -
This commit is contained in:
parent
78e726b34f
commit
4a107dcfa6
@ -72,3 +72,198 @@ begin
|
||||
HandleErrorAddrFrameInd(215,get_pc_addr,get_frame);
|
||||
end;
|
||||
|
||||
|
||||
{$define FPC_SYSTEM_HAS_DIV_DWORD}
{ fpc_div_dword: unsigned 32-bit division built from 16-bit instructions.

  Parameters (note the order: divisor first):
    n - divisor
    z - dividend
  Returns the quotient z div n.

  If n=0, HandleErrorAddrFrameInd is called with error code 200 before
  the assembler part is entered.

  Register roles on entry to the assembler part:
    dx:ax = z (dx = high word z1, ax = low word z0)
    cx:bx = n (cx = high word n1, bx = low word n0)

  The code distinguishes four situations:
    z<n                   - quotient is 0 (result was preset to 0);
    (i)   n1=0, z1<n0     - one DIV, quotient fits in 16 bits;
    (ii)  n1=0, z1>=n0    - two chained DIVs give a 32-bit quotient;
    (iii) n1<>0           - quotient fits in 16 bits; it is estimated from
                            the right-shifted operands and then corrected
                            by at most one.

  The instruction order matters: several steps rely on MOV leaving the
  flags untouched between SUB/SBB instructions. }
function fpc_div_dword( n, z: dword ): dword; [public, alias:'FPC_DIV_DWORD']; compilerproc;
begin
  { routine contributed by Max Nazhalov }
  { preset the quotient: the z<n path stores nothing, and the 16-bit
    quotient paths only store the low word }
  result := 0;
  if n=0 then
    HandleErrorAddrFrameInd(200,get_pc_addr,get_frame);
  asm
        mov     ax,word [z]
        mov     dx,word [z+2]
        mov     bx,word [n]
        mov     cx,word [n+2]
        // check for underflow: z<n
        // (32-bit compare z-n done as cmp/sbb on a scratch copy of z1;
        //  only the final borrow is used)
        mov     si,dx
        cmp     ax,bx
        sbb     si,cx
        jc      @@3             // z<n: quotient is 0, already in result
        // select one of 3 trivial cases
        test    cx,cx
        jnz     @@1             // n1<>0 -> case (iii)
        cmp     dx,bx
        jnc     @@0             // z1>=n0 -> case (ii), single DIV would overflow
        // (i) single division: n<=0xFFFF, z<=(n<<16)-1
        div     bx
        mov     word [result],ax        // high word of result stays 0
        jmp     @@3
@@0:    // (ii) two divisions: n<=0xFFFF, z>(n<<16)-1
        //      q1 := [0:z1] div n; r := [0:z1] mod n;
        //      q0 := [r:z0] div n;
        // cx is known to be 0 here, so the two exchanges both move the
        // operands into place and clear dx
        xchg    ax,cx
        xchg    ax,dx
        { dx=0, ax=z1, cx=z0 }
        div     bx
        xchg    ax,cx
        { dx=r, ax=z0, cx=q1 }
        div     bx
        mov     word [result],ax
        mov     word [result+2],cx
        jmp     @@3
@@1:    // (iii) long divisor: n>=0x10000 (hence q<=0xFFFF)
        //  Special case of the generic "schoolbook" division [see e.g. Knuth]:
        //  1. normalize divisor: [n1:n0] := n<<m, so that 0x8000<=n1<=0xFFFF
        //       n>=0x10000 -> m<=15
        //  2. adjust dividend accordingly: [z2:z1:z0] := z<<m
        //       m<=15 -> z2<=0x7FFF
        //  implementation: instead do >> dropping n0 and z0
        mov     si,bx   // save n0
        mov     di,cx   // save n1
        test    ch,ch
        jz      @@2     // top byte of n is 0: bit-wise shifting alone suffices
        // top byte of n is non-zero: first shift both operands right by
        // a whole byte, leaving the former top byte of n in cl
        mov     bl,bh
        mov     bh,cl
        mov     cl,ch
        mov     al,ah
        mov     ah,dl
        mov     dl,dh
        xor     dh,dh
@@2:    // repeat >> 1..8 times resulting in [dx:ax]=[z2:z1] and bx=n1
        // (cl:bx and dx:ax are shifted right together, one bit per
        //  iteration, until nothing of the divisor is left in cl)
        shr     cl,1
        rcr     bx,1
        shr     dx,1
        rcr     ax,1
        test    cl,cl
        jnz     @@2
        //  3. estimate quotient: q_hat := [z2:z1]/n1
        //     Division never overflows since z2<=0x7FFF and n1>0x7FFF
        div     bx
        //  4. multiply & subtract calculating remainder:
        //       r := z-n*q_hat (z and n are original)
        //  5. adjust quotient: while (r<0) do { q_hat-=1; r+=n };
        //     theoretically, 0..2 iterations are required [see e.g. Knuth];
        //     in practice, with such initial data, at most one iteration
        //     is needed (no disproof has been found yet; and if it will
        //     ever be found -- it also should raise doubts about the i386
        //     fpc_div_qword helper again; see FPC mantis #23963)
        mov     cx,ax   // save q_hat
        mul     si      // dx:ax := q_hat*n0
        mov     bx,ax
        mov     si,dx
        mov     ax,cx
        mul     di      // dx:ax := q_hat*n1
        xor     di,di
        add     ax,si
        adc     dx,di   // [dx:ax:bx] := n*q_hat; di=0
        // 48-bit subtraction [0:z1:z0]-[dx:ax:bx]; the difference itself
        // is discarded, only the final borrow (r<0) is wanted
        mov     si,word [z]
        sub     si,bx
        mov     si,word [z+2]   // MOV keeps the borrow from SUB intact
        sbb     si,ax
        sbb     di,dx
        sbb     cx,0            // q_hat := q_hat-1 if r<0
        //  6. done: q := [0:cx]
        mov     word [result],cx
@@3:
  end;
end;
|
||||
|
||||
|
||||
{$define FPC_SYSTEM_HAS_MOD_DWORD}
{ fpc_mod_dword: unsigned 32-bit remainder built from 16-bit instructions.

  Parameters (note the order: divisor first):
    n - divisor
    z - dividend
  Returns the remainder z mod n.

  If n=0, HandleErrorAddrFrameInd is called with error code 200 before
  the assembler part is entered.

  Register roles on entry to the assembler part:
    dx:ax = z (dx = high word z1, ax = low word z0)
    cx:bx = n (cx = high word n1, bx = low word n0)

  The code distinguishes four situations:
    z<n                   - remainder is z (result was preset to z);
    (i)   n1=0, z1<n0     - one DIV, remainder is left in dx;
    (ii)  n1=0, z1>=n0    - two chained DIVs, remainder is left in dx;
    (iii) n1<>0           - the quotient is estimated from the right-shifted
                            operands, r := z-n*q_hat is computed, and n is
                            added back once if r came out negative.

  All paths except z<n meet at @@3 with the remainder in cx:dx.

  The instruction order matters: the code relies on MOV leaving the flags
  untouched between SBB and the conditional jump. }
function fpc_mod_dword( n, z: dword ): dword; [public, alias:'FPC_MOD_DWORD']; compilerproc;
begin
  { routine contributed by Max Nazhalov }
  { preset the remainder: the z<n path stores nothing }
  result := z;
  if n=0 then
    HandleErrorAddrFrameInd(200,get_pc_addr,get_frame);
  asm
        mov     ax,word [z]
        mov     dx,word [z+2]
        mov     bx,word [n]
        mov     cx,word [n+2]
        // check for underflow: z<n
        // (32-bit compare z-n done as cmp/sbb on a scratch copy of z1;
        //  only the final borrow is used)
        mov     si,dx
        cmp     ax,bx
        sbb     si,cx
        jc      @@4             // z<n: remainder is z, already in result
        // select one of 3 trivial cases
        test    cx,cx
        jnz     @@1             // n1<>0 -> case (iii)
        cmp     dx,bx
        jnc     @@0             // z1>=n0 -> case (ii), single DIV would overflow
        // (i) single division: n<=0xFFFF, z<=(n<<16)-1
        div     bx
        jmp     @@3     // r=cx:dx (cx=0)
@@0:    // (ii) two divisions: n<=0xFFFF, z>(n<<16)-1
        //      q1 := [0:z1] div n; r := [0:z1] mod n;
        //      q0 := [r:z0] div n; r := [r:z0] mod n;
        // cx is known to be 0 here, so the two exchanges both move the
        // operands into place and clear dx
        xchg    ax,cx
        xchg    ax,dx
        { dx=0, ax=z1, cx=z0 }
        div     bx
        mov     ax,cx
        xor     cx,cx
        { dx=r, ax=z0, cx=0 }
        div     bx
        jmp     @@3     // r=cx:dx (cx=0)
@@1:    // (iii) long divisor: n>=0x10000 (hence q<=0xFFFF)
        //  Special case of the generic "schoolbook" division [see e.g. Knuth]:
        //  1. normalize divisor: [n1:n0] := n<<m, so that 0x8000<=n1<=0xFFFF
        //       n>=0x10000 -> m<=15
        //  2. adjust dividend accordingly: [z2:z1:z0] := z<<m
        //       m<=15 -> z2<=0x7FFF
        //  implementation: instead do >> dropping n0 and z0
        mov     si,bx   // save n0
        mov     di,cx   // save n1
        test    ch,ch
        jz      @@2     // top byte of n is 0: bit-wise shifting alone suffices
        // top byte of n is non-zero: first shift both operands right by
        // a whole byte, leaving the former top byte of n in cl
        mov     bl,bh
        mov     bh,cl
        mov     cl,ch
        mov     al,ah
        mov     ah,dl
        mov     dl,dh
        xor     dh,dh
@@2:    // repeat >> 1..8 times resulting in [dx:ax]=[z2:z1] and bx=n1
        // (cl:bx and dx:ax are shifted right together, one bit per
        //  iteration, until nothing of the divisor is left in cl)
        shr     cl,1
        rcr     bx,1
        shr     dx,1
        rcr     ax,1
        test    cl,cl
        jnz     @@2
        //  3. estimate quotient: q_hat := [z2:z1]/n1
        //     Division never overflows since z2<=0x7FFF and n1>0x7FFF
        div     bx
        //  4. multiply & subtract calculating remainder:
        //       r := z-n*q_hat (z and n are original)
        //  5. adjust quotient: while (r<0) do { q_hat-=1; r+=n };
        //     theoretically, 0..2 iterations are required [see e.g. Knuth];
        //     in practice, with such initial data, at most one iteration
        //     is needed (no disproof has been found yet; and if it will
        //     ever be found -- it also should raise doubts about the i386
        //     fpc_div_qword helper again; see FPC mantis #23963)
        mov     cx,ax   // save q_hat
        mul     si      // dx:ax := q_hat*n0
        mov     bx,ax
        mov     si,dx
        mov     ax,cx
        mul     di      // dx:ax := q_hat*n1
        xor     di,di
        add     ax,si
        adc     dx,di   // [dx:ax:bx] := n*q_hat; di=0
        // 48-bit subtraction [0:z1:z0]-[dx:ax:bx]: cx:si receives the low
        // 32 bits of r, the borrow out of the top word tells whether r<0
        mov     si,word [z]
        mov     cx,word [z+2]
        sub     si,bx
        sbb     cx,ax
        sbb     di,dx
        mov     dx,si           // MOV keeps the borrow from SBB intact
        jnc     @@3             // r>=0: cx:dx is the remainder
        // r<0: q_hat was one too large, add the divisor back once
        add     dx,word [n]
        adc     cx,word [n+2]
@@3:    // done: r=cx:dx
        mov     word [result],dx
        mov     word [result+2],cx
@@4:
  end;
end;
|
||||
|
Loading…
Reference in New Issue
Block a user