mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-10 11:32:46 +02:00
291 lines
8.1 KiB
PHP
291 lines
8.1 KiB
PHP
{
|
|
This file is part of the Free Pascal run time library.
|
|
Copyright (c) 2013 by the Free Pascal development team
|
|
|
|
This file contains some helper routines for longint and dword
|
|
|
|
See the file COPYING.FPC, included in this distribution,
|
|
for details about the copyright.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
**********************************************************************}
|
|
|
|
{$define FPC_SYSTEM_HAS_MUL_DWORD}
|
|
function fpc_mul_dword( f1, f2: dword ): dword; [public,alias: 'FPC_MUL_DWORD']; compilerproc;
|
|
begin
|
|
{ routine contributed by Max Nazhalov
|
|
|
|
32-bit multiplications summary:
|
|
f1 = A1*$10000+A0
|
|
f2 = B1*$10000+B0
|
|
(A1:A0*B1:B0) = (A1*B1)<<32 + (A1*B0)<<16 + (A0*B1)<<16 + (A0*B0)
|
|
|
|
A1*B1 [only needed for overflow checking; overflow if <>0]
|
|
A1*B0
|
|
A0*B1
|
|
A0:B0
|
|
}
|
|
asm
|
|
mov cx,word[f1]
|
|
mov ax,word[f1+2]
|
|
mov di,word[f2]
|
|
mov si,word[f2+2]
|
|
mul di
|
|
xchg ax,si
|
|
mul cx
|
|
add si,ax
|
|
xchg ax,di { 1 byte shorter than mov }
|
|
mul cx
|
|
add dx,si
|
|
mov word[result],ax
|
|
mov word[result+2],dx
|
|
end [ 'ax','cx','dx','si','di' ];
|
|
end;
|
|
|
|
|
|
function fpc_mul_dword_checkoverflow( f1, f2: dword ): dword; [public,alias: 'FPC_MUL_DWORD_CHECKOVERFLOW']; compilerproc;
|
|
begin
|
|
{ routine contributed by Max Nazhalov
|
|
|
|
32-bit multiplications summary:
|
|
f1 = A1*$10000+A0
|
|
f2 = B1*$10000+B0
|
|
(A1:A0*B1:B0) = (A1*B1)<<32 + (A1*B0)<<16 + (A0*B1)<<16 + (A0*B0)
|
|
|
|
A1*B1 [only needed for overflow checking; overflow if <>0]
|
|
A1*B0
|
|
A0*B1
|
|
A0:B0
|
|
}
|
|
asm
|
|
mov cx,word[f1]
|
|
mov ax,word[f1+2]
|
|
mov di,word[f2]
|
|
mov si,word[f2+2]
|
|
test ax,ax
|
|
jz @@skip
|
|
test si,si
|
|
jnz @@overflow
|
|
mul di
|
|
test dx,dx
|
|
jnz @@overflow
|
|
@@skip:
|
|
xchg ax,si
|
|
mul cx
|
|
test dx,dx
|
|
jnz @@overflow
|
|
add si,ax
|
|
jc @@overflow
|
|
xchg ax,di { 1 byte shorter than mov }
|
|
mul cx
|
|
add dx,si
|
|
jc @@overflow
|
|
// checked and succeed
|
|
mov word[result],ax
|
|
mov word[result+2],dx
|
|
jmp @@done
|
|
@@overflow:
|
|
call FPC_OVERFLOW
|
|
@@done:
|
|
end [ 'ax','cx','dx','si','di' ];
|
|
end;
|
|
|
|
|
|
{$define FPC_SYSTEM_HAS_DIV_DWORD}
|
|
function fpc_div_dword( n, z: dword ): dword; [public, alias:'FPC_DIV_DWORD']; compilerproc;
|
|
begin
|
|
{ routine contributed by Max Nazhalov }
|
|
result := 0;
|
|
if n=0 then
|
|
HandleErrorAddrFrameInd(200,get_pc_addr,get_frame);
|
|
asm
|
|
mov ax,word [z]
|
|
mov dx,word [z+2]
|
|
mov bx,word [n]
|
|
mov cx,word [n+2]
|
|
// check for underflow: z<n
|
|
mov si,dx
|
|
cmp ax,bx
|
|
sbb si,cx
|
|
jc @@3
|
|
// select one of 3 trivial cases
|
|
test cx,cx
|
|
jnz @@1
|
|
cmp dx,bx
|
|
jnc @@0
|
|
// (i) single division: n<=0xFFFF, z<=(n<<16)-1
|
|
div bx
|
|
mov word [result],ax
|
|
jmp @@3
|
|
@@0: // (ii) two divisions: n<=0xFFFF, z>(n<<16)-1
|
|
// q1 := [0:z1] div n; r := [0:z1] mod n;
|
|
// q0 := [r:z0] div n;
|
|
xchg ax,cx
|
|
xchg ax,dx
|
|
{ dx=0, ax=z1, cx=z0 }
|
|
div bx
|
|
xchg ax,cx
|
|
{ dx=r, ax=z0, cx=q1 }
|
|
div bx
|
|
mov word [result],ax
|
|
mov word [result+2],cx
|
|
jmp @@3
|
|
@@1: // (iii) long divisor: n>=0x10000 (hence q<=0xFFFF)
|
|
// Special case of the generic "schoolbook" division [see e.g. Knuth]:
|
|
// 1. normalize divisor: [n1:n0] := n<<m, so that 0x8000<=n1<=0xFFFF
|
|
// n>=0x10000 -> m<=15
|
|
// 2. adjust divident accordingly: [z2:z1:z0] := z<<m
|
|
// m<=15 -> z2<=0x7FFF
|
|
// implementation: instead do >> dropping n0 and z0
|
|
mov si,bx // save n0
|
|
mov di,cx // save n1
|
|
test ch,ch
|
|
jz @@2
|
|
mov bl,bh
|
|
mov bh,cl
|
|
mov cl,ch
|
|
mov al,ah
|
|
mov ah,dl
|
|
mov dl,dh
|
|
xor dh,dh
|
|
@@2: // repeat >> 1..8 times resulting in [dx:ax]=[z2:z1] and bx=n1
|
|
shr cl,1
|
|
rcr bx,1
|
|
shr dx,1
|
|
rcr ax,1
|
|
test cl,cl
|
|
jnz @@2
|
|
// 3. estimate quotient: q_hat := [z2:z1]/n1
|
|
// Division never overflows since z2<=0x7FFF and n1>0x7FFF
|
|
div bx
|
|
// 4. multiply & subtract calculating remainder:
|
|
// r := z-n*q_hat (z and n are original)
|
|
// 5. adjust quotient: while (r<0) do { q_hat-=1; r+=n };
|
|
// theoretically, 0..2 iterations are required [see e.g. Knuth];
|
|
// in practice, with such initial data, at most one iteration
|
|
// is needed (no disproof has been found yet; and if it will
|
|
// ever be found -- it also should raise doubts about the i386
|
|
// fpc_div_qword helper again; see FPC mantis #23963)
|
|
mov cx,ax // save q_hat
|
|
mul si
|
|
mov bx,ax
|
|
mov si,dx
|
|
mov ax,cx
|
|
mul di
|
|
xor di,di
|
|
add ax,si
|
|
adc dx,di // [dx:ax:bx] := n*q_hat; di=0
|
|
mov si,word [z]
|
|
sub si,bx
|
|
mov si,word [z+2]
|
|
sbb si,ax
|
|
sbb di,dx
|
|
sbb cx,0
|
|
// 6. done: q := [0:cx]
|
|
mov word [result],cx
|
|
@@3:
|
|
end;
|
|
end;
|
|
|
|
|
|
{$define FPC_SYSTEM_HAS_MOD_DWORD}
|
|
function fpc_mod_dword( n, z: dword ): dword; [public, alias:'FPC_MOD_DWORD']; compilerproc;
|
|
begin
|
|
{ routine contributed by Max Nazhalov }
|
|
result := z;
|
|
if n=0 then
|
|
HandleErrorAddrFrameInd(200,get_pc_addr,get_frame);
|
|
asm
|
|
mov ax,word [z]
|
|
mov dx,word [z+2]
|
|
mov bx,word [n]
|
|
mov cx,word [n+2]
|
|
// check for underflow: z<n
|
|
mov si,dx
|
|
cmp ax,bx
|
|
sbb si,cx
|
|
jc @@4
|
|
// select one of 3 trivial cases
|
|
test cx,cx
|
|
jnz @@1
|
|
cmp dx,bx
|
|
jnc @@0
|
|
// (i) single division: n<=0xFFFF, z<=(n<<16)-1
|
|
div bx
|
|
jmp @@3 // r=cx:dx (cx=0)
|
|
@@0: // (ii) two divisions: n<=0xFFFF, z>(n<<16)-1
|
|
// q1 := [0:z1] div n; r := [0:z1] mod n;
|
|
// q0 := [r:z0] div n; r := [r:z0] mod n;
|
|
xchg ax,cx
|
|
xchg ax,dx
|
|
{ dx=0, ax=z1, cx=z0 }
|
|
div bx
|
|
mov ax,cx
|
|
xor cx,cx
|
|
{ dx=r, ax=z0, cx=0 }
|
|
div bx
|
|
jmp @@3 // r=cx:dx (cx=0)
|
|
@@1: // (iii) long divisor: n>=0x10000 (hence q<=0xFFFF)
|
|
// Special case of the generic "schoolbook" division [see e.g. Knuth]:
|
|
// 1. normalize divisor: [n1:n0] := n<<m, so that 0x8000<=n1<=0xFFFF
|
|
// n>=0x10000 -> m<=15
|
|
// 2. adjust divident accordingly: [z2:z1:z0] := z<<m
|
|
// m<=15 -> z2<=0x7FFF
|
|
// implementation: instead do >> dropping n0 and z0
|
|
mov si,bx // save n0
|
|
mov di,cx // save n1
|
|
test ch,ch
|
|
jz @@2
|
|
mov bl,bh
|
|
mov bh,cl
|
|
mov cl,ch
|
|
mov al,ah
|
|
mov ah,dl
|
|
mov dl,dh
|
|
xor dh,dh
|
|
@@2: // repeat >> 1..8 times resulting in [dx:ax]=[z2:z1] and bx=n1
|
|
shr cl,1
|
|
rcr bx,1
|
|
shr dx,1
|
|
rcr ax,1
|
|
test cl,cl
|
|
jnz @@2
|
|
// 3. estimate quotient: q_hat := [z2:z1]/n1
|
|
// Division never overflows since z2<=0x7FFF and n1>0x7FFF
|
|
div bx
|
|
// 4. multiply & subtract calculating remainder:
|
|
// r := z-n*q_hat (z and n are original)
|
|
// 5. adjust quotient: while (r<0) do { q_hat-=1; r+=n };
|
|
// theoretically, 0..2 iterations are required [see e.g. Knuth];
|
|
// in practice, with such initial data, at most one iteration
|
|
// is needed (no disproof has been found yet; and if it will
|
|
// ever be found -- it also should raise doubts about the i386
|
|
// fpc_div_qword helper again; see FPC mantis #23963)
|
|
mov cx,ax // save q_hat
|
|
mul si
|
|
mov bx,ax
|
|
mov si,dx
|
|
mov ax,cx
|
|
mul di
|
|
xor di,di
|
|
add ax,si
|
|
adc dx,di // [dx:ax:bx] := n*q_hat; di=0
|
|
mov si,word [z]
|
|
mov cx,word [z+2]
|
|
sub si,bx
|
|
sbb cx,ax
|
|
sbb di,dx
|
|
mov dx,si
|
|
jnc @@3
|
|
add dx,word [n]
|
|
adc cx,word [n+2]
|
|
@@3: // done: r=cx:dx
|
|
mov word [result],dx
|
|
mov word [result+2],cx
|
|
@@4:
|
|
end;
|
|
end;
|