* slightly modified patch by J. Gareth Moreton: Optimization for 'mod' on i386/x86-64, resolves #32945

git-svn-id: trunk@37922 -
This commit is contained in:
florian 2018-01-06 14:58:28 +00:00
parent 1934285c20
commit 81b2cf5d65
4 changed files with 314 additions and 8 deletions

2
.gitattributes vendored
View File

@ -11948,6 +11948,8 @@ tests/test/cg/tmanypar.pp svneol=native#text/plain
tests/test/cg/tmoddiv.pp svneol=native#text/plain
tests/test/cg/tmoddiv1.pp svneol=native#text/plain
tests/test/cg/tmoddiv2.pp svneol=native#text/plain
tests/test/cg/tmoddiv3.pp svneol=native#text/pascal
tests/test/cg/tmoddiv4.pp svneol=native#text/pascal
tests/test/cg/tmul3264.pp svneol=native#text/plain
tests/test/cg/tneg.pp svneol=native#text/plain
tests/test/cg/tnegnotassign1.pp svneol=native#text/plain

View File

@ -55,10 +55,10 @@ interface
constexp,
cutils,verbose,globals,
symconst,symdef,
aasmbase,aasmtai,aasmdata,defutil,
aasmbase,aasmtai,aasmcpu,aasmdata,defutil,
cgbase,pass_1,pass_2,
ncon,
cpubase,
cpubase,cpuinfo,
cga,cgobj,hlcgobj,cgx86,cgutils;
@ -378,8 +378,9 @@ interface
procedure tx86moddivnode.pass_generate_code;
var
hreg1,hreg2,rega,regd:Tregister;
hreg1,hreg2,hreg3,rega,regd:Tregister;
power:longint;
instr:TAiCpu;
op:Tasmop;
cgsize:TCgSize;
opsize:topsize;
@ -387,6 +388,8 @@ interface
d,m: aword;
m_add, invertsign: boolean;
s: byte;
label
DefaultDiv;
begin
secondpass(left);
if codegenerror then
@ -522,15 +525,103 @@ interface
end;
end
{ unsigned modulus by a (+/-)power-of-2 constant? }
else if (nodetype=modn) and (right.nodetype=ordconstn) and
isabspowerof2(tordconstnode(right).value,power) and
not(is_signed(left.resultdef)) then
else if (nodetype=modn) and (right.nodetype=ordconstn) and not(is_signed(left.resultdef)) then
begin
emit_const_reg(A_AND,opsize,(aint(1) shl power)-1,hreg1);
location.register:=hreg1;
if isabspowerof2(tordconstnode(right).value,power) then
begin
emit_const_reg(A_AND,opsize,(aint(1) shl power)-1,hreg1);
location.register:=hreg1;
end
else
begin
d:=tordconstnode(right).value.svalue;
if d>=aword(1) shl (left.resultdef.size*8-1) then
begin
if not (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
goto DefaultDiv;
location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
m := aword(-aint(d)); { Two's complement of d }
if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP }
begin
hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
emit_const_reg(A_MOV,opsize,aint(d),hreg2);
emit_const_reg(A_MOV,opsize,aint(m),hreg3);
emit_reg_reg(A_XOR,opsize,location.register,location.register);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_reg_reg(A_CMP,opsize,hreg2,hreg1);
{ Emit conditional move that depends on the carry flag }
instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,hreg3,location.register);
instr.condition := C_AE;
current_asmdata.CurrAsmList.concat(instr);
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
end
else
begin
emit_const_reg(A_MOV,opsize,aint(m),hreg3);
emit_reg_reg(A_XOR,opsize,location.register,location.register);
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_const_reg(A_CMP,opsize,aint(d),hreg1);
{ Emit conditional move that depends on the carry flag }
instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,hreg3,location.register);
instr.condition := C_AE;
current_asmdata.CurrAsmList.concat(instr);
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
end;
emit_reg_reg(A_ADD,opsize,hreg1,location.register);
end
else
begin
{ Convert the division to a multiplication }
calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
cg.getcpuregister(current_asmdata.CurrAsmList,rega);
emit_const_reg(A_MOV,opsize,aint(m),rega);
cg.getcpuregister(current_asmdata.CurrAsmList,regd);
emit_reg(A_MUL,opsize,hreg1);
cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
if m_add then
begin
{ addition can overflow, shift first bit considering carry,
then shift remaining bits in regular way. }
cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
emit_reg_reg(A_ADD,opsize,hreg1,regd);
emit_const_reg(A_RCR,opsize,1,regd);
cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
dec(s);
end;
if s<>0 then
emit_const_reg(A_SHR,opsize,aint(s),regd);
if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in IMUL }
begin
hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
emit_const_reg(A_MOV,opsize,aint(d),hreg3);
emit_reg_reg(A_IMUL,opsize,hreg3,regd);
end
else
emit_const_reg(A_IMUL,opsize,aint(d),regd);
emit_reg_reg(A_SUB,opsize,regd,hreg2);
cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,hreg2,location.register)
end;
end;
end
else
begin
DefaultDiv:
{Bring denominator to a register.}
cg.getcpuregister(current_asmdata.CurrAsmList,rega);
emit_reg_reg(A_MOV,opsize,hreg1,rega);

103
tests/test/cg/tmoddiv3.pp Normal file
View File

@ -0,0 +1,103 @@
{ Test program: evaluates LongWord div/mod by a series of literal divisors and
  compares every result with a table of expected values. }
program testfile2;
const
{ Dividends; each one is run through every div/mod statement of the main loop. }
TestValues: array[0..9] of LongWord = (500, 1, 0, 995, $7FFFFFFF, $80000000, $80000001, $80000002, $FFFFFFFF, 1000000);
const
{ One row per TestValues entry. The 16 columns are (div, mod) pairs in the
  order the main loop evaluates them, for the divisors
  1000, 1, 3, $1000, $7FFFFFFF, $80000000, $80000001 and $FFFFFFFF. }
ExpectedResults: array[0..9,1..16] of LongWord = (
(0,500,500,0,166,2,0,500,0,500,0,500,0,500,0,500),
(0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1),
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
(0,995,995,0,331,2,0,995,0,995,0,995,0,995,0,995),
(2147483,647,2147483647,0,715827882,1,524287,4095,1,0,0,2147483647,0,2147483647,0,2147483647),
(2147483,648,2147483648,0,715827882,2,524288,0,1,1,1,0,0,2147483648,0,2147483648),
(2147483,649,2147483649,0,715827883,0,524288,1,1,2,1,1,1,0,0,2147483649),
(2147483,650,2147483650,0,715827883,1,524288,2,1,3,1,2,1,1,0,2147483650),
(4294967,295,4294967295,0,1431655765,0,1048575,4095,2,1,1,2147483647,1,2147483646,1,0),
(1000,0,1000000,0,333333,1,244,576,0,1000000,0,1000000,0,1000000,0,1000000));
var
{ X: current dividend; Y: most recent div/mod result;
  C: current row of TestValues/ExpectedResults;
  Col: ExpectedResults column the next DoCheck call compares against. }
X, Y, C, Col: LongWord;
{ Verifies the global Y against ExpectedResults[C,Col].
  On a match the column cursor Col is advanced by one; on a mismatch the row
  and column are printed and the program halts with exit code 1. }
procedure DoCheck;
begin
  if Y=ExpectedResults[C,Col] then
    Inc(Col)
  else
    begin
      writeln('Error at ',C,' ',Col);
      halt(1);
    end;
end;
{ Main block: for every dividend in TestValues, compute div and mod for each
  literal divisor, echo the result and have DoCheck compare it with the
  matching ExpectedResults column. The statement order below defines the
  column order of the table. Each divisor is spelled as a literal in its own
  statement - presumably so the compiler's constant-divisor code generation is
  what gets exercised. }
begin
for C := Low(TestValues) to High(TestValues) do
begin
X := TestValues[C];
{ DoCheck advances Col after each successful comparison, so restart at the
  first column for every row. }
Col := 1;
{ Columns 1-2: divisor 1000 }
Y := X div 1000;
Write(Y,',');
DoCheck;
Y := X mod 1000;
Write(Y,',');
DoCheck;
{ Columns 3-4: divisor 1 }
Y := X div 1;
Write(Y,',');
DoCheck;
Y := X mod 1;
Write(Y,',');
DoCheck;
{ Columns 5-6: divisor 3 }
Y := X div 3;
Write(Y,',');
DoCheck;
Y := X mod 3;
Write(Y,',');
DoCheck;
{ Columns 7-8: divisor $1000 (a power of two) }
Y := X div $1000;
Write(Y,',');
DoCheck;
Y := X mod $1000;
Write(Y,',');
DoCheck;
{ Columns 9-10: divisor $7FFFFFFF }
Y := X div $7FFFFFFF;
Write(Y,',');
DoCheck;
Y := X mod $7FFFFFFF;
Write(Y,',');
DoCheck;
{ Columns 11-12: divisor $80000000 (top bit set, power of two) }
Y := X div $80000000;
Write(Y,',');
DoCheck;
Y := X mod $80000000;
Write(Y,',');
DoCheck;
{ Columns 13-14: divisor $80000001 (top bit set, not a power of two) }
Y := X div $80000001;
Write(Y,',');
DoCheck;
Y := X mod $80000001;
Write(Y,',');
DoCheck;
{ Columns 15-16: divisor $FFFFFFFF; the last value ends the output line }
Y := X div $FFFFFFFF;
Write(Y,',');
DoCheck;
Y := X mod $FFFFFFFF;
Writeln(Y);
DoCheck;
end;
{ Only reached when no DoCheck call halted the program. }
writeln('ok');
end.

110
tests/test/cg/tmoddiv4.pp Normal file
View File

@ -0,0 +1,110 @@
{ Test data for QWord div/mod by a series of literal divisors. }
const
{ Dividends; each one is run through every div/mod statement of the main loop.
  The values with the top bit set are wrapped in an explicit QWord() cast. }
TestValues: array[0..10] of QWord = (500, 1, 0, 995, $100000000, $100000001, $7FFFFFFFFFFFFFFF, QWord($8000000000000000), QWord($8000000000000001), QWord($8000000000000002), 1000000);
const
{ One row per TestValues entry. The 18 columns are (div, mod) pairs in the
  order the main loop evaluates them, for the divisors
  1000, 1, 3, $1000, $7FFFFFFF, $80000000, $7FFFFFFFFFFFFFFF,
  $8000000000000000 and $8000000000000001.
  NOTE(review): several entries in the last four columns (for example
  18446744073709551615,0 for dividend $7FFFFFFFFFFFFFFF, or
  0,9223372036854775809 for dividend $8000000000000001) are the results signed
  64-bit division would give when reinterpreted as QWord, not the results of
  unsigned division. Presumably the uncast hex literals used as divisors in the
  main loop are typed as Int64 by the compiler - confirm that this is the
  behaviour the test is meant to pin down. }
ExpectedResults: array[0..10,1..18] of QWord = (
(0,500,500,0,166,2,0,500,0,500,0,500,0,500,0,500,0,500),
(0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1),
(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
(0,995,995,0,331,2,0,995,0,995,0,995,0,995,0,995,0,995),
(4294967,296,4294967296,0,1431655765,1,1048576,0,2,2,2,0,0,4294967296,0,4294967296,0,4294967296),
(4294967,297,4294967297,0,1431655765,2,1048576,1,2,3,2,1,0,4294967297,0,4294967297,0,4294967297),
(9223372036854775,807,9223372036854775807,0,3074457345618258602,1,2251799813685247,4095,4294967298,1,4294967295,2147483647,1,0,0,9223372036854775807,18446744073709551615,0),
(9223372036854775,808,9223372036854775808,0,3074457345618258602,2,2251799813685248,0,4294967298,2,4294967296,0,1,1,1,0,1,18446744073709551615),
(9223372036854775,809,9223372036854775809,0,3074457345618258603,0,2251799813685248,1,4294967298,3,4294967296,1,1,2,0,9223372036854775809,1,0),
(9223372036854775,810,9223372036854775810,0,3074457345618258603,1,2251799813685248,2,4294967298,4,4294967296,2,1,3,0,9223372036854775810,0,9223372036854775810),
(1000,0,1000000,0,333333,1,244,576,0,1000000,0,1000000,0,1000000,0,1000000,0,1000000));
var
{ X: current dividend; Y: most recent div/mod result. }
X, Y: QWord;
{ C: current row of TestValues/ExpectedResults;
  Col: ExpectedResults column the next DoCheck call compares against. }
C, Col: LongWord;
{ Checks the global Y against the expected value at row C, column Col.
  A matching value moves Col on to the next column; any other value prints the
  failing row/column pair and terminates the program with exit code 1. }
procedure DoCheck;
begin
  if Y=ExpectedResults[C,Col] then
    Inc(Col)
  else
    begin
      writeln('Error at ',C,' ',Col);
      halt(1);
    end;
end;
{ Main block: for every dividend in TestValues, compute div and mod for each
  literal divisor, echo the result and have DoCheck compare it with the
  matching ExpectedResults column. The statement order below defines the
  column order of the table, and every divisor stays a literal in its own
  statement. }
begin
  for C := Low(TestValues) to High(TestValues) do
    begin
      X := TestValues[C];
      { DoCheck advances Col after each successful comparison, so restart at
        the first column for every row. }
      Col := 1;

      { Columns 1-2: divisor 1000 }
      Y := QWord(X) div 1000;
      Write(Y,',');
      DoCheck;
      Y := QWord(X) mod 1000;
      Write(Y,',');
      DoCheck;

      { Columns 3-4: divisor 1 }
      Y := QWord(X) div 1;
      Write(Y,',');
      DoCheck;
      Y := QWord(X) mod 1;
      Write(Y,',');
      DoCheck;

      { Columns 5-6: divisor 3 }
      Y := QWord(X) div 3;
      Write(Y,',');
      DoCheck;
      Y := QWord(X) mod 3;
      Write(Y,',');
      DoCheck;

      { Columns 7-8: divisor $1000 (a power of two) }
      Y := QWord(X) div $1000;
      Write(Y,',');
      DoCheck;
      Y := QWord(X) mod $1000;
      Write(Y,',');
      DoCheck;

      { Columns 9-10: divisor $7FFFFFFF }
      Y := QWord(X) div $7FFFFFFF;
      Write(Y,',');
      DoCheck;
      Y := QWord(X) mod $7FFFFFFF;
      Write(Y,',');
      DoCheck;

      { Columns 11-12: divisor $80000000 }
      Y := QWord(X) div $80000000;
      Write(Y,',');
      DoCheck;
      Y := QWord(X) mod $80000000;
      Write(Y,',');
      DoCheck;

      { Columns 13-14: divisor $7FFFFFFFFFFFFFFF }
      Y := QWord(X) div $7FFFFFFFFFFFFFFF;
      Write(Y,',');
      DoCheck;
      Y := QWord(X) mod $7FFFFFFFFFFFFFFF;
      Write(Y,',');
      DoCheck;

      { Columns 15-16: divisor $8000000000000000 }
      Y := QWord(X) div $8000000000000000;
      Write(Y,',');
      DoCheck;
      Y := QWord(X) mod $8000000000000000;
      Write(Y,',');
      DoCheck;

      { Columns 17-18: divisor $8000000000000001; the last value ends the
        output line }
      Y := QWord(X) div $8000000000000001;
      Write(Y,',');
      DoCheck;
      Y := QWord(X) mod $8000000000000001;
      Writeln(Y);
      DoCheck;
    end;
  { Only reached when no DoCheck call halted the program; print the same
    explicit success line that the sibling test tmoddiv3.pp prints. }
  writeln('ok');
end.