From 81b2cf5d65d9969277b2dbee61848c7b238ed350 Mon Sep 17 00:00:00 2001 From: florian <florian@freepascal.org> Date: Sat, 6 Jan 2018 14:58:28 +0000 Subject: [PATCH] * slightly modified patch by J. Gareth Moreton: Optimization for 'mod' on i386/x86-64, resolves #32945 git-svn-id: trunk@37922 - --- .gitattributes | 2 + compiler/x86/nx86mat.pas | 107 +++++++++++++++++++++++++++++++++--- tests/test/cg/tmoddiv3.pp | 103 +++++++++++++++++++++++++++++++++++ tests/test/cg/tmoddiv4.pp | 110 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 314 insertions(+), 8 deletions(-) create mode 100644 tests/test/cg/tmoddiv3.pp create mode 100644 tests/test/cg/tmoddiv4.pp diff --git a/.gitattributes b/.gitattributes index 94b0bcfc93..190aaca598 100644 --- a/.gitattributes +++ b/.gitattributes @@ -11948,6 +11948,8 @@ tests/test/cg/tmanypar.pp svneol=native#text/plain tests/test/cg/tmoddiv.pp svneol=native#text/plain tests/test/cg/tmoddiv1.pp svneol=native#text/plain tests/test/cg/tmoddiv2.pp svneol=native#text/plain +tests/test/cg/tmoddiv3.pp svneol=native#text/pascal +tests/test/cg/tmoddiv4.pp svneol=native#text/pascal tests/test/cg/tmul3264.pp svneol=native#text/plain tests/test/cg/tneg.pp svneol=native#text/plain tests/test/cg/tnegnotassign1.pp svneol=native#text/plain diff --git a/compiler/x86/nx86mat.pas b/compiler/x86/nx86mat.pas index b13f5c85d4..d0b20aa562 100644 --- a/compiler/x86/nx86mat.pas +++ b/compiler/x86/nx86mat.pas @@ -55,10 +55,10 @@ interface constexp, cutils,verbose,globals, symconst,symdef, - aasmbase,aasmtai,aasmdata,defutil, + aasmbase,aasmtai,aasmcpu,aasmdata,defutil, cgbase,pass_1,pass_2, ncon, - cpubase, + cpubase,cpuinfo, cga,cgobj,hlcgobj,cgx86,cgutils; @@ -378,8 +378,9 @@ interface procedure tx86moddivnode.pass_generate_code; var - hreg1,hreg2,rega,regd:Tregister; + hreg1,hreg2,hreg3,rega,regd:Tregister; power:longint; + instr:TAiCpu; op:Tasmop; cgsize:TCgSize; opsize:topsize; @@ -387,6 +388,8 @@ interface d,m: aword; m_add, invertsign: boolean; s: byte; + label + DefaultDiv; begin secondpass(left); if codegenerror then @@ -522,15 +525,103 @@ interface end; end { unsigned modulus by a (+/-)power-of-2 constant? } - else if (nodetype=modn) and (right.nodetype=ordconstn) and - isabspowerof2(tordconstnode(right).value,power) and - not(is_signed(left.resultdef)) then + else if (nodetype=modn) and (right.nodetype=ordconstn) and not(is_signed(left.resultdef)) then begin - emit_const_reg(A_AND,opsize,(aint(1) shl power)-1,hreg1); - location.register:=hreg1; + if isabspowerof2(tordconstnode(right).value,power) then + begin + emit_const_reg(A_AND,opsize,(aint(1) shl power)-1,hreg1); + location.register:=hreg1; + end + else + begin + d:=tordconstnode(right).value.svalue; + if d>=aword(1) shl (left.resultdef.size*8-1) then + begin + + if not (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then + goto DefaultDiv; + + location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize); + hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize); + + m := aword(-aint(d)); { Two's complement of d } + + if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP } + begin + hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize); + emit_const_reg(A_MOV,opsize,aint(d),hreg2); + emit_const_reg(A_MOV,opsize,aint(m),hreg3); + emit_reg_reg(A_XOR,opsize,location.register,location.register); + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); + emit_reg_reg(A_CMP,opsize,hreg2,hreg1); + + { Emit conditional move that depends on the carry flag } + instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,hreg3,location.register); + instr.condition := C_AE; + current_asmdata.CurrAsmList.concat(instr); + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); + end + else + begin + emit_const_reg(A_MOV,opsize,aint(m),hreg3); + emit_reg_reg(A_XOR,opsize,location.register,location.register); + + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); + emit_const_reg(A_CMP,opsize,aint(d),hreg1); + + { Emit conditional move that depends on the carry flag } + instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,hreg3,location.register); + instr.condition := C_AE; + current_asmdata.CurrAsmList.concat(instr); + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); + end; + + emit_reg_reg(A_ADD,opsize,hreg1,location.register); + end + else + begin + { Convert the division to a multiplication } + calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s); + cg.getcpuregister(current_asmdata.CurrAsmList,rega); + emit_const_reg(A_MOV,opsize,aint(m),rega); + cg.getcpuregister(current_asmdata.CurrAsmList,regd); + emit_reg(A_MUL,opsize,hreg1); + cg.ungetcpuregister(current_asmdata.CurrAsmList,rega); + hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize); + emit_reg_reg(A_MOV,opsize,hreg1,hreg2); + if m_add then + begin + { addition can overflow, shift first bit considering carry, + then shift remaining bits in regular way. } + cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); + emit_reg_reg(A_ADD,opsize,hreg1,regd); + emit_const_reg(A_RCR,opsize,1,regd); + cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS); + dec(s); + end; + if s<>0 then + emit_const_reg(A_SHR,opsize,aint(s),regd); + + if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in IMUL } + begin + hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize); + emit_const_reg(A_MOV,opsize,aint(d),hreg3); + emit_reg_reg(A_IMUL,opsize,hreg3,regd); + end + else + emit_const_reg(A_IMUL,opsize,aint(d),regd); + + emit_reg_reg(A_SUB,opsize,regd,hreg2); + cg.ungetcpuregister(current_asmdata.CurrAsmList,regd); + location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize); + cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,hreg2,location.register) + end; + + end; end else begin +DefaultDiv: {Bring denominator to a register.} cg.getcpuregister(current_asmdata.CurrAsmList,rega); emit_reg_reg(A_MOV,opsize,hreg1,rega); diff --git a/tests/test/cg/tmoddiv3.pp b/tests/test/cg/tmoddiv3.pp new file mode 100644 index 0000000000..bfcd59d9d6 --- /dev/null +++ b/tests/test/cg/tmoddiv3.pp @@ -0,0 +1,103 @@ +program testfile2; + +const + TestValues: array[0..9] of LongWord = (500, 1, 0, 995, $7FFFFFFF, $80000000, $80000001, $80000002, $FFFFFFFF, 1000000); + +const + ExpectedResults: array[0..9,1..16] of LongWord = ( + (0,500,500,0,166,2,0,500,0,500,0,500,0,500,0,500), + (0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1), + (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), + (0,995,995,0,331,2,0,995,0,995,0,995,0,995,0,995), + (2147483,647,2147483647,0,715827882,1,524287,4095,1,0,0,2147483647,0,2147483647,0,2147483647), + (2147483,648,2147483648,0,715827882,2,524288,0,1,1,1,0,0,2147483648,0,2147483648), + (2147483,649,2147483649,0,715827883,0,524288,1,1,2,1,1,1,0,0,2147483649), + (2147483,650,2147483650,0,715827883,1,524288,2,1,3,1,2,1,1,0,2147483650), + (4294967,295,4294967295,0,1431655765,0,1048575,4095,2,1,1,2147483647,1,2147483646,1,0), + (1000,0,1000000,0,333333,1,244,576,0,1000000,0,1000000,0,1000000,0,1000000)); + +var + X, Y, C, Col: LongWord; + +procedure DoCheck; + begin + if Y<>ExpectedResults[C,Col] then + begin + writeln('Error at ',C,' ',Col); + halt(1); + end; + Inc(Col); + end; + +begin + for C := Low(TestValues) to High(TestValues) do + begin + X := TestValues[C]; + Col := 1; + + Y := X div 1000; + Write(Y,','); + DoCheck; + + Y := X mod 1000; + Write(Y,','); + DoCheck; + + Y := X div 1; + Write(Y,','); + DoCheck; + + Y := X mod 1; + Write(Y,','); + DoCheck; + + Y := X div 3; + Write(Y,','); + DoCheck; + + Y := X mod 3; + Write(Y,','); + DoCheck; + + Y := X div $1000; + Write(Y,','); + DoCheck; + + Y := X mod $1000; + Write(Y,','); + DoCheck; + + Y := X div $7FFFFFFF; + Write(Y,','); + DoCheck; + + Y := X mod $7FFFFFFF; + Write(Y,','); + DoCheck; + + Y := X div $80000000; + Write(Y,','); + DoCheck; + + Y := X mod $80000000; + Write(Y,','); + DoCheck; + + Y := X div $80000001; + Write(Y,','); + DoCheck; + + Y := X mod $80000001; + Write(Y,','); + DoCheck; + + Y := X div $FFFFFFFF; + Write(Y,','); + DoCheck; + + Y := X mod $FFFFFFFF; + Writeln(Y); + DoCheck; + end; + writeln('ok'); +end. diff --git a/tests/test/cg/tmoddiv4.pp b/tests/test/cg/tmoddiv4.pp new file mode 100644 index 0000000000..3ad24e46a2 --- /dev/null +++ b/tests/test/cg/tmoddiv4.pp @@ -0,0 +1,110 @@ + +const + TestValues: array[0..10] of QWord = (500, 1, 0, 995, $100000000, $100000001, $7FFFFFFFFFFFFFFF, QWord($8000000000000000), QWord($8000000000000001), QWord($8000000000000002), 1000000); + +const + ExpectedResults: array[0..10,1..18] of QWord = ( + (0,500,500,0,166,2,0,500,0,500,0,500,0,500,0,500,0,500), + (0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1), + (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), + (0,995,995,0,331,2,0,995,0,995,0,995,0,995,0,995,0,995), + (4294967,296,4294967296,0,1431655765,1,1048576,0,2,2,2,0,0,4294967296,0,4294967296,0,4294967296), + (4294967,297,4294967297,0,1431655765,2,1048576,1,2,3,2,1,0,4294967297,0,4294967297,0,4294967297), + (9223372036854775,807,9223372036854775807,0,3074457345618258602,1,2251799813685247,4095,4294967298,1,4294967295,2147483647,1,0,0,9223372036854775807,18446744073709551615,0), + (9223372036854775,808,9223372036854775808,0,3074457345618258602,2,2251799813685248,0,4294967298,2,4294967296,0,1,1,1,0,1,18446744073709551615), + (9223372036854775,809,9223372036854775809,0,3074457345618258603,0,2251799813685248,1,4294967298,3,4294967296,1,1,2,0,9223372036854775809,1,0), + (9223372036854775,810,9223372036854775810,0,3074457345618258603,1,2251799813685248,2,4294967298,4,4294967296,2,1,3,0,9223372036854775810,0,9223372036854775810), + (1000,0,1000000,0,333333,1,244,576,0,1000000,0,1000000,0,1000000,0,1000000,0,1000000)); + +var + X, Y: QWord; + C, Col: LongWord; + +procedure DoCheck; + begin + if Y<>ExpectedResults[C,Col] then + begin + writeln('Error at ',C,' ',Col); + halt(1); + end; + Inc(Col); + end; + +begin + for C := Low(TestValues) to High(TestValues) do + begin + X := TestValues[C]; + Col := 1; + Y := QWord(X) div 1000; + Write(Y,','); + DoCheck; + + Y := QWord(X) mod 1000; + Write(Y,','); + DoCheck; + + Y := QWord(X) div 1; + Write(Y,','); + DoCheck; + + Y := QWord(X) mod 1; + Write(Y,','); + DoCheck; + + Y := QWord(X) div 3; + Write(Y,','); + DoCheck; + + Y := QWord(X) mod 3; + Write(Y,','); + DoCheck; + + Y := QWord(X) div $1000; + Write(Y,','); + DoCheck; + + Y := QWord(X) mod $1000; + Write(Y,','); + DoCheck; + + Y := QWord(X) div $7FFFFFFF; + Write(Y,','); + DoCheck; + + Y := QWord(X) mod $7FFFFFFF; + Write(Y,','); + DoCheck; + + Y := QWord(X) div $80000000; + Write(Y,','); + DoCheck; + + Y := QWord(X) mod $80000000; + Write(Y,','); + DoCheck; + + Y := QWord(X) div $7FFFFFFFFFFFFFFF; + Write(Y,','); + DoCheck; + + Y := QWord(X) mod $7FFFFFFFFFFFFFFF; + Write(Y,','); + DoCheck; + + Y := QWord(X) div $8000000000000000; + Write(Y,','); + DoCheck; + + Y := QWord(X) mod $8000000000000000; + Write(Y,','); + DoCheck; + + Y := QWord(X) div $8000000000000001; + Write(Y,','); + DoCheck; + + Y := QWord(X) mod $8000000000000001; + Writeln(Y); + DoCheck; + end; +end.