mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-09-04 18:10:26 +02:00
* use add/sub instead of inc/dec on modern CPUs when optimizing for speed
git-svn-id: trunk@25057 -
This commit is contained in:
parent
af1d33767d
commit
507edb16de
@ -41,7 +41,8 @@ uses
|
||||
{$ifdef finaldestdebug}
|
||||
cobjects,
|
||||
{$endif finaldestdebug}
|
||||
cpuinfo,cpubase,cgutils,daopt386;
|
||||
cpuinfo,cpubase,cgutils,daopt386,
|
||||
cgx86;
|
||||
|
||||
|
||||
function isFoldableArithOp(hp1: taicpu; reg: tregister): boolean;
|
||||
@ -960,13 +961,13 @@ begin
|
||||
if (base = taicpu(p).oper[1]^.reg) then
|
||||
begin
|
||||
l := offset;
|
||||
if (l=1) then
|
||||
if (l=1) and UseIncDec then
|
||||
begin
|
||||
taicpu(p).opcode := A_INC;
|
||||
taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
|
||||
taicpu(p).ops := 1
|
||||
end
|
||||
else if (l=-1) then
|
||||
else if (l=-1) and UseIncDec then
|
||||
begin
|
||||
taicpu(p).opcode := A_DEC;
|
||||
taicpu(p).loadreg(0,taicpu(p).oper[1]^.reg);
|
||||
@ -2121,6 +2122,8 @@ begin
|
||||
end;
|
||||
case taicpu(p).opcode Of
|
||||
A_CALL:
|
||||
{ don't do this on modern CPUs, this really hurts them due to
|
||||
broken call/ret pairing }
|
||||
if (current_settings.optimizecputype < cpu_Pentium2) and
|
||||
not(cs_create_pic in current_settings.moduleswitches) and
|
||||
GetNextInstruction(p, hp1) and
|
||||
|
@ -167,6 +167,8 @@ unit cgx86;
|
||||
|
||||
function UseAVX: boolean;
|
||||
|
||||
function UseIncDec: boolean;
|
||||
|
||||
implementation
|
||||
|
||||
uses
|
||||
@ -180,6 +182,21 @@ unit cgx86;
|
||||
Result:=current_settings.fputype in fpu_avx_instructionsets;
|
||||
end;
|
||||
|
||||
|
||||
{ modern CPUs prefer add/sub over inc/dec: add/sub break instruction dependencies on flags
|
||||
because they modify all flags }
|
||||
function UseIncDec: boolean;
  var
    SizeOptimized: boolean;
  begin
    { Tells the caller whether INC/DEC should be emitted instead of
      ADD/SUB with a constant of 1. The answer is always yes when the
      optimizer is asked to favour code size; the 32 and 16 bit targets
      additionally say yes for the CPU types listed below. }
    SizeOptimized:=cs_opt_size in current_settings.optimizerswitches;
{$if defined(x86_64)}
    { x86_64: only the size switch matters }
    Result:=SizeOptimized;
{$elseif defined(i386)}
    { i386: also when the selected CPU type is cpu_386 }
    Result:=SizeOptimized or (current_settings.cputype=cpu_386);
{$elseif defined(i8086)}
    { i8086: also for every CPU type in the range cpu_8086..cpu_386 }
    Result:=SizeOptimized or (current_settings.cputype in [cpu_8086..cpu_386]);
{$endif}
  end;
|
||||
|
||||
|
||||
const
|
||||
TOpCG2AsmOp: Array[topcg] of TAsmOp = (A_NONE,A_MOV,A_ADD,A_AND,A_DIV,
|
||||
A_IDIV,A_IMUL,A_MUL,A_NEG,A_NOT,A_OR,
|
||||
@ -1596,11 +1613,14 @@ unit cgx86;
|
||||
OP_ADD, OP_AND, OP_OR, OP_SUB, OP_XOR:
|
||||
if not(cs_check_overflow in current_settings.localswitches) and
|
||||
(a = 1) and
|
||||
(op in [OP_ADD,OP_SUB]) then
|
||||
if op = OP_ADD then
|
||||
list.concat(taicpu.op_reg(A_INC,TCgSize2OpSize[size],reg))
|
||||
else
|
||||
list.concat(taicpu.op_reg(A_DEC,TCgSize2OpSize[size],reg))
|
||||
(op in [OP_ADD,OP_SUB]) and
|
||||
UseIncDec then
|
||||
begin
|
||||
if op = OP_ADD then
|
||||
list.concat(taicpu.op_reg(A_INC,TCgSize2OpSize[size],reg))
|
||||
else
|
||||
list.concat(taicpu.op_reg(A_DEC,TCgSize2OpSize[size],reg))
|
||||
end
|
||||
else if (a = 0) then
|
||||
if (op <> OP_AND) then
|
||||
exit
|
||||
@ -1727,11 +1747,14 @@ unit cgx86;
|
||||
OP_ADD, OP_AND, OP_OR, OP_SUB, OP_XOR:
|
||||
if not(cs_check_overflow in current_settings.localswitches) and
|
||||
(a = 1) and
|
||||
(op in [OP_ADD,OP_SUB]) then
|
||||
if op = OP_ADD then
|
||||
list.concat(taicpu.op_ref(A_INC,TCgSize2OpSize[size],tmpref))
|
||||
else
|
||||
list.concat(taicpu.op_ref(A_DEC,TCgSize2OpSize[size],tmpref))
|
||||
(op in [OP_ADD,OP_SUB]) and
|
||||
UseIncDec then
|
||||
begin
|
||||
if op = OP_ADD then
|
||||
list.concat(taicpu.op_ref(A_INC,TCgSize2OpSize[size],tmpref))
|
||||
else
|
||||
list.concat(taicpu.op_ref(A_DEC,TCgSize2OpSize[size],tmpref))
|
||||
end
|
||||
else if (a = 0) then
|
||||
if (op <> OP_AND) then
|
||||
exit
|
||||
@ -2371,7 +2394,10 @@ unit cgx86;
|
||||
a_label(list,again);
|
||||
decrease_sp(winstackpagesize-4);
|
||||
list.concat(Taicpu.op_reg(A_PUSH,S_L,NR_EAX));
|
||||
list.concat(Taicpu.op_reg(A_DEC,S_L,NR_EDI));
|
||||
if UseIncDec then
|
||||
list.concat(Taicpu.op_reg(A_DEC,S_L,NR_EDI))
|
||||
else
|
||||
list.concat(Taicpu.op_const_reg(A_SUB,S_L,1,NR_EDI));
|
||||
a_jmp_cond(list,OC_NE,again);
|
||||
decrease_sp(localsize mod winstackpagesize-4);
|
||||
reference_reset_base(href,NR_ESP,localsize-4,4);
|
||||
@ -2409,7 +2435,10 @@ unit cgx86;
|
||||
decrease_sp(winstackpagesize);
|
||||
reference_reset_base(href,NR_RSP,0,4);
|
||||
list.concat(Taicpu.op_reg_ref(A_MOV,S_L,NR_EAX,href));
|
||||
list.concat(Taicpu.op_reg(A_DEC,S_Q,NR_R10));
|
||||
if UseIncDec then
|
||||
list.concat(Taicpu.op_reg(A_DEC,S_Q,NR_R10))
|
||||
else
|
||||
list.concat(Taicpu.op_const_reg(A_SUB,S_Q,1,NR_R10));
|
||||
a_jmp_cond(list,OC_NE,again);
|
||||
decrease_sp(localsize mod winstackpagesize);
|
||||
ungetcpuregister(list,NR_R10);
|
||||
|
@ -143,7 +143,8 @@ unit nx86add;
|
||||
if (op=A_SUB) and
|
||||
(right.location.loc=LOC_CONSTANT) and
|
||||
(right.location.value=1) and
|
||||
not(cs_check_overflow in current_settings.localswitches) then
|
||||
not(cs_check_overflow in current_settings.localswitches) and
|
||||
UseIncDec then
|
||||
begin
|
||||
emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
|
||||
end
|
||||
|
Loading…
Reference in New Issue
Block a user