diff --git a/compiler/globtype.pas b/compiler/globtype.pas index 1e2262927b..0c9eafc486 100644 --- a/compiler/globtype.pas +++ b/compiler/globtype.pas @@ -267,7 +267,7 @@ interface type { optimizer } toptimizerswitch = (cs_opt_none, - cs_opt_level1,cs_opt_level2,cs_opt_level3, + cs_opt_level1,cs_opt_level2,cs_opt_level3,cs_opt_level4, cs_opt_regvar,cs_opt_uncertain,cs_opt_size,cs_opt_stackframe, cs_opt_peephole,cs_opt_asmcse,cs_opt_loopunroll,cs_opt_tailrecursion,cs_opt_nodecse, cs_opt_nodedfa,cs_opt_loopstrength,cs_opt_scheduler,cs_opt_autoinline,cs_useebp,cs_userbp, @@ -313,7 +313,7 @@ interface const OptimizerSwitchStr : array[toptimizerswitch] of string[17] = ('', - 'LEVEL1','LEVEL2','LEVEL3', + 'LEVEL1','LEVEL2','LEVEL3','LEVEL4', 'REGVAR','UNCERTAIN','SIZE','STACKFRAME', 'PEEPHOLE','ASMCSE','LOOPUNROLL','TAILREC','CSE', 'DFA','STRENGTH','SCHEDULE','AUTOINLINE','USEEBP','USERBP', @@ -345,7 +345,7 @@ interface genericlevel1optimizerswitches = [cs_opt_level1,cs_opt_peephole]; genericlevel2optimizerswitches = [cs_opt_level2,cs_opt_remove_emtpy_proc]; genericlevel3optimizerswitches = [cs_opt_level3,cs_opt_constant_propagate,cs_opt_nodedfa]; - genericlevel4optimizerswitches = [cs_opt_reorder_fields,cs_opt_dead_values,cs_opt_fastmath]; + genericlevel4optimizerswitches = [cs_opt_level4,cs_opt_reorder_fields,cs_opt_dead_values,cs_opt_fastmath]; { whole program optimizations whose information generation requires information from all loaded units diff --git a/compiler/i386/popt386.pas b/compiler/i386/popt386.pas index f485a39e59..b1ccefb71c 100644 --- a/compiler/i386/popt386.pas +++ b/compiler/i386/popt386.pas @@ -2331,22 +2331,47 @@ begin end; case taicpu(p).opcode Of A_CALL: - { don't do this on modern CPUs, this really hurts them due to - broken call/ret pairing } - if (current_settings.optimizecputype < cpu_Pentium2) and - not(cs_create_pic in current_settings.moduleswitches) and - GetNextInstruction(p, hp1) and - (hp1.typ = ait_instruction) and - (taicpu(hp1).opcode = A_JMP) and - ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then - begin - hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol); - InsertLLItem(asml, p.previous, p, hp2); - taicpu(p).opcode := A_JMP; - taicpu(p).is_jmp := true; - asml.remove(hp1); - hp1.free; - end; + begin + { don't do this on modern CPUs, this really hurts them due to + broken call/ret pairing } + if (current_settings.optimizecputype < cpu_Pentium2) and + not(cs_create_pic in current_settings.moduleswitches) and + GetNextInstruction(p, hp1) and + (hp1.typ = ait_instruction) and + (taicpu(hp1).opcode = A_JMP) and + ((taicpu(hp1).oper[0]^.typ=top_ref) and (taicpu(hp1).oper[0]^.ref^.refaddr=addr_full)) then + begin + hp2 := taicpu.Op_sym(A_PUSH,S_L,taicpu(hp1).oper[0]^.ref^.symbol); + InsertLLItem(asml, p.previous, p, hp2); + taicpu(p).opcode := A_JMP; + taicpu(p).is_jmp := true; + asml.remove(hp1); + hp1.free; + end + { replace + call procname + ret + by + jmp procname + + this should never hurt except when pic is used, not sure + how to handle it then + + but do it only on level 4 because it destroys stack back traces + } + else if (cs_opt_level4 in current_settings.optimizerswitches) and + not(cs_create_pic in current_settings.moduleswitches) and + GetNextInstruction(p, hp1) and + (hp1.typ = ait_instruction) and + (taicpu(hp1).opcode = A_RET) and + (taicpu(hp1).ops=0) then + begin + taicpu(p).opcode := A_JMP; + taicpu(p).is_jmp := true; + asml.remove(hp1); + hp1.free; + end; + end; A_CMP: begin if (taicpu(p).oper[0]^.typ = top_const) and