* the optimization of converting a multiplication by a power of two into a shl is moved from n386add/secondpass to nadd/resulttypepass
This commit is contained in:
Jonas Maebe 2002-07-26 11:17:52 +00:00
parent 185fa10345
commit ed2bdb31a6
2 changed files with 82 additions and 73 deletions

View File

@ -1443,77 +1443,55 @@ interface
{ the location.register will be filled in later (JM) } { the location.register will be filled in later (JM) }
location_reset(location,LOC_REGISTER,OS_INT); location_reset(location,LOC_REGISTER,OS_INT);
{$IfNDef NoShlMul} regstopush := all_registers;
if right.nodetype=ordconstn then remove_non_regvars_from_loc(right.location,regstopush);
swapleftright; remove_non_regvars_from_loc(left.location,regstopush);
If (left.nodetype = ordconstn) and { now, regstopush does NOT contain EAX and/or EDX if they are }
ispowerof2(tordconstnode(left).value, power) and { used in either the left or the right location, except if }
not(cs_check_overflow in aktlocalswitches) then {they are regvars. It DOES contain them if they are used in }
Begin { another location (JM) }
{ This release will be moved after the next } if not(R_EAX in rg.unusedregsint) and
{ instruction by the optimizer. No need to } (R_EAX in regstopush) then
{ release left.location, since it's a } begin
{ constant (JM) } emit_reg(A_PUSH,S_L,R_EAX);
location_release(exprasmlist,right.location); popeax:=true;
location.register:=rg.getregisterint(exprasmlist); end;
cg.a_load_loc_reg(exprasmlist,right.location,location.register); if not(R_EDX in rg.unusedregsint) and
cg.a_op_const_reg(exprasmlist,OP_SHL,power,location.register); (R_EDX in regstopush) then
End begin
Else emit_reg(A_PUSH,S_L,R_EDX);
Begin popedx:=true;
{$EndIf NoShlMul} end;
regstopush := all_registers; { left.location can be R_EAX !!! }
remove_non_regvars_from_loc(right.location,regstopush); rg.getexplicitregisterint(exprasmlist,R_EDI);
remove_non_regvars_from_loc(left.location,regstopush); { load the left value }
{ now, regstopush does NOT contain EAX and/or EDX if they are } cg.a_load_loc_reg(exprasmlist,left.location,R_EDI);
{ used in either the left or the right location, except if } location_release(exprasmlist,left.location);
{they are regvars. It DOES contain them if they are used in } { allocate EAX }
{ another location (JM) } if R_EAX in rg.unusedregsint then
if not(R_EAX in rg.unusedregsint) and exprasmList.concat(tai_regalloc.Alloc(R_EAX));
(R_EAX in regstopush) then { load the right value }
begin cg.a_load_loc_reg(exprasmlist,right.location,R_EAX);
emit_reg(A_PUSH,S_L,R_EAX); location_release(exprasmlist,right.location);
popeax:=true; { allocate EAX if it isn't yet allocated (JM) }
end; if (R_EAX in rg.unusedregsint) then
if not(R_EDX in rg.unusedregsint) and exprasmList.concat(tai_regalloc.Alloc(R_EAX));
(R_EDX in regstopush) then { also allocate EDX, since it is also modified by }
begin { a mul (JM) }
emit_reg(A_PUSH,S_L,R_EDX); if R_EDX in rg.unusedregsint then
popedx:=true; exprasmList.concat(tai_regalloc.Alloc(R_EDX));
end; emit_reg(A_MUL,S_L,R_EDI);
{ left.location can be R_EAX !!! } rg.ungetregisterint(exprasmlist,R_EDI);
rg.getexplicitregisterint(exprasmlist,R_EDI); if R_EDX in rg.unusedregsint then
{ load the left value } exprasmList.concat(tai_regalloc.DeAlloc(R_EDX));
cg.a_load_loc_reg(exprasmlist,left.location,R_EDI); if R_EAX in rg.unusedregsint then
location_release(exprasmlist,left.location); exprasmList.concat(tai_regalloc.DeAlloc(R_EAX));
{ allocate EAX } location.register:=rg.getregisterint(exprasmlist);
if R_EAX in rg.unusedregsint then emit_reg_reg(A_MOV,S_L,R_EAX,location.register);
exprasmList.concat(tai_regalloc.Alloc(R_EAX)); if popedx then
{ load the right value } emit_reg(A_POP,S_L,R_EDX);
cg.a_load_loc_reg(exprasmlist,right.location,R_EAX); if popeax then
location_release(exprasmlist,right.location); emit_reg(A_POP,S_L,R_EAX);
{ allocate EAX if it isn't yet allocated (JM) }
if (R_EAX in rg.unusedregsint) then
exprasmList.concat(tai_regalloc.Alloc(R_EAX));
{ also allocate EDX, since it is also modified by }
{ a mul (JM) }
if R_EDX in rg.unusedregsint then
exprasmList.concat(tai_regalloc.Alloc(R_EDX));
emit_reg(A_MUL,S_L,R_EDI);
rg.ungetregisterint(exprasmlist,R_EDI);
if R_EDX in rg.unusedregsint then
exprasmList.concat(tai_regalloc.DeAlloc(R_EDX));
if R_EAX in rg.unusedregsint then
exprasmList.concat(tai_regalloc.DeAlloc(R_EAX));
location.register:=rg.getregisterint(exprasmlist);
emit_reg_reg(A_MOV,S_L,R_EAX,location.register);
if popedx then
emit_reg(A_POP,S_L,R_EDX);
if popeax then
emit_reg(A_POP,S_L,R_EAX);
{$IfNDef NoShlMul}
End;
{$endif NoShlMul}
location_freetemp(exprasmlist,left.location); location_freetemp(exprasmlist,left.location);
location_freetemp(exprasmlist,right.location); location_freetemp(exprasmlist,right.location);
exit; exit;
@ -1573,7 +1551,11 @@ begin
end. end.
{ {
$Log$ $Log$
Revision 1.44 2002-07-20 11:58:00 florian Revision 1.45 2002-07-26 11:17:52 jonas
* the optimization of converting a multiplication with a power of two to
a shl is moved from n386add/secondpass to nadd/resulttypepass
Revision 1.44 2002/07/20 11:58:00 florian
* types.pas renamed to defbase.pas because D6 contains a types * types.pas renamed to defbase.pas because D6 contains a types
unit so this would conflicts if D6 programms are compiled unit so this would conflicts if D6 programms are compiled
+ Willamette/SSE2 instructions to assembler added + Willamette/SSE2 instructions to assembler added

View File

@ -605,6 +605,29 @@ implementation
{ if both are orddefs then check sub types } { if both are orddefs then check sub types }
else if (ld.deftype=orddef) and (rd.deftype=orddef) then else if (ld.deftype=orddef) and (rd.deftype=orddef) then
begin begin
{ optimize multiplication by a power of 2 }
if not(cs_check_overflow in aktlocalswitches) and
(nodetype = muln) and
(((left.nodetype = ordconstn) and
ispowerof2(tordconstnode(left).value,i)) or
((right.nodetype = ordconstn) and
ispowerof2(tordconstnode(right).value,i))) then
begin
if left.nodetype = ordconstn then
begin
tordconstnode(left).value := i;
result := cshlshrnode.create(shln,right,left);
end
else
begin
tordconstnode(right).value := i;
result := cshlshrnode.create(shln,left,right);
end;
left := nil;
right := nil;
exit;
end;
{ 2 booleans? Make them equal to the largest boolean } { 2 booleans? Make them equal to the largest boolean }
if is_boolean(ld) and is_boolean(rd) then if is_boolean(ld) and is_boolean(rd) then
begin begin
@ -1708,7 +1731,11 @@ begin
end. end.
{ {
$Log$ $Log$
Revision 1.57 2002-07-23 13:08:16 jonas Revision 1.58 2002-07-26 11:17:52 jonas
* the optimization of converting a multiplication with a power of two to
a shl is moved from n386add/secondpass to nadd/resulttypepass
Revision 1.57 2002/07/23 13:08:16 jonas
* fixed constant set evaluation of new set handling for non-commutative * fixed constant set evaluation of new set handling for non-commutative
operators operators