From 6dfe323e6fc26fa7cc35710bf3e369be33b4d025 Mon Sep 17 00:00:00 2001 From: "J. Gareth \"Curious Kit\" Moreton" Date: Mon, 15 Apr 2024 23:06:20 +0100 Subject: [PATCH] - AND and DeepMovOpt optimisations upgraded. --- compiler/x86/aoptx86.pas | 731 +++++++++++++++++++++------------------ 1 file changed, 386 insertions(+), 345 deletions(-) diff --git a/compiler/x86/aoptx86.pas b/compiler/x86/aoptx86.pas index 5961599014..88687e6f61 100644 --- a/compiler/x86/aoptx86.pas +++ b/compiler/x86/aoptx86.pas @@ -3225,24 +3225,19 @@ unit aoptx86; begin Result:=false; - GetNextInstruction_p:=GetNextInstruction(p, hp1); - { remove mov reg1,reg1? } if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) then begin DebugMsg(SPeepholeOptimization + 'Mov2Nop 1 done',p); { take care of the register (de)allocs following p } - RemoveCurrentP(p, hp1); - Result:=true; + RemoveCurrentP(p); + Result := True; exit; end; - { All the next optimisations require a next instruction } - if not GetNextInstruction_p or (hp1.typ <> ait_instruction) then - Exit; - { Prevent compiler warnings } + p_SourceReg := NR_NO; p_TargetReg := NR_NO; if taicpu(p).oper[1]^.typ = top_reg then @@ -3250,110 +3245,405 @@ unit aoptx86; { Saves on a large number of dereferences } p_TargetReg := taicpu(p).oper[1]^.reg; - { Look for: - mov %reg1,%reg2 - ??? %reg2,r/m - Change to: - mov %reg1,%reg2 - ??? %reg1,r/m - } - if taicpu(p).oper[0]^.typ = top_reg then + if NotFirstIteration and (cs_opt_level3 in current_settings.optimizerswitches) then + GetNextInstruction_p := GetNextInstructionUsingReg(p, hp1, p_TargetReg) + else + GetNextInstruction_p := GetNextInstruction(p, hp1); + + if GetNextInstruction_p and (hp1.typ = ait_instruction) then begin - if RegReadByInstruction(p_TargetReg, hp1) and - DeepMOVOpt(taicpu(p), taicpu(hp1)) then + if (taicpu(hp1).opcode = A_AND) and + MatchOpType(taicpu(hp1),top_const,top_reg) then begin - { A change has occurred, just not in p } - Result := True; - - TransferUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs, tai(p.Next)); - - if not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) and - { Just in case something didn't get modified (e.g. an - implicit register) } - not RegReadByInstruction(p_TargetReg, hp1) then + if MatchOperand(taicpu(hp1).oper[1]^, p_TargetReg) then begin - { We can remove the original MOV } - DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p); - RemoveCurrentp(p, hp1); - - { UsedRegs got updated by RemoveCurrentp } - Result := True; - Exit; - end; - - { If we know a MOV instruction has become a null operation, we might as well - get rid of it now to save time. } - if (taicpu(hp1).opcode = A_MOV) and - (taicpu(hp1).oper[1]^.typ = top_reg) and - SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and - { Just being a register is enough to confirm it's a null operation } - (taicpu(hp1).oper[0]^.typ = top_reg) then - begin - - Result := True; - - { Speed-up to reduce a pipeline stall... if we had something like... - - movl %eax,%edx - movw %dx,%ax - - ... the second instruction would change to movw %ax,%ax, but - given that it is now %ax that's active rather than %eax, - penalties might occur due to a partial register write, so instead, - change it to a MOVZX instruction when optimising for speed. - } - if not (cs_opt_size in current_settings.optimizerswitches) and - IsMOVZXAcceptable and - (taicpu(hp1).opsize < taicpu(p).opsize) + case taicpu(p).opsize of + S_L: + if (taicpu(hp1).oper[0]^.val = $ffffffff) then + begin + { Optimize out: + mov x, %reg + and ffffffffh, %reg + } + DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p); + RemoveInstruction(hp1); + Result:=true; + exit; + end; + S_Q: { TODO: Confirm if this is even possible } + if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then + begin + { Optimize out: + mov x, %reg + and ffffffffffffffffh, %reg + } + DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p); + RemoveInstruction(hp1); + Result:=true; + exit; + end; + else + ; + end; + if ( + { Make sure that if a reference is used, its registers + are not modified in between } + ( + (taicpu(p).oper[0]^.typ = top_reg) and + not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1) + ) or + ( + (taicpu(p).oper[0]^.typ = top_ref) and + (taicpu(p).oper[0]^.ref^.refaddr <> addr_full) and + not RefModifiedBetween(taicpu(p).oper[0]^.ref^, topsize2memsize[taicpu(p).opsize] shr 3, p, hp1) + ) + ) and + GetNextInstruction(hp1,hp2) and + MatchInstruction(hp2,A_TEST,[]) and + ( + MatchOperand(taicpu(hp1).oper[1]^,taicpu(hp2).oper[1]^) or + ( + { If the register being tested is smaller than the one + that received a bitwise AND, permit it if the constant + fits into the smaller size } + (taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and + SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(hp2).oper[1]^.reg) and + (taicpu(hp1).oper[0]^.typ = top_const) and (taicpu(hp1).oper[0]^.val >= 0) and + (GetSubReg(taicpu(hp2).oper[1]^.reg) < GetSubReg(taicpu(hp1).oper[1]^.reg)) and + ( + ( + (GetSubReg(taicpu(hp2).oper[1]^.reg) = R_SUBL) and + (taicpu(hp1).oper[0]^.val <= $FF) + ) or + ( + (GetSubReg(taicpu(hp2).oper[1]^.reg) = R_SUBW) and + (taicpu(hp1).oper[0]^.val <= $FFFF) {$ifdef x86_64} - { operations already implicitly set the upper 64 bits to zero } - and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q)) + ) or + ( + (GetSubReg(taicpu(hp2).oper[1]^.reg) = R_SUBD) and + (taicpu(hp1).oper[0]^.val <= $FFFFFFFF) {$endif x86_64} - then + ) + ) + ) + ) and + ( + MatchOperand(taicpu(hp2).oper[0]^,taicpu(hp2).oper[1]^) or + MatchOperand(taicpu(hp2).oper[0]^,-1) + ) and + GetNextInstruction(hp2,hp3) and + MatchInstruction(hp3,A_Jcc,A_Setcc,[]) and + (taicpu(hp3).condition in [C_E,C_NE]) then begin - DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1); - case taicpu(p).opsize of - S_W: - if taicpu(hp1).opsize = S_B then - taicpu(hp1).opsize := S_BL - else - InternalError(2020012911); - S_L{$ifdef x86_64}, S_Q{$endif x86_64}: - case taicpu(hp1).opsize of - S_B: - taicpu(hp1).opsize := S_BL; - S_W: - taicpu(hp1).opsize := S_WL; + TransferUsedRegs(TmpUsedRegs); + UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1); + UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next)); + if not(RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp2, TmpUsedRegs)) then + begin + DebugMsg(SPeepholeOptimization + 'MovAndTest2Test done',p); + taicpu(hp1).loadoper(1,taicpu(p).oper[0]^); + taicpu(hp1).opcode:=A_TEST; + + { Shrink the TEST instruction down to the smallest possible size } + case taicpu(hp1).oper[0]^.val of + 0..255: + if (taicpu(hp1).opsize <> S_B) +{$ifndef x86_64} + and ( + (taicpu(hp1).oper[1]^.typ <> top_reg) or + { Cannot encode byte-sized ESI, EDI, EBP or ESP under i386 } + (GetSupReg(taicpu(hp1).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) + ) +{$endif x86_64} + then + begin + if taicpu(hp1).opsize <> taicpu(hp2).opsize then + { Only print debug message if the TEST instruction + is a different size before and after } + DebugMsg(SPeepholeOptimization + 'test' + debug_opsize2str(taicpu(hp1).opsize) + ' -> testb to reduce instruction size (Test2Test 1a)' , p); + + taicpu(hp1).opsize := S_B; + if (taicpu(hp1).oper[1]^.typ = top_reg) then + setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBL); + end; + 256..65535: + if (taicpu(hp1).opsize <> S_W) then + begin + if taicpu(hp1).opsize <> taicpu(hp2).opsize then + { Only print debug message if the TEST instruction + is a different size before and after } + DebugMsg(SPeepholeOptimization + 'test' + debug_opsize2str(taicpu(hp1).opsize) + ' -> testw to reduce instruction size (Test2Test 1b)' , p); + + taicpu(hp1).opsize := S_W; + if (taicpu(hp1).oper[1]^.typ = top_reg) then + setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBW); + end; +{$ifdef x86_64} + 65536..$7FFFFFFF: + if (taicpu(hp1).opsize <> S_L) then + begin + if taicpu(hp1).opsize <> taicpu(hp2).opsize then + { Only print debug message if the TEST instruction + is a different size before and after } + DebugMsg(SPeepholeOptimization + 'test' + debug_opsize2str(taicpu(hp1).opsize) + ' -> testl to reduce instruction size (Test2Test 1c)' , p); + + taicpu(hp1).opsize := S_L; + if (taicpu(hp1).oper[1]^.typ = top_reg) then + setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD); + end; +{$endif x86_64} else - InternalError(2020012912); + ; end; - else - InternalError(2020012910); - end; - taicpu(hp1).opcode := A_MOVZX; - setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD); - end - else - begin - GetNextInstruction_p := GetNextInstruction(hp1, hp2); - DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1); - RemoveInstruction(hp1); - - { The instruction after what was hp1 is now the immediate next instruction, - so we can continue to make optimisations if it's present } - if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then - Exit; - - hp1 := hp2; + RemoveInstruction(hp2); + RemoveCurrentP(p); + Result:=true; + exit; + end; end; end; + if IsMOVZXAcceptable and + (taicpu(hp1).oper[1]^.typ = top_reg) and + (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) } + (getsupreg(p_TargetReg) = getsupreg(taicpu(hp1).oper[1]^.reg)) + then + begin + InputVal := debug_operstr(taicpu(p).oper[0]^); + MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val); + + case taicpu(p).opsize of + S_B: + if (taicpu(hp1).oper[0]^.val = $ff) then + begin + { Convert: + movb x, %regl movb x, %regl + andw ffh, %regw andl ffh, %regd + To: + movzbw x, %regd movzbl x, %regd + + (Identical registers, just different sizes) + } + RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name } + RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name } + + case taicpu(hp1).opsize of + S_W: NewSize := S_BW; + S_L: NewSize := S_BL; +{$ifdef x86_64} + S_Q: NewSize := S_BQ; +{$endif x86_64} + else + InternalError(2018011510); + end; + end + else + NewSize := S_NO; + S_W: + if (taicpu(hp1).oper[0]^.val = $ffff) then + begin + { Convert: + movw x, %regw + andl ffffh, %regd + To: + movzwl x, %regd + + (Identical registers, just different sizes) + } + RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name } + RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name } + + case taicpu(hp1).opsize of + S_L: NewSize := S_WL; +{$ifdef x86_64} + S_Q: NewSize := S_WQ; +{$endif x86_64} + else + InternalError(2018011511); + end; + end + else + NewSize := S_NO; + else + NewSize := S_NO; + end; + + if NewSize <> S_NO then + begin + PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1; + + { The actual optimization } + taicpu(p).opcode := A_MOVZX; + taicpu(p).changeopsize(NewSize); + taicpu(p).loadoper(1, taicpu(hp1).oper[1]^); + { Make sure we deal with any reference counts that were increased } + if taicpu(hp1).oper[1]^.typ = top_ref then + begin + if Assigned(taicpu(hp1).oper[1]^.ref^.symbol) then + taicpu(hp1).oper[1]^.ref^.symbol.decrefs; + if Assigned(taicpu(hp1).oper[1]^.ref^.relsymbol) then + taicpu(hp1).oper[1]^.ref^.relsymbol.decrefs; + end; + + { Safeguard if "and" is followed by a conditional command } + TransferUsedRegs(TmpUsedRegs); + UpdateUsedRegsBetween(TmpUsedRegs, tai(p.next), hp1); + + if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then + begin + { At this point, the "and" command is effectively equivalent to + "test %reg,%reg". This will be handled separately by the + Peephole Optimizer. [Kit] } + + DebugMsg(SPeepholeOptimization + PreMessage + + ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p); + end + else + begin + DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 + + ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p); + + RemoveInstruction(hp1); + end; + + Result := True; + Exit; + + end; + end; + end; + + { Ensure nothing in between p and hp1 would make optimisations + invalid, such as the source register being modified } + if ( + (taicpu(p).oper[0]^.typ = top_reg) and + not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1) + ) or + ( + (taicpu(p).oper[0]^.typ = top_ref) and + (taicpu(p).oper[0]^.ref^.refaddr <> addr_full) and + { Make sure we check separately whether p_TargetReg is used as part of the reference when required } + not RefModifiedBetween(taicpu(p).oper[0]^.ref^, topsize2memsize[taicpu(p).opsize] shr 3, p, hp1) + ) or + (taicpu(p).oper[0]^.typ = top_const) then + begin + + if taicpu(p).oper[0]^.typ = top_reg then + begin + p_SourceReg := taicpu(p).oper[0]^.reg; + + { Look for: + mov %reg1,%reg2 + ??? %reg2,r/m + Change to: + mov %reg1,%reg2 + ??? %reg1,r/m + } + if RegReadByInstruction(p_TargetReg, hp1) and + not RegModifiedBetween(p_SourceReg, p, hp1) and + DeepMOVOpt(taicpu(p), taicpu(hp1)) then + begin + { A change has occurred, just not in p } + Include(OptsToCheck, aoc_ForceNewIteration); + + TransferUsedRegs(TmpUsedRegs); + UpdateUsedRegsBetween(TmpUsedRegs, tai(p.Next), hp1); + + if not RegUsedAfterInstruction(p_TargetReg, hp1, TmpUsedRegs) and + { Just in case something didn't get modified (e.g. an + implicit register) } + not RegReadByInstruction(p_TargetReg, hp1) then + begin + { We can remove the original MOV } + DebugMsg(SPeepholeOptimization + 'Mov2Nop 3 done',p); + RemoveCurrentP(p); + + { UsedRegs got updated by RemoveCurrentp } + Result := True; + Exit; + end; + + { If we know a MOV instruction has become a null operation, we might as well + get rid of it now to save time. } + if (taicpu(hp1).opcode = A_MOV) and + (taicpu(hp1).oper[1]^.typ = top_reg) and + SuperRegistersEqual(taicpu(hp1).oper[1]^.reg, taicpu(p).oper[0]^.reg) and + { Just being a register is enough to confirm it's a null operation } + (taicpu(hp1).oper[0]^.typ = top_reg) then + begin + + Result := True; + + { Speed-up to reduce a pipeline stall... if we had something like... + + movl %eax,%edx + movw %dx,%ax + + ... the second instruction would change to movw %ax,%ax, but + given that it is now %ax that's active rather than %eax, + penalties might occur due to a partial register write, so instead, + change it to a MOVZX instruction when optimising for speed. + } + if not (cs_opt_size in current_settings.optimizerswitches) and + IsMOVZXAcceptable and + (taicpu(hp1).opsize < taicpu(p).opsize) +{$ifdef x86_64} + { operations already implicitly set the upper 64 bits to zero } + and not ((taicpu(hp1).opsize = S_L) and (taicpu(p).opsize = S_Q)) +{$endif x86_64} + then + begin + DebugMsg(SPeepholeOptimization + 'Zero-extension to minimise pipeline stall (Mov2Movz)',hp1); + case taicpu(p).opsize of + S_W: + if taicpu(hp1).opsize = S_B then + taicpu(hp1).opsize := S_BL + else + InternalError(2020012911); + S_L{$ifdef x86_64}, S_Q{$endif x86_64}: + case taicpu(hp1).opsize of + S_B: + taicpu(hp1).opsize := S_BL; + S_W: + taicpu(hp1).opsize := S_WL; + else + InternalError(2020012912); + end; + else + InternalError(2020012910); + end; + + taicpu(hp1).opcode := A_MOVZX; + setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD); + end + else + begin + GetNextInstruction_p := GetNextInstruction(hp1, hp2); + DebugMsg(SPeepholeOptimization + 'Mov2Nop 4 done',hp1); + RemoveInstruction(hp1); + + { The instruction after what was hp1 is now the immediate next instruction, + so we can continue to make optimisations if it's present } + if not GetNextInstruction_p or (hp2.typ <> ait_instruction) then + Exit; + + hp1 := hp2; + end; + end; + end; + end; end; end; end; + GetNextInstruction_p:=GetNextInstruction(p, hp1); + + { All the next optimisations require a next instruction } + if not GetNextInstruction_p or (hp1.typ <> ait_instruction) then + Exit; + { Depending on the DeepMOVOpt above, it may turn out that hp1 completely overwrites the original destination register. e.g. @@ -3478,255 +3768,6 @@ unit aoptx86; end; end; - if (taicpu(hp1).opcode = A_AND) and - (taicpu(p).oper[1]^.typ = top_reg) and - MatchOpType(taicpu(hp1),top_const,top_reg) then - begin - if MatchOperand(taicpu(p).oper[1]^,taicpu(hp1).oper[1]^) then - begin - case taicpu(p).opsize of - S_L: - if (taicpu(hp1).oper[0]^.val = $ffffffff) then - begin - { Optimize out: - mov x, %reg - and ffffffffh, %reg - } - DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 1 done',p); - RemoveInstruction(hp1); - Result:=true; - exit; - end; - S_Q: { TODO: Confirm if this is even possible } - if (taicpu(hp1).oper[0]^.val = $ffffffffffffffff) then - begin - { Optimize out: - mov x, %reg - and ffffffffffffffffh, %reg - } - DebugMsg(SPeepholeOptimization + 'MovAnd2Mov 2 done',p); - RemoveInstruction(hp1); - Result:=true; - exit; - end; - else - ; - end; - if ( - (taicpu(p).oper[0]^.typ=top_reg) or - ( - (taicpu(p).oper[0]^.typ=top_ref) and - (taicpu(p).oper[0]^.ref^.refaddr<>addr_full) - ) - ) and - GetNextInstruction(hp1,hp2) and - MatchInstruction(hp2,A_TEST,[]) and - ( - MatchOperand(taicpu(hp1).oper[1]^,taicpu(hp2).oper[1]^) or - ( - { If the register being tested is smaller than the one - that received a bitwise AND, permit it if the constant - fits into the smaller size } - (taicpu(hp1).oper[1]^.typ = top_reg) and (taicpu(hp2).oper[1]^.typ = top_reg) and - SuperRegistersEqual(taicpu(hp1).oper[1]^.reg,taicpu(hp2).oper[1]^.reg) and - (taicpu(hp1).oper[0]^.typ = top_const) and (taicpu(hp1).oper[0]^.val >= 0) and - (GetSubReg(taicpu(hp2).oper[1]^.reg) < GetSubReg(taicpu(hp1).oper[1]^.reg)) and - ( - ( - (GetSubReg(taicpu(hp2).oper[1]^.reg) = R_SUBL) and - (taicpu(hp1).oper[0]^.val <= $FF) - ) or - ( - (GetSubReg(taicpu(hp2).oper[1]^.reg) = R_SUBW) and - (taicpu(hp1).oper[0]^.val <= $FFFF) -{$ifdef x86_64} - ) or - ( - (GetSubReg(taicpu(hp2).oper[1]^.reg) = R_SUBD) and - (taicpu(hp1).oper[0]^.val <= $FFFFFFFF) -{$endif x86_64} - ) - ) - ) - ) and - ( - MatchOperand(taicpu(hp2).oper[0]^,taicpu(hp2).oper[1]^) or - MatchOperand(taicpu(hp2).oper[0]^,-1) - ) and - GetNextInstruction(hp2,hp3) and - MatchInstruction(hp3,A_Jcc,A_Setcc,[]) and - (taicpu(hp3).condition in [C_E,C_NE]) then - begin - TransferUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs, tai(p.Next)); - UpdateUsedRegs(TmpUsedRegs, tai(hp1.Next)); - if not(RegUsedAfterInstruction(taicpu(hp2).oper[1]^.reg, hp2, TmpUsedRegs)) then - begin - DebugMsg(SPeepholeOptimization + 'MovAndTest2Test done',p); - taicpu(hp1).loadoper(1,taicpu(p).oper[0]^); - taicpu(hp1).opcode:=A_TEST; - - { Shrink the TEST instruction down to the smallest possible size } - case taicpu(hp1).oper[0]^.val of - 0..255: - if (taicpu(hp1).opsize <> S_B) -{$ifndef x86_64} - and ( - (taicpu(hp1).oper[1]^.typ <> top_reg) or - { Cannot encode byte-sized ESI, EDI, EBP or ESP under i386 } - (GetSupReg(taicpu(hp1).oper[1]^.reg) in [RS_EAX, RS_EBX, RS_ECX, RS_EDX]) - ) -{$endif x86_64} - then - begin - if taicpu(hp1).opsize <> taicpu(hp2).opsize then - { Only print debug message if the TEST instruction - is a different size before and after } - DebugMsg(SPeepholeOptimization + 'test' + debug_opsize2str(taicpu(hp1).opsize) + ' -> testb to reduce instruction size (Test2Test 1a)' , p); - - taicpu(hp1).opsize := S_B; - if (taicpu(hp1).oper[1]^.typ = top_reg) then - setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBL); - end; - 256..65535: - if (taicpu(hp1).opsize <> S_W) then - begin - if taicpu(hp1).opsize <> taicpu(hp2).opsize then - { Only print debug message if the TEST instruction - is a different size before and after } - DebugMsg(SPeepholeOptimization + 'test' + debug_opsize2str(taicpu(hp1).opsize) + ' -> testw to reduce instruction size (Test2Test 1b)' , p); - - taicpu(hp1).opsize := S_W; - if (taicpu(hp1).oper[1]^.typ = top_reg) then - setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBW); - end; -{$ifdef x86_64} - 65536..$7FFFFFFF: - if (taicpu(hp1).opsize <> S_L) then - begin - if taicpu(hp1).opsize <> taicpu(hp2).opsize then - { Only print debug message if the TEST instruction - is a different size before and after } - DebugMsg(SPeepholeOptimization + 'test' + debug_opsize2str(taicpu(hp1).opsize) + ' -> testl to reduce instruction size (Test2Test 1c)' , p); - - taicpu(hp1).opsize := S_L; - if (taicpu(hp1).oper[1]^.typ = top_reg) then - setsubreg(taicpu(hp1).oper[1]^.reg, R_SUBD); - end; -{$endif x86_64} - else - ; - end; - - RemoveInstruction(hp2); - RemoveCurrentP(p, hp1); - Result:=true; - exit; - end; - end; - end - else if IsMOVZXAcceptable and - (taicpu(p).oper[1]^.typ = top_reg) and (taicpu(hp1).oper[1]^.typ = top_reg) and - (taicpu(p).oper[0]^.typ <> top_const) and { MOVZX only supports registers and memory, not immediates (use MOV for that!) } - (getsupreg(taicpu(p).oper[1]^.reg) = getsupreg(taicpu(hp1).oper[1]^.reg)) - then - begin - InputVal := debug_operstr(taicpu(p).oper[0]^); - MaskNum := debug_tostr(taicpu(hp1).oper[0]^.val); - - case taicpu(p).opsize of - S_B: - if (taicpu(hp1).oper[0]^.val = $ff) then - begin - { Convert: - movb x, %regl movb x, %regl - andw ffh, %regw andl ffh, %regd - To: - movzbw x, %regd movzbl x, %regd - - (Identical registers, just different sizes) - } - RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 8-bit register name } - RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 16/32-bit register name } - - case taicpu(hp1).opsize of - S_W: NewSize := S_BW; - S_L: NewSize := S_BL; -{$ifdef x86_64} - S_Q: NewSize := S_BQ; -{$endif x86_64} - else - InternalError(2018011510); - end; - end - else - NewSize := S_NO; - S_W: - if (taicpu(hp1).oper[0]^.val = $ffff) then - begin - { Convert: - movw x, %regw - andl ffffh, %regd - To: - movzwl x, %regd - - (Identical registers, just different sizes) - } - RegName1 := debug_regname(taicpu(p).oper[1]^.reg); { 16-bit register name } - RegName2 := debug_regname(taicpu(hp1).oper[1]^.reg); { 32-bit register name } - - case taicpu(hp1).opsize of - S_L: NewSize := S_WL; -{$ifdef x86_64} - S_Q: NewSize := S_WQ; -{$endif x86_64} - else - InternalError(2018011511); - end; - end - else - NewSize := S_NO; - else - NewSize := S_NO; - end; - - if NewSize <> S_NO then - begin - PreMessage := 'mov' + debug_opsize2str(taicpu(p).opsize) + ' ' + InputVal + ',' + RegName1; - - { The actual optimization } - taicpu(p).opcode := A_MOVZX; - taicpu(p).changeopsize(NewSize); - taicpu(p).oper[1]^ := taicpu(hp1).oper[1]^; - - { Safeguard if "and" is followed by a conditional command } - TransferUsedRegs(TmpUsedRegs); - UpdateUsedRegs(TmpUsedRegs,tai(p.next)); - - if (RegUsedAfterInstruction(NR_DEFAULTFLAGS, hp1, TmpUsedRegs)) then - begin - { At this point, the "and" command is effectively equivalent to - "test %reg,%reg". This will be handled separately by the - Peephole Optimizer. [Kit] } - - DebugMsg(SPeepholeOptimization + PreMessage + - ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p); - end - else - begin - DebugMsg(SPeepholeOptimization + PreMessage + '; and' + debug_opsize2str(taicpu(hp1).opsize) + ' $' + MaskNum + ',' + RegName2 + - ' -> movz' + debug_opsize2str(NewSize) + ' ' + InputVal + ',' + RegName2, p); - - RemoveInstruction(hp1); - end; - - Result := True; - Exit; - - end; - end; - end; - if (taicpu(hp1).opcode = A_OR) and (taicpu(p).oper[1]^.typ = top_reg) and MatchOperand(taicpu(p).oper[0]^, 0) and