diff --git a/compiler/arm/aasmcpu.pas b/compiler/arm/aasmcpu.pas
index 2a4c38e34c..2932d6d6b3 100644
--- a/compiler/arm/aasmcpu.pas
+++ b/compiler/arm/aasmcpu.pas
@@ -270,7 +270,7 @@ uses
 implementation
 
   uses
-    cutils,rgobj,itcpugas;
+    cutils,rgobj,itcpugas,aoptcpu;
 
 
     procedure taicpu.loadshifterop(opidx:longint;const so:tshifterop);
@@ -1098,13 +1098,133 @@ implementation
           end;
       end;
 
+
+    { Returns the IT opcode that describes the IT block FirstOp directly
+      followed by the IT block LastOp. If InvertLast is true, the then/else
+      pattern of LastOp is inverted before it is appended (the caller passes
+      true when the second block tests the inverse condition of the first).
+      Raises internalerror 2012100805 if the merged pattern has more than 4
+      entries and internalerror 2012100806 if no opcode has that pattern. }
+    function getMergedInstruction(FirstOp,LastOp:TAsmOp;InvertLast:boolean) : TAsmOp;
+      const
+        { then/else pattern per IT opcode, the leading T stands for the IT
+          condition itself; NOTE(review): relies on A_IT..A_ITTTT being
+          declared in exactly this order, confirm against TAsmOp }
+        opTable: array[A_IT..A_ITTTT] of string =
+          ('T','TE','TT','TEE','TTE','TET','TTT',
+           'TEEE','TTEE','TETE','TTTE',
+           'TEET','TTET','TETT','TTTT');
+        { opTable with T and E swapped }
+        invertedOpTable: array[A_IT..A_ITTTT] of string =
+          ('E','ET','EE','ETT','EET','ETE','EEE',
+           'ETTT','EETT','ETET','EEET',
+           'ETTE','EETE','ETEE','EEEE');
+      var
+        resStr : string;
+        i : TAsmOp;
+      begin
+        if InvertLast then
+          resStr := opTable[FirstOp]+invertedOpTable[LastOp]
+        else
+          resStr := opTable[FirstOp]+opTable[LastOp];
+        if length(resStr) > 4 then
+          internalerror(2012100805);
+
+        for i := low(opTable) to high(opTable) do
+          if opTable[i] = resStr then
+            exit(i);
+
+        internalerror(2012100806);
+      end;
+
+    { Merges adjacent Thumb-2 IT blocks in list: when an IT block with less
+      than 4 instructions is directly followed by another IT instruction that
+      tests the same or the inverse condition and both blocks together cover
+      at most 4 instructions, the first IT gets the merged opcode and the
+      second IT instruction is removed from list and freed. }
+    procedure foldITInstructions(list: TAsmList);
+      var
+        curtai,hp1 : tai;
+        levels,i : LongInt;
+      begin
+        curtai:=tai(list.First);
+        while assigned(curtai) do
+          begin
+            case curtai.typ of
+              ait_instruction:
+                if IsIT(taicpu(curtai).opcode) then
+                  begin
+                    levels := GetITLevels(taicpu(curtai).opcode);
+                    if levels < 4 then
+                      begin
+                        { locate the tai behind the last instruction of this IT block }
+                        i:=levels;
+                        hp1:=tai(curtai.Next);
+                        while assigned(hp1) and
+                          (i > 0) do
+                          begin
+                            if hp1.typ=ait_instruction then
+                              begin
+                                dec(i);
+                                { give up if MustBeLast holds for the final instruction }
+                                if (i = 0) and
+                                  mustbelast(hp1) then
+                                  begin
+                                    hp1:=nil;
+                                    break;
+                                  end;
+                              end;
+                            hp1:=tai(hp1.Next);
+                          end;
+
+                        if assigned(hp1) then
+                          begin
+                            // We are pointing at the first instruction after the IT block
+                            { Skip anything that is not an instruction, but stop at
+                              a label: merging would move the label into the IT
+                              block and a branch into an IT block is not permitted }
+                            while assigned(hp1) and
+                              not(hp1.typ in [ait_instruction,ait_label]) do
+                                hp1:=tai(hp1.Next);
+
+                            if assigned(hp1) and
+                              (hp1.typ=ait_instruction) and
+                              IsIT(taicpu(hp1).opcode) then
+                              begin
+                                if (levels+GetITLevels(taicpu(hp1).opcode) <= 4) and
+                                  ((taicpu(curtai).oper[0]^.cc=taicpu(hp1).oper[0]^.cc) or
+                                   (taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc))) then
+                                  begin
+                                    taicpu(curtai).opcode:=getMergedInstruction(taicpu(curtai).opcode,
+                                                                                taicpu(hp1).opcode,
+                                                                                taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc));
+
+                                    { the merged opcode now covers the second block as well }
+                                    list.Remove(hp1);
+                                    hp1.Free;
+                                  end;
+                              end;
+                          end;
+                      end;
+                  end;
+            end;
+
+            curtai:=tai(curtai.Next);
+          end;
+      end;
+
     procedure finalizearmcode(list, listtoinsert: TAsmList);
       begin
-        insertpcrelativedata(list, listtoinsert);
-
         { Do Thumb-2 16bit -> 32bit transformations }
         if current_settings.cputype in cpu_thumb2 then
-          ensurethumb2encodings(list);
+          begin
+            ensurethumb2encodings(list);
+            foldITInstructions(list);
+          end;
+
+        { NOTE(review): pc-relative data is now inserted after the Thumb-2
+          passes above; presumably so it sees the final instructions - confirm }
+        insertpcrelativedata(list, listtoinsert);
       end;
 
     procedure InsertPData;
diff --git a/compiler/arm/aoptcpu.pas b/compiler/arm/aoptcpu.pas
index 31adec9816..14eed494ba 100644
--- a/compiler/arm/aoptcpu.pas
+++ b/compiler/arm/aoptcpu.pas
@@ -66,6 +66,8 @@ Type
     procedure PeepHoleOptPass2;override;
   End;
 
+  function MustBeLast(p : tai) : boolean;
+
 Implementation
 
   uses
@@ -1199,9 +1201,16 @@ Implementation
                       p:=hp1;
                     end;
                 end;
-              A_UXTB,
-              A_SXTB:
+              A_UXTB:
                 begin
+                  {
+                    change
+                    uxtb reg2,reg1
+                    strb reg2,[...]
+                    dealloc reg2
+                    to
+                    strb reg1,[...]
+                  }
                   if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
                     GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                     MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
@@ -1211,15 +1220,43 @@ Implementation
                     { reg1 might not be modified inbetween }
                     not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                     begin
-                      DebugMsg('Peephole xXTBStrb2Strb done', p);
+                      DebugMsg('Peephole UxtbStrb2Strb done', p);
                       taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
                       asml.remove(p);
                       p.free;
                       p:=hp1;
+                    end
+                  {
+                    change
+                    uxtb reg2,reg1
+                    uxth reg3,reg2
+                    dealloc reg2
+                    to
+                    uxtb reg3,reg1
+                  }
+                  else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+                    { a rotation operand on the uxtb would be lost by the rewrite }
+                    (taicpu(p).ops=2) and
+                    GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+                    MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
+                    { the uxth must be a plain two operand one that really reads reg2 }
+                    (taicpu(hp1).ops=2) and
+                    (taicpu(hp1).oper[1]^.typ=top_reg) and
+                    (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
+                    (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
+                     (taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg)) and
+                    { reg1 might not be modified inbetween }
+                    not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+                    begin
+                      DebugMsg('Peephole UxtbUxth2Uxtb done', p);
+                      taicpu(hp1).opcode:=A_UXTB;
+                      taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+                      asml.remove(p);
+                      p.free;
+                      p:=hp1;
                     end;
                 end;
-              A_UXTH,
-              A_SXTH:
+              A_UXTH:
                 begin
                   if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
                     GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
@@ -1230,7 +1267,7 @@ Implementation
                     { reg1 might not be modified inbetween }
                     not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                     begin
-                      DebugMsg('Peephole xXTBStrh2Strh done', p);
+                      DebugMsg('Peephole UXTHStrh2Strh done', p);
                       taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
                       asml.remove(p);
                       p.free;
diff --git a/compiler/arm/cpubase.pas b/compiler/arm/cpubase.pas
index 4aae1da0f2..5d4dce56d8 100644
--- a/compiler/arm/cpubase.pas
+++ b/compiler/arm/cpubase.pas
@@ -361,6 +361,9 @@ unit cpubase;
     function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword):boolean;
     function dwarf_reg(r:tregister):shortint;
 
+    function IsIT(op: TAsmOp) : boolean;
+    function GetITLevels(op: TAsmOp) : longint;
+
   implementation
 
     uses
@@ -606,4 +609,38 @@ unit cpubase;
         result:=RS_R0;
       end;
 
+    { Returns true if op is one of the IT opcodes listed below (A_IT and its T/E variants) }
+    function IsIT(op: TAsmOp) : boolean;
+      begin
+        case op of
+          A_IT,
+          A_ITE, A_ITT,
+          A_ITEE, A_ITTE, A_ITET, A_ITTT,
+          A_ITEEE, A_ITTEE, A_ITETE, A_ITTTE,
+          A_ITEET, A_ITTET, A_ITETT, A_ITTTT:
+            result:=true;
+          else
+            result:=false;
+        end;
+      end;
+
+    { Returns the number of conditional slots of the IT opcode op: 1 for A_IT
+      up to 4 for the four letter forms, 0 if op is not an IT opcode }
+    function GetITLevels(op: TAsmOp) : longint;
+      begin
+        case op of
+          A_IT:
+            result:=1;
+          A_ITE, A_ITT:
+            result:=2;
+          A_ITEE, A_ITTE, A_ITET, A_ITTT:
+            result:=3;
+          A_ITEEE, A_ITTEE, A_ITETE, A_ITTTE,
+          A_ITEET, A_ITTET, A_ITETT, A_ITTTT:
+            result:=4;
+          else
+            result:=0;
+        end;
+      end;
+
 end.
diff --git a/compiler/arm/rgcpu.pas b/compiler/arm/rgcpu.pas
index d3d104e6b3..ace3a850c7 100644
--- a/compiler/arm/rgcpu.pas
+++ b/compiler/arm/rgcpu.pas
@@ -255,37 +255,6 @@ unit rgcpu;
         result:=getsubreg(r);
       end;
 
-    function IsIT(op: TAsmOp) : boolean;
-      begin
-        case op of
-          A_IT,
-          A_ITE, A_ITT,
-          A_ITEE, A_ITTE, A_ITET, A_ITTT,
-          A_ITEEE, A_ITTEE, A_ITETE, A_ITTTE,
-          A_ITEET, A_ITTET, A_ITETT, A_ITTTT:
-            result:=true;
-          else
-            result:=false;
-        end;
-      end;
-
-    function GetITLevels(op: TAsmOp) : longint;
-      begin
-        case op of
-          A_IT:
-            result:=1;
-          A_ITE, A_ITT:
-            result:=2;
-          A_ITEE, A_ITTE, A_ITET, A_ITTT:
-            result:=3;
-          A_ITEEE, A_ITTEE, A_ITETE, A_ITTTE,
-          A_ITEET, A_ITTET, A_ITETT, A_ITTTT:
-            result:=4;
-          else
-            result:=0;
-        end;
-      end;
-
     function GetITRemainderOp(originalOp:TAsmOp;remLevels:longint;var newOp: TAsmOp;var NeedsCondSwap:boolean) : TAsmOp;
       const
         remOps : array[1..3] of array[A_ITE..A_ITTTT] of TAsmOp = (