From 9ec9b44784989d63655cebcb74fb0f11a77dacfd Mon Sep 17 00:00:00 2001
From: Jeppe Johansen <jeppe@j-software.dk>
Date: Mon, 8 Oct 2012 12:30:00 +0000
Subject: [PATCH] Add CBNZ/CBZ instructions. Create preliminary Thumb-2
 PeepHoleOptPass2 code, hacked together from the ARM mode code. Add a number
 of simple size optimizations for common Thumb-2 instructions.

git-svn-id: branches/laksen/arm-embedded@22590 -
---
 compiler/arm/aasmcpu.pas |   8 +
 compiler/arm/aoptcpu.pas | 422 ++++++++++++++++++++++++++++++++++++++-
 compiler/arm/armatt.inc  |  12 +-
 compiler/arm/armatts.inc |   2 +
 compiler/arm/armins.dat  |  19 +-
 compiler/arm/armop.inc   |  12 +-
 compiler/arm/cpubase.pas |   2 +-
 compiler/arm/rgcpu.pas   |   7 +
 8 files changed, 462 insertions(+), 22 deletions(-)

diff --git a/compiler/arm/aasmcpu.pas b/compiler/arm/aasmcpu.pas
index b9b95d6eed..2a4c38e34c 100644
--- a/compiler/arm/aasmcpu.pas
+++ b/compiler/arm/aasmcpu.pas
@@ -175,6 +175,7 @@ uses
          constructor op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
          constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
 
+         constructor op_regset(op:tasmop; regtype: tregistertype; subreg: tsubregister; _op1: tcpuregisterset);
          constructor op_ref_regset(op:tasmop; _op1: treference; regtype: tregistertype; subreg: tsubregister; _op2: tcpuregisterset);
 
          constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
@@ -415,6 +416,13 @@ implementation
          loadconst(1,aint(_op2));
       end;
 
+    constructor taicpu.op_regset(op: tasmop; regtype: tregistertype; subreg: tsubregister; _op1: tcpuregisterset);
+      begin { builds instruction "op" with a single operand: the register set _op1 }
+        inherited create(op);
+        ops:=1; { operand count }
+        loadregset(0,regtype,subreg,_op1); { operand 0 is filled from regtype, subreg and _op1 }
+      end;
+
 
     constructor taicpu.op_ref_regset(op:tasmop; _op1: treference; regtype: tregistertype; subreg: tsubregister; _op2: tcpuregisterset);
       begin
diff --git a/compiler/arm/aoptcpu.pas b/compiler/arm/aoptcpu.pas
index 58cdcb190c..31adec9816 100644
--- a/compiler/arm/aoptcpu.pas
+++ b/compiler/arm/aoptcpu.pas
@@ -30,7 +30,7 @@ Unit aoptcpu;
 
 Interface
 
-uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj;
+uses cgbase, cpubase, aasmtai, aasmcpu,aopt, aoptcpub, aoptobj, cclasses;
 
 Type
   TCpuAsmOptimizer = class(TAsmOptimizer)
@@ -62,6 +62,7 @@ Type
 
   TCpuThumb2AsmOptimizer = class(TCpuAsmOptimizer)
     { uses the same constructor as TAopObj }
+    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     procedure PeepHoleOptPass2;override;
   End;
 
@@ -80,6 +81,8 @@ Implementation
         (p.typ=ait_instruction) and
         (taicpu(p).condition=C_None) and
         ((taicpu(p).opcode<A_IT) or (taicpu(p).opcode>A_ITTTT)) and
+        (taicpu(p).opcode<>A_CBZ) and
+        (taicpu(p).opcode<>A_CBNZ) and
         (taicpu(p).opcode<>A_PLD) and
         ((taicpu(p).opcode<>A_BLX) or
          (taicpu(p).oper[0]^.typ=top_reg));
@@ -327,6 +330,9 @@ Implementation
          (taicpu(movp).oper[0]^.reg<>NR_R14) and
          { the destination register of the mov might not be used beween p and movp }
          not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
+         { cb[n]z are thumb instructions which require specific registers, with no wide forms }
+         (taicpu(p).opcode<>A_CBZ) and
+         (taicpu(p).opcode<>A_CBNZ) and
          {There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same}
          not (
            (taicpu(p).opcode in [A_MLA, A_MUL]) and
@@ -1152,6 +1158,85 @@ Implementation
                     if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
                       RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
                   end;
+                A_MVN:
+                  begin
+                    {
+                      change
+                      mvn reg2,reg1
+                      and reg3,reg4,reg2
+                      dealloc reg2
+                      to
+                      bic reg3,reg4,reg1
+                    }
+                    if (taicpu(p).oper[1]^.typ = top_reg) and
+                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+                      MatchInstruction(hp1,A_AND,[],[]) and
+                      (((taicpu(hp1).ops=3) and
+                        (taicpu(hp1).oper[2]^.typ=top_reg) and
+                        (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
+                         MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) or
+                       ((taicpu(hp1).ops=2) and
+                        (taicpu(hp1).oper[1]^.typ=top_reg) and
+                        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
+                      assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
+                      { reg1 might not be modified inbetween }
+                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+                      begin
+                        DebugMsg('Peephole MvnAnd2Bic done', p);
+                        taicpu(hp1).opcode:=A_BIC;
+
+                        if taicpu(hp1).ops=3 then
+                          begin
+                            if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
+                              taicpu(hp1).loadReg(1,taicpu(hp1).oper[2]^.reg); // Swap operands
+
+                            taicpu(hp1).loadReg(2,taicpu(p).oper[1]^.reg);
+                          end
+                        else
+                          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+                        asml.remove(p);
+                        p.free;
+                        p:=hp1;
+                      end;
+                  end;
+                A_UXTB,
+                A_SXTB:
+                  begin
+                    if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+                      MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
+                      assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
+                      { the reference in strb might not use reg2 }
+                      not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
+                      { reg1 might not be modified inbetween }
+                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+                      begin
+                        DebugMsg('Peephole xXTBStrb2Strb done', p);
+                        taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+                        asml.remove(p);
+                        p.free;
+                        p:=hp1;
+                      end;
+                  end;
+                A_UXTH,
+                A_SXTH:
+                  begin
+                    if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+                      MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
+                      assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
+                      { the reference in strb might not use reg2 }
+                      not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
+                      { reg1 might not be modified inbetween }
+                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+                      begin
+                        DebugMsg('Peephole xXTBStrh2Strh done', p);
+                        taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+                        asml.remove(p);
+                        p.free;
+                        p:=hp1;
+                      end;
+                  end;
                 A_CMP:
                   begin
                     {
@@ -1566,9 +1651,340 @@ Implementation
     end;
 
 
-  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
+  procedure DecrementPreceedingIT(list: TAsmList; p: tai);
+    { Shortens by one slot the IT block whose last conditional slot is p.
+      Scans backwards from p over at most four instructions for the nearest
+      IT..ITTTT opcode. When p turns out to be the last slot of that block the
+      opcode is replaced by the next shorter form; a plain IT that covers only
+      p is unlinked from list and freed. In every other case nothing changes.
+      list: assembler list that contains p;  p: instruction in the last slot. }
+    var
+      hp : tai;
+      l : longint;
     begin
-      { TODO: Add optimizer code }
+      hp := tai(p.Previous);
+      { invariant: counting only instructions, p is the l-th one after hp }
+      l := 1;
+
+      while assigned(hp) and
+        (l <= 4) do
+        begin
+          if hp.typ=ait_instruction then
+            begin
+              if (taicpu(hp).opcode>=A_IT) and
+                (taicpu(hp).opcode <= A_ITTTT) then
+                begin
+                  if (taicpu(hp).opcode = A_IT) and
+                     (l=1) then
+                    begin
+                      { Remove only unlinks the node, so release it as well;
+                        hp is not used again because of the break below }
+                      list.Remove(hp);
+                      hp.Free;
+                    end
+                  else
+                    case taicpu(hp).opcode of
+                      { two slots -> one }
+                      A_ITE, A_ITT:
+                        if l=2 then taicpu(hp).opcode := A_IT;
+                      { three slots -> two }
+                      A_ITEE, A_ITET:
+                        if l=3 then taicpu(hp).opcode := A_ITE;
+                      A_ITTE, A_ITTT:
+                        if l=3 then taicpu(hp).opcode := A_ITT;
+                      { four slots -> three }
+                      A_ITEEE, A_ITEET:
+                        if l=4 then taicpu(hp).opcode := A_ITEE;
+                      A_ITTEE, A_ITTET:
+                        if l=4 then taicpu(hp).opcode := A_ITTE;
+                      A_ITETE, A_ITETT:
+                        if l=4 then taicpu(hp).opcode := A_ITET;
+                      A_ITTTE, A_ITTTT:
+                        if l=4 then taicpu(hp).opcode := A_ITTT;
+                    end;
+
+                  { only the nearest IT opcode is examined }
+                  break;
+                end;
+
+              { a regular instruction: one more slot between the IT opcode and p }
+              inc(l);
+            end;
+          hp := tai(hp.Previous);
+        end;
+    end;
+
+  function TCpuThumb2AsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
+    { Thumb-2 specific rewrites tried on p before falling back to the inherited
+      ARM pass: stm/str/ldm/ldr on the stack pointer -> push/pop, S postfix for
+      small mov/mvn/add/sub, "and" with 255/65535 -> uxtb/uxth. True if p changed. }
+    var
+      hp : taicpu;
+      hp1,hp2 : tai; { only referenced by the disabled cbz/cbnz rewrite below }
+    begin
+      if (p.typ=ait_instruction) and
+        MatchInstruction(p, A_STM, [C_None], [PF_FD,PF_DB]) and
+        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
+        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
+        ((taicpu(p).oper[1]^.regset^*[8..13,15])=[]) then { superregisters 8..13 and 15 must be absent }
+        begin
+          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
+          AsmL.InsertAfter(hp, p);
+          asml.Remove(p);
+          p.Free; { release the replaced stm, exactly as the sibling branches do }
+          p:=hp;
+          result:=true;
+        end
+      else if (p.typ=ait_instruction) and
+        MatchInstruction(p, A_STR, [C_None], [PF_None]) and
+        (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
+        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
+        (taicpu(p).oper[1]^.ref^.offset=-4) and
+        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,14]) then
+        begin
+          hp := taicpu.op_regset(A_PUSH, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
+          asml.InsertAfter(hp, p);
+          asml.Remove(p);
+          p.Free;
+          p:=hp;
+          result:=true;
+        end
+      else if (p.typ=ait_instruction) and
+        MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
+        (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
+        (taicpu(p).oper[0]^.ref^.index=NR_STACK_POINTER_REG) and
+        ((taicpu(p).oper[1]^.regset^*[8..14])=[]) then { superregisters 8..14 must be absent }
+        begin
+          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, taicpu(p).oper[1]^.regset^);
+          asml.InsertBefore(hp, p);
+          asml.Remove(p);
+          p.Free;
+          p:=hp;
+          result:=true;
+        end
+      else if (p.typ=ait_instruction) and
+        MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
+        (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
+        (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
+        (taicpu(p).oper[1]^.ref^.offset=4) and
+        (getsupreg(taicpu(p).oper[0]^.reg) in [0..7,15]) then
+        begin
+          hp := taicpu.op_regset(A_POP, R_INTREGISTER, R_SUBWHOLE, [getsupreg(taicpu(p).oper[0]^.reg)]);
+          asml.InsertBefore(hp, p);
+          asml.Remove(p);
+          p.Free;
+          p:=hp;
+          result:=true;
+        end
+      else if (p.typ=ait_instruction) and
+        MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
+        (taicpu(p).oper[1]^.typ=top_const) and
+        (taicpu(p).oper[1]^.val >= 0) and
+        (taicpu(p).oper[1]^.val < 256) and
+        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
+        begin
+          taicpu(p).oppostfix:=PF_S; { mov rX,#0..255 -> movs }
+          result:=true;
+        end
+      else if (p.typ=ait_instruction) and
+        MatchInstruction(p, A_MVN, [], [PF_None]) and
+        (taicpu(p).oper[1]^.typ=top_reg) and
+        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
+        begin
+          taicpu(p).oppostfix:=PF_S; { mvn rX,rY -> mvns }
+          result:=true;
+        end
+      else if (p.typ=ait_instruction) and
+        MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
+        (taicpu(p).ops = 3) and
+        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
+        (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
+        (taicpu(p).oper[2]^.typ=top_const) and
+        (taicpu(p).oper[2]^.val >= 0) and
+        (taicpu(p).oper[2]^.val < 256) and
+        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
+        begin
+          { add/sub rX,rX,#imm -> adds/subs rX,#imm; the constant is copied
+            into operand 1 before the operand count is reduced }
+          taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
+          taicpu(p).oppostfix:=PF_S;
+          taicpu(p).ops := 2;
+          result:=true;
+        end
+      {else if (p.typ=ait_instruction) and
+        MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_LSL,A_LSR,A_ASR,A_ROR], [], [PF_None,PF_S]) and
+        (taicpu(p).ops = 3) and
+        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
+        (taicpu(p).oper[2]^.typ=top_reg) and
+        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
+        begin
+          taicpu(p).ops := 2;
+          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
+          taicpu(p).oppostfix:=PF_S;
+          result:=true;
+        end
+      else if (p.typ=ait_instruction) and
+        MatchInstruction(p, [A_AND,A_ORR,A_EOR], [], [PF_None,PF_S]) and
+        (taicpu(p).ops = 3) and
+        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
+        (not Assigned(FindRegDeAlloc(NR_DEFAULTFLAGS, p))) then
+        begin
+          taicpu(p).oppostfix:=PF_S;
+          taicpu(p).ops := 2;
+          result:=true;
+        end}
+      else if (p.typ=ait_instruction) and
+        MatchInstruction(p, [A_AND], [], [PF_None]) and
+        (taicpu(p).ops = 2) and
+        (taicpu(p).oper[1]^.typ=top_const) and
+        ((taicpu(p).oper[1]^.val=255) or
+         (taicpu(p).oper[1]^.val=65535)) then
+        begin
+          if taicpu(p).oper[1]^.val=255 then
+            taicpu(p).opcode:=A_UXTB
+          else
+            taicpu(p).opcode:=A_UXTH;
+          { and rX,#mask -> uxtb/uxth rX,rX }
+          taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
+          result := true;
+        end
+      else if (p.typ=ait_instruction) and
+        MatchInstruction(p, [A_AND], [], [PF_None]) and
+        (taicpu(p).ops = 3) and
+        (taicpu(p).oper[2]^.typ=top_const) and
+        ((taicpu(p).oper[2]^.val=255) or
+         (taicpu(p).oper[2]^.val=65535)) then
+        begin
+          if taicpu(p).oper[2]^.val=255 then
+            taicpu(p).opcode:=A_UXTB
+          else
+            taicpu(p).opcode:=A_UXTH;
+          taicpu(p).ops:=2; { and rX,rY,#mask -> uxtb/uxth rX,rY }
+          result := true;
+        end
+      {else if (p.typ=ait_instruction) and
+        MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
+        (taicpu(p).oper[1]^.typ=top_const) and
+        (taicpu(p).oper[1]^.val=0) and
+        GetNextInstruction(p,hp1) and
+        (taicpu(hp1).opcode=A_B) and
+        (taicpu(hp1).condition in [C_EQ,C_NE]) then
+        begin
+          if taicpu(hp1).condition = C_EQ then
+            hp2:=taicpu.op_reg_ref(A_CBZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^)
+          else
+            hp2:=taicpu.op_reg_ref(A_CBNZ, taicpu(p).oper[0]^.reg, taicpu(hp1).oper[0]^.ref^);
+
+          taicpu(hp2).is_jmp := true;
+
+          asml.InsertAfter(hp2, hp1);
+
+          asml.Remove(hp1);
+          hp1.Free;
+          asml.Remove(p);
+          p.Free;
+
+          p := hp2;
+
+          result := true;
+        end}
+      else
+        Result := inherited PeepHoleOptPass1Cpu(p);
+    end;
+
+  procedure TCpuThumb2AsmOptimizer.PeepHoleOptPass2;
+    { Turns "Bcc lab; <1..4 conditionable instructions>; lab:" into an IT block:
+      the skipped instructions get the inverse condition, the branch is removed. }
+    var
+      p,hp1,hp2: tai;
+      l : longint;
+      condition : tasmcond;
+      { UsedRegs, TmpUsedRegs: TRegSet; }
+
+    begin
+      p := BlockStart;
+      { UsedRegs := []; }
+      while (p <> BlockEnd) Do
+        begin
+          { UpdateUsedRegs(UsedRegs, tai(p.next)); }
+          case p.Typ Of
+            Ait_Instruction:
+              begin
+                case taicpu(p).opcode Of
+                  A_B:
+                    if taicpu(p).condition<>C_None then
+                      begin
+                         { check for
+                                Bxx   xxx
+                                <several instructions>
+                             xxx:
+                         }
+                         l:=0; { number of instructions that would be made conditional }
+                         GetNextInstruction(p, hp1);
+                         while assigned(hp1) and
+                           (l<=4) and
+                           CanBeCond(hp1) and
+                           { stop on labels }
+                           not(hp1.typ=ait_label) do
+                           begin
+                              inc(l);
+                              if MustBeLast(hp1) then
+                                begin
+                                  //hp1:=nil;
+                                  GetNextInstruction(hp1,hp1);
+                                  break;
+                                end
+                              else
+                                GetNextInstruction(hp1,hp1);
+                           end;
+                         if assigned(hp1) then
+                           begin
+                              if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol),hp1) then
+                                begin
+                                  if (l<=4) and (l>0) then
+                                    begin
+                                      condition:=inverse_cond(taicpu(p).condition);
+                                      hp2:=p; { hp2 keeps the branch; p moves on to the first skipped instruction }
+                                      GetNextInstruction(p,hp1);
+                                      p:=hp1;
+                                      repeat
+                                        if hp1.typ=ait_instruction then
+                                          taicpu(hp1).condition:=condition;
+                                        if MustBeLast(hp1) then
+                                          begin
+                                            GetNextInstruction(hp1,hp1);
+                                            break;
+                                          end
+                                        else
+                                          GetNextInstruction(hp1,hp1);
+                                      until not(assigned(hp1)) or
+                                        not(CanBeCond(hp1)) or
+                                        (hp1.typ=ait_label);
+                                      { wait with removing else GetNextInstruction could
+                                        ignore the label if it was the only usage in the
+                                        jump moved away }
+
+                                      asml.InsertAfter(tai_comment.create(strpnew('Collapsed')), hp2);
+
+                                      DecrementPreceedingIT(asml, hp2);
+
+                                      case l of
+                                        1: asml.InsertAfter(taicpu.op_cond(A_IT,condition), hp2);
+                                        2: asml.InsertAfter(taicpu.op_cond(A_ITT,condition), hp2);
+                                        3: asml.InsertAfter(taicpu.op_cond(A_ITTT,condition), hp2);
+                                        4: asml.InsertAfter(taicpu.op_cond(A_ITTTT,condition), hp2);
+                                      end;
+
+                                      tasmlabel(taicpu(hp2).oper[0]^.ref^.symbol).decrefs;
+                                      asml.remove(hp2);
+                                      hp2.free;
+                                      continue; { p already points at the first conditionalised instruction }
+                                    end;
+                                end;
+                           end;
+                      end;
+                end;
+              end;
+          end;
+          p := tai(p.next)
+        end;
     end;
 
 begin
diff --git a/compiler/arm/armatt.inc b/compiler/arm/armatt.inc
index 8d08467ca4..02ca509e03 100644
--- a/compiler/arm/armatt.inc
+++ b/compiler/arm/armatt.inc
@@ -206,6 +206,10 @@
 'sel',
 'setend',
 'sev',
+'asr',
+'lsr',
+'lsl',
+'ror',
 'shadd16',
 'shadd8',
 'shasx',
@@ -270,12 +274,8 @@
 'wfe',
 'wfi',
 'yield',
-'asr',
-'lsr',
-'lsl',
 'pop',
 'push',
-'ror',
 'sdiv',
 'udiv',
 'movt',
@@ -295,5 +295,7 @@
 'itett',
 'itttt',
 'tbb',
-'tbh'
+'tbh',
+'cbz',
+'cbnz'
 );
diff --git a/compiler/arm/armatts.inc b/compiler/arm/armatts.inc
index 97bb27f230..a308f1c381 100644
--- a/compiler/arm/armatts.inc
+++ b/compiler/arm/armatts.inc
@@ -295,5 +295,7 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );
diff --git a/compiler/arm/armins.dat b/compiler/arm/armins.dat
index 3a5051199d..201d7c92a9 100644
--- a/compiler/arm/armins.dat
+++ b/compiler/arm/armins.dat
@@ -618,6 +618,14 @@ reg32,reg32,reg32,reg32  \x16\x00\x80\x90		 ARM7
 
 [SEVcc]
 
+[ASRcc]
+
+[LSRcc]
+
+[LSLcc]
+
+[RORcc]
+
 [SHADD16cc]
 [SHADD8cc]
 [SHASXcc]
@@ -702,18 +710,10 @@ reg32,reg32,reg32,reg32  \x16\x00\x80\x90		 ARM7
 
 ; Thumb-2
 
-[ASRcc]
-
-[LSRcc]
-
-[LSLcc]
-
 [POP]
 
 [PUSH]
 
-[RORcc]
-
 [SDIVcc]
 
 [UDIVcc]
@@ -752,3 +752,6 @@ reg32,reg32,reg32,reg32  \x16\x00\x80\x90		 ARM7
 
 [TBB]
 [TBH]
+
+[CBZ]
+[CBNZ]
\ No newline at end of file
diff --git a/compiler/arm/armop.inc b/compiler/arm/armop.inc
index 1657861561..20b1956805 100644
--- a/compiler/arm/armop.inc
+++ b/compiler/arm/armop.inc
@@ -206,6 +206,10 @@ A_SBFX,
 A_SEL,
 A_SETEND,
 A_SEV,
+A_ASR,
+A_LSR,
+A_LSL,
+A_ROR,
 A_SHADD16,
 A_SHADD8,
 A_SHASX,
@@ -270,12 +274,8 @@ A_UXTH,
 A_WFE,
 A_WFI,
 A_YIELD,
-A_ASR,
-A_LSR,
-A_LSL,
 A_POP,
 A_PUSH,
-A_ROR,
 A_SDIV,
 A_UDIV,
 A_MOVT,
@@ -295,5 +295,7 @@ A_ITTET,
 A_ITETT,
 A_ITTTT,
 A_TBB,
-A_TBH
+A_TBH,
+A_CBZ,
+A_CBNZ
 );
diff --git a/compiler/arm/cpubase.pas b/compiler/arm/cpubase.pas
index 14eee67b8e..4aae1da0f2 100644
--- a/compiler/arm/cpubase.pas
+++ b/compiler/arm/cpubase.pas
@@ -48,7 +48,7 @@ unit cpubase;
       TAsmOp= {$i armop.inc}
       {This is a bit of a hack, because there are more than 256 ARM Assembly Ops
        But FPC currently can't handle more than 256 elements in a set.}
-      TCommonAsmOps = Set of A_None .. A_UQSADA8;
+      TCommonAsmOps = Set of A_None .. A_UQSAX;
 
       { This should define the array of instructions as string }
       op2strtable=array[tasmop] of string[11];
diff --git a/compiler/arm/rgcpu.pas b/compiler/arm/rgcpu.pas
index 77e3d33f51..d3d104e6b3 100644
--- a/compiler/arm/rgcpu.pas
+++ b/compiler/arm/rgcpu.pas
@@ -70,10 +70,17 @@ unit rgcpu;
     procedure trgintcputhumb2.add_cpu_interferences(p: tai);
       var
         r : tregister;
+        hr : longint;
       begin
         if p.typ=ait_instruction then
           begin
             case taicpu(p).opcode of
+              A_CBNZ,
+              A_CBZ:
+                begin
+                  for hr := RS_R8 to RS_R15 do
+                    add_edge(getsupreg(taicpu(p).oper[0]^.reg), hr);
+                end;
               A_ADD:
                 begin
                   if taicpu(p).ops = 3 then