From 248082347fc619a7c4381218e01b96ef32545594 Mon Sep 17 00:00:00 2001
From: ccrause <christo.crause@gmail.com>
Date: Fri, 24 Nov 2023 22:05:33 +0200
Subject: [PATCH] AVR optimizations LdsMov2Lds and LdsLdsMovMov2LdsLds

---
 compiler/avr/aoptcpu.pas | 89 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/compiler/avr/aoptcpu.pas b/compiler/avr/aoptcpu.pas
index 3ad7b9a40a..0ee8498473 100644
--- a/compiler/avr/aoptcpu.pas
+++ b/compiler/avr/aoptcpu.pas
@@ -401,6 +401,8 @@ Implementation
 
 
   function TCpuAsmOptimizer.OptPass1LDS(var p : tai) : boolean;
+    var
+      hp1, hp2, hp3, alloc, dealloc: tai;
     begin
       Result:=false;
       if (taicpu(p).oper[1]^.ref^.symbol=nil) and
@@ -424,6 +426,93 @@ Implementation
           taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);
 
         result:=true;
+      end
+
+      { turn
+          alloc reg0
+          alloc reg1
+          lds reg0, label0
+          lds reg1, label1
+          mov reg2, reg0
+          mov reg3, reg1
+          dealloc reg0
+          dealloc reg1
+
+        into
+          lds reg2, label0
+          lds reg3, label1
+      }
+      else if not(cs_opt_level3 in current_settings.optimizerswitches) and
+      (taicpu(p).oper[0]^.typ=top_reg) and
+      (GetNextInstruction(p,hp1)) and MatchInstruction(hp1,A_LDS) and
+      (taicpu(hp1).oper[0]^.typ=top_reg) and
+      (GetNextInstruction(hp1, hp2)) and MatchInstruction(hp2,A_MOV) and
+      (taicpu(hp2).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
+      (GetNextInstruction(hp2, hp3)) and MatchInstruction(hp3,A_MOV) and
+      (taicpu(hp3).oper[1]^.reg=taicpu(hp1).oper[0]^.reg) then
+      begin
+        DebugMsg('Peephole LdsLdsMovMov2LdsLds performed', p);
+
+        alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
+        dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
+        if assigned(alloc) and assigned(dealloc) then
+          begin
+            asml.Remove(alloc);
+            alloc.Free;
+            asml.Remove(dealloc);
+            dealloc.Free;
+          end;
+        taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
+        RemoveInstruction(hp2);
+
+        alloc:=FindRegAllocBackward(taicpu(hp1).oper[0]^.reg,tai(hp1.Previous));
+        dealloc:=FindRegDeAlloc(taicpu(hp1).oper[0]^.reg,tai(hp3.Next));
+        if assigned(alloc) and assigned(dealloc) then
+          begin
+            asml.Remove(alloc);
+            alloc.Free;
+            asml.Remove(dealloc);
+            dealloc.Free;
+          end;
+
+        taicpu(hp1).oper[0]^.reg:=taicpu(hp3).oper[0]^.reg;
+        RemoveInstruction(hp3);
+        Result:=true;
+      end
+
+      { turn
+          alloc reg0
+          lds reg0, label
+          ...  (reg0 not modified and reg1 not used in between)
+          mov reg1, reg0
+          dealloc reg0
+
+        into
+          lds reg1, label
+      }
+      else if (cs_opt_level3 in current_settings.optimizerswitches) and
+      (taicpu(p).oper[0]^.typ=top_reg) and
+      (GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg)) and
+      MatchInstruction(hp1,A_MOV) and
+      (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
+      (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
+      (not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) then
+      begin
+        DebugMsg('Peephole LdsMov2Lds performed', p);
+
+        alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
+        dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
+        if assigned(alloc) and assigned(dealloc) then
+          begin
+            asml.Remove(alloc);
+            alloc.Free;
+            asml.Remove(dealloc);
+            dealloc.Free;
+          end;
+
+        taicpu(p).oper[0]^.reg:=taicpu(hp1).oper[0]^.reg;
+        RemoveInstruction(hp1);
+        result:=true;
       end;
     end;