From 3ab7af52fba553b4a63e6d42532f709d603bf8ac Mon Sep 17 00:00:00 2001
From: Kia <kia@special-circumstanc.es>
Date: Wed, 14 Apr 2021 11:52:39 -0600
Subject: [PATCH] start working on downstream transaction flow control (the
 case where we present data but downstream isn't ready yet)

---
 rtl_lib/arbitrary_width_memory.py | 43 +++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 14 deletions(-)

diff --git a/rtl_lib/arbitrary_width_memory.py b/rtl_lib/arbitrary_width_memory.py
index 95c39bd..464a923 100644
--- a/rtl_lib/arbitrary_width_memory.py
+++ b/rtl_lib/arbitrary_width_memory.py
@@ -101,7 +101,7 @@ class ArbitraryWidthMemory(Elaboratable):
 
             # regs to persist output of last read onto the output bus until it's accepted by the downstream consumer
             last_r_data       = Signal(self.fake_data_width)
-            last_r_data_valid = Signal(1)
+            output_txn_not_happened = Signal(1)
 
 
 
@@ -183,7 +183,7 @@ class ArbitraryWidthMemory(Elaboratable):
                         # since the memory data width is a power of two, this is the K-least-significant-bits
                         # of the unwrapped bit index
                         m.d.comb += LS_bit_index_internal.eq(unwrapped_bit_index[:self.backing_memory_data_width_bits])
-                        m.d.sync += LS_bit_index.eq(unwrapped_bit_index[:self.backing_memory_data_width_bits])
+                        m.d.sync += LS_bit_index.eq(         unwrapped_bit_index[:self.backing_memory_data_width_bits])
 
 
                         # Here's where they start trying to trick you. We need to handle the case where the end of the
@@ -197,7 +197,6 @@ class ArbitraryWidthMemory(Elaboratable):
                         with m.If(additional_words == 0):
                             # No additional words, calculate which bits we need from the sole word we're fetching:
                             m.d.sync += MS_bit_index.eq(end_bit_pseudo_index[:self.backing_memory_data_width_bits])
-                            m.d.sync += last_r_data_valid.eq(1)
 
                         with m.Else():
                             # Additional words needed, so we fetch the entire remaining part of this word
@@ -209,35 +208,51 @@ class ArbitraryWidthMemory(Elaboratable):
                             # maybe that saves a few LUTs by avoiding wide compares, who knows.
                             m.d.sync += additional_words_regd.eq(additional_words)
                             m.d.sync += end_bit_pseudo_index_regd.eq(end_bit_pseudo_index)
-                            m.d.sync += last_r_data_valid.eq(0)
 
-                            m.next="ADD"
+                            m.d.sync += shreg.eq(shreg << (shreg_new_bits) | current_slice)
 
 
-                with m.State("ADD"):
+                            m.next="MULTIPLE"
+
+
+                with m.State("MULTIPLE"):
                     m.d.comb += bus.ready_out.eq(0)
                     m.d.sync += shreg.eq(shreg << (shreg_new_bits) | current_slice)
 
-                    # we handle both the full-word fetches and the final (potentially partial word) fetch here
+
                     with m.If(additional_words_regd == 1):
-                        # We start from zero...
+                        # We handle the final (potentially partial-word) fetch here. We start from bit zero...
+
                         m.d.sync += LS_bit_index.eq(0)
+
                         # But this is the last word, so we may not have to include the whole word!
                         m.d.sync += MS_bit_index.eq(end_bit_pseudo_index_regd[:self.backing_memory_data_width_bits])
                         m.d.comb += fetch_address.eq(next_address)
-                        m.d.sync += last_r_data_valid.eq(1)
-                        m.next = "STALL"
+
+                        m.d.sync += lingering_txn.eq(0)
+                        m.next = "FINALIZE"
                     with m.Else():
-                        # non-special case, fetch the whole word
+                        # We handle the non-special case here, so fetch the whole word
                         m.d.sync += LS_bit_index.eq(0)
                         m.d.sync += MS_bit_index.eq(self.backing_memory_data_width-1)
                         # and increment the address and decrement the remaining words counter
+
                         m.d.sync += next_address.eq(next_address + 1)
                         m.d.sync += additional_words_regd.eq(additional_words_regd - 1)
 
                         m.d.comb += fetch_address.eq(next_address)
-                with m.State("STALL"):
-                    m.next="STALL"
+
+                with m.State("FINALIZE"):
+                    # The final word fetch has happened; we now slice it and present it on the downstream data interface.
+                    with m.If(lingering_txn == 0):
+                        m.d.comb += bus.r_data.eq(     shreg << (shreg_new_bits) | current_slice)
+                        m.d.sync += last_r_data.eq(shreg << (shreg_new_bits) | current_slice)
+                        m.d.sync += lingering_txn.eq(1)
+                    with m.If(lingering_txn == 1):
+                        m.d.comb += bus.r_data.eq(last_r_data)
+                    m.d.comb += bus.valid_out.eq(1)
+
+
             return m
 
 
@@ -284,7 +299,7 @@ class DummyPlug(Elaboratable):
         m = Module()
 
         m.submodules.FakeAWMem = FakeAWMem = ArbitraryWidthMemory(fake_data_width=16,
-                            fake_address_width=8, initial_data=[0xAB, 0xCD], # refolder([10,9,8,7,6,5,4,3,2,1],4, 8),
+                            fake_address_width=8, initial_data=[0xBA, 0xDC,0xFE], # refolder([10,9,8,7,6,5,4,3,2,1],4, 8),
                             backing_memory_data_width=8, backing_memory_address_width=8)
         counter = Signal(8, reset=0)
 
-- 
GitLab