diff --git a/rtl_lib/arbitrary_width_memory.py b/rtl_lib/arbitrary_width_memory.py
index 5a13750a0ca08fd1c50c5d7cda0ab5b3e351eb35..e492e55be80d8ddcac6da5674a7f6dbaaec7ade5 100644
--- a/rtl_lib/arbitrary_width_memory.py
+++ b/rtl_lib/arbitrary_width_memory.py
@@ -106,38 +106,63 @@ class ArbitraryWidthMemory(Elaboratable):
                 m.d.comb += bus.r_data.eq(last_r_data)

                 with m.If(bus.valid_in == 1):
-                    # the index and bit-index computation goes as follows:
+                    # The memory address and bit-index computation goes as follows:
-                    # 1) We calculate an unwrapped bit index by multiplying the fake index by the fake data width
+                    # We calculate an unwrapped bit index by multiplying the fake index by the fake data width.
                     m.d.comb += unwrapped_bit_index.eq(bus.r_addr * self.fake_data_width)
+
+                    # The memory address is simply the unwrapped bit index divided by the memory data width.
+                    # Since the memory data width is a power of two, this is a shift!
                     m.d.comb += starting_word.eq(unwrapped_bit_index[self.backing_memory_data_width_bits:])
+
+                    # We say we'll do a fetch at that address:
                     m.d.comb += fetch_address.eq(starting_word)
+
+                    # We start our cut at the unwrapped bit index modulo the memory data width.
+                    # Since the memory data width is a power of two, this is the K least significant bits
+                    # of the unwrapped bit index.
                     m.d.comb += left_bit_index.eq(unwrapped_bit_index[:self.backing_memory_data_width_bits])
+
+                    # Here's where they start trying to trick you. We need to handle the case where the end
+                    # of the fake word goes beyond a real memory word.
                     m.d.comb += end_bit_pseudo_index.eq(left_bit_index + self.fake_data_width-1)
+
+                    # So here we determine if there's any need for additional memory words:
                     m.d.comb += additional_words.eq(end_bit_pseudo_index[self.backing_memory_data_width_bits:])

                     with m.If(additional_words == 0):
+                        # No additional words: calculate which bits we need from the sole word we're fetching.
                         m.d.comb += right_bit_index.eq(end_bit_pseudo_index[:self.backing_memory_data_width_bits])
                     with m.Else():
+                        # Additional words needed, so we take the entire remaining part of this word
                         m.d.comb += right_bit_index.eq(self.backing_memory_data_width-1)
+                        # and we register state for the next cycle(s).
                         m.d.sync += next_address.eq(fetch_address + 1)
+                        # Rather than keeping track of the next address and the final address,
+                        # we keep track of the current address and how many additional words are left;
+                        # maybe that saves a few LUTs by avoiding wide compares, who knows.
                         m.d.sync += additional_words_regd.eq(additional_words)
                         m.next="ADD"

             with m.State("ADD"):
                 # we handle both the full-word fetches and the final (potentially partial word) fetch here
-                with m.If(additional_words_regd == 1): # special case, we may not have to include the whole word!
+                with m.If(additional_words_regd == 1):
+                    # We start from zero...
                     m.d.comb += left_bit_index.eq(0)
+                    # But this is the last word, so we may not have to include the whole word!
                     m.d.comb += right_bit_index.eq(end_bit_pseudo_index[:self.backing_memory_data_width_bits])
                     m.d.comb += fetch_address.eq(next_address)
                     m.next = "STALL"
-                with m.Else(): # non-special case, fetch the whole word.
+                with m.Else():
+                    # Non-special case: fetch the whole word
                     m.d.comb += left_bit_index.eq(0)
                     m.d.comb += right_bit_index.eq(self.backing_memory_data_width-1)
+                    # and increment the address and decrement the remaining-words counter.
                     m.d.sync += next_address.eq(next_address + 1)
-                    m.d.comb += fetch_address.eq(next_address)
                     m.d.sync += additional_words_regd.eq(additional_words_regd - 1)
+
+                    m.d.comb += fetch_address.eq(next_address)

             with m.State("STALL"):
                 m.next="STALL"

         return m
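
To sanity-check the slicing arithmetic above, here is a minimal pure-Python model of the same read path. This is a sketch under stated assumptions, not the module's code: read_fake_word and its parameters are hypothetical names that mirror the signals in the diff, and it assumes LSB-first packing and a power-of-two backing memory width.

# A pure-Python sketch of the FETCH/ADD arithmetic above. Hypothetical helper,
# not the module's API; assumes LSB-first packing and a power-of-two backing width.
def read_fake_word(backing, fake_addr, fake_data_width, backing_memory_data_width):
    assert backing_memory_data_width & (backing_memory_data_width - 1) == 0
    backing_memory_data_width_bits = backing_memory_data_width.bit_length() - 1

    # Unwrapped bit index: fake index times fake width.
    unwrapped_bit_index = fake_addr * fake_data_width
    # Memory address: division by the backing width becomes a right shift.
    starting_word = unwrapped_bit_index >> backing_memory_data_width_bits
    # Left cut point: modulo the backing width becomes a low-bit mask.
    left_bit_index = unwrapped_bit_index & (backing_memory_data_width - 1)
    # Last bit of the fake word, counted from the start of the first word.
    end_bit_pseudo_index = left_bit_index + fake_data_width - 1
    # How many additional backing words the fake word spills into.
    additional_words = end_bit_pseudo_index >> backing_memory_data_width_bits

    result, shift = 0, 0
    for i in range(additional_words + 1):
        word = backing[starting_word + i]
        left = left_bit_index if i == 0 else 0
        if i == additional_words:
            # Final (possibly partial) word: stop at the end bit, mod backing width.
            right = end_bit_pseudo_index & (backing_memory_data_width - 1)
        else:
            # Middle words are taken whole.
            right = backing_memory_data_width - 1
        chunk = (word >> left) & ((1 << (right - left + 1)) - 1)
        result |= chunk << shift
        shift += right - left + 1
    return result

# Example: 12-bit fake words over an 8-bit backing memory. Fake word 1 is
# bits 12..23 of the packed stream: the top nibble of byte 1, then all of byte 2.
backing = [0xAB, 0xCD, 0xEF, 0x12]
assert read_fake_word(backing, 1, 12, 8) == 0xEFC

The model walks the same three cases the FSM handles (a single word, whole middle words, and a final partial word), which makes it easy to cross-check the left/right bit indices computed in the FETCH and ADD states.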