diff --git a/rtl_lib/gearbox.py b/rtl_lib/gearbox.py index 88681413da7ead5f872a071dd12ccce4035f9186..81df8a41a0bd08197c6422039b6d9d21c1b53127 100644 --- a/rtl_lib/gearbox.py +++ b/rtl_lib/gearbox.py @@ -6,279 +6,86 @@ from nmigen.cli import main from pipe_stage import PipeStage # NEXT TASKS (IMMEDIATE) -# finish fleshing out code, attach buses from the Flow controller to the gearbox -# do the write-logic +# get the testbenches working and model checking done +# formal proof (k-induction and/or ABC-PDR) # figure out how the Altera gearbox design works (number of pipeline stages and what each does) - - -# REMAINING WORK -# -# can make a testbench only operating on the indices and the flowcontrol signals that keeps a -# model of the valid/invalid bits and verifies that when a transaction happens, the right bits -# get read/written and the right indices get moved in the right ways - - -class IndexDisambiguator(Enum): - LAST_OP_UNKNOWN = 0 - LAST_OP_WAS_WRITE = 1 - LAST_OP_WAS_READ = 2 - class GearboxBusLayout(Layout): - def __init__(self, *, in_width, out_width): # the * forces keyword args + def __init__(self, *, upstream_width, downstream_width): # the * forces keyword args super().__init__([ # DATA - ("data_in", unsigned(in_width)), # FROM SOURCE - ("data_out", unsigned(out_width)), # TO DEST + ("data_in", unsigned(upstream_width)), # FROM SOURCE + ("data_out", unsigned(downstream_width)), # TO DEST # CONTROL - ("valid_in", 1), # FROM SOURCE - ("ready_out", 1), # TO SOURCE - - ("ready_in", 1), # FROM DEST - ("valid_out", 1), # TO DEST - ("fault", 1) + ("upstream_valid_in", 1), # FROM SOURCE + ("upstream_ready_out", 1), # TO SOURCE + ("downstream_ready_in", 1), # FROM DEST + ("downstream_valid_out", 1), # TO DEST ]) class GearboxBus(Record): - def __init__(self, *, in_width, out_width): - super().__init__(GearboxBusLayout(in_width=in_width, out_width=out_width)) - - - -class GearboxFCLayout(Layout): - def __init__(self, *, len_storage): - super().__init__([ - # DATA - ("read_ptr", unsigned(len(range(len_storage)))), # FROM GEARBOX - ("write_ptr", unsigned(len(range(len_storage)))), # FROM GEARBOX - - # CONTROL - ("write_happens_this_cycle", 1), # TO GEARBOX - ("read_happens_this_cycle", 1), # TO GEARBOX - - ("ready_in", 1), # FROM GEARBOX (FROM DOWNSTREAM) - ("valid_in", 1), # FROM GEARBOX (FROM UPSTREAM) - - - ]) - -class GearboxFCBus(Record): - def __init__(self, *, len_storage): - super().__init__(GearboxFCLayout(len_storage=len_storage)) - - - -class GearboxFlowControl(Elaboratable): - def __init__(self, *, in_width, out_width, len_storage): - self.in_width = in_width - self.out_width = out_width - self.len_storage = len_storage - - self.bus = GearboxFCBus(len_storage=len_storage) - #print(len(self.bus.read_ptr)) - - - def elaborate(self, platform): - m = Module() - internalfault = Signal() - - # The top-level flow control logic works as follows. - # First, we determine which operations are *possible* based on the read/write indices - # and the index disambiguator bit: - - # 1) we determine if we have enough invalid bits in the buffer to write-in - # If so, we set ready_out to signal to upstream we're ready to ingest - can_write_this_cycle = Signal(1) - # 2) we determine if we have enough valid bits in the buffer to read-out - # If so, we set valid_out to signal to downstream we're ready to produce - can_read_this_cycle = Signal(1) - # Then, we look at the flow-control signals from upstream/downstream to see which - # transactions will happen, and set the following two bus signals, which are - # used to gate the read/write index updates: - # write_happens_this_cycle - # read_happens_this_cycle - - - # We replicate signals to avoid repeating "self.bus." - - read_ptr = Signal(range(self.len_storage)) - m.d.comb += read_ptr.eq(self.bus.read_ptr) - - write_ptr = Signal(range(self.len_storage)) - m.d.comb += write_ptr.eq(self.bus.write_ptr) - - len_storage = self.len_storage - - disambiguator = Signal(IndexDisambiguator, reset=IndexDisambiguator.LAST_OP_WAS_READ) - - - with m.If(read_ptr == write_ptr): # the special case first - with m.Switch(disambiguator): - with m.Case(IndexDisambiguator.LAST_OP_UNKNOWN): # fault - m.d.comb += can_read_this_cycle.eq(0) - m.d.comb += can_write_this_cycle.eq(0) - m.d.sync += internalfault.eq(1) - - with m.Case(IndexDisambiguator.LAST_OP_WAS_WRITE): # completely full - m.d.comb += can_read_this_cycle.eq(1) - m.d.comb += can_write_this_cycle.eq(0) - - with m.Case(IndexDisambiguator.LAST_OP_WAS_READ): # completely empty - m.d.comb += can_read_this_cycle.eq(0) - m.d.comb += can_write_this_cycle.eq(1) - - with m.Elif(read_ptr < write_ptr): - # read_ptr < write_ptr. Here, the valid bits do not wrap, the invalid bits wrap. - # The valid bits are: inclusive [read_ptr, write_ptr) exclusive - # the invalid bits are inclusive [write_ptr, K] inclusive, union with inclusive [0, read_ptr) exclusive - - # We first calculate the number of valid and invalid bits: - numvalid = Signal(range(len_storage)) - numinvalid = Signal(range(len_storage)) - - m.d.comb += numvalid.eq(write_ptr - read_ptr) - m.d.comb += numinvalid.eq(len_storage - write_ptr + read_ptr) - - with m.If(numvalid >= self.out_width): - m.d.comb += can_read_this_cycle.eq(1) - - with m.If(numinvalid >= self.in_width): - m.d.comb += can_write_this_cycle.eq(1) - - with m.Elif(read_ptr > write_ptr): - # write_ptr < read_ptr. Here, the valid bits wrap, and the invalid bits do not wrap. - # The valid bits are: inclusive [read_ptr, K] inclusive, union with inclusive [0, write_ptr) exclusive - # the invalid bits are inclusive [write_ptr, read_ptr) exclusive - - # We first calculate the number of valid and invalid bits: - numvalid = Signal(range(len_storage)) - numinvalid = Signal(range(len_storage)) - - m.d.comb += numvalid.eq(len_storage - read_ptr + write_ptr) - m.d.comb += numinvalid.eq(read_ptr - write_ptr) - - with m.If(numvalid >= self.out_width): - m.d.comb += can_read_this_cycle.eq(1) - - with m.If(numinvalid >= self.in_width): - m.d.comb += can_write_this_cycle.eq(1) - - with m.Else(): # should never happen - m.d.sync += internalfault.eq(1) - return m + def __init__(self, *, upstream_width, downstream_width): + super().__init__(GearboxBusLayout(upstream_width=upstream_width, downstream_width=downstream_width)) class ArbitraryGearbox(Elaboratable): - def __init__(self, *, in_width, out_width): - self.in_width = in_width - self.out_width = out_width + def __init__(self, *, upstream_width, downstream_width): + self.upstream_width = upstream_width + self.downstream_width = downstream_width - self.bus = GearboxBus(in_width=in_width, out_width=out_width) + self.bus = GearboxBus(upstream_width=upstream_width, downstream_width=downstream_width) def elaborate(self, platform): m = Module() - loop = Signal(1) - len_storage = self.in_width + self.out_width - m.submodules.flow_controller = flow_controller = GearboxFlowControl(in_width=self.in_width, out_width=self.out_width, len_storage=len_storage) - - #storage = Signal(len_storage, reset=0b001_010_011_100_101_110_111) - #storage = Signal(len_storage, reset= 0b111_110_101_100_011_010_001) - - - storage = Signal(len_storage) - - write_ptr = Signal(range(len_storage)) - read_ptr = Signal(range(len_storage)) + upstream_width = self.upstream_width + downstream_width = self.downstream_width - # The buffer is composed of two different flavor of bits: + shift_reg_len = upstream_width + downstream_width + shift_reg = Signal(shift_reg_len) + valid_bit_count = Signal(range(shift_reg_len)) - # Invalid bits CANNOT BE READ OUT CAN BE WRITTEN TO - # Valid bits CAN BE READ OUT CANNOT BE WRITTEN TO - # and the location of those bits are given by the read_ptr and the write_ptr + upstream_transaction = Signal(1) + downstream_transaction = Signal(1) - # We only allow a read operation (which is not idempotent as the read_ptr is advanced) if: - # 1) the downstream interface is signaling ready - # 2) there are out_width valid bits in front of the read_ptr + m.d.comb += upstream_transaction.eq( self.bus.upstream_ready_out & self.bus.upstream_valid_in) + m.d.comb += downstream_transaction.eq(self.bus.downstream_ready_in & self.bus.downstream_valid_out) + m.d.comb += self.bus.upstream_ready_out.eq( shift_reg_len - valid_bit_count >= upstream_width) + m.d.comb += self.bus.downstream_valid_out.eq(shift_reg_len >= downstream_width) - # Likewise, we only allow a write operation (which is heavily non-idempotent as the write_ptr is advanced - # and the buffer is modified) if: - # 1) the upstream interface is signaling valid (we cannot allow ourselves to ingest invalid data!) - # 2) there are in_width invalid bits in front of the write_ptr + m.d.comb += self.bus.data_out.eq(shift_reg[0:downstream_width]) + with m.If(upstream_transaction == 1): + with m.If(downstream_transaction == 1): # upstream and downstream + # the simultaneous transaction is hard, but updating the valid count isn't + m.d.sync += valid_bit_count.eq(valid_bit_count + upstream_width - downstream_width) - # There are multiple cases for read_ptr and write_ptr: + # the updated shift register is composed of two parts, one from the old bits, + # and one from the new bits. + # + # First, we shift out downstream_width bits from the bottom of the register + # What remains is (shift_reg >> downstream_width) + # + # To fit the new bits in, we must shift them at the correct location so no valid + # bit is overwritten and no junk bits are left: + # (self.bus.data_in << (valid_bit_count - downstream_width)) + m.d.sync += shift_reg.eq((self.bus.data_in << (valid_bit_count - downstream_width))| (shift_reg >> downstream_width)) - - # Naturally, there is a tricky edge case which requires an extra bit (literally) of disambiguation. - # If write_ptr == read_ptr we don't know if the buffer is entirely full or entirely empty. - - # If the last operation was a read, and we find write_ptr == read_ptr, we know it's empty - # If the last operation was a write, and we find write_ptr == read_ptr, we know it's full - - # We keep a bit of state -- which is only relevant if write_ptr == read_ptr, to keep track of - # this: IndexDisambiguator, which can either be LAST_OP_WAS_WRITE or LAST_OP_WAS_READ - - - - - - with m.If(self.bus.fault == 0): - # read index update: - with m.If(self.bus.ready_in == 1): - with m.If(read_ptr + self.out_width >= len_storage): - m.d.sync += read_ptr.eq(read_ptr + self.out_width - len_storage) - with m.Else(): - m.d.sync += read_ptr.eq(read_ptr + self.out_width) - - # read-out bits case analysis: - with m.If(read_ptr + self.out_width <= len_storage): - m.d.comb += self.bus.data_out.eq(storage.bit_select(read_ptr, self.out_width)) - with m.Else(): - with m.Switch(read_ptr): - for cand_rptr in range(len_storage - self.out_width + 1, len_storage): - with m.Case(cand_rptr): - m.d.comb += loop.eq(1) - non_wrapping_bitwidth = len_storage - cand_rptr - wrapping_bitwidth = self.out_width + cand_rptr - len_storage - m.d.comb += self.bus.data_out.eq(Cat( - storage.bit_select(cand_rptr, non_wrapping_bitwidth), # the non-wrapping bits are less-significant - storage.bit_select(0, wrapping_bitwidth) # ...than the wrapping bit - )) - - # write index update: - with m.If(0 == 1): - with m.If(write_ptr + self.in_width <= len_storage): - m.d.sync += write_ptr.eq(write_ptr + self.in_width - len_storage) - with m.Else(): - m.d.sync += write_ptr.eq(write_ptr + self.in_width) - - # actually write-in the bits - - with m.If(write_ptr + self.out_width <= len_storage): - m.d.comb += self.bus.data_out.eq(storage.bit_select(read_ptr, self.out_width)) - with m.Else(): - with m.Switch(write_ptr): - for cand_wptr in range(len_storage - self.out_width + 1, len_storage): - with m.Case(cand_wptr): - m.d.comb += loop.eq(1) - non_wrapping_bitwidth = len_storage - cand_wptr - wrapping_bitwidth = self.out_width + cand_wptr - len_storage - m.d.comb += self.bus.data_out.eq(Cat( - storage.bit_select(cand_wptr, non_wrapping_bitwidth), # the non-wrapping bits are less-significant - storage.bit_select(0, wrapping_bitwidth) # ...than the wrapping bit - )) - - - #m.d.sync += Assert(read_ptr == 0) - - #m.d.comb += Assume((self.bus.data_in == 3) |( self.bus.data_in==5)) + with m.Else(): # upstream, no downstream + m.d.sync += valid_bit_count.eq(valid_bit_count + upstream_width) + # we shift in bits on the high side of the shift register + m.d.sync += shift_reg.eq((self.bus.data_in << valid_bit_count) | shift_reg) + with m.Else(): + with m.If(downstream_transaction == 1): # no upstream, downstream + m.d.sync += valid_bit_count.eq(valid_bit_count - downstream_width) + # we get rid of the low bits that go out the bus during this transaction + m.d.sync += shift_reg.eq(shift_reg[downstream_width:]) return m @@ -289,7 +96,7 @@ class IdempotentGearbox(Elaboratable): def __init__(self, *, width): self.width = width - self.bus = GearboxBus(in_width=width, out_width=width) + self.bus = GearboxBus(upstream_width=width, downstream_width=width) def elaborate(self, platform): m = Module() @@ -314,14 +121,14 @@ class IdempotentGearbox(Elaboratable): # and also the arbitrary ratio gearbox. class GoldenGearboxModel(Elaboratable): - def __init__(self, *, in_width, out_width, sim_memory_size): - self.in_width = in_width - self.out_width = out_width + def __init__(self, *, upstream_width, downstream_width, sim_memory_size): + self.upstream_width = upstream_width + self.downstream_width = downstream_width self.sim_memory_size = sim_memory_size self.memory = Signal(sim_memory_size) - self.bus = GearboxBus(in_width=in_width, out_width=out_width) + self.bus = GearboxBus(upstream_width=upstream_width, downstream_width=downstream_width) def elaborate(self, platform): m = Module() @@ -329,7 +136,7 @@ class GoldenGearboxModel(Elaboratable): read_ptr = Signal(range(self.sim_memory_size)) with m.If(self.bus.valid_in == 1): - m.d.sync += write_ptr.eq(write_ptr + self.in_width) + m.d.sync += write_ptr.eq(write_ptr + self.upstream_width) with m.Switch(write_ptr): for cand_wptr in range(0, self.sim_memory_size): with m.Case(cand_wptr): @@ -339,6 +146,9 @@ class GoldenGearboxModel(Elaboratable): Cat(self.memory.bit_select(0, cand_wptr), self.bus.data_in)) + with m.If(self.bus.ready_in == 1): + m.d.comb += self.bus.data_out.eq(self.memory.bit_select(read_ptr,self.downstream_width)) + m.d.sync += read_ptr.eq(read_ptr + self.downstream_width) return m @@ -357,14 +167,17 @@ class DummyPlug(Elaboratable): #m.d.comb += gearbox.bus.ready_in.eq(AnySeq(1)) #m.d.comb += gearbox.bus.valid_in.eq(AnySeq(1)) - m.submodules.gearbox = gearbox = GoldenGearboxModel(in_width=3, out_width=6, sim_memory_size=16) + m.submodules.gearbox = gearbox = ArbitraryGearbox(upstream_width=4, downstream_width=2) #, sim_memory_size=16) counter = Signal(8) m.d.sync += counter.eq(counter+1) - with m.If(counter%2 == 0): - m.d.comb += gearbox.bus.valid_in.eq(1) + #with m.If(counter%4 = ): + m.d.comb += gearbox.bus.downstream_ready_in.eq(1) + + + m.d.comb += gearbox.bus.upstream_valid_in.eq(1) - m.d.comb += gearbox.bus.data_in.eq(0xf) + m.d.comb += gearbox.bus.data_in.eq(0xe) return m