from nmigen import * from nmigen.hdl.rec import * from nmigen.cli import main # An example parse tree looks like: # nonterminal A # ** | ** # ** | ** # ** | ** # * | * # nonterminal B terminal K nonterminal D # / * * # / * * # terminal I nonterminal C terminal L # \ # \ # terminal J # As it is parsed, we serialize it on the fly. The serialization scheme represents the tree # as a sequence of records, each of which corresponds to a reduction. In other words, each # nonterminal appearing in the parse tree will have a corresponding record. # # The list of records, in order, is therefore as follows: # # nonterminal C -> terminal J # nonterminal B -> terminal I, nonterminal C # nonterminal D -> terminal L # nonterminal A -> nonterminal B, terminal K, nonterminal D # We note that for every nonterminal on the right side of a record, that nonterminal has already # appeared on the left side of a previous record. This is because of how a bottom-up parse proceeds. # Each serialized parse tree record looks like this: # # FIELD 0 FIELD 1 FIELD 2 FIELD 3 FIELD N FIELD N+1 # [Nonterminal symbol] [N = number of subnodes] [tag bit 1, subnode 1] [tag bit 2, subnode 2] ... [tag bit N-1, subnode N-1] [tag bit n, subnode N] # Field 0 indicates the symbol number of the nonterminal created by the reduction. Its bitwidth is # log_2(number of nonterminals). # The number of subnodes, N, is represented as an unsigned integer. To determine its bitwidth, we # examine all the language rules, figure out which is the rule with the most symbols # (terminal and nonterminal alike) on its right-hand-side, and take the log_2 of that. # bitwidth(N) = log_2(max(len(RHS of rule 1), len(RHS of rule 2), len(RHS of rule 3), ... ))) # There are N slots for subnodes following N, and each slot contains a tag bit and a subnode identifier: # If the subnode is a terminal, the symbol/token number of that terminal is placed in the corresponding # slot in the record, and the tag bit is set to zero. 
# If the subnode is a nonterminal, the *index* of the record that *created* that nonterminal is placed in # the corresponding slot in the record, and the tag bit is set to one. # Therefore, the length of each subnode slot is: # 1 + log_2(max(maximum possible number of emitted records per parse, number of terminals)) # For the above example parse tree, the emitted records would be: # record index: concise form verbose form # record 0: [C] [1] [0 J] [nonterminal C] [1 subnode ] [terminal J] # record 1: [B] [2] [0 I] [1 0] [nonterminal B] [2 subnodes] [terminal I] [nonterminal at record 0] # record 2: [D] [1] [0 L] [nonterminal D] [1 subnode ] [terminal L] # record 3: [A] [3] [1 1] [0 K] [1 2] [nonterminal A] [3 subnodes] [nonterminal at record 1] [terminal K] [nonterminal at record 2] # It is possible to deterministically construct the corresponding parse tree from these records. # This serialization scheme has the advantage of not requiring read access to the # serialization memory, which allows for streaming of the parse tree to another component # on/outside the FPGA. In order to accomplish this, we maintain a "sideband" stack (managed # in parallel with the primary parse stack) where indices for nonterminals are kept. # These indices allow "stitching" of the individual records into a coherent parse tree. # Every edge in the above parse tree that is represented with "*" is an edge that needs to # be stitched in this way. 
# The parameters for instantiating the tree serializer are as follows: # # parameters dependent only on the language: # * number of nonterminals # * number of terminals # * list of language rules # # parameters dependent on the use case: # * bitwidth of parse tree memory # * length of parse tree memory # * maximum possible number of emitted records per parse # # If the latter is not provided, a pessimistic estimate will be calculated: # (number of bits in memory) / (number of bits to represent the smallest possible record) # In general, the bitwidth of the record fields do not correspond to the bitwidth of memory available # to the FPGA. In order to address this, we split up the records across multiple memory addresses as # is necessary. This means that the records of the serialized parse tree won't necessarily be in any # sort of alignment. If it is desired, we can add functionality to do alignment. But I do not think it # is a big deal since this data structure anyway needs to be decoded from the start -- there's no random # access possible. 
class TreeSerializer(Elaboratable):
    """Streams serialized parse-tree records into memory as reductions occur.

    Each reduction emits one record, laid out per the scheme in the header
    comment:

        [field 0: created symbol] [field 1: N = subnode count] [subnode 1] ... [subnode N]

    Subnode slots are written first (one per popped stack item); the two
    header fields are back-filled afterwards.  The memory is therefore
    write-only from this component's point of view, which allows the
    serialized tree to be streamed to another component as it is produced.
    """

    def __init__(self, *, item_width, indices_width, stack_depth, serialized_tree_length):
        # Parameters
        self.item_width = item_width                          # bitwidth of a symbol/token number
        self.stack_depth = stack_depth                        # max items popped in one reduction
        self.indices_width = indices_width                    # bitwidth of a record index
        self.mem_address_width = indices_width                # record indices double as memory addresses
        self.serialized_tree_length = serialized_tree_length  # number of memory words available

        # inputs
        # Activated only once per reduction
        self.start_reduction = Signal(1)
        # When ^ goes high, the three below are registered
        self.number_to_pop = Signal(range(stack_depth))
        self.reduce_rule_number = Signal(item_width)          # registered but not yet consumed by the FSM
        self.item_created_by_reduce_rule = Signal(item_width)
        # Varies with every popped item
        self.destroyed_item_valid_in = Signal(1)
        self.destroyed_item_in = Signal(item_width)           # off main stack
        self.destroyed_item_index_in = Signal(indices_width)  # off side stack

        # outputs
        # output to state machine
        self.ready_out = Signal(1)
        self.internal_fault = Signal(1)
        self.serialized_index = Signal(indices_width)  # push *this* onto side stack for the
                                                       # newly created item
        # interface with serialization memory; the data word is one bit wider
        # than a symbol to leave room for the terminal/nonterminal tag bit
        self.memory_write_port = Signal(item_width + 1)
        self.memory_address_port = Signal(self.mem_address_width)
        self.memory_write_enable = Signal(1)
        self.mem = Memory(width=(self.item_width + 1), depth=serialized_tree_length)

    def elaborate(self, platform):
        m = Module()

        # Start of the next/yet-unwritten node record; advanced only after
        # each completed reduce (in FIXUP).
        start_of_record = Signal(self.mem_address_width)

        m.submodules.parse_tree = wport = self.mem.write_port()
        m.d.comb += wport.en.eq(self.memory_write_enable)
        m.d.comb += wport.addr.eq(self.memory_address_port)
        m.d.comb += wport.data.eq(self.memory_write_port)

        # Per-reduce registered signals:
        number_of_children = Signal(range(self.stack_depth))
        item_created_by_reduce_rule = Signal(self.item_width)
        # Incremented for each subnode slot written during the current reduce.
        number_written = Signal(range(self.stack_depth))

        m.d.comb += self.serialized_index.eq(start_of_record)

        # NOTE(review): subnode words never drive the extra tag bit -- a
        # nonterminal is written as its bare record index, and a side-stack
        # index of 0 is treated as "this subnode is a terminal".  Confirm this
        # matches the tag-bit scheme in the header comment before relying on
        # the decoder side.
        with m.FSM():
            with m.State("INITIALIZE"):
                # Reset record pointer and slot counter once, then go idle.
                m.d.comb += self.ready_out.eq(0)
                m.d.comb += self.internal_fault.eq(0)
                m.d.sync += start_of_record.eq(0)
                m.d.sync += number_written.eq(0)
                m.next = "NODE"

            with m.State("NODE"):
                # Idle/ready: wait for a reduction to begin.  The first popped
                # item is written in this same cycle, into the first subnode
                # slot (offset 2, just past the two header fields).
                m.d.comb += self.ready_out.eq(1)
                m.d.comb += self.internal_fault.eq(0)
                m.d.sync += item_created_by_reduce_rule.eq(self.item_created_by_reduce_rule)
                with m.If(self.start_reduction == 1):
                    # Side-stack index 0 marks a terminal: store the symbol
                    # itself; otherwise store the index of the record that
                    # created the nonterminal.
                    with m.If(self.destroyed_item_index_in == 0):
                        m.d.comb += self.memory_write_port.eq(self.destroyed_item_in)
                    with m.Else():
                        m.d.comb += self.memory_write_port.eq(self.destroyed_item_index_in)
                    m.d.comb += self.memory_address_port.eq(start_of_record + 2)
                    m.d.comb += self.memory_write_enable.eq(1)
                    m.d.sync += number_of_children.eq(self.number_to_pop)
                    m.d.sync += number_written.eq(1)
                    m.next = "SUBNODES"
                with m.If(self.memory_address_port > (self.serialized_tree_length - 1)):
                    m.next = "ABORT"

            with m.State("SUBNODES"):
                # One subnode slot per valid popped item.
                m.d.comb += self.ready_out.eq(0)
                m.d.comb += self.internal_fault.eq(0)
                with m.If(self.destroyed_item_valid_in == 1):
                    with m.If(self.destroyed_item_index_in == 0):
                        m.d.comb += self.memory_write_port.eq(self.destroyed_item_in)
                    with m.Else():
                        m.d.comb += self.memory_write_port.eq(self.destroyed_item_index_in)
                    m.d.comb += self.memory_address_port.eq(start_of_record + 2 + number_written)
                    m.d.comb += self.memory_write_enable.eq(1)
                    m.d.sync += number_written.eq(number_written + 1)
                # All subnodes written: back-fill field 0 (the created symbol).
                # nmigen comb assignments are last-one-wins, so when this
                # condition holds it deliberately overrides the write above.
                with m.If(number_written == number_of_children):
                    m.d.comb += self.memory_write_port.eq(item_created_by_reduce_rule)
                    m.d.comb += self.memory_address_port.eq(start_of_record)
                    m.d.comb += self.memory_write_enable.eq(1)
                    m.next = "FIXUP"
                with m.If(self.memory_address_port > (self.serialized_tree_length - 1)):
                    m.next = "ABORT"

            with m.State("FIXUP"):
                # Back-fill field 1 (subnode count) and advance the record
                # pointer: 2 header words + N subnode words.
                m.d.comb += self.ready_out.eq(0)
                m.d.comb += self.internal_fault.eq(0)
                m.d.comb += self.memory_write_port.eq(number_of_children)
                m.d.comb += self.memory_address_port.eq(start_of_record + 1)
                m.d.comb += self.memory_write_enable.eq(1)
                m.d.sync += start_of_record.eq(start_of_record + 2 + number_of_children)
                m.next = "NODE"
                with m.If(self.memory_address_port > (self.serialized_tree_length - 1)):
                    m.next = "ABORT"

            with m.State("ABORT"):
                # Terminal error state: the serialization memory was exceeded.
                m.d.comb += self.ready_out.eq(0)
                m.d.comb += self.internal_fault.eq(1)

        # BUG FIX: elaborate() must return the Module.  The original fell off
        # the end and implicitly returned None, which fails at build time.
        return m