From 0fcb92480a1fe8b8478c74e3bc1d36d3f460d264 Mon Sep 17 00:00:00 2001
From: Kia <kia@special-circumstanc.es>
Date: Sun, 31 Jan 2021 12:20:09 -0700
Subject: [PATCH] unoptimized_lr/simple_parse_tree.py

---
 unoptimized_lr/simple_parse_tree.py | 221 ++++++++++++++++++++++++++++
 1 file changed, 221 insertions(+)
 create mode 100644 unoptimized_lr/simple_parse_tree.py

diff --git a/unoptimized_lr/simple_parse_tree.py b/unoptimized_lr/simple_parse_tree.py
new file mode 100644
index 0000000..700af89
--- /dev/null
+++ b/unoptimized_lr/simple_parse_tree.py
@@ -0,0 +1,221 @@
+# An example parse tree looks like:
+
+#                       nonterminal A                   
+#                      *      |      *                  
+#                     *       |       *                 
+#                    *    terminal K   *                
+#                   *                   *               
+#            nonterminal B          nonterminal D       
+#           /          *                   *            
+#          /            *                   *           
+#         /           nonterminal C          *          
+#        /               \                    *         
+#       /                 \                  terminal L 
+#    terminal I            \                            
+#                      terminal J                       
+
+# As it is parsed, we serialize it on the fly. The serialization scheme represents the tree
+# as a sequence of records, each of which corresponds to a reduction. In other words, each
+# nonterminal appearing in the parse tree will have a corresponding record.
+#
+# The list of records, in order, is therefore as follows:
+#
+# nonterminal C -> terminal J
+# nonterminal B -> terminal I, nonterminal C
+# nonterminal D -> terminal L
+# nonterminal A -> nonterminal B, terminal K, nonterminal D
+
+# We note that for every nonterminal on the right side of a record, that nonterminal has already
+# appeared on the left side of a previous record. This is because of how a bottom-up parse proceeds.
+
+# Each serialized parse tree record looks like this:
+#
+#        FIELD 0               FIELD 1                FIELD 2                 FIELD 3                      FIELD N                 FIELD N+1
+# [Nonterminal symbol] [N = number of subnodes] [tag bit 1, subnode 1] [tag bit 2, subnode 2] ... [tag bit N-1, subnode N-1] [tag bit N, subnode N]
+
+# Field 0 indicates the symbol number of the nonterminal created by the reduction. Its bitwidth is
+# ceil(log_2(number of nonterminals)).
+
+# The number of subnodes, N, is represented as an unsigned integer. To determine its bitwidth, we
+# examine all the language rules, find the rule with the most symbols
+# (terminal and nonterminal alike) on its right-hand side, and take the ceiling of the log_2 of that:
+
+# bitwidth(N) = ceil(log_2(max(len(RHS of rule 1), len(RHS of rule 2), len(RHS of rule 3), ...)))
+
+# There are N slots for subnodes following N, and each slot contains a tag bit and a subnode identifier:
+
+# If the subnode is a terminal, the symbol/token number of that terminal is placed in the corresponding
+# slot in the record, and the tag bit is set to zero.
+
+# If the subnode is a nonterminal, the *index* of the record that *created* that nonterminal is placed in
+# the corresponding slot in the record, and the tag bit is set to one.
+
+# Therefore, the length of each subnode slot is:
+# 1 + ceil(log_2(max(maximum possible number of emitted records per parse, number of terminals)))
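+
+# As a concrete illustration, the following is a small software-only sketch of
+# how those three field widths could be derived from a grammar description.
+# The argument names are hypothetical and are not taken from the rest of this
+# repository.
+
+import math
+
+def record_field_widths(num_nonterminals, num_terminals, rule_rhs_lengths,
+                        max_records_per_parse):
+    # Field 0: which nonterminal was created by the reduction.
+    nonterminal_width = math.ceil(math.log2(num_nonterminals))
+    # Field 1: the subnode count N, sized for the longest right-hand side.
+    count_width = math.ceil(math.log2(max(rule_rhs_lengths)))
+    # Fields 2..N+1: one tag bit plus room for either a terminal symbol number
+    # or the index of a previously emitted record.
+    subnode_width = 1 + math.ceil(math.log2(max(max_records_per_parse,
+                                                num_terminals)))
+    return nonterminal_width, count_width, subnode_width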
+
+
+# For the above example parse tree, the emitted records would be:
+
+# record index: concise form                 verbose form
+# record 0:     [C] [1] [0 J]                [nonterminal C] [1 subnode ] [terminal J]
+# record 1:     [B] [2] [0 I] [1 0]          [nonterminal B] [2 subnodes] [terminal I] [nonterminal at record 0]
+# record 2:     [D] [1] [0 L]                [nonterminal D] [1 subnode ] [terminal L]
+# record 3:     [A] [3] [1 1] [0 K] [1 2]    [nonterminal A] [3 subnodes] [nonterminal at record 1] [terminal K] [nonterminal at record 2]
+
+# It is possible to deterministically construct the corresponding parse tree from these records.
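+#
+# To make that concrete, here is a software-only sketch (it is not used by the
+# hardware below) that rebuilds a nested tree from a list of such records,
+# represented here as (nonterminal, [(tag, value), ...]) tuples. The names and
+# layout are illustrative assumptions, not a format used elsewhere in this
+# repository.
+
+def rebuild_tree(records):
+    # tag 0: value is a terminal symbol; tag 1: value is the index of the
+    # record that created the sub-nonterminal.
+    def build(index):
+        nonterminal, subnodes = records[index]
+        children = [value if tag == 0 else build(value)
+                    for tag, value in subnodes]
+        return (nonterminal, children)
+    # The last record emitted is always the root of the parse tree.
+    return build(len(records) - 1)
+
+# For the example records above,
+#   rebuild_tree([("C", [(0, "J")]),
+#                 ("B", [(0, "I"), (1, 0)]),
+#                 ("D", [(0, "L")]),
+#                 ("A", [(1, 1), (0, "K"), (1, 2)])])
+# yields ("A", [("B", ["I", ("C", ["J"])]), "K", ("D", ["L"])]).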
+
+# This serialization scheme has the advantage of not requiring read access to the
+# serialization memory, which allows for streaming of the parse tree to another component
+# on/outside the FPGA. In order to accomplish this, we maintain a "sideband" stack (managed
+# in parallel with the primary parse stack) where indices for nonterminals are kept.
+# These indices allow "stitching" of the individual records into a coherent parse tree.
+
+# Every edge in the above parse tree that is represented with "*" is an edge that needs to
+# be stitched in this way.
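+#
+# A small software model of that sideband stack, to show how the records could
+# be emitted on the fly. This is an illustrative sketch under the assumptions
+# above, not the hardware implementation (the TreeSerializer below streams one
+# popped item per cycle instead).
+
+class SidebandModel:
+    def __init__(self):
+        self.side_stack = []   # one (tag, value) entry per main-stack item
+        self.records = []      # serialized parse tree, one record per reduce
+
+    def shift(self, terminal):
+        # A shifted terminal is tagged 0 and identified by its symbol number.
+        self.side_stack.append((0, terminal))
+
+    def reduce(self, nonterminal, count):
+        # Pop the entries for the symbols being reduced and emit one record.
+        split = len(self.side_stack) - count
+        subnodes = self.side_stack[split:]
+        del self.side_stack[split:]
+        self.records.append((nonterminal, subnodes))
+        # Push the new record's index, tagged 1, so a later reduction can
+        # stitch this nonterminal into its own record.
+        self.side_stack.append((1, len(self.records) - 1))
+
+# Shifting I and J, reducing C then B, shifting K and L, and reducing D then A
+# reproduces the four example records listed earlier.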
+
+
+from nmigen import Elaboratable, Memory, Module, Signal
+
+
+class TreeSerializer(Elaboratable):
+    def __init__(self, *, item_width, indices_width, stack_depth, serialized_tree_length):
+        # Parameters
+        self.item_width    = item_width
+        self.stack_depth   = stack_depth
+        self.indices_width = indices_width
+        self.mem_address_width = indices_width
+        self.serialized_tree_length = serialized_tree_length
+
+        # inputs
+
+        # Activated only once per reduction
+        self.start_reduction             = Signal(1)
+        # When ^ goes high, the three below are registered
+
+        self.number_to_pop               = Signal(range(stack_depth))
+        self.reduce_rule_number          = Signal(item_width)
+        self.item_created_by_reduce_rule = Signal(item_width)
+
+        # Varies with every popped item
+        self.destroyed_item_valid_in        = Signal(1)
+        self.destroyed_item_in              = Signal(item_width)     # off main stack
+        self.destroyed_item_index_in        = Signal(indices_width)  # off side stack
+
+        # outputs
+        # output to state machine
+        self.ready_out                   = Signal(1)
+        self.internal_fault              = Signal(1)
+
+        self.serialized_index            = Signal(indices_width)    # push *this* onto side stack for the
+                                                                    # newly created item
+        # interface with serialization memory
+        self.memory_write_port           = Signal(item_width + 1)
+        self.memory_address_port         = Signal(self.mem_address_width)
+        self.memory_write_enable         = Signal(1)
+
+
+        self.mem = Memory(width=(self.item_width + 1), depth=serialized_tree_length)
+
+    def elaborate(self, platform):
+        m = Module()
+
+        start_of_record = Signal(self.mem_address_width) # start of next/yet-unwritten node record, advanced only
+                                                         # after each reduce
+
+        m.submodules.parse_tree = wport = self.mem.write_port()
+
+        m.d.comb += [
+            wport.en.eq(self.memory_write_enable),
+            wport.addr.eq(self.memory_address_port),
+            wport.data.eq(self.memory_write_port),
+        ]
+
+
+        # Per-reduce registered signals:
+        number_of_children          = Signal(range(self.stack_depth))
+
+        reduce_rule_number          = Signal(self.item_width)
+        item_created_by_reduce_rule = Signal(self.item_width)
+
+        # incremented once for each subnode slot written
+        number_written              = Signal(range(self.stack_depth))
+        m.d.comb += self.serialized_index.eq(start_of_record)
+
+
+        with m.FSM() as fsm:
+            with m.State("INITIALIZE"):
+                m.d.comb += self.ready_out.eq(0)
+                m.d.comb += self.internal_fault.eq(0)
+
+                m.d.sync += start_of_record.eq(0)
+                m.d.sync += number_written.eq(0)
+
+                m.next = "NODE"
+
+
+            with m.State("NODE"):
+                m.d.comb += self.ready_out.eq(1)
+                m.d.comb += self.internal_fault.eq(0)
+
+                #m.d.sync += reduce_rule_number.eq(self.reduce_rule_number)
+                m.d.sync += item_created_by_reduce_rule.eq(self.item_created_by_reduce_rule)
+
+                with m.If(self.start_reduction == 1):
+                    # Write the first popped subnode: a zero side-stack index
+                    # selects the item's symbol number (terminal case), a
+                    # nonzero index selects the record index (nonterminal case).
+                    with m.If(self.destroyed_item_index_in == 0):
+                        m.d.comb += self.memory_write_port.eq(self.destroyed_item_in)
+                    with m.Else():
+                        m.d.comb += self.memory_write_port.eq(self.destroyed_item_index_in)
+                    # Subnode slots start at offset 2, after the symbol and count fields.
+                    m.d.comb += self.memory_address_port.eq(start_of_record + 2)
+                    m.d.comb += self.memory_write_enable.eq(1)
+
+                    m.d.sync += number_of_children.eq(self.number_to_pop)
+                    m.d.sync += number_written.eq(1)
+
+                    m.next = "SUBNODES"
+
+                with m.If(self.memory_address_port > (self.serialized_tree_length - 1)):
+                    m.next = "ABORT"
+
+            with m.State("SUBNODES"):
+                m.d.comb += self.ready_out.eq(0)
+                m.d.comb += self.internal_fault.eq(0)
+
+                with m.If(self.destroyed_item_valid_in == 1):
+                    # Same selection as in NODE: a zero side-stack index means
+                    # a terminal symbol, a nonzero index means a record index.
+                    with m.If(self.destroyed_item_index_in == 0):
+                        m.d.comb += self.memory_write_port.eq(self.destroyed_item_in)
+                    with m.Else():
+                        m.d.comb += self.memory_write_port.eq(self.destroyed_item_index_in)
+                    m.d.comb += self.memory_address_port.eq(start_of_record + 2 + number_written)
+                    m.d.comb += self.memory_write_enable.eq(1)
+
+                    m.d.sync += number_written.eq(number_written + 1)
+
+
+                with m.If(number_written == number_of_children):
+                    # All subnodes written: field 0 gets the symbol created by
+                    # the reduce rule, then FIXUP fills in the subnode count.
+                    m.d.comb += self.memory_write_port.eq(item_created_by_reduce_rule)
+                    m.d.comb += self.memory_address_port.eq(start_of_record)
+                    m.d.comb += self.memory_write_enable.eq(1)
+
+                    m.next = "FIXUP"
+
+                with m.If(self.memory_address_port > (self.serialized_tree_length - 1)):
+                    m.next = "ABORT"
+
+            with m.State("FIXUP"):
+                m.d.comb += self.ready_out.eq(0)
+                m.d.comb += self.internal_fault.eq(0)
+
+                # Field 1 (the subnode count) is written last, once the whole
+                # record is known, and the record pointer is advanced.
+                m.d.comb += self.memory_write_port.eq(number_of_children)
+                m.d.comb += self.memory_address_port.eq(start_of_record + 1)
+                m.d.comb += self.memory_write_enable.eq(1)
+                m.d.sync += start_of_record.eq(start_of_record + 2 + number_of_children)
+
+                m.next = "NODE"
+
+                with m.If(self.memory_address_port > (self.serialized_tree_length - 1)):
+                    m.next = "ABORT"
+
+            with m.State("ABORT"):
+                m.d.comb += self.ready_out.eq(0)
+                m.d.comb += self.internal_fault.eq(1)
+
+        return m
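+
+
+# A hypothetical parameterization, only to illustrate what the constructor
+# arguments mean; these widths are arbitrary and are not taken from the
+# grammar used elsewhere in this repository.
+if __name__ == "__main__":
+    serializer = TreeSerializer(
+        item_width=8,                 # symbol/token numbers fit in 8 bits
+        indices_width=10,             # room for up to 1024 serialized records
+        stack_depth=64,               # matches the depth of the main parse stack
+        serialized_tree_length=1024,
+    )
+    print("serialization memory depth:", serializer.mem.depth)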
-- 
GitLab