diff --git a/CFGBoltzmann.py b/CFGBoltzmann.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed0d9953308971b84639b3f2207a87fef3c5100f
--- /dev/null
+++ b/CFGBoltzmann.py
@@ -0,0 +1,503 @@
+import collections
+import functools
+import random
+from enum import Enum
+
+
+CFG = collections.namedtuple("CFG", "nonterminals terminals rules start")
+# We define a context-free grammar as:
+# 1. A list of nonterminal symbols
+# 2. A list of terminal symbols
+# 3. A list of production rules
+# 4. A start symbol.
+
+# Each symbol is represented by an Enum member (backed by a positive integer).
+
+
+# For the ETF expression grammar:
+
+# Terminal symbols are:
+# INTEGER, ADDOP, MULTOP, OPENPAREN, CLOSEPAREN
+
+# Nonterminal symbols are:
+# EXPRESSION, TERM, FACTOR
+
+# The start symbol is EXPRESSION
+
+# The rules are:
+
+# EXPRESSION -> EXPRESSION ADDOP TERM
+# EXPRESSION -> TERM
+# TERM -> TERM MULTOP FACTOR
+# TERM -> FACTOR
+# FACTOR -> OPENPAREN EXPRESSION CLOSEPAREN
+# FACTOR -> INTEGER
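+
+# For example, the length-5 string "INTEGER ADDOP INTEGER MULTOP INTEGER" has the
+# leftmost derivation:
+#
+# EXPRESSION -> EXPRESSION ADDOP TERM
+#            -> TERM ADDOP TERM
+#            -> FACTOR ADDOP TERM
+#            -> INTEGER ADDOP TERM
+#            -> INTEGER ADDOP TERM MULTOP FACTOR
+#            -> INTEGER ADDOP FACTOR MULTOP FACTOR
+#            -> INTEGER ADDOP INTEGER MULTOP FACTOR
+#            -> INTEGER ADDOP INTEGER MULTOP INTEGER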
+
+nonterminals = Enum("Nonterminals", "EXPRESSION TERM FACTOR")
+
+terminals = Enum("Terminals", "INTEGER ADDOP MULTOP OPENPAREN CLOSEPAREN")
+
+list_of_nonterminals = list(nonterminals)
+list_of_terminals    = list(terminals)
+
+start = nonterminals.EXPRESSION
+
+# The rules are a list of tuples, each of which represents a rule, as follows:
+
+# (Nonterminal symbol, [ordered list of symbols (terminal or nonterminal) that are reduced to aforementioned nonterminal])
+
+rules = [
+    (nonterminals.EXPRESSION, [nonterminals.EXPRESSION, terminals.ADDOP,  nonterminals.TERM]),
+    (nonterminals.EXPRESSION, [nonterminals.TERM]),
+    (nonterminals.TERM,       [nonterminals.TERM,       terminals.MULTOP, nonterminals.FACTOR]),
+    (nonterminals.TERM,       [nonterminals.FACTOR]),
+    (nonterminals.FACTOR,     [terminals.OPENPAREN, nonterminals.EXPRESSION, terminals.CLOSEPAREN]),
+    (nonterminals.FACTOR,     [terminals.INTEGER]),
+]
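+
+# For reference, these pieces can be packed into the CFG namedtuple defined at the top
+# of this file. Nothing below depends on this record; it is shown for completeness:
+
+etf_grammar = CFG(nonterminals=list_of_nonterminals,
+                  terminals=list_of_terminals,
+                  rules=rules,
+                  start=start)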
+
+
+
+# Once we have a CFG's definition, we can use Boltzmann sampling in order to generate random strings
+# from that CFG.
+
+# We use the "sandwich" technique in https://notebook.drmaciver.com/posts/2020-07-11-10:49.html
+# to estimate the Boltzmann distribution generating function. However, to enumerate the number of
+# words in the grammar with length n, we use the technique described in:
+#
+# "Generating Strings at Random from a Context Free Grammar" by Bruce McKenzie, 1997
+
+# There is a more effective method described in "Automating Grammar Comparison", where the
+# authors devise a slightly more advanced scheme that uses the Cantor pairing function to
+# encode the parse tree of each generated string as a single integer index. I do not think
+# I need to resort to such a technique just yet.
+
+# We adopt the following notation from that paper:
+
+# The paper uses the notation ||X||_n to mean "the number of strings of length n generated by
+# the symbol X", but since this is text and not LaTeX, we use the notation "||X||==n" to mean
+# the same.
+
+# Nonterminals are indexed by i, i = [1, number of nonterminals]
+# Each nonterminal may have multiple ways of generating it. For each nonterminal i,
+# there may be multiple rules that create it. For each i, there is an index j of these
+# multiple ways.
+
+# Production rules are thus indexed by:
+# (i = [1, number of nonterminals], j = [1, number of ways to generate nonterminal i]).
+
+# Each production rule is represented by α_ij = X_ij1, X_ij2, ..., X_ijT, with T = T_ij
+# representing the number of symbols on the right-hand side of production rule (i, j).
+
+# We have two functions, and we use memoization in order to evaluate them without
+# pathological cost.
+
+
+# The first function, f(i, N), which we will call Fzero, counts the strings of
+# length exactly N that can be generated by the expansion of nonterminal number i.
+# It returns a list of integers, with one element for each production rule that
+# generates nonterminal i (so the total count is the sum of the list):
+
+# f(i, N) = [||α_ij||==N for j = [1, number of production rules for nonterminal i]]
+
+
+# The second function, f'(i, j, k, N), which we will call Fprim, represents the
+# number of strings with length exactly N that can be generated by the
+# last k symbols of the specific production rule (i, j).
+
+# We define it as follows, which leads to a simple "peeling off" way of evaluating it
+# by induction. It is a list, with one element for each way to split the N characters
+# between the first symbol of the relevant suffix of the rule (the k'th symbol of the
+# entire rule) and the remaining symbols (the k+1'th through the last symbol of the rule):
+
+# f'(i, j, k, N) = [ (||X_ijk|| == L) * (||X_ij(k+1) X_ij(k+2)...X_ijT|| == N-L) for L = [1, N-T_ij+k]]
+
+# The * is an ordinary multiplication, not a Kleene star. We note that this looks like
+# a convolution.
+
+# The "ways to split the length N" is N-T_ij+k because the *entire production rule* has
+# T_ij symbols, the smaller subset from k+1 to T_ij has T_ij-k symbols. Therefore,
+# this smaller subset will *always* generate a list of terminals with length T_ij-k or
+# greater (because we require that there be no epsilon-productions).
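+
+# As a concrete worked example, take the rule FACTOR -> OPENPAREN EXPRESSION CLOSEPAREN
+# (so T_ij = 3) and the suffix starting at k = 2, i.e. "EXPRESSION CLOSEPAREN". Then
+#
+# f'(i, j, 2, N) = [ (||EXPRESSION||==L) * (||CLOSEPAREN||==N-L) for L = [1, N-1] ]
+#
+# and since CLOSEPAREN is a single terminal, ||CLOSEPAREN||==N-L is 1 when L = N-1 and 0
+# otherwise, so only the L = N-1 term of the convolution survives.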
+
+
+# To evaluate Fzero and Fprim, we define them in terms of each other, analyze cases, and
+# use memoization:
+
+# Fzero(i, N) = [sum(Fprim(i, j, 1, N)) for all possible j (j = [1, number of rules for nonterminal i])]
+# We index arrays starting at zero, like everyone else, so the 1 above becomes a 0 in the code.
+
+# There are two types of special cases for Fprim(i, j, k, N), and they can both occur at
+# the same time. We use these special cases to evaluate Fprim efficiently:
+
+# Special case 1: k is T_ij, and thus x_ijk is the last symbol in the production rule.
+# Special case 2: The symbol x_ijk is a terminal symbol.
+
+# The exhaustive case analysis is as follows.
+
+#                        /----------------------------------------------------------\
+#                        |X_ijk is a terminal symbol | X_ijk is a nonterminal symbol|
+#                        |---------------------------|------------------------------|
+# X_ijk is the last      |        Case A             |         Case C               |
+# symbol in production   |                           |  Reduces to analysis for the |
+# rule (i,j)             | Extremely easy base case  |  production rule for the     |
+#                        |                           |  nonterminal @ X_ijk         |
+#------------------------|---------------------------|------------------------------|
+# X_ijk is not the last  |        Case B             |         Case D               |
+# symbol in the          | A terminal always has len | The difficult case. We must  |
+# production rule (i,j)  | 1 and no possibilities so | evaluate the full convolution|
+#                        | this reduces to           | for both X_ijk and remaining |
+#                        | Fprim(i, j, k+1, N-1)     | symbols in the subset of rule|
+#                        \----------------------------------------------------------/
+
+# We also note that for the degenerate case N=0, we always can and must return the empty array []
+
+# In pseudocode (taken from the paper, with the correction that on top of page 5
+# the "if N==0 then return [1]" seems like it should be "if N==1 then return [1]"):
+
+# Fprim(i, j, k, N):
+# if (N==0) return []
+# if X_ijk is a terminal:
+#    if k==T_ij:
+#        # CASE A
+#        if N==1 return [1], else return [0]
+#    if k != T_ij:
+#        # CASE B
+#        return [sum(Fprim(i, j, k+1, N-1))]
+# else if X_ijk is a nonterminal:
+#    if k==T_ij:
+#        # CASE C
+#        return [sum(Fzero(index of the nonterminal at position k, N))]
+#    if k != T_ij:
+#        # CASE D
+#        return [sum(Fzero(index of the nonterminal at position k, L)) *
+#                sum(Fprim(i, j, k+1, N-L)) for L = [1, N - T_ij + k]]
+
+
+# First, we preprocess the grammar by grouping all the rules for each nonterminal together.
+
+
+class CFGBoltzmann:
+    def __init__(self, rules, nonterminals, terminals):
+        self.unitary_rules = rules
+        self.nonterminals = nonterminals
+        self.terminals = terminals
+
+    def preprocessor(self):
+        unitary_rules = self.unitary_rules
+        factorized_rulepack = {}
+
+        for rule in unitary_rules:
+            nonterminal = rule[0]
+            rhs         = rule[1]
+            if (nonterminal not in factorized_rulepack):
+                factorized_rulepack[nonterminal] = [rhs]
+            else:
+                factorized_rulepack[nonterminal].append(rhs)
+
+
+        rulepack_list = []
+        nonterminals_in_sync_with_rulepack = []
+        for lhs in factorized_rulepack:
+            rulepack_list.append((lhs, factorized_rulepack[lhs]))
+            nonterminals_in_sync_with_rulepack.append(lhs)
+        # This ordered list maps each nonterminal to its index in the rulepack.
+        self.nonterminals_ordered = nonterminals_in_sync_with_rulepack
+
+        self.processed_rulepack = rulepack_list
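+
+    # For the ETF grammar, the preprocessed structure looks like this (Python 3.7+
+    # dicts preserve insertion order, so EXPRESSION gets index 0):
+    #
+    # [(EXPRESSION, [[EXPRESSION, ADDOP, TERM], [TERM]]),
+    #  (TERM,       [[TERM, MULTOP, FACTOR],    [FACTOR]]),
+    #  (FACTOR,     [[OPENPAREN, EXPRESSION, CLOSEPAREN], [INTEGER]])]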
+
+    # We recall that:
+
+    # Fzero(i, N) = [sum(Fprim(i, j, 1, N)) for all possible j (j = [1, number of rules for nonterminal i])]
+
+    @functools.lru_cache(maxsize=8192)
+    def Fzero(self, nonterminal_index, exact_length_total):
+        # First find the nonterminal in the rulepack
+
+        possible_RHSes = self.processed_rulepack[nonterminal_index][1]
+        assert(possible_RHSes != [])
+
+        retlist = []
+
+        for rhs_index in range(len(possible_RHSes)):
+            # The paper's j and k indices start at 1; ours start at 0, like everyone else's.
+            retlist.append(sum(self.Fprim(nonterminal_index, rhs_index, 0, exact_length_total)))
+        return retlist
+
+
+
+    # Fprim is where the complicated case analysis lives.
+
+    @functools.lru_cache(maxsize=8192)
+    def Fprim(self, nonterminal_index, which_RHS, how_far_into_the_RHS, exact_length_total):
+        #print("arguments are", nonterminal_index, which_RHS, how_far_into_the_RHS, exact_length_total)
+
+        # first, handle the ultimate degenerate case:
+        if (exact_length_total == 0):
+            #print("LENGTH ZERO RETURNING []")
+            return []
+
+        # The case analysis hinges on what is at X_ijk.
+
+        RHS_in_question = self.processed_rulepack[nonterminal_index][1][which_RHS]
+        #print("Our RHS is", RHS_in_question)
+        #print("WE ARE PROCESSING index", how_far_into_the_RHS, "and our remaining lengthis", exact_length_total)
+
+        xijk = RHS_in_question[how_far_into_the_RHS]
+
+        if (xijk in self.terminals):
+            # Are we at the end of the rule?
+            if (how_far_into_the_RHS == (len(RHS_in_question) - 1)):
+                # CASE A
+                if (exact_length_total == 1):
+                    return [1]
+                else:
+                    return [0]
+            else:
+                # CASE B
+                reduct = self.Fprim(nonterminal_index, which_RHS, how_far_into_the_RHS + 1, exact_length_total - 1)
+                retlist = [sum(reduct)]
+                return retlist
+
+
+        else:
+            assert (xijk in self.nonterminals)
+            # We now calculate the index of the nonterminal at position k; we need it for both cases.
+            new_nonterminal_index = self.nonterminals_ordered.index(xijk)
+
+            if (how_far_into_the_RHS == (len(RHS_in_question) - 1)):
+                # CASE C
+                reduct = self.Fzero(new_nonterminal_index, exact_length_total)
+                retlist = [sum(reduct)]
+                return retlist
+
+            else:
+                # CASE D
+                # Here we must do the full convolution to account for every
+                # possible way that the total length of the desired string can
+                # be split between the nonterminal at X_ijk and the remaining
+                # segment of the rule (from k+1 to the end of the rule)
+
+                # Here, L is the number of characters produced by X_ijk.
+                # L therefore runs from 1 (we disallow epsilon-productions, so
+                # X_ijk must produce at least one character) up to, inclusively:
+                #
+                #   (total characters requested) - (minimum number of characters
+                #   generated by symbols k+1 through the end of the rule)
+                #
+                # With zero-based indices, the suffix from k+1 to the end has
+                # len(rule) - k - 1 symbols, each producing at least one character,
+                # so the inclusive upper bound is:
+                #
+                #   exact_length_total - len(rule) + how_far_into_the_RHS + 1
+
+    #            print("CASE D")
+                retlist = []
+                #print(exact_length_total - len(RHS_in_question) + how_far_into_the_RHS + 2)
+                for l in range(1, exact_length_total - len(RHS_in_question) + how_far_into_the_RHS + 2):
+     #               print("L IS", l)
+                    nonterminal_possibilities = sum(self.Fzero(new_nonterminal_index, l))
+                    remainder_possibilities   = sum(self.Fprim(nonterminal_index, which_RHS, how_far_into_the_RHS + 1, exact_length_total - l))
+                    retlist.append(nonterminal_possibilities * remainder_possibilities)
+      #          print ("Fprim returns", retlist)
+                return retlist
+
+
+    # Now we do generation of the strings.
+
+    # This returns an *index* (not the item at the index) from a list, weighted
+    # by the integer at each element.
+
+    def normalized_choice(self, inlist):
+        # random.choices normalizes the weights itself, so we can pass raw counts.
+        choice = random.choices(range(len(inlist)), weights=inlist)
+        return choice[0]
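+
+    # For example (illustrative only): normalized_choice([1, 0, 3]) returns index 0 about
+    # a quarter of the time, never returns 1, and returns 2 about three quarters of the
+    # time. Note that if every weight is zero (i.e. the requested length is impossible
+    # for this grammar), random.choices raises ValueError.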
+
+
+    # Similar to Fzero and Fprim, we have Gzero and Gprim.
+
+    # The first function, Gzero(nonterminal_index, requested_length), serves only to
+    # select (weighted appropriately) which production rule for a nonterminal will
+    # be used. It then calls out to Gprim, which actually generates the string.
+
+    def Gzero_shimmed(self, nonterminal, requested_length):
+        nonterminal_index = self.nonterminals_ordered.index(nonterminal)
+        return self.Gzero(nonterminal_index, requested_length, 0)
+
+    def Gzero(self, nonterminal_index, requested_length, depth):
+        print("    "* depth +"ENTERING Gzero")
+        possibilities = self.Fzero(nonterminal_index, requested_length)
+        chosen_production = self.normalized_choice(possibilities)
+        generated_string = self.Gprim(nonterminal_index, chosen_production, 0, requested_length, depth)
+
+        return generated_string
+
+    # Like Fprim, Gprim takes a nonterminal index, a production index for the nonterminal,
+    # and an index into the production rule (and of course, a requested length).
+    # This lets us walk through the production rule symbol by symbol.
+
+    # As in Fprim, there is a case analysis based on if the index represents the last
+    # symbol in the rule, and if the index points to a terminal or to a nonterminal.
+
+    # There are two types of special cases for Gprim(i, j, k, N), and they can both occur
+    # at the same time. We use these special cases to evaluate Gprim efficiently:
+
+    # Special case 1: k is T_ij, and thus x_ijk is the last symbol in the production rule.
+    # Special case 2: The symbol x_ijk is a terminal symbol.
+
+    # The exhaustive case analysis is as follows.
+
+    #                        /----------------------------------------------------------\
+    #                        |X_ijk is a terminal symbol | X_ijk is a nonterminal symbol|
+    #                        |---------------------------|------------------------------|
+    # X_ijk is the last      |        Case A             |         Case C               |
+    # symbol in production   |                           | Reduces to Gzero for the     |
+    # rule (i,j)             |  The easiest base case.   | production rule for the      |
+    #                        |                           | nonterminal @ X_ijk          |
+    #------------------------|---------------------------|------------------------------|
+    # X_ijk is not the last  |        Case B             |         Case D               |
+    # symbol in the          | A terminal always has len | Like the convolution, but use|
+    # production rule (i,j)  | 1 and no possibilities so | Fprim and weighted-choice on |
+    #                        | this reduces to           | [X_ijk, last symbol in the   |
+    #                        | Gprim(i, j, k+1, N-1)     | production rule] to find out |
+    #                        |                           | how many symbols we're having|
+    #                        |                           | X_ijk generate. Then we use  |
+    #                        |                           | Gzero on X_ijk and Gprim on  |
+    #                        |                           | (k+1) to end of rule         |
+    #                        \----------------------------------------------------------/
+
+
+    def Gprim(self, nonterminal_index, chosen_production, how_far_into_the_RHS, exact_length_total, depth):
+        print("    "* depth + "GPRIM arguments are", nonterminal_index, chosen_production, how_far_into_the_RHS, exact_length_total)
+
+        # Unlike Fprim, the degenerate case N == 0 must never be reached during generation:
+        assert(exact_length_total != 0)
+
+        # The case analysis hinges on what is at X_ijk.
+
+        RHS_in_question = self.processed_rulepack[nonterminal_index][1][chosen_production]
+        print("    "* depth +"Our RHS is", RHS_in_question)
+        print("    "* depth +"WE ARE PROCESSING index", how_far_into_the_RHS, "and our remaining lengthis", exact_length_total)
+
+        xijk = RHS_in_question[how_far_into_the_RHS]
+
+        if (xijk in self.terminals):
+            # Are we at the end of the rule?
+            if (how_far_into_the_RHS == (len(RHS_in_question) - 1)):
+                # CASE A
+                print("    "* depth +"GPRIM CASE A RETURNING", [xijk])
+                return [xijk]
+            else:
+                # CASE B
+                print("    "* depth +"GPRIM CASE B pointing @", [xijk])
+                reduct = self.Gprim(nonterminal_index, chosen_production, how_far_into_the_RHS + 1, exact_length_total - 1, depth+1)
+                retstring = [xijk] + reduct
+                print("    "* depth +"GPRIM CASE B RETURNING", retstring)
+                return retstring
+
+
+        else:
+            assert (xijk in self.nonterminals)
+            # We now calculate the index of the nonterminal at position k; we need it for both cases.
+            new_nonterminal_index = self.nonterminals_ordered.index(xijk)
+
+            if (how_far_into_the_RHS == (len(RHS_in_question) - 1)):
+                # CASE C
+                print("    "* depth +"CASE C STARTING")
+                retstring = self.Gzero(new_nonterminal_index, exact_length_total, depth+1)
+                print("    "* depth +"CASE C returning", retstring)
+                return retstring
+
+            else:
+                # CASE D
+
+                print("    "* depth +"CASE D STARTING")
+
+
+                splitting_possibilities = self.Fprim(nonterminal_index, chosen_production, how_far_into_the_RHS, exact_length_total)
+                print ("    "* depth +"CASE D; SPLIT POSSIBILITIES ARE", splitting_possibilities)
+
+                # normalized_choice returns a zero-based index, but the split length L starts at 1.
+                split_choice = 1 + self.normalized_choice(splitting_possibilities)
+                print ("    "* depth +"CASE D; SPLITTING WITH LENGTH", split_choice)
+
+                # hit the nonterminal X_ijk with Gzero
+
+                nonterminal_generates = self.Gzero(new_nonterminal_index, split_choice, depth+1)
+                print ("    "* depth +"CASE D NONTERMINAL @ XIJK generates", nonterminal_generates)
+
+                # and then the remaining part of the rule
+
+                rest_of_rule_generates = self.Gprim(nonterminal_index, chosen_production, how_far_into_the_RHS + 1, exact_length_total - split_choice, depth+1)
+                print ("CASE D REST OF RULE generates", rest_of_rule_generates)
+
+                retstring = nonterminal_generates + rest_of_rule_generates
+                print ("    "* depth +"CASE D RETURNING", retstring)
+                return retstring
+
+
+z = CFGBoltzmann(rules, list_of_nonterminals, list_of_terminals)
+
+z.preprocessor()
+
+
+qq = z.Gzero_shimmed(nonterminals.EXPRESSION, 11)
+print ("AND THE FINAL ANSWER IS","\n\n\n",qq, "\n\n\n")
+
+# Furthermore, we note that the description of a context-free grammar is *itself* context-free,
+# so if we take the CFG-description grammar (BNF or something isomorphic to it) and use Boltzmann
+# sampling on *it*, we will generate candidate grammars, which will be used to test the FPGA-based
+# parser generator against a reference LR parser generator implementation. Naturally, this procedure
+# may not produce LR-parseable grammars or even deterministic context-free grammars; but if a grammar
+# is rejected by both the FPGA parser generator and the reference parser generator, that is fine: we
+# just move on to the next one.
+
+
+# Once we randomly generate a CFG, we randomly generate valid strings from it, but we also
+# generate exemplars that are potentially *invalid* (both by mutating valid strings and by
+# generating random token sequences ab initio from the terminal symbols); a sketch of the
+# mutation step follows below. The potentially invalid strings are tested against the
+# reference parser; those that are accidentally valid are discarded, and those that are
+# invalid are used in the automatic equivalence testing.
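+
+# A minimal sketch of that mutation step. This helper is hypothetical (not used elsewhere
+# in this file); it assumes a non-empty token list like the ones Gzero returns, plus the
+# list of terminal symbols:
+
+def mutate_tokens(tokens, terminal_alphabet, rng=random):
+    # Hypothetical helper for the testing plan above; not part of McKenzie's algorithm.
+    # Randomly perturbs a token list to produce a *candidate* invalid string.
+    mutated = list(tokens)
+    op = rng.choice(["substitute", "insert", "delete"])
+    position = rng.randrange(len(mutated))
+    if op == "substitute":
+        mutated[position] = rng.choice(terminal_alphabet)
+    elif op == "insert":
+        mutated.insert(position, rng.choice(terminal_alphabet))
+    elif len(mutated) > 1:  # delete, but never produce the empty string
+        del mutated[position]
+    return mutated  # must still be checked against the reference parser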
+
+
+# We fail our candidate parser generator if:
+#
+# 1. During the generation phase, it rejects a grammar the reference generator doesn't reject, or
+#    accepts a grammar the reference generator rejects.
+
+# 2. During the execution phase, it rejects a string the reference-generated parser doesn't reject,
+#    or accepts a string the reference-generated parser rejects.
+
+# 3. During the execution phase, it accepts a string that the reference-generated parser accepts, but
+#    the parse tree it generates is not identical to the parse tree that the reference-generated parser
+#    produced.
+
+
+