diff --git a/cfg_utils.py b/cfg_utils.py index 6b962ef8c7c461fd0744f11610021c18ea849c51..7ff8e074311d09d0f9016e08ba0d08acb3102978 100644 --- a/cfg_utils.py +++ b/cfg_utils.py @@ -44,20 +44,19 @@ CFG = collections.namedtuple("CFG", "nonterminals terminals rules start") # FACTOR -> OPENPAREN EXPRESSION CLOSEPAREN # FACTOR -> INTEGER -terminals = Enum("Terminals", "INTEGER ADDOP MULTOP OPENPAREN CLOSEPAREN") -nonterminals = Enum("Nonterminals", "EXPRESSION TERM FACTOR") +symbols = Enum("Symbols", "EXPRESSION TERM FACTOR INTEGER ADDOP MULTOP OPENPAREN CLOSEPAREN") start = nonterminals.EXPRESSION # The rules are a list of tuples, each of which represents a rule, as follows: # (Nonterminal symbol, [ordered list of symbols (terminal or nonterminal) that are reduced to aforementioned nonterminal]) -rules = [(nonterminals.EXPRESSION, [nonterminals.EXPRESSION, terminals.ADDOP, nonterminals.TERM]), - (nonterminals.EXPRESSION, [nonterminals.TERM]), - (nonterminals.TERM, [nonterminals.TERM, terminals.MULTOP, nonterminals.FACTOR]), - (nonterminals.TERM, [nonterminals.FACTOR]), - (nonterminals.FACTOR, [terminals.OPENPAREN, nonterminals.EXPRESSION, terminals.CLOSEPAREN]), - (nonterminals.FACTOR, [terminals.INTEGER])] +rules = [(symbols.EXPRESSION, [symbols.EXPRESSION, symbols.ADDOP, symbols.TERM]), + (symbols.EXPRESSION, [symbols.TERM]), + (symbols.TERM, [symbols.TERM, symbols.MULTOP, symbols.FACTOR]), + (symbols.TERM, [symbols.FACTOR]), + (symbols.FACTOR, [symbols.OPENPAREN, symbols.EXPRESSION, symbols.CLOSEPAREN]), + (symbols.FACTOR, [symbols.INTEGER])] print(rules) @@ -67,6 +66,21 @@ print(rules) # from that CFG. +# This, however, is not Boltzmann sampling. + +# We examine all the leaves on the current derivation tree. Those mark where we possibly could apply a rule. +# We then determine which leaves are amenable to rule application, and then we randomly select a leaf and for +# that leaf, we randomly select a rule, and apply it. 
+ +# To avoid being accidentally quadratic, we cache a list of the leaves, and also cache the current length of +# the generated string (in order to do pseudo-Boltzmann sampling). + + +def next_stage(derivation_tree_with_cached, language_definition): + + + + # Furthermore, we also note that the description of a context-free grammar is *itself* context-free # so if we take the CFG-description grammar (BNF or something isomorphic to it), and use Boltzmann