# diff --git a/cfg_utils.py b/cfg_utils.py
# index bf4c9b52926ca6b5abf372288d519fc389bc26ad..705d893116313846c011db956b90becb8719ef2f 100644
# --- a/cfg_utils.py
# +++ b/cfg_utils.py
# @@ -515,14 +515,51 @@ class CFGBoltzmann:

# Similar to Fzero and Fprim, we have Gzero and Gprim.

# The first function, Gzero(nonterminal index, requested length), serves only to
# select (weighted appropriately) which production rule for a nonterminal will
# be used.  It then calls out to Gprim, which actually generates the string.

def Gzero(self, nonterminal_index, requested_length):
    """Choose a production rule for a nonterminal and generate a string from it.

    Args:
        nonterminal_index: Index of the nonterminal to expand.
        requested_length: Length requested for the generated string.

    Returns:
        Whatever ``self.Gprim`` returns for the chosen production.
    """
    # Fzero's result is handed straight to normalized_choice; presumably it
    # holds one weight per production rule -- TODO confirm against Fzero.
    possibilities = self.Fzero(nonterminal_index, requested_length)
    chosen_production = self.normalized_choice(possibilities)
    # The original statement read `generated_string - self.Gprim(...)`: a
    # subtraction from an unbound name (NameError) whose result was discarded.
    # Bind the result and hand it back to the caller instead.
    # NOTE(review): the symbol index passed to Gprim starts at 1 -- confirm
    # this matches how Gprim indexes into the production rule.
    generated_string = self.Gprim(nonterminal_index, chosen_production, 1, requested_length)
    return generated_string

# Like Fprim, Gprim takes a nonterminal index, a production index for the nonterminal,
# and an index into the production rule (and of course, a requested length).
# This lets us walk through the production rule symbol by symbol.

# As in Fprim, there is a case analysis based on if the index represents the last
# symbol in the rule, and if the index points to a terminal or to a nonterminal.

# NOTE(review): the next paragraph still names Fprim; it looks carried over from
# the Fprim documentation -- confirm whether it should read Gprim.
# There are two types of special cases for Fprim(i, j, k, N), and they can both occur at
# the same time.  We use these special cases to evaluate Fprim efficiently:

# Special case 1: k is T_ij, and thus x_ijk is the last symbol in the production rule.
# Special case 2: The symbol x_ijk is a terminal symbol.

# The exhaustive case analysis is as follows.

#                        /----------------------------------------------------------\
#                        |X_ijk is a terminal symbol | X_ijk is a nonterminal symbol|
#                        |---------------------------|------------------------------|
# X_ijk is the last      | Case A                    | Case C                       |
# symbol in production   |                           | Reduces to Gzero for the     |
# rule (i,j)             | The easiest base case.
| production rule for the | + # | | nonterminal @ X_ijk | + #------------------------|---------------------------|------------------------------| + # X_ijk is not the last | Case B | Case D | + # symbol in the | A terminal always has len | Like the convolution, but use| + # production rule (i,j) | 1 and no possibilities so | Fprim and weighted-choice on | + # | this reduces to | X_ijk to find out how many | + # | Gprim(i, j, k+1, N-1) | symbols we're having X_ijk | + # | | generate (then use Gzero on | + # | | X_ijk and Gprim on k+1 to end| + # | | of the rule) | + # \----------------------------------------------------------/ + + + def Gprim(self, nonterminal_index, chosen_production, how_far_into_the_RHS, requested_length): +