diff --git a/bison_xml_file_ingest.py b/bison_xml_file_ingest.py index c3d887e22627c5e3b629e5392c87eac7fe4ca024..915ed3d2a8351ede8c8809818db4c93a63e0f723 100644 --- a/bison_xml_file_ingest.py +++ b/bison_xml_file_ingest.py @@ -5,6 +5,7 @@ from enum import * import networkx as nx + import xml import xml.etree.ElementTree as ET tree = ET.parse('fun_with_bison/egg.xml') @@ -183,11 +184,11 @@ SRAction = Enum("Shift/Reduce Table Action", "SHIFT REDUCE ACCEPT") GOTOAction = Enum("Goto Table Action", "GOTO") -actual_rule_list = [] +rules = [] for monotonic_idx in range(len(rule_monotone_to_original)): # print(list_of_language_rules[monotonic_idx]) - actual_rule_list.append(list_of_language_rules[monotonic_idx]) + rules.append(list_of_language_rules[monotonic_idx]) #print(actual_rule_list) @@ -247,4 +248,323 @@ for nonterminal in [e for e in nonterminals_lookup]: print("\n")#for statenumber,idx in enumerate(shift_reduce_table): # print(statenumber, ": ", idx, "\n") +print(rules) + + +StackOp = Enum("Stack Operation", "PUSH POP") + + +SSD = Enum("Stack State", "DONTCARE BOTTOMOFSTACK") + + +sr_graph = nx.MultiDiGraph() + +# First we add a node for each of the enumerated states + +for statenumber, rule in enumerate(shift_reduce_table): + sr_graph.add_node(statenumber) + +# Then we add some stack-push transitions based on the shifts *alone* +for node in sr_graph.nodes(): + sr_graph.nodes[node]["label"] = (node, []) + +sr_graph.nodes[0]["label"] = (0, [SSD.BOTTOMOFSTACK]) + + +for initial_statenumber, rule in enumerate(shift_reduce_table): + for valid_transition in rule: + # Here, valid_transition is an index of the dictionary (so the index will be the character that causes the transition + # and so we will label the edge with the index, and the destination and shift/reduce type is found by accessing the + # dictionary itself) + destination_statenumber = rule[valid_transition][1] + if (destination_statenumber != None): + if (rule[valid_transition][0] == SRAction.SHIFT): + sr_graph.add_edge(initial_statenumber, destination_statenumber, label=(StackOp.PUSH, valid_transition)) + + +nx.drawing.nx_pydot.write_dot(sr_graph, "foo.dot") + + +# Then we add some stack-push transitions THAT DO NOT EXIST IN THE ACTUAL LR AUTOMATON +# THIS IS PURELY SO WE CAN DETERMINE THE INVARIANT (OF STACK STATE) THAT HOLDS FOR EACH ENUMERATED STATE + +for initial_statenumber, rule in enumerate(goto_table): + for valid_transition in rule: + # Here, valid_transition is an index of the dictionary (so the index will be the character that causes the transition + # and so we will label the edge with the index, and the destination and shift/reduce type is found by accessing the + # dictionary itself) + destination_statenumber = rule[valid_transition][1] + if (destination_statenumber != None): + if (rule[valid_transition][0] == GOTOAction.GOTO): + print("INITIAL STATE IS", initial_statenumber, "DESTINATION IS", destination_statenumber) + sr_graph.add_edge(initial_statenumber, destination_statenumber, label=(StackOp.PUSH, valid_transition)) + + +nx.drawing.nx_pydot.write_dot(sr_graph, "bar.dot") + +#sr_graph.add_edge(1,42,label="X") + + +# we implement a sort of forward-backward algorithm to determine the stack states for each of the LR-parser's stack numbers + + + +# The stack state for each node is represented as a list. 
+# An empty stack is only matched by a single-element list with the pseudosymbol "BOTTOMOFSTACK":
+# [SSD.BOTTOMOFSTACK]
+
+# A stack with only a single item is matched by:
+# [nonterminals.TERM, SSD.BOTTOMOFSTACK]
+
+# After we push an EXPRESSION on that stack, it looks like:
+
+# [nonterminals.EXPRESSION, nonterminals.TERM, SSD.BOTTOMOFSTACK]
+#  ^ top of stack
+
+
+# A stack with a single item on the top is matched by:
+# [nonterminals.TERM, SSD.DONTCARE]
+
+# After we push an EXPRESSION on that stack, it looks like:
+
+# [nonterminals.EXPRESSION, nonterminals.TERM, SSD.DONTCARE]
+
+# This allows us to distinguish between a set of items on the top of a stack where there may be
+# items below, and a set of items *that are the only items on the stack*.
+
+
+# When a node has an extant stack state description and we wish to update it with new information,
+# we follow these rules:
+
+# current_desc = [X_0, X_1, ..., X_n]
+# new_desc     = [Y_0, Y_1, ..., Y_n]
+
+# We examine pairs (X_i, Y_i), starting at i=0, the top of the stack. If both of them
+# are real symbols and are identical, the updated stack state description
+# has that symbol at that index.
+
+# If both of them are real symbols (nonterminal or terminal) and differ, the updated
+# stack state description has a DONTCARE there and that's it: there are no more entries
+# in the stack state description.
+
+# If one of them is a real symbol and the other one is a pseudosymbol (BOTTOMOFSTACK or DONTCARE),
+# this reduces to a DONTCARE.
+
+# If both of them are a DONTCARE, then, well, DONTCARE.
+# If there's a BOTTOMOFSTACK and a DONTCARE, then it's a DONTCARE.
+# If they're both a BOTTOMOFSTACK, then it's a BOTTOMOFSTACK.
+
+# Initially, we assign a special dummy value to each node's stack state description, representing
+# that we don't know anything about it, so nothing can be derived from it.
+
+# Only one node is initially tagged with something other than the dummy value -- the zero node,
+# which is tagged with [BOTTOMOFSTACK].
+
+# We then repeatedly iterate over the edges, letting information on stack states flow through the
+# edges (generated by SHIFT entries in the SHIFT/REDUCE table *and* by GOTO entries in the
+# GOTO table). We stop when a new round yields no changes for any node.
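+
+# The per-element case analysis above can be sketched in isolation. The helper below
+# (unify_one_element) is purely illustrative -- it is not used by the ingest code that
+# follows, which additionally stops the description at the first pseudosymbol it emits:
+def unify_one_element(old_elem, new_elem):
+    # Both pseudosymbols: BOTTOMOFSTACK survives only if both sides agree on it.
+    if isinstance(old_elem, SSD) and isinstance(new_elem, SSD):
+        if old_elem == SSD.BOTTOMOFSTACK and new_elem == SSD.BOTTOMOFSTACK:
+            return SSD.BOTTOMOFSTACK
+        return SSD.DONTCARE
+    # One real symbol and one pseudosymbol: we can no longer say anything definite.
+    if isinstance(old_elem, SSD) or isinstance(new_elem, SSD):
+        return SSD.DONTCARE
+    # Two real symbols: keep the symbol if they are identical, otherwise DONTCARE.
+    return old_elem if old_elem == new_elem else SSD.DONTCARE
+
+# For example, unifying ["TERM", SSD.BOTTOMOFSTACK] with ["TERM", SSD.DONTCARE] element by
+# element yields ["TERM", SSD.DONTCARE] (the example symbol "TERM" is arbitrary):
+print([unify_one_element(a, b) for a, b in zip(["TERM", SSD.BOTTOMOFSTACK], ["TERM", SSD.DONTCARE])])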
+
+
+# The above case analysis is implemented here:
+def unify(old_stack_state, new_stack_state):
+    print("UNIFY CALLED!", "old_stack_state is", old_stack_state, "new state is", new_stack_state)
+
+    if (old_stack_state == []):
+        return new_stack_state
+
+    updated_state = []
+
+    for old_elem, new_elem in zip(old_stack_state, new_stack_state):
+        if (isinstance(old_elem, SSD) or isinstance(new_elem, SSD)):
+            if (old_elem == SSD.BOTTOMOFSTACK and new_elem == SSD.BOTTOMOFSTACK):
+                updated_state.append(SSD.BOTTOMOFSTACK)
+                break
+            updated_state.append(SSD.DONTCARE)
+            break
+        if (old_elem != new_elem):
+            updated_state.append(SSD.DONTCARE)
+            break
+        if (old_elem == new_elem):
+            updated_state.append(old_elem)
+
+    print("UNIFY CALLED!", "updated state is", updated_state)
+
+    return updated_state
+
+print("unify ", unify([3, 5], [3, 4]))
+
+
+#for index,x in zip(range(100),sr_graph.edges()):
+#    edge_op = sr_graph[x[0]][x[1]][0]["label"]
+#    stack_op_from = sr_graph.nodes[x[0]]
+#    stack_op_to = sr_graph.nodes[x[1]]
+#    if (stack_op_from["label"][1] != [] and stack_op_to["label"][1] == []):
+#        print("egg", stack_op_from["label"][1], "XX", edge_op[1])
+#        stack_op_to["label"] = (stack_op_to["label"][0], [edge_op[1]] + stack_op_from["label"][1])
+
+
+changed = 1
+iterations = 0
+while (changed == 1):
+    changed = 0
+    iterations += 1
+    for x in sr_graph.edges():
+        edge_op = sr_graph[x[0]][x[1]][0]["label"]
+        stack_op_from = sr_graph.nodes[x[0]]
+        stack_op_to = sr_graph.nodes[x[1]]
+
+        stack_state_from_shiftop = [edge_op[1]] + stack_op_from["label"][1]
+
+        current_stack_state = stack_op_to["label"][1]
+
+        new_stack_state = unify(current_stack_state, stack_state_from_shiftop)
+
+        if (new_stack_state != current_stack_state):
+            changed = 1
+
+        stack_op_to["label"] = (stack_op_to["label"][0], new_stack_state)
+
+print("it took us", iterations, "iterations to hit the fixed point")
+#print(list(nx.dfs_edges(sr_graph)))
+
+for x in sr_graph.nodes():
+    print(sr_graph.nodes[x])
+
+nx.drawing.nx_pydot.write_dot(sr_graph, "processed_stack_state_graph.dot")
+
+
+# Check that there is no anomalous parallel structure: at most one transition may exist
+# between any ordered pair of nodes.
+for x in sr_graph.edges():
+    assert(len(sr_graph[x[0]][x[1]]) == 1)
+
+# We then cross-check the generated stack state descriptions against the original shift/goto
+# entries -- given the tail of each edge, the edge's head must be consistent with the label on
+# the edge.
+
+for x in sr_graph.edges():
+    edge_op = sr_graph[x[0]][x[1]][0]["label"]
+    stack_op_from = sr_graph.nodes[x[0]]
+    stack_op_to = sr_graph.nodes[x[1]]
+
+    assert(edge_op[1] == stack_op_to["label"][1][0])
+
+# We also check that the generated stack state descriptions are consistent with the reduce
+# rules. Every node is associated with a set of reduce rules that can be taken from it, and
+# the right-hand side of all the reduce rules for that node must be explicitly demarcated in
+# that node's stack state description.
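+
+# As a concrete (hypothetical) illustration of that reduce-rule check, consider a rule such as
+# EXPRESSION -> EXPRESSION PLUS TERM. When the corresponding reduce fires, TERM was the last
+# symbol pushed, so reading the stack from the top down yields the right-hand side in *reverse*
+# order. That is why the check below compares the stack state description against the reversed
+# RHS. The symbol names in this sketch are placeholders and need not appear in egg.xml:
+example_rule = ("EXPRESSION", ["EXPRESSION", "PLUS", "TERM"])
+example_stack_state = ["TERM", "PLUS", "EXPRESSION", SSD.DONTCARE]
+for position, expected_symbol in enumerate(example_rule[1][::-1]):
+    assert example_stack_state[position] == expected_symbol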
+
+# Here we extract the reduce entries from the shift-reduce table:
+reduce_rule_locations = []
+
+for statenumber, rule in enumerate(shift_reduce_table):
+    for valid_transition in rule:
+        # Here, valid_transition is a key of the dictionary: the symbol that triggers the action.
+        # The action type and the reduce rule number are found by accessing the dictionary itself.
+        if (rule[valid_transition][0] == SRAction.REDUCE):
+            reduce_rule_number = rule[valid_transition][1]
+            reduce_rule_locations.append((statenumber, reduce_rule_number))
+
+# Now we check that, if reduce rule n can be invoked from state k, the right-hand side of
+# reduce rule n is explicitly present in stack state description k.
+
+for state_number, reduce_rule_number in reduce_rule_locations:
+    fromgraph = sr_graph.nodes[state_number]["label"]
+    assert(fromgraph[0] == state_number)
+
+    stack_state = fromgraph[1]
+    print(stack_state)
+    rule_RHS = rules[reduce_rule_number - 1][1][::-1]
+
+    for idx, x in enumerate(rule_RHS):
+        print("RHS position", idx, "expects symbol", x)
+        print("stack_state[", idx, "]=", stack_state[idx])
+        assert(stack_state[idx] == x)
+
+
+# Finally, we check for uniqueness of stack state descriptions. No two may be identical. It
+# is OK for two to overlap: [A, B, C] and [A, B] are allowed to co-exist; they are dealt with
+# by the "pairwise priority rule" mechanism in the parser gateware.
+
+def find_dupes(in_list):
+    working_list = in_list.copy()
+    while working_list != []:
+        pulled_item = working_list.pop()
+        if pulled_item in working_list:
+            return pulled_item
+    return None
+
+
+all_stack_states = []
+
+for n in sr_graph.nodes:
+    all_stack_states.append(sr_graph.nodes[n]["label"][1])
+#all_stack_states.append(all_stack_states[1])
+
+
+dup = find_dupes(all_stack_states)
+if dup is not None:
+    print("DUPLICATE FOUND! ERROR!", dup)
+    exit(1)
+
+
+# So this takes care of the "stack states".
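+
+# A side note on the uniqueness check above: every entry of a stack state description is
+# hashable (SSD members plus the symbols already used as table keys), so the same check can
+# also be sketched with a set of tuples. This is only an illustrative alternative and nothing
+# below depends on it:
+seen_state_tuples = set()
+for candidate_state in all_stack_states:
+    candidate_tuple = tuple(candidate_state)
+    # A repeated tuple would mean two parser states share an identical stack state description.
+    assert candidate_tuple not in seen_state_tuples, ("DUPLICATE FOUND!", candidate_state)
+    seen_state_tuples.add(candidate_tuple)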
+# We still need to convert the (shift/reduce table, goto table, list of reduce rules)
+# into:
+#
+# 1) Valid item ruleset (list of all terminal symbols acceptable for each state)
+# 2) Shift ruleset (list of all terminal symbols for each state that we are to shift)
+# 3) Reduce ruleset (list of all terminal symbols for each state that provoke a reduce,
+#    together with the corresponding rule number to invoke)
+# 4) The list of reduce rules (specifically, how many items to pop and what new symbol
+#    to push)
+# 5) An encoding from symbolic terminals/nonterminals/pseudosymbols (like "ENDOFSTACK",
+#    "OPENPAREN", "EXPRESSION") into hexadecimal constants
+#
+# Fortunately, this is the easy part :)
+
+
+valid_item_ruleset = []
+# array (indexed by state number) of arrays of valid items (the items acceptable in that state)
+
+force_shift_ruleset = []
+# array (indexed by state number) of arrays of shift items (the items that provoke a shift in that state)
+
+reduce_ruleset = []
+# array (indexed by state number) of arrays of tuples, each of the form (item, reduce rule number)
+
+reduce_rule_execute_ruleset = []
+# array (indexed by reduce rule number) of tuples, each of the form (item to push, number of items to pop)
+
+
+# Here we populate the valid item, force shift, and reduce rulesets:
+for statenumber, rule in enumerate(shift_reduce_table):
+    this_state_s_valid_items = []
+    this_state_s_force_shift_items = []
+    this_state_s_reduce_rule_items = []
+
+    for item_accepted in rule:
+        this_state_s_valid_items.append(item_accepted)
+        if (rule[item_accepted][0] == SRAction.SHIFT):
+            this_state_s_force_shift_items.append(item_accepted)
+        if (rule[item_accepted][0] == SRAction.REDUCE):
+            this_state_s_reduce_rule_items.append((item_accepted, rule[item_accepted][1]))
+
+    valid_item_ruleset.append(this_state_s_valid_items)
+    force_shift_ruleset.append(this_state_s_force_shift_items)
+    reduce_ruleset.append(this_state_s_reduce_rule_items)
+
+print("number of states with rulesets:", len(valid_item_ruleset))
+
+
+for rule in rules:
+    reduce_rule_execute_ruleset.append((rule[0], len(rule[1])))
+
+print(reduce_rule_execute_ruleset)
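+
+# Item 5 above (the encoding of symbols into hexadecimal constants) is not produced by the
+# code in this file. A minimal sketch of one way to do it is shown below; the sequential
+# code assignment and the sorted-by-name ordering are assumptions, not a format anything
+# downstream requires, and any extra pseudosymbols the gateware needs (e.g. an end-of-input
+# marker) would have to be added to the symbol set by hand:
+all_symbols = set()
+for state_items in valid_item_ruleset:
+    all_symbols.update(state_items)
+for rule in rules:
+    all_symbols.add(rule[0])
+    all_symbols.update(rule[1])
+
+# Assign each symbol a small integer code and show it as a hexadecimal constant.
+symbol_encoding = {symbol: code for code, symbol in enumerate(sorted(all_symbols, key=str))}
+for symbol, code in symbol_encoding.items():
+    print(symbol, "->", hex(code))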