From 1b2eceff3f319c2068eb58602d6747f764359d9f Mon Sep 17 00:00:00 2001
From: pompolic <pompolic@special-circumstanc.es>
Date: Mon, 30 May 2022 20:08:06 +0200
Subject: [PATCH] (WIP) AST printing commands, TopLevelParse support for
 selecting what to print

- ASTManager helper class to hold the top node of the ast to be printed
- HDoParseRetBreakpoint
- ast printing commands
- Update BreakpointManager tests with HDoParseRetBreakpoint tests
- Add support in TopLevelParse to select the stack item whose AST node gets printed
---
 gdb-port/ast.py                           | 16 +++++++
 gdb-port/breakpoint-manager.py            | 10 +++++
 gdb-port/commands.py                      | 41 +++++++++++++++++
 gdb-port/hammer-breakpoints.py            | 25 +++++++++++
 gdb-port/tests/unit/breakpoint-manager.py |  4 ++
 gdb-port/top-level-parse.py               | 55 ++++++++++++++++++++++-
 6 files changed, 150 insertions(+), 1 deletion(-)

diff --git a/gdb-port/ast.py b/gdb-port/ast.py
index a4e0c81..1d31f55 100644
--- a/gdb-port/ast.py
+++ b/gdb-port/ast.py
@@ -81,6 +81,7 @@ class HParsedToken:
 		if isinstance(address, str):
 			print("Warning: Address % given to HParsedToken is a string. This is probably an error (expecting int or gdb.Value)" % address)
 		self.address = address
+		# Unused for now
 		self.parent = parent
 		self.children = children
 		#self.token_type = token_type or self.read_token_type()
@@ -160,3 +161,18 @@ class HCountedArray:
 	def __str__(self):
 		elements_str = ", ".join([str(elem) for elem in self.elements_as_list()])
 		return "[ {0} ]".format(elements_str)
+
+# Holds the top node of the (partial) AST that is to be printed.
+# Ideally, HDoParseRetBreakpoint would use the ASTManager to build the partial AST piece by piece.
+# Its other responsibility is printing the held node when the AST printing command is executed.
+class ASTManager:
+	def __init__(self):
+		self.top_node = None
+
+	def set_top_node(self, address):
+		# Wraps address in an HParsedToken; address has to be an integer or gdb.Value or this will break
+		# TODO: HParseResult
+		self.top_node = HParsedToken(address)
+
+	def print_ast(self):
+		print(self.top_node)
diff --git a/gdb-port/breakpoint-manager.py b/gdb-port/breakpoint-manager.py
index 99b348d..f567158 100644
--- a/gdb-port/breakpoint-manager.py
+++ b/gdb-port/breakpoint-manager.py
@@ -59,6 +59,16 @@ class BreakpointManager:
 		print("::: Setting breakpoint in h_packrat_parse")
 		h_packrat_parse_ret = HPackratParseRetBreakpoint("*" + hex(hpp_retq))
 		self.hammer_retq_breakpoints.append(h_packrat_parse_ret)
+		hdp_retq = self.locate_retq("h_do_parse")
+		print("::: Setting breakpoint in h_do_parse")
+		# A potential problem in the future
+		# h_do_parse() has multiple return statements, but compiled down to
+		# a function with a single return instruction on the test machine
+		# If the compiler decides to use multiple RETs, having only one
+		# breakpoint set here will break the AST printing code.
+		# Return values of h_do_parse might get missed.
+		h_do_parse_ret = HDoParseRetBreakpoint("*" + hex(hdp_retq))
+		self.hammer_retq_breakpoints.append(h_do_parse_ret)
 		return self.hammer_retq_breakpoints
 
 	def del_hammer_retq_breakpoints(self):
diff --git a/gdb-port/commands.py b/gdb-port/commands.py
index ccb937c..1cc386f 100644
--- a/gdb-port/commands.py
+++ b/gdb-port/commands.py
@@ -163,3 +163,44 @@ class HammerParserDumpMemoryStats(gdb.Command):
 		print(stats)
 
 HammerParserDumpMemoryStats()
+
+# When the hammer-parse-step-to-parse-result command is executed, top_level_parse
+# marks the stack location whose result is of interest to the user.
+# The stack grows during parsing, and eventually gets popped back to the marked location.
+# At the time the associated h_do_parse() returns, the partial AST, constructed from the bottom up, will be ready
+# E.g. hammer-parse-step-to-parse-result 5 will stop after the 6th parser from the top of the parser backtrace has been applied
+# hammer-parse-step-to-parse-result -5 will stop after the parser that is, at the time of command execution, 5th from the bottom of the backtrace has been applied. (note how this is 1-indexed)
+# Passing parser here is not strictly necessary, but probably helps debug off-by-one errors
+class HammerParseStepToParseResult(gdb.Command):
+	def __init__(self):
+		super(HammerParseStepToParseResult, self).__init__("hammer-parse-step-to-parse-result", gdb.COMMAND_OBSCURE)
+		print(":: hammer-parse-step-to-parse-result")
+
+	def invoke(self, arg, from_tty):
+		args = gdb.string_to_argv(arg)
+		index = -1
+		if len(args) < 1:
+			print("No argument given. Execution will be stopped after the current parser being applied returns its result.")
+		else: # Only touch args[0] when it exists; indexing an empty argv raises IndexError, which was not caught
+			try:
+				index = int(args[0])
+			except ValueError:
+				print("Argument needs to be an integer. Execution will be stopped after the current parser being applied returns its result.")
+
+		top_level_parse.setup_ast_stack_index(index)
+
+HammerParseStepToParseResult()
+
+
+# Prints the AST currently held by ast_manager. This may or may not be outdated.
+# TODO: when invoked with an integer argument, remember the selection and print the AST when the HParseResult is returned by h_do_parse (invoke() currently ignores its argument).
+class HammerParserPrintAST(gdb.Command):
+	def __init__(self):
+		super(HammerParserPrintAST, self).__init__("hammer-parser-print-ast", gdb.COMMAND_OBSCURE)
+		print(":: hammer-parser-print-ast")
+
+	def invoke(self, arg, from_tty):
+		#TODO: if we're stopped at a HDoParseBreakpoint, the parse result is not available yet. Maybe printing can be scheduled?
+		ast_manager.print_ast()
+
+HammerParserPrintAST()
diff --git a/gdb-port/hammer-breakpoints.py b/gdb-port/hammer-breakpoints.py
index e2990bb..ef9edcb 100644
--- a/gdb-port/hammer-breakpoints.py
+++ b/gdb-port/hammer-breakpoints.py
@@ -61,6 +61,31 @@ class HDoParseBreakpoint(gdb.Breakpoint):
 		#return False
 		return retval
 
+class HDoParseRetBreakpoint(gdb.Breakpoint):
+	def stop(self):
+		# Returns True (stop execution) only when top_level_parse wants the result of this parser. TODO: Check for ast command
+		frame = gdb.selected_frame()
+		block = frame.block()
+		# With an updated GDB, the [] lookup on the block works
+		# TODO: rewrite other breakpoints with [] syntax, or frame.read_var()
+		# For locals, block['foo'].value(frame) is equivalent to frame.read_var('foo')
+		tmp_res = block['tmp_res'].value(frame)
+		parser = frame.read_var('parser') # Arguments need to be accessed via the frame and not the block
+
+		# Disabled for now: convenience variable that is true when collecting AST information is enabled
+		#build_ast = gdb.convenience_variable("hammer_gather_ast")
+		#if build_ast:
+		#TODO: fill in ASTManager here
+			#ast_manager.set_top_node(tmp_res)
+
+		# Checking want_result_of() first avoids building an AST node from tmp_res every time h_do_parse returns
+		stop = top_level_parse.want_result_of(parser)
+		if stop:
+				ast_manager.set_top_node(tmp_res)
+				ast_manager.print_ast()
+				return True
+		return False
+
 class PerformLowLevelParseBreakpoint(gdb.Breakpoint):
 	def stop(self):
 		frame = gdb.selected_frame()
diff --git a/gdb-port/tests/unit/breakpoint-manager.py b/gdb-port/tests/unit/breakpoint-manager.py
index 4afd5c7..74941b0 100644
--- a/gdb-port/tests/unit/breakpoint-manager.py
+++ b/gdb-port/tests/unit/breakpoint-manager.py
@@ -49,6 +49,8 @@ class BreakpointManagerSettingBreakpoints(unittest.TestCase):
 		self.pllpbp_mock_object = self.pllpbp_patcher.start()
 		self.hamrbp_patcher = unittest.mock.patch('__main__.HArenaMallocRawBreakpoint')
 		self.hamrbp_mock_object = self.hamrbp_patcher.start()
+		self.hdprbp_patcher = unittest.mock.patch('__main__.HDoParseRetBreakpoint')
+		self.hdprbp_mock_object = self.hdprbp_patcher.start()
 
 	def tearDown(self):
 		self.hpprbp_patcher.stop()
@@ -60,6 +62,7 @@ class BreakpointManagerSettingBreakpoints(unittest.TestCase):
 		self.hppbp_patcher.stop()
 		self.pllpbp_patcher.stop()
 		self.hamrbp_patcher.stop()
+		self.hdprbp_patcher.stop()
 
 # TODO: mock breakpoints, assert on arguments to constructor
 	def test_set_h_rule_breakpoints(self):
@@ -76,6 +79,7 @@ class BreakpointManagerSettingBreakpoints(unittest.TestCase):
 		self.bpm.set_hammer_retq_breakpoints()
 		self.assertTrue(self.plprbp_mock_object.called)
 		self.assertTrue(self.hpprbp_mock_object.called)
+		self.assertTrue(self.hdprbp_mock_object.called)
 		#bps_valid = [ bp.is_valid() for bp in self.bpm.hammer_retq_breakpoints]
 		#self.assertEqual(bps_valid, 2 * [True])
 		#Cleanup
diff --git a/gdb-port/top-level-parse.py b/gdb-port/top-level-parse.py
index d37e8ba..9404539 100644
--- a/gdb-port/top-level-parse.py
+++ b/gdb-port/top-level-parse.py
@@ -11,13 +11,17 @@ class TopLevelParse:
 		self.vt_types = None
 		self.parser_decombinator = None
 
+		# Stack index selected via hammer-parse-step-to-parse-result; compared against the stack size to decide when to stop
+		self.ast_stack_index = None
+		# Holds a reference to the relevant parser stack
+		self.ast_selected_stack = None
+
 	def init_parser(self):
 		self.vt_types = VTTypes()
 		self.parser_decombinator = ParserDecombinator(self.vt_types)
 
 	# Called from h_packrat_parse()'s handler, where the parse state and arena get initialized
 	def enter_h_packrat_parse(self, parser):
-		# TODO: add a parser stack or something?
 		parser_stack = ParserStack(None, None)
 		self.parser_stacks.append(parser_stack)
 		return 0
@@ -34,6 +38,14 @@ class TopLevelParse:
 		if parser_stack.parse_state is None and parser_stack.parse_state != parse_state:
 			self.first_h_do_parse_after_packrat_parse(parse_state, arena)
 
+	def return_from_h_do_parse(self, parse_state, parser):
+		parser_stack = self.peek_parserstack() # NOTE(review): not used below, and neither is parse_state
+		parser_obj = self.parser_objs[parser] # No new Parser() object is made here: the corresponding enter_h_do_parse() call has to have created it already
+		# The rationale for handling the "current" parser separately from the stack:
+		# Parsers are only pushed on the stack in perform_lowlevel_parse, which doesn't get called when h_do_parse() gets the result from cache.
+		# If other backends are supported, this might change to pushing/popping the stack in h_do_parse()
+		self.h_do_parse_parser = parser_obj # Restore the "current" parser, otherwise the GUI and backtrace would show the parser h_do_parse() was last called with
+
 	# Called from h_do_parse()'s handler, at which point we know the addresses of the state and arena
 	def first_h_do_parse_after_packrat_parse(self, parse_state, arena):
 		parser_stack = self.peek_parserstack()
@@ -170,6 +182,47 @@ class TopLevelParse:
 	def dump_memory_stats(self):
 		return [(p.name, hex(p.address), p.bytes_used) for p in self.parser_objs.values()]
 
+	# TODO: better naming. this is for the AST view
+	# This will only work as advertised if called from h_do_parse()
+	def want_result_of(self, parser=None):
+		# Check that:
+		# - The relevant parser stack is active (h_packrat_parse hasn't been called again, or has returned since)
+		# - A particular parser application on the stack has been selected
+		# - The selected parser has just been popped off the stack:
+		#	- the stack size equals the "index" pointing to the spot the selected application was at
+		#	- (Potential modification in the future: the selected application is on the top of the stack and the same parser is being applied)
+		# The index is compared against None, not tested for truthiness: index 0 (p_stack[0]) is a valid selection
+		index_selected = self.ast_stack_index is not None
+		on_selected_stack = self.peek_parserstack() == self.ast_selected_stack
+		return on_selected_stack and index_selected and len(self.ast_selected_stack.p_stack) == self.ast_stack_index
+
+	def setup_ast_stack_index(self, stop_index):
+		self.ast_selected_stack = self.peek_parserstack()
+		# Indexing from the bottom of the stack. NOTE(review): len-abs(stop_index) counts back from the end of p_stack; confirm that is the intended end
+		if stop_index < 0:
+			# Convert negative index to positive one
+			self.ast_stack_index = len(self.ast_selected_stack.p_stack)-abs(stop_index)
+		# Indexing from the top of the stack. NOTE(review): stop_index-1 counts up from p_stack[0]; confirm that is the intended end
+		elif stop_index > 0:
+			# The "current" parser is not on the stack while we're in h_do_parse, as it gets pushed in perform_lowlevel_parse. Consider:
+			# [0x5555555779f0] digit [current] <--- not on stack
+
+			#[0x55555557f1b0] (Unnamed sequence) <--- self.ast_selected_stack.p_stack[5]
+			#[0x55555557f200] xrefs
+			#[0x5555555800f0] (Unnamed sequence)
+			#[0x555555580140] xr_td
+			#[0x555555585670] (Unnamed choice)
+			#[0x55555559bb90] (Unnamed ignoreseq) <--- self.ast_selected_stack.p_stack[0]
+
+			self.ast_stack_index = stop_index-1
+		# 0 has been passed in: Select current parser
+		else:
+			self.ast_stack_index = len(self.ast_selected_stack.p_stack)
+
+	def clear_ast_stack_index(self):
+		self.ast_stack_index = None # Reset the same attribute __init__ and setup_ast_stack_index() write, so want_result_of() sees no selection
+		self.ast_selected_stack = None
+
 	# TODO: get_avg_mem_use_all_arenas, get_total_mem_use
 
 top_level_parse = TopLevelParse()
-- 
GitLab