From 1b2eceff3f319c2068eb58602d6747f764359d9f Mon Sep 17 00:00:00 2001
From: pompolic <pompolic@special-circumstanc.es>
Date: Mon, 30 May 2022 20:08:06 +0200
Subject: [PATCH] (WIP) AST printing commands, TopLevelParse support for selecting what to print

- ASTManager helper class to hold the top node of the ast to be printed
- HDoParseRetBreakpoint
- ast printing commands
- Update BreakpointManager tests with HDoParseRetBreakpoint tests
- Add support in TopLevelParse to select a stack item for printing the AST node of
---
Review notes (this section is not part of the commit message):
 * Line breaks were restored after being lost in extraction. Indentation is
   assumed to be 4 spaces per level; confirm against the target files.
 * commands.py: HammerParseStepToParseResult.invoke only converts args[0]
   when an argument was given (it used to raise IndexError without one).
 * top-level-parse.py: "else if" (a SyntaxError) is now "elif".
 * top-level-parse.py: want_result_of compares ast_stack_index against None,
   so a selected index of 0 is no longer treated as "nothing selected".
 * top-level-parse.py: clear_ast_stack_index resets ast_stack_index rather
   than ast_stack_counter, which nothing else reads.
 * Not changed (context line): "Address % given" in ast.py is parsed as a
   "% g" conversion and fails when formatted with a string address.
 * The blob ids on the "index" lines describe the original commit.

 gdb-port/ast.py                           | 16 +++++++
 gdb-port/breakpoint-manager.py            | 10 +++++
 gdb-port/commands.py                      | 41 +++++++++++++++++
 gdb-port/hammer-breakpoints.py            | 25 +++++++++++
 gdb-port/tests/unit/breakpoint-manager.py |  4 ++
 gdb-port/top-level-parse.py               | 55 ++++++++++++++++++++++-
 6 files changed, 150 insertions(+), 1 deletion(-)

diff --git a/gdb-port/ast.py b/gdb-port/ast.py
index a4e0c81..1d31f55 100644
--- a/gdb-port/ast.py
+++ b/gdb-port/ast.py
@@ -81,6 +81,7 @@ class HParsedToken:
         if isinstance(address, str):
             print("Warning: Address % given to HParsedToken is a string. This is probably an error (expecting int or gdb.Value)" % address)
         self.address = address
+        # Unused for now
         self.parent = parent
         self.children = children
         #self.token_type = token_type or self.read_token_type()
@@ -160,3 +161,18 @@ class HCountedArray:
     def __str__(self):
         elements_str = ", ".join([str(elem) for elem in self.elements_as_list()])
         return "[ {0} ]".format(elements_str)
+
+# Class to hold subtrees of the AST
+# HDoParseRetBreakpoint would ideally use the ASTManager to construct the partial ast piecewise
+# Its other responsibility is formatting the output when the AST printing command is executed
+class ASTManager:
+    def __init__(self):
+        self.top_node = None
+
+    def set_top_node(self, address):
+        # Address has to be an integer or gdb.Value or this will break
+        # TODO: HParseResult
+        self.top_node = HParsedToken(address)
+
+    def print_ast(self):
+        print(self.top_node)
diff --git a/gdb-port/breakpoint-manager.py b/gdb-port/breakpoint-manager.py
index 99b348d..f567158 100644
--- a/gdb-port/breakpoint-manager.py
+++ b/gdb-port/breakpoint-manager.py
@@ -59,6 +59,16 @@ class BreakpointManager:
         print("::: Setting breakpoint in h_packrat_parse")
         h_packrat_parse_ret = HPackratParseRetBreakpoint("*" + hex(hpp_retq))
         self.hammer_retq_breakpoints.append(h_packrat_parse_ret)
+        hdp_retq = self.locate_retq("h_do_parse")
+        print("::: Setting breakpoint in h_do_parse")
+        # A potential problem in the future
+        # h_do_parse() has multiple return statements, but compiled down to
+        # a function with a single return instruction on the test machine
+        # If the compiler decides to use multiple RETs, having only one
+        # breakpoint set here will break the AST printing code.
+        # Return values of h_do_parse might get missed.
+        h_do_parse_ret = HDoParseRetBreakpoint("*" + hex(hdp_retq))
+        self.hammer_retq_breakpoints.append(h_do_parse_ret)
         return self.hammer_retq_breakpoints
 
     def del_hammer_retq_breakpoints(self):
diff --git a/gdb-port/commands.py b/gdb-port/commands.py
index ccb937c..1cc386f 100644
--- a/gdb-port/commands.py
+++ b/gdb-port/commands.py
@@ -163,3 +163,44 @@ class HammerParserDumpMemoryStats(gdb.Command):
         print(stats)
 
 HammerParserDumpMemoryStats()
+
+# When the hammer-parse-step-after-apply command is executed, top_level_parse
+# marks the stack location whose result is of interest of the user.
+# The stack grows during parsing, and eventually gets popped back to the marked location.
+# At the time the associated h_do_parse() returns, the partial AST, constructed from the bottom up, will be ready
+# E.g. hammer-parse-step-to-parse-result 5 will stop after the 6th parser from the top of the parser backtrace has been applied
+# hammer-parse-step-after-apply -5 will stop after, at the time of command execution, 5th parser from the bottom of the backtrace has been applied. (note how this is 1-indexed)
+# Passing parser here is not strictly necessary, but probably helps debug off-by-one errors
+class HammerParseStepToParseResult(gdb.Command):
+    def __init__(self):
+        super(HammerParseStepToParseResult, self).__init__("hammer-parse-step-to-parse-result", gdb.COMMAND_OBSCURE)
+        print(":: hammer-parse-step-to-parse-result")
+
+    def invoke(self, arg, from_tty):
+        args = gdb.string_to_argv(arg)
+        index = -1
+        if len(args) < 1:
+            print("No argument given. Execution will be stopped after the current parser being applied returns its result.")
+        else:
+            try:
+                index = int(args[0])
+            except ValueError:
+                print("Argument needs to be an integer. Execution will be stopped after the current parser being applied returns its result.")
+
+        top_level_parse.setup_ast_stack_index(index)
+
+HammerParseStepToParseResult()
+
+
+# When invoked without argument, it'll attempt to print the AST held by ast_manager. This may or may not be outdated.
+# When invoked with an integer argument, it'll remember the selection, and print the AST when the HParseResult is returned by h_do_parse.
+class HammerParserPrintAST(gdb.Command):
+    def __init__(self):
+        super(HammerParserPrintAST, self).__init__("hammer-parser-print-ast", gdb.COMMAND_OBSCURE)
+        print(":: hammer-parser-print-ast")
+
+    def invoke(self, arg, from_tty):
+        #TODO: if we're stopped at a HDoParseBreakpoint, the parse result is not available yet. Maybe printing can be scheduled?
+        ast_manager.print_ast()
+
+HammerParserPrintAST()
diff --git a/gdb-port/hammer-breakpoints.py b/gdb-port/hammer-breakpoints.py
index e2990bb..ef9edcb 100644
--- a/gdb-port/hammer-breakpoints.py
+++ b/gdb-port/hammer-breakpoints.py
@@ -61,6 +61,31 @@ class HDoParseBreakpoint(gdb.Breakpoint):
         #return False
         return retval
 
+class HDoParseRetBreakpoint(gdb.Breakpoint):
+    def stop(self):
+        # TODO: Check for ast command
+        frame = gdb.selected_frame()
+        block = frame.block()
+        # Updated GDB, now this works
+        # TODO: rewrite other breakpoints with [] syntax, or frame.read_var()
+        # For locals, block['foo'].value(frame) is equivalent to frame.read_var('foo')
+        tmp_res = block['tmp_res'].value(frame)
+        parser = frame.read_var('parser') # Arguments need to be accessed via the frame and not the block
+
+        # True when collecting AST information is enabled
+        #build_ast = gdb.convenience_variable("hammer_gather_ast")
+        #if build_ast:
+            #TODO: fill in ASTManager here
+            #ast_manager.set_top_node(tmp_res)
+
+        # Checking want_result_of() here avoids instantiating a HParseResult every time h_do_parse returns
+        stop = top_level_parse.want_result_of(parser)
+        if stop:
+            ast_manager.set_top_node(tmp_res)
+            ast_manager.print_ast()
+            return True
+        return False
+
 class PerformLowLevelParseBreakpoint(gdb.Breakpoint):
     def stop(self):
         frame = gdb.selected_frame()
diff --git a/gdb-port/tests/unit/breakpoint-manager.py b/gdb-port/tests/unit/breakpoint-manager.py
index 4afd5c7..74941b0 100644
--- a/gdb-port/tests/unit/breakpoint-manager.py
+++ b/gdb-port/tests/unit/breakpoint-manager.py
@@ -49,6 +49,8 @@ class BreakpointManagerSettingBreakpoints(unittest.TestCase):
         self.pllpbp_mock_object = self.pllpbp_patcher.start()
         self.hamrbp_patcher = unittest.mock.patch('__main__.HArenaMallocRawBreakpoint')
         self.hamrbp_mock_object = self.hamrbp_patcher.start()
+        self.hdprbp_patcher = unittest.mock.patch('__main__.HDoParseRetBreakpoint')
+        self.hdprbp_mock_object = self.hdprbp_patcher.start()
 
     def tearDown(self):
         self.hpprbp_patcher.stop()
@@ -60,6 +62,7 @@ class BreakpointManagerSettingBreakpoints(unittest.TestCase):
         self.hppbp_patcher.stop()
         self.pllpbp_patcher.stop()
         self.hamrbp_patcher.stop()
+        self.hdprbp_patcher.stop()
 
     # TODO: mock breakpoints, assert on arguments to constructor
     def test_set_h_rule_breakpoints(self):
@@ -76,6 +79,7 @@ class BreakpointManagerSettingBreakpoints(unittest.TestCase):
         self.bpm.set_hammer_retq_breakpoints()
         self.assertTrue(self.plprbp_mock_object.called)
         self.assertTrue(self.hpprbp_mock_object.called)
+        self.assertTrue(self.hdprbp_mock_object.called)
         #bps_valid = [ bp.is_valid() for bp in self.bpm.hammer_retq_breakpoints]
         #self.assertEqual(bps_valid, 2 * [True])
         #Cleanup
diff --git a/gdb-port/top-level-parse.py b/gdb-port/top-level-parse.py
index d37e8ba..9404539 100644
--- a/gdb-port/top-level-parse.py
+++ b/gdb-port/top-level-parse.py
@@ -11,13 +11,17 @@ class TopLevelParse:
         self.vt_types = None
         self.parser_decombinator = None
 
+        # Counts stack pushes/pops to determine if stopping is needed for hammer-parse-after-apply
+        self.ast_stack_index = None
+        # Holds a reference to the relevant parser stack
+        self.ast_selected_stack = None
+
     def init_parser(self):
         self.vt_types = VTTypes()
         self.parser_decombinator = ParserDecombinator(self.vt_types)
 
     # Called from h_packrat_parse()'s handler, where the parse state and arena get initialized
     def enter_h_packrat_parse(self, parser):
-        # TODO: add a parser stack or something?
         parser_stack = ParserStack(None, None)
         self.parser_stacks.append(parser_stack)
         return 0
@@ -34,6 +38,14 @@ class TopLevelParse:
         if parser_stack.parse_state is None and parser_stack.parse_state != parse_state:
             self.first_h_do_parse_after_packrat_parse(parse_state, arena)
 
+    def return_from_h_do_parse(self, parse_state, parser):
+        parser_stack = self.peek_parserstack()
+        parser_obj = self.parser_objs[parser] # We don't try to make a new Parser() object here, because it has to have been done in the corresponding enter_h_do_parse() call
+        # The rationale for handling the "current" parser separately from stack:
+        # Parsers are only pushed on the stack in perform_lowlevel_parse, which doesn't get called when h_do_parse() gets the result from cache.
+        # If other backends are supported, this might change to pushing/popping the stack in h_do_parse()
+        self.h_do_parse_parser = parser_obj # Restore the "current" parser, otherwise it'll show the parser h_do_parse() was last called with on the GUI and backtrace
+
     # Called from h_do_parse()'s handler, at which point we know the addresses of the state and arena
     def first_h_do_parse_after_packrat_parse(self, parse_state, arena):
         parser_stack = self.peek_parserstack()
@@ -170,6 +182,47 @@ class TopLevelParse:
     def dump_memory_stats(self):
         return [(p.name, hex(p.address), p.bytes_used) for p in self.parser_objs.values()]
 
+    # TODO: better naming. this is for the AST view
+    # This will only work as advertised if called from h_do_parse()
+    def want_result_of(self, parser=None):
+        # Check that:
+        # - The relevant parser stack is active (h_packrat_parse hasn't been called again, or has returned since)
+        # - A particular parser application on the stack has been selected
+        # - The selected parser has just been popped off the stack:
+        #   - stack is one item smaller than the "index" pointing to the spot the selected application is at
+        #   - (Potential modification in the future: the selected application is on the top of the stack and the same parser is being applied)
+        if self.peek_parserstack() == self.ast_selected_stack and self.ast_stack_index is not None and len(self.ast_selected_stack.p_stack) == self.ast_stack_index:
+            return True
+        else:
+            return False
+
+    def setup_ast_stack_index(self, stop_index):
+        self.ast_selected_stack = self.peek_parserstack()
+        # Indexing from the bottom of the stack
+        if stop_index < 0:
+            # Convert negative index to positive one
+            self.ast_stack_index = len(self.ast_selected_stack.p_stack)-abs(stop_index)
+        # Indexing from the top of the stack
+        elif stop_index > 0:
+            # The "current" parser is not on the stack while we're in h_do_parse, as it gets pushed in perform_lowlevel_parse. Consider:
+            # [0x5555555779f0] digit [current] <--- not on stack
+
+            #[0x55555557f1b0] (Unnamed sequence) <--- self.ast_selected_stack.p_stack[5]
+            #[0x55555557f200] xrefs
+            #[0x5555555800f0] (Unnamed sequence)
+            #[0x555555580140] xr_td
+            #[0x555555585670] (Unnamed choice)
+            #[0x55555559bb90] (Unnamed ignoreseq) <--- self.ast_selected_stack.p_stack[0]
+
+            self.ast_stack_index = stop_index-1
+        # 0 has been passed in: Select current parser
+        else:
+            self.ast_stack_index = len(self.ast_selected_stack.p_stack)
+
+    def clear_ast_stack_index(self):
+        self.ast_stack_index = None
+        self.ast_selected_stack = None
+
     # TODO: get_avg_mem_use_all_arenas, get_total_mem_use
 
 top_level_parse = TopLevelParse()
-- 
GitLab