diff --git a/gdb-port/README.md b/gdb-port/README.md index f3e7b71047d683a3da3aae825cd233054cf4ca8f..d2b846509e4d8ca04b911b11bf6cd55e407c16e8 100644 --- a/gdb-port/README.md +++ b/gdb-port/README.md @@ -8,9 +8,11 @@ Requirements Invocation ``` -gdb -ex "source /path/to/utility-commands.py" -ex "hammer-parse-stop-at-pos 50" -ex "source /path/to/parser-type-instrumentation-gdb.py" -ex "source /path/to/parser-name-instrumentation-gdb.py" --args /path/to/pdf /path/to/input.pdf +gdb -ex "source /path/to/utility-commands.py" -ex "source /path/to/commands.py" -ex "source /path/to/hammer-breakpoints.py" -ex "source /path/to/breakpoint-manager.py" -ex "source /path/to/top-level-parse.py" -ex "hammer-parse-stop-at-pos 50" -ex "source /path/to/parser-type-instrumentation-gdb.py" -ex "source /path/to/parser-name-instrumentation-gdb.py" --args /path/to/pdf /path/to/input.pdf ``` +Note that `-ex "hammer-parse-stop-at-pos 50"` is not strictly necessary, but without it the tool will, by default, just print memory stats and exit. + To enable the GUI, in the gdb console: ``` @@ -118,6 +120,12 @@ hammer-parser-average-mem Prints the average number of bytes used separately for each HArena. +``` +hammer-parser-dump-memory-stats +``` + +Prints memory usage statistics for all parsers encountered up to that point. (If an HParser is not explicitly named in an H_RULE in one of the parser-initializing functions, and has not been applied to the input yet, it will not appear in the statistics. 
For example: given the `H_RULE(foo, h_choice(h_uint8(), h_uint16(), NULL))`, `foo` will appear in the statistics if it's been declared in `init_parser()`, but the unnamed `h_uint8()` will only appear if it's been applied at least once.) + # Limitations This tool is currently built and tested against the pdf parser. It makes a few assumptions: diff --git a/gdb-port/parser-name-instrumentation-gdb.py b/gdb-port/parser-name-instrumentation-gdb.py index c142441e116a8f2a74569e4de8c9e1e99a2bea59..e082de68f197d06624a60d568c8cfb48eada6d29 100644 --- a/gdb-port/parser-name-instrumentation-gdb.py +++ b/gdb-port/parser-name-instrumentation-gdb.py @@ -144,78 +144,6 @@ class InitParserBreakpoint(gdb.Breakpoint): return False -# TODO: refactored to breakpoint-manager.py , remove -#class HRuleBreakpoint(gdb.Breakpoint): -# def stop(self): -# frame = gdb.selected_frame() -# block = frame.block() -# -# for p in block: -# top_level_parse.parser_objs[int(p.value(frame))] = Parser(p.name, int(p.value(frame))) - -class HArenaMallocRawBreakpoint(gdb.Breakpoint): - def stop(self): - frame = gdb.selected_frame() - block = frame.block() - - for val in block: - if val.name == 'size': - alloc_size = int(val.value(frame)) - - top_level_parse.enter_h_arena_malloc_raw(alloc_size) - - return False - -hammer_retq_breakpoints = [] - -#class BreakpointManager(): -# def __init__(self, h_rule_functions): -# self.hammer_retq_breakpoints = [] -# self.h_rule_breakpoints = [] -# -# self.h_do_parse = None -# self.h_packrat_parse = None -# self.perform_lowlevel_parse = None -# self.h_arena_malloc_raw = None -# -# self.parse_action = None -# self.parse_choice = None -# self.parse_sequence = None -# self.parse_difference = None -# self.parse_many = None -# self.parse_and = None -# self.parse_attr_bool = None -# self.parse_bind = None -# self.parse_bits = None -# self.parse_butnot = None -# self.parse_charset = None -# self.parse_ch = None -# self.parse_end = None -# self.parse_endianness = None -# 
self.parse_epsilon = None -# self.parse_ignore = None -# self.parse_ignoreseq = None -# self.parse_indirect = None -# self.parse_int_range = None -# self.parse_not = None -# self.parse_nothing = None -# self.parse_optional = None -# self.parse_permutation = None -# self.parse_skip = None -# self.parse_seek = None -# self.parse_tell = None -# self.parse_token = None -# self.parse_unimplemented = None -# self.parse_put = None -# self.parse_get = None -# self.parse_whitespace = None -# self.parse_xor = None -# -# def set_h_rule_breakpoints(self): -# for func in H_RULE_FUNCTIONS: -# func_retq = locate_retq(func[0], func[1]) -# self.h_rule_breakpoints[func] = HRuleBreakpoint("*" + hex(func_retq)) - print(": Initializing BreakpointManager") breakpoint_manager = BreakpointManager(H_RULE_FUNCTIONS) @@ -226,169 +154,6 @@ class PDFMainBreakpoint(gdb.Breakpoint): return True -# GDB parameters -# TODO: hammer parameter prefix - -print(": Registering parameters and commands") - -class ExtendedParseStepInfo(gdb.Parameter): - """Controls whether to display parser stack and input preview on stepping the parse.""" - def __init__(self): - super(ExtendedParseStepInfo, self).__init__("hammer-extended-parse-step-info", gdb.COMMAND_OBSCURE, gdb.PARAM_BOOLEAN) - self.show_doc = "Show parser stack and input preview after hammer-parse-step:" - #self.set_doc = "Show parser stack and input preview after hammer-parse-step:" - self.value = True - print(":: hammer-extended-parse-step-info") - -ExtendedParseStepInfo() - -# GDB commands - -# TODO: GDB help strings -# TODO: factor commands out into their own file - -class HammerParserBacktrace(gdb.Command): - def __init__(self): - super(HammerParserBacktrace, self).__init__ ("hammer-parser-backtrace", gdb.COMMAND_OBSCURE) - print(":: hammer-parser-backtrace") - - def invoke(self, arg, from_tty): - parserstack = top_level_parse.peek_parserstack().p_stack - args = gdb.string_to_argv(arg) - if len(args) < 1: - maxsize = len(parserstack) - else: - 
try: - maxsize = int(args[0]) - if maxsize < 1: - raise ValueError - except ValueError: - maxsize = len(parserstack) - print("Argument must be a positive integer") - - print("[" + str(hex(top_level_parse.h_do_parse_parser.address)) + "] " + top_level_parse.h_do_parse_parser.name + " [current]") #TODO: GUI widget should reflect this - print(" ") - depth = min(len(parserstack), maxsize) - if depth > 0: # if stack not empty - # unsure what the idiomatic python is for handling negative indices starting with -1, - # but this addition is to avoid off-by-one errors - index = -(depth+1) - for p in parserstack[-1:index:-1]: - print("[" + str(hex(p.address)) + "] " + p.name) # TODO: errors in perform_lowlevel_parse, if p.name is None - if depth < len(parserstack): - print("[...]") - -HammerParserBacktrace() - -class HammerParserMemUse(gdb.Command): - def __init__(self): - super(HammerParserMemUse, self).__init__("hammer-parser-mem-use", gdb.COMMAND_OBSCURE) - print(":: hammer-parser-mem-use") - - def invoke(self, arg, from_tty): - args = gdb.string_to_argv(arg) - if len(args) < 1: - print("Usage: hammer-parser-mem-use <address>") - return - - parser_addr = args[0] - try: - parser_addr_int = int(parser_addr, 16) - parser_obj = top_level_parse.parser_by_address(parser_addr_int) - if parser_obj is not None: - print(parser_obj.bytes_used) - except ValueError: - print("Address needs to be a hexadecimal number") - -HammerParserMemUse() - -class HammerParserMemUseName(gdb.Command): - def __init__(self): - super(HammerParserMemUseName, self).__init__("hammer-parser-mem-use-name", gdb.COMMAND_OBSCURE) - print(":: hammer-parser-mem-use-name") - - def invoke(self, arg, from_tty): - args = gdb.string_to_argv(arg) - if len(args) < 1: - print("Usage: hammer-parser-mem-use-name <name>") - return - - parser_name = args[0] - parser_objs = top_level_parse.parsers_by_name(parser_name) - if parser_objs is not None: - for p in parser_objs: - print((p.name, hex(p.address), p.bytes_used)) - 
-HammerParserMemUseName() - -class HammerParserTopSingleArenaMem(gdb.Command): - def __init__(self): - super(HammerParserTopSingleArenaMem, self).__init__("hammer-parser-top-single-arena-mem", gdb.COMMAND_OBSCURE) - print(":: hammer-parser-top-single-arena-mem") - - def invoke(self, arg, from_tty): - args = gdb.string_to_argv(arg) - - p = top_level_parse.get_parser_top_per_arena_mem() - print((p.name, hex(p.address), p.bytes_used)) - -HammerParserTopSingleArenaMem() - -class HammerParserTopTotalArenaMem(gdb.Command): - def __init__(self): - super(HammerParserTopTotalArenaMem, self).__init__("hammer-parser-top-total-arena-mem", gdb.COMMAND_OBSCURE) - print(":: hammer-parser-top-total-arena-mem") - - def invoke(self, arg, from_tty): - args = gdb.string_to_argv(arg) - - p = top_level_parse.get_parser_top_total_arena_mem() - print((p.name, hex(p.address), p.bytes_used)) - total_mem_use = p.get_arenasum() - print("Total: " + str(total_mem_use) + " bytes") - -HammerParserTopTotalArenaMem() - -# TODO: average memory use, per arena and total - -class HammerParserPreviewInput(gdb.Command): - def __init__(self): - super(HammerParserPreviewInput, self).__init__("hammer-parser-preview-input", gdb.COMMAND_OBSCURE) - print(":: hammer-parser-preview-input") - - def invoke(self, arg, from_tty): - args = gdb.string_to_argv(arg) - - print(top_level_parse.input_chunk) - -HammerParserPreviewInput() - -class HammerParserAverageMem(gdb.Command): - def __init__(self): - super(HammerParserAverageMem, self).__init__("hammer-parser-average-mem", gdb.COMMAND_OBSCURE) - print(":: hammer-parser-average-mem") - - def invoke(self, arg, from_tty): - args = gdb.string_to_argv(arg) - - mem = top_level_parse.get_avg_mem_use_per_arena() - print("Bytes used on average in each arena:") - print(mem) - -HammerParserAverageMem() - -class HammerParserCurrentEnv(gdb.Command): - def __init__(self): - super(HammerParserCurrentEnv, self).__init__("hammer-parser-current-env", gdb.COMMAND_OBSCURE) - print(":: 
hammer-parser-current-env") - - def invoke(self, arg, from_tty): - p = top_level_parse.h_do_parse_parser - p_env = top_level_parse.parser_decombinator.decompose_parser(p, top_level_parse) #TODO: parser -> env mapping function in top_level_parse - print(type(p_env).__name__ + " - " + str(p_env)) # TODO: consistency with GUI - -HammerParserCurrentEnv() - print(": Registering exit handler") # Clean up by-address breakpoints in hammer when inferior exits. # Caveat: Assumes there's a single inferior, the debugged parser, so no checking is done