# These need to be constructed in HDoParseBreakpoint (or at least in a scope
# where 'parser' is visible)
# TODO: these can learn of parsers before the parser name instrumentation does
# TopLevelParse should be amenable to adding parsers to the dict through here

# Default display name for each known parser vtable symbol, used when a
# parser object has not been given an explicit name.
parser_name_defaults = {
    'action_vt': '(Unnamed action)',
    'and_vt': '(Unnamed and)',
    'attr_bool_vt': '(Unnamed attr_bool)',
    'bind_vt': '(Unnamed bind)',
    'bits_vt': '(Unnamed bits)',
    'butnot_vt': '(Unnamed butnot)',
    'ch_vt': '(Unnamed ch)',
    'charset_vt': '(Unnamed charset)',
    'choice_vt': '(Unnamed choice)',
    'difference_vt': '(Unnamed difference)',
    'end_vt': '(Unnamed end)',
    'endianness_vt': '(Unnamed endianness)',
    'ignore_vt': '(Unnamed ignore)',
    'ignoreseq_vt': '(Unnamed ignoreseq)',
    'indirect_vt': '(Unnamed indirect)',
    'int_range_vt': '(Unnamed int_range)',
    'length_value_vt': '(Unnamed length_value)',
    'many_vt': '(Unnamed many)',
    'not_vt': '(Unnamed not)',
    'nothing_vt': '(Unnamed nothing)',
    'optional_vt': '(Unnamed optional)',
    'permutation_vt': '(Unnamed permutation)',
    'seek_vt': '(Unnamed seek)',
    'skip_vt': '(Unnamed skip)',
    'tell_vt': '(Unnamed tell)',
    'sequence_vt': '(Unnamed sequence)',
    'token_vt': '(Unnamed token)',
    'unimplemented_vt': '(Unnamed unimplemented)',
    'get_vt': '(Unnamed get)',
    'put_vt': '(Unnamed put)',
    'whitespace_vt': '(Unnamed whitespace)',
    'xor_vt': '(Unnamed xor)',
}


class VTTypes:
    """Index of vtable gdb symbols keyed by the integer address of the vtable.

    Invariants of the mapping:
        vt_p == vt_symbols[int(parser_vtable_p)].value().address
        name == parser_name_defaults[vt_types.lookup_by_address(parser_vtable_p).name]
    """

    def __init__(self):
        """Look up every symbol named in parser_name_defaults via gdb."""
        self.vt_symbols = {}
        for key in parser_name_defaults:
            # gdb.lookup_symbol() returns a (symbol, is_field_of_this) tuple;
            # the symbol is None when gdb cannot find it.
            symbol = gdb.lookup_symbol(key)[0]
            if symbol is None:
                # Skip vtables absent from the inferior rather than failing
                # the whole table; lookup_by_address() reports them as None.
                continue
            self.vt_symbols[int(symbol.value().address)] = symbol

    def lookup_by_address(self, address):
        """Return the vtable symbol located at `address`, or None if unknown.

        `address` is expected to be a pointer that can be converted with
        int(), e.g. parser.address of a Parser object, or a plain integer.
        """
        return self.vt_symbols.get(int(address))


vt_types = VTTypes()


class HParserEnv:
    """Base class for views of a parser's `env` field.

    NOTE: most subclasses evaluate gdb expressions that mention `parser`
    literally, so they must be constructed while a variable of that name is
    visible in the selected frame (see the comment at the top of the file).
    """

    def __init__(self, parser, top_level_parse):
        print("HParserEnv constructed") # DEBUG

    def name_from_vtable(self, parser):
        """Return the default name for `parser`, derived from its vtable.

        `parser` is expected to be a Parser object (its `.address` is used).
        Returns a placeholder string when the vtable cannot be identified.
        """
        parser_addr = parser.address
        # TODO: do this without passing a string to gdb.parse_and_eval()
        # perhaps using gdb.Value would be the best
        vtable_p = gdb.parse_and_eval("((HParser*) " + str(parser_addr) + ")->vtable")
        vtable_symbol = vt_types.lookup_by_address(vtable_p)
        if vtable_symbol is None:
            return "(Unknown parser type (vtable symbol not found in lookup)"
        try:
            return parser_name_defaults[vtable_symbol.name]
        except KeyError:
            return "(Unknown parser type (vtable exists but has no default name associated)"

    def _collect_sequence_members(self, top_level_parse):
        """Return Parser objects for each entry of the HSequence in parser->env.

        Shared by SequenceEnv and ChoiceEnv, which both read `len` and
        `p_array` through an HSequence cast. Members that have no name yet
        are given the default name of their vtable.
        """
        # TODO: should GDB do the array indexing operation, or should the Python code?
        # TODO: top_level_parse.create_or_get_parser(address)
        num_parsers = int(gdb.parse_and_eval("((HSequence *) parser->env)->len"))
        members = []
        for index in range(num_parsers):
            parser_p = gdb.parse_and_eval("((HSequence*) parser->env)->p_array[" + str(index) + "]")
            parser_obj = top_level_parse.add_or_get_parser(parser_p)
            if parser_obj.name is None:
                parser_obj.name_parser(self.name_from_vtable(parser_obj))
            members.append(parser_obj)
        return members


# TODO: Unit test:
# make SequenceEnv(parser, top_level_parse)
# parser_array = [top_level_parse.add_or_get_parser(gdb.parse_and_eval("((HSequence*) parser->env)->p_array[" + str(index) +"]")) for index in range(0, num_parsers)]
# compare pointers for equality

# TODO: consistent naming for member_parser, member_parser_pointers,

class AttrBoolEnv(HParserEnv):
    """Env of an attr_bool parser: wrapped parser, predicate and user data."""

    def __init__(self, parser, top_level_parse):
        self.parser = parser
        member_parser_p = gdb.parse_and_eval("((HAttrBool *) parser->env)->p")
        self.member_parser = top_level_parse.add_or_get_parser(member_parser_p)
        self.predicate_p = gdb.parse_and_eval("((HAttrBool *) parser->env)->pred")
        self.user_data_p = gdb.parse_and_eval("((HAttrBool *) parser->env)->user_data")

    def __str__(self):
        return str(self.member_parser)


class BitsEnv(HParserEnv):
    """Placeholder for the env of a bits parser; nothing is decoded yet."""
    # TODO: implement; the original class had no body at all.


class SequenceEnv(HParserEnv):
    """Env of a sequence parser: the ordered list of member parsers."""

    def __init__(self, parser, top_level_parse):
        super().__init__(parser, top_level_parse)
        # TODO: maybe move self.parser to base class. otherwise, is this needed?
        self.parser = parser
        self.member_parsers = self._collect_sequence_members(top_level_parse)

    def __str__(self):
        return str([str(parser) for parser in self.member_parsers])


class IgnoreEnv(HParserEnv):
    """Env of an ignore parser: the single parser whose result is ignored."""

    def __init__(self, parser, top_level_parse):
        self.parser = parser
        # Kept for compatibility with the original attribute set; never filled.
        self.member_parser_pointers = []
        self.member_parsers = []
        ignored_p = gdb.parse_and_eval("(HParser*) parser->env")
        # Ideally we could look up ignored_p using top_level_parse directly,
        # but it will not find a result if perform_lowlevel_parse(ignored_p)
        # wasn't called yet, hence add_or_get_parser().
        ignore_obj = top_level_parse.add_or_get_parser(ignored_p)
        self.member_parsers.append(ignore_obj)


class ActionEnv(HParserEnv):
    """Env of an action parser: wrapped parser, action and user data."""

    def __init__(self, parser, top_level_parse):
        self.parser = parser
        member_parser_p = gdb.parse_and_eval("((HParseAction*) parser->env)->p")
        self.member_parser = top_level_parse.add_or_get_parser(member_parser_p)
        # should return a HAction
        self.action = gdb.parse_and_eval("((HParseAction*) parser->env)->action")
        self.user_data_p = gdb.parse_and_eval("((HParseAction*) parser->env)->user_data")


class AndEnv(HParserEnv):
    """Env of an and parser: the single wrapped parser."""

    def __init__(self, parser, top_level_parse):
        self.parser = parser
        member_parser_p = gdb.parse_and_eval("(HParser*) parser->env")
        self.member_parser = top_level_parse.add_or_get_parser(member_parser_p)


class ChEnv(HParserEnv):
    """Env of a ch parser: the byte stored directly in parser->env."""

    def __init__(self, parser, top_level_parse):
        self.parser = parser
        self.ch_arg = gdb.parse_and_eval("(uint8_t) parser->env")
        # gdb.Value has no .value() method; convert the integer-typed value
        # with int() to get the character code.
        self.ch_value = int(self.ch_arg)

    def __str__(self):
        return str(self.ch_value)


class ChoiceEnv(HParserEnv):
    """Env of a choice parser: the list of alternative member parsers."""

    def __init__(self, parser, top_level_parse):
        super().__init__(parser, top_level_parse)
        self.parser = parser
        # The env is read through the same HSequence cast as SequenceEnv.
        self.member_parsers = self._collect_sequence_members(top_level_parse)

    def __str__(self):
        return str([str(parser) for parser in self.member_parsers])


class ManyEnv(HParserEnv):
    """Env of a many parser: repeated parser, separator and count."""

    def __init__(self, parser, top_level_parse):
        self.parser = parser
        self.count = gdb.parse_and_eval("((HRepeat *) parser->env)->count")
        # TODO: resolve these pointers, make parser objects
        self.separator = gdb.parse_and_eval("((HRepeat *) parser->env)->sep")
        self.p = gdb.parse_and_eval("((HRepeat *) parser->env)->p")

    def __str__(self):
        return "P: " + str(self.p) + ", sep: " + str(self.separator) + ", count: " + str(self.count)


class NotEnv(HParserEnv):
    """Env of a not parser: the single wrapped parser."""

    def __init__(self, parser, top_level_parse):
        self.parser = parser
        member_p = gdb.parse_and_eval("(HParser*) parser->env")
        self.member_parser = top_level_parse.add_or_get_parser(member_p)

    def __str__(self):
        return str(self.member_parser)


class NothingEnv(HParserEnv):
    """Env of a nothing parser; it has no members or arguments."""

    def __init__(self, parser, top_level_parse):
        self.parser = parser

    def __str__(self):
        return "[]"


class OptionalEnv(HParserEnv):
    """Env of an optional parser: the single wrapped parser."""

    def __init__(self, parser, top_level_parse):
        self.parser = parser
        member_p = gdb.parse_and_eval("(HParser*) parser->env")
        self.member_parser = top_level_parse.add_or_get_parser(member_p)

    def __str__(self):
        return str(self.member_parser)
# Maps a vtable symbol name to the HParserEnv subclass that knows how to
# read that parser type's env. Vtables without an entry cannot be decomposed.
vtable_to_env = {
    'sequence_vt': SequenceEnv,
    'ignore_vt': IgnoreEnv,
    'action_vt': ActionEnv,
    'and_vt': AndEnv,
    'attr_bool_vt': AttrBoolEnv,
    'ch_vt': ChEnv,
    'choice_vt': ChoiceEnv,
    'many_vt': ManyEnv,
    'not_vt': NotEnv,
    'nothing_vt': NothingEnv,
    'optional_vt': OptionalEnv,
}


# When given a Parser object, decompose_parser() deduces its type from the
# vtable, and returns the appropriate HParserEnv subclass, containing member
# parsers and args
# Not sure what to name it. Alternatives:
# ParserDisassembler?
# ParserExploder?
# ParserDecomposer?
class ParserDecombinator:
    """Build the matching env object for a parser based on its vtable."""

    def __init__(self):
        # TODO: make a better data structure than vtable_to_env + parser_name_defaults + this
        # Maps vtable name -> (default parser name, env class or None).
        # Most vtables have no env class yet, so use .get() instead of
        # indexing, which raised KeyError on the first unmapped vtable.
        self.vt_envs = {
            vt_name: (default_name, vtable_to_env.get(vt_name))
            for vt_name, default_name in parser_name_defaults.items()
        }

    def decompose_parser(self, parser, top_level_parse):
        """Return an env object for `parser`, or None if its type is unknown.

        `parser` is expected to expose `.address`; `top_level_parse` is passed
        through to the env class constructor. Prints a diagnostic and returns
        None when the vtable has no symbol or no env class registered.
        """
        # Sadly, this is stringly typed for now
        parser_addr = parser.address
        vtable_p = gdb.parse_and_eval("((HParser*) " + str(parser_addr) + ")->vtable")
        # vtable_p is a gdb.Value, which carries no symbol name; resolve the
        # symbol through the address index to obtain it.
        vtable_symbol = vt_types.lookup_by_address(vtable_p)
        vtable_name = None if vtable_symbol is None else vtable_symbol.name
        env_class = vtable_to_env.get(vtable_name)
        if env_class is None:
            print("Unknown vtable: " + str(vtable_p))
            return None
        return env_class(parser, top_level_parse)