class HParseResult:
    """Wrapper around a C ``HParseResult`` located at ``address`` in the inferior.

    On construction the wrapper reads the ``ast``, ``bit_length`` and ``arena``
    members through gdb. ``ast`` is wrapped in a HParsedToken when it is not
    null, and is None otherwise.

    Raises:
        ValueError: if ``address`` is 0.
    """

    # gdb type for ``HParseResult *``; looked up lazily and cached on the class
    HParseResult_t_p = None

    def __init__(self, address):
        # Note to self: address has to be an integer and not a string,
        # otherwise all hell breaks loose
        if address == 0:
            raise ValueError("Nullpointer given as address of HParseResult")
        self.address = address
        # Mostly for convenience
        self.has_ast = self.read_AST_not_null()
        self.ast = None
        if self.has_ast:
            self.ast = self.make_HParsedToken()
        self.bit_length = self.read_member('bit_length')
        self.arena = self.read_member('arena')

    def _as_pointer(self):
        """Return ``self.address`` as a gdb.Value cast to ``HParseResult *``."""
        if __class__.HParseResult_t_p is None:
            __class__.HParseResult_t_p = gdb.lookup_type("HParseResult").pointer()
        return gdb.Value(self.address).cast(__class__.HParseResult_t_p)

    # AST is not null
    # Some combinators, such as h_ignore(), return a ParseResult with no AST
    def read_AST_not_null(self):
        """Return True if the result's ``ast`` member is a non-null pointer."""
        if self.address == 0:
            return False
        if self._as_pointer()['ast'] == 0:
            return False
        return True

    def make_HParsedToken(self):
        """Wrap the ``ast`` member in a HParsedToken; None when there is no AST."""
        if not self.has_ast:
            return None
        return HParsedToken(self.read_member('ast'))

    def read_member(self, member_name):
        """Return the struct member ``member_name`` as a gdb.Value."""
        return self._as_pointer()[member_name]

    def __str__(self):
        return "HParseResult ({0}) {{ arena:{1}, data:{2} }}".format(
            self.address, self.arena, self.ast)


class HParsedToken:
    """Wrapper around a C ``HParsedToken`` located at ``address`` in the inferior.

    Args:
        address: int or gdb.Value holding the token's address (not a string).
        parent: stored as-is; unused for now.
        token_type: token type to use instead of reading it from memory.
            A falsy value means "read the ``token_type`` member".
        children: initial children list; replaced by the sequence elements
            when the token type is TT_SEQUENCE.

    Raises:
        ValueError: if ``address`` is 0.
    """

    # Maps HTokenType enum values to the matching member name of the token
    token_union_members = {
        2: 'bytes',
        4: 'sint',
        8: 'uint',
        12: 'dbl',
        13: 'flt',
        16: 'seq',
        64: 'user',
    }

    # NOTE: the gdb lookups below are performed lazily (on first use) and
    # cached on the class instead of being evaluated in the class body.

    # Value of the last enumerator of "enum HTokenType_"; cached on first lookup
    # Annoyingly, the numerical value for the first custom token type == TT_MAX
    TT_MAX = None

    # Enum value hardcoded for convenience of implementation
    TT_SEQUENCE = 16

    # Enum values that have no token data (TT_INVALID, TT_RESERVED_1, TT_ERR,
    # TT_NONE); cached on first lookup
    no_token_data = None

    # gdb type for ``HParsedToken *``; cached on first lookup
    HParsedToken_t_p = None

    def __init__(self, address, parent=None, token_type=None, children=None):
        # Intended to warn about the difference between "0xdeadbeef" and 0xdeadbeef
        # The former will allocate a char[] and cast it to a HParsedToken* when
        # reading members
        if isinstance(address, str):
            # str.format instead of the original "% given" %-format, which was
            # parsed as a float conversion and raised TypeError for strings
            print("Warning: Address {0!r} given to HParsedToken is a string. "
                  "This is probably an error (expecting int or gdb.Value)".format(address))
        if address == 0:
            raise ValueError("Nullpointer given as address of HParsedToken")

        self.address = address
        # Unused for now
        self.parent = parent
        self.children = children
        self.token_type = token_type or self.read_member('token_type')
        # The address cast to HParsedToken*, as a gdb.Value
        self.token = self.read_token_val()
        # The data, either a union in the struct or a HTokenData
        # The encapsulated value is returned in either case
        self.data = self.read_token_data()
        self.index = self.read_member('index')
        self.bit_length = self.read_member('bit_length')
        self.bit_offset = self.read_member('bit_offset')

        # TODO: doesn't work for "custom" sequence types such as Dict
        if self.token_type == __class__.TT_SEQUENCE:
            # TODO: decide if this should be a HCountedArray or array of HParsedTokens
            self.children = self.populate_children_list()

    def read_token_val(self):
        """Return ``self.address`` as a gdb.Value cast to ``HParsedToken *``."""
        if __class__.HParsedToken_t_p is None:
            __class__.HParsedToken_t_p = gdb.lookup_type("HParsedToken").pointer()
        return gdb.Value(self.address).cast(__class__.HParsedToken_t_p)

    def _token_type_max(self):
        """Return the value of the last "enum HTokenType_" enumerator (cached)."""
        if __class__.TT_MAX is None:
            __class__.TT_MAX = gdb.lookup_type("enum HTokenType_").fields()[-1].enumval
        return __class__.TT_MAX

    # TODO: how to tell when token_type == TT_MAX is meant to be TT_MAX,
    # and when it's meant to be a custom type?
    def has_token_data(self):
        """Return True unless the token type is one known to carry no data."""
        if __class__.no_token_data is None:
            dataless_names = ("TT_INVALID", "TT_RESERVED_1", "TT_ERR", "TT_NONE")
            __class__.no_token_data = [
                field.enumval
                for field in gdb.lookup_type("enum HTokenType_").fields()
                if field.name in dataless_names
            ]
        return self.token_type not in __class__.no_token_data

    def read_token_data(self):
        """Return the token's data, or None for token types without data."""
        if not self.has_token_data():
            # Token type is one of the enum values known not to have data
            return None
        # We default to using the 'user' field. Also covers custom token types
        member = 'user'
        if self.token_type < self._token_type_max():
            member = __class__.token_union_members.get(int(self.token_type), "user")
        return self.obj_from_token_data(member)

    def read_member(self, member_name):
        """Return the struct member ``member_name`` as a gdb.Value."""
        return self.read_token_val()[member_name]

    # TODO: this and read_token_data are messy
    # The desirable approach at the moment would be:
    # - self.token has the token as a gdb.Value
    # - self.data has the token as an instance of the classes defined here
    #   (HBytes, HCountedArray), or as a literal such as int.
    # currently it can return gdb.Values
    def obj_from_token_data(self, member):
        """Return ``self.token[member]``, wrapped for 'bytes' and 'seq'.

        'bytes' yields a HBytes, 'seq' a HCountedArray built from the member's
        integer value; any other member is returned as the raw gdb.Value.
        """
        if member == "bytes":
            return HBytes(self.token[member])
        if member == "seq":
            return HCountedArray(int(self.token[member]))
        return self.token[member]

    def populate_children_list(self):
        """Return the elements of the 'seq' member as a list of HParsedToken."""
        return self.obj_from_token_data("seq").elements_as_list()

    # TODO: this is probably fine for already-parsed input, but needs more thought
    def __str__(self):
        if self.children:
            children_str = ", ".join([str(child) for child in self.children])
            return "{{ {0}, {1} }}".format(self.token_type, children_str)
        return "{{ {0}, {1} }}".format(self.token_type, self.data)


class HCountedArray:
    """Wrapper around a C ``HCountedArray`` located at ``address`` in the inferior.

    Reads the ``capacity``, ``used``, ``arena`` and ``elements`` members on
    construction.
    """

    # gdb type for ``HCountedArray *``; looked up lazily and cached on the class
    HCountedArray_t_p = None

    def __init__(self, address):
        self.address = address
        self.capacity = self.read_member('capacity')
        self.used = self.read_member('used')
        self.arena = self.read_member('arena')
        self.elements = self.read_member('elements')

    def read_member(self, member_name):
        """Return the struct member ``member_name`` as a gdb.Value."""
        if __class__.HCountedArray_t_p is None:
            __class__.HCountedArray_t_p = gdb.lookup_type("HCountedArray").pointer()
        array_ptr = gdb.Value(self.address).cast(__class__.HCountedArray_t_p)
        return array_ptr[member_name]

    def elements_as_list(self):
        """Return the first ``used`` elements, each wrapped in a HParsedToken.

        The array's own address is passed as each token's ``parent``.
        """
        # int() so range() gets a plain Python integer rather than a gdb.Value
        return [HParsedToken(self.elements[i], self.address)
                for i in range(int(self.used))]

    # TODO: indent wrapper
    # TODO: the format is just for testing walking the AST graph
    # TODO: escaped newlines and str() don't play well together
    def __str__(self):
        elements_str = ", ".join([str(elem) for elem in self.elements_as_list()])
        return "[ {0} ]".format(elements_str)


# Unlike HCountedArray and HParsedToken, HBytes wraps the gdb.Value that is the
# structure itself, not a pointer to it
# This is because the bytes field of a HParsedToken is a HBytes, not a HBytes*
# If a HBytes* is really needed: for a given HParsedToken hpt,
# hpt.token['bytes'].address yields its address
# A wrapper class for that might look like
# class HBytesPointer:
#     def __init__(self, address):
#         self.address = address
#         self.token = self.read_member("token")
#         self.len = self.read_member("len")
#
#     def read_member(self, member_name):
#         bytes = gdb.Value(self.address).cast(gdb.lookup_type("HBytes").pointer())
#         return bytes[member_name]
#
# foo = HBytesPointer(int(hpt.token['bytes'].address))
class HBytes:
    """Wrapper around a ``HBytes`` structure value (not a pointer to one).

    Args:
        gdbvalue: object indexable by 'len' and 'token'; a gdb.Value of the
            structure in normal use.
    """

    def __init__(self, gdbvalue):
        self.gdbvalue = gdbvalue
        self.len = self.gdbvalue['len']
        self.token = self.gdbvalue['token']

    def __str__(self):
        if self.len == 0:
            # Plain literal (no .format call), so braces must not be doubled;
            # this keeps the output consistent with the non-empty branch
            return "{ token: \"\", len: 0 }"
        decoded = self.token.string("UTF-8", "replace", self.len)
        return "{{ token: \"{0}\", len: {1} }}".format(decoded, self.len)


# Class to hold subtrees of the AST
# HDoParseRetBreakpoint would ideally use the ASTManager to construct the
# partial ast piecewise
# Its other responsibility is formatting the output when the AST printing
# command is executed
class ASTManager:
    """Holds the top node of an AST fragment and the parser that returned it."""

    def __init__(self):
        self.top_node = None
        # The HParser that returned this AST fragment
        self.parser = None

    def set_top_node(self, address, parser):
        """Record the parse result at ``address`` and the parser that produced it.

        Args:
            address: int or gdb.Value (not a string); 0 clears the top node.
            parser: expected to be a Parser object (probably best to use
                TopLevelParse for the lookup).
        """
        # Address has to be an integer or gdb.Value or this will break
        if address == 0:
            self.top_node = None
        else:
            self.top_node = HParseResult(address)
        self.parser = parser

    def print_ast(self):
        """Print the stored parser followed by the stored top node."""
        print(self.parser)
        print(self.top_node)


ast_manager = ASTManager()