class HParseResult:
    """Wrapper around a ``HParseResult*`` living in the debugged process.

    The members ``ast``, ``bit_length`` and ``arena`` are read through gdb
    when the wrapper is constructed.
    """

    # TODO: cache the gdb.Type lookups (HParseResult*, HParsedToken*) instead
    # of calling gdb.lookup_type() on every member read.

    def __init__(self, address):
        """Read the HParseResult located at *address*.

        Note to self: address has to be an integer (or gdb.Value) and not a
        string, otherwise all hell breaks loose.
        """
        self.address = address
        # Mostly for convenience
        self.has_ast = self.read_AST_not_null()
        self.ast = None
        if self.has_ast:
            self.ast = self.make_HParsedToken()
        self.bit_length = self.read_member('bit_length')
        self.arena = self.read_member('arena')

    def read_AST_not_null(self):
        """Return True when the result's ``ast`` pointer is not null.

        Some combinators, such as h_ignore(), return a ParseResult with no AST.
        """
        return self.read_member('ast') != 0

    def make_HParsedToken(self):
        """Wrap the ``ast`` member in a HParsedToken, or return None if there is no AST."""
        if not self.has_ast:
            return None
        return HParsedToken(self.read_member('ast'))

    def read_member(self, member_name):
        """Return struct member *member_name* of the wrapped HParseResult as a gdb.Value."""
        # TODO: cache the gdb.Type
        res = gdb.Value(self.address).cast(gdb.lookup_type("HParseResult").pointer())
        return res[member_name]

    def __str__(self):
        return "HParseResult ({0}) {{ arena:{1}, data:{2} }}".format(
            self.address, self.arena, self.ast)


class HParsedToken:
    """Wrapper around a ``HParsedToken*`` living in the debugged process."""

    # Maps HTokenType enum values to the name of the union member holding the
    # token's data.
    token_union_members = {
        2: 'bytes',
        4: 'sint',
        8: 'uint',
        12: 'dbl',
        13: 'flt',
        16: 'seq',
        64: 'user',
    }

    # Enum value hardcoded for convenience of implementation
    TT_SEQUENCE = 16

    # Names of the enum values that carry no token data
    _NO_DATA_TYPE_NAMES = ("TT_INVALID", "TT_RESERVED_1", "TT_ERR", "TT_NONE", "TT_MAX")

    def __init__(self, address, parent=None, token_type=None, children=None):
        """Read the HParsedToken located at *address*.

        address    -- int or gdb.Value pointing at the token
        parent     -- unused for now
        token_type -- token type to use; read from the struct when falsy
        children   -- initial children; replaced for TT_SEQUENCE tokens
        """
        # Intended to warn about the difference between "0xdeadbeef" and 0xdeadbeef.
        # The former will allocate a char[] and cast it to a HParsedToken* when
        # reading members.
        if isinstance(address, str):
            # str.format instead of the old "% given" %-format, which parsed
            # "% g" as a float conversion and raised TypeError for strings.
            print("Warning: Address {0} given to HParsedToken is a string. "
                  "This is probably an error (expecting int or gdb.Value)".format(address))
        self.address = address
        # Unused for now
        self.parent = parent
        self.children = children
        self.token_type = token_type or self.read_member('token_type')
        # The entire HParsedToken as a gdb.Value
        self.token = self.read_token_val()
        # The data, either a union in the struct or a HTokenData.
        # The encapsulated value is returned in either case.
        # TODO: make HCountedArray
        self.data = self.read_token_data()
        self.index = self.read_member('index')
        self.bit_length = self.read_member('bit_length')
        self.bit_offset = self.read_member('bit_offset')
        # TODO: doesn't work for "custom" sequence types such as Dict
        if self.token_type == HParsedToken.TT_SEQUENCE:
            # TODO: decide if this should be a HCountedArray or array of HParsedTokens
            self.children = self.populate_children()

    def read_token_val(self):
        """Return the address cast to ``HParsedToken*`` as a gdb.Value."""
        return gdb.Value(self.address).cast(gdb.lookup_type("HParsedToken").pointer())

    def has_token_data(self, token_type):
        """Return True unless *token_type* is one of the enum values without data."""
        no_token_data = [
            field.enumval
            for field in gdb.lookup_type("enum HTokenType_").fields()
            if field.name in HParsedToken._NO_DATA_TYPE_NAMES
        ]
        return token_type not in no_token_data

    # TODO: return make HCountedArray from data, if this is a sequence
    def read_token_data(self):
        """Return the token's data member, or None for token types without data."""
        # Token type is one of the enum values known not to have data
        if not self.has_token_data(self.token_type):
            return None

        # We default to using the 'user' field. Also covers custom token types.
        member = 'user'
        # Check if self.token_type < TT_MAX (the last enumerator)
        tt_max = gdb.lookup_type("enum HTokenType_").fields()[-1].enumval
        if self.token_type < tt_max:
            # Look up by plain int: the table is keyed by ints, and a gdb.Value
            # key is not expected to hash like the integer it wraps (NOTE(review):
            # confirm against the gdb Python API). .get() rather than
            # .setdefault() so the shared class-level table is never mutated.
            member = HParsedToken.token_union_members.get(int(self.token_type), 'user')
        return self.read_token_val()[member]

    def read_member(self, member_name):
        """Return struct member *member_name* of the wrapped HParsedToken as a gdb.Value."""
        return self.read_token_val()[member_name]

    def populate_children(self):
        """Wrap this token's data in a HCountedArray."""
        return HCountedArray(self.data)

    # TODO: this is probably fine for already-parsed input, but needs more thought
    def __str__(self):
        payload = self.children if self.children else self.data
        return "{{ {0}, {1} }}".format(self.token_type, payload)


class HCountedArray:
    """Wrapper around a ``HCountedArray*`` living in the debugged process."""

    def __init__(self, address):
        """Read capacity, used, arena and elements of the array at *address*."""
        self.address = address
        self.capacity = self.read_member('capacity')
        self.used = self.read_member('used')
        self.arena = self.read_member('arena')
        # TODO: maybe save them as an array
        self.elements = self.read_member('elements')

    def read_member(self, member_name):
        """Return struct member *member_name* of the wrapped HCountedArray as a gdb.Value."""
        arr = gdb.Value(self.address).cast(gdb.lookup_type("HCountedArray").pointer())
        return arr[member_name]

    def elements_as_list(self):
        """Return the first ``used`` elements, each wrapped in a HParsedToken.

        The array's own address is passed as each token's ``parent``.
        """
        # int() so range() gets a plain Python integer rather than a gdb.Value
        count = int(self.used)
        return [HParsedToken(self.elements[i], self.address) for i in range(count)]

    # TODO: indent wrapper
    # TODO: the format is just for testing walking the AST graph
    # TODO: escaped newlines and str() don't play well together
    def __str__(self):
        elements_str = ", ".join(str(elem) for elem in self.elements_as_list())
        return "[ {0} ]".format(elements_str)


class HBytes:
    """Wrapper around a ``HBytes*`` living in the debugged process."""

    def __init__(self, address):
        """Read the ``len`` and ``token`` members of the HBytes at *address*."""
        self.address = address
        self.len = self.read_member("len")
        self.token = self.read_member("token")

    def read_member(self, member_name):
        """Return struct member *member_name* of the wrapped HBytes as a gdb.Value."""
        hb = gdb.Value(self.address).cast(gdb.lookup_type("HBytes").pointer())
        return hb[member_name]

    def __str__(self):
        if self.len == 0:
            # Single braces, matching what the formatted branch below prints
            return "{ token: \"\", len: 0 }"
        return "{{ token: \"{0}\", len: {1} }}".format(
            self.token.string("UTF-8", "replace", self.len), self.len)


# Class to hold subtrees of the AST
# HDoParseRetBreakpoint would ideally use the ASTManager to construct the
# partial ast piecewise.
# Its other responsibility is formatting the output when the AST printing
# command is executed.
class ASTManager:
    """Holds the top node of an AST fragment and the parser that returned it."""

    def __init__(self):
        self.top_node = None
        # The HParser that returned this AST fragment
        self.parser = None

    def set_top_node(self, address, parser):
        """Wrap *address* in a HParseResult and remember *parser*.

        Address has to be an integer or gdb.Value or this will break.
        *parser* is expected to be a Parser object (probably best to use
        TopLevelParse for the lookup).
        """
        self.top_node = HParseResult(address)
        self.parser = parser

    def print_ast(self):
        """Print the parser followed by the top node."""
        print(self.parser)
        print(self.top_node)


ast_manager = ASTManager()