# (page-navigation residue from extraction: "Newer" / "Older")
def __init__(self, address):
    """Wrap the HParseResult located at *address*.

    Args:
        address: Address of the HParseResult. Has to be an integer, not a
            string.

    Raises:
        ValueError: If *address* is 0 (null pointer).
    """
    # Note to self: Address has to be an integer and not string
    # Otherwise all hell breaks loose
    if address == 0:
        raise ValueError("Nullpointer given as address of HParseResult")
    # Store the address before calling any helper: the helpers below read
    # self.address to locate the structure.
    self.address = address
    # Mostly for convenience
    self.has_ast = self.read_AST_not_null()
    self.ast = self.make_HParsedToken()
    self.bit_length = self.read_member('bit_length')
    self.arena = self.read_member('arena')
# NOTE(review): the `def` line for this body is not visible in this view.
# __init__ calls self.read_AST_not_null(), so this is presumably that
# method -- confirm against the full file.
# Reports whether the HParseResult at self.address has a non-null 'ast' member.
# Some combinators, such as h_ignore(), return a ParseResult with no AST
if self.address == 0:
return False
# Look up the HParseResult pointer type once and keep it on the class
if not __class__.HParseResult_t_p:
__class__.HParseResult_t_p = gdb.lookup_type("HParseResult").pointer()
res = gdb.Value(self.address).cast(__class__.HParseResult_t_p)
# A null 'ast' pointer means there is no AST to wrap
if res['ast'] == 0:
return False
return True
def make_HParsedToken(self):
    """Build the HParsedToken wrapper for this result's 'ast' member.

    Returns:
        An HParsedToken wrapping the 'ast' pointer, or None when the result
        has no AST (HParsedToken raises ValueError on a null address).
    """
    if not self.has_ast:
        return None
    cls = type(self)
    # Look up the pointer type once and keep it on the class
    if not getattr(cls, "HParseResult_t_p", None):
        cls.HParseResult_t_p = gdb.lookup_type("HParseResult").pointer()
    res = gdb.Value(self.address).cast(cls.HParseResult_t_p)
    return HParsedToken(res['ast'])
def __str__(self):
    """Return a one-line summary: address, arena and the wrapped AST."""
    return "HParseResult ({0}) {{ arena:{1}, data:{2} }}".format(self.address, self.arena, self.ast)
class HParsedToken:
    """Python-side view of a HParsedToken living in the debugged process."""

    # Numeric token type -> name of the token member that holds the data
    # for that type.
    token_union_members = {
        2: 'bytes',
        4: 'sint',
        8: 'uint',
        12: 'dbl',
        13: 'flt',
        16: 'seq',
        64: 'user',
    }

    #TT_MAX = gdb.lookup_type("enum HTokenType_").fields()[-1].enumval
    # Annoyingly, the numerical value for the first custom token type == TT_MAX

    # Enum value hardcoded for convenience of implementation
    TT_SEQUENCE = 16

    # These enum values have no token data
    # Will be cached on the first lookup
    #no_token_data = [v.enumval for v in gdb.lookup_type("enum HTokenType_").fields() if v.name in ["TT_INVALID", "TT_RESERVED_1", "TT_ERR", "TT_NONE", "TT_MAX"]]
    no_token_data = None

    # Pointer type used when casting addresses
    # Will be cached on the first lookup
    #HParsedToken_t_p = gdb.lookup_type("HParsedToken").pointer()
    HParsedToken_t_p = None
def __init__(self, address, parent=None, token_type=None, children=None):
    """Wrap the HParsedToken located at *address*.

    Args:
        address: Address of the token, as an int or gdb.Value. A string
            only triggers a warning.
        parent: Stored as-is; unused for now.
        token_type: Token type to use instead of reading it from the token.
        children: Stored as-is; replaced by the populated child list when
            the token is a sequence.

    Raises:
        ValueError: If *address* is 0 (null pointer).
    """
    # Intended to warn about the difference between "0xdeadbeef" and 0xdeadbeef
    # The former will allocate an char[] and cast it to a HParsedToken* when reading members
    if isinstance(address, str):
        print("Warning: Address %s given to HParsedToken is a string. This is probably an error (expecting int or gdb.Value)" % address)
    if address == 0:
        raise ValueError("Nullpointer given as address of HParsedToken")
    # Store the address before reading anything: read_member() casts
    # self.address to locate the token.
    self.address = address
    # Unused for now
    self.parent = parent
    self.children = children
    #self.token_type = token_type or self.read_token_type()
    self.token_type = token_type or self.read_member('token_type')
    # The entire HParsedToken as a gdb.Value
    self.token = self.read_token_val()
    # The data, either a union in the struct or a HTokenData
    # The encapsulated value is returned in either case
    self.data = self.read_token_data()
    self.index = self.read_member('index')
    self.bit_length = self.read_member('bit_length')
    self.bit_offset = self.read_member('bit_offset')
    # TODO: doesn't work for "custom" sequence types such as Dict
    if self.token_type == self.TT_SEQUENCE:
        #TODO: decide if this should be a HCountedArray or array of HParsedTokens
        self.children = self.populate_children_list()
# NOTE(review): neither the `def` line nor a `return` is visible for this
# body in this view. __init__ calls self.read_token_val() to obtain "the
# entire HParsedToken as a gdb.Value", so this is presumably that method --
# confirm against the full file.
# Look up the HParsedToken pointer type once and keep it on the class
if not __class__.HParsedToken_t_p:
__class__.HParsedToken_t_p = gdb.lookup_type("HParsedToken").pointer()
# View self.address as a pointer to HParsedToken
tok = gdb.Value(self.address).cast(__class__.HParsedToken_t_p)
#TODO: how to tell when token_type == TT_MAX is meant to be TT_MAX, and when it's meant to be a custom type?
def has_token_data(self):
    """Tell whether this token's type carries token data.

    Returns:
        False when self.token_type is one of TT_INVALID, TT_RESERVED_1,
        TT_ERR or TT_NONE; True otherwise.
    """
    cls = type(self)
    if not getattr(cls, "no_token_data", None):
        # TT_MAX is left out on purpose: its numeric value is also the
        # first custom token type.
        no_data_names = ("TT_INVALID", "TT_RESERVED_1", "TT_ERR", "TT_NONE")
        # Resolve the enum values once and keep them on the class
        cls.no_token_data = [
            field.enumval
            for field in gdb.lookup_type("enum HTokenType_").fields()
            if field.name in no_data_names
        ]
    return self.token_type not in cls.no_token_data
# NOTE(review): the `def` line for this body is not visible in this view.
# __init__ calls self.read_token_data(), so this is presumably that method --
# confirm against the full file.
# Yields the wrapped token data, or None for token types without data.
if self.has_token_data():
# We default to using the 'user' field. Also covers custom token types
member = 'user'
# Check if self.token_type < TT_MAX
# (the last field of enum HTokenType_ is taken to be TT_MAX)
if self.token_type < gdb.lookup_type("enum HTokenType_").fields()[-1].enumval:
# Known type: use its member name, still falling back to 'user'
member = __class__.token_union_members.get(int(self.token_type), "user")
data = self.obj_from_token_data(member)
return data
# Token type is one of the enum values known not to have data
else:
return None
def read_member(self, member_name):
    """Read one member of the HParsedToken at self.address.

    Args:
        member_name: Name of the member to read.

    Returns:
        The member, as a gdb.Value.
    """
    cls = type(self)
    # Look up the pointer type once and keep it on the class
    if not getattr(cls, "HParsedToken_t_p", None):
        cls.HParsedToken_t_p = gdb.lookup_type("HParsedToken").pointer()
    tok = gdb.Value(self.address).cast(cls.HParsedToken_t_p)
    return tok[member_name]
# TODO: this and read_token_data are messy
# The desirable approach at the moment would be:
# - self.token has the token as a gdb.Value
# - self.data has the token as an instance of the classes defined here
# (HBytes, HCountedArray), or as a literal such as int.
# currently it can return gdb.Values
def obj_from_token_data(self, member):
    """Return the token data stored in *member* of self.token.

    Args:
        member: Name of the token member to read (e.g. "bytes", "seq",
            "uint", "user").

    Returns:
        An HBytes for "bytes", an HCountedArray for "seq", and the raw
        member value (a gdb.Value) for everything else.
    """
    if member == "bytes":
        return HBytes(self.token[member])
    if member == "seq":
        # HCountedArray takes an address, so pass the pointer as an int
        return HCountedArray(int(self.token[member]))
    return self.token[member]
def populate_children_list(self):
    """Return the elements of this token's "seq" data as a list."""
    return self.obj_from_token_data("seq").elements_as_list()
# NOTE(review): neither the `def` line nor the `if` that pairs with the
# `else:` below is visible in this view. Presumably this is the token's
# __str__, branching on whether it has children -- confirm against the full file.
# TODO: this is probably fine for already-parsed input, but needs more thought
# First form: token type followed by str() of every child
return "{{ {0}, {1} }}".format(self.token_type, ", ".join([str(child) for child in self.children]))
else:
# Second form: token type followed by the token data
return "{{ {0}, {1} }}".format(self.token_type, self.data)
def __init__(self, address):
    """Wrap the HCountedArray located at *address*.

    Reads the 'capacity', 'used', 'arena' and 'elements' members through
    read_member() and stores each on the instance.

    Args:
        address: Address of the HCountedArray.
    """
    # Must be set first: read_member() casts self.address
    self.address = address
    self.capacity = self.read_member('capacity')
    self.used = self.read_member('used')
    self.arena = self.read_member('arena')
    self.elements = self.read_member('elements')
def read_member(self, member_name):
    """Read one member of the HCountedArray at self.address.

    Args:
        member_name: Name of the member to read.

    Returns:
        The member, as a gdb.Value.
    """
    cls = type(self)
    # Look up the pointer type once and keep it on the class
    if not getattr(cls, "HCountedArray_t_p", None):
        cls.HCountedArray_t_p = gdb.lookup_type("HCountedArray").pointer()
    tok = gdb.Value(self.address).cast(cls.HCountedArray_t_p)
    return tok[member_name]
def elements_as_list(self):
    """Wrap the first 'used' entries of 'elements' as HParsedToken objects.

    Returns:
        A list of HParsedToken, one per used element; this array's address
        is passed as each token's parent.
    """
    # int() so range() gets a plain integer even if 'used' is a gdb.Value
    count = int(self.used)
    return [HParsedToken(self.elements[i], self.address) for i in range(count)]
# NOTE(review): the `def` line for this body is not visible in this view.
# It uses self.elements_as_list(), so this is presumably the counted
# array's __str__ -- confirm against the full file.
#TODO: indent wrapper
#TODO: the format is just for testing walking the AST graph
# Comma-separated str() of every wrapped element, enclosed in "[ ... ]"
elements_str = ", ".join([str(elem) for elem in self.elements_as_list()])
return "[ {0} ]".format(elements_str)
# Unlike HCountedArray and HParsedToken, HBytes wraps the gdb.Value that is the structure itself, not a pointer to it
# This is because the bytes field of a HParsedToken is a HBytes, not a HBytes*
# If a HBytes* is really needed: for a given HParsedToken hpt,
# hpt.token['bytes'].address yields its address
# A wrapper class for that might look like
# class HBytesPointer:
# def __init__(self, address):
# self.address = address
# self.token = self.read_member("token")
# self.len = self.read_member("len")
#
# def read_member(self, member_name):
# bytes = gdb.Value(self.address).cast(gdb.lookup_type("HBytes").pointer())
# return bytes[member_name]
#
# foo = HBytesPointer(int(hpt.token['bytes'].address))
# NOTE(review): the `class` and `def` lines for this body are not visible in
# this view. Going by the comment block above, this is presumably
# HBytes.__init__(self, gdbvalue) -- confirm against the full file.
# Keep the wrapped value, then read its 'len' and 'token' members
self.gdbvalue = gdbvalue
self.len = self.gdbvalue['len']
self.token = self.gdbvalue['token']
def __str__(self):
    """Return the bytes as '{ token: "<text>", len: <n> }'.

    The text is decoded as UTF-8 with undecodable bytes replaced and is
    limited to self.len bytes. An empty HBytes yields an empty string
    without touching the token pointer.
    """
    if self.len == 0:
        # Plain literal (not passed through format), so single braces here
        return '{ token: "", len: 0 }'
    text = self.token.string("UTF-8", "replace", self.len)
    return '{{ token: "{0}", len: {1} }}'.format(text, self.len)
# Class to hold subtrees of the AST
# HDoParseRetBreakpoint would ideally use the ASTManager to construct the partial ast piecewise
# Its other responsibility is formatting the output when the AST printing command is executed
class ASTManager:
    """Holds the top node of an AST fragment and the parser it came from."""

    def __init__(self):
        """Start with no AST and no parser."""
        self.top_node = None
        # The HParser that returned this AST fragment
        self.parser = None

    def set_top_node(self, address, parser):
        """Record the parse result at *address* and the parser that made it.

        Args:
            address: Address of the HParseResult; 0 means "no result" and
                clears the top node. Has to be an integer or gdb.Value.
            parser: Expected to be a Parser object.
        """
        # Address has to be an integer or gdb.Value or this will break
        if address == 0:
            self.top_node = None
        else:
            self.top_node = HParseResult(address)
        # Expected to be a Parser object (probably best to use TopLevelParse for the lookup)
        self.parser = parser

    def print_ast(self):
        """Print the parser, then the top node."""
        print(self.parser)
        print(self.top_node)