Skip to content
Snippets Groups Projects
ast.py 8.77 KiB
Newer Older
class HParseResult:
	"""Wrapper around the address of a HParseResult in the debugged process.

	On construction the 'ast', 'bit_length' and 'arena' members are read
	through gdb. If the 'ast' pointer is non-null it is wrapped in a
	HParsedToken; otherwise self.ast stays None.
	"""

	# gdb.Type for "HParseResult *"; looked up on first use and then cached
	HParseResult_t_p = None

	def __init__(self, address):
		"""Read the HParseResult located at address.

		address -- must be an integer (or gdb.Value), not a string
		Raises ValueError if address is 0.
		"""
		# Note to self: Address has to be an integer and not string
		# Otherwise all hell breaks loose
		if address == 0:
			raise ValueError("Nullpointer given as address of HParseResult")
		self.address = address

		# Mostly for convenience
		self.has_ast = self.read_AST_not_null()
		self.ast = None
		if self.has_ast:
			self.ast = self.make_HParsedToken()

		self.bit_length = self.read_member('bit_length')
		self.arena = self.read_member('arena')

	# AST is not null
	# Some combinators, such as h_ignore(), return a ParseResult with no AST
	def read_AST_not_null(self):
		"""Return True if the 'ast' member is a non-null pointer, False otherwise."""
		if self.address == 0:
			return False
		# Go through read_member so the pointer type lookup lives in one place
		if self.read_member('ast') == 0:
			return False
		return True

	def make_HParsedToken(self):
		"""Return a HParsedToken wrapping the 'ast' member, or None if there is no AST."""
		if not self.has_ast:
			return None
		return HParsedToken(self.read_member('ast'))

	def read_member(self, member_name):
		"""Return the struct member member_name of this HParseResult as a gdb.Value."""
		if not __class__.HParseResult_t_p:
			__class__.HParseResult_t_p = gdb.lookup_type("HParseResult").pointer()
		res = gdb.Value(self.address).cast(__class__.HParseResult_t_p)
		return res[member_name]

	def __str__(self):
		return "HParseResult ({0}) {{ arena:{1}, data:{2} }}".format(self.address, self.arena, self.ast)

class HParsedToken:
	"""Wrapper around the address of a HParsedToken in the debugged process.

	Attributes set by __init__:
	token      -- the token as a gdb.Value (pointer cast to HParsedToken*)
	token_type -- the 'token_type' member, unless one was passed in
	data       -- the token's payload (see read_token_data), or None
	children   -- list of child HParsedTokens when token_type is TT_SEQUENCE
	index, bit_length, bit_offset -- the struct members of the same name
	"""

	# Maps the numeric token type to the name of the union member holding its data
	token_union_members = {
		2: 'bytes',
		4: 'sint',
		8: 'uint',
		12: 'dbl',
		13: 'flt',
		16: 'seq',
		64: 'user'
	}

	# Value of the last enumerator of "enum HTokenType_" (TT_MAX)
	# Will be cached on the first lookup
	# Annoyingly, the numerical value for the first custom token type == TT_MAX
	TT_MAX = None
	# Enum value hardcoded for convenience of implementation
	TT_SEQUENCE = 16
	# These enum values have no token data. Cached on the first lookup
	# TT_MAX is deliberately left out, since custom token types may share its value
	no_token_data = None
	# gdb.Type for "HParsedToken *"; looked up on first use and then cached
	HParsedToken_t_p = None

	def __init__(self, address, parent=None, token_type=None, children=None):
		"""Read the HParsedToken located at address.

		address    -- int or gdb.Value; a string is almost certainly a mistake
		parent     -- stored as-is in self.parent
		token_type -- if given (and truthy), used instead of reading the struct
		children   -- stored as-is; replaced when the token is a TT_SEQUENCE
		Raises ValueError if address is 0.
		"""
		# Intended to warn about the difference between "0xdeadbeef" and 0xdeadbeef
		# The former will allocate an char[] and cast it to a HParsedToken* when reading members
		if isinstance(address, str):
			print("Warning: Address %s given to HParsedToken is a string. This is probably an error (expecting int or gdb.Value)" % address)
		if address == 0:
			raise ValueError("Nullpointer given as address of HParsedToken")
		self.address = address
		self.parent = parent
		self.children = children
		self.token_type = token_type or self.read_member('token_type')
		# The entire HParsedToken as a gdb.Value
		self.token = self.read_token_val()
		# The data, either a union in the struct or a HTokenData
		# The encapsulated value is returned in either case
		self.data = self.read_token_data()
		self.index = self.read_member('index')
		self.bit_length = self.read_member('bit_length')
		self.bit_offset = self.read_member('bit_offset')
		# TODO: doesn't work for "custom" sequence types such as Dict
		if self.token_type == __class__.TT_SEQUENCE:
			#TODO: decide if this should be a HCountedArray or array of HParsedTokens
			self.children = self.populate_children_list()

	def read_token_val(self):
		"""Return self.address cast to HParsedToken* as a gdb.Value."""
		if not __class__.HParsedToken_t_p:
			__class__.HParsedToken_t_p = gdb.lookup_type("HParsedToken").pointer()
		tok = gdb.Value(self.address).cast(__class__.HParsedToken_t_p)
		# Without this return, self.token would be None and every data lookup would fail
		return tok

	#TODO: how to tell when token_type == TT_MAX is meant to be TT_MAX, and when it's meant to be a custom type?
	def has_token_data(self):
		"""Return True unless token_type is one of the enum values known to carry no data."""
		if not __class__.no_token_data:
			__class__.no_token_data = [v.enumval for v in gdb.lookup_type("enum HTokenType_").fields() if v.name in ["TT_INVALID", "TT_RESERVED_1", "TT_ERR", "TT_NONE"]]
		return self.token_type not in __class__.no_token_data

	def read_token_data(self):
		"""Return the token's data, or None if the token type has none."""
		# Token type is one of the enum values known not to have data
		if not self.has_token_data():
			return None
		# We default to using the 'user' field. Also covers custom token types
		member = 'user'
		if __class__.TT_MAX is None:
			__class__.TT_MAX = gdb.lookup_type("enum HTokenType_").fields()[-1].enumval
		# Check if self.token_type < TT_MAX
		if self.token_type < __class__.TT_MAX:
			member = __class__.token_union_members.get(int(self.token_type), "user")
		return self.obj_from_token_data(member)

	def read_member(self, member_name):
		"""Return the struct member member_name of this token as a gdb.Value."""
		if not __class__.HParsedToken_t_p:
			__class__.HParsedToken_t_p = gdb.lookup_type("HParsedToken").pointer()
		tok = gdb.Value(self.address).cast(__class__.HParsedToken_t_p)
		return tok[member_name]

	# TODO: this and read_token_data are messy
	# The desirable approach at the moment would be:
	# - self.token has the token as a gdb.Value
	# - self.data has the token as an instance of the classes defined here
	#   (HBytes, HCountedArray), or as a literal such as int.
	#   currently it can return gdb.Values
	def obj_from_token_data(self, member):
		"""Return self.token[member], wrapped where a wrapper class exists.

		"bytes" -> HBytes around the member itself
		"seq"   -> HCountedArray around the member's value as an int address
		other   -> the member unchanged
		"""
		if member == "bytes":
			return HBytes(self.token[member])
		elif member == "seq":
			return HCountedArray(int(self.token[member]))
		else:
			return self.token[member]

	def populate_children_list(self):
		"""Return the elements of the 'seq' member as a list of HParsedTokens."""
		return self.obj_from_token_data("seq").elements_as_list()

	# TODO: this is probably fine for already-parsed input, but needs more thought
	def __str__(self):
		if self.children:
			return "{{ {0}, {1} }}".format(self.token_type, ", ".join([str(child) for child in self.children]))
		else:
			return "{{ {0}, {1} }}".format(self.token_type, self.data)

class HCountedArray:
	"""Wrapper around the address of a HCountedArray in the debugged process.

	The 'capacity', 'used', 'arena' and 'elements' members are read through
	gdb on construction.
	"""

	# gdb.Type for "HCountedArray *"; looked up on first use and then cached
	HCountedArray_t_p = None

	def __init__(self, address):
		self.address = address
		self.capacity = self.read_member('capacity')
		self.used = self.read_member('used')
		self.arena = self.read_member('arena')
		self.elements = self.read_member('elements')

	def read_member(self, member_name):
		"""Return the struct member member_name of this array as a gdb.Value."""
		if not __class__.HCountedArray_t_p:
			__class__.HCountedArray_t_p = gdb.lookup_type("HCountedArray").pointer()
		tok = gdb.Value(self.address).cast(__class__.HCountedArray_t_p)
		return tok[member_name]

	def elements_as_list(self):
		"""Return the first 'used' entries of 'elements' as HParsedToken objects.

		This array's address is passed as each token's parent.
		"""
		return [HParsedToken(self.elements[i], self.address) for i in range(int(self.used))]

	#TODO: indent wrapper
	#TODO: the format is just for testing walking the AST graph
	#TODO: escaped newlines and str() don't play well together
	def __str__(self):
		elements_str = ", ".join([str(elem) for elem in self.elements_as_list()])
		return "[ {0} ]".format(elements_str)
# Unlike HCountedArray and HParsedToken, HBytes wraps the gdb.Value that is the structure itself, not a pointer to it
# This is because the bytes field of a HParsedToken is a HBytes, not a HBytes*
# If a HBytes* is really needed: for a given HParsedToken hpt,
# hpt.token['bytes'].address yields its address

# A wrapper class for that might look like
# class HBytesPointer:
#	def __init__(self, address):
#		self.address = address
#		self.token = self.read_member("token")
#		self.len = self.read_member("len")
#
#	def read_member(self, member_name):
#		bytes = gdb.Value(self.address).cast(gdb.lookup_type("HBytes").pointer())
#		return bytes[member_name]
#
# foo = HBytesPointer(int(hpt.token['bytes'].address))
pompolic's avatar
pompolic committed
class HBytes:
	"""Wrapper around a HBytes structure given as a gdb.Value.

	gdbvalue is indexed for its 'len' and 'token' members; it is the
	structure itself, not a pointer to it.
	"""

	def __init__(self, gdbvalue):
		self.gdbvalue = gdbvalue
		self.len = self.gdbvalue['len']
		self.token = self.gdbvalue['token']

	def __str__(self):
		# The empty case is handled separately so the token is not read at all
		if self.len == 0:
			# Plain literal (no .format call), so braces must not be doubled here
			return "{ token: \"\", len: 0 }"
		else:
			return "{{ token: \"{0}\", len: {1} }}".format(self.token.string("UTF-8", "replace", self.len), self.len)

# Class to hold subtrees of the AST
# HDoParseRetBreakpoint would ideally use the ASTManager to construct the partial ast piecewise
# Its other responsibility is formatting the output when the AST printing command is executed
class ASTManager:
	"""Holds the top node of an AST fragment together with the parser it came from."""

	def __init__(self):
		# The HParser that returned this AST fragment
		self.parser = None
		# HParseResult at the root of the fragment, or None when there is none
		self.top_node = None

	def set_top_node(self, address, parser):
		"""Wrap address in a HParseResult (None for a zero address) and record parser.

		address -- has to be an integer or gdb.Value or this will break
		parser  -- expected to be a Parser object (probably best to use
		           TopLevelParse for the lookup)
		"""
		self.top_node = None if address == 0 else HParseResult(address)
		self.parser = parser
pompolic's avatar
pompolic committed

# Module-level ASTManager instance, created when this file is loaded
ast_manager = ASTManager()