# TODO: handlers for filters
# TODO: postordinate parser fails to get named
# TODO: step-to-parser command
# TODO: "current parser" on GUI is the one on top of the stack, while the argument of h_do_parse is not yet accounted for
# TODO: possible enhancement, caching the vtable type in Parser objects (allows searching by type)
# TODO: The parameter given to h_do_parse is not shown in the backtrace, which is confusing


# List of names of functions with H_RULEs declared, used by BreakpointManager
# NOTE(review): BreakpointManager is not defined in this file; the commented-out legacy code
# further down indexes each entry as func[0]/func[1], which does not match plain strings - confirm
H_RULE_FUNCTIONS = [ "init_runlengthdecode_parser", "init_LZW_parser" ]

class Parser:
	"""Record of one parser, identified by its address.

	name       -- name of the parser, or None while it is still unknown
	address    -- address of the parser
	bytes_used -- dict mapping the key passed to add_mem_use() to the bytes recorded under it
	"""
	def __init__(self, name, address):
		self.name = name
		self.address = address
		self.bytes_used = {}

	def name_parser(self, name):
		"""Set (or overwrite) the name of this parser."""
		self.name = name

	# TODO: remove
	def get_name_or_placeholder(self):
		"""Return the name, or a placeholder string while the parser is unnamed."""
		if self.name is None:
			return "Wait for it... (if you're reading this, you found a bug)"
		return self.name

	def add_mem_use(self, state, size):
		"""Add size bytes to the total recorded under state."""
		# "or 0" also covers an entry that exists but holds None
		recorded = self.bytes_used.get(state) or 0
		self.bytes_used[state] = recorded + size

	def get_mem_use(self, state=None):
		"""Return the bytes recorded under state, or the whole dict when state is None.

		An unknown state yields 0. The lookup does not create an entry, so querying
		cannot influence statistics computed from bytes_used.
		"""
		if state is None:
			return self.bytes_used
		return self.bytes_used.get(state, 0)

	def __str__(self):
		return "(" + str(self.name) + ", " + hex(self.address) + ")"

	# Return the highest per-arena allocation count
	# TODO: disambiguate "parse state" and "arena", possibly tracking both
	def get_arenamax(self):
		"""Return the largest single value in bytes_used, or 0 if nothing was recorded."""
		if not self.bytes_used:
			return 0
		return max(self.bytes_used.values())

	def get_arenasum(self):
		"""Return the sum of all values in bytes_used (0 if nothing was recorded)."""
		return sum(self.bytes_used.values())

class ParserStack:
	"""Stack of Parser objects belonging to one parse.

	parse_state       -- parse state associated with this stack (None until set_state() is called)
	arena             -- stored as given to the constructor; not used by this class
	p_stack           -- list of parsers, top of stack last
	unclaimed_mem_use -- bytes that could not be attributed to a parser on this stack
	"""
	def __init__(self, parse_state, arena):
		self.parse_state = parse_state
		self.arena = arena
		self.p_stack = []
		self.unclaimed_mem_use = 0

	def push(self, parser):
		self.p_stack.append(parser)

	def pop(self):
		"""Remove and return the top parser. Raises IndexError on an empty stack."""
		return self.p_stack.pop()

	def peek(self):
		"""Return the top parser without removing it, or None if the stack is empty."""
		if not self.p_stack:
			return None
		return self.p_stack[-1]

	def set_state(self, state):
		self.parse_state = state

	# In terms of tracing, *most* calls to a parser look something like this with the packrat backend:
	# h_do_parse()
	#	parse_foo()
	#		perform_lowlevel_parse()

	# perform_lowlevel_parse() is called when the memo table at that position is not filled in yet.
	# it calls the corresponding parse_* virtual function via the vtable, but other than that does not have type information
	# it's probably possible to extract type information, by comparing vtable addresses, but that seems painful

	# parse_foo() is the parser's corresponding virtual function in the frontend, which does not have the equivalent of a "this" pointer

	# So what we do to keep track of parsers is incrementally filling in the details for both

	# h_do_parse() is the backend's "actually run the parser" function, but does not get called for some parsers
	# (apparently mostly it's for higher-order parsers)
	# also contains the decision logic about whether to call perform_lowlevel_parse()

	# possible scenarios:
	# h_do_parse()
	#	perform_lowlevel_parse()
	#		parse_foo()

	# h_do_parse()
	#	perform_lowlevel_parse()

	# h_do_parse()

	# Shortcut for setting the name property of the parser on the top of stack
	def name_top_parser(self, name):
		"""Name the parser on top of the stack. Raises IndexError on an empty stack."""
		self.p_stack[-1].name_parser(name)

	def add_mem_use_each(self, size):
		"""Record size bytes, under this stack's parse state, for every parser on the stack."""
		# bytes_used is a dict keyed by parse state, so go through Parser.add_mem_use()
		for p in self.p_stack:
			p.add_mem_use(self.parse_state, size)

	def add_mem_use_top(self, size):
		"""Record size bytes, under this stack's parse state, for the top parser only.

		With no parser on the stack the bytes are added to unclaimed_mem_use instead.
		"""
		top = self.peek()
		if top is None:
			self.unclaimed_mem_use += size
		else:
			top.add_mem_use(self.parse_state, size)

	def show_stack(self):
		print("stack would be printed here. Depth:", len(self.p_stack))
		#print([(p.get_name_or_placeholder(), hex(p.address)) for p in self.p_stack])

	def depth(self):
		"""Return the number of parsers currently on the stack."""
		return len(self.p_stack)

# Class that is responsible for bookkeeping throughout the entire parse
# NB, this is slightly different terminology than the hammer API implicitly uses:
# There, a parse is started by h_parse(), and it is associated with a parse state.
# This corresponds to the ParserStack above. TopLevelParse keeps track of all these.
# Subsequent h_do_parse()s with the same parser state are considered to belong to the same parse

class TopLevelParse:
	"""Bookkeeping for the whole debugging session.

	parser_stacks -- list of ParserStack objects, one pushed per enter_h_packrat_parse()
	parser_objs   -- dict mapping a parser's address (as int) to its Parser object
	"""
	def __init__(self):
		self.parser_stacks = []
		self.parser_objs = {}
		# Bytes allocated while no parser stack existed
		self.unclaimed_mem_use = 0
		# Holds 32 characters starting at state->input_stream[index], used by the GUI
		# The accessors below (and the preview command) use input_chunk; current_input_chunk
		# is the name this attribute was originally initialized under, so both are kept in sync
		self.input_chunk = ''
		self.current_input_chunk = ''
		self.current_parser_env = ''
		# We save/push in perform_lowlevel_parse, but this is used to display them ahead of time
		self.h_do_parse_parser = None
		self.vt_types = None
		self.parser_decombinator = None

	def init_parser(self):
		"""Create the VTTypes and ParserDecombinator helpers."""
		self.vt_types = VTTypes()
		self.parser_decombinator = ParserDecombinator(self.vt_types)

	# Called from h_packrat_parse()'s handler, where the parse state and arena get initialized
	def enter_h_packrat_parse(self, parser):
		"""Push a fresh ParserStack whose parse state is not known yet. Returns 0."""
		# TODO: add a parser stack or something?
		self.parser_stacks.append(ParserStack(None, None))
		return 0

	# TODO: arena parameter is useless
	def enter_h_do_parse(self, parse_state, arena, parser):
		"""Remember parser as the current h_do_parse() argument and fill in the parse state.

		The parse state of the top parser stack is only set if it is still None.
		"""
		# Same int-normalized lookup as everywhere else, so keys always match
		self.h_do_parse_parser = self.add_or_get_parser(parser) # TODO: current_parser_env should be set here instead too
		parser_stack = self.peek_parserstack()
		# Without a parser stack there is no parse state to fill in
		if parser_stack is None:
			return
		if parser_stack.parse_state is None and parse_state is not None:
			self.first_h_do_parse_after_packrat_parse(parse_state, arena)

	# Called from h_do_parse()'s handler, at which point we know the addresses of the state and arena
	def first_h_do_parse_after_packrat_parse(self, parse_state, arena):
		parser_stack = self.peek_parserstack()
		parser_stack.set_state(parse_state)

	# Popping the stack of stacks of parsers
	def return_from_h_packrat_parse(self):
		old_stack = self.parser_stacks.pop()
		if old_stack.depth() > 0:
			print("Warning: parser stack not empty but parse is successful?")

	# Memoize the parser object for this particular address, then push it on the stack
	# Returns the parser object we just initialized (or the one already existing)
	def enter_perform_lowlevel_parse(self, parser_addr):
		parser_obj = self.add_or_get_parser(parser_addr)

		parser_stack = self.peek_parserstack()
		parser_stack.push(parser_obj)
		# The decombinator only exists after init_parser() has run
		if self.parser_decombinator:
			p_env = self.parser_decombinator.decompose_parser(parser_obj, self)
			self.set_parser_env(type(p_env).__name__ + " - " + str(p_env)) # TODO: pass this as data structure to frontend
		return parser_obj

	def return_from_perform_lowlevel_parse(self):
		"""Pop the parser pushed by the matching enter_perform_lowlevel_parse()."""
		self.peek_parserstack().pop()
		# debug print here

	def enter_h_arena_malloc_raw(self, alloc_size):
		"""Attribute alloc_size bytes to the top parser, the top stack, or this object."""
		parser_obj = self.peek_parser()
		parser_stack = self.peek_parserstack()
		# This is probably the slowest part of the code, or maybe the overhead adds up over many calls to h_arena_malloc_raw()
		if parser_obj is not None:
			# Caveat: parser_stack is assumed not to be None if we could get a parser_obj
			parser_obj.add_mem_use(parser_stack.parse_state, alloc_size)
		elif parser_stack is not None:
			# Allocation without a parser on the stack
			# (happens before the first call to perform_lowlevel_parse or after the return from that call)
			parser_stack.unclaimed_mem_use += alloc_size
		else:
			# Allocation without a parser stack (this happens before and after parse)
			self.unclaimed_mem_use += alloc_size

	def parse_virtual(self, parser_name):
		"""Name the parser on top of the current stack, unless it already has a name."""
		parser_obj = self.peek_parser()
		# Nothing to name when there is no stack or the stack is empty
		if parser_obj is None:
			return
		if parser_obj.name is None:
			parser_obj.name_parser(parser_name)

	def peek_parserstack(self):
		"""Return the most recently pushed ParserStack, or None if there is none."""
		if not self.parser_stacks:
			return None
		return self.parser_stacks[-1]

	def peek_parser(self):
		"""Return the top parser of the top stack, or None if either is missing."""
		parser_stack = self.peek_parserstack()
		if parser_stack is None:
			return None
		# Also None when the parser stack is empty (while the stack of stacks isn't)
		return parser_stack.peek()

	def parser_by_address(self, parser_addr):
		"""Return the Parser stored under parser_addr, or None (after printing a note)."""
		addr = int(parser_addr)
		parser_obj = self.parser_objs.get(addr)
		if parser_obj is None:
			print("Parser with address " + hex(addr) + " not found!")
		return parser_obj

	def parsers_by_name(self, parser_name):
		"""Return a list of all parsers named parser_name, or None if there are none."""
		results = [p for p in self.parser_objs.values() if p.name == parser_name]
		return results or None

	def set_input_chunk(self, chunk):
		self.input_chunk = chunk
		self.current_input_chunk = chunk

	def get_input_chunk(self):
		return self.input_chunk

	def set_parser_env(self, parser_env):
		self.current_parser_env = parser_env

	def get_parser_env(self):
		return self.current_parser_env

	def add_or_get_parser(self, parser_addr):
		"""Return the Parser stored under int(parser_addr), creating an unnamed one if needed."""
		addr = int(parser_addr)
		parser_obj = self.parser_objs.get(addr)
		if parser_obj is None:
			# Create a parser object with no name and the address of the parser
			parser_obj = Parser(None, addr)
			self.parser_objs[addr] = parser_obj
		return parser_obj

	def get_parser_top_per_arena_mem(self):
		"""Return the parser with the largest get_arenamax(), or None if no parser is known."""
		if not self.parser_objs:
			return None
		return max(self.parser_objs.values(), key=Parser.get_arenamax)

	def get_parser_top_total_arena_mem(self):
		"""Return the parser with the largest get_arenasum(), or None if no parser is known."""
		if not self.parser_objs:
			return None
		return max(self.parser_objs.values(), key=Parser.get_arenasum)

	def get_avg_mem_use_per_arena(self):
		"""Return a dict mapping each bytes_used key to the mean over the parsers that recorded it."""
		totals = {}
		counts = {}
		# Accumulate byte counts and counts of parsers using that arena
		for p in self.parser_objs.values():
			for arena, mem in p.bytes_used.items():
				totals[arena] = totals.get(arena, 0) + mem
				counts[arena] = counts.get(arena, 0) + 1

		return {arena: total/counts[arena] for arena, total in totals.items()}

	# TODO: get_avg_mem_use_all_arenas, get_total_mem_use

	# TODO: get_avg_mem_use_all_arenas, get_total_mem_use

# Module-wide bookkeeping object; the breakpoint classes and commands below read and update it
top_level_parse = TopLevelParse()
# Approach 1: load the application, set breakpoints, execute stack commands on breakpoint hit, continue

class InitParserBreakpoint(gdb.Breakpoint):
	"""Breakpoint that registers every symbol of the current block as a named parser."""
	def stop(self):
		current_frame = gdb.selected_frame()
		local_symbols = current_frame.block()
		top_level_parse.init_parser()

		# This will also catch locals that aren't parsers, but it's not a problem in practice,
		# since h_parse() will never be called on them
		# If it becomes a problem after all, gdb.parse_and_eval() might be used to filter them out
		for sym in local_symbols:
			# The symbol's value, converted to int, is used as the parser address
			address = int(sym.value(current_frame))
			top_level_parse.parser_objs[address] = Parser(sym.name, address)

		# Returning False lets the inferior continue
		return False

# TODO: refactored to breakpoint-manager.py , remove
#class HRuleBreakpoint(gdb.Breakpoint):
#	def stop(self):
#		frame = gdb.selected_frame()
#		block = frame.block()
#
#		for p in block:
#			top_level_parse.parser_objs[int(p.value(frame))] = Parser(p.name, int(p.value(frame)))

class HArenaMallocRawBreakpoint(gdb.Breakpoint):
	"""Breakpoint that reports the value of the 'size' symbol to top_level_parse."""
	def stop(self):
		frame = gdb.selected_frame()
		alloc_size = None

		# Look for the symbol named 'size' in the block of the selected frame
		for sym in frame.block():
			if sym.name == 'size':
				alloc_size = int(sym.value(frame))
				break

		# If the block has no 'size' symbol there is nothing to account for;
		# skipping avoids using alloc_size before it was ever assigned
		if alloc_size is not None:
			top_level_parse.enter_h_arena_malloc_raw(alloc_size)

		# Returning False lets the inferior continue
		return False

# NOTE(review): nothing in the visible code reads this module-level list; exit_handler()
# uses breakpoint_manager.hammer_retq_breakpoints instead - confirm whether it can be removed
hammer_retq_breakpoints = []

#class BreakpointManager():
#	def __init__(self, h_rule_functions):
#		self.hammer_retq_breakpoints = []
#		self.h_rule_breakpoints = []
#
#		self.h_do_parse = None
#		self.h_packrat_parse = None
#		self.perform_lowlevel_parse = None
#		self.h_arena_malloc_raw = None
#
#		self.parse_action = None
#		self.parse_choice = None
#		self.parse_sequence = None
#		self.parse_difference = None
#		self.parse_many = None
#		self.parse_and = None
#		self.parse_attr_bool = None
#		self.parse_bind = None
#		self.parse_bits = None
#		self.parse_butnot = None
#		self.parse_charset = None
#		self.parse_ch = None
#		self.parse_end = None
#		self.parse_endianness = None
#		self.parse_epsilon = None
#		self.parse_ignore = None
#		self.parse_ignoreseq = None
#		self.parse_indirect = None
#		self.parse_int_range = None
#		self.parse_not = None
#		self.parse_nothing = None
#		self.parse_optional = None
#		self.parse_permutation = None
#		self.parse_skip = None
#		self.parse_seek = None
#		self.parse_tell = None
#		self.parse_token = None
#		self.parse_unimplemented = None
#		self.parse_put = None
#		self.parse_get = None
#		self.parse_whitespace = None
#		self.parse_xor = None
#
#	def set_h_rule_breakpoints(self):
#		for func in H_RULE_FUNCTIONS:
#			func_retq = locate_retq(func[0], func[1])
#			self.h_rule_breakpoints[func] = HRuleBreakpoint("*" + hex(func_retq))

# BreakpointManager is not defined in this file (the commented-out class above is a leftover
# from before the refactoring), so it has to be in scope before this script is sourced
breakpoint_manager = BreakpointManager(H_RULE_FUNCTIONS)

class PDFMainBreakpoint(gdb.Breakpoint):
	"""Breakpoint that asks the breakpoint manager to set the return breakpoints, then stops."""
	def stop(self):
		# The return value is not used here
		# (it used to be stored in breakpoint_manager.hammer_retq_breakpoints)
		breakpoint_manager.set_hammer_retq_breakpoints()

		# Returning True makes GDB stop at this breakpoint
		return True

	def set_hammer_retq_breakpoints(self):
		"""Create by-address breakpoints on the located return instructions and return them as a list."""
		# By-address location of the return instruction found in perform_lowlevel_parse
		plp_location = "*" + hex(locate_perform_lowlevel_parse_retq())
		plp_ret_breakpoint = PerformLowLevelParseRetBreakpoint(plp_location)
		# By-address location of the return instruction found in h_packrat_parse
		hpp_location = "*" + hex(locate_h_packrat_parse_retq())
		hpp_ret_breakpoint = HPackratParseRetBreakpoint(hpp_location)
		return [plp_ret_breakpoint, hpp_ret_breakpoint]
# GDB parameters
# TODO: hammer parameter prefix

class ExtendedParseStepInfo(gdb.Parameter):
	"""Controls whether to display parser stack and input preview on stepping the parse."""
	# GDB looks up set_doc/show_doc while gdb.Parameter.__init__ registers the parameter,
	# so they must exist before that call; assigning them afterwards has no effect
	show_doc = "Show parser stack and input preview after hammer-parse-step:"
	#set_doc = "Show parser stack and input preview after hammer-parse-step:"

	def __init__(self):
		super(ExtendedParseStepInfo, self).__init__("hammer-extended-parse-step-info", gdb.COMMAND_OBSCURE, gdb.PARAM_BOOLEAN)
		# Extended step info is enabled by default
		self.value = True

# Instantiating the class registers the parameter with GDB
ExtendedParseStepInfo()

# GDB commands

# TODO: GDB help strings
# TODO: factor commands out into their own file

class HammerParserBacktrace(gdb.Command):
	"""Print the parser stack of the current parse, innermost parser first. Optional argument: maximum number of entries."""
	def __init__(self):
		super(HammerParserBacktrace, self).__init__ ("hammer-parser-backtrace", gdb.COMMAND_OBSCURE)

	# Parsers stay unnamed until a parse_* virtual function names them; never concatenate None
	def _display_name(self, parser):
		if parser.name is None:
			return "(unnamed)"
		return parser.name

	def invoke(self, arg, from_tty):
		parser_stack = top_level_parse.peek_parserstack()
		if parser_stack is None:
			print("No parse in progress")
			return
		parserstack = parser_stack.p_stack

		# Default: show the whole stack. An invalid argument is reported and then ignored
		maxsize = len(parserstack)
		args = gdb.string_to_argv(arg)
		if len(args) >= 1:
			try:
				requested = int(args[0])
				if requested < 1:
					raise ValueError
				maxsize = requested
			except ValueError:
				print("Argument must be a positive integer")

		# The argument of the last h_do_parse() is shown first; it is None before any h_do_parse()
		current = top_level_parse.h_do_parse_parser
		if current is not None:
			print("[" + hex(current.address) + "] " + self._display_name(current) + " [current]") #TODO: GUI widget should reflect this
			print(" ")

		depth = min(len(parserstack), maxsize)
		# Reverse first, then truncate: the top of the stack is the last list element
		for p in parserstack[::-1][:depth]:
			print("[" + hex(p.address) + "] " + self._display_name(p))
		if depth < len(parserstack):
			print("[...]")

# Instantiating the class registers the command with GDB
HammerParserBacktrace()

class HammerParserMemUse(gdb.Command):
	"""Print the recorded memory use of the parser at the given hexadecimal address."""
	def __init__(self):
		super(HammerParserMemUse, self).__init__("hammer-parser-mem-use", gdb.COMMAND_OBSCURE)

	def invoke(self, arg, from_tty):
		args = gdb.string_to_argv(arg)
		if len(args) < 1:
			print("Usage: hammer-parser-mem-use <address>")
			return

		# Only the conversion is expected to raise ValueError, so only it is guarded
		try:
			parser_addr_int = int(args[0], 16)
		except ValueError:
			print("Address needs to be a hexadecimal number")
			return

		# parser_by_address() prints its own message and returns None for unknown addresses
		parser_obj = top_level_parse.parser_by_address(parser_addr_int)
		if parser_obj is not None:
			print(parser_obj.bytes_used)

# Instantiating the class registers the command with GDB
HammerParserMemUse()

class HammerParserMemUseName(gdb.Command):
	"""Print name, address and recorded memory use of every parser with the given name."""
	def __init__(self):
		super(HammerParserMemUseName, self).__init__("hammer-parser-mem-use-name", gdb.COMMAND_OBSCURE)

	def invoke(self, arg, from_tty):
		args = gdb.string_to_argv(arg)
		if len(args) < 1:
			print("Usage: hammer-parser-mem-use-name <name>")
			return

		parser_name = args[0]
		# parsers_by_name() returns None (not an empty list) when nothing matches
		parser_objs = top_level_parse.parsers_by_name(parser_name)
		if parser_objs is None:
			print("No parser named " + parser_name + " found")
			return

		for p in parser_objs:
			print((p.name, hex(p.address), p.bytes_used))

# Instantiating the class registers the command with GDB
HammerParserMemUseName()

class HammerParserTopSingleArenaMem(gdb.Command):
	"""Print the parser with the highest recorded memory use in a single arena."""
	def __init__(self):
		super(HammerParserTopSingleArenaMem, self).__init__("hammer-parser-top-single-arena-mem", gdb.COMMAND_OBSCURE)

	def invoke(self, arg, from_tty):
		# Arguments are ignored. With no parsers recorded there is no maximum to report
		if not top_level_parse.parser_objs:
			print("No parsers recorded yet")
			return

		p = top_level_parse.get_parser_top_per_arena_mem()
		print((p.name, hex(p.address), p.bytes_used))

# Instantiating the class registers the command with GDB
HammerParserTopSingleArenaMem()

class HammerParserTopTotalArenaMem(gdb.Command):
	"""Print the parser with the highest recorded memory use summed over all arenas."""
	def __init__(self):
		super(HammerParserTopTotalArenaMem, self).__init__("hammer-parser-top-total-arena-mem", gdb.COMMAND_OBSCURE)

	def invoke(self, arg, from_tty):
		# Arguments are ignored. With no parsers recorded there is no maximum to report
		if not top_level_parse.parser_objs:
			print("No parsers recorded yet")
			return

		p = top_level_parse.get_parser_top_total_arena_mem()
		print((p.name, hex(p.address), p.bytes_used))
		print("Total: " + str(p.get_arenasum()) + " bytes")

# Instantiating the class registers the command with GDB
HammerParserTopTotalArenaMem()

# TODO: average memory use, per arena and total

class HammerParserPreviewInput(gdb.Command):
	"""Print the input chunk currently stored for the parse."""
	def __init__(self):
		super(HammerParserPreviewInput, self).__init__("hammer-parser-preview-input", gdb.COMMAND_OBSCURE)

	def invoke(self, arg, from_tty):
		# Arguments are ignored. input_chunk may not exist before the first
		# set_input_chunk() call, so fall back to an empty preview
		print(getattr(top_level_parse, "input_chunk", ''))

# Instantiating the class registers the command with GDB
HammerParserPreviewInput()

class HammerParserAverageMem(gdb.Command):
	"""Print the average recorded memory use per arena."""
	def __init__(self):
		super(HammerParserAverageMem, self).__init__("hammer-parser-average-mem", gdb.COMMAND_OBSCURE)

	def invoke(self, arg, from_tty):
		# Arguments are ignored
		mem = top_level_parse.get_avg_mem_use_per_arena()
		print("Bytes used on average in each arena:")
		print(mem)

# Instantiating the class registers the command with GDB
HammerParserAverageMem()

class HammerParserCurrentEnv(gdb.Command):
	"""Print the decomposed environment of the parser last passed to h_do_parse."""
	def __init__(self):
		super(HammerParserCurrentEnv, self).__init__("hammer-parser-current-env", gdb.COMMAND_OBSCURE)

	def invoke(self, arg, from_tty):
		p = top_level_parse.h_do_parse_parser
		decombinator = top_level_parse.parser_decombinator
		# Both are None until the corresponding breakpoints have been hit
		if decombinator is None:
			print("Parser decombinator not initialized yet")
			return
		if p is None:
			print("No current parser")
			return

		p_env = decombinator.decompose_parser(p, top_level_parse) #TODO: parser -> env mapping function in top_level_parse
		print(type(p_env).__name__ + " - " + str(p_env)) # TODO: consistency with GUI

# Instantiating the class registers the command with GDB
HammerParserCurrentEnv()

#TODO: move all this to BreakpointManager?
# Call when execution stopped at breakpoint in main
def locate_perform_lowlevel_parse_retq():
	"""Return the address of the first "ret" instruction found in perform_lowlevel_parse."""
	# The choice of disassembling only 400 bytes from the start is arbitrary. (This function is
	# 310 bytes long on this particular machine.) There is probably a way to find out where a function ends.
	return locate_retq("perform_lowlevel_parse", 400)

def locate_h_packrat_parse_retq():
	"""Return the address of the first "ret" instruction found in h_packrat_parse."""
	# Same as with perform_lowlevel_parse, +450 is arbitrary
	return locate_retq("h_packrat_parse", 450)

def locate_init_parser_retq():
	"""Return the address of the first "ret" instruction found in init_parser."""
	# Same as with perform_lowlevel_parse, +16000 is arbitrary
	return locate_retq("init_parser", 16000)

def locate_retqs(symbol, length):
	"""Return the addresses of all instructions starting with "ret" in a symbol's code.

	symbol -- name to look up with gdb.lookup_symbol()
	length -- offset added to the symbol's address to get the end of the disassembled range
	"""
	architecture = gdb.selected_frame().architecture()
	function_symbol = gdb.lookup_symbol(symbol)[0]
	start = int(function_symbol.value().address)
	ret_addresses = []
	for insn in architecture.disassemble(start, start+length):
		if insn["asm"].startswith("ret"):
			ret_addresses.append(insn["addr"])
	return ret_addresses

def locate_retq(symbol, length):
	"""Return the first address reported by locate_retqs(); IndexError if there is none."""
	return locate_retqs(symbol, length)[0]

def del_hammer_retq_breakpoints(breakpoints):
	"""Delete every breakpoint in breakpoints that is still valid.

	Breakpoints that were already deleted are skipped, so calling this again
	with the same list (e.g. on a later inferior exit) does not raise.
	"""
	for bp in breakpoints:
		if bp.is_valid():
			bp.delete()

# Clean up by-address breakpoints in hammer when inferior exits.
# Caveat: Assumes there's a single inferior, the debugged parser, so no checking is done
# TODO: where to store breakpoints? TopLevelParse? A BreakpointManager class?
def exit_handler(event):
	# Handler for the inferior's exit event; the event object itself is not inspected.
	# The breakpoints to remove are the ones held by the breakpoint manager
	# (previously: [ perform_lowlevel_parse_ret, h_packrat_parse_ret ])
	retq_breakpoints = breakpoint_manager.hammer_retq_breakpoints
	del_hammer_retq_breakpoints(retq_breakpoints)

# Run exit_handler every time the inferior exits
gdb.events.exited.connect(exit_handler)

#TODO: regex match retq, ret, etc

# Break on main so that libhammer.so gets to load
# PDFMainBreakpoint.stop() returns True, so execution stops there
main = PDFMainBreakpoint("main")




# Breakpoints on the entry of the traced functions, set by symbol name.
# NOTE(review): HDoParseBreakpoint, HPackratParseBreakpoint, PerformLowLevelParseBreakpoint and
# ParserVirtualBreakpoint are not defined in this file and must already be in scope
#init_parser = InitParserBreakpoint("pdf.c:1223")
h_do_parse = HDoParseBreakpoint("h_do_parse")
h_packrat_parse = HPackratParseBreakpoint("h_packrat_parse")
perform_lowlevel_parse = PerformLowLevelParseBreakpoint("perform_lowlevel_parse")
h_arena_malloc_raw = HArenaMallocRawBreakpoint("h_arena_malloc_raw")
# todo: investigate GDB frame filters for rendering backtraces

# One breakpoint per parse_* virtual function
parse_action = ParserVirtualBreakpoint("parse_action")
parse_choice = ParserVirtualBreakpoint("parse_choice")
parse_sequence = ParserVirtualBreakpoint("parse_sequence")
parse_difference = ParserVirtualBreakpoint("parse_difference")
parse_many = ParserVirtualBreakpoint("parse_many")
parse_and = ParserVirtualBreakpoint("parse_and")
parse_attr_bool = ParserVirtualBreakpoint("parse_attr_bool")
parse_bind = ParserVirtualBreakpoint("parse_bind")
parse_bits = ParserVirtualBreakpoint("parse_bits")
parse_butnot = ParserVirtualBreakpoint("parse_butnot")
parse_charset = ParserVirtualBreakpoint("parse_charset")
parse_ch = ParserVirtualBreakpoint("parse_ch")
parse_end = ParserVirtualBreakpoint("parse_end")
parse_endianness = ParserVirtualBreakpoint("parse_endianness")
parse_epsilon = ParserVirtualBreakpoint("parse_epsilon")
parse_ignore = ParserVirtualBreakpoint("parse_ignore")
parse_ignoreseq = ParserVirtualBreakpoint("parse_ignoreseq")
parse_indirect = ParserVirtualBreakpoint("parse_indirect")
parse_int_range = ParserVirtualBreakpoint("parse_int_range")
parse_not = ParserVirtualBreakpoint("parse_not")
parse_nothing = ParserVirtualBreakpoint("parse_nothing")
parse_optional = ParserVirtualBreakpoint("parse_optional")
parse_permutation = ParserVirtualBreakpoint("parse_permutation")
parse_skip = ParserVirtualBreakpoint("parse_skip")
parse_seek = ParserVirtualBreakpoint("parse_seek")
parse_tell = ParserVirtualBreakpoint("parse_tell")
parse_token = ParserVirtualBreakpoint("parse_token")
parse_unimplemented = ParserVirtualBreakpoint("parse_unimplemented")
parse_put = ParserVirtualBreakpoint("parse_put")
parse_get = ParserVirtualBreakpoint("parse_get")
parse_whitespace = ParserVirtualBreakpoint("parse_whitespace")
parse_xor = ParserVirtualBreakpoint("parse_xor")

# Commandline:
# $ gdb -ex "source /path/to/parser-name-instrumentation-gdb.py" --args /path/to/pdf /path/to/input.pdf

# run until main
gdb.execute("run")

#plp_retq = locate_perform_lowlevel_parse_retq()
#perform_lowlevel_parse_ret = PerformLowLevelParseRetBreakpoint("*" + hex(plp_retq))
#hpp_retq = locate_h_packrat_parse_retq()
#h_packrat_parse_ret = HPackratParseRetBreakpoint("*" + hex(hpp_retq))
# The locate_* helpers need a selected frame, so this has to come after "run" has stopped
i_p_retq = locate_init_parser_retq()
# By-address breakpoint on the return instruction located in init_parser
init_parser = InitParserBreakpoint("*" + hex(i_p_retq))
#hammer_retq_breakpoints = [perform_lowlevel_parse_ret, h_packrat_parse_ret]

#h_rule_breakpoints = {}

#for func in H_RULE_FUNCTIONS:
#	func_retq = locate_retq(func[0], func[1])
#	h_rule_breakpoints[func] = HRuleBreakpoint("*" + hex(func_retq))
breakpoint_manager.set_h_rule_breakpoints()


# TODO: the RET breakpoints in hammer break when "run" is executed again. figure out a way to automatically replace these

# Run until stop position, if set. Finish parsing otherwise
gdb.execute("continue")

# Dump name, address and recorded memory use of every known parser
print([(p.name, hex(p.address), p.bytes_used) for p in top_level_parse.parser_objs.values()])

# Approach 2: capture process trace with gdb, load the trace, execute stack commands on breakpoint hit, etc