From 70dfbebb3acbc71a3d7829bf437e182aea5ff2d5 Mon Sep 17 00:00:00 2001
From: pompolic <pompolic@special-circumstanc.es>
Date: Tue, 6 Jul 2021 23:31:09 +0200
Subject: [PATCH] WIP commit

Dependencies between script files are kind of a mess at the moment
---
 gdb-port/gui.py                             |  6 ++++--
 gdb-port/parser-name-instrumentation-gdb.py | 12 +++++++++++-
 gdb-port/parser-type-instrumentation-gdb.py | 13 +++++++++----
 3 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/gdb-port/gui.py b/gdb-port/gui.py
index 34089ef..429621f 100644
--- a/gdb-port/gui.py
+++ b/gdb-port/gui.py
@@ -46,6 +46,7 @@ class PresentationLayer():
 		#self.set_input_chunk(top_level_parse.get_input_chunk())
 
 		self.parser_env = StringVar()
+		self.parser_env.set(top_level_parse.get_parser_env())
 
 		#"Current parser" widgets
 		ttk.Label(self.frame, text="Current parser").grid(column=1, row=1)
@@ -59,8 +60,8 @@ class PresentationLayer():
 		# ParserHierarchy widget (sequence members, parsers this one contains, etc)
 		# Showing parser decisions is probably possible by capturing the return value of h_do_parse
 		ttk.Label(self.frame, text="Parser hierarchy").grid(column=2, row=3)
-		#ttk.Label(self.frame, textvariable=self.parser_env).grid(column=2, row=4)
-		ttk.Label(self.frame, text="[Placeholder, Placeholder, Placeholder]").grid(column=2, row=4)
+		ttk.Label(self.frame, textvariable=self.parser_env).grid(column=2, row=4)
+		#ttk.Label(self.frame, text="[Placeholder, Placeholder, Placeholder]").grid(column=2, row=4)
 
 		ttk.Button(self.frame, text="Step", command=step).grid(column=1, row=4)
 
@@ -71,6 +72,7 @@ def step(*args):
 	gdb.execute("hammer-parse-step 1")
 	presentation_layer.set_top_parser(top_level_parse.peek_parser())
 	presentation_layer.set_input_chunk(top_level_parse.get_input_chunk())
+	presentation_layer.set_parser_env(top_level_parse.get_parser_env())
 
 if top_level_parse is None:
 	print("Please import parser name script")
diff --git a/gdb-port/parser-name-instrumentation-gdb.py b/gdb-port/parser-name-instrumentation-gdb.py
index c612002..6d55b62 100644
--- a/gdb-port/parser-name-instrumentation-gdb.py
+++ b/gdb-port/parser-name-instrumentation-gdb.py
@@ -121,6 +121,7 @@ class TopLevelParse:
 		self.unclaimed_mem_use = 0
 		# Holds 32 characters starting at state->input_stream[index], used by the GUI
 		self.current_input_chunk = ''
+		self.current_parser_env = ''
 
 	# Called from h_packrat_parse()'s handler, where the parse state and arena get initialized
 	def enter_h_packrat_parse(self, parser):
@@ -159,6 +160,9 @@ class TopLevelParse:
 
 		parser_stack = self.peek_parserstack()
 		parser_stack.push(parser_obj)
+		if parser_decombinator:
+			p_env = parser_decombinator.decompose_parser(parser_obj, self)
+			self.set_parser_env(str(p_env))
 		return parser_obj
 
 	def return_from_perform_lowlevel_parse(self):
@@ -223,6 +227,12 @@ class TopLevelParse:
 	def get_input_chunk(self):
 		return self.input_chunk
 
+	def set_parser_env(self, parser_env):
+		self.current_parser_env = parser_env
+
+	def get_parser_env(self):
+		return self.current_parser_env
+
 	def add_or_get_parser(self, parser_addr):
 		try:
 			parser_obj = self.parser_objs[int(parser_addr)]
@@ -275,8 +285,8 @@ class HDoParseBreakpoint(gdb.Breakpoint):
 		top_level_parse.enter_h_do_parse(state, None, parser)
 
 		input_chunk = input_ptr + index
-		top_level_parse.set_input_chunk(input_chunk.string('UTF-8','replace',32))
 		#print(input_chunk.string('ascii','backslashreplace',10))
+		top_level_parse.set_input_chunk(input_chunk.string('UTF-8','replace',32))
 
 
 		# Check if we need to stop after a number of steps
diff --git a/gdb-port/parser-type-instrumentation-gdb.py b/gdb-port/parser-type-instrumentation-gdb.py
index fd46ec3..59c4b26 100644
--- a/gdb-port/parser-type-instrumentation-gdb.py
+++ b/gdb-port/parser-type-instrumentation-gdb.py
@@ -152,7 +152,7 @@ class AndEnv(HParserEnv):
 		self.member_parser = parser_obj
 
 	def __str__(self):
-		return str([str(self.member_parser)]))
+		return str([str(self.member_parser)])
 
 class ChEnv(HParserEnv):
 	def __init__(self, parser, top_level_parse):
@@ -270,7 +270,7 @@ class IgnoreSeqEnv(HParserEnv):
 			self.member_parsers.append(parser_obj)
 
 	def __str__(self):
-		return str([str(self.which), str([str(p) for p in self.member_parsers]))
+		return str([str(self.which), str([str(p) for p in self.member_parsers])])
 
 class IndirectEnv(HParserEnv):
 	def __init__(self, parser, top_level_parse):
@@ -515,7 +515,7 @@ vtable_to_env = {
 	'charset_vt': CharsetEnv,
 	'choice_vt': ChoiceEnv,
 	'difference_vt': DifferenceEnv,
-	'end_vt', EndEnv,
+	'end_vt': EndEnv,
 	'endianness_vt': EndiannessEnv,
 	'epsilon_vt': EpsilonEnv,
 	'ignore_vt': IgnoreEnv,
@@ -540,6 +540,8 @@ vtable_to_env = {
 	'xor_vt': XorEnv
 	}
 
+# TODO: add a map from runtime (vtable) addresses to env classes
+
 # When given a Parser object, decompose_parser() deduces its type from the vtable, and returns the appropriate HParserEnv subclass, containing member parsers and args
 # Not sure what to name it. Alternatives:
 # ParserDisassembler?
@@ -556,9 +558,12 @@ class ParserDecombinator:
 		parser_addr = parser.address
 		vtable_p = gdb.parse_and_eval("((HParser*) " + str(parser_addr) + ")->vtable")
 		try:
-			envClass = vtable_to_env[vtable_p.name]
+			vtable_sym = vt_types.lookup_by_address(vtable_p)
+			envClass = vtable_to_env[vtable_sym.name]
 		except KeyError:
 			print("Unknown vtable: " + str(vtable_p))
 			return None
 
 		return envClass(parser, top_level_parse)
+
+parser_decombinator = ParserDecombinator()
-- 
GitLab