From 8d1d5a8bbd7240490113a9900799f1ffbffe2afe Mon Sep 17 00:00:00 2001
From: pompolic <pompolic@special-circumstanc.es>
Date: Mon, 5 Jul 2021 20:08:01 +0200
Subject: [PATCH] Env classes for all known parser types

---
 gdb-port/parser-type-instrumentation-gdb.py | 373 +++++++++++++++++---
 1 file changed, 323 insertions(+), 50 deletions(-)

diff --git a/gdb-port/parser-type-instrumentation-gdb.py b/gdb-port/parser-type-instrumentation-gdb.py
index 62e6310..e234409 100644
--- a/gdb-port/parser-type-instrumentation-gdb.py
+++ b/gdb-port/parser-type-instrumentation-gdb.py
@@ -15,6 +15,7 @@ parser_name_defaults = {
 	'difference_vt': '(Unnamed difference)',
 	'end_vt': '(Unnamed end)',
 	'endianness_vt': '(Unnamed endianness)',
+	'epsilon_vt': '(Unnamed epsilon)',
 	'ignore_vt': '(Unnamed ignore)',
 	'ignoreseq_vt': '(Unnamed ignoreseq)',
 	'indirect_vt': '(Unnamed indirect)',
@@ -84,69 +85,46 @@ class AttrBoolEnv(HParserEnv):
 	def __init__(self, parser, top_level_parse):
 		self.parser = parser
 
-		member_parser_p =  gdb.parse_and_eval("((HAttrBool *) parser->env)->p")
+		member_parser_p = gdb.parse_and_eval("((HAttrBool *) parser->env)->p")
 		self.member_parser = top_level_parse.add_or_get_parser(member_parser_p)
-		self.predicate_p =  gdb.parse_and_eval("((HAttrBool *) parser->env)->pred")
-		self.user_data_p =  gdb.parse_and_eval("((HAttrBool *) parser->env)->user_data")
-	
+		self.predicate_p = gdb.parse_and_eval("((HAttrBool *) parser->env)->pred")
+		self.user_data_p = gdb.parse_and_eval("((HAttrBool *) parser->env)->user_data")
+
 	def __str__(self):
 		return str(self.member_parser)
 
-class BitsEnv(HParserEnv):
-	def __init__(self, parser, top_level_parse):
-		self.parser = parser
-
-		self.length =  gdb.parse_and_eval("((HBitsEnv *) parser->env)->length")
-		self.signedp =  gdb.parse_and_eval("((HBitsEnv *) parser->env)->signedp")
-
-class SequenceEnv(HParserEnv):
+class BindEnv(HParserEnv):
 	def __init__(self, parser, top_level_parse):
-		super().__init__(parser, top_level_parse) # TODO: maybe move self.parser to base class. otherwise, is this needed?
 		self.parser = parser
-		self.member_parsers = []
-
-		#frame = gdb.selected_frame()
-		h_sequence_p = gdb.parse_and_eval("(HSequence*) parser->env")
-		num_parsers = gdb.parse_and_eval("((HSequence *) parser->env)->len")
-		# TODO: should GDB do the array indexing operation, or should the Python code?
-		# TODO: top_level_parse.create_or_get_parser(address)
-		#[gdb.parse_and_eval("((HSequence*) parser->env)->p_array[" + str(index) + "]") for index in range(0, num_parsers)]
-		# TODO: maybe the loop can be replaced with a list comprehension
-		#[top_level_parse.add_or_get_parser(gdb.parse_and_eval("((HSequence*) parser->env)->p_array[" + str(index) +"]")) for index in range(0, num_parsers)]
 
-		for index in range(0, num_parsers):
-			parser_p = gdb.parse_and_eval("((HSequence*) parser->env)->p_array[" + str(index) + "]")
-			parser_obj = top_level_parse.add_or_get_parser(parser_p)
-			if parser_obj.name is None:
-				parser_obj.name_parser( self.name_from_vtable(parser_obj) )
-			self.member_parsers.append(parser_obj)
+		member_parser_p = gdb.parse_and_eval("((HBindEnv *) parser->env)->p")
+		self.member_parser = top_level_parse.add_or_get_parser(member_parser_p)
+		continuation_p = gdb.parse_and_eval("((HBindEnv *) parser->env)->k")
+		self.continuation = int(continuation_p)
+		env_p = gdb.parse_and_eval("((HBindEnv *) parser->env)->env")
+		self.env = int(env_p)
 
 	def __str__(self):
-		return str([str(parser) for parser in self.member_parsers])
+		return str([str(self.member_parser), str(hex(self.continuation)), str(hex(self.env))])
 
-
-class IgnoreEnv:
+class BitsEnv(HParserEnv):
 	def __init__(self, parser, top_level_parse):
 		self.parser = parser
-		self.member_parser_pointers = []
 
-		ignored_p = gdb.parse_and_eval("(HParser*) parser->env")
+		self.length =  gdb.parse_and_eval("((HBitsEnv *) parser->env)->length")
+		self.signedp =  gdb.parse_and_eval("((HBitsEnv *) parser->env)->signedp")
 
-		# Ideally we could look up ignored_p using top_level_parse, but it will not find a result if perform_lowlevel_parse(ignored_p) wasn't called yet
-		#TODO: method to add parser in TopLevelParse
+class ButNotEnv(HParserEnv):
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
 
-		'''
-		try:
-			parser_obj = top_level_parse.parser_objs[ignored_p]
-		except KeyError:
-			# Create a parser object with no name and the address of the parser
-			parser_obj = Parser(None, int(ignored_p))
-			top_level_parse.parser_objs[int(ignored_p)] = parser_obj
+		p1_p = gdb.parse_and_eval("((HTwoParsers *) parser->env)->p1")
+		self.p1 = top_level_parse.add_or_get_parser(p1_p)
+		p2_p = gdb.parse_and_eval("((HTwoParsers *) parser->env)->p2")
+		self.p2 = top_level_parse.add_or_get_parser(p2_p)
 
-		self.member_parsers.append(parser_obj)
-		'''
-		ignore_obj = top_level_parse.add_or_get_parser(ignored_p)
-		self.member_parsers.append(ignore_p)
+	def __str__(self):
+		return str([str(self.p1), str(self.p2)])
 
 class ActionEnv:
 	def __init__(self, parser, top_level_parse):
@@ -180,6 +158,16 @@ class ChEnv(HParserEnv):
 	def __str__(self):
 		return str(self.ch_value)
 
+class CharsetEnv(HParserEnv):  # Env wrapper mapped from 'charset_vt'; reads parser->env cast to HCharset
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		self.charset_arg = gdb.parse_and_eval("(HCharset) parser->env")  # "parser" inside the string is resolved by GDB, not the Python argument
+
+	def __str__(self):
+		return str(self.charset_arg)  # string form of the value GDB returned for the cast
+
+# TODO: numeric parameter passed to parse_and_eval
 class ChoiceEnv(HParserEnv):
 	def __init__(self, parser, top_level_parse):
 		super().__init__(parser, top_level_parse)
@@ -205,6 +193,119 @@ class ChoiceEnv(HParserEnv):
 	def __str__(self):
 		return str([str(parser) for parser in self.member_parsers])
 
+class DifferenceEnv(HParserEnv):  # Env wrapper mapped from 'difference_vt'; reads p1 and p2 of HTwoParsers
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		p1_p = gdb.parse_and_eval("((HTwoParsers *) parser->env)->p1")
+		self.p1 = top_level_parse.add_or_get_parser(p1_p)  # result of add_or_get_parser for the p1 pointer
+		p2_p = gdb.parse_and_eval("((HTwoParsers *) parser->env)->p2")
+		self.p2 = top_level_parse.add_or_get_parser(p2_p)  # result of add_or_get_parser for the p2 pointer
+
+	def __str__(self):
+		return str([str(self.p1), str(self.p2)])  # string form of the two-element list [p1, p2]
+
+class EndEnv(HParserEnv):  # Env wrapper mapped from 'end_vt'; reads nothing from parser->env
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser  # top_level_parse is accepted but not used here
+
+	def __str__(self):
+		return "[]"  # no arguments were read, so an empty list is printed
+
+class EndiannessEnv(HParserEnv):  # Env wrapper mapped from 'endianness_vt'; reads p and endianness of HParseEndianness
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		member_parser_p = gdb.parse_and_eval("((HParseEndianness *) parser->env)->p")
+		self.member_parser = top_level_parse.add_or_get_parser(member_parser_p)  # result of add_or_get_parser for the p pointer
+		endianness_val = gdb.parse_and_eval("((HParseEndianness *) parser->env)->endianness")
+		self.endianness = int(endianness_val)  # kept as a plain Python int
+
+	def __str__(self):
+		return str([str(self.member_parser), str(self.endianness)])  # string form of [member parser, endianness]
+
+class EpsilonEnv(HParserEnv):  # Env wrapper mapped from 'epsilon_vt'; reads nothing from parser->env
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser  # top_level_parse is accepted but not used here
+
+	def __str__(self):
+		return "[]"  # no arguments were read, so an empty list is printed
+
+class IgnoreEnv(HParserEnv):  # Env wrapper mapped from 'ignore_vt'; treats parser->env itself as an HParser pointer
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		ignored_p = gdb.parse_and_eval("(HParser*) parser->env")  # "parser" inside the string is resolved by GDB, not the Python argument
+
+		ignore_obj = top_level_parse.add_or_get_parser(ignored_p)
+		self.member_parser = ignore_obj  # result of add_or_get_parser for the ignored parser pointer
+
+	def __str__(self):
+		return str([str(self.member_parser)])  # string form of a one-element list
+
+# TODO: numeric parameter passed to parse_and_eval
+class IgnoreSeqEnv(HParserEnv):  # Env wrapper mapped from 'ignoreseq_vt'; reads len, which and parsers[] of HIgnoreSeq
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+		self.member_parsers = []  # filled below, one entry per index in 0..len-1
+
+		ignoreseq_p = gdb.parse_and_eval("(HIgnoreSeq*) parser->env")  # NOTE(review): not used below
+		seq_len = gdb.parse_and_eval("((HIgnoreSeq*) parser->env)->len")
+		which = gdb.parse_and_eval("((HIgnoreSeq*) parser->env)->which")
+		self.which = int(which)  # kept as a plain Python int
+
+		for index in range(0, seq_len):
+			parser_p = gdb.parse_and_eval("((HIgnoreSeq*) parser->env)->parsers[" + str(index) + "]")
+			parser_obj = top_level_parse.add_or_get_parser(parser_p)
+			# TODO: name parsers on add?
+			# TODO: move updates to parsers to TopLevelParse?
+			if parser_obj.name is None:  # only name members that have no name yet
+				parser_obj.name_parser( self.name_from_vtable(parser_obj) )
+			self.member_parsers.append(parser_obj)
+
+	def __str__(self):
+		return str([str(self.which), str([str(p) for p in self.member_parsers])])  # [which, stringified member list]
+
+class IndirectEnv(HParserEnv):  # Env wrapper mapped from 'indirect_vt'; reads parser and touched of HIndirectEnv
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		member_parser_p = gdb.parse_and_eval("((HIndirectEnv*) parser->env)->parser")
+		self.member_parser = top_level_parse.add_or_get_parser(member_parser_p)  # result of add_or_get_parser for the wrapped parser pointer
+		touched = gdb.parse_and_eval("((HIndirectEnv*) parser->env)->touched")
+		self.touched = bool(touched)  # kept as a plain Python bool
+
+	def __str__(self):
+		return str([str(self.member_parser), str(self.touched)])  # string form of [member parser, touched]
+
+class IntRangeEnv(HParserEnv):  # Env wrapper mapped from 'int_range_vt'; reads p, lower and upper of HRange
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		member_parser_p = gdb.parse_and_eval("((HRange*) parser->env)->p")
+		self.member_parser = top_level_parse.add_or_get_parser(member_parser_p)  # result of add_or_get_parser for the p pointer
+		lower_val = gdb.parse_and_eval("((HRange*) parser->env)->lower")
+		self.lower = int(lower_val)  # kept as a plain Python int
+		upper_val = gdb.parse_and_eval("((HRange*) parser->env)->upper")
+		self.upper = int(upper_val)  # kept as a plain Python int
+
+
+	def __str__(self):
+		return str([str(self.member_parser), str(self.lower), str(self.upper)])  # string form of [member parser, lower, upper]
+
+class LengthValueEnv(HParserEnv):  # Env wrapper mapped from 'length_value_vt'; reads length and value of HLenVal
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		length_parser_p = gdb.parse_and_eval("((HLenVal *) parser->env)->length")
+		self.length_parser = top_level_parse.add_or_get_parser(length_parser_p)  # the length field is passed to add_or_get_parser like a parser pointer
+
+		value_parser_p = gdb.parse_and_eval("((HLenVal *) parser->env)->value")
+		self.value_parser = top_level_parse.add_or_get_parser(value_parser_p)  # the value field is passed to add_or_get_parser like a parser pointer
+
+	def __str__(self):
+		return str([str(self.length_parser), str(self.value_parser)])  # string form of [length parser, value parser]
+
 class ManyEnv(HParserEnv):
 	def __init__(self, parser, top_level_parse):
 		self.parser = parser
@@ -247,18 +348,190 @@ class OptionalEnv(HParserEnv):
 	def __str__(self):
 		return str(self.member_parser)
 
+# TODO: numeric parameter passed to parse_and_eval
+class PermutationEnv(HParserEnv):  # Env wrapper mapped from 'permutation_vt'; reads len and p_array[] of HSequence
+	def __init__(self, parser, top_level_parse):
+		super().__init__(parser, top_level_parse) # TODO: maybe move self.parser to base class. otherwise, is this needed?
+		self.parser = parser
+		self.member_parsers = []  # filled below, one entry per index in 0..len-1
+
+		#frame = gdb.selected_frame()
+		h_sequence_p = gdb.parse_and_eval("(HSequence*) parser->env")  # NOTE(review): not used below
+		num_parsers = gdb.parse_and_eval("((HSequence *) parser->env)->len")
+		# TODO: should GDB do the array indexing operation, or should the Python code?
+		# TODO: top_level_parse.create_or_get_parser(address)
+		#[gdb.parse_and_eval("((HSequence*) parser->env)->p_array[" + str(index) + "]") for index in range(0, num_parsers)]
+		# TODO: maybe the loop can be replaced with a list comprehension
+		#[top_level_parse.add_or_get_parser(gdb.parse_and_eval("((HSequence*) parser->env)->p_array[" + str(index) +"]")) for index in range(0, num_parsers)]
+
+		for index in range(0, num_parsers):
+			parser_p = gdb.parse_and_eval("((HSequence*) parser->env)->p_array[" + str(index) + "]")
+			parser_obj = top_level_parse.add_or_get_parser(parser_p)
+			if parser_obj.name is None:  # only name members that have no name yet
+				parser_obj.name_parser( self.name_from_vtable(parser_obj) )
+			self.member_parsers.append(parser_obj)
+
+	def __str__(self):
+		return str([str(parser) for parser in self.member_parsers])  # string form of the list of member parser strings
+
+class SeekEnv(HParserEnv):  # Env wrapper mapped from 'seek_vt'; reads offset and whence of HSeek
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		offset_val = gdb.parse_and_eval("((HSeek *) parser->env)->offset")
+		self.offset = int(offset_val)  # kept as a plain Python int
+		whence_val = gdb.parse_and_eval("((HSeek *) parser->env)->whence")
+		self.whence = int(whence_val)  # kept as a plain Python int
+		#TODO: enum for whence?
+
+	def __str__(self):
+		return str([str(self.offset), str(self.whence)])  # string form of [offset, whence]
+
+class SkipEnv(HParserEnv):  # Env wrapper mapped from 'skip_vt'; reads parser->env itself as a uintptr_t
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		skip_val = gdb.parse_and_eval("((uintptr_t) parser->env)")  # the env pointer value is the argument, nothing is dereferenced
+		self.skip = int(skip_val)  # kept as a plain Python int
+
+	def __str__(self):
+		return str(self.skip)
+
+class TellEnv(HParserEnv):  # Env wrapper mapped from 'tell_vt'; reads nothing from parser->env
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser  # top_level_parse is accepted but not used here
+
+	def __str__(self):
+		return "[]"  # no arguments were read, so an empty list is printed
+
+# TODO: numeric parameter passed to parse_and_eval
+class SequenceEnv(HParserEnv):  # Env wrapper mapped from 'sequence_vt'; reads len and p_array[] of HSequence
+	def __init__(self, parser, top_level_parse):
+		super().__init__(parser, top_level_parse) # TODO: maybe move self.parser to base class. otherwise, is this needed?
+		self.parser = parser
+		self.member_parsers = []  # filled below, one entry per index in 0..len-1
+
+		#frame = gdb.selected_frame()
+		h_sequence_p = gdb.parse_and_eval("(HSequence*) parser->env")  # NOTE(review): not used below
+		num_parsers = gdb.parse_and_eval("((HSequence *) parser->env)->len")
+		# TODO: should GDB do the array indexing operation, or should the Python code?
+		# TODO: top_level_parse.create_or_get_parser(address)
+		#[gdb.parse_and_eval("((HSequence*) parser->env)->p_array[" + str(index) + "]") for index in range(0, num_parsers)]
+		# TODO: maybe the loop can be replaced with a list comprehension
+		#[top_level_parse.add_or_get_parser(gdb.parse_and_eval("((HSequence*) parser->env)->p_array[" + str(index) +"]")) for index in range(0, num_parsers)]
+
+		for index in range(0, num_parsers):
+			parser_p = gdb.parse_and_eval("((HSequence*) parser->env)->p_array[" + str(index) + "]")
+			parser_obj = top_level_parse.add_or_get_parser(parser_p)
+			if parser_obj.name is None:  # only name members that have no name yet
+				parser_obj.name_parser( self.name_from_vtable(parser_obj) )
+			self.member_parsers.append(parser_obj)
+
+	def __str__(self):
+		return str([str(parser) for parser in self.member_parsers])  # string form of the list of member parser strings
+
+class TokenEnv(HParserEnv):  # Env wrapper mapped from 'token_vt'; reads str and len of HToken
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		str_p = gdb.parse_and_eval("((HToken *) parser->env)->str")
+		self.token = str_p  # stored as returned by GDB, not converted to int; the token bytes are not read (see TODO)
+		str_len = gdb.parse_and_eval("((HToken *) parser->env)->len")
+		self.str_len = int(str_len)  # kept as a plain Python int
+		#TODO: get str_len bytes from memory and save it
+
+	def __str__(self):
+		return str(hex(self.token))  # prints the str field via hex(), not the token contents
+
+class UnimplemenetedEnv(HParserEnv):  # Mapped from 'unimplemented_vt'. NOTE(review): name is spelled "Unimplemeneted"; vtable_to_env uses this spelling
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser  # top_level_parse is accepted but not used here
+
+	def __str__(self):
+		return "[]"  # no arguments were read, so an empty list is printed
+
+# TODO: print key as string if applicable
+class GetEnv(HParserEnv):  # Env wrapper mapped from 'get_vt'; reads p and key of HStoredValue
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		member_parser_p = gdb.parse_and_eval("((HStoredValue *) parser->env)->p")
+		self.member_parser = top_level_parse.add_or_get_parser(member_parser_p)  # result of add_or_get_parser for the p pointer
+		key_p = gdb.parse_and_eval("((HStoredValue *) parser->env)->key")
+		self.key = int(key_p)  # key is kept as an integer and printed in hex
+
+	def __str__(self):
+		return str([str(self.member_parser), str(hex(self.key))])  # string form of [member parser, hex key]
+
+class PutEnv(HParserEnv):  # Env wrapper mapped from 'put_vt'; reads p and key of HStoredValue
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		member_parser_p = gdb.parse_and_eval("((HStoredValue *) parser->env)->p")
+		self.member_parser = top_level_parse.add_or_get_parser(member_parser_p)  # result of add_or_get_parser for the p pointer
+		key_p = gdb.parse_and_eval("((HStoredValue *) parser->env)->key")
+		self.key = int(key_p)  # key is kept as an integer and printed in hex
+
+	def __str__(self):
+		return str([str(self.member_parser), str(hex(self.key))])  # string form of [member parser, hex key]
+
+class WhitespaceEnv(HParserEnv):  # Env wrapper mapped from 'whitespace_vt'; treats parser->env itself as an HParser pointer
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		member_parser_p = gdb.parse_and_eval("(HParser *) parser->env")
+		self.member_parser = top_level_parse.add_or_get_parser(member_parser_p)  # result of add_or_get_parser for the wrapped parser pointer
+
+	def __str__(self):
+		return str([str(self.member_parser)])  # string form of a one-element list
+
+class XorEnv(HParserEnv):  # Env wrapper mapped from 'xor_vt'; reads p1 and p2 of HTwoParsers
+	def __init__(self, parser, top_level_parse):
+		self.parser = parser
+
+		p1_p = gdb.parse_and_eval("((HTwoParsers *) parser->env)->p1")
+		self.p1 = top_level_parse.add_or_get_parser(p1_p)  # result of add_or_get_parser for the p1 pointer
+		p2_p = gdb.parse_and_eval("((HTwoParsers *) parser->env)->p2")
+		self.p2 = top_level_parse.add_or_get_parser(p2_p)  # result of add_or_get_parser for the p2 pointer
+
+	def __str__(self):
+		return str([str(self.p1), str(self.p2)])  # string form of the two-element list [p1, p2]
+
 
 vtable_to_env = {
-	'sequence_vt': SequenceEnv,
-	'ignore_vt': IgnoreEnv,
 	'action_vt': ActionEnv,
 	'and_vt': AndEnv,
 	'attr_bool_vt': AttrBoolEnv,
+	'bind_vt': BindEnv,
+	'bits_vt': BitsEnv,
+	'butnot_vt': ButNotEnv,
 	'ch_vt': ChEnv,
+	'charset_vt': CharsetEnv,
+	'choice_vt': ChoiceEnv,
+	'difference_vt': DifferenceEnv,
+	'end_vt': EndEnv,
+	'endianness_vt': EndiannessEnv,
+	'epsilon_vt': EpsilonEnv,
+	'ignore_vt': IgnoreEnv,
+	'ignoreseq_vt': IgnoreSeqEnv,
+	'indirect_vt': IndirectEnv,
+	'int_range_vt': IntRangeEnv,
+	'length_value_vt': LengthValueEnv,
 	'many_vt': ManyEnv,
 	'not_vt': NotEnv,
 	'nothing_vt': NothingEnv,
-	'optional_vt': OptionalEnv
+	'optional_vt': OptionalEnv,
+	'permutation_vt': PermutationEnv,
+	'seek_vt': SeekEnv,
+	'skip_vt': SkipEnv,
+	'tell_vt': TellEnv,
+	'sequence_vt': SequenceEnv,
+	'token_vt': TokenEnv,
+	'unimplemented_vt': UnimplemenetedEnv,
+	'get_vt': GetEnv,
+	'put_vt': PutEnv,
+	'whitespace_vt': WhitespaceEnv,
+	'xor_vt': XorEnv
 	}
 
 # When given a Parser object, decompose_parser() deduces its type from the vtable, and returns the appropriate HParserEnv subclass, containing member parsers and args
-- 
GitLab