From 5f1ec2d5081934e50df55c277ec9d7ca50b65ba9 Mon Sep 17 00:00:00 2001
From: pompolic <pompolic@special-circumstanc.es>
Date: Thu, 11 May 2023 23:04:39 +0200
Subject: [PATCH] WIP commit

- progress towards gathering up allocations, then distributing them across parsers in TopLevelParse
---
 gdb-port/parser.py          | 30 +++++++++++++++++++++++++-----
 gdb-port/top-level-parse.py | 12 ++++++++++++
 2 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/gdb-port/parser.py b/gdb-port/parser.py
index 5cc5cd5..034f782 100644
--- a/gdb-port/parser.py
+++ b/gdb-port/parser.py
@@ -89,6 +89,7 @@ class ParserStack:
 		self.p_stack = []
 		self.unclaimed_mem_use = 0
 		self.stack_events = []
+		self.pending_allocs = {}
 		# Represents the index of a "waterline" on the stack_events for the purpose of counting allocations
 		# This is the index of the last event (stack push or pop) where the relevant allocations have been committed to TopLevelParse's memory stats.
 		# Due to the semantics of stack events, this means that if self.committed points to a push event, nothing in that "frame" has been committed yet. if it points to a pop event, everything in that "frame" has been committed
@@ -144,7 +145,9 @@ class ParserStack:
 			# Additionally, it would probably make more sense to store it at the ParserStack, storing (frame, bytes) pairs
 			# TODO: where should commit_at_push() be called? HDoParseBreakpoint determines whether to stop, which ParserStack doesn't know about. could be a bool parameter to push()
 			if should_commit:
-				self.commit_at_push()
+				allocs = self.commit_at_push()
+				#for addr, alloc in allocs.items():
+				#	self.pending_allocs[addr] = self.pending_allocs.get(addr, 0) + alloc
 
 	def pop(self):
 		parser_obj = self.peek()
@@ -156,6 +159,9 @@ class ParserStack:
 			self.stack_events.append((StackEvent.POP, int(self.parse_state_gdbval['arena']['arena_malloc_bytes']), parser_obj))
 
 			allocs = self.commit_at_pop()
+			# TODO: where do we commit unclaimed allocs in a parserstack?
+			#for addr, alloc in allocs.items():
+			#	self.pending_allocs[addr] = self.pending_allocs.get(addr, 0) + alloc
 			#print("adding mem use: parser:", str(parser_obj), "arena:", hex(int(self.arena)), "bytes:", allocated_bytes) # DEBUG
 			#print("adding mem use (alternate): parser:", str(parser_obj), "arena:", hex(int(self.arena)), "bytes:", allocs) # DEBUG
 			#parser_obj.add_mem_use(int(self.arena), allocs)
@@ -269,9 +275,13 @@ class ParserStack:
 		#print("commit_at_pop: current_frame_alloc", current_frame_alloc) # DEBUG
 		pop_allocs = {}
 		parser_cache = {}
+		ev_list_length = len(ev_list)
 
 		# TODO: clean up
-		if len(ev_list) == 1:
+		# TODO: ideas for getting better performance: try to see if we can just commit allocations when execution actually pauses
+		# TODO: maybe numpy could help
+
+		if ev_list_length == 1:
 			# Compare bytes allocated in arena to last known value, which will be the additional bytes allocated since reentering the stack frame
 			#alloc_size = self.stack_events[ev_start][1] - current_event[1]
 			#current_event[2].add_mem_use(int(self.arena), current_frame_alloc)
@@ -279,16 +289,21 @@ class ParserStack:
 		#	print("commit_at_pop: pop_allocs:", { current_event[2].address: current_frame_alloc }) # DEBUG
 			pop_allocs[current_event[2].address] = pop_allocs.get(current_event[2].address, 0) + current_frame_alloc
 			parser_cache[current_event[2].address] = current_event[2]
+			current_event[2].add_mem_use(int(self.arena), current_frame_alloc)
+			profiler.disable() # DEBUG
+			return pop_allocs
 		#	return { current_event[2].address: current_frame_alloc }
-		elif len(ev_list) > 1:
+		elif ev_list_length > 1:
 			#bytes_list = [ev[1] for ev in ev_list]
+			bigger = ev_list[1:]
+			smaller = ev_list[:-1]
 
 			#differences = map(lambda smaller, bigger: bigger-smaller, bytes_list[:-1], bytes_list[1:])
-			differences = [bigger[1]-smaller[1] for bigger, smaller in zip(ev_list[1:], ev_list[:-1])]
+			#differences = [bigger[1]-smaller[1] for bigger, smaller in zip(ev_list[1:], ev_list[:-1])]
 			# TODO: this could be a list comprehension too
 			for index, event in enumerate(ev_list[:-1]):
 				parser_cache[int(event[2].address)] = event[2]
-				alloc = differences[index]
+				alloc = bigger[index][1] - smaller[index][1]
 				pop_allocs[event[2].address] = pop_allocs.get(event[2].address, 0) + alloc
 				#alloc = next(differences)
 				#print("commit_at_pop: alloc:", alloc) # DEBUG
@@ -338,3 +353,8 @@ class ParserStack:
 	# checking self.committed will show if all the relevant allocations before revisiting the current stack frame have been committed.
 	# - self.stack_events[self.committed] == current_event
 
+	def get_pending_allocations(self): # returns the live pending_allocs dict, not a copy
+		return self.pending_allocs
+
+	def clear_pending_allocations(self): # rebinds to a fresh dict; a dict handed out earlier is left untouched
+		self.pending_allocs = {}
diff --git a/gdb-port/top-level-parse.py b/gdb-port/top-level-parse.py
index 80e0fa5..8b3dd9e 100644
--- a/gdb-port/top-level-parse.py
+++ b/gdb-port/top-level-parse.py
@@ -13,6 +13,7 @@ class TopLevelParse:
 		self.vt_types = None
 		self.parser_decombinator = None
 		self.debug_stop = False # DEBUG
+		self.pending_allocs = {}
 
 		# Counts stack pushes/pops to determine if stopping is needed for hammer-parse-after-apply
 		self.ast_stack_index = None
@@ -266,4 +267,15 @@ class TopLevelParse:
 
 	# TODO: get_avg_mem_use_all_arenas, get_total_mem_use
 
+	def gather_allocations_in_parser_stacks(self): # WIP: visits every parser stack's pending allocations; merging is not implemented yet, nothing is returned
+		pending_allocs_by_arena = {}
+
+		for ps in self.parser_stacks:
+			allocs = ps.get_pending_allocations()
+			#pending_allocs_by_arena[ps.arena] = { ps.arena : ps.get_pending_allocations() }
+
+		# TODO: allocations are grouped by arena, then parser address. we get the arena address from the parser stack, which may be shared between parser stacks
+		# Therefore, it can't just be blindly assigned to a key with the arena's address. the two dicts need to be merged, with the values of duplicate entries added together
+		# However, the problem is that doing this naively would require two nested for loops minimum
+
 top_level_parse = TopLevelParse()
-- 
GitLab