From 5f1ec2d5081934e50df55c277ec9d7ca50b65ba9 Mon Sep 17 00:00:00 2001 From: pompolic <pompolic@special-circumstanc.es> Date: Thu, 11 May 2023 23:04:39 +0200 Subject: [PATCH] WIP commit - progress towards gathering up allocations, then distributing them across parsers in TopLevelParse --- gdb-port/parser.py | 30 +++++++++++++++++++++++++----- gdb-port/top-level-parse.py | 12 ++++++++++++ 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/gdb-port/parser.py b/gdb-port/parser.py index 5cc5cd5..034f782 100644 --- a/gdb-port/parser.py +++ b/gdb-port/parser.py @@ -89,6 +89,7 @@ class ParserStack: self.p_stack = [] self.unclaimed_mem_use = 0 self.stack_events = [] + self.pending_allocs = {} # Represents the index of a "waterline" on the stack_events for the purpose of counting allocations # This is the index of the last event (stack push or pop) where the relevant allocations have been committed to TopLevelParse's memory stats. # Due to the semantics of stack events, this means that if self.committed points to a push event, nothing in that "frame" has been committed yet. if it points to a pop event, everything in that "frame" has been committed @@ -144,7 +145,9 @@ class ParserStack: # Additionally, it would probably make more sense to store it at the ParserStack, storing (frame, bytes) pairs # TODO: where should commit_at_push() be called? HDoParseBreakpoint determines whether to stop, which ParserStack doesn't know about. could be a bool parameter to push() if should_commit: - self.commit_at_push() + allocs = self.commit_at_push() + #for addr, alloc in allocs.items(): + # self.pending_allocs[addr] = self.pending_allocs.get(addr, 0) + alloc def pop(self): parser_obj = self.peek() @@ -156,6 +159,9 @@ class ParserStack: self.stack_events.append((StackEvent.POP, int(self.parse_state_gdbval['arena']['arena_malloc_bytes']), parser_obj)) allocs = self.commit_at_pop() + # TODO: where do we commit unclaimed allocs in a parserstack? 
+ #for addr, alloc in allocs.items(): + # self.pending_allocs[addr] = self.pending_allocs.get(addr, 0) + alloc #print("adding mem use: parser:", str(parser_obj), "arena:", hex(int(self.arena)), "bytes:", allocated_bytes) # DEBUG #print("adding mem use (alternate): parser:", str(parser_obj), "arena:", hex(int(self.arena)), "bytes:", allocs) # DEBUG #parser_obj.add_mem_use(int(self.arena), allocs) @@ -269,9 +275,13 @@ class ParserStack: #print("commit_at_pop: current_frame_alloc", current_frame_alloc) # DEBUG pop_allocs = {} parser_cache = {} + ev_list_length = len(ev_list) # TODO: clean up - if len(ev_list) == 1: + # TODO: ideas for getting better performance: try to see if we can just commit allocations when execution actually pauses + # TODO: maybe numpy could help + + if ev_list_length == 1: # Compare bytes allocated in arena to last known value, which will be the additional bytes allocated since reentering the stack frame #alloc_size = self.stack_events[ev_start][1] - current_event[1] #current_event[2].add_mem_use(int(self.arena), current_frame_alloc) @@ -279,16 +289,21 @@ class ParserStack: # print("commit_at_pop: pop_allocs:", { current_event[2].address: current_frame_alloc }) # DEBUG pop_allocs[current_event[2].address] = pop_allocs.get(current_event[2].address, 0) + current_frame_alloc parser_cache[current_event[2].address] = current_event[2] + current_event[2].add_mem_use(int(self.arena), current_frame_alloc) + profiler.disable() # DEBUG + return pop_allocs # return { current_event[2].address: current_frame_alloc } - elif len(ev_list) > 1: + elif ev_list_length > 1: #bytes_list = [ev[1] for ev in ev_list] + bigger = ev_list[1:] + smaller = ev_list[:-1] #differences = map(lambda smaller, bigger: bigger-smaller, bytes_list[:-1], bytes_list[1:]) - differences = [bigger[1]-smaller[1] for bigger, smaller in zip(ev_list[1:], ev_list[:-1])] + #differences = [bigger[1]-smaller[1] for bigger, smaller in zip(ev_list[1:], ev_list[:-1])] # TODO: this could be a list 
comprehension too for index, event in enumerate(ev_list[:-1]): parser_cache[int(event[2].address)] = event[2] - alloc = differences[index] + alloc = bigger[index][1] - smaller[index][1] pop_allocs[event[2].address] = pop_allocs.get(event[2].address, 0) + alloc #alloc = next(differences) #print("commit_at_pop: alloc:", alloc) # DEBUG @@ -338,3 +353,8 @@ class ParserStack: # checking self.committed will show if all the relevant allocations before revisiting the current stack frame have been committed. # - self.stack_events[self.committed] == current_event + def get_pending_allocations(self): + return self.pending_allocs + + def clear_pending_allocations(self): + self.pending_allocs = {} diff --git a/gdb-port/top-level-parse.py b/gdb-port/top-level-parse.py index 80e0fa5..8b3dd9e 100644 --- a/gdb-port/top-level-parse.py +++ b/gdb-port/top-level-parse.py @@ -13,6 +13,7 @@ class TopLevelParse: self.vt_types = None self.parser_decombinator = None self.debug_stop = False # DEBUG + self.pending_allocs = {} # Counts stack pushes/pops to determine if stopping is needed for hammer-parse-after-apply self.ast_stack_index = None @@ -266,4 +267,15 @@ class TopLevelParse: # TODO: get_avg_mem_use_all_arenas, get_total_mem_use + def gather_allocations_in_parser_stacks(self): + pending_allocs_by_arena = {} + + for ps in self.parser_stacks: + allocs = ps.get_pending_allocations() + #pending_allocs_by_arena[ps.arena] = { ps.arena : ps.get_pending_allocations() } + + # TODO: allocations are grouped by arena, then parser address. we get the arena address from the parser stack, which may be shared between parser stacks + # Therefore, it can't just be blindly assigned to a key with the arena's address. the two dicts need to be merged, with the values of duplicate entries added together + # However, the problem is that doing this naively would require two nested for loops minimum + top_level_parse = TopLevelParse() -- GitLab