# parser-name-instrumentation.py

# perf script event handlers, generated by perf script -g python
# Licensed under the terms of the GNU GPL License version 2

# The common_* event handler fields are the most useful fields common to
# all events.  They don't necessarily correspond to the 'common_*' fields
# in the format files.  Those fields not available as handler params can
# be retrieved using Python functions of the form common_*(context).
# See the perf-script-python Documentation for the list of available functions.

# TODO: handlers for filters + postordinate parser fails to get named

from __future__ import print_function

import os
import sys

sys.path.append(os.environ['PERF_EXEC_PATH'] + \
	'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')

from perf_trace_context import *
from Core import *

from collections import defaultdict

# NOTE: Parsers are uniquely identified by their address in memory
# The memory use is further split up along different arenas

class Parser:
	"""Bookkeeping record for one parser observed in the trace.

	A parser carries an address and a name.  The name may be None when the
	object is created and can be filled in later with name_parser().  Memory
	use is accumulated per state key in the bytes_used dict.
	"""

	#TODO: remove
	# Class-level dict; none of the methods in this class read or write it.
	_parser_names = {}

	def __init__(self, name, address):
		"""Store name (may be None) and address; start with no recorded memory use."""
		self.name = name
		self.address = address
		# Maps a state key to the number of bytes accounted to this parser.
		self.bytes_used = {}

	def name_parser(self, name):
		"""Set (or overwrite) this parser's name."""
		self.name = name

	def get_name_or_placeholder(self):
		"""Return the parser's name, or a placeholder string if it is still None."""
		if self.name is None:
			return "Wait for it... (if you're reading this, you found a bug)"
		return self.name

	def add_mem_use(self, state, size):
		"""Add size bytes to the total recorded for state (starting from 0)."""
		self.bytes_used[state] = self.bytes_used.get(state, 0) + size

	def get_mem_use(self, state=None):
		"""Return the recorded memory use.

		With state=None the whole state -> bytes dict is returned (the live
		dict, not a copy).  Otherwise the byte count for that state is
		returned, or 0 if nothing has been recorded for it; the lookup does
		not add an entry.
		"""
		if state is None:
			return self.bytes_used
		return self.bytes_used.get(state, 0)

class ParserStack:
	"""Stack of parser objects, tagged with a parse state and an arena.

	The stack itself is a plain list (p_stack) with the top at the end.
	pop(), peek(), name_top_parser() and add_mem_use_top() raise IndexError
	when the stack is empty.
	"""

	def __init__(self, parse_state, arena):
		"""Create an empty stack tagged with parse_state and arena (either may be None)."""
		self.parse_state = parse_state
		self.arena = arena
		self.p_stack = []

	def push(self, parser):
		"""Push parser on top of the stack."""
		self.p_stack.append(parser)

	def pop(self):
		"""Remove and return the parser on top of the stack."""
		return self.p_stack.pop()

	def peek(self):
		"""Return the parser on top of the stack without removing it."""
		return self.p_stack[-1]

	def set_state(self, state):
		"""Replace the parse state this stack is tagged with."""
		self.parse_state = state

	# Shortcut for setting the name property of the parser on the top of stack.
	#
	# In terms of tracing, *most* calls to a parser look something like this
	# with the packrat backend:
	#   h_do_parse()
	#     parse_foo()
	#       perform_lowlevel_parse()
	#
	# perform_lowlevel_parse() is called when the memo table at that position
	# is not filled in yet. It calls the corresponding parse_* virtual function
	# via the vtable, but other than that does not have type information.
	# It's probably possible to extract type information by comparing vtable
	# addresses, but that seems painful.
	#
	# parse_foo() is the parser's corresponding virtual function in the
	# frontend, which does not have the equivalent of a "this" pointer.
	#
	# So what we do to keep track of parsers is incrementally filling in the
	# details for both.
	#
	# h_do_parse() is the backend's "actually run the parser" function, but
	# does not get called for some parsers (my intuition says that it gets
	# called only for higher-order parsers). It also contains the decision
	# logic about whether to call perform_lowlevel_parse().
	#
	# Possible scenarios:
	#   h_do_parse()
	#     perform_lowlevel_parse()
	#       parse_foo()
	#
	#   h_do_parse()
	#     perform_lowlevel_parse()
	#
	#   h_do_parse()
	def name_top_parser(self, name):
		"""Name the parser on top of the stack via its name_parser() method."""
		self.p_stack[-1].name_parser(name)

	def _charge(self, parser, size):
		"""Add size bytes to parser.bytes_used under this stack's parse state."""
		# bytes_used is a dict, so the byte count has to be accumulated under
		# a key; "+=" on the dict itself raises TypeError.
		# NOTE(review): parse_state is used as the key to mirror
		# Parser.add_mem_use(state, size) -- confirm this is the intended key.
		used = parser.bytes_used
		used[self.parse_state] = used.get(self.parse_state, 0) + size

	def add_mem_use_each(self, size):
		"""Account size bytes to every parser currently on the stack."""
		for p in self.p_stack:
			self._charge(p, size)

	def add_mem_use_top(self, size):
		"""Account size bytes to the parser on top of the stack only."""
		self._charge(self.p_stack[-1], size)

	def show_stack(self):
		"""Print a one-line summary with the current stack depth."""
		print("stack would be printed here. Depth:", len(self.p_stack))
		#print([(p.get_name_or_placeholder(), hex(p.address)) for p in self.p_stack])

	def depth(self):
		"""Return the number of parsers on the stack."""
		return len(self.p_stack)

#parserStack = ParserStack(0,0)

# Class that is responsible for bookkeeping throughout the entire parse
# NB, this is slightly different terminology than the hammer API implicitly uses:
# There, a parse is started by h_parse(), and it is associated with a parse state.
# This corresponds to the ParserStack above.
# Subsequent h_do_parse()s with the same parse state belong to the same parse

# The TopLevelParse class is initialized in trace_begin(), and is used until the end of the trace
class TopLevelParse:
	"""Trace-wide bookkeeping: a stack of parser stacks plus a parser memo.

	parser_stacks gets one ParserStack appended per enter_h_packrat_parse()
	call and one removed per return_from_h_packrat_parse() call; the current
	one is at the end.  parser_objs maps a parser address to its Parser
	object so each address is only ever represented by one object.
	"""

	def __init__(self):
		"""Start with no parser stacks and an empty parser memo."""
		self.parser_stacks = []
		self.parser_objs = {}

	# Called from h_packrat_parse()'s handler, where the parse state and arena get initialized
	def enter_h_packrat_parse(self, parser):
		"""Open a new ParserStack with unknown state and arena; always returns 0.

		The parser argument is currently unused.
		"""
		# TODO: add a parser stack or something?
		parser_stack = ParserStack(None, None)
		self.parser_stacks.append(parser_stack)
		return 0

	def enter_h_do_parse(self, parse_state, arena, parser):
		"""Fill in the current stack's state and arena if they are not known yet.

		Only acts while the current stack's parse_state is still None; later
		calls leave the stack untouched.  The parser argument is unused.
		"""
		parser_stack = self.peek_parserstack()
		if parser_stack.parse_state is None and parser_stack.parse_state != parse_state:
			self.first_h_do_parse_after_packrat_parse(parse_state, arena)

	# Called from h_do_parse()'s handler, at which point we know the addresses of the state and arena
	def first_h_do_parse_after_packrat_parse(self, parse_state, arena):
		"""Record parse_state and arena on the current parser stack."""
		parser_stack = self.peek_parserstack()
		parser_stack.set_state(parse_state)
		# The stack is created with arena=None; record the arena here as
		# well, otherwise it is dropped and never becomes known.
		parser_stack.arena = arena

	# Popping the stack of stack of parsers
	def return_from_h_packrat_parse(self):
		"""Discard the current parser stack, warning if it still holds parsers."""
		old_stack = self.parser_stacks.pop()
		if old_stack.depth() > 0:
			print("Warning: parser stack not empty but parse is successful?")
		# TODO: capture the return value in the probe, so we can tell if the parse was successful

	# Memoize the parser object for this particular address, then push it on the stack
	# Returns the parser object we just initialized (or the one already existing)
	def enter_perform_lowlevel_parse(self, parser_addr):
		"""Look up or create the Parser for parser_addr, push it, and return it."""
		try:
			parser_obj = self.parser_objs[parser_addr]
		except KeyError:
			# Create a parser object with no name and the address of the parser
			parser_obj = Parser(None, parser_addr)
			self.parser_objs[parser_addr] = parser_obj

		self.peek_parserstack().push(parser_obj)
		return parser_obj

	def return_from_perform_lowlevel_parse(self):
		"""Pop the top parser off the current parser stack; returns nothing."""
		self.peek_parserstack().pop()
		# debug print here

	def parse_virtual(self, parser_name):
		"""Name the parser on top of the current stack.

		Prints a warning if the parser already has a different, non-empty
		name; the new name is applied either way.
		"""
		parser_obj = self.peek_parser()
		if parser_obj.name and parser_obj.name != parser_name:
			print("Warning: parser already named! This is a bug. old name: %s, new name: %s" % (parser_obj.name, parser_name))

		parser_obj.name_parser(parser_name)

	def peek_parserstack(self):
		"""Return the current parser stack; raises IndexError if there is none."""
		# TODO: handle empty stack
		return self.parser_stacks[-1]

	def peek_parser(self):
		"""Return the top parser of the current parser stack."""
		return self.parser_stacks[-1].peek()

#TODO: refactor memory use code into TopLevelParse, as well as using the Parser objects to hold memory use

# Module-level memory-use table; get_mem_use() indexes it as mem_use[parser][arena].
mem_use = {}
# Single TopLevelParse instance, created when this script is loaded.
top_level_parse = TopLevelParse()

def get_mem_use(parser, arena):
	"""Return the memory use recorded for parser in arena, defaulting to 0.

	Missing entries are created on lookup: an empty per-parser table for a
	parser that has no entry yet, and a 0 for an arena that has no entry
	yet.  A parser without an entry therefore yields 0 instead of raising
	KeyError, matching how a missing arena is treated.
	"""
	# Only the dict's contents are modified, never the module-level name, so
	# no "global" declaration is needed.
	return mem_use.setdefault(parser, {}).setdefault(arena, 0)