Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# perf script event handlers, generated by perf script -g python
# Licensed under the terms of the GNU GPL License version 2
# The common_* event handler fields are the most useful fields common to
# all events. They don't necessarily correspond to the 'common_*' fields
# in the format files. Those fields not available as handler params can
# be retrieved using Python functions of the form common_*(context).
# See the perf-script-python Documentation for the list of available functions.
# TODO: handlers for filters + postordinate parser fails to get named
from __future__ import print_function
import os
import sys
sys.path.append(os.environ['PERF_EXEC_PATH'] + \
'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
from perf_trace_context import *
from Core import *
from collections import defaultdict
# NOTE: Parsers are uniquely identified by their address in memory
# The memory use is further split up along different arenas
class Parser:
    """Bookkeeping record for a single parser.

    A parser is identified by its address in memory. Its name may not be
    known when the object is created, in which case ``name`` is None until
    name_parser() is called.

    Attributes:
        name: the parser's name, or None while it is still unknown.
        address: the address identifying this parser.
        bytes_used: dict mapping a state key (as passed to add_mem_use())
            to the total size recorded under that key.
    """

    #TODO: remove
    _parser_names = {}

    def __init__(self, name, address):
        self.name = name
        self.address = address
        self.bytes_used = {}

    def name_parser(self, name):
        """Set (or overwrite) the name of this parser."""
        #if self.address not in Parser._parser_names:
        #    Parser._parser_names[self.address] = name
        self.name = name

    def get_name_or_placeholder(self):
        """Return the parser's name, or a placeholder string if it is unnamed."""
        if self.name is None:
            return "Wait for it... (if you're reading this, you found a bug)"
        return self.name

    def add_mem_use(self, state, size):
        """Add ``size`` to the running total recorded under ``state``."""
        # A missing entry (or one explicitly stored as None) counts as zero.
        current = self.bytes_used.get(state)
        self.bytes_used[state] = size if current is None else current + size

    def get_mem_use(self, state=None):
        """Return recorded memory use.

        With no argument (or ``state=None``) the whole ``bytes_used`` dict is
        returned. Otherwise the total for ``state`` is returned, or 0 if
        nothing has been recorded for it. Looking up a state does not create
        an entry for it.
        """
        if state is None:
            return self.bytes_used
        return self.bytes_used.get(state, 0)
class ParserStack:
    """Stack of parser objects belonging to one parse.

    Attributes:
        parse_state: the parse state this stack is associated with (may be
            None until it is supplied through set_state()).
        arena: the arena associated with this stack, as given at construction.
        p_stack: list used as the stack; the last element is the top.
    """

    def __init__(self, parse_state, arena):
        self.parse_state = parse_state
        self.arena = arena
        self.p_stack = []

    def push(self, parser):
        """Push ``parser`` onto the top of the stack."""
        self.p_stack.append(parser)

    def pop(self):
        """Remove and return the top parser. Raises IndexError if empty."""
        return self.p_stack.pop()

    def peek(self):
        """Return the top parser without removing it. Raises IndexError if empty."""
        return self.p_stack[-1]

    def set_state(self, state):
        """Record the parse state this stack belongs to."""
        self.parse_state = state

    # Shortcut for setting the name property of the parser on the top of stack
    # In terms of tracing, *most* calls to a parser look something like this with the packrat backend:
    # h_do_parse()
    # parse_foo()
    # perform_lowlevel_parse()
    # perform_lowlevel_parse() is called when the memo table at that position is not filled in yet.
    # it calls the corresponding parse_* virtual function via the vtable, but other than that does not have type information
    # it's probably possible to extract type information, by comparing vtable addresses, but that seems painful
    # parse_foo() is the parser's corresponding virtual function in the frontend, which does not have the equivalent of a "this" pointer
    # So what we do to keep track of parsers is incrementally filling in the details for both
    # h_do_parse() is the backend's "actually run the parser" function, but does not get called for some parsers
    # (my intuition says that it gets called only for higher-order parsers)
    # also contains the decision logic about whether to call perform_lowlevel_parse()
    # possible scenarios:
    # h_do_parse()
    # perform_lowlevel_parse()
    # parse_foo()
    # h_do_parse()
    # perform_lowlevel_parse()
    # h_do_parse()
    def name_top_parser(self, name):
        """Name the parser on top of the stack. Raises IndexError if empty."""
        self.p_stack[-1].name_parser(name)

    def _charge(self, parser, size):
        """Add ``size`` to ``parser``'s bytes_used entry for this stack's parse state."""
        # bytes_used is a dict, so the size has to be accumulated under a key
        # rather than added to the dict itself.
        # NOTE(review): keyed by parse_state to mirror the ``state`` parameter
        # of Parser.add_mem_use(); confirm whether the arena was intended.
        key = self.parse_state
        previous = parser.bytes_used.get(key)
        parser.bytes_used[key] = size if previous is None else previous + size

    def add_mem_use_each(self, size):
        """Charge ``size`` to every parser currently on the stack."""
        for p in self.p_stack:
            self._charge(p, size)

    def add_mem_use_top(self, size):
        """Charge ``size`` to the parser on top of the stack. Raises IndexError if empty."""
        self._charge(self.p_stack[-1], size)

    def show_stack(self):
        """Print a short summary of the stack (currently only its depth)."""
        print("stack would be printed here. Depth:", len(self.p_stack))
        #print([(p.get_name_or_placeholder(), hex(p.address)) for p in self.p_stack])

    def depth(self):
        """Return the number of parsers on the stack."""
        return len(self.p_stack)
#parserStack = ParserStack(0,0)
# Class that is responsible for bookkeeping throughout the entire parse
# NB, this is slightly different terminology than the hammer API implicitly uses:
# There, a parse is started by h_parse(), and it is associated with a parse state.
# This corresponds to the ParserStack above.
# Subsequent h_do_parse()s with the same parse state belong to the same parse
# The TopLevelParse class is initialized in trace_begin(), and is used until the end of the trace
class TopLevelParse:
    """Bookkeeping for the whole trace.

    Attributes:
        parser_stacks: list used as a stack of ParserStack objects; one is
            pushed per h_packrat_parse() entry and popped on its return.
        parser_objs: dict mapping a parser address to its Parser object.
    """

    def __init__(self):
        self.parser_stacks = []
        self.parser_objs = {}

    # Called from h_packrat_parse()'s handler, where the parse state and arena get initialized
    def enter_h_packrat_parse(self, parser):
        """Push a fresh ParserStack whose parse state and arena are not yet known.

        ``parser`` is currently unused. Always returns 0.
        """
        # TODO: add a parser stack or something?
        parser_stack = ParserStack(None, None)
        self.parser_stacks.append(parser_stack)
        return 0

    def enter_h_do_parse(self, parse_state, arena, parser):
        """Fill in the current stack's parse state/arena on the first h_do_parse().

        Nothing happens if the current stack already has a parse state.
        ``parser`` is currently unused.
        """
        parser_stack = self.peek_parserstack()
        # Only the first h_do_parse() after h_packrat_parse() sees a stack
        # without a state; a None parse_state carries no information to record.
        if parser_stack.parse_state is None and parse_state is not None:
            self.first_h_do_parse_after_packrat_parse(parse_state, arena)

    # Called from h_do_parse()'s handler, at which point we know the addresses of the state and arena
    def first_h_do_parse_after_packrat_parse(self, parse_state, arena):
        """Record the now-known parse state and arena on the current stack."""
        parser_stack = self.peek_parserstack()
        parser_stack.set_state(parse_state)
        # The stack was created with arena=None; store the arena as well so
        # it is not silently dropped.
        parser_stack.arena = arena

    # Popping the stack of stack of parsers
    def return_from_h_packrat_parse(self):
        """Pop the current ParserStack, warning if it still holds parsers."""
        old_stack = self.parser_stacks.pop()
        if old_stack.depth() > 0:
            print("Warning: parser stack not empty but parse is successful?")
        # TODO: capture the return value in the probe, so we can tell if the parse was successful

    # Memoize the parser object for this particular address, then push it on the stack
    # Returns the parser object we just initialized (or the one already existing)
    def enter_perform_lowlevel_parse(self, parser_addr):
        """Look up or create the Parser for ``parser_addr`` and push it on the current stack."""
        try:
            parser_obj = self.parser_objs[parser_addr]
        except KeyError:
            # Create a parser object with no name and the address of the parser
            parser_obj = Parser(None, parser_addr)
            self.parser_objs[parser_addr] = parser_obj
        parser_stack = self.peek_parserstack()
        parser_stack.push(parser_obj)
        return parser_obj

    def return_from_perform_lowlevel_parse(self):
        """Pop the top parser off the current stack; the popped object is discarded."""
        parser_stack = self.peek_parserstack()
        parser_stack.pop()
        # debug print here

    def parse_virtual(self, parser_name):
        """Name the parser on top of the current stack.

        Prints a warning if the parser already carries a different name; the
        name is overwritten with ``parser_name`` either way.
        """
        parser_obj = self.peek_parser()
        if parser_obj.name and parser_obj.name != parser_name:
            print("Warning: parser already named! This is a bug. old name: %s, new name: %s" % (parser_obj.name, parser_name))
        parser_obj.name_parser(parser_name)

    def peek_parserstack(self):
        """Return the current (most recently pushed) ParserStack. Raises IndexError if there is none."""
        # TODO: handle empty stack
        return self.parser_stacks[-1]

    def peek_parser(self):
        """Return the parser on top of the current ParserStack."""
        return self.parser_stacks[-1].peek()
#TODO: refactor memory use code into TopLevelParse, as well as using the Parser objects to hold memory use
# Module-level state: the single TopLevelParse instance and the memory-use
# table (indexed first by parser, then by arena; see get_mem_use()).
top_level_parse = TopLevelParse()
mem_use = dict()
def get_mem_use(parser, arena):
    """Return the value recorded in ``mem_use`` for ``parser`` and ``arena``.

    Missing entries default to 0 and are created as a side effect, both for
    an arena not yet seen under a known parser and for a parser that has no
    entry at all (which previously raised KeyError).
    """
    # mem_use is only read and mutated in place, never rebound, so no
    # ``global`` declaration is needed.
    per_arena = mem_use.setdefault(parser, {})
    return per_arena.setdefault(arena, 0)
Loading
Loading full blame...