diff --git a/HACKING b/HACKING
index 869c327574673b97402e235c576bc756740fa1dd..acee9c2ce8310722e20ec0be60708972da1cced4 100644
@@ -10,3 +10,42 @@ In particular, these names, and the macros that use them, are:
     Used by a_new and company. Should be an HParseState*
 - mm__:
     Used by h_new and h_free. Should be an HAllocator*
+Function suffixes
+Many functions come in several variants, to handle receiving optional
+parameters or parameters in multiple different forms.  For example,
+often, you have a global memory manager that is used for an entire
+program. In this case, you can leave the memory manager arguments
+off, letting them be implicit instead. Further, it is often convenient
+to pass an array or va_list to a function instead of listing the
+arguments inline (e.g., for wrapping a function, generating the
+arguments programmatically, or writing bindings for another language).
+Because we have found that most variants fall into a fairly small set
+of forms, and to minimize the number of API calls that users need to
+remember, there is a consistent naming scheme for these function
+variants: the function name is followed by two underscores and a set
+of single-character "flags" indicating what optional features that
+particular variant has (in alphabetical order, of course):
+  __a: takes the variadic arguments as a void*[]
+  __m: takes a memory manager as the first argument, to override the system memory manager
+  __v: takes the variadic argument list as a va_list
+Memory managers
+If the __m function variants are used or system_allocator is
+overridden, some difficult questions arise,
+particularly regarding the behavior when multiple memory managers are
+combined. As a general rule of thumb (exceptions will be explicitly
+documented), assume that
+   If you have a function f, which is passed a memory manager m and
+   returns a value r, any function that uses r as a parameter must
+   also be told to use m as a memory manager.
+In other words, don't let the (memory manager) streams cross.
\ No newline at end of file
diff --git a/NOTES b/NOTES
index 84b8c4637aea271c54bb505aae4680208c3610f4..77d899db433cd143136658209f1e02ecdd2fbb6e 100644
--- a/NOTES
+++ b/NOTES
@@ -35,4 +35,3 @@ what the comments say.
 TODO: implement datastructure linearization func
 TODO: implement free func for parsers
-TODO: Remove glib dependency (i.e., GQueue and GHashtable)
\ No newline at end of file
diff --git a/TODO b/TODO
new file mode 100644
index 0000000000000000000000000000000000000000..10ad174f868d2d283e681144d5df3d12dbb84868
--- /dev/null
+++ b/TODO
@@ -0,0 +1,3 @@
+- Make h_action functions be called only after parse is complete.
+- Allow alternative input streams (eg, zlib, base64)
+  - Bonus points if layered...
\ No newline at end of file
diff --git a/src/Makefile b/src/Makefile
index de340e7b3226864a12656d02c5f8562ad2152bdf..21b7ac5164f930f0c3de993f8fd9892e4633cf48 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -25,17 +25,29 @@ PARSERS := \
 	attr_bool \
-OUTPUTS := bitreader.o \
-	   hammer.o \
-	   bitwriter.o \
-	   libhammer.a \
-	   pprint.o \
-	   allocator.o \
-	   datastructures.o \
-	   system_allocator.o \
+	packrat
+	bitreader.o \
+	hammer.o \
+	bitwriter.o \
+	pprint.o \
+	allocator.o \
+	datastructures.o \
+	system_allocator.o \
+	benchmark.o \
+	compile.o \
+	$(PARSERS:%=parsers/%.o) \
+	$(BACKENDS:%=backends/%.o)
+TESTS := t_benchmark.o
+OUTPUTS := libhammer.a \
 	   test_suite.o \
 	   test_suite \
-	   $(PARSERS:%=parsers/%.o)
+	   $(HAMMER_PARTS) \
+	   $(TESTS)
 TOPLEVEL := ../
@@ -44,8 +56,7 @@ include ../common.mk
 all: libhammer.a
-libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o bitwriter.o system_allocator.o \
-	$(PARSERS:%=parsers/%.o)
+libhammer.a: $(HAMMER_PARTS)
 bitreader.o: test_suite.h
 hammer.o: hammer.h
@@ -56,6 +67,6 @@ all: test_suite
 test: test_suite
 	./test_suite -v
-test_suite: test_suite.o libhammer.a
+test_suite: test_suite.o $(TESTS) libhammer.a
 	$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
diff --git a/src/backends/packrat.c b/src/backends/packrat.c
new file mode 100644
index 0000000000000000000000000000000000000000..b884b73c3d0d99c0558dc16f12ab758f87186627
--- /dev/null
+++ b/src/backends/packrat.c
@@ -0,0 +1,15 @@
+#include "../internal.h"
+// Compile hook for the packrat backend. Receives the allocator, the
+// parser and backend-specific params, performs no work, and always
+// returns 0.
+int h_packrat_compile(HAllocator* mm__, HParser* parser, const void* params) {
+  return 0; // No compilation necessary, and everything should work
+	    // out of the box.
+// Parse hook for the packrat backend. Currently a stub: none of the
+// arguments are used and the result is always NULL.
+HParseResult *h_packrat_parse(HAllocator* mm__, HParser* parser, HParseState* parse_state) {
+  return NULL; // TODO: fill this in.
+// Entry-point table for the packrat backend: binds the compile and
+// parse slots to h_packrat_compile and h_packrat_parse.
+HParserBackendVTable h__packrat_backend_vtable = {
+  .compile = h_packrat_compile,
+  .parse = h_packrat_parse
diff --git a/src/benchmark.c b/src/benchmark.c
new file mode 100644
index 0000000000000000000000000000000000000000..62d89d9eaae5f2dae253f674dcfe59f8126210a2
--- /dev/null
+++ b/src/benchmark.c
@@ -0,0 +1,88 @@
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+#include "hammer.h"
+  Usage:
+  Create your parser (i.e., HParser*), and then call
+  HBenchmarkResults* results = h_benchmark(parser, testcases);
+  Then, you can format a report with:
+  h_benchmark_report(stdout, results);
+  or just generate code to make the parser run as fast as possible with:
+  h_benchmark_dump_optimized_code(stdout, results);
+/**
+ * Benchmark `parser` against `testcases` on every backend in
+ * [PB_MIN, PB_MAX).
+ *
+ * For each backend the grammar is compiled, every test case is checked
+ * against its expected unambiguous output, and -- only if all cases
+ * pass -- each case is timed. All output currently goes to stderr.
+ *
+ * `testcases` must be terminated by an entry whose `input` is NULL. A
+ * case whose `output_unambiguous` is NULL expects the parse to fail.
+ *
+ * Returns NULL for now; no HBenchmarkResults object is built yet.
+ */
+HBenchmarkResults *h_benchmark(HParser* parser, HParserTestcase* testcases) {
+  // For now, just output the results to stderr
+  HParserTestcase* tc = testcases;
+  HParserBackend backend = PB_MIN;
+  for (backend = PB_MIN; backend < PB_MAX; backend++) {
+    fprintf(stderr, "Compiling for backend %d ... ", (int)backend);
+    // Step 1: Compile grammar for the backend under test (this used to
+    // compile for PB_MIN on every iteration).
+    if (h_compile(parser, backend, NULL) == -1) {
+      // backend inappropriate for grammar...
+      fprintf(stderr, "failed\n");
+      continue;
+    }
+    int tc_failed = 0;
+    // Step 2: verify all test cases.
+    for (tc = testcases; tc->input != NULL; tc++) {
+      HParseResult *res = h_parse(parser, tc->input, tc->length);
+      char* res_unamb = (res != NULL) ? h_write_result_unamb(res->ast) : NULL;
+      int matched;
+      if (res_unamb == NULL || tc->output_unambiguous == NULL) {
+        // No output on one side: this is only a pass when a failed
+        // parse was also what the test case expected. Never hand a
+        // NULL pointer to strcmp.
+        matched = (res_unamb == NULL && tc->output_unambiguous == NULL);
+      } else {
+        matched = (strcmp(res_unamb, tc->output_unambiguous) == 0);
+      }
+      if (!matched) {
+        // test case failed...
+        fprintf(stderr, "failed\n");
+        // We want to run all testcases, for purposes of generating a
+        // report. (eg, if users are trying to fix a grammar for a
+        // faster backend)
+        tc_failed++;
+      }
+      // NOTE(review): res_unamb is never released here; confirm which
+      // allocator h_write_result_unamb uses and free it accordingly.
+      if (res != NULL) {
+        h_parse_result_free(res);
+      }
+    }
+    if (tc_failed > 0) {
+      // Can't use this parser; skip to the next
+      fprintf(stderr, "Backend failed testcases; skipping benchmark\n");
+      continue;
+    }
+    // Step 3: time each test case.
+    for (tc = testcases; tc->input != NULL; tc++) {
+      // The goal is to run each testcase for at least 100ms
+      // (100000000 ns), doubling the repeat count until we get there.
+      // TODO: replace this with a posix timer-based benchmark. (cf. timerfd_create, timer_create, setitimer)
+      int count = 1, cur;
+      struct timespec ts_start, ts_end;
+      long long time_diff;
+      do {
+        count *= 2; // Yes, this means that the first run will run the function twice. This is fine, as we want multiple runs anyway.
+        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_start);
+        for (cur = 0; cur < count; cur++) {
+          HParseResult *timed = h_parse(parser, tc->input, tc->length);
+          // Cases that expect a parse failure yield NULL here.
+          if (timed != NULL) {
+            h_parse_result_free(timed);
+          }
+        }
+        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_end);
+        // time_diff is in ns. Widen before multiplying so the seconds
+        // term cannot overflow where time_t/long is 32 bits.
+        time_diff = (long long)(ts_end.tv_sec - ts_start.tv_sec) * 1000000000LL
+                  + (long long)(ts_end.tv_nsec - ts_start.tv_nsec);
+      } while (time_diff < 100000000);
+      fprintf(stderr, "Case %d: %lld ns/parse\n", (int)(tc - testcases),  time_diff / count);
+    }
+  }
+  return NULL;
+}
+// Report hook for benchmark results. Takes the output stream and the
+// results object; currently a stub that does nothing with either.
+void h_benchmark_report(FILE* stream, HBenchmarkResults* result) {
+  // TODO: fill in this function
diff --git a/src/compile.c b/src/compile.c
new file mode 100644
index 0000000000000000000000000000000000000000..e24839dea2ffbae091dd395ec1b4d2ca8a0c3034
--- /dev/null
+++ b/src/compile.c
@@ -0,0 +1,15 @@
+// This file contains functions related to managing multiple parse backends
+#include "hammer.h"
+#include "internal.h"
+// Backend dispatch table, indexed by HParserBackend and sized PB_MAX.
+// Only the first slot (packrat) is explicitly initialized; as a static
+// array, any remaining slots are NULL.
+static HParserBackendVTable *backends[PB_MAX] = {
+  &h__packrat_backend_vtable,
+// Convenience form of h_compile__m that supplies &system_allocator as
+// the memory manager and forwards the remaining arguments unchanged.
+int h_compile(HParser* parser, HParserBackend backend, const void* params) {
+  return h_compile__m(&system_allocator, parser, backend, params);
+// Compile `parser` for `backend`, using mm__ as the memory manager, by
+// dispatching to that backend's compile hook.
+//
+// Returns whatever the hook returns, or -1 when `backend` is outside
+// [PB_MIN, PB_MAX) or has no registered compile hook.
+int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, const void* params) {
+  // The table is sized PB_MAX but slots without an initializer are
+  // NULL, so validate before dereferencing.
+  if (backend < PB_MIN || backend >= PB_MAX
+      || backends[backend] == NULL
+      || backends[backend]->compile == NULL) {
+    return -1;
+  }
+  return backends[backend]->compile(mm__, parser, params);
+}
diff --git a/src/hammer.h b/src/hammer.h
index 1c1a6ceef17124904969f9fcd1136a26d8a51d05..15ff6ec631fab4a8b2dc5c3911ee2cb8cc977a85 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -30,6 +30,12 @@ typedef int bool;
 typedef struct HParseState_ HParseState;
+// Identifies a parser backend. Valid backends occupy [PB_MIN, PB_MAX).
+typedef enum HParserBackend_ {
+  PB_MIN = 0,
+  PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
+  PB_MAX // One past the last backend: loop bound and dispatch-table size. Keep last.
+} HParserBackend;
 typedef enum HTokenType_ {
@@ -112,6 +118,17 @@ typedef struct HParser_ {
   void *env;
 } HParser;
+// {{{ Stuff for benchmarking
+// One benchmark test case. h_benchmark() walks an array of these and
+// stops at the first entry whose input is NULL.
+typedef struct HParserTestcase_ {
+  unsigned char* input; // bytes handed to h_parse(); NULL marks the end of the array
+  size_t length; // passed to h_parse() as the length of input
+  char* output_unambiguous; // compared against h_write_result_unamb() of the parse result
+} HParserTestcase;
+// Result type returned by h_benchmark(); it has no members yet.
+// NOTE(review): a struct with no members is a compiler extension, not
+// ISO C -- confirm the supported toolchains accept it.
+typedef struct HBenchmarkResults_ {
+} HBenchmarkResults;
+// }}}
 // {{{ Preprocessor definitions
 #define HAMMER_FN_DECL_NOARG(rtype_t, name)		\
   rtype_t name(void);					\
@@ -519,6 +536,15 @@ HAMMER_FN_DECL(char*, h_write_result_unamb, const HParsedToken* tok);
 HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent, int delta);
+ * Build parse tables for the given parser backend. See the
+ * documentation for the parser backend in question for information
+ * about the [params] parameter, or just pass in NULL for the defaults.
+ *
+ * Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise.
+ */
+HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params);
  * TODO: Document me
@@ -541,4 +567,10 @@ const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
 void h_bit_writer_free(HBitWriter* w);
+// {{{ Benchmark functions
+HBenchmarkResults *h_benchmark(HParser* parser, HParserTestcase* testcases);
+void h_benchmark_report(FILE* stream, HBenchmarkResults* results);
+void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results);
+// }}}
 #endif // #ifndef HAMMER_HAMMER__H
diff --git a/src/internal.h b/src/internal.h
index a24cc0e7a518b03e9051e7e1495a7e9a013d82b2..8a8f97209f202c7247fce0db4e3d77506fc986a7 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -109,6 +109,12 @@ struct HParseState_ {
   HHashTable *recursion_heads;
+// Table of entry points that each parser backend provides.
+typedef struct HParserBackendVTable_ {
+  // Prepares `parser` for this backend; called with the allocator and
+  // the backend-specific params pointer.
+  int (*compile)(HAllocator *mm__, HParser* parser, const void* params);
+  // Runs `parser` against `parse_state` and yields the parse result.
+  HParseResult* (*parse)(HAllocator *mm__, HParser* parser, HParseState* parse_state);
+} HParserBackendVTable;
 /* The (location, parser) tuple used to key the cache.
@@ -173,6 +179,10 @@ typedef struct HParserCacheValue_t {
 } HParserCacheValue;
+// Backends {{{
+extern HParserBackendVTable h__packrat_backend_vtable;
+// }}}
 // TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
 long long h_read_bits(HInputStream* state, int count, char signed_p);
diff --git a/src/t_benchmark.c b/src/t_benchmark.c
new file mode 100644
index 0000000000000000000000000000000000000000..ad682b886d9c9cbc9a183182ebc84451c2b9604c
--- /dev/null
+++ b/src/t_benchmark.c
@@ -0,0 +1,14 @@
+// At this point, this is just a compile/link test.
+#include "hammer.h"
+// Test case table holding only the terminating entry (NULL input), so
+// h_benchmark() iterates over zero cases.
+HParserTestcase testcases[] = {
+  {NULL, 0, NULL}
+// Compile/link smoke test: calls h_benchmark() with a NULL parser and
+// the testcases table, then hands the result to h_benchmark_report().
+void test_benchmark_1() {
+  HParser *parser = NULL; // TODO: fill this in.
+  HBenchmarkResults *res = h_benchmark(parser, testcases);
+  h_benchmark_report(stderr, res);