diff --git a/.gitignore b/.gitignore
index 8101f080f272a845eb7cc6cf9b6447d0f17cf469..65465f4273f6d08208f273676fc465345e01c4f8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,27 +1,36 @@
+# generated files
 *.o
-*~
+*.os
+*.so
 *.a
+*.gem
+*.pyc
 *.class
-*.so
-jni/com*.h
-src/test_suite
-lib/hush
+libhammer.pc
+build/
 examples/dns
 examples/base64
 examples/base64_sem1
 examples/base64_sem2
-TAGS
-*.swp
-*.swo
+jni/com*.h
+src/test_suite
+
+# coverage and profiling stuff
+*.gcov
+*.gcda
+*.gcno
+gmon.out
+
+# editor leftovers
+*~
+*.sw?
 \#*
+
+# misc
+lib/hush
+TAGS
 docs/milestone2.dot.pdf
 *.dot.pdf
 Session.vim
-*.gcov
 cscope.out
-build/
-libhammer.pc
 .sconsign.dblite
-*.os
-*.pyc
-*.gem
diff --git a/HACKING b/HACKING
index 44f59912c50edd2bf76f01dbe19652926abc41e6..56a818ad43040b0f5b26eea535b3f6955f96166d 100644
--- a/HACKING
+++ b/HACKING
@@ -6,12 +6,13 @@ internal anaphoric macros use. Chances are that if you use these names
 for other things, you're gonna have a bad time.
 
 In particular, these names, and the macros that use them, are:
-- state:
-    Used by a_new and company. Should be an HParseState*
-- mm__:
-    Used by h_new and h_free. Should be an HAllocator*
-- stk__:
-    Used in desugaring. Should be an HCFStack*
+
+- `state`:
+    Used by `a_new` and company. Should be an `HParseState*`.
+- `mm__`:
+    Used by `h_new` and `h_free`. Should be an `HAllocator*`.
+- `stk__`:
+    Used in desugaring. Should be an `HCFStack*`.
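
For illustration, a minimal sketch of how the `state` anaphor gets picked
up, assuming the `a_new` definition from `src/parsers/parser_internal.h`
(this snippet is not part of the patch):

    /* inside a parser implementation, with parser_internal.h included */
    static HParsedToken *wrap_byte(HParseState *state, uint8_t b) {
        /* a_new allocates from state->arena, so a variable named `state`
           of type HParseState* must be in scope at the call site */
        HParsedToken *tok = a_new(HParsedToken, 1);
        tok->token_type = TT_UINT;
        tok->uint = b;
        return tok;
    }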
 
 Function suffixes
 =================
@@ -21,9 +22,9 @@ parameters or parameters in multiple different forms.  For example,
 often, you have a global memory manager that is used for an entire
 program. In this case, you can leave off the memory manager arguments
 off, letting them be implicit instead. Further, it is often convenient
-to pass an array or va_list to a function instead of listing the
-arguments inline (eg, for wrapping a function, generating the
-arguments programattically, or writing bindings for another language.
+to pass an array or `va_list` to a function instead of listing the
+arguments inline (e.g., for wrapping a function, generating the
+arguments programmatically, or writing bindings for another language).
 
 Because we have found that most variants fall into a fairly small set
 of forms, and to minimize the amount of API calls that users need to
@@ -32,21 +33,22 @@ variants: the function name is followed by two underscores and a set
 of single-character "flags" indicating what optional features that
 particular variant has (in alphabetical order, of course):
 
-  __a: takes variadic arguments as a void*[] (not implemented yet, but will be soon. 
-  __m: takes a memory manager as the first argument, to override the system memory manager.
-  __v: Takes the variadic argument list as a va_list
-
+- `__a`: takes variadic arguments as a `void*[]` (not implemented yet,
+  but will be soon.)
+- `__m`: takes a memory manager as the first argument, to override the
+  system memory manager.
+- `__v`: Takes the variadic argument list as a `va_list`.
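
As a quick illustration (a sketch only, not part of the patch), the `__m`
flag simply prepends the allocator argument; `system_allocator` is assumed
to be the default allocator exported by `hammer.h`:

    #include "hammer.h"

    void build_parsers(void) {
        HParser *p1 = h_ch('a');                        /* implicit system allocator */
        HParser *p2 = h_ch__m(&system_allocator, 'a');  /* __m: explicit allocator */
        (void)p1; (void)p2;
    }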
 
 Memory managers
 ===============
 
-If the __m function variants are used or system_allocator is
+If the `__m` function variants are used or `system_allocator` is
 overridden, there come some difficult questions to answer,
 particularly regarding the behavior when multiple memory managers are
 combined. As a general rule of thumb (exceptions will be explicitly
 documented), assume that
 
-   If you have a function f, which is passed a memory manager m and
+>  If you have a function f, which is passed a memory manager m and
    returns a value r, any function that uses r as a parameter must
    also be told to use m as a memory manager.
 
@@ -57,7 +59,7 @@ Language-independent test suite
 
 There is a language-independent representation of the Hammer test
 suite in `lib/test-suite`.  This is intended to be used with the
-tsparser.pl prolog library, along with a language-specific frontend.
+tsparser.pl Prolog library, along with a language-specific frontend.
 
 Only the C# frontend exists so far; to regenerate the test suites using it, run
 
diff --git a/Makefile b/Makefile
index 09aa037b487ff0c210810246275a77a76c882fdd..37d808572ffc98178f918aae1b68cf639091ed3e 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@
 # and kick off a recursive make
 # Also, "make src/all" turns into "make -C src all"
 
-SUBDIRS = src examples jni
+SUBDIRS = src examples src/bindings/jni
 
 include config.mk
 TOPLEVEL=.
diff --git a/SConstruct b/SConstruct
index 8aa8c816c624408068d4b70337adbc9a351d0fbc..9349525eb22aa978f1a64015dbf3f6ce6cca97e3 100644
--- a/SConstruct
+++ b/SConstruct
@@ -14,7 +14,7 @@ if platform.system() == 'Windows':
 vars = Variables(None, ARGUMENTS)
 vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packaging scripts)", None, PathVariable.PathIsDirCreate))
 vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept))
-vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['cpp', 'dotnet', 'perl', 'php', 'python', 'ruby']))
+vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['cpp', 'dotnet', 'jni', 'perl', 'php', 'python', 'ruby']))
 vars.Add('python', 'Python interpreter', 'python')
 
 tools = ['default', 'scanreplace']
@@ -73,6 +73,18 @@ AddOption('--coverage',
           action='store_true',
           help='Build with coverage instrumentation')
 
+AddOption('--force-debug',
+          dest='force_debug',
+          default=False,
+          action='store_true',
+          help='Build with debug symbols, even in the opt variant')
+
+AddOption('--gprof',
+          dest='gprof',
+          default=False,
+          action="store_true",
+          help='Build with profiling instrumentation for gprof')
+
 AddOption('--in-place',
           dest='in_place',
           default=False,
@@ -106,7 +118,15 @@ if env['CC'] == 'cl':
         ]
     )
 else:
-    env.MergeFlags('-std=c99 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable')
+    if env['PLATFORM'] == 'darwin':
+        # It's been reported that -D_POSIX_C_SOURCE breaks the Mac OS build; I
+        # think we may need _DARWIN_C_SOURCE instead of or in addition to it,
+        # but let's wait until we have access to a Mac to test/repro
+        env.MergeFlags('-std=c99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable')
+    else:
+        # Using -D_POSIX_C_SOURCE=200809L here, rather than on an ad-hoc basis
+        # when #including, is important
+        env.MergeFlags('-std=c99 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable')
 
 # Linker options
 if env['PLATFORM'] == 'darwin':
@@ -120,14 +140,31 @@ else:
     env.MergeFlags('-lrt')
 
 if GetOption('coverage'):
-    env.Append(CFLAGS=['--coverage'],
-               CXXFLAGS=['--coverage'],
-               LDFLAGS=['--coverage'])
+    env.Append(CCFLAGS=['--coverage'],
+               LDFLAGS=['--coverage'],
+               LINKFLAGS=['--coverage'])
     if env['CC'] == 'gcc':
         env.Append(LIBS=['gcov'])
     else:
         env.ParseConfig('llvm-config --ldflags')
 
+if GetOption('force_debug'):
+    if env['CC'] == 'cl':
+        env.Append(CCFLAGS=['/Z7'])
+    else:
+        env.Append(CCFLAGS=['-g'])
+
+if GetOption('gprof'):
+    if env['CC'] == 'gcc' and env['CXX'] == 'g++':
+        env.Append(CCFLAGS=['-pg'],
+		   LDFLAGS=['-pg'],
+                   LINKFLAGS=['-pg'])
+        env['GPROF'] = 1
+    else:
+        print("Can only use gprof with gcc")
+        Exit(1)
+
+
 dbg = env.Clone(VARIANT='debug')
 if env['CC'] == 'cl':
     dbg.Append(CCFLAGS=['/Z7'])
diff --git a/docs/hammerman.3 b/docs/hammerman.3
index cf1654b9943dd6e5160b3e5b519faa7c76210554..f3cf7e12b12d439a5026d3b23d5ff3bfea499e35 100644
--- a/docs/hammerman.3
+++ b/docs/hammerman.3
@@ -77,11 +77,13 @@ Benchmarking for parsing backends -- determine empirically which backend will be
 11
 12      HParseResult *result = h_parse(hello_parser, input, inputsize);
 13      if(result) {
-14          printf("yay!\n");
+14          printf("yay!\\n");
 15      } else {
-16          printf("boo!\n");
+16          printf("boo!\\n");
 17      }
-18  }
+18      h_parse_result_free(result);
+19      return 0 == result;
+20  }
 .fi
 .SH "AUTHOR"
 .sp
diff --git a/examples/SConscript b/examples/SConscript
index b34b85a1cd469386b752bc3721a8b54954315e2a..28c5734d830cad028ee10c3df8ee7a344bb01088 100644
--- a/examples/SConscript
+++ b/examples/SConscript
@@ -3,11 +3,18 @@ from __future__ import absolute_import, division, print_function
 Import('env')
 
 example = env.Clone()
-example.Append(LIBS="hammer", LIBPATH="../src")
+
+if 'GPROF' in env and env['GPROF'] == 1:
+    hammer_lib_name = "hammer_pg"
+else:
+    hammer_lib_name = "hammer"
+
+example.Append(LIBS=hammer_lib_name, LIBPATH="../src")
 
 dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c'])
+ttuser = example.Program('ttuser', 'ttuser.c')
 base64 = example.Program('base64', 'base64.c')
 base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c')
 base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c')
 ties = example.Program('ties', ['ties.c', 'grammar.c'])
-env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, ties])
\ No newline at end of file
+env.Alias("examples", [dns, ttuser, base64, base64_sem1, base64_sem2, ties])
diff --git a/examples/base64.c b/examples/base64.c
index 17264da9441d8fb7008496cc901c6a5f471a2db5..ddc162c0e4164e23ebef79ea4e3411f5ecf84cab 100644
--- a/examples/base64.c
+++ b/examples/base64.c
@@ -11,8 +11,6 @@
 #include <inttypes.h>
 #include "../src/hammer.h"
 
-#define DEBUG
-
 const HParser* document = NULL;
 
 void init_parser(void)
@@ -27,60 +25,17 @@ void init_parser(void)
     HParser *equals = h_ch('=');
 
     HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL);
-    HParser *bsfdig_4bit = h_choice(
-        h_ch('A'), h_ch('E'), h_ch('I'), h_ch('M'), h_ch('Q'), h_ch('U'),
-        h_ch('Y'), h_ch('c'), h_ch('g'), h_ch('k'), h_ch('o'), h_ch('s'),
-        h_ch('w'), h_ch('0'), h_ch('4'), h_ch('8'), NULL);
-    HParser *bsfdig_2bit = h_choice(h_ch('A'), h_ch('Q'), h_ch('g'), h_ch('w'), NULL);
-
-    HParser *base64_quad = h_sequence(bsfdig, bsfdig, bsfdig, bsfdig, NULL);
-    HParser *base64_quads = h_many(base64_quad);
-
-    HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, h_end_p(), NULL);
-    HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, h_end_p(), NULL);
-    HParser *base64_ending = h_choice(h_end_p(), base64_2, base64_1, NULL);
-    HParser *base64 = h_sequence(base64_quads, base64_ending, NULL);
-    // why does this parse "A=="?!
-    // why does this parse "aaA=" but not "aA=="?!
-
-    document = base64;
-}
-
-
-#include <string.h>
-#include <assert.h>
-#define TRUE (1)
-#define FALSE (0)
-
-void assert_parse(int expected, char *data) {
-    HParseResult *result;
-
-    size_t datasize = strlen(data);
-    result = h_parse(document, (void*)data, datasize);
-    if((result != NULL) != expected) {
-        fprintf(stderr, "Test failed: %s\n", data);
-    }
-#ifdef DEBUG
-    else {
-        fprintf(stderr, "Test succeeded: %s\n", data);
-        fprintf(stderr, "parsed=%" PRId64 " bytes\n", result->bit_length/8);
-        h_pprint(stdout, result->ast, 0, 0);
-    }
-#endif
-}
-
-void test() {
-    assert_parse(TRUE, "");
-    assert_parse(TRUE, "YQ==");
-    assert_parse(TRUE, "YXU=");
-    assert_parse(TRUE, "YXVy");
-    assert_parse(TRUE, "QVVSIFNBUkFG");
-    assert_parse(TRUE, "QVVSIEhFUlUgU0FSQUY=");
-    assert_parse(FALSE, "A");
-    assert_parse(FALSE, "A=");
-    assert_parse(FALSE, "A==");
-    assert_parse(FALSE, "AAA==");
-    assert_parse(FALSE, "aa==");
+    HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16);
+    HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4);
+    HParser *base64_3 = h_repeat_n(bsfdig, 4);
+    HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL);
+    HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL);
+    HParser *base64 = h_sequence(h_many(base64_3),
+                                       h_optional(h_choice(base64_2,
+                                                           base64_1, NULL)),
+                                       NULL);
+
+    document = h_sequence(h_whitespace(base64), h_whitespace(h_end_p()), NULL);
 }
 
 
@@ -94,8 +49,6 @@ int main(int argc, char **argv)
 
     init_parser();
 
-    test();
-
     inputsize = fread(input, 1, sizeof(input), stdin);
     fprintf(stderr, "inputsize=%zu\ninput=", inputsize);
     fwrite(input, 1, inputsize, stderr);
@@ -104,6 +57,7 @@ int main(int argc, char **argv)
     if(result) {
         fprintf(stderr, "parsed=%" PRId64 " bytes\n", result->bit_length/8);
         h_pprint(stdout, result->ast, 0, 0);
+        h_parse_result_free(result);
         return 0;
     } else {
         return 1;
diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c
index afbbef841cc0ef0593e68a1ca7101eacc976f474..7127d1eb4738c450fba5d3a9b8ab1fa3ac32496a 100644
--- a/examples/base64_sem1.c
+++ b/examples/base64_sem1.c
@@ -149,12 +149,13 @@ HParser *init_parser(void)
 
 #include <stdio.h>
 
+const HParser *parser;  // Allocated statically to suppress leak warnings
+
 int main(int argc, char **argv)
 {
     uint8_t input[102400];
     size_t inputsize;
-    const HParser *parser;
-    const HParseResult *result;
+    HParseResult *result;
 
     parser = init_parser();
 
@@ -166,6 +167,7 @@ int main(int argc, char **argv)
     if(result) {
         fprintf(stderr, "parsed=%" PRId64 " bytes\n", result->bit_length/8);
         h_pprint(stdout, result->ast, 0, 0);
+        h_parse_result_free(result);
         return 0;
     } else {
         return 1;
diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c
index b8f7b4a20312dcf39695ba52cdcf9573376d6c69..dac7e7ab0021198b76849da2bfe86af8864a9e9d 100644
--- a/examples/base64_sem2.c
+++ b/examples/base64_sem2.c
@@ -153,12 +153,13 @@ const HParser *init_parser(void)
 
 #include <stdio.h>
 
+const HParser *parser;  // Allocated statically to suppress leak warnings
+
 int main(int argc, char **argv)
 {
     uint8_t input[102400];
     size_t inputsize;
-    const HParser *parser;
-    const HParseResult *result;
+    HParseResult *result;
 
     parser = init_parser();
 
@@ -170,6 +171,7 @@ int main(int argc, char **argv)
     if(result) {
         fprintf(stderr, "parsed=%" PRId64 " bytes\n", result->bit_length/8);
         h_pprint(stdout, result->ast, 0, 0);
+        h_parse_result_free(result);
         return 0;
     } else {
         return 1;
diff --git a/examples/ttuser.c b/examples/ttuser.c
new file mode 100644
index 0000000000000000000000000000000000000000..c16e4625bb72d64f7803eec5f360b2cb98d10892
--- /dev/null
+++ b/examples/ttuser.c
@@ -0,0 +1,140 @@
+/*
+ * Example parser that demonstrates the use of user-defined token types.
+ *
+ * Note the custom printer function that hooks into h_pprint().
+ */
+
+#include "../src/hammer.h"
+#include "../src/glue.h"
+
+
+/*
+ * custom tokens
+ */
+
+HTokenType TT_SUBJ, TT_PRED, TT_OBJ, TT_ADJ, TT_ADVC;
+
+void
+pprint(FILE *stream, const HParsedToken *tok, int indent, int delta)
+{
+	/* 
+	 * Pretty-printer rules:
+	 *
+	 *  - Output 'indent' spaces after every newline you produce.
+	 *  - Do not add indent on the first line of output.
+	 *  - Do not add a trailing newline.
+	 *  - Indent sub-objects by adding 'delta' to 'indent'.
+	 */
+
+	if (((HParsedToken *)tok->user)->token_type == TT_SEQUENCE)
+		fprintf(stream, "\n%*s", indent, "");
+	h_pprint(stream, tok->user, indent, delta);
+}
+
+/* XXX define unamb_sub as well */
+
+void
+init(void)
+{
+	TT_SUBJ = h_allocate_token_new("subject", NULL, pprint);
+	TT_PRED = h_allocate_token_new("predicate", NULL, pprint);
+	TT_OBJ  = h_allocate_token_new("object", NULL, pprint);
+	TT_ADJ  = h_allocate_token_new("adjective", NULL, pprint);
+	TT_ADVC = h_allocate_token_new("adverbial clause", NULL, pprint);
+}
+
+
+/*
+ * semantic actions
+ *
+ * Normally these would be more interesting, but for this example, we just wrap
+ * our tokens in their intended types.
+ */
+HParsedToken *act_subj(const HParseResult *p, void *u) {
+	return H_MAKE(SUBJ, (void *)p->ast);
+}
+HParsedToken *act_pred(const HParseResult *p, void *u) {
+	return H_MAKE(PRED, (void *)p->ast);
+}
+HParsedToken *act_obj(const HParseResult *p, void *u) {
+	return H_MAKE(OBJ, (void *)p->ast);
+}
+HParsedToken *act_adj(const HParseResult *p, void *u) {
+	return H_MAKE(ADJ, (void *)p->ast);
+}
+HParsedToken *act_advc(const HParseResult *p, void *u) {
+	return H_MAKE(ADVC, (void *)p->ast);
+}
+
+
+/*
+ * grammar
+ */
+
+HParser *
+build_parser(void)
+{
+	/* words */
+	#define W(X)	h_whitespace(h_literal((const uint8_t *)(#X)))
+	H_RULE(art,	h_choice(W(a), W(the), NULL));
+	H_RULE(noun,	h_choice(W(cat), W(dog), W(fox), W(tiger), W(lion),
+			    W(bear), W(fence), W(tree), W(car), W(cow), NULL));
+	H_RULE(verb,	h_choice(W(eats), W(jumps), W(falls), NULL));
+	H_ARULE(adj,	h_choice(W(quick), W(slow), W(happy), W(lazy), W(cyan),
+			    W(magenta), W(yellow), W(black), W(brown), NULL));
+	H_RULE(adverb,	h_choice(W(with), W(over), W(after), NULL));
+	#undef W
+
+	/* phrases */
+	H_RULE(nphrase,	h_sequence(art, h_many(adj), noun, NULL));
+
+	/* sentence structure */
+	H_ARULE(subj,	nphrase);
+	H_ARULE(pred,	verb);
+	H_ARULE(obj,	nphrase);
+	H_ARULE(advc,	h_sequence(adverb, nphrase, NULL));
+	H_RULE(sentnc,	h_sequence(subj, pred,
+			    h_optional(obj), h_optional(advc), NULL));
+
+	return sentnc;
+}
+
+
+/*
+ * main routine: read, parse, print
+ *
+ * input e.g.:
+ * "the quick brown fox jumps the fence with a cyan lion"
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+
+int
+main(int argc, char **argv)
+{
+	uint8_t input[1024];
+	size_t sz;
+	const HParser *parser;
+	const HParseResult *result;
+
+	init();
+	parser = build_parser();
+
+	sz = fread(input, 1, sizeof(input), stdin);
+	if (!feof(stdin)) {
+		fprintf(stderr, "too much input\n");
+		return 1;
+	}
+
+	result = h_parse(parser, input, sz);
+	if (!result) {
+		fprintf(stderr, "no parse\n");
+		return 1;
+	}
+
+	h_pprintln(stdout, result->ast);
+	fprintf(stderr, "consumed %" PRId64 "/%zu bytes.\n",
+	    result->bit_length / 8, sz);
+	return 0;
+}
diff --git a/src/SConscript b/src/SConscript
index d109d25d65fc45fa19eeb256f62aa6ace6f358ce..f060ba129a0f13e3f3f51b4ee6150767a3e41158 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -56,7 +56,8 @@ parsers = ['parsers/%s.c'%s for s in
             'unimplemented',
             'whitespace',
             'xor',
-            'value']]
+            'value',
+            'seek']]
 
 backends = ['backends/%s.c' % s for s in
             ['missing', 'packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']]
@@ -103,10 +104,21 @@ if env['PLATFORM'] == 'win32':
     # prevent collision between .lib from dll and .lib for static lib
     static_library_name = 'hammer_s'
 
-libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts, \
+if 'GPROF' in env and env['GPROF'] == 1:
+    # Disable the shared library (it won't work with gprof) and rename the static one
+    build_shared_library = False
+    static_library_name = 'hammer_pg'
+
+# Markers for later
+libhammer_static = None
+libhammer_shared = None
+
+if build_shared_library:
+    libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts, \
                                      SHLIBVERSION=hammer_shlib_version)
 libhammer_static = env.StaticLibrary(static_library_name, parsers + backends + misc_hammer_parts)
-if build_shared_library:
+
+if libhammer_shared is not None:
     Default(libhammer_shared, libhammer_static)
     env.Install('$libpath', [libhammer_static, libhammer_shared])
 else:
@@ -121,14 +133,20 @@ env.Install('$pkgconfigpath', '../../../libhammer.pc')
 if GetOption('with_tests'):
     testenv = env.Clone()
     testenv.ParseConfig('pkg-config --cflags --libs glib-2.0')
-    testenv.Append(LIBS=['hammer'])
+    if libhammer_shared is not None:
+        testenv.Append(LIBS=['hammer'])
+    else:
+        testenv.Append(LIBS=[static_library_name])
     testenv.Prepend(LIBPATH=['.'])
     ctestexec = testenv.Program('test_suite', ctests + ['test_suite.c'], LINKFLAGS='--coverage' if testenv.GetOption('coverage') else None)
     ctest = Alias('testc', [ctestexec], ''.join(['env LD_LIBRARY_PATH=', os.path.dirname(ctestexec[0].path), ' ', ctestexec[0].path]))
     AlwaysBuild(ctest)
     testruns.append(ctest)
 
-Export('libhammer_static libhammer_shared')
+if libhammer_shared is not None:
+    Export('libhammer_static libhammer_shared')
+else:
+    Export('libhammer_static')
 
 for b in env['bindings']:
     env.SConscript(['bindings/%s/SConscript' % b])
diff --git a/src/allocator.c b/src/allocator.c
index cc259e605c56573b506f39194793e804ab4bf8b6..2ff5cacaa0e05da47ac851ef1ff71239ed5cde3b 100644
--- a/src/allocator.c
+++ b/src/allocator.c
@@ -29,24 +29,35 @@ struct arena_link {
   // For efficiency, we should probably allocate the arena links in 
   // their own slice, and link to a block directly. That can be
   // implemented later, though, with no change in interface.
-  struct arena_link *next; // It is crucial that this be the first item; so that 
-                           // any arena link can be casted to struct arena_link**.
-
+  struct arena_link *next;
   size_t free;
   size_t used;
   uint8_t rest[];
-} ;
+};
 
 struct HArena_ {
   struct arena_link *head;
   struct HAllocator_ *mm__;
+  /* does mm__ zero blocks for us? */
+  bool malloc_zeros;
   size_t block_size;
   size_t used;
   size_t wasted;
+#ifdef DETAILED_ARENA_STATS
+  size_t mm_malloc_count, mm_malloc_bytes;
+  size_t memset_count, memset_bytes;
+  size_t arena_malloc_count, arena_malloc_bytes;
+  size_t arena_su_malloc_count, arena_su_malloc_bytes;
+  size_t arena_si_malloc_count, arena_si_malloc_bytes;
+  size_t arena_lu_malloc_count, arena_lu_malloc_bytes;
+  size_t arena_li_malloc_count, arena_li_malloc_bytes;
+#endif
 
   jmp_buf *except;
 };
 
+static void * h_arena_malloc_raw(HArena *arena, size_t size, bool need_zero);
+
 void* h_alloc(HAllocator* mm__, size_t size) {
   void *p = mm__->alloc(mm__, size);
   if(!p)
@@ -61,7 +72,6 @@ HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
   struct arena_link *link = (struct arena_link*)h_alloc(mm__, sizeof(struct arena_link) + block_size);
   assert(ret != NULL);
   assert(link != NULL);
-  memset(link, 0, sizeof(struct arena_link) + block_size);
   link->free = block_size;
   link->used = 0;
   link->next = NULL;
@@ -69,6 +79,19 @@ HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
   ret->block_size = block_size;
   ret->used = 0;
   ret->mm__ = mm__;
+#ifdef DETAILED_ARENA_STATS
+  ret->mm_malloc_count = 2;
+  ret->mm_malloc_bytes = sizeof(*ret) + sizeof(struct arena_link) + block_size;
+  ret->memset_count = 0;
+  ret->memset_bytes = 0;
+  ret->arena_malloc_count = ret->arena_malloc_bytes = 0;
+  ret->arena_su_malloc_count = ret->arena_su_malloc_bytes = 0;
+  ret->arena_si_malloc_count = ret->arena_si_malloc_bytes = 0;
+  ret->arena_lu_malloc_count = ret->arena_lu_malloc_bytes = 0;
+  ret->arena_li_malloc_count = ret->arena_li_malloc_bytes = 0;
+#endif
+  /* XXX provide a mechanism to indicate mm__ returns zeroed blocks */
+  ret->malloc_zeros = false;
   ret->wasted = sizeof(struct arena_link) + sizeof(struct HArena_) + block_size;
   ret->except = NULL;
   return ret;
@@ -90,39 +113,120 @@ static void *alloc_block(HArena *arena, size_t size)
   return block;
 }
 
-void* h_arena_malloc(HArena *arena, size_t size) {
+void * h_arena_malloc_noinit(HArena *arena, size_t size) {
+  return h_arena_malloc_raw(arena, size, false);
+}
+
+void * h_arena_malloc(HArena *arena, size_t size) {
+  return h_arena_malloc_raw(arena, size, true);
+}
+
+static void * h_arena_malloc_raw(HArena *arena, size_t size,
+                                 bool need_zero) {
+  struct arena_link *link = NULL;
+  void *ret = NULL;
+
   if (size <= arena->head->free) {
-    // fast path..
-    void* ret = arena->head->rest + arena->head->used;
+    /* fast path.. */
+    ret = arena->head->rest + arena->head->used;
     arena->used += size;
     arena->wasted -= size;
     arena->head->used += size;
     arena->head->free -= size;
-    return ret;
+
+#ifdef DETAILED_ARENA_STATS
+    ++(arena->arena_malloc_count);
+    arena->arena_malloc_bytes += size;
+    if (need_zero) {
+      ++(arena->arena_si_malloc_count);
+      arena->arena_si_malloc_bytes += size;
+    } else {
+      ++(arena->arena_su_malloc_count);
+      arena->arena_su_malloc_bytes += size;
+    }
+#endif
   } else if (size > arena->block_size) {
-    // We need a new, dedicated block for it, because it won't fit in a standard sized one.
-    // This involves some annoying casting...
-    arena->used += size;
-    arena->wasted += sizeof(struct arena_link*);
-    void* link = alloc_block(arena, size + sizeof(struct arena_link*));
+    /*
+     * We need a new, dedicated block for it, because it won't fit in a
+     * standard sized one.
+     *
+     * NOTE:
+     *
+     * We used to do a silly casting dance to treat blocks like this
+     * as special cases and make the used/free fields part of the allocated
+     * block, but the old code was not really proper portable C and depended
+     * on a bunch of implementation-specific behavior.  We could have done it
+     * better with a union in struct arena_link, but the memory savings is
+     * only 0.39% for a 64-bit machine, a 4096-byte block size and all
+     * large allocations *just one byte* over the block size, so I
+     * question the utility of it.  We do still slip the large block in
+     * one position behind the list head so it doesn't cut off a partially
+     * filled list head.
+     *
+     * -- andrea
+     */
+    link = alloc_block(arena, size + sizeof(struct arena_link));
     assert(link != NULL);
-    memset(link, 0, size + sizeof(struct arena_link*));
-    *(struct arena_link**)link = arena->head->next;
-    arena->head->next = (struct arena_link*)link;
-    return (void*)(((uint8_t*)link) + sizeof(struct arena_link*));
+    arena->used += size;
+    arena->wasted += sizeof(struct arena_link);
+    link->used = size;
+    link->free = 0;
+    link->next = arena->head->next;
+    arena->head->next = link;
+    ret = link->rest;
+
+#ifdef DETAILED_ARENA_STATS
+    ++(arena->arena_malloc_count);
+    arena->arena_malloc_bytes += size;
+    if (need_zero) {
+      ++(arena->arena_li_malloc_count);
+      arena->arena_li_malloc_bytes += size;
+    } else {
+      ++(arena->arena_lu_malloc_count);
+      arena->arena_lu_malloc_bytes += size;
+    }
+#endif
   } else {
-    // we just need to allocate an ordinary new block.
-    struct arena_link *link = alloc_block(arena, sizeof(struct arena_link) + arena->block_size);
+    /* we just need to allocate an ordinary new block. */
+    link = alloc_block(arena, sizeof(struct arena_link) + arena->block_size);
     assert(link != NULL);
-    memset(link, 0, sizeof(struct arena_link) + arena->block_size);
+#ifdef DETAILED_ARENA_STATS
+    ++(arena->mm_malloc_count);
+    arena->mm_malloc_bytes += sizeof(struct arena_link) + arena->block_size;
+#endif
     link->free = arena->block_size - size;
     link->used = size;
     link->next = arena->head;
     arena->head = link;
     arena->used += size;
     arena->wasted += sizeof(struct arena_link) + arena->block_size - size;
-    return link->rest;
+    ret = link->rest;
+
+#ifdef DETAILED_ARENA_STATS
+    ++(arena->arena_malloc_count);
+    arena->arena_malloc_bytes += size;
+    if (need_zero) {
+      ++(arena->arena_si_malloc_count);
+      arena->arena_si_malloc_bytes += size;
+    } else {
+      ++(arena->arena_su_malloc_count);
+      arena->arena_su_malloc_bytes += size;
+    }
+#endif
   }
+
+  /*
+   * Zeroize if necessary
+   */
+  if (need_zero && !(arena->malloc_zeros)) {
+    memset(ret, 0, size);
+#ifdef DETAILED_ARENA_STATS
+    ++(arena->memset_count);
+    arena->memset_bytes += size;
+#endif
+  }
+
+  return ret;
 }
 
 void h_arena_free(HArena *arena, void* ptr) {
@@ -146,4 +250,20 @@ void h_delete_arena(HArena *arena) {
 void h_allocator_stats(HArena *arena, HArenaStats *stats) {
   stats->used = arena->used;
   stats->wasted = arena->wasted;
+#ifdef DETAILED_ARENA_STATS
+  stats->mm_malloc_count = arena->mm_malloc_count;
+  stats->mm_malloc_bytes = arena->mm_malloc_bytes;
+  stats->memset_count = arena->memset_count;
+  stats->memset_bytes = arena->memset_bytes;
+  stats->arena_malloc_count = arena->arena_malloc_count;
+  stats->arena_malloc_bytes = arena->arena_malloc_bytes;
+  stats->arena_su_malloc_count = arena->arena_su_malloc_count;
+  stats->arena_su_malloc_bytes = arena->arena_su_malloc_bytes;
+  stats->arena_si_malloc_count = arena->arena_si_malloc_count;
+  stats->arena_si_malloc_bytes = arena->arena_si_malloc_bytes;
+  stats->arena_lu_malloc_count = arena->arena_lu_malloc_count;
+  stats->arena_lu_malloc_bytes = arena->arena_lu_malloc_bytes;
+  stats->arena_li_malloc_count = arena->arena_li_malloc_count;
+  stats->arena_li_malloc_bytes = arena->arena_li_malloc_bytes;
+#endif
 }
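
To illustrate the intent of the new entry point (a sketch, not part of this
patch): h_arena_malloc still returns zero-filled memory, while
h_arena_malloc_noinit skips the memset for buffers the caller fully
overwrites anyway.

    HArena *arena = h_new_arena(&system_allocator, 0);  /* 0 = default block size */
    uint8_t *z = h_arena_malloc(arena, 64);         /* guaranteed zero-filled */
    uint8_t *u = h_arena_malloc_noinit(arena, 64);  /* contents indeterminate */
    memcpy(u, z, 64);                               /* caller must initialize u */
    h_delete_arena(arena);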
diff --git a/src/allocator.h b/src/allocator.h
index dc88af68f22895f584065a491463b3f8576c09e9..06d1e6f59dd32987979079c4a7b01d09b13547e6 100644
--- a/src/allocator.h
+++ b/src/allocator.h
@@ -38,6 +38,8 @@ extern "C" {
 # define ATTR_MALLOC(n)
 #endif
 
+/* #define DETAILED_ARENA_STATS */
+
 // TODO(thequux): Turn this into an "HAllocatorVtable", and add a wrapper that also takes an environment pointer.
 typedef struct HAllocator_ {
   void* (*alloc)(struct HAllocator_* allocator, size_t size);
@@ -51,6 +53,7 @@ typedef struct HArena_ HArena ; // hidden implementation
 
 HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default...
 
+void* h_arena_malloc_noinit(HArena *arena, size_t count) ATTR_MALLOC(2);
 void* h_arena_malloc(HArena *arena, size_t count) ATTR_MALLOC(2);
 void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers.
 void h_delete_arena(HArena *arena);
@@ -59,6 +62,26 @@ void h_arena_set_except(HArena *arena, jmp_buf *except);
 typedef struct {
   size_t used;
   size_t wasted;
+#ifdef DETAILED_ARENA_STATS
+  size_t mm_malloc_count;
+  size_t mm_malloc_bytes;
+  size_t memset_count;
+  size_t memset_bytes;
+  size_t arena_malloc_count;
+  size_t arena_malloc_bytes;
+  /* small, uninited */
+  size_t arena_su_malloc_count;
+  size_t arena_su_malloc_bytes;
+  /* small, inited */
+  size_t arena_si_malloc_count;
+  size_t arena_si_malloc_bytes;
+  /* large, uninited */
+  size_t arena_lu_malloc_count;
+  size_t arena_lu_malloc_bytes;
+  /* large, inited */
+  size_t arena_li_malloc_count;
+  size_t arena_li_malloc_bytes;
+#endif
 } HArenaStats;
 
 void h_allocator_stats(HArena *arena, HArenaStats *stats);
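
A small usage sketch (not part of this patch): the extra counters exist only
when DETAILED_ARENA_STATS is uncommented above, so reads of them should be
guarded the same way. Given an HArena *arena:

    HArenaStats stats;
    h_allocator_stats(arena, &stats);
    printf("used=%zu wasted=%zu\n", stats.used, stats.wasted);
    #ifdef DETAILED_ARENA_STATS
    printf("memset: %zu calls, %zu bytes\n",
           stats.memset_count, stats.memset_bytes);
    #endif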
diff --git a/src/backends/glr.c b/src/backends/glr.c
index 3abe6f55bf480eadb45b4a00a1d00ab46377cefe..781b9aeede2f4c2b0d382f600ef3e8819fa5bae6 100644
--- a/src/backends/glr.c
+++ b/src/backends/glr.c
@@ -14,7 +14,7 @@ int h_glr_compile(HAllocator* mm__, HParser* parser, const void* params)
   }
   int result = h_lalr_compile(mm__, parser, params);
 
-  if(result == -1 && parser->backend_data) {
+  if(result == -2 && parser->backend_data) {
     // table is there, just has conflicts? nevermind, that's okay.
     result = 0;
   }
@@ -225,6 +225,8 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
       HLREngine *engine = h_slist_pop(engines);
       const HLRAction *action = h_lrengine_action(engine);
       glr_step(&result, engback, engine, action);
+      // XXX detect ambiguous results - two engines terminating at the same pos
+      // -> kill both engines, i.e. ignore if there is a later unamb. success
     }
 
     // swap the lists
diff --git a/src/backends/lalr.c b/src/backends/lalr.c
index 648261cdc98b3c9f1f63dab1dbbab35edde9fae0..cc7ff36c70963f7056a99276d36969fdaaf79a4e 100644
--- a/src/backends/lalr.c
+++ b/src/backends/lalr.c
@@ -31,18 +31,24 @@ static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A)
 {
   HLRAction *action = lrtable_lookup(table, x, A);
   assert(action != NULL);
+
+  // we are interested in a transition out of state x, i.e. a shift action.
+  // while there could also be reduce actions associated with A in state x,
+  // those are not what we are here for. so if action is a conflict, search it
+  // for the shift. there will only be one and it will be the bottom element.
+  if(action->type == HLR_CONFLICT) {
+    HSlistNode *x;
+    for(x=action->branches->head; x; x=x->next) {
+      action = x->elem;
+      assert(action->type != HLR_CONFLICT); // no nesting of conflicts
+      if(action->type == HLR_SHIFT)
+        break;
+    }
+    assert(x != NULL && x->next == NULL);   // shift found at the bottom
+  }
   assert(action->type == HLR_SHIFT);
-  return action->nextstate;
-}
 
-static inline HLRTransition *transition(HArena *arena,
-                                        size_t x, const HCFChoice *A, size_t y)
-{
-  HLRTransition *t = h_arena_malloc(arena, sizeof(HLRTransition));
-  t->from = x;
-  t->symbol = A;
-  t->to = y;
-  return t;
+  return action->nextstate;
 }
 
 // no-op on terminal symbols
@@ -69,8 +75,8 @@ static void transform_productions(const HLRTable *table, HLREnhGrammar *eg,
     HCFChoice **iBj = items;
     for(; *B; B++, iBj++) {
       size_t j = follow_transition(table, i, *B);
-      HLRTransition *i_B_j = transition(arena, i, *B, j);
-      *iBj = h_hashtable_get(eg->tmap, i_B_j);
+      HLRTransition i_B_j = {i, *B, j};
+      *iBj = h_hashtable_get(eg->tmap, &i_B_j);
       assert(*iBj != NULL);
       i = j;
     }
@@ -279,18 +285,18 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
   }
   HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser));
   if(g == NULL)     // backend not suitable (language not context-free)
-    return -1;
+    return 2;
 
   HLRDFA *dfa = h_lr0_dfa(g);
   if (dfa == NULL) {     // this should normally not happen
     h_cfgrammar_free(g);
-    return -1;
+    return 3;
   }
 
   HLRTable *table = h_lr0_table(g, dfa);
   if (table == NULL) {   // this should normally not happen
     h_cfgrammar_free(g);
-    return -1;
+    return 4;
   }
 
   if(has_conflicts(table)) {
@@ -300,7 +306,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
     if(eg == NULL) {    // this should normally not happen
       h_cfgrammar_free(g);
       h_lrtable_free(table);
-      return -1;
+      return 5;
     }
 
     // go through the inadequate states; replace inadeq with a new list
@@ -332,7 +338,11 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
             const HStringMap *fs = h_follow(1, eg->grammar, lhs);
             assert(fs != NULL);
             assert(fs->epsilon_branch == NULL);
-            assert(!h_stringmap_empty(fs));
+            // NB: there is a case where fs can be empty: when reducing by lhs
+            // would lead to certain parse failure, by means of h_nothing_p()
+            // for instance. in that case, the below code correctly adds no
+            // reduce action.
+            assert(!h_stringmap_empty(fs)); // XXX
 
             // for each lookahead symbol, put action into table cell
             if(terminals_put(table->tmap[state], fs, action) < 0)
@@ -345,11 +355,13 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
         h_slist_push(table->inadeq, (void *)(uintptr_t)state);
       }
     }
+
+    h_cfgrammar_free(eg->grammar);
   }
 
   h_cfgrammar_free(g);
   parser->backend_data = table;
-  return has_conflicts(table)? -1 : 0;
+  return has_conflicts(table)? -2 : 0;
 }
 
 void h_lalr_free(HParser *parser)
diff --git a/src/backends/llk.c b/src/backends/llk.c
index 62d33561f7c909a98ad9585be315d620c7a1be34..b2e89e334c127cd2f498d03a612c8cfd199dc24d 100644
--- a/src/backends/llk.c
+++ b/src/backends/llk.c
@@ -238,7 +238,7 @@ int h_llk_compile(HAllocator* mm__, HParser* parser, const void* params)
     // the table was ambiguous
     h_cfgrammar_free(grammar);
     h_llktable_free(table);
-    return -1;
+    return -2;
   }
   parser->backend_data = table;
 
diff --git a/src/backends/lr.c b/src/backends/lr.c
index f2ac4956d80358e51d35c0e70484013bbfde212a..6919bf6d0a8d284c95167adb07023914a49f89b4 100644
--- a/src/backends/lr.c
+++ b/src/backends/lr.c
@@ -420,6 +420,12 @@ void h_lr_parse_start(HSuspendedParser *s)
   s->backend_state = engine;
 }
 
+// cf. comment before run_trace in regex.c
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunknown-pragmas"
+#pragma GCC diagnostic ignored "-Wclobbered"
+#endif
 bool h_lr_parse_chunk(HSuspendedParser* s, HInputStream *stream)
 {
   HLREngine *engine = s->backend_state;
@@ -457,6 +463,10 @@ bool h_lr_parse_chunk(HSuspendedParser* s, HInputStream *stream)
   *stream = engine->input;
   return !run;  // done if engine no longer running
 }
+// Reenable -Wclobber
+#if defined(__GNUC__) && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
 
 HParseResult *h_lr_parse_finish(HSuspendedParser *s)
 {
diff --git a/src/backends/packrat.c b/src/backends/packrat.c
index e561b7ef2e00fa970f17b2889137a22640e20903..289803703bf32a13c8a7922fdc919e7085b2cda2 100644
--- a/src/backends/packrat.c
+++ b/src/backends/packrat.c
@@ -3,6 +3,17 @@
 #include "../internal.h"
 #include "../parsers/parser_internal.h"
 
+/* #define DETAILED_PACKRAT_STATISTICS */
+
+#ifdef DETAILED_PACKRAT_STATISTICS
+static size_t packrat_hash_count = 0;
+static size_t packrat_hash_bytes = 0;
+static size_t packrat_cmp_count = 0;
+static size_t packrat_cmp_bytes = 0;
+#endif
+
+static uint32_t cache_key_hash(const void* key);
+
 // short-hand for creating lowlevel parse cache values (parse result case)
 static
 HParserCacheValue * cached_result(HParseState *state, HParseResult *result) {
@@ -56,31 +67,38 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa
   return tmp_res;
 }
 
-HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
-  HParserCacheValue *cached = h_hashtable_get(state->cache, k);
+HParserCacheValue* recall(HParserCacheKey *k, HParseState *state, HHashValue keyhash) {
+  HParserCacheValue *cached = h_hashtable_get_precomp(state->cache, k, keyhash);
   HRecursionHead *head = h_hashtable_get(state->recursion_heads, &k->input_pos);
-  if (!head) { // No heads found
+
+  if (!head) {
+    /* No heads found */
     return cached;
-  } else { // Some heads found
+  } else {
+    /* Some heads found */
     if (!cached && head->head_parser != k->parser && !h_slist_find(head->involved_set, k->parser)) {
-      // Nothing in the cache, and the key parser is not involved
+      /* Nothing in the cache, and the key parser is not involved */
       cached = cached_result(state, NULL);
       cached->input_stream = k->input_pos;
     }
     if (h_slist_find(head->eval_set, k->parser)) {
-      // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. 
+      /*
+       * Something is in the cache, and the key parser is in the eval set.
+       * Remove the key parser from the eval set of the head.
+       */
       head->eval_set = h_slist_remove_all(head->eval_set, k->parser);
       HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
-      // update the cache
+      /* update the cache */
       if (!cached) {
-	cached = cached_result(state, tmp_res);
-	h_hashtable_put(state->cache, k, cached);
+        cached = cached_result(state, tmp_res);
+        h_hashtable_put_precomp(state->cache, k, cached, keyhash);
       } else {
-	cached->value_type = PC_RIGHT;
-	cached->right = tmp_res;
-	cached->input_stream = state->input_stream;
+        cached->value_type = PC_RIGHT;
+        cached->right = tmp_res;
+        cached->input_stream = state->input_stream;
       }
     }
+
     return cached;
   }
 }
@@ -180,36 +198,50 @@ HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growab
 /* Warth's recursion. Hi Alessandro! */
 HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
   HParserCacheKey *key = a_new(HParserCacheKey, 1);
+  HHashValue keyhash;
+  HLeftRec *base = NULL;
+  HParserCacheValue *m = NULL, *cached = NULL;
+
   key->input_pos = state->input_stream; key->parser = parser;
-  HParserCacheValue *m = NULL;
+  keyhash = cache_key_hash(key);
+
   if (parser->vtable->higher) {
-    m = recall(key, state);
+    m = recall(key, state, keyhash);
   }
-  // check to see if there is already a result for this object...
+
+  /* check to see if there is already a result for this object... */
   if (!m) {
-    // It doesn't exist, so create a dummy result to cache
-    HLeftRec *base = NULL;
-    // But only cache it now if there's some chance it could grow; primitive parsers can't
+    /*
+     * But only cache it now if there's some chance it could grow; primitive
+     * parsers can't
+     */
     if (parser->vtable->higher) {
       base = a_new(HLeftRec, 1);
       base->seed = NULL; base->rule = parser; base->head = NULL;
       h_slist_push(state->lr_stack, base);
-      // cache it
-      h_hashtable_put(state->cache, key, cached_lr(state, base));
-      // parse the input
+      /* cache it */
+      h_hashtable_put_precomp(state->cache, key,
+                              cached_lr(state, base), keyhash);
     }
+
+    /* parse the input */
     HParseResult *tmp_res = perform_lowlevel_parse(state, parser);
     if (parser->vtable->higher) {
-      // the base variable has passed equality tests with the cache
+      /* the base variable has passed equality tests with the cache */
       h_slist_pop(state->lr_stack);
-      // update the cached value to our new position
-      HParserCacheValue *cached = h_hashtable_get(state->cache, key);
+      /* update the cached value to our new position */
+      cached = h_hashtable_get_precomp(state->cache, key, keyhash);
       assert(cached != NULL);
       cached->input_stream = state->input_stream;
     }
-    // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one
+
+    /*
+     * setupLR, used below, mutates the LR to have a head if appropriate,
+     * so we check to see if we have one
+     */
     if (!base || NULL == base->head) {
-      h_hashtable_put(state->cache, key, cached_result(state, tmp_res));
+      h_hashtable_put_precomp(state->cache, key,
+                              cached_result(state, tmp_res), keyhash);
       return tmp_res;
     } else {
       base->seed = tmp_res;
@@ -217,7 +249,7 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
       return res;
     }
   } else {
-    // it exists!
+    /* it exists! */
     state->input_stream = m->input_stream;
     if (PC_LEFT == m->value_type) {
       setupLR(parser, state, m->left);
@@ -239,17 +271,34 @@ void h_packrat_free(HParser *parser) {
 }
 
 static uint32_t cache_key_hash(const void* key) {
+#ifdef DETAILED_PACKRAT_STATISTICS
+  ++(packrat_hash_count);
+  packrat_hash_bytes += sizeof(HParserCacheKey);
+#endif
   return h_djbhash(key, sizeof(HParserCacheKey));
 }
+
 static bool cache_key_equal(const void* key1, const void* key2) {
+#ifdef DETAILED_PACKRAT_STATISTICS
+  ++(packrat_cmp_count);
+  packrat_cmp_bytes += sizeof(HParserCacheKey);
+#endif
   return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
 }
 
 static uint32_t pos_hash(const void* key) {
+#ifdef DETAILED_PACKRAT_STATISTICS
+  ++(packrat_hash_count);
+  packrat_hash_bytes += sizeof(HInputStream);
+#endif
   return h_djbhash(key, sizeof(HInputStream));
 }
 
 static bool pos_equal(const void* key1, const void* key2) {
+#ifdef DETAILED_PACKRAT_STATISTICS
+  ++(packrat_cmp_count);
+  packrat_cmp_bytes += sizeof(HInputStream);
+#endif
   return memcmp(key1, key2, sizeof(HInputStream)) == 0;
 }
 
@@ -271,6 +320,7 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr
   parse_state->lr_stack = h_slist_new(arena);
   parse_state->recursion_heads = h_hashtable_new(arena, pos_equal, pos_hash);
   parse_state->arena = arena;
+  parse_state->symbol_table = NULL;
   HParseResult *res = h_do_parse(parser, parse_state);
   h_slist_free(parse_state->lr_stack);
   h_hashtable_free(parse_state->recursion_heads);
diff --git a/src/backends/regex.c b/src/backends/regex.c
index 2b6cf95d77098dc5e4561b9ac5a01a47610d8845..fee5a5ed2bb40f862ff9204f6c65e536cf109db9 100644
--- a/src/backends/regex.c
+++ b/src/backends/regex.c
@@ -7,6 +7,8 @@
 
 #undef a_new
 #define a_new(typ, count) a_new_(arena, typ, count)
+#undef a_new0
+#define a_new0(typ, count) a_new0_(arena, typ, count)
 // Stack VM
 typedef enum HSVMOp_ {
   SVM_PUSH, // Push a mark. There is no VM insn to push an object.
@@ -67,13 +69,13 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
     goto end;
 
   HSArray *heads_n = heads_a, *heads_p = heads_b;
-  uint8_t *insn_seen = a_new(uint8_t, prog->length); // 0 -> not seen, 1->processed, 2->queued
-  HRVMThread *ip_queue = a_new(HRVMThread, prog->length);
+  uint8_t *insn_seen = a_new0(uint8_t, prog->length); // 0 -> not seen, 1->processed, 2->queued
+  HRVMThread *ip_queue = a_new0(HRVMThread, prog->length);
   size_t ipq_top;
 
 #define THREAD ip_queue[ipq_top-1]
 #define PUSH_SVM(op_, arg_) do { \
-	  HRVMTrace *nt = a_new(HRVMTrace, 1); \
+	  HRVMTrace *nt = a_new0(HRVMTrace, 1); \
 	  nt->arg = (arg_);		       \
 	  nt->opcode = (op_);		       \
 	  nt->next = THREAD.trace;	       \
@@ -81,7 +83,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
 	  THREAD.trace = nt;		       \
   } while(0)
 
-  ((HRVMTrace*)h_sarray_set(heads_n, 0, a_new(HRVMTrace, 1)))->opcode = SVM_NOP; // Initial thread
+  ((HRVMTrace*)h_sarray_set(heads_n, 0, a_new0(HRVMTrace, 1)))->opcode = SVM_NOP; // Initial thread
   
   size_t off = 0;
   int live_threads = 1; // May be redundant
@@ -223,7 +225,7 @@ bool svm_stack_ensure_cap(HAllocator *mm__, HSVMContext *ctx, size_t addl) {
  * the second return; here, the only variables that could matter for
  * are arena and ctx (because they're referenced in "goto fail").
  */
-#ifdef __GNUC__
+#if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunknown-pragmas"
 #pragma GCC diagnostic ignored "-Wclobbered"
@@ -257,7 +259,7 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
       if (!svm_stack_ensure_cap(mm__, ctx, 1)) {
 	goto fail;
       }
-      tmp_res = a_new(HParsedToken, 1);
+      tmp_res = a_new0(HParsedToken, 1);
       tmp_res->token_type = TT_MARK;
       tmp_res->index = cur->input_pos;
       tmp_res->bit_offset = 0;
@@ -288,7 +290,7 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
       break;
     case SVM_ACCEPT:
       assert(ctx->stack_count <= 1);
-      HParseResult *res = a_new(HParseResult, 1);
+      HParseResult *res = a_new0(HParseResult, 1);
       if (ctx->stack_count == 1) {
 	res->ast = ctx->stack[0];
       } else {
@@ -311,7 +313,7 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
   return NULL;
 }
 // Reenable -Wclobber
-#ifdef __GNUC__
+#if defined(__GNUC__) && !defined(__clang__)
 #pragma GCC diagnostic pop
 #endif
 
@@ -428,9 +430,10 @@ static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params
   prog->actions = NULL;
   prog->allocator = mm__;
   if (setjmp(prog->except)) {
-    return false;
+    return 3;
   }
   if (!h_compile_regex(prog, parser)) {
+    // this shouldn't normally fail when isValidRegular() returned true
     h_free(prog->insns);
     h_free(prog->actions);
     h_free(prog);
diff --git a/src/benchmark.c b/src/benchmark.c
index b6a2876fa0a1a85711c610b1d2bc5f1143c77f87..7d56c32e7c17bd204fd76b1cdd5b8d6680aeea57 100644
--- a/src/benchmark.c
+++ b/src/benchmark.c
@@ -46,7 +46,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
   for (backend = PB_MIN; backend <= PB_MAX; backend++) {
     ret->results[backend].backend = backend;
     // Step 1: Compile grammar for given parser...
-    if (h_compile(parser, backend, NULL) == -1) {
+    if (h_compile(parser, backend, NULL)) {
       // backend inappropriate for grammar...
       fprintf(stderr, "Compiling for %s failed\n", HParserBackendNames[backend]);
       ret->results[backend].compile_success = false;
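
With the backends now returning distinct non-zero codes (and -2 reserved for
"table built but has conflicts", which h_glr_compile tolerates), callers
should test h_compile for any non-zero value rather than -1 specifically; a
sketch, not part of this patch:

    int rc = h_compile(parser, PB_LALR, NULL);
    if (rc != 0) {
        /* backend refused the grammar; rc == -2 means the LALR table was
           built but contains conflicts (the GLR backend can still use it) */
        fprintf(stderr, "compile failed (%d)\n", rc);
    }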
diff --git a/src/bindings/jni/ConfigureJNI.py b/src/bindings/jni/ConfigureJNI.py
new file mode 100644
index 0000000000000000000000000000000000000000..573bd682192e88618cba6c9b847128a98d6e08db
--- /dev/null
+++ b/src/bindings/jni/ConfigureJNI.py
@@ -0,0 +1,99 @@
+#!python 
+
+from __future__ import absolute_import, division, print_function
+
+import os
+import sys
+
+def walkDirs(path):
+    """helper function to get a list of all subdirectories"""
+    # os.walk() does not take a callback (that was the old os.path.walk API),
+    # so collect the subdirectories by iterating over the generator instead.
+    pathlist = [path]
+    for dirname, dirnames, _filenames in os.walk(path):
+        for d in dirnames:
+            pathlist.append(os.path.join(dirname, d))
+    return pathlist
+
+def ConfigureJNI(env):
+    """Configure the given environment for compiling Java Native Interface
+       c or c++ language files."""
+    
+    print( "Configuring JNI includes")
+
+    if not env.get('JAVAC'):
+        print( "The Java compiler must be installed and in the current path.")
+        return 0
+
+    # first look for a shell variable called JAVA_HOME
+    java_base = os.environ.get('JAVA_HOME')
+    if not java_base:
+        if sys.platform == 'darwin':
+            # Apple's OS X has its own special java base directory
+            java_base = '/System/Library/Frameworks/JavaVM.framework'
+        else:
+            # Search for the java compiler
+            print ("JAVA_HOME environment variable is not set. Searching for java... ")
+            jcdir = os.path.dirname(env.WhereIs('javac'))
+            if not jcdir:
+                print( "not found.")
+                return 0
+            # assuming the compiler found is in some directory like
+            # /usr/jdkX.X/bin/javac, java's home directory is /usr/jdkX.X
+            java_base = os.path.join(jcdir, "..")
+            print( "found.")
+
+    if sys.platform == 'cygwin':
+        # Cygwin and Sun Java have different ideas of how path names
+        # are defined. Use cygpath to convert the windows path to
+        # a cygwin path. i.e. C:\jdkX.X to /cygdrive/c/jdkX.X
+        java_base = os.popen("cygpath -up '"+java_base+"'").read().replace( \
+                 '\n', '')
+
+    if sys.platform == 'darwin':
+        # Apple does not use Sun's naming convention
+        java_headers = [os.path.join(java_base, 'Headers')]
+        java_libs = [os.path.join(java_base, 'Libraries')]
+    else:
+        # windows and linux
+        java_headers = [os.path.join(java_base, 'include')]
+        java_libs = [os.path.join(java_base, 'lib')]
+        # Sun's windows and linux JDKs keep system-specific header
+        # files in a sub-directory of include
+        if java_base == '/usr' or java_base == '/usr/local':
+            # too many possible subdirectories. Just use defaults
+            java_headers.append(os.path.join(java_headers[0], 'win32'))
+            java_headers.append(os.path.join(java_headers[0], 'linux'))
+            java_headers.append(os.path.join(java_headers[0], 'solaris'))
+        else:
+            # add all subdirs of 'include'. The system specific headers
+            # should be in there somewhere
+            java_headers = walkDirs(java_headers[0])
+
+    if not any(os.path.exists(os.path.join(path, 'jni.h'))
+               for path in java_headers):
+        print("Can't find jni.h in %s" % java_headers)
+        return 0
+
+    # add Java's include and lib directory to the environment
+    java_headers.append(os.path.join(java_headers[0], 'linux'))
+    env.Append(CPPPATH = java_headers)
+    env.Append(LIBPATH = java_libs)
+
+    # add any special platform-specific compilation or linking flags
+    if sys.platform == 'darwin':
+        env.Append(SHLINKFLAGS = '-dynamiclib -framework JavaVM')
+        env['SHLIBSUFFIX'] = '.jnilib'
+    elif sys.platform == 'cygwin':
+        env.Append(CCFLAGS = '-mno-cygwin')
+        env.Append(SHLINKFLAGS = '-mno-cygwin -Wl,--kill-at')
+
+    # Add extra potentially useful environment variables
+    env['JAVA_HOME'] = java_base
+    env['JNI_CPPPATH'] = java_headers
+    env['JNI_LIBPATH'] = java_libs
+    return 1
\ No newline at end of file
diff --git a/jni/Example.java b/src/bindings/jni/Example.java
similarity index 75%
rename from jni/Example.java
rename to src/bindings/jni/Example.java
index 4e6d76825aa64ddeb9e916ea0596e457ff2751f0..52f89e27b42b1ee0f28658a5eba9df3a08674749 100644
--- a/jni/Example.java
+++ b/src/bindings/jni/Example.java
@@ -84,6 +84,20 @@ public static void main(String args[])
     Parser i3parsers[] = {Hammer.ch((byte)'i'), Hammer.uInt8(), Hammer.int8()};
     handle(Hammer.parse(Hammer.sequence(Hammer.ch((byte)'i'), Hammer.uInt8(), Hammer.int8()), i3, i3.length));
 
+    out("permutation");
+    byte ch3[] = {(byte) 'a', (byte) 'b', (byte) 'c'};
+    handle(Hammer.parse(Hammer.permutation(Hammer.ch((byte)'a'), Hammer.ch((byte)'b'), Hammer.ch((byte)'c')), ch3, ch3.length));
+    handle(Hammer.parse(Hammer.permutation(Hammer.ch((byte)'b'), Hammer.ch((byte)'a'), Hammer.ch((byte)'c')), ch3, ch3.length));
+    
+    out("skip");
+    byte ch6[] = {(byte) 'a', (byte) 'b', (byte) 'c', (byte) 'd', (byte) 'e', (byte) 'f'};
+    handle(Hammer.parse(Hammer.sequence(Hammer.ch((byte)'a'), Hammer.skip((int)32), Hammer.ch((byte)'f')), ch6, ch6.length));
+    
+    out("seek");
+    final int SEEK_SET = 0;	/* Seek from beginning of file.  */
+    //final int SEEK_CUR = 1;	/* Seek from current position.  */
+    //final int SEEK_END = 2;	/* Seek from end of file.  */
+    handle(Hammer.parse(Hammer.sequence(Hammer.ch((byte)'a'), Hammer.seek((int)40, (int)SEEK_SET), Hammer.ch((byte)'f')), ch6, ch6.length));
     
 }
 
diff --git a/jni/Makefile b/src/bindings/jni/Makefile
similarity index 87%
rename from jni/Makefile
rename to src/bindings/jni/Makefile
index 85be973388223c6b9332c0d72a54f8a283e2c899..ea257872242da2e27c70eef26825137d4247d154 100644
--- a/jni/Makefile
+++ b/src/bindings/jni/Makefile
@@ -5,17 +5,18 @@ CSOURCES := com_upstandinghackers_hammer_Hammer.c com_upstandinghackers_hammer_P
 # ls *.h *.o *.so com/upstandinghackers/hammer/*.class | grep -v jhammer.h | tr '\n' ' '; replace single $ with $$
 OUTPUTS := com/upstandinghackers/hammer/Action.class com/upstandinghackers/hammer/Hammer.class com_upstandinghackers_hammer_Hammer.h com_upstandinghackers_hammer_Hammer.o com/upstandinghackers/hammer/Hammer\$TokenType.class com_upstandinghackers_hammer_Hammer_TokenType.h com/upstandinghackers/hammer/ParsedToken.class com_upstandinghackers_hammer_ParsedToken.h com_upstandinghackers_hammer_ParsedToken.o com/upstandinghackers/hammer/Parser.class com/upstandinghackers/hammer/ParseResult.class com_upstandinghackers_hammer_ParseResult.h com_upstandinghackers_hammer_ParseResult.o com_upstandinghackers_hammer_Parser.h com_upstandinghackers_hammer_Parser.o com/upstandinghackers/hammer/Predicate.class libjhammer.so
 
-TOPLEVEL := ../
+TOPLEVEL := ../../../
 
 JC=javac
 JH=javah
 CP=com/upstandinghackers/hammer
 PACKAGE=com.upstandinghackers.hammer
 
-include ../common.mk
+include ../../../common.mk
 
-JNI_INCLUDE := /usr/lib/jvm/java-6-openjdk/include/
-CFLAGS += -fPIC -I. -I $(TOPLEVEL)/src/ -I jni -I $(JNI_INCLUDE) 
+JNI_INCLUDE := /usr/lib/jvm/java-8-oracle/include/
+JNI_INCLUDE_LINUX := /usr/lib/jvm/java-8-oracle/include/linux
+CFLAGS += -fPIC -I. -I $(TOPLEVEL)/src/ -I jni -I $(JNI_INCLUDE) -I $(JNI_INCLUDE_LINUX)
 
 %.java: $(call ifsilent,| $(HUSH))
 	$(call hush, "Compiling Java source $@") $(JC) $(CP)/$@
@@ -23,7 +24,7 @@ CFLAGS += -fPIC -I. -I $(TOPLEVEL)/src/ -I jni -I $(JNI_INCLUDE)
 all: javacc prepare compile link
 
 link: compile
-	$(call hush, "Generating libjhammer.so") $(CC) -shared $(CFLAGS) -o libjhammer.so *.o ../src/*.o ../src/backends/*.o ../src/parsers/*.o
+	$(call hush, "Generating libjhammer.so") $(CC) -shared $(CFLAGS) -o libjhammer.so *.o ../../../src/*.o ../../../src/backends/*.o ../../../src/parsers/*.o
 
 $(CSOURCES): prepare
 	$(call hush, "Compiling $@") $(CC) -c $(CFLAGS) $@
diff --git a/jni/NOTES b/src/bindings/jni/NOTES
similarity index 100%
rename from jni/NOTES
rename to src/bindings/jni/NOTES
diff --git a/src/bindings/jni/SConscript b/src/bindings/jni/SConscript
new file mode 100644
index 0000000000000000000000000000000000000000..81582868ff07bff5d46d162a9d8ed2936dd52e28
--- /dev/null
+++ b/src/bindings/jni/SConscript
@@ -0,0 +1,42 @@
+# -*- python -*-
+
+from __future__ import absolute_import, division, print_function
+
+import os, os.path
+import sys
+Import('env libhammer_shared testruns targets')
+from src.bindings.jni.ConfigureJNI import ConfigureJNI
+
+javaenv = env.Clone()
+
+if not ConfigureJNI(javaenv):
+    print("Java Native Interface is required... Exiting")
+    Exit(0)
+
+
+javaenv.Append(CPPPATH=[".", "../.."],
+              LIBS=['hammer'],
+              LIBPATH=["../.."])
+
+# compile java classes
+jni_classes = javaenv.Java(".", "#src/bindings/jni/com")
+
+print(jni_classes)
+jni_headers = javaenv.JavaH(".", jni_classes)
+print(jni_headers)
+Default(jni_classes)
+Default(jni_headers)
+
+#print(javaenv.Dump())
+
+shlib_env = env.Clone(CPPPATH=javaenv['JNI_CPPPATH'] + ['../..'],
+              LIBS=['hammer'],
+              LIBPATH=["../.."])
+csources = ['com_upstandinghackers_hammer_Hammer.c',
+            'com_upstandinghackers_hammer_ParsedToken.c',
+            'com_upstandinghackers_hammer_Parser.c',
+            'com_upstandinghackers_hammer_ParseResult.c']
+
+libjhammer_shared = shlib_env.SharedLibrary('libjhammer', csources)
+Default(libjhammer_shared)
+
diff --git a/jni/com/upstandinghackers/hammer/Action.java b/src/bindings/jni/com/upstandinghackers/hammer/Action.java
similarity index 100%
rename from jni/com/upstandinghackers/hammer/Action.java
rename to src/bindings/jni/com/upstandinghackers/hammer/Action.java
diff --git a/jni/com/upstandinghackers/hammer/Hammer.java b/src/bindings/jni/com/upstandinghackers/hammer/Hammer.java
similarity index 85%
rename from jni/com/upstandinghackers/hammer/Hammer.java
rename to src/bindings/jni/com/upstandinghackers/hammer/Hammer.java
index 3e06a91eca4b15eebbfe28c99547829a3c2d6068..2b77a564af480140a21b8d2aa992c49d6bf2bcf4 100644
--- a/jni/com/upstandinghackers/hammer/Hammer.java
+++ b/src/bindings/jni/com/upstandinghackers/hammer/Hammer.java
@@ -53,10 +53,12 @@ public class Hammer
     public static native Parser middle(Parser p, Parser x, Parser q);
 //    public static native Parser action(Parser p, Action a);
     public static native Parser in(byte[] charset, int length);
+    public static native Parser notIn(byte[] charset, int length);
     public static native Parser endP();
     public static native Parser nothingP();
     public static native Parser sequence(Parser... parsers);
     public static native Parser choice(Parser... parsers);
+    public static native Parser permutation(Parser... parsers);
     public static native Parser butNot(Parser p1, Parser p2);
     public static native Parser difference(Parser p1, Parser p2);
     public static native Parser xor(Parser p1, Parser p2);
@@ -73,4 +75,11 @@ public class Hammer
     public static native Parser and(Parser p);
     public static native Parser not(Parser p);
     public static native Parser indirect();
+//    public static native Parser bindIndirect(Parser indirect, Parser inner);
+    public static native Parser withEndianness(byte endianness, Parser p);
+//    public static native Parser bind(Parser p, HContinuation k, void *env);
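+    // skip, seek and tell operate on bit offsets within the input stream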
+    public static native Parser skip(int n);
+    public static native Parser seek(int n, int whence);
+    public static native Parser tell();
+    
 }
diff --git a/jni/com/upstandinghackers/hammer/ParseResult.java b/src/bindings/jni/com/upstandinghackers/hammer/ParseResult.java
similarity index 100%
rename from jni/com/upstandinghackers/hammer/ParseResult.java
rename to src/bindings/jni/com/upstandinghackers/hammer/ParseResult.java
diff --git a/jni/com/upstandinghackers/hammer/ParsedToken.java b/src/bindings/jni/com/upstandinghackers/hammer/ParsedToken.java
similarity index 100%
rename from jni/com/upstandinghackers/hammer/ParsedToken.java
rename to src/bindings/jni/com/upstandinghackers/hammer/ParsedToken.java
diff --git a/jni/com/upstandinghackers/hammer/Parser.java b/src/bindings/jni/com/upstandinghackers/hammer/Parser.java
similarity index 100%
rename from jni/com/upstandinghackers/hammer/Parser.java
rename to src/bindings/jni/com/upstandinghackers/hammer/Parser.java
diff --git a/jni/com/upstandinghackers/hammer/Predicate.java b/src/bindings/jni/com/upstandinghackers/hammer/Predicate.java
similarity index 100%
rename from jni/com/upstandinghackers/hammer/Predicate.java
rename to src/bindings/jni/com/upstandinghackers/hammer/Predicate.java
diff --git a/jni/com_upstandinghackers_hammer_Hammer.c b/src/bindings/jni/com_upstandinghackers_hammer_Hammer.c
similarity index 83%
rename from jni/com_upstandinghackers_hammer_Hammer.c
rename to src/bindings/jni/com_upstandinghackers_hammer_Hammer.c
index f83414bc8bd9c443a338e16300f4eb3bd3637d37..7087792789092167c4c3a1adcfa4487bbaf01a5e 100644
--- a/jni/com_upstandinghackers_hammer_Hammer.c
+++ b/src/bindings/jni/com_upstandinghackers_hammer_Hammer.c
@@ -157,6 +157,13 @@ JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_in
 }
 
 
+JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_notIn
+  (JNIEnv *env, jclass class, jbyteArray charset, jint length)
+{
+    RETURNWRAP(env, h_not_in((uint8_t*) ((*env)->GetByteArrayElements(env, charset, NULL)), (size_t)length));
+}
+
+
 JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_endP
   (JNIEnv *env, jclass class)
 {
@@ -227,6 +234,34 @@ JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_choice
 }
 
 
+JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_permutation
+  (JNIEnv *env, jclass class, jobjectArray permutation)
+{
+    jsize length;
+    void **parsers;
+    int i;
+    jobject current;
+    const HParser *result;
+
+    length = (*env)->GetArrayLength(env, permutation);
+    parsers = malloc(sizeof(HParser *)*(length+1));
+    if(NULL==parsers)
+    {
+        return NULL;
+    }
+
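+    /* unwrap each Java Parser into the NULL-terminated array that h_permutation__a expects */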
+    for(i=0; i<length; i++)
+    {
+        current = (*env)->GetObjectArrayElement(env, permutation, (jsize)i);
+        parsers[i] = UNWRAP(env, current);
+    }
+    parsers[length] = NULL;
+
+    result = h_permutation__a(parsers);
+    RETURNWRAP(env, result);
+}
+
+
 JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_butNot
   (JNIEnv *env, jclass class, jobject p, jobject q)
 {
@@ -332,4 +367,29 @@ JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_indirect
 }
 
 
+JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_withEndianness
+  (JNIEnv *env, jclass class, jbyte endianness, jobject p)
+{
+    RETURNWRAP(env, h_with_endianness((char) endianness, UNWRAP(env, p)));
+}
 
+
+JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_skip
+  (JNIEnv *env, jclass class, jint n)
+{
+    RETURNWRAP(env, h_skip((size_t) n));
+}
+
+
+JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_seek
+  (JNIEnv *env, jclass class, jint offset, jint whence)
+{
+    RETURNWRAP(env, h_seek((ssize_t) offset, (int) whence));
+}
+
+
+JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_tell
+  (JNIEnv *env, jclass class)
+{
+    RETURNWRAP(env, h_tell());
+}
diff --git a/jni/com_upstandinghackers_hammer_ParseResult.c b/src/bindings/jni/com_upstandinghackers_hammer_ParseResult.c
similarity index 100%
rename from jni/com_upstandinghackers_hammer_ParseResult.c
rename to src/bindings/jni/com_upstandinghackers_hammer_ParseResult.c
diff --git a/jni/com_upstandinghackers_hammer_ParsedToken.c b/src/bindings/jni/com_upstandinghackers_hammer_ParsedToken.c
similarity index 100%
rename from jni/com_upstandinghackers_hammer_ParsedToken.c
rename to src/bindings/jni/com_upstandinghackers_hammer_ParsedToken.c
diff --git a/jni/com_upstandinghackers_hammer_Parser.c b/src/bindings/jni/com_upstandinghackers_hammer_Parser.c
similarity index 100%
rename from jni/com_upstandinghackers_hammer_Parser.c
rename to src/bindings/jni/com_upstandinghackers_hammer_Parser.c
diff --git a/jni/jhammer.h b/src/bindings/jni/jhammer.h
similarity index 100%
rename from jni/jhammer.h
rename to src/bindings/jni/jhammer.h
diff --git a/src/bindings/lua/hammer.lua b/src/bindings/lua/hammer.lua
new file mode 100644
index 0000000000000000000000000000000000000000..2ee1656a098633801610a4ee181366d13dd69d10
--- /dev/null
+++ b/src/bindings/lua/hammer.lua
@@ -0,0 +1,371 @@
+local ffi = require("ffi")
+ffi.cdef[[
+typedef enum HParserBackend_ {
+  PB_MIN = 0,
+  PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
+  PB_REGULAR,
+  PB_LLk,
+  PB_LALR,
+  PB_GLR,
+  PB_MAX = PB_GLR
+} HParserBackend;
+
+typedef enum HTokenType_ {
+  TT_NONE = 1,
+  TT_BYTES = 2,
+  TT_SINT = 4,
+  TT_UINT = 8,
+  TT_SEQUENCE = 16,
+  TT_RESERVED_1, // reserved for backend-specific internal use
+  TT_ERR = 32,
+  TT_USER = 64,
+  TT_MAX
+} HTokenType;
+
+typedef struct HBytes_ {
+  const uint8_t *token;
+  size_t len;
+} HBytes;
+
+typedef struct HArena_ HArena ; // hidden implementation
+
+typedef struct HCountedArray_ {
+  size_t capacity;
+  size_t used;
+  HArena * arena;
+  struct HParsedToken_ **elements;
+} HCountedArray;
+
+typedef struct HParsedToken_ {
+  HTokenType token_type;
+  union {
+    HBytes bytes;
+    int64_t sint;
+    uint64_t uint;
+    double dbl;
+    float flt;
+    HCountedArray *seq; // a sequence of HParsedToken's
+    void *user;
+  };
+  size_t index;
+  size_t bit_length;
+  char bit_offset;
+} HParsedToken;
+
+typedef struct HParseResult_ {
+  const HParsedToken *ast;
+  int64_t bit_length;
+  HArena * arena;
+} HParseResult;
+
+typedef struct HParserVtable_ HParserVtable;
+typedef struct HCFChoice_ HCFChoice;
+
+typedef struct HParser_ {
+  const HParserVtable *vtable;
+  HParserBackend backend;
+  void* backend_data;
+  void *env;
+  HCFChoice *desugared;
+} HParser;
+
+typedef struct HAllocator_ HAllocator;
+
+typedef HParsedToken* (*HAction)(const HParseResult *p, void* user_data);
+typedef bool (*HPredicate)(HParseResult *p, void* user_data);
+typedef HParser* (*HContinuation)(HAllocator *mm__, const HParsedToken *x, void *env);
+
+HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length);
+HParser* h_token(const uint8_t *str, const size_t len);
+HParser* h_ch(const uint8_t c);
+HParser* h_ch_range(const uint8_t lower, const uint8_t upper);
+HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper);
+HParser* h_bits(size_t len, bool sign);
+HParser* h_int64();
+HParser* h_int32();
+HParser* h_int16();
+HParser* h_int8();
+HParser* h_uint64();
+HParser* h_uint32();
+HParser* h_uint16();
+HParser* h_uint8();
+HParser* h_whitespace(const HParser* p);
+HParser* h_left(const HParser* p, const HParser* q);
+HParser* h_right(const HParser* p, const HParser* q);
+HParser* h_middle(const HParser* p, const HParser* x, const HParser* q);
+HParser* h_action(const HParser* p, const HAction a, void* user_data);
+HParser* h_in(const uint8_t *charset, size_t length);
+HParser* h_not_in(const uint8_t *charset, size_t length);
+HParser* h_end_p();
+HParser* h_nothing_p();
+HParser* h_sequence(HParser* p, ...);
+HParser* h_choice(HParser* p, ...);
+HParser* h_permutation(HParser* p, ...);
+HParser* h_butnot(const HParser* p1, const HParser* p2);
+HParser* h_difference(const HParser* p1, const HParser* p2);
+HParser* h_xor(const HParser* p1, const HParser* p2);
+HParser* h_many(const HParser* p);
+HParser* h_many1(const HParser* p);
+HParser* h_repeat_n(const HParser* p, const size_t n);
+HParser* h_optional(const HParser* p);
+HParser* h_ignore(const HParser* p);
+HParser* h_sepBy(const HParser* p, const HParser* sep);
+HParser* h_sepBy1(const HParser* p, const HParser* sep);
+HParser* h_epsilon_p();
+HParser* h_length_value(const HParser* length, const HParser* value);
+HParser* h_attr_bool(const HParser* p, HPredicate pred, void* user_data);
+HParser* h_and(const HParser* p);
+HParser* h_not(const HParser* p);
+HParser* h_indirect();
+void h_bind_indirect(HParser* indirect, const HParser* inner);
+HParser* h_with_endianness(char endianness, const HParser* p);
+HParser* h_put_value(const HParser* p, const char* name);
+HParser* h_get_value(const char* name);
+HParser* h_bind(const HParser *p, HContinuation k, void *env);
+
+int h_compile(HParser* parser, HParserBackend backend, const void* params);
+
+static const uint8_t BYTE_BIG_ENDIAN = 0x1;
+static const uint8_t BIT_BIG_ENDIAN = 0x2;
+static const uint8_t BYTE_LITTLE_ENDIAN = 0x0;
+static const uint8_t BIT_LITTLE_ENDIAN = 0x0;
+]]
+local h = ffi.load("hammer")
+
+local function helper(a, n, b, ...)
+  if   n == 0 then return a
+  else             return b, helper(a, n-1, ...) end
+end
+local function append(a, ...)
+  return helper(a, select('#', ...), ...)
+end
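+-- append(nil, ...) yields the given parsers followed by nil; LuaJIT passes that trailing
+-- nil as the NULL pointer which terminates the h_sequence-style C vararg lists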
+
+local mt = {
+  __index = {
+    parse = function(p, str) return h.h_parse(p, str, #str) end,
+  },
+}
+local hammer = {}
+hammer.parser = ffi.metatype("HParser", mt)
+
+local counted_array
+local arr_mt = {
+  __index = function(table, key)
+    return table.elements[key]
+  end,
+  __len = function(table) return table.used end,
+  __ipairs = function(table)
+    local i, n = 0, #table
+    return function()
+      i = i + 1
+      if i <= n then
+        return i, table.elements[i]
+      end
+    end
+  end,
+  __call = function(self)
+    local ret = {}
+    for i, v in ipairs(self)
+      do ret[#ret+1] = v
+    end
+    return ret
+  end
+}
+counted_array = ffi.metatype("HCountedArray", arr_mt)
+
+local bytes_mt = {
+  __call = function(self)
+    local ret = ""
+    for i = 0, tonumber(ffi.cast("uintptr_t", ffi.cast("void *", self.len)))-1
+      do ret = ret .. string.char(self.token[i])
+    end
+    return ret
+  end
+}
+local byte_string = ffi.metatype("HBytes", bytes_mt)
+
+local token_types = ffi.new("HTokenType")
+
+local parsed_token
+local tok_mt = {
+  __call = function(self)
+     if self.token_type == ffi.C.TT_BYTES then
+       return self.bytes()
+     elseif self.token_type == ffi.C.TT_SINT then
+       return tonumber(ffi.cast("intptr_t", ffi.cast("void *", self.sint)))
+     elseif self.token_type == ffi.C.TT_UINT then
+       return tonumber(ffi.cast("uintptr_t", ffi.cast("void *", self.uint)))
+     elseif self.token_type == ffi.C.TT_SEQUENCE then
+       return self.seq()
+     end
+  end
+}
+parsed_token = ffi.metatype("HParsedToken", tok_mt)
+
+function hammer.token(str)
+  return h.h_token(str, #str)
+end
+function hammer.ch(c)
+  if type(c) == "number" then
+    return h.h_ch(c)
+  else
+    return h.h_ch(c:byte())
+  end
+end
+function hammer.ch_range(lower, upper)
+  if type(lower) == "number" and type(upper) == "number" then
+    return h.h_ch_range(lower, upper)
+  -- FIXME this is really not thorough type checking
+  else
+    return h.h_ch_range(lower:byte(), upper:byte())
+  end
+end
+function hammer.int_range(parser, lower, upper)
+  return h.h_int_range(parser, lower, upper)
+end
+function hammer.bits(len, sign)
+  return h.h_bits(len, sign)
+end
+function hammer.int64()
+  return h.h_int64()
+end
+function hammer.int32()
+  return h.h_int32()
+end
+function hammer.int16()
+  return h.h_int16()
+end
+function hammer.int8()
+  return h.h_int8()
+end
+function hammer.uint64()
+  return h.h_uint64()
+end
+function hammer.uint32()
+  return h.h_uint32()
+end
+function hammer.uint16()
+  return h.h_uint16()
+end
+function hammer.uint8()
+  return h.h_uint8()
+end
+function hammer.whitespace(parser)
+  return h.h_whitespace(parser)
+end
+function hammer.left(parser1, parser2)
+  return h.h_left(parser1, parser2)
+end
+function hammer.right(parser1, parser2)
+  return h.h_right(parser1, parser2)
+end
+function hammer.middle(parser1, parser2, parser3)
+  return h.h_middle(parser1, parser2, parser3)
+end
+-- There could also be an overload of this that doesn't
+-- bother with the env pointer, and passes it as NIL by
+-- default, but I'm not going to deal with overloads now.
+function hammer.action(parser, action, user_data)
+  local cb = ffi.cast("HAction", action)
+  return h.h_action(parser, cb, user_data)
+end
+function hammer.in_(charset)
+  local cs = ffi.new("const unsigned char[" .. #charset .. "]", charset)
+  return h.h_in(cs, #charset)
+end
+function hammer.not_in(charset)
+  local cs = ffi.new("const unsigned char[" .. #charset .. "]", charset)
+  return h.h_not_in(cs, #charset)
+end
+function hammer.end_p()
+  return h.h_end_p()
+end
+function hammer.nothing_p()
+  return h.h_nothing_p()
+end
+function hammer.sequence(parser, ...)
+  return h.h_sequence(parser, append(nil, ...))
+end
+function hammer.choice(parser, ...)
+  return h.h_choice(parser, append(nil, ...))
+end
+function hammer.permutation(parser, ...)
+  return h.h_permutation(parser, append(nil, ...))
+end
+function hammer.butnot(parser1, parser2)
+  return h.h_butnot(parser1, parser2)
+end
+function hammer.difference(parser1, parser2)
+  return h.h_difference(parser1, parser2)
+end
+function hammer.xor(parser1, parser2)
+  return h.h_xor(parser1, parser2)
+end
+function hammer.many(parser)
+  return h.h_many(parser)
+end
+function hammer.many1(parser)
+  return h.h_many1(parser)
+end
+function hammer.repeat_n(parser, n)
+  return h.h_repeat_n(parser, n)
+end
+function hammer.optional(parser)
+  return h.h_optional(parser)
+end
+function hammer.ignore(parser)
+  return h.h_ignore(parser)
+end
+function hammer.sepBy(parser, sep)
+  return h.h_sepBy(parser, sep)
+end
+function hammer.sepBy1(parser, sep)
+  return h.h_sepBy1(parser, sep)
+end
+function hammer.epsilon_p()
+  return h.h_epsilon_p()
+end
+function hammer.length_value(length, value)
+  return h.h_length_value(length, value)
+end
+function hammer.attr_bool(parser, predicate, user_data)
+  local cb = ffi.cast("HPredicate", predicate)
+  return h.h_attr_bool(parser, cb, user_data)
+end
+function hammer.and_(parser)
+  return h.h_and(parser)
+end
+function hammer.not_(parser)
+  return h.h_not(parser)
+end
+function hammer.indirect()
+  return h.h_indirect()
+end
+function hammer.bind_indirect(indirect, inner)
+  return h.h_bind_indirect(indirect, inner)
+end
+function hammer.with_endianness(endianness, parser)
+  return h.h_with_endianness(endianness, parser)
+end
+function hammer.put_value(parser, name)
+  return h.h_put_value(parser, name)
+end
+function hammer.get_value(name)
+  return h.h_get_value(name)
+end
+function hammer.bind(parser, continuation, env)
+  local cb = ffi.cast("HContinuation", continuation)
+  return h.h_bind(parser, cb, env)
+end
+
+function hammer.compile(parser, backend, params)
+  return h.h_compile(parser, backend, params)
+end
+
+hammer.BYTE_BIG_ENDIAN = 0x1;
+hammer.BIT_BIG_ENDIAN = 0x2;
+hammer.BYTE_LITTLE_ENDIAN = 0x0;
+hammer.BIT_LITTLE_ENDIAN = 0x0;
+return hammer
\ No newline at end of file
diff --git a/src/bindings/lua/test.lua b/src/bindings/lua/test.lua
new file mode 100644
index 0000000000000000000000000000000000000000..cc32ce28ddfa003ea9c6c1eebbe7036e087c3685
--- /dev/null
+++ b/src/bindings/lua/test.lua
@@ -0,0 +1,844 @@
+describe("Combinator tests", function()
+  local hammer
+
+  setup(function()
+    hammer = require("hammer")
+    ffi = require("ffi")
+  end)
+
+  teardown(function()
+    hammer = nil
+  end)
+  
+  describe("Token tests", function()
+    local parser = hammer.token("95" .. string.char(0xa2))
+    it("parses a token", function()
+      local ret = parser:parse("95" .. string.char(0xa2))
+      assert.are.same("95" .. string.char(0xa2), ret.ast.bytes())
+    end)
+    it("does not parse an incomplete token", function()
+      local ret = parser:parse("95")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Char tests", function()
+    local parser = hammer.ch(0xa2)
+    it("parses a matching char", function()
+      local ret = parser:parse(string.char(0xa2))
+      assert.are.same(string.char(0xa2), string.char(ret.ast()))
+    end)
+    it("rejects a non-matching char", function()
+      local ret = parser:parse(string.char(0xa3))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Char range tests", function()
+    local parser = hammer.ch_range("a", "c")
+    it("parses a char in the range", function()
+      local ret = parser:parse("b")
+      assert.are.same("b", string.char(ret.ast()))
+    end)
+    it("rejects a char outside the range", function()
+      local ret = parser:parse("d")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Signed 64-bit int tests", function()
+    local parser = hammer.int64()
+    it("parses a valid 64-bit int", function()
+      local ret = parser:parse(string.char(0xff, 0xff, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x00))
+      assert.are.same(-0x200000000, ret.ast.sint)
+    end)
+    it("does not parse an invalid 64-bit int", function()
+      local ret = parser:parse(string.char(0xff, 0xff, 0xff, 0xfe, 0x00, 0x00, 0x00))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Signed 32-bit int tests", function()
+    local parser = hammer.int32()
+    it("parses a valid 32-bit int", function()
+      local ret = parser:parse(string.char(0xff, 0xfe, 0x00, 0x00))
+      assert.are.same(-0x20000, ret.ast.sint)
+    end)
+    it("does not parse an invalid 32-bit int", function()
+      local ret = parser:parse(string.char(0xff, 0xfe, 0x00))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Signed 16-bit int tests", function()
+    local parser = hammer.int16()
+    it("parses a valid 16-bit int", function()
+      local ret = parser:parse(string.char(0xfe, 0x00))
+      assert.are.same(-0x200, ret.ast.sint)
+    end)
+    it("does not parse an invalid 16-bit int", function()
+      local ret = parser:parse(string.char(0xfe))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Signed 8-bit int tests", function()
+    local parser = hammer.int8()
+    it("parses a valid 8-bit int", function()
+      local ret = parser:parse(string.char(0x88))
+      assert.are.same(-0x78, ret.ast.sint)
+    end)
+    it("does not parse an invalid 8-bit int", function()
+      local ret = parser:parse("")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Unsigned 64-bit int tests", function()
+    local parser = hammer.uint64()
+    it("parses a valid 64-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00))
+      assert.are.same(0x200000000, ret.ast())
+    end)
+    it("does not parse an invalid 64-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Unsigned 32-bit int tests", function()
+    local parser = hammer.uint32()
+    it("parses a valid 32-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x00, 0x02, 0x00, 0x00))
+      assert.are.same(0x20000, ret.ast())
+    end)
+    it("does not parse an invalid 32-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x00, 0x02, 0x00))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Unsigned 16-bit int tests", function()
+    local parser = hammer.uint16()
+    it("parses a valid 16-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x02, 0x00))
+      assert.are.same(0x200, ret.ast())
+    end)
+    it("does not parse an invalid 16-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x02))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Unsigned 8-bit int tests", function()
+    local parser = hammer.uint8()
+    it("parses a valid 8-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x78))
+      assert.are.same(0x78, ret.ast())
+    end)
+    it("does not parse an invalid 8=bit unsigned int", function()
+      local ret = parser:parse("")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Integer range tests", function()
+    local parser = hammer.int_range(hammer.uint8(), 3, 10)
+    it("parses a value in the range", function()
+      local ret = parser:parse(string.char(0x05))
+      assert.are.same(5, ret.ast())
+    end)
+    it("does not parse a value outside the range", function()
+      local ret = parser:parse(string.char(0xb))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Whitespace tests", function()
+    local parser = hammer.whitespace(hammer.ch("a"))
+    local parser2 = hammer.whitespace(hammer.end_p())
+    it("parses a string with no whitespace", function()
+      local ret = parser:parse("a")
+      assert.are.same("a", string.char(ret.ast()))
+    end)
+    it("parses a string with a leading space", function()
+      local ret = parser:parse(" a")
+      assert.are.same("a", string.char(ret.ast()))
+    end)
+    it("parses a string with leading spaces", function()
+      local ret = parser:parse("  a")
+      assert.are.same("a", string.char(ret.ast()))
+    end)
+    it("parses a string with a leading tab", function()
+      local ret = parser:parse("\ta")
+      assert.are.same("a", string.char(ret.ast()))
+    end)
+    it("does not parse a string with a leading underscore", function()
+      local ret = parser:parse("_a")
+      assert.is_falsy(ret)
+    end)
+    it("parses an empty string", function()
+      local ret = parser2:parse("")
+      assert.are.same(nil, ret.ast)
+    end)
+    it("parses a whitespace-only string", function()
+      local ret = parser2:parse("  ")
+      assert.are.same(nil, ret.ast)
+    end)
+    it("does not parse a string with leading whitespace and a trailing character", function()
+      local ret = parser2:parse("  x")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Leftmost-parser tests", function()
+    local parser = hammer.left(hammer.ch("a"), hammer.ch(" "))
+    it("parses the leftmost character", function()
+      local ret = parser:parse("a ")
+      assert.are.same("a", string.char(ret.ast()))
+    end)
+    it("does not parse a string that is too short", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string that starts with the wrong character", function()
+      local ret = parser:parse(" ")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string with the wrong character in the second place", function()
+      local ret = parser:parse("ab")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Rightmost-parser tests", function()
+    local parser = hammer.right(hammer.ch(" "), hammer.ch("a"))
+    it("parses the rightmost character", function()
+      local ret = parser:parse(" a")
+      assert.are.same("a", string.char(ret.ast()))
+    end)
+    it("does not parse a string that starts with the wrong character", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string that is too short", function()
+      local ret = parser:parse(" ")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string with the characters in the wrong order", function()
+      local ret = parser:parse("ba")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Middle-parser tests", function()
+    local parser = hammer.middle(hammer.ch(" "), hammer.ch("a"), hammer.ch(" "))
+    it("parses the middle character", function()
+      local ret = parser:parse(" a ")
+      assert.are.same("a", string.char(ret.ast()))
+    end)
+    it("does not parse a string that is too short", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+      ret = parser:parse(" ")
+      assert.is_falsy(ret)
+      ret = parser:parse(" a")
+      assert.is_falsy(ret)
+      ret = parser:parse("a ")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string with the wrong character in the middle", function()
+      ret = parser:parse(" b ")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string that starts with the wrong character", function()
+      ret = parser:parse("ba ")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string that ends with the wrong character", function()
+      ret = parser:parse(" ab")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Semantic action tests", function()
+    local function upcase(result, user_data)
+      local chars = result.ast()
+      local ret = ""
+      for i, v in ipairs(chars)
+        do ret = ret .. string.char(v()):upper()
+      end
+      return ffi.new("HParsedToken", {hammer.TT_BYTES, ret})
+    end
+    local parser = hammer.action(hammer.sequence(hammer.choice(hammer.ch("a"), hammer.ch("A")), hammer.choice(hammer.ch("b"), hammer.ch("B"))), upcase, nil)
+    it("converts a lowercase 'ab' to uppercase", function()
+      local ret = parser:parse("ab")
+      assert.are.same("AB", ret.ast())
+    end)
+    it("accepts an uppercase 'AB' unchanged", function()
+      local ret = parser:parse("AB")
+      assert.are.same("AB", ret.ast())
+    end)
+    it("rejects strings that don't match the underlying parser", function()
+      local ret = parser:parse("XX")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Character set membership tests", function()
+    local parser = hammer.in_({"a", "b", "c"})
+    it("parses a character that is in the included set", function()
+      local ret = parser:parse("b")
+      assert.are.same("b", string.char(ret.ast()))
+    end)
+    it("does not parse a character that is not in the included set", function()
+      local ret = parser:parse("d")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Character set non-membership tests", function()
+    local parser = hammer.not_in({"a", "b", "c"})
+    it("parses a character that is not in the excluded set", function()
+      local ret = parser:parse("d")
+      assert.are.same("d", string.char(ret.ast()))
+    end)
+    it("does not parse a character that is in the excluded set", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("End-of-input tests", function()
+    local parser = hammer.sequence(hammer.ch("a"), hammer.end_p())
+    it("parses a string that ends where it is expected to", function()
+      local ret = parser:parse("a")
+      assert.are.same({"a"}, ret.ast())
+    end)
+    it("does not parse a string that is too long", function()
+      local ret = parser:parse("aa")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Bottom parser tests", function()
+    local parser = hammer.nothing_p()
+    it("always fails", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Parser sequence tests", function()
+    local parser = hammer.sequence(hammer.ch("a"), hammer.ch("b"))
+    local parser2 = hammer.sequence(hammer.ch("a"), hammer.whitespace(hammer.ch("b")))
+    it("parses a string matching the sequence", function()
+      local ret = parser:parse("ab")
+      assert.are.same({"a", "b"}, ret.ast())
+    end)
+    it("does not parse a string that is too short", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string with the sequence out of order", function()
+      local ret = parser:parse("ba")
+      assert.is_falsy(ret)
+    end)
+    it("parses a whitespace-optional string with no whitespace", function()
+      local ret = parser2:parse("ab")
+      assert.are.same({"a", "b"}, ret.ast())
+    end)
+    -- it("parses a whitespace-optional string containing whitespace", function()
+    --   local ret = parser:parse("a b")
+    --   assert.are.same({"a", "b"}, ret.ast()) -- this is the line that segfaults
+    --   print("in sequence")
+    --   ret = parser:parse("a  b")
+    --   assert.are.same({"a", "b"}, ret.ast())
+    -- end)
+  end)
+
+  describe("Choice-of-parsers tests", function()
+    local parser = hammer.choice(hammer.ch("a"), hammer.ch("b"))
+    it("parses a character in the choice set", function()
+      local ret = parser:parse("a")
+      assert.are.same("a", string.char(ret.ast()))
+      ret = parser:parse("b")
+      assert.are.same("b", string.char(ret.ast()))
+    end)
+    it("does not parse a character not in the choice set", function()
+      local ret = parser:parse("c")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("X-but-not-Y tests", function()
+    local parser = hammer.butnot(hammer.ch("a"), hammer.token("ab"))
+    local parser2 = hammer.butnot(hammer.ch_range("0", "9"), hammer.ch("6"))
+    it("succeeds when 'a' matches but 'ab' doesn't", function()
+      local ret = parser:parse("a")
+      assert.are.same("a", string.char(ret.ast()))
+      ret = parser:parse("aa")
+      assert.are.same("a", string.char(ret.ast()))
+    end)
+    it("fails when p2's result is longer than p1's", function()
+      local ret = parser:parse("ab")
+      assert.is_falsy(ret)
+    end)
+    it("fails when p2's result is the same length as p1's", function()
+      local ret = parser2:parse("6")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Difference-of-parsers tests", function()
+    local parser = hammer.difference(hammer.token("ab"), hammer.ch("a"))
+    it("succeeds when 'ab' matches and its result is longer than the result for 'a'", function()
+      local ret = parser:parse("ab")
+      assert.are.same("ab", ret.ast())
+    end)
+    it("fails if 'ab' doesn't match", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("XOR-of-parsers tests", function()
+    local parser = hammer.xor(hammer.ch_range("0", "6"), hammer.ch_range("5", "9"))
+    it("parses a value only in the first range", function()
+      local ret = parser:parse("0")
+      assert.are.same("0", string.char(ret.ast()))
+    end)
+    it("parses a value only in the second range", function()
+      local ret = parser:parse("9")
+      assert.are.same("9", string.char(ret.ast()))
+    end)
+    it("does not parse a value inside both ranges", function()
+      local ret = parser:parse("5")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a value outside the range", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Kleene * tests", function()
+    local parser = hammer.many(hammer.choice(hammer.ch("a"), hammer.ch("b")))
+    it("parses an empty string", function()
+      local ret = parser:parse("")
+      assert.are.same({}, ret.ast())
+    end)
+    it("parses a single repetition of the pattern", function()
+      local ret = parser:parse("a")
+      assert.are.same({"a"}, ret.ast())
+      ret = parser:parse("b")
+      assert.are.same({"b"}, ret.ast())
+    end)
+    it("parses multiple repetitions of the pattern", function()
+      local ret = parser:parse("aabbaba")
+      assert.are.same({"a", "a", "b", "b", "a", "b", "a"}, ret.ast())
+    end)
+  end)
+
+  describe("Kleene + tests", function()
+    local parser = hammer.many1(hammer.choice(hammer.ch("a"), hammer.ch("b")))
+    it("does not parse an empty string", function()
+      local ret = parser:parse("")
+      assert.is_falsy(ret)
+    end)
+    it("parses a single repetition of the pattern", function()
+      local ret = parser:parse("a")
+      assert.are.same({"a"}, ret.ast())
+      ret = parser:parse("b")
+      assert.are.same({"b"}, ret.ast())
+    end)
+    it("parses multiple repetitions of the pattern", function()
+      local ret = parser:parse("aabbaba")
+      assert.are.same({"a", "a", "b", "b", "a", "b", "a"}, ret.ast())
+    end)
+    it("does not parse a string that does not start with one of the patterns to repeat", function()
+      local ret = parser:parse("daabbabadef")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Fixed-number-of-repetitions tests", function()
+    local parser = hammer.repeat_n(hammer.choice(hammer.ch("a"), hammer.ch("b")), 2)
+    it("does not parse a string without enough repetitions", function()
+      local ret = parser:parse("adef")
+      assert.is_falsy(ret)
+    end)
+    it("parses a string containing the correct number of repetitions", function()
+      local ret = parser:parse("abdef")
+      assert.are.same({"a", "b"}, ret.ast())
+    end)
+    it("does not parse a string that does not start with a character in the repetition set", function()
+      local ret = parser:parse("dabdef")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Kleene ? tests", function()
+    local parser = hammer.sequence(hammer.ch("a"), hammer.optional(hammer.choice(hammer.ch("b"), hammer.ch("c"))), hammer.ch("d"))
+    it("parses a string containing either optional character", function()
+      local ret = parser:parse("abd")
+      assert.are.same({"a", "b", "d"}, ret.ast())
+      ret = parser:parse("acd")
+      assert.are.same({"a", "c", "d"}, ret.ast())
+    end)
+    it("parses a string missing one of the optional characters", function()
+      local ret = parser:parse("ad")
+      assert.are.same({"a", {}, "d"}, ret.ast())
+    end)
+    it("does not parse a string containing a character not among the optional ones", function()
+      local ret = parser:parse("aed")
+      assert.is_falsy(ret.ast)
+    end)
+  end)
+
+  describe("'ignore' decorator tests", function()
+    local parser = hammer.sequence(hammer.ch("a"), hammer.ignore(hammer.ch("b")), hammer.ch("c"))
+    it("parses a string containing the pattern to ignore, and leaves that pattern out of the result", function()
+      local ret = parser:parse("abc")
+      assert.are.same({"a", "c"}, ret.ast())
+    end)
+    it("does not parse a string not containing the pattern to ignore", function()
+      local ret = parser:parse("ac")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Possibly-empty separated lists", function()
+    local parser = hammer.sepBy(hammer.choice(hammer.ch("1"), hammer.ch("2"), hammer.ch("3")), hammer.ch(","))
+    it("parses an ordered list", function()
+      local ret = parser:parse("1,2,3")
+      assert.are.same({"1", "2", "3"}, ret.ast())
+    end)
+    it("parses an unordered list", function()
+      local ret = parser:parse("1,3,2")
+      assert.are.same({"1", "3", "2"}, ret.ast())
+    end)
+    it("parses a list not containing all options", function()
+      local ret = parser:parse("1,3")
+      assert.are.same({"1", "3"}, ret.ast())
+    end)
+    it("parses a unary list", function()
+      local ret = parser:parse("3")
+      assert.are.same({"3"}, ret.ast())
+    end)
+    it("parses an empty list", function()
+      local ret = parser:parse("")
+      assert.are.same({}, ret.ast())
+    end)
+  end)
+
+  describe("Non-empty separated lists", function()
+    local parser = hammer.sepBy1(hammer.choice(hammer.ch("1"), hammer.ch("2"), hammer.ch("3")), hammer.ch(","))
+    it("parses an ordered list", function()
+      local ret = parser:parse("1,2,3")
+      assert.are.same({"1", "2", "3"}, ret.ast())
+    end)
+    it("parses an unordered list", function()
+      local ret = parser:parse("1,3,2")
+      assert.are.same({"1", "3", "2"}, ret.ast())
+    end)
+    it("parses a list not containing all options", function()
+      local ret = parser:parse("1,3")
+      assert.are.same({"1", "3"}, ret.ast())
+    end)
+    -- it("parses a unary list", function()
+    --   local ret = parser:parse("3")
+    --   print("in sepBy1")
+    --   assert.are.same({"3"}, ret.ast()) -- this line also segfaults
+    -- end)
+    it("does not parse an empty list", function()
+      local ret = parser:parse("")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Empty string tests", function()
+    local parser = hammer.sequence(hammer.ch("a"), hammer.epsilon_p(), hammer.ch("b"))
+    local parser2 = hammer.sequence(hammer.epsilon_p(), hammer.ch("a"))
+    local parser3 = hammer.sequence(hammer.ch("a"), hammer.epsilon_p())
+    it("parses an empty string between two characters", function()
+      local ret = parser:parse("ab")
+      assert.are.same({"a", "b"}, ret.ast())
+    end)
+    it("parses an empty string before a character", function()
+      local ret = parser2:parse("a")
+      assert.are.same({"a"}, ret.ast())
+    end)
+    it("parses an empty string after a character", function()
+      local ret = parser3:parse("a")
+      assert.are.same({"a"}, ret.ast())
+    end)
+  end)
+
+  describe("Attribute validation tests", function()
+    local function equals(result, user_data)
+      return result.ast.seq.elements[0].uint == result.ast.seq.elements[1].uint
+    end
+    local parser = hammer.attr_bool(hammer.many1(hammer.choice(hammer.ch("a"), hammer.ch("b"))), equals)
+    it("parses successfully when both characters are the same (i.e., the validation function succeeds)", function()
+      local ret = parser:parse("aa")
+      assert.are.same({"a", "a"}, ret.ast())
+      ret = parser:parse("bb")
+      assert.are.same({"b", "b"}, ret.ast())
+    end)
+    it("does not parse successfully when the characters are different (i.e., the validation function fails)", function()
+      local ret = parser:parse("ab")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Matching lookahead tests", function()
+    local parser = hammer.sequence(hammer.and_(hammer.ch("0")), hammer.ch("0"))
+    local parser2 = hammer.sequence(hammer.and_(hammer.ch("0")), hammer.ch("1"))
+    local parser3 = hammer.sequence(hammer.ch("1"), hammer.and_(hammer.ch("2")))
+    it("parses successfully when the lookahead matches the next character to parse", function()
+      local ret = parser:parse("0")
+      assert.are.same({"0"}, ret.ast())
+    end)
+    it("does not parse successfully when the lookahead does not match the next character to parse", function()
+      local ret = parser2:parse("0")
+      assert.is_falsy(ret)
+    end)
+    it("parses successfully when the lookahead is there", function()
+      local ret = parser3:parse("12")
+      assert.are.same({"1"}, ret.ast())
+    end)
+  end)
+
+  describe("Non-matching lookahead tests", function()
+    local parser = hammer.sequence(hammer.ch("a"), hammer.choice(hammer.ch("+"), hammer.token("++")), hammer.ch("b"))
+    local parser2 = hammer.sequence(hammer.ch("a"), hammer.choice(hammer.sequence(hammer.ch("+"), hammer.not_(hammer.ch("+"))), hammer.token("++")), hammer.ch("b"))
+    it("parses a single plus correctly in the 'choice' example", function()
+      local ret = parser:parse("a+b")
+      assert.are.same({"a", "+", "b"}, ret.ast())
+    end)
+    it("does not parse a double plus correctly in the 'choice' example", function()
+      local ret = parser:parse("a++b")
+      assert.is_falsy(ret)
+    end)
+    it("parses a single plus correctly in the 'not' example", function()
+      local ret = parser2:parse("a+b")
+      assert.are.same({"a", {"+"}, "b"}, ret.ast())
+    end)
+    it("parses a double plus correctly in the 'not' example", function()
+      local ret = parser2:parse("a++b")
+      assert.are.same({"a", "++", "b"}, ret.ast())
+    end)
+  end)
+
+  describe("Left recursion tests", function()
+    local parser = hammer.indirect()
+    hammer.bind_indirect(parser, hammer.choice(hammer.sequence(parser, hammer.ch("a")), hammer.ch("a")))
+    -- it("parses the base case", function()
+    --   print("in leftrec")
+    --   local ret = parser:parse("a") -- this line segfaults
+    --   assert.are.same({"a"}, ret.ast())
+    -- end)
+    it("parses one level of recursion", function()
+      local ret = parser:parse("aa")
+      assert.are.same({"a", "a"}, ret.ast())
+    end)
+    it("parses two levels of recursion", function()
+      local ret = parser:parse("aaa")
+      assert.are.same({{"a", "a"}, "a"}, ret.ast())
+    end)
+  end)
+
+  describe("Right recursion tests", function()
+    local parser = hammer.indirect()
+    hammer.bind_indirect(parser, hammer.choice(hammer.sequence(hammer.ch("a"), parser), hammer.epsilon_p()))
+    it("parses the base case", function()
+      local ret = parser:parse("a")
+      assert.are.same({"a"}, ret.ast())
+    end)
+    it("parses one level of recursion", function()
+      local ret = parser:parse("aa")
+      assert.are.same({"a", {"a"}}, ret.ast())
+    end)
+    it("parses two levels of recursion", function()
+      local ret = parser:parse("aaa")
+      assert.are.same({"a", {"a", {"a"}}}, ret.ast())
+    end)
+  end)
+
+  describe("Endianness tests", function()
+    local bit = require("bit")
+    local u32 = hammer.uint32()
+    local u5 = hammer.bits(5, false)
+    local bb = bit.bor(hammer.BYTE_BIG_ENDIAN, hammer.BIT_BIG_ENDIAN)
+    local bl = bit.bor(hammer.BYTE_BIG_ENDIAN, hammer.BIT_LITTLE_ENDIAN)
+    local lb = bit.bor(hammer.BYTE_LITTLE_ENDIAN, hammer.BIT_BIG_ENDIAN)
+    local ll = bit.bor(hammer.BYTE_LITTLE_ENDIAN, hammer.BIT_LITTLE_ENDIAN)
+    local parser1 = hammer.with_endianness(bb, u32)
+    local parser2 = hammer.with_endianness(bb, u5)
+    local parser3 = hammer.with_endianness(ll, u32)
+    local parser4 = hammer.with_endianness(ll, u5)
+    local parser5 = hammer.with_endianness(bl, u32)
+    local parser6 = hammer.with_endianness(bl, u5)
+    local parser7 = hammer.with_endianness(lb, u32)
+    local parser8 = hammer.with_endianness(lb, u5)
+    it("parses big-endian cases", function()
+      local ret = parser1:parse("abcd")
+      assert.are.same(0x61626364, ret.ast())
+      ret = parser2:parse("abcd")
+      assert.are.same(0xc, ret.ast())
+    end)
+    it("parses little-endian cases", function()
+      local ret = parser3:parse("abcd")
+      assert.are.same(0x61626364, ret.ast())
+      ret = parser4:parse("abcd")
+      assert.are.same(0xc, ret.ast())
+    end)
+    it("parses mixed-endian cases", function()
+      local ret = parser5:parse("abcd")
+      assert.are.same(0x61626364, ret.ast())
+      ret = parser6:parse("abcd")
+      assert.are.same(0x1, ret.ast())
+      ret = parser7:parse("abcd")
+      assert.are.same(0x64636261, ret.ast())
+      ret = parser8:parse("abcd")
+      assert.are.same(0xc, ret.ast())
+    end)
+  end)
+
+  describe("Symbol table tests", function()
+    local parser = hammer.sequence(hammer.put_value(hammer.uint8(), "size"), hammer.token("foo"), hammer.length_value(hammer.get_value("size"), hammer.uint8()))
+    it("parses a string that has enough bytes for the specified length", function()
+      local ret = parser:parse(string.char(0x06) .. "fooabcdef")
+      assert.are.same("foo", ret.ast()[2])
+      assert.are.same({0x61, 0x62, 0x63, 0x64, 0x65, 0x66}, ret.ast()[3])
+    end)
+    it("does not parse a string that does not have enough bytes for the specified length", function()
+      local ret = parser:parse(string.char(0x06) .. "fooabcde")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Permutation tests", function()
+    local parser = hammer.permutation(hammer.ch("a"), hammer.ch("b"), hammer.ch("c"))
+    it("parses a permutation of 'abc'", function()
+      local ret = parser:parse("abc")
+      assert.are.same({"a", "b", "c"}, ret.ast())
+      ret = parser:parse("acb")
+      assert.are.same({"a", "c", "b"}, ret.ast())
+      ret = parser:parse("bac")
+      assert.are.same({"b", "a", "c"}, ret.ast())
+      ret = parser:parse("bca")
+      assert.are.same({"b", "c", "a"}, ret.ast())
+      ret = parser:parse("cab")
+      assert.are.same({"c", "a", "b"}, ret.ast())
+      ret = parser:parse("cba")
+      assert.are.same({"c", "b", "a"}, ret.ast())
+    end)
+    it("does not parse a string that is not a permutation of 'abc'", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+      ret = parser:parse("ab")
+      assert.is_falsy(ret)
+      ret = parser:parse("abb")
+      assert.is_falsy(ret)
+    end)
+    parser = hammer.permutation(hammer.ch("a"), hammer.ch("b"), hammer.optional(hammer.ch("c")))
+    it("parses a string that is a permutation of 'ab[c]'", function()
+      local ret = parser:parse("abc")
+      assert.are.same({"a", "b", "c"}, ret.ast())
+      ret = parser:parse("acb")
+      assert.are.same({"a", "c", "b"}, ret.ast())
+      ret = parser:parse("bac")
+      assert.are.same({"b", "a", "c"}, ret.ast())
+      ret = parser:parse("bca")
+      assert.are.same({"b", "c", "a"}, ret.ast())
+      ret = parser:parse("cab")
+      assert.are.same({"c", "a", "b"}, ret.ast())
+      ret = parser:parse("cba")
+      assert.are.same({"c", "b", "a"}, ret.ast())
+      ret = parser:parse("ab")
+      assert.are.same({"a", "b"}, ret.ast())
+      ret = parser:parse("ba")
+      assert.are.same({"b", "a"}, ret.ast())
+    end)
+    it("does not parse a string that is not a permutation of 'ab[c]'", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+      ret = parser:parse("b")
+      assert.is_falsy(ret)
+      ret = parser:parse("c")
+      assert.is_falsy(ret)
+      ret = parser:parse("ca")
+      assert.is_falsy(ret)
+      ret = parser:parse("cb")
+      assert.is_falsy(ret)
+      ret = parser:parse("cc")
+      assert.is_falsy(ret)
+      ret = parser:parse("ccab")
+      assert.is_falsy(ret)
+      ret = parser:parse("ccc")
+      assert.is_falsy(ret)
+    end)
+    parser = hammer.permutation(hammer.optional(hammer.ch("c")), hammer.ch("a"), hammer.ch("b"))
+    it("parses a string that is a permutation of '[c]ab'", function()
+      local ret = parser:parse("abc")
+      assert.are.same({"a", "b", "c"}, ret.ast())
+      ret = parser:parse("acb")
+      assert.are.same({"a", "c", "b"}, ret.ast())
+      ret = parser:parse("bac")
+      assert.are.same({"b", "a", "c"}, ret.ast())
+      ret = parser:parse("bca")
+      assert.are.same({"b", "c", "a"}, ret.ast())
+      ret = parser:parse("cab")
+      assert.are.same({"c", "a", "b"}, ret.ast())
+      ret = parser:parse("cba")
+      assert.are.same({"c", "b", "a"}, ret.ast())
+      ret = parser:parse("ab")
+      assert.are.same({"a", "b"}, ret.ast())
+      ret = parser:parse("ba")
+      assert.are.same({"b", "a"}, ret.ast())
+    end)
+    it("does not parse a string that is not a permutation of '[c]ab'", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+      ret = parser:parse("b")
+      assert.is_falsy(ret)
+      ret = parser:parse("c")
+      assert.is_falsy(ret)
+      ret = parser:parse("ca")
+      assert.is_falsy(ret)
+      ret = parser:parse("cb")
+      assert.is_falsy(ret)
+      ret = parser:parse("cc")
+      assert.is_falsy(ret)
+      ret = parser:parse("ccab")
+      assert.is_falsy(ret)
+      ret = parser:parse("ccc")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  -- describe("Monadic binding tests", function()
+  --   local function continuation(allocator, result, env)
+  --     local val = 0
+  --     for k, v in result.seq
+  --       do val = val*10 + v->uint - 48
+  --     end
+  --     if val > 26 then
+  --       return nil
+  --     else
+  --       return hammer.ch
+  --     end
+  --   end
+  --   local parser = hammer.bind(hammer.many1(hammer.ch_range("0", "9")), continuation, "a")
+  --   it("parses a ", function()
+  --     local ret = parser:parse()
+  --     assert.are.same(ret.ast., )
+  --   end)
+  --   it("does not parse a ", function()
+  --     local ret = parser:parse()
+  --     assert.is_falsy(ret)
+  --   end)
+  -- end)
+end)
diff --git a/src/bitreader.c b/src/bitreader.c
index fe21e439ec778aa39b3cbeb18c0b3ba4fbe337fd..0f0825b87c60697f4bd8aff727a3ffe4ecc19532 100644
--- a/src/bitreader.c
+++ b/src/bitreader.c
@@ -108,3 +108,77 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) {
   out <<= final_shift;
   return (out ^ msb) - msb; // perform sign extension
 }
+
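+/* Advance the input stream by count bits; sets overrun if the skip runs past the end of input. */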
+void h_skip_bits(HInputStream* stream, size_t count) {
+  size_t left;
+
+  if (count == 0)
+    return;
+
+  if (stream->overrun)
+    return;
+
+  if (stream->index == stream->length) {
+    stream->overrun = true;
+    return;
+  }
+
+  // consume from a partial byte?
+  left = 8 - stream->bit_offset - stream->margin;
+  if (count < left) {
+    stream->bit_offset += count;
+    return;
+  }
+  if (left < 8) {
+    stream->index += 1;
+    stream->bit_offset = 0;
+    stream->margin = 0;
+    count -= left;
+  }
+  assert(stream->bit_offset == 0);
+  assert(stream->margin == 0);
+
+  // consume full bytes
+  left = stream->length - stream->index;
+  if (count / 8 <= left) {
+    stream->index += count / 8;
+    count = count % 8;
+  } else {
+    stream->index = stream->length;
+    stream->overrun = true;
+    return;
+  }
+  assert(count < 8);
+
+  // final partial byte
+  if (count > 0 && stream->index == stream->length)
+    stream->overrun = true;
+  else
+    stream->bit_offset = count;
+}
+
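+/* Reposition the input stream to absolute bit offset pos; sets overrun if pos lies past the end. */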
+void h_seek_bits(HInputStream* stream, size_t pos) {
+  size_t pos_index = pos / 8;
+  size_t pos_offset = pos % 8;
+
+  /* seek within the current byte? */
+  if (pos_index == stream->index) {
+    stream->bit_offset = pos_offset;
+    return;
+  }
+
+  stream->margin = 0;
+
+  /* seek past the end? */
+  if ((pos_index > stream->length) ||
+      (pos_index == stream->length && pos_offset > 0)) {
+    stream->index = stream->length;
+    stream->bit_offset = 0;
+    stream->overrun = true;
+    return;
+  }
+
+  stream->index = pos_index;
+  stream->bit_offset = pos_offset;
+  stream->margin = 0;
+}
diff --git a/src/cfgrammar.c b/src/cfgrammar.c
index 77e7ecad7ea1a70597a4c7c70ee21d9184a6c672..3f4e647fa879cfd225379cbc9ff3502cc6d12f77 100644
--- a/src/cfgrammar.c
+++ b/src/cfgrammar.c
@@ -6,10 +6,25 @@
 #include <ctype.h>
 
 
+// type of pairs used as memoization keys by h_follow and h_first
+struct k_nt {size_t k; const HCFChoice *nt;};
+
 // a special map value for use when the map is used to represent a set
 static void * const INSET = (void *)(uintptr_t)1;
 
 
+static bool eq_k_nt(const void *p, const void *q)
+{
+  const struct k_nt *a=p, *b=q;
+  return a->k == b->k && a->nt == b->nt;
+}
+
+static HHashValue hash_k_nt(const void *p)
+{
+  const struct k_nt *x = p;
+  return h_hash_ptr(x->nt) * x->k;
+}
+
 HCFGrammar *h_cfgrammar_new(HAllocator *mm__)
 {
   HCFGrammar *g = h_new(HCFGrammar, 1);
@@ -20,14 +35,17 @@ HCFGrammar *h_cfgrammar_new(HAllocator *mm__)
   g->nts    = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
   g->start  = NULL;
   g->geneps = NULL;
-  g->first  = NULL;
-  g->follow = NULL;
-  g->kmax   = 0;    // will be increased as needed by ensure_k
+  g->first  = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt);
+  g->follow = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt);
 
   HStringMap *eps = h_stringmap_new(g->arena);
   h_stringmap_put_epsilon(eps, INSET);
   g->singleton_epsilon = eps;
 
+  HStringMap *end = h_stringmap_new(g->arena);
+  h_stringmap_put_end(end, INSET);
+  g->singleton_end = end;
+
   return g;
 }
 
@@ -42,6 +60,7 @@ void h_cfgrammar_free(HCFGrammar *g)
 // helpers
 static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol);
 static void collect_geneps(HCFGrammar *grammar);
+static void eliminate_dead_rules(HCFGrammar *g);
 
 
 HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser)
@@ -83,6 +102,9 @@ HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *desugared)
     g->start = desugared;
   }
 
+  // simplifications
+  eliminate_dead_rules(g);
+
   // determine which nonterminals generate epsilon
   collect_geneps(g);
 
@@ -128,42 +150,6 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
   }
 }
 
-/* Increase g->kmax if needed, allocating enough first/follow slots. */
-static void ensure_k(HCFGrammar *g, size_t k)
-{
-  if (k <= g->kmax) {
-    return;
-  }
-  // NB: we don't actually use first/follow[0] but allocate it anyway
-  // so indices of the array correspond neatly to values of k
-
-  // allocate the new arrays
-  HHashTable **first  = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *));
-  HHashTable **follow = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *));
-
-  if (g->kmax > 0) {
-    // we are resizing, copy the old tables over
-    for(size_t i=0; i<=g->kmax; i++) {
-      first[i]  = g->first[i];
-      follow[i] = g->follow[i];
-    }
-  } else {
-    // we are initializing, allocate the first (in fact, dummy) tables
-    first[0]  = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr);
-    follow[0] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr);
-  }
-
-  // allocate the new tables
-  for(size_t i=g->kmax+1; i<=k; i++) {
-    first[i]  = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr);
-    follow[i] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr);
-  }
-
-  g->first = first;
-  g->follow = follow;
-  g->kmax = k;
-}
-
 bool h_derives_epsilon(HCFGrammar *g, const HCFChoice *symbol)
 {
   // XXX this can now also be implemented in terms of h_first:
@@ -232,6 +218,76 @@ static void collect_geneps(HCFGrammar *g)
   } while(g->geneps->used != prevused);
 }
 
+static bool mentions_symbol(HCFChoice **s, const HCFChoice *x)
+{
+  for(; *s; s++) {
+    if (*s == x)
+      return true;
+  }
+  return false;
+}
+
+static void remove_productions_with(HCFGrammar *g, const HCFChoice *x)
+{
+  HHashTableEntry *hte;
+  const HCFChoice *symbol;
+  size_t i;
+
+  for(i=0; i < g->nts->capacity; i++) {
+    for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
+      if (hte->key == NULL)
+        continue;
+      symbol = hte->key;
+      assert(symbol->type == HCF_CHOICE);
+
+      HCFSequence **p, **q;
+      for(p = symbol->seq; *p != NULL; ) {
+        if (mentions_symbol((*p)->items, x)) {
+          // remove production p
+          for(q=p; *(q+1) != NULL; q++);  // q = last production
+          *p = *q;                        // move q over p
+          *q = NULL;                      // delete old q
+        } else {
+          p++;
+        }
+      }
+    }
+  }
+}
+
+static void eliminate_dead_rules(HCFGrammar *g)
+{
+  HHashTableEntry *hte;
+  const HCFChoice *symbol = NULL;
+  size_t i;
+  bool found;
+
+  do {
+    found = false;
+    for(i=0; !found && i < g->nts->capacity; i++) {
+      for(hte = &g->nts->contents[i]; !found && hte; hte = hte->next) {
+        if (hte->key == NULL)
+          continue;
+        symbol = hte->key;
+        assert(symbol->type == HCF_CHOICE);
+
+        // this NT is dead if it has no productions
+        if (*symbol->seq == NULL)
+          found = true;
+      }
+    }
+    if (found) {
+      h_hashtable_del(g->nts, symbol);
+      remove_productions_with(g, symbol);
+    }
+  } while(found); // until nothing left to remove
+
+  // rebuild g->nts. there may now be symbols that no longer appear in any
+  // productions. we also might have removed g->start.
+  g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
+  collect_nts(g, g->start);
+}
+
 
 HStringMap *h_stringmap_new(HArena *a)
 {
@@ -240,6 +296,7 @@ HStringMap *h_stringmap_new(HArena *a)
   m->end_branch = NULL;
   m->char_branches = h_hashtable_new(a, h_eq_ptr, h_hash_ptr);
   m->arena = a;
+  m->taint = false;
   return m;
 }
 
@@ -396,30 +453,65 @@ bool h_stringmap_empty(const HStringMap *m)
           && h_hashtable_empty(m->char_branches));
 }
 
-const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
+static bool eq_stringmap(const void *a, const void *b)
+{
+  return h_stringmap_equal(a, b);
+}
+
+bool h_stringmap_equal(const HStringMap *a, const HStringMap *b)
+{
+  if (a->epsilon_branch != b->epsilon_branch)
+    return false;
+  if (a->end_branch != b->end_branch)
+    return false;
+  return h_hashtable_equal(a->char_branches, b->char_branches, eq_stringmap);
+}
+
+// helper for h_follow and h_first
+bool workset_equal(HHashTable *a, HHashTable *b)
 {
+  if (a == NULL || b == NULL)
+    return (a == b);
+  else
+    return h_hashtable_equal(a, b, eq_stringmap);
+}
+
+static const HStringMap *
+h_first_seq_work(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s);
+
+static const HStringMap *
+h_first_work(size_t k, HCFGrammar *g, HHashTable **pws, const HCFChoice *x)
+{
+  HHashTable *ws = *pws;
   HStringMap *ret;
   HCFSequence **p;
   uint8_t c;
+  struct k_nt kx = {k,x};
+  struct k_nt *pkx = NULL;
+  bool taint = false;
 
   // shortcut: first_0(X) is always {""}
   if (k==0) {
     return g->singleton_epsilon;
   }
-  // memoize via g->first
-  ensure_k(g, k);
-  ret = h_hashtable_get(g->first[k], x);
+  // shortcut: first_k($) is always {$}
+  if (x->type == HCF_END) {
+    return g->singleton_end;
+  }
+
+  // check memoization and workset
+  ret = h_hashtable_get(g->first, &kx);
+  if (ret == NULL && ws != NULL)
+    ret = h_hashtable_get(ws, &kx);
   if (ret != NULL) {
     return ret;
   }
+
+  // not found, create result
   ret = h_stringmap_new(g->arena);
   assert(ret != NULL);
-  h_hashtable_put(g->first[k], x, ret);
 
   switch(x->type) {
-  case HCF_END:
-    h_stringmap_put_end(ret, INSET);
-    break;
   case HCF_CHAR:
     h_stringmap_put_char(ret, x->chr, INSET);
     break;
@@ -433,30 +525,75 @@ const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
     break;
   case HCF_CHOICE:
     // this is a nonterminal
+
+    // to avoid recursive loops, taint ret and place it in workset
+    ret->taint = true;
+    if (ws == NULL)
+      ws = *pws = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt);
+    pkx = h_arena_malloc(g->arena, sizeof kx);
+    *pkx = kx;
+    h_hashtable_put(ws, pkx, ret);
+
     // return the union of the first sets of all productions
-    for(p=x->seq; *p; ++p)
-      h_stringmap_update(ret, h_first_seq(k, g, (*p)->items));
+    for(p=x->seq; *p; ++p) {
+      const HStringMap *first_rhs = h_first_seq_work(k, g, pws, (*p)->items);
+      assert(ws == *pws); // call above did not change the workset pointer
+      taint |= first_rhs->taint;
+      h_stringmap_update(ret, first_rhs);
+    }
     break;
   default:  // should not be reached
-    assert_message(0, "unknown HCFChoice type");
+    assert_message(0, "unexpected HCFChoice type");
+  }
+
+  // immediately memoize ret and remove it from ws if untainted by recursion
+  if (!taint) {
+    if (pkx == NULL) {
+      pkx = h_arena_malloc(g->arena, sizeof kx);
+      *pkx = kx;
+    } else if (ws != NULL) {
+      // we already had a key, so ret is already in ws; remove it.
+      h_hashtable_del(ws, pkx);
+    }
+    ret->taint = false;
+    h_hashtable_put(g->first, pkx, ret);
   }
   
   return ret;
 }
 
+const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
+{
+  HHashTable *ws, *bak;
+  const HStringMap *ret;
+
+  // fixpoint iteration on workset
+  ws = NULL;
+  do {
+    bak = ws;
+    ws = NULL;
+    ret = h_first_work(k, g, &ws, x);
+  } while(!workset_equal(ws, bak));
+
+  assert(ret != NULL);
+  return ret;
+}
+
 // helpers for h_first_seq, definitions below
 static bool is_singleton_epsilon(const HStringMap *m);
 static bool any_string_shorter(size_t k, const HStringMap *m);
 
 // pointer to functions like h_first_seq
-typedef const HStringMap *(*StringSetFun)(size_t, HCFGrammar *, HCFChoice **);
+typedef const HStringMap *
+    (*StringSetFun)(size_t, HCFGrammar *, HHashTable **, HCFChoice **);
 
 // helper for h_first_seq and h_follow
-static void stringset_extend(HCFGrammar *g, HStringMap *ret,
+static bool stringset_extend(HCFGrammar *g, HHashTable **pws, HStringMap *ret,
                              size_t k, const HStringMap *as,
                              StringSetFun f, HCFChoice **tail);
 
-const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
+static const HStringMap *
+h_first_seq_work(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s)
 {
   // shortcut: the first set of the empty sequence, for any k, is {""}
   if (*s == NULL) {
@@ -467,11 +604,11 @@ const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
   HCFChoice *x = s[0];
   HCFChoice **tail = s+1;
 
-  const HStringMap *first_x = h_first(k, g, x);
+  const HStringMap *first_x = h_first_work(k, g, pws, x);
 
   // shortcut: if first_k(X) = {""}, just return first_k(tail)
   if (is_singleton_epsilon(first_x)) {
-    return h_first_seq(k, g, tail);
+    return h_first_seq_work(k, g, pws, tail);
   }
 
   // shortcut: if no elements of first_k(X) have length <k, just return first_k(X)
@@ -483,8 +620,25 @@ const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
   HStringMap *ret = h_stringmap_new(g->arena);
 
   // extend the elements of first_k(X) up to length k from tail
-  stringset_extend(g, ret, k, first_x, h_first_seq, tail);
+  ret->taint = stringset_extend(g, pws, ret, k, first_x, h_first_seq_work, tail);
+
+  return ret;
+}
+
+const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
+{
+  HHashTable *ws, *bak;
+  const HStringMap *ret;
 
+  // fixpoint iteration on workset
+  ws = NULL;
+  do {
+    bak = ws;
+    ws = NULL;
+    ret = h_first_seq_work(k, g, &ws, s);
+  } while(!workset_equal(ws, bak));
+
+  assert(ret != NULL);
   return ret;
 }
 
@@ -546,13 +700,25 @@ static void remove_all_shorter(size_t k, HStringMap *m)
 }
 
 // h_follow adapted to the signature of StringSetFun
-static inline
-const HStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice **s)
+static const HStringMap *
+h_follow_(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s)
 {
+  assert(pws == NULL);
   return h_follow(k, g, *s);
 }
 
-const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
+static const HStringMap *
+h_follow_work(size_t k, HCFGrammar *g, HHashTable **pws, const HCFChoice *x);
+
+// h_follow_work adapted to the signature of StringSetFun
+static const HStringMap *
+h_follow_work_(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s)
+{
+  return h_follow_work(k, g, pws, *s);
+}
+
+static const HStringMap *
+h_follow_work(size_t k, HCFGrammar *g, HHashTable **pws, const HCFChoice *x)
 {
   // consider all occurances of X in g
   // the follow set of X is the union of:
@@ -564,28 +730,45 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
   //   { a b | a <- first_k(tail), b <- follow_l(A), l=k-|a| }
 
   HStringMap *ret;
+  HHashTable *ws = *pws;
+  struct k_nt kx = {k,x};
+  struct k_nt *pkx;
+  bool taint = false;
 
   // shortcut: follow_0(X) is always {""}
   if (k==0) {
     return g->singleton_epsilon;
   }
-  // memoize via g->follow
-  ensure_k(g, k);
-  ret = h_hashtable_get(g->follow[k], x);
+
+  // check memoization and workset
+  ret = h_hashtable_get(g->follow, &kx);
+  if (ret == NULL && ws != NULL)
+    ret = h_hashtable_get(ws, &kx);
   if (ret != NULL) {
     return ret;
   }
+
+  // not found, create result
   ret = h_stringmap_new(g->arena);
   assert(ret != NULL);
-  h_hashtable_put(g->follow[k], x, ret);
+
+  // to avoid recursive loops, taint ret and place it in workset
+  ret->taint = true;
+  if (ws == NULL)
+    ws = *pws = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt);
+  pkx = h_arena_malloc(g->arena, sizeof kx);
+  *pkx = kx;
+  h_hashtable_put(ws, pkx, ret);
 
   // if X is the start symbol, the end token is in its follow set
   if (x == g->start) {
     h_stringmap_put_end(ret, INSET);
   }
-  // iterate over g->nts
+
+  // iterate over g->nts, looking for X
   size_t i;
   HHashTableEntry *hte;
+  int x_found=0;
   for (i=0; i < g->nts->capacity; i++) {
     for (hte = &g->nts->contents[i]; hte; hte = hte->next) {
       if (hte->key == NULL) {
@@ -600,19 +783,46 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
         HCFChoice **s = (*p)->items;        // production's right-hand side
         
         for (; *s; s++) {
-          if (*s == x) { // occurance found
+          if (*s == x) { // occurrence found
+            x_found=1;
             HCFChoice **tail = s+1;
 
             const HStringMap *first_tail = h_first_seq(k, g, tail);
 
             // extend the elems of first_k(tail) up to length k from follow(A)
-            stringset_extend(g, ret, k, first_tail, h_follow_, &a);
+            taint |= stringset_extend(g, pws, ret, k,
+                                      first_tail, h_follow_work_, &a);
           }
         }
       }
     }
   }
+  assert(x_found || x == g->start);        // no orphan non-terminals
+
+  // immediately memoize ret and remove it from ws if untainted by recursion
+  if (!taint) {
+    ret->taint = false;
+    h_hashtable_del(ws, pkx);
+    h_hashtable_put(g->follow, pkx, ret);
+  }
+
+  return ret;
+}
+
+const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
+{
+  HHashTable *ws, *bak;
+  const HStringMap *ret;
 
+  // fixpoint iteration on workset
+  ws = NULL;
+  do {
+    bak = ws;
+    ws = NULL;
+    ret = h_follow_work(k, g, &ws, x);
+  } while(!workset_equal(ws, bak));
+
+  assert(ret != NULL);
   return ret;
 }
 
@@ -629,7 +839,7 @@ HStringMap *h_predict(size_t k, HCFGrammar *g,
   // casting the const off of A below. note: stringset_extend does
   // not touch this argument, only passes it through to h_follow
   // in this case, which accepts it, once again, as const.
-  stringset_extend(g, ret, k, first_rhs, h_follow_, (HCFChoice **)&A);
+  stringset_extend(g, NULL, ret, k, first_rhs, h_follow_, (HCFChoice **)&A);
 
   // make sure there are only strings of length _exactly_ k
   remove_all_shorter(k, ret);
@@ -638,13 +848,17 @@ HStringMap *h_predict(size_t k, HCFGrammar *g,
 }
 
 // add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret
-static void stringset_extend(HCFGrammar *g, HStringMap *ret,
+static bool stringset_extend(HCFGrammar *g, HHashTable **pws, HStringMap *ret,
                              size_t k, const HStringMap *as,
                              StringSetFun f, HCFChoice **tail)
 {
+  bool taint = false;
+
   if (as->epsilon_branch) {
     // for a="", add f_k(tail) to ret
-    h_stringmap_update(ret, f(k, g, tail));
+    const HStringMap *f_tail = f(k, g, pws, tail);
+    taint |= f_tail->taint;
+    h_stringmap_update(ret, f_tail);
   }
 
   if (as->end_branch) {
@@ -671,9 +885,11 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
       HStringMap *ret_ = h_stringmap_new(g->arena);
       h_stringmap_put_after(ret, c, ret_);
 
-      stringset_extend(g, ret_, k-1, as_, f, tail);
+      taint |= stringset_extend(g, pws, ret_, k-1, as_, f, tail);
     }
   }
+
+  return taint;
 }
 
 
@@ -818,13 +1034,15 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
   fputs(name, f);
   i += strlen(name);
   for(; i<column; i++) fputc(' ', f);
-  fputs(" ->", f);
 
   assert(nt->type == HCF_CHOICE);
   HCFSequence **p = nt->seq;
   if (*p == NULL) {
-    return;          // shouldn't happen
+    fputs(" -x\n", f);            // empty choice, e.g. h_nothing_p()
+    return;
   }
+
+  fputs(" ->", f);
   pprint_sequence(f, g, *p++);    // print first production on the same line
   for(; *p; p++) {                // print the rest below with "or" bars
     for(i=0; i<column; i++) fputc(' ', f);    // indent
@@ -835,6 +1053,8 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
 
 void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
 {
+  HAllocator *mm__ = g->mm__;
+
   if (g->nts->used < 1) {
     return;
   }
@@ -842,11 +1062,12 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
   // determine maximum string length of symbol names
   int len;
   size_t s;
-  for(len=1, s=26; s < g->nts->used; len++, s*=26); 
+  for(len=1, s=26; s < g->nts->used; len++, s*=26);
 
-  // iterate over g->nts
+  // iterate over g->nts and collect its entries in an ordered array
   size_t i;
   HHashTableEntry *hte;
+  const HCFChoice **arr = h_new(const HCFChoice *, g->nts->used);
   for(i=0; i < g->nts->capacity; i++) {
     for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
       if (hte->key == NULL) {
@@ -855,9 +1076,16 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
       const HCFChoice *a = hte->key;        // production's left-hand symbol
       assert(a->type == HCF_CHOICE);
 
-      pprint_ntrules(file, g, a, indent, len);
+      size_t id = (uintptr_t)hte->value;    // nonterminal id
+      assert(id < g->nts->used);
+      arr[id] = a;
     }
   }
+
+  // print rules in alphabetical order
+  for(i=0; i < g->nts->used; i++)
+    pprint_ntrules(file, g, arr[i], indent, len);
+  h_free(arr);
 }
 
 void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent)
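
Illustrative sketch only: with the reworked memoization above, callers no longer manage per-k tables; h_first and h_follow drive their workers to a fixpoint internally and cache results under the (k, nonterminal) pair. A minimal usage sketch, assuming `p` is an HParser built from context-free combinators:

    HCFGrammar *g = h_cfgrammar(&system_allocator, p);
    if (g != NULL) {
      const HStringMap *f1 = h_first(1, g, g->start);   /* first_1(start), memoized under {1, start} */
      const HStringMap *w2 = h_follow(2, g, g->start);  /* follow_2(start), memoized under {2, start} */
      (void)f1; (void)w2;
    }
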
diff --git a/src/cfgrammar.h b/src/cfgrammar.h
index 2e8ba83cee5c152baae1177ed7b99d45cf11042c..8945ecb97d0adc1aa1f69391f54726a156c91211 100644
--- a/src/cfgrammar.h
+++ b/src/cfgrammar.h
@@ -8,15 +8,15 @@ typedef struct HCFGrammar_ {
   HHashSet    *nts;     // HCFChoices, each representing the alternative
                         // productions for one nonterminal
   HHashSet    *geneps;  // set of NTs that can generate the empty string
-  HHashTable  **first;  // memoized first sets of the grammar's symbols
-  HHashTable  **follow; // memoized follow sets of the grammar's NTs
-  size_t      kmax;     // maximum lookahead depth allocated
+  HHashTable  *first;   // memoized first sets of the grammar's symbols
+  HHashTable  *follow;  // memoized follow sets of the grammar's NTs
   HArena      *arena;
   HAllocator  *mm__;
 
-  // constant set containing only the empty string.
-  // this is only a member of HCFGrammar because it needs a pointer to arena.
+  // constant sets containing only the empty string or end symbol.
+  // these are only members of HCFGrammar because they need a pointer to arena.
   const struct HStringMap_ *singleton_epsilon;
+  const struct HStringMap_ *singleton_end;
 } HCFGrammar;
 
 
@@ -37,6 +37,7 @@ typedef struct HStringMap_ {
   void *end_branch;             // points to leaf value
   HHashTable *char_branches;    // maps to inner nodes (HStringMaps)
   HArena *arena;
+  bool taint;                   // for use by h_follow() and h_first()
 } HStringMap;
 
 HStringMap *h_stringmap_new(HArena *a);
@@ -52,6 +53,7 @@ void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead);
 bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end);
 bool h_stringmap_present_epsilon(const HStringMap *m);
 bool h_stringmap_empty(const HStringMap *m);
+bool h_stringmap_equal(const HStringMap *a, const HStringMap *b);
 
 static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c)
  { return h_hashtable_get(m->char_branches, (void *)char_key(c)); }
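
A small sketch of the new equality helper (illustrative; assumes the INSET marker from cfgrammar.c and the usual arena setup): two string maps compare equal exactly when they have the same shape and the same leaf values, which is how the first/follow fixpoint detects that a workset has stabilized.

    HArena *a = h_new_arena(&system_allocator, 0);
    HStringMap *m1 = h_stringmap_new(a), *m2 = h_stringmap_new(a);
    h_stringmap_put_end(m1, INSET);
    h_stringmap_put_end(m2, INSET);
    assert(h_stringmap_equal(m1, m2));    /* same structure, same leaf pointers */
    h_stringmap_put_char(m1, 'x', INSET);
    assert(!h_stringmap_equal(m1, m2));   /* m1 now has an extra character branch */
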
diff --git a/src/datastructures.c b/src/datastructures.c
index 451afb94ec39932dfe1f8c58aa82c0777f73b011..8a09b5ce755e8880542f02c82b3b3e0db4f2fa48 100644
--- a/src/datastructures.c
+++ b/src/datastructures.c
@@ -9,12 +9,14 @@
 
 
 HCountedArray *h_carray_new_sized(HArena * arena, size_t size) {
-  HCountedArray *ret = h_arena_malloc(arena, sizeof(HCountedArray));
+  /* _noinit here because we init all the elements below */
+  HCountedArray *ret = h_arena_malloc_noinit(arena, sizeof(HCountedArray));
   if (size == 0)
     size = 1;
   ret->used = 0;
   ret->capacity = size;
   ret->arena = arena;
+  /* we actually want to zero these */
   ret->elements = h_arena_malloc(arena, sizeof(void*) * size);
   return ret;
 }
@@ -24,12 +26,21 @@ HCountedArray *h_carray_new(HArena * arena) {
 }
 
 void h_carray_append(HCountedArray *array, void* item) {
+  HParsedToken **elements;
+
   if (array->used >= array->capacity) {
-    HParsedToken **elements = h_arena_malloc(array->arena, (array->capacity *= 2) * sizeof(void*));
+    /* _noinit here; we init below */
+    elements = h_arena_malloc_noinit(array->arena,
+        (array->capacity *= 2) * sizeof(void*));
     for (size_t i = 0; i < array->used; i++)
       elements[i] = array->elements[i];
     for (size_t i = array->used; i < array->capacity; i++)
       elements[i] = 0;
+    /*
+     * XXX I hope we don't use this much, because h_arena_free() doesn't
+     * quite seem to be there and doing a lot of this would get pretty
+     * wasteful.
+     */
     h_arena_free(array->arena, array->elements);
     array->elements = elements;
   }
@@ -38,7 +49,8 @@ void h_carray_append(HCountedArray *array, void* item) {
 
 // HSlist
 HSlist* h_slist_new(HArena *arena) {
-  HSlist *ret = h_arena_malloc(arena, sizeof(HSlist));
+  /* _noinit here; we set every element of ret below */
+  HSlist *ret = h_arena_malloc_noinit(arena, sizeof(HSlist));
   ret->head = NULL;
   ret->arena = arena;
   return ret;
@@ -53,8 +65,12 @@ HSlist* h_slist_copy(HSlist *slist) {
     tail = ret->head;
     head = head->next;
     while (head != NULL) {
-      // append head item to tail in a new node
-      HSlistNode *node = h_arena_malloc(slist->arena, sizeof(HSlistNode));
+      /*
+       * append head item to tail in a new node
+       *
+       * use _noinit; we set every element of node after we allocate
+       */
+      HSlistNode *node = h_arena_malloc_noinit(slist->arena, sizeof(HSlistNode));
       node->elem = head->elem;
       node->next = NULL;
       tail = tail->next = node;
@@ -85,10 +101,11 @@ void* h_slist_pop(HSlist *slist) {
 }
 
 void h_slist_push(HSlist *slist, void* item) {
-  HSlistNode *hnode = h_arena_malloc(slist->arena, sizeof(HSlistNode));
+  /* use _noinit; we set every element of node */
+  HSlistNode *hnode = h_arena_malloc_noinit(slist->arena, sizeof(HSlistNode));
   hnode->elem = item;
   hnode->next = slist->head;
-  // write memory barrier here.
+  /* write memory barrier here. */
   slist->head = hnode;
 }
 
@@ -132,30 +149,34 @@ void h_slist_free(HSlist *slist) {
 }
 
 HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc) {
-  HHashTable *ht = h_arena_malloc(arena, sizeof(HHashTable));
+  /* _noinit because all fields are set below */
+  HHashTable *ht = h_arena_malloc_noinit(arena, sizeof(HHashTable));
   ht->hashFunc = hashFunc;
   ht->equalFunc = equalFunc;
   ht->capacity = 64; // to start; should be tuned later...
   ht->used = 0;
   ht->arena = arena;
-  ht->contents = h_arena_malloc(arena, sizeof(HHashTableEntry) * ht->capacity);
+  /* _noinit because all fields of all entries are set in the loop */
+  ht->contents = h_arena_malloc_noinit(arena,
+      sizeof(HHashTableEntry) * ht->capacity);
   for (size_t i = 0; i < ht->capacity; i++) {
     ht->contents[i].key = NULL;
     ht->contents[i].value = NULL;
     ht->contents[i].next = NULL;
     ht->contents[i].hashval = 0;
   }
-  //memset(ht->contents, 0, sizeof(HHashTableEntry) * ht->capacity);
+
   return ht;
 }
 
-void* h_hashtable_get(const HHashTable* ht, const void* key) {
-  HHashValue hashval = ht->hashFunc(key);
+void * h_hashtable_get_precomp(const HHashTable *ht, const void *key,
+                               HHashValue hashval) {
+  HHashTableEntry *hte = NULL;
+
 #ifdef CONSISTENCY_CHECK
   assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
 #endif
 
-  HHashTableEntry *hte = NULL;
   for (hte = &ht->contents[hashval & (ht->capacity - 1)];
        hte != NULL;
        hte = hte->next) {
@@ -169,35 +190,63 @@ void* h_hashtable_get(const HHashTable* ht, const void* key) {
       return hte->value;
     }
   }
+
   return NULL;
 }
 
+void * h_hashtable_get(const HHashTable *ht, const void *key) {
+  HHashValue hashval = ht->hashFunc(key);
+
+  return h_hashtable_get_precomp(ht, key, hashval);
+}
+
 void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry* new_entry);
 
 void h_hashtable_ensure_capacity(HHashTable* ht, size_t n) {
+  HHashTableEntry *old_contents, *new_contents;
   bool do_resize = false;
   size_t old_capacity = ht->capacity;
   while (n * 1.3 > ht->capacity) {
     ht->capacity *= 2;
     do_resize = true;
   }
-  if (!do_resize)
-    return;
-  HHashTableEntry *old_contents = ht->contents;
-  HHashTableEntry *new_contents = h_arena_malloc(ht->arena, sizeof(HHashTableEntry) * ht->capacity);
-  ht->contents = new_contents;
-  ht->used = 0;
-  memset(new_contents, 0, sizeof(HHashTableEntry) * ht->capacity);
-  for (size_t i = 0; i < old_capacity; ++i)
-    for (HHashTableEntry *entry = &old_contents[i];
-	 entry;
-	 entry = entry->next)
-      if (entry->key)
-	h_hashtable_put_raw(ht, entry);
-  //h_arena_free(ht->arena, old_contents);
+
+  if (do_resize) {
+    old_contents = ht->contents;
+    /* _noinit because we set the whole thing below */
+    new_contents = h_arena_malloc_noinit(ht->arena,
+        sizeof(HHashTableEntry) * ht->capacity);
+    ht->contents = new_contents;
+    ht->used = 0;
+    memset(new_contents, 0, sizeof(HHashTableEntry) * ht->capacity);
+    for (size_t i = 0; i < old_capacity; ++i) {
+      for (HHashTableEntry *entry = &old_contents[i];
+           entry;
+           entry = entry->next) {
+        if (entry->key) {
+          h_hashtable_put_raw(ht, entry);
+        }
+      }
+    }
+    /* h_arena_free(ht->arena, old_contents); */
+  }
 }
 
-void h_hashtable_put(HHashTable* ht, const void* key, void* value) {
+void h_hashtable_put_precomp(HHashTable *ht, const void *key, void *value,
+                             HHashValue hashval) {
+  HHashTableEntry entry = {
+    .key = key,
+    .value = value,
+    .hashval = hashval
+  };
+
+  /* Rebalance if necessary */
+  h_hashtable_ensure_capacity(ht, ht->used + 1);
+  /* Insert it */
+  h_hashtable_put_raw(ht, &entry);
+}
+
+void h_hashtable_put(HHashTable *ht, const void *key, void *value) {
   // # Start with a rebalancing
   h_hashtable_ensure_capacity(ht, ht->used + 1);
 
@@ -227,7 +276,7 @@ void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry *new_entry) {
     }
     // Add a new link...
     assert (hte->next == NULL);
-    hte->next = h_arena_malloc(ht->arena, sizeof(HHashTableEntry));
+    hte->next = h_arena_malloc_noinit(ht->arena, sizeof(HHashTableEntry));
     hte = hte->next;
     hte->next = NULL;
     ht->used++;
@@ -338,16 +387,18 @@ static bool hte_same_length(HHashTableEntry *xs, HHashTableEntry *ys) {
 }
 
 // helper for hte_equal: are all elements of xs present in ys?
-static bool hte_subset(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys)
+static bool hte_subset(HEqualFunc eq, HEqualFunc value_eq,
+                       HHashTableEntry *xs, HHashTableEntry *ys)
 {
   for(; xs; xs=xs->next) {
     if(xs->key == NULL) continue;   // element not present
 
     HHashTableEntry *hte;
     for(hte=ys; hte; hte=hte->next) {
-      if(hte->key == xs->key) break; // assume an element is equal to itself
+      // assume an element is equal to itself
+      if(hte->key == xs->key && hte->value == xs->value) break;
       if(hte->hashval != xs->hashval) continue; // shortcut
-      if(eq(hte->key, xs->key)) break;
+      if(eq(hte->key, xs->key) && value_eq(hte->value, xs->value)) break;
     }
     if(hte == NULL) return false;   // element not found
   }
@@ -355,19 +406,20 @@ static bool hte_subset(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys)
 }
 
 // compare two lists of HHashTableEntries
-static inline bool hte_equal(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys) {
-  return (hte_same_length(xs, ys) && hte_subset(eq, xs, ys));
+static inline bool hte_equal(HEqualFunc eq, HEqualFunc value_eq,
+                             HHashTableEntry *xs, HHashTableEntry *ys) {
+  return (hte_same_length(xs, ys) && hte_subset(eq, value_eq, xs, ys));
 }
 
-/* Set equality of HHashSets.
+/* Equality of HHashTables.
  * Obviously, 'a' and 'b' must use the same equality function.
  * Not strictly necessary, but we also assume the same hash function.
  */
-bool h_hashset_equal(const HHashSet *a, const HHashSet *b) {
+bool h_hashtable_equal(const HHashSet *a, const HHashSet *b, HEqualFunc value_eq) {
   if(a->capacity == b->capacity) {
     // iterate over the buckets in parallel
     for(size_t i=0; i < a->capacity; i++) {
-      if(!hte_equal(a->equalFunc, &a->contents[i], &b->contents[i]))
+      if(!hte_equal(a->equalFunc, value_eq, &a->contents[i], &b->contents[i]))
         return false;
     }
   } else {
@@ -377,6 +429,18 @@ bool h_hashset_equal(const HHashSet *a, const HHashSet *b) {
   return true;
 }
 
+static bool eq_dontcare(const void *p, const void *q) {
+  return true;
+}
+
+/* Set equality of HHashSets.
+ * Obviously, 'a' and 'b' must use the same equality function.
+ * Not strictly necessary, but we also assume the same hash function.
+ */
+bool h_hashset_equal(const HHashSet *a, const HHashSet *b) {
+  return h_hashtable_equal(a, b, eq_dontcare);
+}
+
 bool h_eq_ptr(const void *p, const void *q) {
   return (p==q);
 }
@@ -388,11 +452,26 @@ HHashValue h_hash_ptr(const void *p) {
 }
 
 uint32_t h_djbhash(const uint8_t *buf, size_t len) {
-  uint32_t hash = 5381;
+  uint32_t h = 5381;
+
+  while (len >= 16) {
+    h = h * 33 + buf[0];  h = h * 33 + buf[1];
+    h = h * 33 + buf[2];  h = h * 33 + buf[3];
+    h = h * 33 + buf[4];  h = h * 33 + buf[5];
+    h = h * 33 + buf[6];  h = h * 33 + buf[7];
+    h = h * 33 + buf[8];  h = h * 33 + buf[9];
+    h = h * 33 + buf[10]; h = h * 33 + buf[11];
+    h = h * 33 + buf[12]; h = h * 33 + buf[13];
+    h = h * 33 + buf[14]; h = h * 33 + buf[15];
+    len -= 16;
+    buf += 16;
+  }
+
   while (len--) {
-    hash = hash * 33 + *buf++;
+    h = h * 33 + *buf++;
   }
-  return hash;
+
+  return h;
 }
 
 void h_symbol_put(HParseState *state, const char* key, void *value) {
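
Usage sketch for the new *_precomp entry points (illustrative; `make_value` is a hypothetical constructor): the key is hashed once and the same hash value feeds both the lookup and the subsequent insert, avoiding a second pass over the key.

    HHashValue hv = ht->hashFunc(key);
    void *v = h_hashtable_get_precomp(ht, key, hv);
    if (v == NULL) {
      v = make_value(key);                      /* hypothetical */
      h_hashtable_put_precomp(ht, key, v, hv);  /* no second hashFunc call */
    }
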
diff --git a/src/glue.c b/src/glue.c
index 58fe4175d4fd326b62c76449449a74768605ca9e..79e106c8bd902d6fe13ac485781222d202ea9dcc 100644
--- a/src/glue.c
+++ b/src/glue.c
@@ -60,15 +60,8 @@ static void act_flatten_(HCountedArray *seq, const HParsedToken *tok) {
 }
 
 HParsedToken *h_act_flatten(const HParseResult *p, void* user_data) {
-  HCountedArray *seq = h_carray_new(p->arena);
-
-  act_flatten_(seq, p->ast);
-
-  HParsedToken *res = a_new_(p->arena, HParsedToken, 1);
-  res->token_type = TT_SEQUENCE;
-  res->seq = seq;
-  res->index = p->ast->index;
-  res->bit_offset = p->ast->bit_offset;
+  HParsedToken *res = h_make_seq(p->arena);
+  act_flatten_(res->seq, p->ast);
   return res;
 }
 
@@ -106,7 +99,7 @@ HParsedToken *h_make_seqn(HArena *arena, size_t n)
   return ret;
 }
 
-HParsedToken *h_make_bytes(HArena *arena, uint8_t *array, size_t len)
+HParsedToken *h_make_bytes(HArena *arena, const uint8_t *array, size_t len)
 {
   HParsedToken *ret = h_make_(arena, TT_BYTES);
   ret->bytes.len = len;
@@ -128,6 +121,20 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val)
   return ret;
 }
 
+HParsedToken *h_make_double(HArena *arena, double val)
+{
+  HParsedToken *ret = h_make_(arena, TT_DOUBLE);
+  ret->dbl = val;
+  return ret;
+}
+
+HParsedToken *h_make_float(HArena *arena, float val)
+{
+  HParsedToken *ret = h_make_(arena, TT_FLOAT);
+  ret->flt = val;
+  return ret;
+}
+
 // XXX -> internal
 HParsedToken *h_carray_index(const HCountedArray *a, size_t i)
 {
diff --git a/src/glue.h b/src/glue.h
index 0bbfe9cfa26ec1bb6376ff23aa3b2d6cc3b4e873..08e5255ab2572d5d828943cd98331081ab02fc02 100644
--- a/src/glue.h
+++ b/src/glue.h
@@ -195,9 +195,11 @@ HParsedToken *h_act_ignore(const HParseResult *p, void* user_data);
 HParsedToken *h_make(HArena *arena, HTokenType type, void *value);
 HParsedToken *h_make_seq(HArena *arena);  // Makes empty sequence.
 HParsedToken *h_make_seqn(HArena *arena, size_t n);  // Makes empty sequence of expected size n.
-HParsedToken *h_make_bytes(HArena *arena, uint8_t *array, size_t len);
+HParsedToken *h_make_bytes(HArena *arena, const uint8_t *array, size_t len);
 HParsedToken *h_make_sint(HArena *arena, int64_t val);
 HParsedToken *h_make_uint(HArena *arena, uint64_t val);
+HParsedToken *h_make_double(HArena *arena, double val);
+HParsedToken *h_make_float(HArena *arena, float val);
 
 // Standard short-hands to make tokens in an action.
 #define H_MAKE(TYP, VAL)  h_make(p->arena, (HTokenType)TT_ ## TYP, VAL)
@@ -206,6 +208,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val);
 #define H_MAKE_BYTES(VAL, LEN) h_make_bytes(p->arena, VAL, LEN)
 #define H_MAKE_SINT(VAL)  h_make_sint(p->arena, VAL)
 #define H_MAKE_UINT(VAL)  h_make_uint(p->arena, VAL)
+#define H_MAKE_DOUBLE(VAL) h_make_double(p->arena, VAL)
+#define H_MAKE_FLOAT(VAL) h_make_float(p->arena, VAL)
 
 // Extract (cast) type-specific value back from HParsedTokens...
 
@@ -218,6 +222,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val);
 #define H_ASSERT_BYTES(TOK)  h_assert_type(TT_BYTES, TOK)
 #define H_ASSERT_SINT(TOK)   h_assert_type(TT_SINT, TOK)
 #define H_ASSERT_UINT(TOK)   h_assert_type(TT_UINT, TOK)
+#define H_ASSERT_DOUBLE(TOK) h_assert_type(TT_DOUBLE, TOK)
+#define H_ASSERT_FLOAT(TOK)  h_assert_type(TT_FLOAT, TOK)
 
 // Assert expected type and return contained value.
 #define H_CAST(TYP, TOK)   ((TYP *) H_ASSERT(TYP, TOK)->user)
@@ -225,6 +231,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val);
 #define H_CAST_BYTES(TOK)  (H_ASSERT_BYTES(TOK)->bytes)
 #define H_CAST_SINT(TOK)   (H_ASSERT_SINT(TOK)->sint)
 #define H_CAST_UINT(TOK)   (H_ASSERT_UINT(TOK)->uint)
+#define H_CAST_DOUBLE(TOK) (H_ASSERT_DOUBLE(TOK)->dbl)
+#define H_CAST_FLOAT(TOK)  (H_ASSERT_FLOAT(TOK)->flt)
 
 // Sequence access...
 
@@ -247,7 +255,9 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va);
 #define H_INDEX_BYTES(SEQ, ...)  H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
 #define H_INDEX_SINT(SEQ, ...)   H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
 #define H_INDEX_UINT(SEQ, ...)   H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
-#define H_INDEX_TOKEN(SEQ, ...)  h_seq_index_path(SEQ, __VA_ARGS__, -1)
+#define H_INDEX_DOUBLE(SEQ, ...) H_CAST_DOUBLE(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
+#define H_INDEX_FLOAT(SEQ, ...)  H_CAST_FLOAT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
+#define H_INDEX_TOKEN(SEQ, ...)  h_seq_index_path(H_ASSERT_SEQ(SEQ), __VA_ARGS__, -1)
 
 // Standard short-hand to access and cast elements on a sequence token.
 #define H_FIELD(TYP, ...)  H_INDEX(TYP, p->ast, __VA_ARGS__)
@@ -255,6 +265,9 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va);
 #define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__)
 #define H_FIELD_SINT(...)  H_INDEX_SINT(p->ast, __VA_ARGS__)
 #define H_FIELD_UINT(...)  H_INDEX_UINT(p->ast, __VA_ARGS__)
+#define H_FIELD_DOUBLE(...) H_INDEX_DOUBLE(p->ast, __VA_ARGS__)
+#define H_FIELD_FLOAT(...) H_INDEX_FLOAT(p->ast, __VA_ARGS__)
+#define H_FIELD_TOKEN(...) H_INDEX_TOKEN(p->ast, __VA_ARGS__)
 
 // Lower-level helper for h_seq_index.
 HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal
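
A minimal sketch of the new double helpers in a semantic action (illustrative; assumes the usual glue.h conventions and an AST whose element 0 is a TT_DOUBLE):

    static HParsedToken *act_scale(const HParseResult *p, void *user_data) {
      double x = H_FIELD_DOUBLE(0);    /* element 0 of p->ast, asserted to be TT_DOUBLE */
      return H_MAKE_DOUBLE(x * 2.0);   /* new TT_DOUBLE token allocated in p->arena */
    }
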
diff --git a/src/hammer.h b/src/hammer.h
index 2efa75b5d2e097982bc88b8fe41f66711db62cbf..ab5ea080d4f325b88d5c4014de57dc3da5687baf 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -75,6 +75,8 @@ typedef enum HTokenType_ {
   TT_BYTES = 2,
   TT_SINT = 4,
   TT_UINT = 8,
+  TT_DOUBLE = 12,
+  TT_FLOAT = 13,
   TT_SEQUENCE = 16,
   TT_RESERVED_1, // reserved for backend-specific internal use
   TT_ERR = 32,
@@ -381,7 +383,7 @@ HParseResult* h_parse_finish(HSuspendedParser* s);
  */
 HAMMER_FN_DECL(HParser*, h_token, const uint8_t *str, const size_t len);
 
-#define h_literal(s) h_token(s, sizeof(s)-1)
+#define h_literal(s) h_token(((const uint8_t *)(s)), sizeof(s)-1)
 
 /**
  * Given a single character, returns a parser that parses that 
@@ -548,6 +550,15 @@ HAMMER_FN_DECL_NOARG(HParser*, h_nothing_p);
  */
 HAMMER_FN_DECL_VARARGS_ATTR(H_GCC_ATTRIBUTE((sentinel)), HParser*, h_sequence, HParser* p);
 
+/**
+ * Given an `h_sequence` and a list of indices, returns a parser that parses the sequence
+ * but omits the results at the given indices. If a negative integer appears in the middle
+ * of the list, this combinator silently ignores the rest of the list.
+ *
+ * Result token type: TT_SEQUENCE
+ */
+#define h_drop_from(p, ...) h_drop_from_(p, __VA_ARGS__, -1)
+HAMMER_FN_DECL_VARARGS(HParser*, h_drop_from_, HParser* p);
+
 /**
  * Given an array of parsers, p_array, apply each parser in order. The 
  * first parser to succeed is the result; if no parsers succeed, the 
@@ -802,6 +813,32 @@ HAMMER_FN_DECL(HParser*, h_get_value, const char* name);
  */
 HAMMER_FN_DECL(HParser*, h_bind, const HParser *p, HContinuation k, void *env);
 
+/**
+ * This parser skips 'n' bits of input.
+ *
+ * Result: None. The HParseResult exists but its AST is NULL.
+ */
+HAMMER_FN_DECL(HParser*, h_skip, size_t n);
+
+/**
+ * The HParser equivalent of fseek(), 'h_seek' modifies the parser's input
+ * position.  Note that contrary to 'fseek', offsets are in bits, not bytes.
+ * The 'whence' argument uses the same values and semantics: SEEK_SET,
+ * SEEK_CUR, SEEK_END.
+ *
+ * Fails if the new input position would be negative or past the end of input.
+ *
+ * Result: TT_UINT. The new input position.
+ */
+HAMMER_FN_DECL(HParser*, h_seek, ssize_t offset, int whence);
+
+/**
+ * Report the current position in bits. Consumes no input.
+ *
+ * Result: TT_UINT. The current input position.
+ */
+HAMMER_FN_DECL_NOARG(HParser*, h_tell);
+
 /**
  * Free the memory allocated to an HParseResult when it is no longer needed.
  */
@@ -814,17 +851,35 @@ HAMMER_FN_DECL(void, h_parse_result_free, HParseResult *result);
  */
 char* h_write_result_unamb(const HParsedToken* tok);
 /**
- * Format token to the given output stream. Indent starting at
- * [indent] spaces, with [delta] spaces between levels.
+ * Format token to the given output stream. Indent starting at [indent] spaces,
+ * with [delta] spaces between levels.
+ *
+ * Note: This function does not print a trailing newline. It also does not
+ * print any spaces to indent the initial line of output. This makes it
+ * suitable for recursive use in the condensed output of larger structures.
  */
 void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta);
+/**
+ * Format token to the given output stream and print a trailing newline.
+ *
+ * This function assumes an initial indentation of 0 and uses 2 spaces between
+ * indentation levels. It is equivalent to 'h_pprint(stream, tok, 0, 2)'
+ * followed by 'fputc('\n', stream)' and is provided for convenience.
+ */
+void h_pprintln(FILE* stream, const HParsedToken* tok);
 
 /**
  * Build parse tables for the given parser backend. See the
  * documentation for the parser backend in question for information
  * about the [params] parameter, or just pass in NULL for the defaults.
  *
- * Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise.
+ * Returns 0 on success and a nonzero value on error. Common return codes include:
+ *
+ *  -1: parser uses a combinator that is incompatible with the chosen backend.
+ *  -2: parser could not be compiled with the chosen parameters.
+ *  >0: unexpected internal errors.
+ *
+ * Consult each backend for details.
  */
 HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params);
 
@@ -881,7 +936,8 @@ HTokenType h_allocate_token_type(const char* name);
 /// Allocate a new token type with an unambiguous print function.
 HTokenType h_allocate_token_new(
     const char* name,
-    void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf));
+    void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf),
+    void (*pprint)(FILE* stream, const HParsedToken* tok, int indent, int delta));
 
 /// Get the token type associated with name. Returns -1 if name is unkown
 HTokenType h_get_token_type_number(const char* name);
diff --git a/src/internal.h b/src/internal.h
index df720151d58beb7c4ee0a22e1d2434f704c61477..03d6c8adb3bee5c7a4ab7cf701eb2c89122bcd1f 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -362,9 +362,16 @@ char * h_get_short_name_with_no_params(HAllocator *mm__,
                                        HParserBackend be, void *params);
 
 int64_t h_read_bits(HInputStream* state, int count, char signed_p);
+void h_skip_bits(HInputStream* state, size_t count);
+void h_seek_bits(HInputStream* state, size_t pos);
 static inline size_t h_input_stream_pos(HInputStream* state) {
+  assert(state->index < SIZE_MAX / 8);
   return state->index * 8 + state->bit_offset + state->margin;
 }
+static inline size_t h_input_stream_length(HInputStream *state) {
+  assert(state->length <= SIZE_MAX / 8);
+  return state->length * 8;
+}
 // need to decide if we want to make this public. 
 HParseResult* h_do_parse(const HParser* parser, HParseState *state);
 void put_cached(HParseState *ps, const HParser *p, HParseResult *cached);
@@ -412,16 +419,22 @@ HSlist* h_slist_remove_all(HSlist *slist, const void* item);
 void h_slist_free(HSlist *slist);
 static inline bool h_slist_empty(const HSlist *sl) { return (sl->head == NULL); }
 
-HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc);
-void* h_hashtable_get(const HHashTable* ht, const void* key);
-void  h_hashtable_put(HHashTable* ht, const void* key, void* value);
-void  h_hashtable_update(HHashTable* dst, const HHashTable *src);
-void  h_hashtable_merge(void *(*combine)(void *v1, const void *v2),
+HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc,
+                            HHashFunc hashFunc);
+void * h_hashtable_get_precomp(const HHashTable *ht, const void *key,
+                               HHashValue hashval);
+void * h_hashtable_get(const HHashTable *ht, const void *key);
+void   h_hashtable_put_precomp(HHashTable *ht, const void *key,
+                               void *value, HHashValue hashval);
+void   h_hashtable_put(HHashTable *ht, const void *key, void *value);
+void   h_hashtable_update(HHashTable *dst, const HHashTable *src);
+void   h_hashtable_merge(void *(*combine)(void *v1, const void *v2),
                         HHashTable *dst, const HHashTable *src);
-int   h_hashtable_present(const HHashTable* ht, const void* key);
-void  h_hashtable_del(HHashTable* ht, const void* key);
-void  h_hashtable_free(HHashTable* ht);
-static inline bool h_hashtable_empty(const HHashTable* ht) { return (ht->used == 0); }
+int   h_hashtable_present(const HHashTable *ht, const void *key);
+void  h_hashtable_del(HHashTable *ht, const void *key);
+void  h_hashtable_free(HHashTable *ht);
+static inline bool h_hashtable_empty(const HHashTable *ht) { return (ht->used == 0); }
+bool h_hashtable_equal(const HHashTable *a, const HHashTable *b, HEqualFunc value_eq);
 
 typedef HHashTable HHashSet;
 #define h_hashset_new(a,eq,hash) h_hashtable_new(a,eq,hash)
@@ -480,6 +493,7 @@ typedef struct HTTEntry_ {
   const char* name;
   HTokenType value;
   void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf);
+  void (*pprint)(FILE* stream, const HParsedToken* tok, int indent, int delta);
 } HTTEntry;
 
 const HTTEntry* h_get_token_type_entry(HTokenType token_type);
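
A small worked example of the bit-position helpers (illustrative; only the fields the helpers read are set):

    HInputStream s = {0};
    s.index = 3; s.bit_offset = 2; s.margin = 0; s.length = 16;
    size_t pos = h_input_stream_pos(&s);     /* 3*8 + 2 + 0 = 26 bits into the input */
    size_t len = h_input_stream_length(&s);  /* 16 bytes * 8 = 128 bits */
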
diff --git a/src/parsers/bits.c b/src/parsers/bits.c
index be8f13f10a65f67e50d134c5f3557a1a7a209d62..2b977a27401610a071f0bac42b7404f828351eed 100644
--- a/src/parsers/bits.c
+++ b/src/parsers/bits.c
@@ -14,6 +14,9 @@ static HParseResult* parse_bits(void* env, HParseState *state) {
     result->sint = h_read_bits(&state->input_stream, env_->length, true);
   else
     result->uint = h_read_bits(&state->input_stream, env_->length, false);
+  result->index = 0;
+  result->bit_length = 0;
+  result->bit_offset = 0;
   return make_result(state->arena, result);
 }
 
@@ -29,7 +32,7 @@ static HParsedToken *reshape_bits(const HParseResult *p, void* signedp_p) {
   HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken));
   ret->token_type = TT_UINT;
 
-  if(signedp && (seq->elements[0]->uint & 128))
+  if(signedp && seq->used > 0 && (seq->elements[0]->uint & 128))
     ret->uint = -1; // all ones
 
   for(size_t i=0; i<seq->used; i++) {
diff --git a/src/parsers/ch.c b/src/parsers/ch.c
index 3da1091a4b71505aebdc6ed5b396084d12b1fde4..c878f9d08659a1cb6ae39f95b06c522c1a5a185f 100644
--- a/src/parsers/ch.c
+++ b/src/parsers/ch.c
@@ -8,6 +8,9 @@ static HParseResult* parse_ch(void* env, HParseState *state) {
   if (c == r) {
     HParsedToken *tok = a_new(HParsedToken, 1);    
     tok->token_type = TT_UINT; tok->uint = r;
+    tok->index = 0;
+    tok->bit_length = 0;
+    tok->bit_offset = 0;
     return make_result(state->arena, tok);
   } else {
     return NULL;
diff --git a/src/parsers/charset.c b/src/parsers/charset.c
index a4b8c89c7daca326cf77ee9bf5c8ae4660884c56..01657386f68e9788e69635790df6efb600de4c94 100644
--- a/src/parsers/charset.c
+++ b/src/parsers/charset.c
@@ -10,6 +10,9 @@ static HParseResult* parse_charset(void *env, HParseState *state) {
   if (charset_isset(cs, in)) {
     HParsedToken *tok = a_new(HParsedToken, 1);
     tok->token_type = TT_UINT; tok->uint = in;
+    tok->index = 0;
+    tok->bit_length = 0;
+    tok->bit_offset = 0;
     return make_result(state->arena, tok);    
   } else
     return NULL;
diff --git a/src/parsers/choice.c b/src/parsers/choice.c
index 90c3662b515babe4a69b0e24dc146ebe1d0a647d..69e4aee778977243594f0ffc124cb3931f4a8d03 100644
--- a/src/parsers/choice.c
+++ b/src/parsers/choice.c
@@ -164,5 +164,6 @@ HParser* h_choice__ma(HAllocator* mm__, void *args[]) {
   ret->vtable = &choice_vt; 
   ret->env = (void*)s;
   ret->backend = PB_MIN;
+  ret->desugared = NULL;
   return ret;
 }
diff --git a/src/parsers/end.c b/src/parsers/end.c
index 85499d9348cd1df6503428a55d7a2ab878d1ef63..35e4186d430d8b48fe5cd1e41552403d6f95e562 100644
--- a/src/parsers/end.c
+++ b/src/parsers/end.c
@@ -4,6 +4,8 @@ static HParseResult* parse_end(void *env, HParseState *state) {
   if (state->input_stream.index == state->input_stream.length) {
     HParseResult *ret = a_new(HParseResult, 1);
     ret->ast = NULL;
+    ret->bit_length = 0;
+    ret->arena = state->arena;
     return ret;
   } else {
     return NULL;
diff --git a/src/parsers/epsilon.c b/src/parsers/epsilon.c
index bb6e8beb31cca3ff09a565171b4e554e07f2ffad..be614489cecfec6f30e4c2bfdd18c323be894446 100644
--- a/src/parsers/epsilon.c
+++ b/src/parsers/epsilon.c
@@ -5,6 +5,7 @@ static HParseResult* parse_epsilon(void* env, HParseState* state) {
   HParseResult* res = a_new(HParseResult, 1);
   res->ast = NULL;
   res->arena = state->arena;
+  res->bit_length = 0;
   return res;
 }
 
diff --git a/src/parsers/ignore.c b/src/parsers/ignore.c
index c56802ac0885fc11429925f353a516d622b88a9d..7eda13d23eecfc771eb82d40db90c81387be146f 100644
--- a/src/parsers/ignore.c
+++ b/src/parsers/ignore.c
@@ -8,6 +8,7 @@ static HParseResult* parse_ignore(void* env, HParseState* state) {
   HParseResult *res = a_new(HParseResult, 1);
   res->ast = NULL;
   res->arena = state->arena;
+  res->bit_length = 0;
   return res;
 }
 
diff --git a/src/parsers/many.c b/src/parsers/many.c
index 071e3fcd2d30ed35f4622962751ebc63bea3d37c..655dd1497667f44b3de6694a0109c6f58804167d 100644
--- a/src/parsers/many.c
+++ b/src/parsers/many.c
@@ -37,6 +37,9 @@ static HParseResult *parse_many(void* env, HParseState *state) {
   HParsedToken *res = a_new(HParsedToken, 1);
   res->token_type = TT_SEQUENCE;
   res->seq = seq;
+  res->index = 0;
+  res->bit_length = 0;
+  res->bit_offset = 0;
   return make_result(state->arena, res);
  err0:
   if (count >= env_->count) {
@@ -85,6 +88,7 @@ static HParsedToken *reshape_many(const HParseResult *p, void *user)
   res->seq = seq;
   res->index = p->ast->index;
   res->bit_offset = p->ast->bit_offset;
+  res->bit_length = p->bit_length;
   return res;
 }
 
@@ -92,22 +96,17 @@ static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) {
   // TODO: refactor this.
   HRepeat *repeat = (HRepeat*)env;
   if (!repeat->min_p) {
-    assert(!"Unreachable");
+    // min_p not set: `count` is an exact repetition count.
+    assert(repeat->sep == NULL);
     HCFS_BEGIN_CHOICE() {
       HCFS_BEGIN_SEQ() {
-	for (size_t i = 0; i < repeat->count; i++) {
-	  if (i != 0 && repeat->sep != NULL)
-	    HCFS_DESUGAR(repeat->sep); // Should be ignored.
+	for (size_t i = 0; i < repeat->count; i++)
 	  HCFS_DESUGAR(repeat->p);
-	}
       } HCFS_END_SEQ();
     } HCFS_END_CHOICE();
     return;
   }
-  if(repeat->count > 1) {
-    assert_message(0, "'h_repeat_n' is not context-free, can't be desugared");
-    return;
-  }
+  assert(repeat->count <= 1);
 
   /* many(A) =>
          Ma  -> A Mar
diff --git a/src/parsers/optional.c b/src/parsers/optional.c
index 726606643056b103f9481cb882dadc19417dd607..6a2789e2d0a86a8c4e6b141825fd3abc075af5d0 100644
--- a/src/parsers/optional.c
+++ b/src/parsers/optional.c
@@ -9,6 +9,9 @@ static HParseResult* parse_optional(void* env, HParseState* state) {
   state->input_stream = bak;
   HParsedToken *ast = a_new(HParsedToken, 1);
   ast->token_type = TT_NONE;
+  ast->index = 0;
+  ast->bit_length = 0;
+  ast->bit_offset = 0;
   return make_result(state->arena, ast);
 }
 
diff --git a/src/parsers/parser_internal.h b/src/parsers/parser_internal.h
index 9a3b6de3898b42336a84bfe565448c27315e29bb..ebc5f4b32992d77cae4e6b512d17e3a39729e14f 100644
--- a/src/parsers/parser_internal.h
+++ b/src/parsers/parser_internal.h
@@ -10,12 +10,13 @@
 #include "../backends/regex.h"
 #include "../backends/contextfree.h"
 
-#define a_new_(arena, typ, count) ((typ*)h_arena_malloc((arena), sizeof(typ)*(count)))
+#define a_new_(arena, typ, count) ((typ*)h_arena_malloc_noinit((arena), sizeof(typ)*(count)))
 #define a_new(typ, count) a_new_(state->arena, typ, count)
-// we can create a_new0 if necessary. It would allocate some memory and immediately zero it out.
+#define a_new0_(arena, typ, count) ((typ*)h_arena_malloc((arena), sizeof(typ)*(count)))
+#define a_new0(typ, count) a_new0_(state->arena, typ, count)
 
 static inline HParseResult* make_result(HArena *arena, HParsedToken *tok) {
-  HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult));
+  HParseResult *ret = h_arena_malloc_noinit(arena, sizeof(HParseResult));
   ret->ast = tok;
   ret->arena = arena;
   ret->bit_length = 0; // This way it gets overridden in h_do_parse
diff --git a/src/parsers/permutation.c b/src/parsers/permutation.c
index b16758413eeafe2ce2ae91db2ebbe7593681d3cd..c40f99da2d52eea8dcbce93882d4aed7a0307360 100644
--- a/src/parsers/permutation.c
+++ b/src/parsers/permutation.c
@@ -89,6 +89,9 @@ static HParseResult *parse_permutation(void *env, HParseState *state)
     HParsedToken *tok = a_new(HParsedToken, 1);
     tok->token_type  = TT_SEQUENCE;
     tok->seq = seq;
+    tok->index = 0;
+    tok->bit_length = 0;
+    tok->bit_offset = 0;
     return make_result(state->arena, tok);
   } else {
     // no parse
@@ -176,5 +179,6 @@ HParser* h_permutation__ma(HAllocator* mm__, void *args[]) {
   ret->vtable = &permutation_vt; 
   ret->env = (void*)s;
   ret->backend = PB_MIN;
+  ret->desugared = NULL;
   return ret;
 }
diff --git a/src/parsers/seek.c b/src/parsers/seek.c
new file mode 100644
index 0000000000000000000000000000000000000000..d5bc02840f0145dc3089e82c11068359932b454c
--- /dev/null
+++ b/src/parsers/seek.c
@@ -0,0 +1,124 @@
+#include "parser_internal.h"
+
+typedef struct {
+  ssize_t offset;
+  int whence;
+} HSeek;
+
+static HParseResult *parse_skip(void *env, HParseState *state)
+{
+  size_t n = (uintptr_t)env;
+
+  h_skip_bits(&state->input_stream, n);
+  return make_result(state->arena, NULL);
+}
+
+static HParseResult *parse_seek(void *env, HParseState *state)
+{
+  HSeek *s = (HSeek *)env;
+  HInputStream *stream = &state->input_stream;
+  size_t pos;
+
+  /* determine base position */
+  switch (s->whence) {
+  case SEEK_SET:
+    pos = 0;
+    break;
+  case SEEK_END:
+    pos = h_input_stream_length(stream);
+    break;
+  case SEEK_CUR:
+    pos = h_input_stream_pos(stream);
+    break;
+  default:
+    return NULL;	/* invalid argument */
+  }
+
+  /* calculate target position and do basic overflow checks */
+  if (s->offset < 0 && (size_t)(- s->offset) > pos)
+    return NULL;	/* underflow */
+  if (s->offset > 0 && SIZE_MAX - s->offset < pos)
+    return NULL;	/* overflow */
+  pos += s->offset;
+
+  /* perform the seek and check for overrun */
+  h_seek_bits(stream, pos);
+  if (stream->overrun)
+    return NULL;
+
+  HParsedToken *tok = a_new(HParsedToken, 1);
+  tok->token_type = TT_UINT;
+  tok->uint = pos;
+  tok->index = 0;
+  tok->bit_length = 0;
+  tok->bit_offset = 0;
+  return make_result(state->arena, tok);
+}
+
+static HParseResult *parse_tell(void *env, HParseState *state)
+{
+  HParsedToken *tok = a_new(HParsedToken, 1);
+  tok->token_type = TT_UINT;
+  tok->uint = h_input_stream_pos(&state->input_stream);
+  tok->index = 0;
+  tok->bit_length = 0;
+  tok->bit_offset = 0;
+  return make_result(state->arena, tok);
+}
+
+static const HParserVtable skip_vt = {
+  .parse = parse_skip,
+  .isValidRegular = h_false,
+  .isValidCF = h_false,
+  .compile_to_rvm = h_not_regular,
+  .higher = false,
+};
+
+static const HParserVtable seek_vt = {
+  .parse = parse_seek,
+  .isValidRegular = h_false,
+  .isValidCF = h_false,
+  .compile_to_rvm = h_not_regular,
+  .higher = false,
+};
+
+static const HParserVtable tell_vt = {
+  .parse = parse_tell,
+  .isValidRegular = h_false,
+  .isValidCF = h_false,
+  .compile_to_rvm = h_not_regular,
+  .higher = false,
+};
+
+HParser* h_skip(size_t n)
+{
+  return h_skip__m(&system_allocator, n);
+}
+
+HParser *h_skip__m(HAllocator* mm__, size_t n)
+{
+  return h_new_parser(mm__, &skip_vt, (void *)n);
+}
+
+HParser* h_seek(ssize_t offset, int whence)
+{
+  return h_seek__m(&system_allocator, offset, whence);
+}
+
+HParser *h_seek__m(HAllocator* mm__, ssize_t offset, int whence)
+{
+  HSeek *env = h_new(HSeek, 1);
+  env->offset = offset;
+  env->whence = whence;
+  return h_new_parser(mm__, &seek_vt, env);
+}
+
+HParser *h_tell()
+{
+  return h_tell__m(&system_allocator);
+}
+
+HParser *h_tell__m(HAllocator* mm__)
+{
+  return h_new_parser(mm__, &tell_vt, NULL);
+}
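
For example (illustrative numbers): on a 100-byte input, h_input_stream_length() reports 800 bits, so h_seek(-32, SEEK_END) resolves to bit position 768 and the parse continues at the last four bytes, while h_seek(8, SEEK_CUR) from bit position 16 lands at 24. A target that would be negative or beyond bit 800 makes parse_seek fail and return NULL.
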
diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c
index 55c0c8885573ef7779714efd49eaf64cc59ac878..2e7b4bc7286ec0ac32af012126e4289226297be0 100644
--- a/src/parsers/sequence.c
+++ b/src/parsers/sequence.c
@@ -22,6 +22,9 @@ static HParseResult* parse_sequence(void *env, HParseState *state) {
   }
   HParsedToken *tok = a_new(HParsedToken, 1);
   tok->token_type = TT_SEQUENCE; tok->seq = seq;
+  tok->index = 0;
+  tok->bit_offset = 0;
+  tok->bit_length = 0;
   return make_result(state->arena, tok);
 }
 
@@ -60,6 +63,7 @@ static HParsedToken *reshape_sequence(const HParseResult *p, void* user_data) {
   res->seq = seq;
   res->index = p->ast->index;
   res->bit_offset = p->ast->bit_offset;
+  res->bit_length = p->bit_length;
 
   return res;
 }
@@ -171,5 +175,88 @@ HParser* h_sequence__ma(HAllocator* mm__, void *args[]) {
   ret->vtable = &sequence_vt; 
   ret->env = (void*)s; 
   ret->backend = PB_MIN;
+  ret->desugared = NULL;
   return ret;
 }
+
+HParser* h_drop_from_(HParser* p, ...) {
+  assert_message(p->vtable == &sequence_vt, "drop_from requires a sequence parser");
+  va_list ap;
+  va_start(ap, p);
+  HParser* ret = h_drop_from___mv(&system_allocator, p, ap);
+  va_end(ap);
+  return ret;
+}
+
+HParser* h_drop_from___m(HAllocator* mm__, HParser* p, ...) {
+  assert_message(p->vtable == &sequence_vt, "drop_from requires a sequence parser");
+  va_list ap;
+  va_start(ap, p);
+  HParser* ret = h_drop_from___mv(mm__, p, ap);
+  va_end(ap);
+  return ret;
+}
+
+HParser* h_drop_from___v(HParser* p, va_list ap) {
+  assert_message(p->vtable == &sequence_vt, "drop_from requires a sequence parser");
+  return h_drop_from___mv(&system_allocator, p, ap);
+}
+
+HParser* h_drop_from___mv(HAllocator* mm__, HParser *p, va_list ap) {
+  /* Ok, here's where things get funny.
+   *
+   * Saying `h_drop_from(h_sequence(a, b, c, d, e, NULL), 0, 4, -1)` is functionally
+   * equivalent to `h_sequence(h_ignore(a), b, c, d, h_ignore(e), NULL)`. Thus, this
+   * term rewrites itself, becoming an h_sequence where some parsers are ignored.
+   */
+  HSequence *s = (HSequence*)(p->env);
+  size_t indices[s->len];
+  size_t count = 0;
+  int arg = 0;
+  
+  for (arg = va_arg(ap, int); arg >= 0; arg = va_arg(ap, int)) {
+    indices[count] = arg;
+    count++;
+  }
+  va_end(ap);
+
+  HSequence *rewrite = h_new(HSequence, 1);
+  rewrite->p_array = h_new(HParser *, s->len);
+  rewrite->len = s->len;
+  for (size_t i=0, j=0; i<s->len; ++i) {
+    if (j < count && indices[j] == i) {
+      rewrite->p_array[i] = h_ignore(s->p_array[i]);
+      ++j;
+    } else {
+      rewrite->p_array[i] = s->p_array[i];
+    }
+  }
+  
+  return h_new_parser(mm__, &sequence_vt, rewrite);
+}
+
+HParser* h_drop_from___a(void *args[]) {
+  return h_drop_from___ma(&system_allocator, args);
+}
+
+HParser* h_drop_from___ma(HAllocator* mm__, void *args[]) {
+  HParser *p = (HParser*)(args[0]);
+  assert_message(p->vtable == &sequence_vt, "drop_from requires a sequence parser");
+  HSequence *s = (HSequence*)(p->env);
+  HSequence *rewrite = h_new(HSequence, 1);
+  rewrite->p_array = h_new(HParser *, s->len);
+  rewrite->len = s->len;
+
+  int *argp = (int*)(args[1]);
+  /* copy every slot; drop those whose index appears in the negative-terminated list */
+  for (size_t i = 0; i < s->len; ++i) {
+    if (*argp >= 0 && i == (size_t)*argp) {
+      rewrite->p_array[i] = h_ignore(s->p_array[i]);
+      ++argp;
+    } else {
+      rewrite->p_array[i] = s->p_array[i];
+    }
+  }
+
+  return h_new_parser(mm__, &sequence_vt, rewrite);
+}
diff --git a/src/parsers/token.c b/src/parsers/token.c
index 19029726ad11a52fa0eadf62b67a7b15cd2e4744..b589d58c60e39bb895395a2eedb48984a0b8669f 100644
--- a/src/parsers/token.c
+++ b/src/parsers/token.c
@@ -16,6 +16,9 @@ static HParseResult* parse_token(void *env, HParseState *state) {
   }
   HParsedToken *tok = a_new(HParsedToken, 1);
   tok->token_type = TT_BYTES; tok->bytes.token = t->str; tok->bytes.len = t->len;
+  tok->index = 0;
+  tok->bit_offset = 0;
+  tok->bit_length = 0;
   return make_result(state->arena, tok);
 }
 
diff --git a/src/platform_bsdlike.c b/src/platform_bsdlike.c
index 2ccf874264a740e0784e8fba14e2ae78a337fa08..ffe1e64db4d1c0e2589160a40468c408f12a3fa6 100644
--- a/src/platform_bsdlike.c
+++ b/src/platform_bsdlike.c
@@ -1,4 +1,8 @@
+#ifdef __OpenBSD__
+#define _BSD_SOURCE // to obtain asprintf/vasprintf
+#else
 #define _GNU_SOURCE // to obtain asprintf/vasprintf
+#endif
 #include "platform.h"
 
 #include <stdio.h>
diff --git a/src/pprint.c b/src/pprint.c
index 52f42eb6060230a8bb608b8e5ab1eafb6ef1467c..5f6e1e2c5a6d98869be764218cc2e4f191c0e669 100644
--- a/src/pprint.c
+++ b/src/pprint.c
@@ -30,55 +30,80 @@ typedef struct pp_state {
   int at_bol;
 } pp_state_t;
 
+static void pprint_bytes(FILE *stream, const uint8_t *bs, size_t len)
+{
+    fprintf(stream, "\"");
+    for (size_t i = 0; i < len; i++) {
+      uint8_t c = bs[i];
+      if (c == '"' || c == '\\')
+        fprintf(stream, "\\%c", c);
+      else if (c >= 0x20 && c <= 0x7e)
+        fputc(c, stream);
+      else
+        fprintf(stream, "\\u00%02hhx", c);
+    }
+    fprintf(stream, "\"");
+}
+
 void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) {
+  if (tok == NULL) {
+    fprintf(stream, "(null)");
+    return;
+  }
   switch (tok->token_type) {
   case TT_NONE:
-    fprintf(stream, "%*snull\n", indent, "");
+    fprintf(stream, "null");
     break;
   case TT_BYTES:
-    if (tok->bytes.len == 0)
-      fprintf(stream, "%*s<>\n", indent, "");
-    else {
-      fprintf(stream, "%*s", indent, "");
-      for (size_t i = 0; i < tok->bytes.len; i++) {
-        fprintf(stream,
-                "%c%02hhx",
-                (i == 0) ? '<' : '.',
-                tok->bytes.token[i]);
-      }
-      fprintf(stream, ">\n");
-    }
+    pprint_bytes(stream, tok->bytes.token, tok->bytes.len);
     break;
   case TT_SINT:
-    if (tok->sint < 0)
-      fprintf(stream, "%*ss -%#" PRIx64 "\n", indent, "", -tok->sint);
-    else
-      fprintf(stream, "%*ss %#" PRIx64 "\n", indent, "", tok->sint);
-
+    fprintf(stream, "%" PRId64, tok->sint);
     break;
   case TT_UINT:
-    fprintf(stream, "%*su %#" PRIx64 "\n", indent, "", tok->uint);
+    fprintf(stream, "%" PRIu64, tok->uint);
     break;
-  case TT_SEQUENCE: {
-    fprintf(stream, "%*s[\n", indent, "");
-    for (size_t i = 0; i < tok->seq->used; i++) {
-      h_pprint(stream, tok->seq->elements[i], indent + delta, delta);
-    }
-    fprintf(stream, "%*s]\n", indent, "");
-  }
+  case TT_DOUBLE:
+    fprintf(stream, "%f", tok->dbl);
     break;
-  case TT_USER:
-    fprintf(stream, "%*sUSER:%s\n", indent, "", h_get_token_type_name(tok->token_type));
+  case TT_FLOAT:
+    fprintf(stream, "%f", (double)tok->flt);
+    break;
+  case TT_SEQUENCE:
+    if (tok->seq->used == 0)
+      fprintf(stream, "[ ]");
+    else {
+      fprintf(stream, "[%*s", delta - 1, "");
+      for (size_t i = 0; i < tok->seq->used; i++) {
+	if (i > 0) fprintf(stream, "\n%*s,%*s", indent, "", delta - 1, "");
+        h_pprint(stream, tok->seq->elements[i], indent + delta, delta);
+      }
+      if (tok->seq->used > 2)
+        fprintf(stream, "\n%*s]", indent, "");
+      else
+        fprintf(stream, " ]");
+    }
     break;
   default:
-    if(tok->token_type > TT_USER) {
-      fprintf(stream, "%*sUSER:%s %d\n", indent, "", h_get_token_type_name(tok->token_type), tok->token_type-TT_USER);
-    } else {
-      assert_message(0, "Should not reach here.");
+    assert_message(tok->token_type >= TT_USER, "h_pprint: unhandled token type");
+    {
+      const HTTEntry *e = h_get_token_type_entry(tok->token_type);
+      fprintf(stream, "{ \"TT\":%d, \"N\":", (int)e->value);
+      pprint_bytes(stream, (uint8_t *)e->name, strlen(e->name));
+      if (e->pprint != NULL) {
+        fprintf(stream, ", \"V\":");
+        e->pprint(stream, tok, indent + delta, delta);
+      }
+      fprintf(stream, " }");
     }
   }
 }
 
+void h_pprintln(FILE* stream, const HParsedToken* tok) {
+  h_pprint(stream, tok, 0, 2);
+  fputc('\n', stream);
+}
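+// A rough sketch of the format produced above: pretty-printing a two-element
+// sequence of the uints 0x61 and 0x62 via h_pprintln would yield
+//
+//   [ 97
+//   , 98 ]
+//
+// with byte strings rendered as JSON-style quoted strings (non-printable
+// bytes become \u00XX escapes).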
+
 
 struct result_buf {
   char* output;
@@ -164,6 +189,12 @@ static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) {
   case TT_UINT:
     h_append_buf_formatted(buf, "u%#" PRIx64, tok->uint);
     break;
+  case TT_DOUBLE:
+    h_append_buf_formatted(buf, "d%a", tok->dbl);
+    break;
+  case TT_FLOAT:
+    h_append_buf_formatted(buf, "f%a", (double)tok->flt);
+    break;
   case TT_ERR:
     h_append_buf(buf, "ERR", 3);
     break;
@@ -202,6 +233,3 @@ char* h_write_result_unamb(const HParsedToken* tok) {
   h_append_buf_c(&buf, 0);
   return buf.output;
 }
-  
-
-
diff --git a/src/registry.c b/src/registry.c
index 00486db46ca6c1fdece03a051242f4f05ad23514..15cf41a6c6eeb1ec8b9849b06a1e1ead2a5d204a 100644
--- a/src/registry.c
+++ b/src/registry.c
@@ -15,7 +15,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  */
 
-#include <search.h>
 #include <stdlib.h>
 #include "hammer.h"
 #include "internal.h"
@@ -54,12 +53,14 @@ static void default_unamb_sub(const HParsedToken* tok,
 
 HTokenType h_allocate_token_new(
     const char* name,
-    void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf)) {
+    void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf),
+    void (*pprint)(FILE* stream, const HParsedToken* tok, int indent, int delta)) {
   HTTEntry* new_entry = h_alloc(&system_allocator, sizeof(*new_entry));
   assert(new_entry != NULL);
   new_entry->name = name;
   new_entry->value = 0;
-  new_entry->unamb_sub = unamb_sub;
+  new_entry->unamb_sub = unamb_sub ? unamb_sub : default_unamb_sub;
+  new_entry->pprint = pprint;
   HTTEntry* probe = *(HTTEntry**)tsearch(new_entry, &tt_registry, compare_entries);
   if (probe->value != 0) {
     // Token type already exists...
@@ -86,7 +87,7 @@ HTokenType h_allocate_token_new(
   }
 }
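+/* Registers a token type with the default unamb_sub and no pprint callback. */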
 HTokenType h_allocate_token_type(const char* name) {
-  return h_allocate_token_new(name, default_unamb_sub);
+  return h_allocate_token_new(name, NULL, NULL);
 }
 HTokenType h_get_token_type_number(const char* name) {
   HTTEntry e;
diff --git a/src/system_allocator.c b/src/system_allocator.c
index 39a1a7e77040c865f2d4f99977eb264391286bb4..f6e9cdcbbe74fedea568a73ada868e93d83c0660 100644
--- a/src/system_allocator.c
+++ b/src/system_allocator.c
@@ -59,6 +59,8 @@ static void* system_realloc(HAllocator *allocator, void* uptr, size_t size) {
   if (!uptr) {
     return system_alloc(allocator, size);
   }
+  // XXX this is incorrect if size == 0 and BLOCK_HEADER_SIZE != 0; it fails
+  // to behave like free(3)
   void* block = realloc(block_for_user_ptr(uptr), block_size(size));
   if (!block) {
     return NULL;
@@ -66,6 +68,7 @@ static void* system_realloc(HAllocator *allocator, void* uptr, size_t size) {
   uptr = user_ptr(block);
 
 #ifdef DEBUG__MEMFILL
+  // XXX this is the wrong block; this is reading uninitialized memory
   size_t old_size = ((HDebugBlockHeader*)block)->size;
   if (size > old_size)
     memset((char*)uptr+old_size, DEBUG__MEMFILL, size - old_size);
diff --git a/src/t_bitwriter.c b/src/t_bitwriter.c
index 6b9b7051fa480b47e9cf173e29d865bdbc4a8943..0d2a8c0fde798d90ab60a9663c34c6744be11697 100644
--- a/src/t_bitwriter.c
+++ b/src/t_bitwriter.c
@@ -32,6 +32,8 @@ void run_bitwriter_test(bitwriter_test_elem data[], char flags) {
   for (i = 0; data[i].nbits; i++) {
     g_check_cmp_uint64((uint64_t)h_read_bits(&input, data[i].nbits, FALSE), ==,  data[i].data);
   }
+
+  h_bit_writer_free(w);
 }
 
 static void test_bitwriter_ints(void) {
diff --git a/src/t_grammar.c b/src/t_grammar.c
index 0287b2fe6eda00a1d6575e619161d18ca9f20639..65812ea7dbd346f0833183a6d51b13c923126ad5 100644
--- a/src/t_grammar.c
+++ b/src/t_grammar.c
@@ -12,6 +12,8 @@ static void test_end(void) {
   g_check_hashtable_size(g->geneps, 0);
 
   g_check_derives_epsilon_not(g, p);
+
+  h_cfgrammar_free(g);
 }
 
 static void test_example_1(void) {
@@ -35,6 +37,8 @@ static void test_example_1(void) {
   g_check_followset_absent(1, g, c, "$");
   g_check_followset_absent(1, g, c, "x");
   g_check_followset_present(1, g, c, "y");
+
+  h_cfgrammar_free(g);
 }
 
 void register_grammar_tests(void) {
diff --git a/src/t_parser.c b/src/t_parser.c
index 331d2629018b40717bf49309ba0b561ce7a618a3..356c38f1674d6d3f90e3b0da672646455437f7a2 100644
--- a/src/t_parser.c
+++ b/src/t_parser.c
@@ -27,6 +27,44 @@ static void test_ch_range(gconstpointer backend) {
   g_check_parse_failed(range_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1);
 }
 
+static void test_bits0(gconstpointer backend) {
+  const HParser *bits0_;
+
+  bits0_ = h_bits(0, false);
+  g_check_parse_match(bits0_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0, "u0");
+  bits0_ = h_bits(0, true);
+  g_check_parse_match(bits0_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0, "s0");
+
+  bits0_ = h_sequence(h_bits(0, false), h_ch('a'), NULL);
+  g_check_parse_match(bits0_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "(u0 u0x61)");
+  bits0_ = h_sequence(h_bits(0, true), h_ch('a'), NULL);
+  g_check_parse_match(bits0_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "(s0 u0x61)");
+}
+
+static void test_bits(gconstpointer backend) {
+  const HParser *bits_;
+
+  bits_ = h_bits(3, false);
+  g_check_parse_match(bits_, (HParserBackend)GPOINTER_TO_INT(backend), "\0", 1, "u0");
+  g_check_parse_match(bits_, (HParserBackend)GPOINTER_TO_INT(backend), "\xff", 1, "u0x7");
+  g_check_parse_failed(bits_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0);
+
+  bits_ = h_bits(3, true);
+  g_check_parse_match(bits_, (HParserBackend)GPOINTER_TO_INT(backend), "\0", 1, "s0");
+  g_check_parse_match(bits_, (HParserBackend)GPOINTER_TO_INT(backend), "\xff", 1, "s-0x1");
+  g_check_parse_failed(bits_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0);
+
+  bits_ = h_bits(9, false);
+  g_check_parse_match(bits_, (HParserBackend)GPOINTER_TO_INT(backend), "\0\0", 2, "u0");
+  g_check_parse_match(bits_, (HParserBackend)GPOINTER_TO_INT(backend), "\xff\xff", 2, "u0x1ff");
+  g_check_parse_failed(bits_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1);
+
+  bits_ = h_bits(9, true);
+  g_check_parse_match(bits_, (HParserBackend)GPOINTER_TO_INT(backend), "\0\0", 2, "s0");
+  g_check_parse_match(bits_, (HParserBackend)GPOINTER_TO_INT(backend), "\xff\xff", 2, "s-0x1");
+  g_check_parse_failed(bits_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1);
+}
+
 //@MARK_START
 static void test_int64(gconstpointer backend) {
   const HParser *int64_ = h_int64();
@@ -91,6 +129,29 @@ static void test_uint8(gconstpointer backend) {
 }
 //@MARK_END
 
+// XXX implement h_double() and h_float(). these just test the pretty-printer...
+static HParsedToken *act_double(const HParseResult *p, void *u) {
+  return H_MAKE_DOUBLE((double)H_FIELD_UINT(0) + (double)H_FIELD_UINT(1)/10);
+}
+static void test_double(gconstpointer backend) {
+  HParser *b = h_uint8();
+  HParser *dbl = h_action(h_sequence(b, b, NULL), act_double, NULL);
+  uint8_t input[] = {4,2};
+
+  g_check_parse_match(dbl, (HParserBackend)GPOINTER_TO_INT(backend), input, 2, "d0x1.0cccccccccccdp+2");
+}
+
+static HParsedToken *act_float(const HParseResult *p, void *u) {
+  return H_MAKE_FLOAT((float)H_FIELD_UINT(0) + (float)H_FIELD_UINT(1)/10);
+}
+static void test_float(gconstpointer backend) {
+  HParser *b = h_uint8();
+  HParser *flt = h_action(h_sequence(b, b, NULL), act_float, NULL);
+  uint8_t input[] = {4,2};
+
+  g_check_parse_match(flt, (HParserBackend)GPOINTER_TO_INT(backend), input, 2, "f0x1.0cccccp+2");
+}
+
 static void test_int_range(gconstpointer backend) {
   const HParser *int_range_ = h_int_range(h_uint8(), 3, 10);
   
@@ -167,12 +228,12 @@ HParsedToken* upcase(const HParseResult *p, void* user_data) {
   switch(p->ast->token_type) {
   case TT_SEQUENCE:
     {
-      HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
+      HParsedToken *ret = a_new0_(p->arena, HParsedToken, 1);
       HCountedArray *seq = h_carray_new_sized(p->arena, p->ast->seq->used);
       ret->token_type = TT_SEQUENCE;
       for (size_t i=0; i<p->ast->seq->used; ++i) {
 	if (TT_UINT == ((HParsedToken*)p->ast->seq->elements[i])->token_type) {
-	  HParsedToken *tmp = a_new_(p->arena, HParsedToken, 1);
+	  HParsedToken *tmp = a_new0_(p->arena, HParsedToken, 1);
 	  tmp->token_type = TT_UINT;
 	  tmp->uint = toupper(((HParsedToken*)p->ast->seq->elements[i])->uint);
 	  h_carray_append(seq, tmp);
@@ -185,7 +246,7 @@ HParsedToken* upcase(const HParseResult *p, void* user_data) {
     }
   case TT_UINT:
     {
-      HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
+      HParsedToken *ret = a_new0_(p->arena, HParsedToken, 1);
       ret->token_type = TT_UINT;
       ret->uint = toupper(p->ast->uint);
       return ret;
@@ -636,7 +697,7 @@ static void test_endianness(gconstpointer backend) {
 }
 
 HParsedToken* act_get(const HParseResult *p, void* user_data) {
-  HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
+  HParsedToken *ret = a_new0_(p->arena, HParsedToken, 1);
   ret->token_type = TT_UINT;
   ret->uint = 3 * (1 << p->ast->uint);
   return ret;
@@ -743,10 +804,89 @@ static void test_bind(gconstpointer backend) {
   g_check_parse_failed(p, be, "272{", 4);
 }
 
+static void test_skip(gconstpointer backend) {
+  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
+  const HParser *p, *p_le, *p_be;
+
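+  // note: h_skip() counts bits, so h_skip(32) skips four whole bytes while h_skip(3)/h_skip(5) skip within a byte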
+  p = h_sequence(h_ch('a'), h_skip(32), h_ch('f'), NULL);
+  g_check_parse_match(p, be, "abcdef", 6, "(u0x61 u0x66)");
+  g_check_parse_failed(p, be, "abcdex", 6);
+  g_check_parse_failed(p, be, "abc", 3);
+
+  p = h_sequence(h_ch('a'), h_skip(32), h_end_p(), NULL);
+  g_check_parse_match(p, be, "abcde", 5, "(u0x61)");
+
+  p = h_sequence(h_ch('a'), h_skip(3), h_ch('\0'), h_skip(5), h_ch('b'), NULL);
+  g_check_parse_match(p, be, "a\xe0\x1f\x62", 4, "(u0x61 u0 u0x62)"); // big-endian
+  p_le = h_with_endianness(BYTE_LITTLE_ENDIAN|BIT_LITTLE_ENDIAN, p);
+  p_be = h_with_endianness(BYTE_LITTLE_ENDIAN|BIT_BIG_ENDIAN, p);
+  g_check_parse_match(p_be, be, "a\xe0\x1f\x62", 4, "(u0x61 u0 u0x62)");
+  g_check_parse_match(p_le, be, "a\x07\xf8\x62", 4, "(u0x61 u0 u0x62)");
+
+  p = h_sequence(h_ch('a'), h_skip(3), h_ch('\0'), h_skip(5), h_end_p(), NULL);
+  g_check_parse_match(p, be, "a\xe0\x1f", 3, "(u0x61 u0)"); // big-endian
+}
+
+static void test_tell(gconstpointer backend) {
+  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
+  const HParser *p;
+
+  p = h_sequence(h_ch('a'), h_ch('b'), h_tell(), h_end_p(), NULL);
+  g_check_parse_match(p, be, "ab", 2, "(u0x61 u0x62 u0x10)");
+  g_check_parse_failed(p, be, "abc", 1);
+  g_check_parse_failed(p, be, "a", 1);
+}
+
+static void test_seek(gconstpointer backend) {
+  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
+  const HParser *p;
+
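+  // note: h_seek() offsets and the position tokens it produces are in bits (e.g. 40 = byte 5)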
+  p = h_sequence(h_ch('a'), h_seek(40, SEEK_SET), h_ch('f'), NULL);
+  g_check_parse_match(p, be, "abcdef", 6, "(u0x61 u0x28 u0x66)");
+  g_check_parse_failed(p, be, "abcdex", 6);
+  g_check_parse_failed(p, be, "abc", 3);
+
+  p = h_sequence(h_ch('a'), h_seek(40, SEEK_SET), h_end_p(), NULL);
+  g_check_parse_match(p, be, "abcde", 5, "(u0x61 u0x28)");
+  g_check_parse_failed(p, be, "abcdex", 6);
+  g_check_parse_failed(p, be, "abc", 3);
+
+  p = h_sequence(h_ch('a'), h_seek(0, SEEK_END), h_end_p(), NULL);
+  g_check_parse_match(p, be, "abcde", 5, "(u0x61 u0x28)");
+  g_check_parse_match(p, be, "abc", 3, "(u0x61 u0x18)");
+
+  p = h_sequence(h_ch('a'), h_seek(-16, SEEK_END), h_ch('x'), NULL);
+  g_check_parse_match(p, be, "abcdxy", 6, "(u0x61 u0x20 u0x78)");
+  g_check_parse_match(p, be, "abxy", 4, "(u0x61 u0x10 u0x78)");
+  g_check_parse_failed(p, be, "abc", 3);
+  g_check_parse_failed(p, be, "x", 1);
+
+  p = h_sequence(h_ch('a'), h_seek(32, SEEK_CUR), h_ch('f'), NULL);
+  g_check_parse_match(p, be, "abcdef", 6, "(u0x61 u0x28 u0x66)");
+  g_check_parse_failed(p, be, "xbcdef", 6);
+  g_check_parse_failed(p, be, "abcdex", 6);
+  g_check_parse_failed(p, be, "abc", 3);
+}
+
+static void test_drop_from(gconstpointer backend) {
+  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
+  HParser *p, *q, *r, *seq;
+
+  seq = h_sequence(h_ch('a'), h_ch('b'), h_ch('c'), h_ch('d'), h_ch('e'), NULL);
+  p = h_drop_from(seq, 0, 4);
+  g_check_parse_match(p, be, "abcde", 5, "(u0x62 u0x63 u0x64)");
+  //q = h_drop_from(seq, 1, 2, -1);
+  //g_check_parse_match(q, be, "abcde", 5, "(u0x61 u0x64 u0x65)");
+  //r = h_drop_from(seq, 0, 1, 3, 4, -1);
+  //g_check_parse_match(r, be, "abcde", 5, "(u0x63)");
+}
+
 void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
   g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
   g_test_add_data_func("/core/parser/packrat/ch_range", GINT_TO_POINTER(PB_PACKRAT), test_ch_range);
+  g_test_add_data_func("/core/parser/packrat/bits0", GINT_TO_POINTER(PB_PACKRAT), test_bits0);
+  g_test_add_data_func("/core/parser/packrat/bits", GINT_TO_POINTER(PB_PACKRAT), test_bits);
   g_test_add_data_func("/core/parser/packrat/int64", GINT_TO_POINTER(PB_PACKRAT), test_int64);
   g_test_add_data_func("/core/parser/packrat/int32", GINT_TO_POINTER(PB_PACKRAT), test_int32);
   g_test_add_data_func("/core/parser/packrat/int16", GINT_TO_POINTER(PB_PACKRAT), test_int16);
@@ -756,10 +896,8 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/uint16", GINT_TO_POINTER(PB_PACKRAT), test_uint16);
   g_test_add_data_func("/core/parser/packrat/uint8", GINT_TO_POINTER(PB_PACKRAT), test_uint8);
   g_test_add_data_func("/core/parser/packrat/int_range", GINT_TO_POINTER(PB_PACKRAT), test_int_range);
-#if 0
-  g_test_add_data_func("/core/parser/packrat/float64", GINT_TO_POINTER(PB_PACKRAT), test_float64);
-  g_test_add_data_func("/core/parser/packrat/float32", GINT_TO_POINTER(PB_PACKRAT), test_float32);
-#endif
+  g_test_add_data_func("/core/parser/packrat/double", GINT_TO_POINTER(PB_PACKRAT), test_double);
+  g_test_add_data_func("/core/parser/packrat/float", GINT_TO_POINTER(PB_PACKRAT), test_float);
   g_test_add_data_func("/core/parser/packrat/whitespace", GINT_TO_POINTER(PB_PACKRAT), test_whitespace);
   g_test_add_data_func("/core/parser/packrat/left", GINT_TO_POINTER(PB_PACKRAT), test_left);
   g_test_add_data_func("/core/parser/packrat/right", GINT_TO_POINTER(PB_PACKRAT), test_right);
@@ -795,10 +933,16 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind);
   g_test_add_data_func("/core/parser/packrat/result_length", GINT_TO_POINTER(PB_PACKRAT), test_result_length);
   //g_test_add_data_func("/core/parser/packrat/token_position", GINT_TO_POINTER(PB_PACKRAT), test_token_position);
+  g_test_add_data_func("/core/parser/packrat/skip", GINT_TO_POINTER(PB_PACKRAT), test_skip);
+  g_test_add_data_func("/core/parser/packrat/seek", GINT_TO_POINTER(PB_PACKRAT), test_seek);
+  g_test_add_data_func("/core/parser/packrat/tell", GINT_TO_POINTER(PB_PACKRAT), test_tell);
+  g_test_add_data_func("/core/parser/packrat/drop_from", GINT_TO_POINTER(PB_PACKRAT), test_drop_from);
 
   g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
   g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
   g_test_add_data_func("/core/parser/llk/ch_range", GINT_TO_POINTER(PB_LLk), test_ch_range);
+  g_test_add_data_func("/core/parser/llk/bits0", GINT_TO_POINTER(PB_LLk), test_bits0);
+  //g_test_add_data_func("/core/parser/llk/bits", GINT_TO_POINTER(PB_LLk), test_bits);
   g_test_add_data_func("/core/parser/llk/int64", GINT_TO_POINTER(PB_LLk), test_int64);
   g_test_add_data_func("/core/parser/llk/int32", GINT_TO_POINTER(PB_LLk), test_int32);
   g_test_add_data_func("/core/parser/llk/int16", GINT_TO_POINTER(PB_LLk), test_int16);
@@ -808,10 +952,8 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/llk/uint16", GINT_TO_POINTER(PB_LLk), test_uint16);
   g_test_add_data_func("/core/parser/llk/uint8", GINT_TO_POINTER(PB_LLk), test_uint8);
   g_test_add_data_func("/core/parser/llk/int_range", GINT_TO_POINTER(PB_LLk), test_int_range);
-#if 0
-  g_test_add_data_func("/core/parser/llk/float64", GINT_TO_POINTER(PB_LLk), test_float64);
-  g_test_add_data_func("/core/parser/llk/float32", GINT_TO_POINTER(PB_LLk), test_float32);
-#endif
+  g_test_add_data_func("/core/parser/llk/double", GINT_TO_POINTER(PB_LLk), test_double);
+  g_test_add_data_func("/core/parser/llk/float", GINT_TO_POINTER(PB_LLk), test_float);
   g_test_add_data_func("/core/parser/llk/whitespace", GINT_TO_POINTER(PB_LLk), test_whitespace);
   g_test_add_data_func("/core/parser/llk/left", GINT_TO_POINTER(PB_LLk), test_left);
   g_test_add_data_func("/core/parser/llk/right", GINT_TO_POINTER(PB_LLk), test_right);
@@ -825,6 +967,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/llk/choice", GINT_TO_POINTER(PB_LLk), test_choice);
   g_test_add_data_func("/core/parser/llk/many", GINT_TO_POINTER(PB_LLk), test_many);
   g_test_add_data_func("/core/parser/llk/many1", GINT_TO_POINTER(PB_LLk), test_many1);
+  g_test_add_data_func("/core/parser/llk/repeat_n", GINT_TO_POINTER(PB_LLk), test_repeat_n);
   g_test_add_data_func("/core/parser/llk/optional", GINT_TO_POINTER(PB_LLk), test_optional);
   g_test_add_data_func("/core/parser/llk/sepBy", GINT_TO_POINTER(PB_LLk), test_sepBy);
   g_test_add_data_func("/core/parser/llk/sepBy1", GINT_TO_POINTER(PB_LLk), test_sepBy1);
@@ -838,10 +981,13 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/llk/iterative", GINT_TO_POINTER(PB_LLk), test_iterative);
   g_test_add_data_func("/core/parser/llk/iterative/lookahead", GINT_TO_POINTER(PB_LLk), test_iterative_lookahead);
   g_test_add_data_func("/core/parser/llk/iterative/result_length", GINT_TO_POINTER(PB_LLk), test_iterative_result_length);
+  g_test_add_data_func("/core/parser/llk/drop_from", GINT_TO_POINTER(PB_LLk), test_drop_from);
 
   g_test_add_data_func("/core/parser/regex/token", GINT_TO_POINTER(PB_REGULAR), test_token);
   g_test_add_data_func("/core/parser/regex/ch", GINT_TO_POINTER(PB_REGULAR), test_ch);
   g_test_add_data_func("/core/parser/regex/ch_range", GINT_TO_POINTER(PB_REGULAR), test_ch_range);
+  g_test_add_data_func("/core/parser/regex/bits0", GINT_TO_POINTER(PB_REGULAR), test_bits0);
+  //g_test_add_data_func("/core/parser/regex/bits", GINT_TO_POINTER(PB_REGULAR), test_bits);
   g_test_add_data_func("/core/parser/regex/int64", GINT_TO_POINTER(PB_REGULAR), test_int64);
   g_test_add_data_func("/core/parser/regex/int32", GINT_TO_POINTER(PB_REGULAR), test_int32);
   g_test_add_data_func("/core/parser/regex/int16", GINT_TO_POINTER(PB_REGULAR), test_int16);
@@ -850,11 +996,9 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/regex/uint32", GINT_TO_POINTER(PB_REGULAR), test_uint32);
   g_test_add_data_func("/core/parser/regex/uint16", GINT_TO_POINTER(PB_REGULAR), test_uint16);
   g_test_add_data_func("/core/parser/regex/uint8", GINT_TO_POINTER(PB_REGULAR), test_uint8);
-#if 0
-  g_test_add_data_func("/core/parser/regex/int_range", GINT_TO_POINTER(PB_REGULAR), test_int_range);
-  g_test_add_data_func("/core/parser/regex/float64", GINT_TO_POINTER(PB_REGULAR), test_float64);
-  g_test_add_data_func("/core/parser/regex/float32", GINT_TO_POINTER(PB_REGULAR), test_float32);
-#endif
+  //g_test_add_data_func("/core/parser/regex/int_range", GINT_TO_POINTER(PB_REGULAR), test_int_range);
+  g_test_add_data_func("/core/parser/regex/double", GINT_TO_POINTER(PB_REGULAR), test_double);
+  g_test_add_data_func("/core/parser/regex/float", GINT_TO_POINTER(PB_REGULAR), test_float);
   g_test_add_data_func("/core/parser/regex/whitespace", GINT_TO_POINTER(PB_REGULAR), test_whitespace);
   g_test_add_data_func("/core/parser/regex/left", GINT_TO_POINTER(PB_REGULAR), test_left);
   g_test_add_data_func("/core/parser/regex/right", GINT_TO_POINTER(PB_REGULAR), test_right);
@@ -877,10 +1021,13 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/regex/ignore", GINT_TO_POINTER(PB_REGULAR), test_ignore);
   g_test_add_data_func("/core/parser/regex/result_length", GINT_TO_POINTER(PB_REGULAR), test_result_length);
   g_test_add_data_func("/core/parser/regex/token_position", GINT_TO_POINTER(PB_REGULAR), test_token_position);
+  g_test_add_data_func("/core/parser/regex/drop_from", GINT_TO_POINTER(PB_REGULAR), test_drop_from);
 
   g_test_add_data_func("/core/parser/lalr/token", GINT_TO_POINTER(PB_LALR), test_token);
   g_test_add_data_func("/core/parser/lalr/ch", GINT_TO_POINTER(PB_LALR), test_ch);
   g_test_add_data_func("/core/parser/lalr/ch_range", GINT_TO_POINTER(PB_LALR), test_ch_range);
+  g_test_add_data_func("/core/parser/lalr/bits0", GINT_TO_POINTER(PB_LALR), test_bits0);
+  //g_test_add_data_func("/core/parser/lalr/bits", GINT_TO_POINTER(PB_LALR), test_bits);
   g_test_add_data_func("/core/parser/lalr/int64", GINT_TO_POINTER(PB_LALR), test_int64);
   g_test_add_data_func("/core/parser/lalr/int32", GINT_TO_POINTER(PB_LALR), test_int32);
   g_test_add_data_func("/core/parser/lalr/int16", GINT_TO_POINTER(PB_LALR), test_int16);
@@ -890,10 +1037,8 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/lalr/uint16", GINT_TO_POINTER(PB_LALR), test_uint16);
   g_test_add_data_func("/core/parser/lalr/uint8", GINT_TO_POINTER(PB_LALR), test_uint8);
   g_test_add_data_func("/core/parser/lalr/int_range", GINT_TO_POINTER(PB_LALR), test_int_range);
-#if 0
-  g_test_add_data_func("/core/parser/lalr/float64", GINT_TO_POINTER(PB_LALR), test_float64);
-  g_test_add_data_func("/core/parser/lalr/float32", GINT_TO_POINTER(PB_LALR), test_float32);
-#endif
+  g_test_add_data_func("/core/parser/lalr/double", GINT_TO_POINTER(PB_LALR), test_double);
+  g_test_add_data_func("/core/parser/lalr/float", GINT_TO_POINTER(PB_LALR), test_float);
   g_test_add_data_func("/core/parser/lalr/whitespace", GINT_TO_POINTER(PB_LALR), test_whitespace);
   g_test_add_data_func("/core/parser/lalr/left", GINT_TO_POINTER(PB_LALR), test_left);
   g_test_add_data_func("/core/parser/lalr/right", GINT_TO_POINTER(PB_LALR), test_right);
@@ -907,6 +1052,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/lalr/choice", GINT_TO_POINTER(PB_LALR), test_choice);
   g_test_add_data_func("/core/parser/lalr/many", GINT_TO_POINTER(PB_LALR), test_many);
   g_test_add_data_func("/core/parser/lalr/many1", GINT_TO_POINTER(PB_LALR), test_many1);
+  g_test_add_data_func("/core/parser/lalr/repeat_n", GINT_TO_POINTER(PB_LALR), test_repeat_n);
   g_test_add_data_func("/core/parser/lalr/optional", GINT_TO_POINTER(PB_LALR), test_optional);
   g_test_add_data_func("/core/parser/lalr/sepBy", GINT_TO_POINTER(PB_LALR), test_sepBy);
   g_test_add_data_func("/core/parser/lalr/sepBy1", GINT_TO_POINTER(PB_LALR), test_sepBy1);
@@ -921,10 +1067,13 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/lalr/iterative", GINT_TO_POINTER(PB_LALR), test_iterative);
   g_test_add_data_func("/core/parser/lalr/iterative/lookahead", GINT_TO_POINTER(PB_LALR), test_iterative_lookahead);
   g_test_add_data_func("/core/parser/lalr/iterative/result_length", GINT_TO_POINTER(PB_LALR), test_iterative_result_length);
+  g_test_add_data_func("/core/parser/lalr/drop_from", GINT_TO_POINTER(PB_LALR), test_drop_from);
 
   g_test_add_data_func("/core/parser/glr/token", GINT_TO_POINTER(PB_GLR), test_token);
   g_test_add_data_func("/core/parser/glr/ch", GINT_TO_POINTER(PB_GLR), test_ch);
   g_test_add_data_func("/core/parser/glr/ch_range", GINT_TO_POINTER(PB_GLR), test_ch_range);
+  g_test_add_data_func("/core/parser/glr/bits0", GINT_TO_POINTER(PB_GLR), test_bits0);
+  //g_test_add_data_func("/core/parser/glr/bits", GINT_TO_POINTER(PB_GLR), test_bits);
   g_test_add_data_func("/core/parser/glr/int64", GINT_TO_POINTER(PB_GLR), test_int64);
   g_test_add_data_func("/core/parser/glr/int32", GINT_TO_POINTER(PB_GLR), test_int32);
   g_test_add_data_func("/core/parser/glr/int16", GINT_TO_POINTER(PB_GLR), test_int16);
@@ -934,10 +1083,8 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/glr/uint16", GINT_TO_POINTER(PB_GLR), test_uint16);
   g_test_add_data_func("/core/parser/glr/uint8", GINT_TO_POINTER(PB_GLR), test_uint8);
   g_test_add_data_func("/core/parser/glr/int_range", GINT_TO_POINTER(PB_GLR), test_int_range);
-#if 0
-  g_test_add_data_func("/core/parser/glr/float64", GINT_TO_POINTER(PB_GLR), test_float64);
-  g_test_add_data_func("/core/parser/glr/float32", GINT_TO_POINTER(PB_GLR), test_float32);
-#endif
+  g_test_add_data_func("/core/parser/glr/double", GINT_TO_POINTER(PB_GLR), test_double);
+  g_test_add_data_func("/core/parser/glr/float", GINT_TO_POINTER(PB_GLR), test_float);
   g_test_add_data_func("/core/parser/glr/whitespace", GINT_TO_POINTER(PB_GLR), test_whitespace);
   g_test_add_data_func("/core/parser/glr/left", GINT_TO_POINTER(PB_GLR), test_left);
   g_test_add_data_func("/core/parser/glr/right", GINT_TO_POINTER(PB_GLR), test_right);
@@ -951,6 +1098,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/glr/choice", GINT_TO_POINTER(PB_GLR), test_choice);
   g_test_add_data_func("/core/parser/glr/many", GINT_TO_POINTER(PB_GLR), test_many);
   g_test_add_data_func("/core/parser/glr/many1", GINT_TO_POINTER(PB_GLR), test_many1);
+  g_test_add_data_func("/core/parser/glr/repeat_n", GINT_TO_POINTER(PB_GLR), test_repeat_n);
   g_test_add_data_func("/core/parser/glr/optional", GINT_TO_POINTER(PB_GLR), test_optional);
   g_test_add_data_func("/core/parser/glr/sepBy", GINT_TO_POINTER(PB_GLR), test_sepBy);
   g_test_add_data_func("/core/parser/glr/sepBy1", GINT_TO_POINTER(PB_GLR), test_sepBy1);
@@ -963,4 +1111,5 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous);
   g_test_add_data_func("/core/parser/glr/result_length", GINT_TO_POINTER(PB_GLR), test_result_length);
   g_test_add_data_func("/core/parser/glr/token_position", GINT_TO_POINTER(PB_GLR), test_token_position);
+  g_test_add_data_func("/core/parser/glr/drop_from", GINT_TO_POINTER(PB_GLR), test_drop_from);
 }
diff --git a/src/t_regression.c b/src/t_regression.c
index 3dfe5dce12d0d9c9306ede01d5daf3a45ebab488..2c28b99efe6a36e69e5831044dadcbcc381f4d18 100644
--- a/src/t_regression.c
+++ b/src/t_regression.c
@@ -187,12 +187,291 @@ static void test_charset_bits(void) {
         .free = NULL,
     };
     test_charset_bits__buf[32] = 0xAB;
-    HCharset cs = new_charset(&alloc);
+    new_charset(&alloc);
     for(size_t i=0; i<32; i++)
         g_check_cmp_uint32(test_charset_bits__buf[i], ==, 0);
     g_check_cmp_uint32(test_charset_bits__buf[32], ==, 0xAB);
 }
 
+
+// Allocator for reproducing error 19.
+
+// The bug is a result of uninitialized data being used, initially
+// assumed to be zero.  Unfortunately, this assumption is often true,
+// so reproducing the bug reliably and in a minimal fashion requires
+// making it false.  Fortunately, glibc malloc has an M_PERTURB option
+// for making that assumption false.  Unfortunately, we want the test
+// to reproduce the bug on systems that don't use glibc.  Fortunately,
+// the standard Hammer system allocator has a DEBUG__MEMFILL option to
+// fill uninitialized memory with a fill byte.  Unfortunately, you
+// have to recompile Hammer with that symbol #defined in order to
+// enable it.  Fortunately, Hammer allows you to supply your own
+// allocator.  So this is a simple non-#define-dependent allocator
+// that writes 0xbabababa† over all the memory it allocates.  (But not
+// the memory it reallocs, because, as it happens, the uninitialized
+// memory in this case didn't come from a realloc.)
+//
+// Honestly I think we ought to remove the #ifdefs from
+// system_allocator and always compile both the DEBUG__MEMFILL version
+// and the non-DEBUG__MEMFILL version, merely changing which one is
+// system_allocator, which is after all a struct of three pointers
+// that can even be modified at run-time.
+//
+// † Can you hear it, Mr. Toot?
+
+static void* deadbeefing_malloc(HAllocator *allocator, size_t size) {
+    char *block = malloc(size);
+    if (block) memset(block, 0xba, size);
+    return block;
+}
+
+// Don't deadbeef on realloc because it isn't necessary to reproduce this bug.
+static void* deadbeefing_realloc(HAllocator *allocator, void *uptr, size_t size) {
+    return realloc(uptr, size);
+}
+
+static void deadbeefing_free(HAllocator *allocator, void *uptr) {
+    free(uptr);
+}
+
+static HAllocator deadbeefing_allocator = {
+    .alloc = deadbeefing_malloc,
+    .realloc = deadbeefing_realloc,
+    .free = deadbeefing_free,
+};
+
+static void test_bug_19() {
+    void *args[] = {
+        h_ch_range__m(&deadbeefing_allocator, '0', '9'),
+        h_ch_range__m(&deadbeefing_allocator, 'A', 'Z'),
+        h_ch_range__m(&deadbeefing_allocator, 'a', 'z'),
+        NULL,
+    };
+
+    HParser *parser = h_choice__ma(&deadbeefing_allocator, args);
+
+    // In bug 19 ("GLR backend reaches unreachable code"), this call
+    // would fail because h_choice__ma allocated an HParser with h_new
+    // and didn't initialize its ->desugared field; consequently in
+    // the call chain h_compile ... h_lalr_compile ... h_desugar,
+    // h_desugar would find that ->desugared was already non-NULL (set
+    // to 0xbabababa in the above deadbeefing_malloc), and just return
+    // it, leading to a crash immediately afterwards in collect_nts.
+    // We don't actually care if the compile succeeds or fails, just
+    // that it doesn't crash.
+    h_compile(parser, PB_GLR, NULL);
+
+    // The same bug happened in h_sequence__ma.
+    h_compile(h_sequence__ma(&deadbeefing_allocator, args), PB_GLR, NULL);
+
+    // It also exists in h_permutation__ma, but it doesn't happen to
+    // manifest in the same way.  I don't know how to write a test for
+    // the h_permutation__ma case.
+    g_assert_true(1);
+}
+
+static void test_flatten_null() {
+  // h_act_flatten() produces a flat sequence from a nested sequence. it also
+  // happens to produce a one-element sequence when given a non-sequence token.
+  // but given a null token (as from h_epsilon_p() or h_ignore()), it would
+  // previously segfault.
+  //
+  // let's make sure the behavior is consistent and a singular null token
+  // produces the same thing as a sequence around h_epsilon_p() or h_ignore().
+
+  HParser *A = h_many(h_ch('a'));
+  HParser *B = h_ch('b');
+  HParser *C = h_sequence(h_ch('c'), NULL);
+
+  HParser *V = h_action(h_epsilon_p(), h_act_flatten, NULL);
+  HParser *W = h_action(B, h_act_flatten, NULL);
+  HParser *X = h_action(h_sequence(h_ignore(A), NULL), h_act_flatten, NULL);
+  HParser *Y = h_action(h_sequence(h_epsilon_p(), NULL), h_act_flatten, NULL);
+  HParser *Z = h_action(h_sequence(A, B, C, NULL), h_act_flatten, NULL);
+
+  g_check_parse_match(V, PB_PACKRAT, "", 0, "()");
+  g_check_parse_match(W, PB_PACKRAT, "b", 1, "(u0x62)");
+  g_check_parse_match(X, PB_PACKRAT, "", 0, "()");
+  g_check_parse_match(Y, PB_PACKRAT, "", 0, "()");
+  g_check_parse_match(Z, PB_PACKRAT, "aabc", 4, "(u0x61 u0x61 u0x62 u0x63)");
+
+#if 0 // XXX ast->bit_length and ast->index are currently not set
+  // let's also check that position and length info get attached correctly...
+
+  HParseResult *p = h_parse(h_sequence(A,V,B, NULL), (uint8_t *)"aaab", 4);
+
+  // top-level token
+  assert(p != NULL);
+  assert(p->ast != NULL);
+  g_check_cmp_int64(p->bit_length, ==, 32);
+  g_check_cmp_size(p->ast->bit_length, ==, 32);
+  g_check_cmp_size(p->ast->index, ==, 0);
+  g_check_cmp_int((int)p->ast->bit_offset, ==, 0);
+
+  // the empty sequence
+  HParsedToken *tok = H_INDEX_TOKEN(p->ast, 1);
+  assert(tok != NULL);
+  assert(tok->token_type == TT_SEQUENCE);
+  assert(tok->seq->used == 0);
+  g_check_cmp_size(tok->bit_length, ==, 0);
+  g_check_cmp_size(tok->index, ==, 2);
+  g_check_cmp_int((int)tok->bit_offset, ==, 0);
+#endif // 0
+}
+
+#if 0 // XXX ast->bit_length and ast->index are currently not set
+static void test_ast_length_index() {
+  HParser *A = h_many(h_ch('a'));
+  HParser *B = h_ch('b');
+  HParser *C = h_sequence(h_ch('c'), NULL);
+
+  const uint8_t input[] = "aabc";
+  size_t len = sizeof input - 1; // sans null
+  HParseResult *p = h_parse(h_sequence(A,B,C, NULL), input, len);
+  assert(p != NULL);
+  assert(p->ast != NULL);
+
+  // top-level token
+  g_check_cmp_int64(p->bit_length, ==, (int64_t)(8 * len));
+  g_check_cmp_size(p->ast->bit_length, ==, 8 * len);
+  g_check_cmp_size(p->ast->index, ==, 0);
+
+  HParsedToken *tok;
+
+  // "aa"
+  tok = H_INDEX_TOKEN(p->ast, 0);
+  g_check_cmp_size(tok->bit_length, ==, 16);
+  g_check_cmp_size(tok->index, ==, 0);
+
+  // "a", "a"
+  tok = H_INDEX_TOKEN(p->ast, 0, 0);
+  g_check_cmp_size(tok->bit_length, ==, 8);
+  g_check_cmp_size(tok->index, ==, 0);
+  tok = H_INDEX_TOKEN(p->ast, 0, 1);
+  g_check_cmp_size(tok->bit_length, ==, 8);
+  g_check_cmp_size(tok->index, ==, 1);
+
+  // "b"
+  tok = H_INDEX_TOKEN(p->ast, 1);
+  g_check_cmp_size(tok->bit_length, ==, 8);
+  g_check_cmp_size(tok->index, ==, 2);
+
+  // "c"
+  tok = H_INDEX_TOKEN(p->ast, 2);
+  g_check_cmp_size(tok->bit_length, ==, 8);
+  g_check_cmp_size(tok->index, ==, 3);
+  tok = H_INDEX_TOKEN(p->ast, 2, 0);
+  g_check_cmp_size(tok->bit_length, ==, 8);
+  g_check_cmp_size(tok->index, ==, 3);
+}
+#endif // 0
+
+static void test_issue91() {
+  // this ambiguous grammar caused intermittent (?) assertion failures when
+  // trying to compile with the LALR backend:
+  //
+  // assertion "action->type == HLR_SHIFT" failed: file "src/backends/lalr.c",
+  // line 34, function "follow_transition"
+  //
+  // cf. https://gitlab.special-circumstanc.es/hammer/hammer/issues/91
+
+  H_RULE(schar,   h_ch_range(' ', '~'));    /* overlaps digit */
+  H_RULE(digit,   h_ch_range('0', '9'));
+  H_RULE(digits,  h_choice(h_repeat_n(digit, 2), digit, NULL));
+  H_RULE(p,       h_many(h_choice(schar, digits, NULL)));
+
+  int r = h_compile(p, PB_LALR, NULL);
+  g_check_cmp_int(r, ==, -2);
+}
+
+// a different instance of issue 91
+static void test_issue87() {
+  HParser *a = h_ch('a');
+  HParser *a2 = h_ch_range('a', 'a');
+  HParser *p = h_many(h_many(h_choice(a, a2, NULL)));
+
+  int r = h_compile(p, PB_LALR, NULL);
+  g_check_cmp_int(r, ==, -2);
+}
+
+static void test_issue92() {
+  HParser *a = h_ch('a');
+  HParser *b = h_ch('b');
+
+  HParser *str_a  = h_indirect();
+  HParser *str_b  = h_choice(h_sequence(b, str_a, NULL), str_a, NULL);
+                    //h_sequence(h_optional(b), str_a, NULL);  // this works
+  HParser *str_a_ = h_optional(h_sequence(a, str_b, NULL));
+  HParser *str    = str_a;
+  h_bind_indirect(str_a, str_a_);
+  /*
+   * grammar generated from the above:
+   *
+   *   A -> B           -- "augmented" with a fresh start symbol
+   *   B -> C           -- B = str_a
+   *      | ""
+   *   C -> "a" D       -- C = h_sequence(a, str_b)
+   *   D -> E           -- D = str_b
+   *      | B
+   *   E -> "b" B       -- E = h_sequence(b, str_a)
+   *
+   * transformed to the following "enhanced grammar":
+   *
+   *    S  -> 0B3
+   *   0B3 -> 0C2
+   *        | ""
+   *   1B4 -> 1C2
+   *        | ""
+   *   6B8 -> 6C2
+   *        | ""           (*) here
+   *   0C2 -> "a" 1D7
+   *   1C2 -> "a" 1D7
+   *   6C2 -> "a" 1D7
+   *   1D7 -> 1E5
+   *        | 1B4
+   *   1E5 -> "b" 6B8
+   */
+
+  /*
+   * the following call would cause an assertion failure.
+   *
+   * assertion "!h_stringmap_empty(fs)" failed: file
+   * "src/backends/lalr.c", line 341, function "h_lalr_compile"
+   *
+   * the bug happens when trying to compute h_follow() for 6B8 in state 6,
+   * production "" (*). intermediate results could end up in the memoization
+   * table and be treated as final by later calls to h_follow(). the problem
+   * could appear or not depending on the order of nonterminals (i.e. pointers)
+   * in a hashtable.
+   */
+  int r = h_compile(str, PB_LALR, NULL);
+  g_check_cmp_int(r, ==, 0);
+}
+
+static void test_issue83() {
+  HParser *p = h_sequence(h_sequence(NULL, NULL), h_nothing_p(), NULL);
+  /*
+   * A -> B
+   * B -> C D
+   * C -> ""
+   * D -x
+   *
+   * (S) -> 0B1
+   * 0B1 -> 0C2 2D3
+   * 0C2 -> ""           (*) h_follow()
+   * 2D3 -x
+   */
+
+  /*
+   * similar to issue 91, this would cause the same assertion failure, but for
+   * a different reason. the follow set of 0C2 above is equal to the first set
+   * of 2D3, but 2D3 is an empty choice. The first set of an empty choice
+   * is legitimately empty. the assertion in h_lalr_compile() missed this case.
+   */
+  int r = h_compile(p, PB_LALR, NULL);
+  g_check_cmp_int(r, ==, 0);
+}
+
 void register_regression_tests(void) {
   g_test_add_func("/core/regression/bug118", test_bug118);
   g_test_add_func("/core/regression/seq_index_path", test_seq_index_path);
@@ -202,4 +481,11 @@ void register_regression_tests(void) {
   g_test_add_func("/core/regression/lalr_charset_lhs", test_lalr_charset_lhs);
   g_test_add_func("/core/regression/cfg_many_seq", test_cfg_many_seq);
   g_test_add_func("/core/regression/charset_bits", test_charset_bits);
+  g_test_add_func("/core/regression/bug19", test_bug_19);
+  g_test_add_func("/core/regression/flatten_null", test_flatten_null);
+  //XXX g_test_add_func("/core/regression/ast_length_index", test_ast_length_index);
+  g_test_add_func("/core/regression/issue91", test_issue91);
+  g_test_add_func("/core/regression/issue87", test_issue87);
+  g_test_add_func("/core/regression/issue92", test_issue92);
+  g_test_add_func("/core/regression/issue83", test_issue83);
 }
diff --git a/src/test_suite.h b/src/test_suite.h
index b93e6848fcc7c273d9005f1b55623f143a317224..f3477c1aef6d7fe65f0a644aae918dc35916a57f 100644
--- a/src/test_suite.h
+++ b/src/test_suite.h
@@ -134,6 +134,17 @@
 #define g_check_parse_failed(p, be, input, len)				\
     g_check_parse_failed__m(&system_allocator, p, be, input, len)
 
+#define print_arena_stats(arena) do {					\
+    if (g_test_verbose()) {						\
+      HArenaStats stats;						\
+      h_allocator_stats(arena, &stats);					\
+      g_test_message("Parse used %zd bytes, wasted %zd bytes. "		\
+                     "Inefficiency: %5f%%",				\
+                     stats.used, stats.wasted,				\
+                     stats.wasted * 100. / (stats.used+stats.wasted));	\
+    }									\
+  } while(0)
+
 #define g_check_parse_ok(parser, backend, input, inp_len) do {		\
     int skip = h_compile((HParser *)(parser), (HParserBackend) backend, NULL); \
     if(skip) {								\
@@ -146,12 +157,7 @@
       g_test_message("Parse failed on line %d", __LINE__);		\
       g_test_fail();							\
     } else {								\
-      HArenaStats stats;						\
-      h_allocator_stats(res->arena, &stats);				\
-      g_test_message("Parse used %zd bytes, wasted %zd bytes. "		\
-                     "Inefficiency: %5f%%",				\
-		     stats.used, stats.wasted,				\
-		     stats.wasted * 100. / (stats.used+stats.wasted));	\
+      print_arena_stats(res->arena);					\
       h_parse_result_free(res);						\
     }									\
   } while(0)
@@ -171,12 +177,7 @@
       char* cres = h_write_result_unamb(res->ast);			\
       g_check_string(cres, ==, result);					\
       (&system_allocator)->free(&system_allocator, cres);		\
-      HArenaStats stats;						\
-      h_allocator_stats(res->arena, &stats);				\
-      g_test_message("Parse used %zd bytes, wasted %zd bytes. "		\
-                     "Inefficiency: %5f%%",				\
-		     stats.used, stats.wasted,				\
-		     stats.wasted * 100. / (stats.used+stats.wasted));	\
+      print_arena_stats(res->arena);					\
       h_parse_result_free(res);						\
     }									\
   } while(0)
@@ -238,12 +239,7 @@
       g_test_message("Parse failed on line %d", __LINE__);		\
       g_test_fail();							\
     } else {								\
-      HArenaStats stats;						\
-      h_allocator_stats(res->arena, &stats);				\
-      g_test_message("Parse used %zd bytes, wasted %zd bytes. "		\
-                     "Inefficiency: %5f%%",				\
-		     stats.used, stats.wasted,				\
-		     stats.wasted * 100. / (stats.used+stats.wasted));	\
+      print_arena_stats(res->arena);					\
       h_parse_result_free(res);						\
     }									\
   } while(0)
@@ -275,12 +271,7 @@
       char* cres = h_write_result_unamb(res->ast);			\
       g_check_string(cres, ==, result);					\
       (&system_allocator)->free(&system_allocator, cres);		\
-      HArenaStats stats;						\
-      h_allocator_stats(res->arena, &stats);				\
-      g_test_message("Parse used %zd bytes, wasted %zd bytes. "		\
-                     "Inefficiency: %5f%%",				\
-		     stats.used, stats.wasted,				\
-		     stats.wasted * 100. / (stats.used+stats.wasted));	\
+      print_arena_stats(res->arena);					\
       h_parse_result_free(res);						\
     }									\
   } while(0)
@@ -359,6 +350,8 @@
 #define g_check_cmp_int64(n1, op, n2) g_check_inttype("%" PRId64, int64_t, n1, op, n2)
 #define g_check_cmp_uint32(n1, op, n2) g_check_inttype("%u", uint32_t, n1, op, n2)
 #define g_check_cmp_uint64(n1, op, n2) g_check_inttype("%" PRIu64, uint64_t, n1, op, n2)
+#define g_check_cmp_size(n1, op, n2) g_check_inttype("%zu", size_t, n1, op, n2)
+#define g_check_cmp_ptr(n1, op, n2) g_check_inttype("%p", void *, n1, op, n2)
 #define g_check_cmpfloat(n1, op, n2) g_check_inttype("%g", float, n1, op, n2)
 #define g_check_cmpdouble(n1, op, n2) g_check_inttype("%g", double, n1, op, n2)
 
diff --git a/testing/leak-check.sh b/testing/leak-check.sh
new file mode 100755
index 0000000000000000000000000000000000000000..b3f2d250be8618afb6e84b029f785c716ef79653
--- /dev/null
+++ b/testing/leak-check.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Script to run valgrind against the test suite for hunting memory leaks
+#
+# This assumes you run it in the Hammer base directory and have a debug build
+
+HAMMER_ROOT=.
+VARIANT=debug
+BUILD_PATH=$HAMMER_ROOT/build/$VARIANT
+LD_LIBRARY_PATH=$BUILD_PATH/src:$LD_LIBRARY_PATH
+VALGRIND=valgrind
+VALGRIND_OPTS="-v --leak-check=full --leak-resolution=high --num-callers=40 --partial-loads-ok=no --show-leak-kinds=all --track-origins=yes --undef-value-errors=yes"
+VALGRIND_SUPPRESSIONS="valgrind-glib.supp"
+
+for s in $VALGRIND_SUPPRESSIONS
+do
+  VALGRIND_OPTS="$VALGRIND_OPTS --suppressions=$HAMMER_ROOT/testing/valgrind/$s"
+done
+
+export LD_LIBRARY_PATH
+
+$VALGRIND $VALGRIND_OPTS $BUILD_PATH/src/test_suite "$@"
diff --git a/testing/valgrind/valgrind-glib.supp b/testing/valgrind/valgrind-glib.supp
new file mode 100644
index 0000000000000000000000000000000000000000..9b35108f8f606aa7c3c9d8f85ffc6c51bff8f4d0
--- /dev/null
+++ b/testing/valgrind/valgrind-glib.supp
@@ -0,0 +1,40 @@
+{
+   <g_test_add_vtable_supp>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   ...
+   fun:g_malloc
+   ...
+   fun:g_test_add_vtable
+   ...
+}
+{
+   <g_test_init_malloc_supp>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:malloc
+   ...
+   fun:g_test_init
+   ...
+}
+{
+   <g_test_init_calloc_supp>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:calloc
+   ...
+   fun:g_test_init
+   ...
+}
+{
+   <g_rand_new_with_seed_array_supp>
+   Memcheck:Leak
+   match-leak-kinds: reachable
+   fun:calloc
+   fun:g_malloc0
+   fun:g_rand_new_with_seed_array
+   ...
+   fun:g_test_run_suite
+   fun:g_test_run
+   ...
+}