diff --git a/.gitignore b/.gitignore
index ed8fd180d2eeedc877681b2c8ea025f2fd3e8274..f14c6e9ee356cf57c2f0a01ac717a90318b3dee2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,3 +25,4 @@ build/
 *.pyc
 *.ll
 libhammer.pc
+TestResult.xml
diff --git a/.travis.yml b/.travis.yml
index 01d0f512a906642fbcf016b3b285350c91a1b528..566fdbfc87108c23cad6411c47097d813d690a87 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,100 +8,176 @@ env:
   global:
     - LLVM_CONFIG=llvm-config-3.8
   matrix:
-    - BINDINGS=none
+    - BINDINGS=none LLVM_BACKEND=0
+    - BINDINGS=none LLVM_BACKEND=1
 matrix:
   include:
     - compiler: gcc
       language: ruby
       rvm: ruby-2.2.5
-      env: BINDINGS=ruby
+      env: BINDINGS=ruby LLVM_BACKEND=0
+    - compiler: gcc
+      language: ruby
+      rvm: ruby-2.2.5
+      env: BINDINGS=ruby LLVM_BACKEND=1
     - compiler: clang
       language: ruby
       rvm: ruby-2.2.5
-      env: BINDINGS=ruby CC=clang
+      env: BINDINGS=ruby CC=clang LLVM_BACKEND=0
+    - compiler: clang
+      language: ruby
+      rvm: ruby-2.2.5
+      env: BINDINGS=ruby CC=clang LLVM_BACKEND=1
+    - compiler: gcc
+      language: ruby
+      rvm: ruby-2.3.1
+      env: BINDINGS=ruby LLVM_BACKEND=0
     - compiler: gcc
       language: ruby
       rvm: ruby-2.3.1
-      env: BINDINGS=ruby
+      env: BINDINGS=ruby LLVM_BACKEND=1
     - compiler: clang
       language: ruby
       rvm: ruby-2.3.1
-      env: BINDINGS=ruby CC=clang
+      env: BINDINGS=ruby CC=clang LLVM_BACKEND=0
+    - compiler: clang
+      language: ruby
+      rvm: ruby-2.3.1
+      env: BINDINGS=ruby CC=clang LLVM_BACKEND=1
+    - compiler: gcc
+      language: python
+      python: "2.7.10"
+      env: BINDINGS=python LLVM_BACKEND=0
     - compiler: gcc
       language: python
-      python: "2.7"
-      env: BINDINGS=python
+      python: "2.7.10"
+      env: BINDINGS=python LLVM_BACKEND=1
     - compiler: clang
       language: python
-      python: "2.7"
-      env: BINDINGS=python CC=clang
+      python: "2.7.10"
+      env: BINDINGS=python CC=clang LLVM_BACKEND=0
+    - compiler: clang
+      language: python
+      python: "2.7.10"
+      env: BINDINGS=python CC=clang LLVM_BACKEND=1
     - compiler: gcc
       language: perl
       perl: "5.18"
-      env: BINDINGS=perl
+      env: BINDINGS=perl LLVM_BACKEND=0
+    - compiler: gcc
+      language: perl
+      perl: "5.18"
+      env: BINDINGS=perl LLVM_BACKEND=1
     - compiler: clang
       language: perl
       perl: "5.18"
-      env: BINDINGS=perl CC=clang
+      env: BINDINGS=perl CC=clang LLVM_BACKEND=0
+    - compiler: clang
+      language: perl
+      perl: "5.18"
+      env: BINDINGS=perl CC=clang LLVM_BACKEND=1
+    - compiler: gcc
+      language: perl
+      perl: "5.14"
+      env: BINDINGS=perl LLVM_BACKEND=0
     - compiler: gcc
       language: perl
       perl: "5.14"
-      env: BINDINGS=perl
+      env: BINDINGS=perl LLVM_BACKEND=1
+    - compiler: clang
+      language: perl
+      perl: "5.14"
+      env: BINDINGS=perl CC=clang LLVM_BACKEND=0
     - compiler: clang
       language: perl
       perl: "5.14"
-      env: BINDINGS=perl CC=clang
+      env: BINDINGS=perl CC=clang LLVM_BACKEND=1
+    - compiler: gcc
+      language: perl
+      perl: "5.10"
+      env: BINDINGS=perl LLVM_BACKEND=0
     - compiler: gcc
       language: perl
       perl: "5.10"
-      env: BINDINGS=perl
+      env: BINDINGS=perl LLVM_BACKEND=1
+    - compiler: clang
+      language: perl
+      perl: "5.10"
+      env: BINDINGS=perl CC=clang LLVM_BACKEND=0
     - compiler: clang
       language: perl
       perl: "5.10"
-      env: BINDINGS=perl CC=clang
+      env: BINDINGS=perl CC=clang LLVM_BACKEND=1
     - compiler: gcc
       language: php
       php: "5.5"
-      env: BINDINGS=php
+      env: BINDINGS=php LLVM_BACKEND=0
+    - compiler: gcc
+      language: php
+      php: "5.5"
+      env: BINDINGS=php LLVM_BACKEND=1
     - compiler: clang
       language: php
       php: "5.5"
-      env: BINDINGS=php CC=clang
+      env: BINDINGS=php CC=clang LLVM_BACKEND=0
+    - compiler: clang
+      language: php
+      php: "5.5"
+      env: BINDINGS=php CC=clang LLVM_BACKEND=1
     - compiler: gcc
       language: php
       php: "5.4"
-      env: BINDINGS=php
+      env: BINDINGS=php LLVM_BACKEND=0
+    - compiler: gcc
+      language: php
+      php: "5.4"
+      env: BINDINGS=php LLVM_BACKEND=1
     - compiler: clang
       language: php
       php: "5.4"
-      env: BINDINGS=php CC=clang
+      env: BINDINGS=php CC=clang LLVM_BACKEND=0
+    - compiler: clang
+      language: php
+      php: "5.4"
+      env: BINDINGS=php CC=clang LLVM_BACKEND=1
+    - compiler: gcc
+      language: dotnet
+      env: BINDINGS=dotnet LLVM_BACKEND=0
     - compiler: gcc
       language: dotnet
-      env: BINDINGS=dotnet
+      env: BINDINGS=dotnet LLVM_BACKEND=1
     - compiler: clang
       language: dotnet
-      env: BINDINGS=dotnet CC=clang
+      env: BINDINGS=dotnet CC=clang LLVM_BACKEND=0
+    - compiler: clang
+      language: dotnet
+      env: BINDINGS=dotnet CC=clang LLVM_BACKEND=1
     - compiler: gcc
       language: cpp
-      env: BINDINGS=cpp
+      env: BINDINGS=cpp LLVM_BACKEND=0
     - compiler: gcc
       language: cpp
-      env: BINDINGS=cpp CC=clang
+      env: BINDINGS=cpp LLVM_BACKEND=1
+    - compiler: clang
+      language: cpp
+      env: BINDINGS=cpp CC=clang LLVM_BACKEND=0
+    - compiler: clang
+      language: cpp
+      env: BINDINGS=cpp CC=clang LLVM_BACKEND=1
 before_install:
   - sudo apt-get update -qq
-  - sudo apt-get install lcov
+  - sudo apt-get install -y lcov
   - gem install coveralls-lcov
   - if [ "$CC" == "gcc" ]; then sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y; sudo apt-get update -qq; sudo apt-get install gcc-5; fi
   - sudo apt-get install llvm-3.8 llvm-3.8-dev clang-3.8
-  - if [ "$BINDINGS" != "none" ]; then sudo apt-get install -qq swig; fi
-  - if [ "$BINDINGS" == "perl" ]; then sudo apt-get install -t trusty-backports swig3.0; fi
+  - if [ "$BINDINGS" != "none" ]; then sudo apt-get install -qq swig; sudo apt-get install -qq -t trusty-backports swig3.0; fi
   - if [ "$BINDINGS" == "python" ]; then sudo apt-get install -qq python-dev; fi
   - if [ "$BINDINGS" == "dotnet" ]; then sudo apt-get install -qq mono-devel mono-mcs nunit nunit-console; fi
 install: true
 before_script:
   - if [ "$BINDINGS" == "php" ]; then phpenv config-add src/bindings/php/hammer.ini; fi
 script:
-  - if [ "$BINDINGS" == "none" ]; then scons test --variant=debug --coverage; else scons bindings=$BINDINGS test; fi
+  - if [ "$LLVM_BACKEND" == "1" ]; then LLVM_FLAG="--enable-llvm-backend"; else LLVM_FLAG="--disable-llvm-backend"; fi; if [ "$BINDINGS" == "none" ]; then scons test --variant=debug $LLVM_FLAG --coverage; else scons $LLVM_FLAG bindings=$BINDINGS test; fi  # LLVM_FLAG is computed first so both branches honour LLVM_BACKEND
 after_success:
   - if [ "$BINDINGS" == "none" ]; then if [ "$CC" == "clang" ]; then llvm-cov gcov -o coverage.info build/debug/src/test_suite.gcda; else lcov --capture --directory build/debug/src --output-file coverage.info; fi; fi
   - coveralls-lcov coverage.info
diff --git a/SConstruct b/SConstruct
index 43ba87ea10730be4731e79ce412e06b944e0758c..8dc2a8dc7f32b84b5d18868273466424ddf74d43 100644
--- a/SConstruct
+++ b/SConstruct
@@ -46,7 +46,6 @@ if 'DESTDIR' in env:
         print >>sys.stderr, "--!!-- you want; files will be installed in"
         print >>sys.stderr, "--!!--    %s" % (calcInstallPath("$prefix"),)
 
-env['LLVM_CONFIG'] = "llvm-config"
 if 'includedir' in env:
     env['incpath'] = calcInstallPath("$includedir", "hammer")
 else:
@@ -62,7 +61,7 @@ else:
 env['parsersincpath'] = calcInstallPath("$includedir", "hammer", "parsers")
 env['backendsincpath'] = calcInstallPath("$includedir", "hammer", "backends")
 
-env.MergeFlags("-std=gnu11 -Wno-unused-parameter -Wno-attributes -Wno-unused-variable -Wall -Wextra -Werror")
+env.MergeFlags("-std=gnu11 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable")
 
 if env['PLATFORM'] == 'darwin':
     env.Append(SHLINKFLAGS = '-install_name ' + env["libpath"] + '/${TARGET.file}')
@@ -91,6 +90,16 @@ AddOption("--in-place",
           action="store_true",
           help="Build in-place, rather than in the build/<variant> tree")
 
+AddOption("--disable-llvm-backend",
+          dest="use_llvm",
+          default=False,
+          action="store_false",
+          help="Disable the LLVM backend (and don't require LLVM library dependencies)")
+AddOption("--enable-llvm-backend",
+          dest="use_llvm",
+          default=False,
+          action="store_true",
+          help="Enable the LLVM backend (and require LLVM library dependencies)")
 
 dbg = env.Clone(VARIANT='debug')
 dbg.MergeFlags("-g -O0")
@@ -105,7 +114,12 @@ else:
 
 env["CC"] = os.getenv("CC") or env["CC"]
 env["CXX"] = os.getenv("CXX") or env["CXX"]
-env["LLVM_CONFIG"] = os.getenv("LLVM_CONFIG") or env["LLVM_CONFIG"]
+
+if GetOption("use_llvm"):
+    # Overridable default path to llvm-config
+    env['LLVM_CONFIG'] = "llvm-config"
+    env["LLVM_CONFIG"] = os.getenv("LLVM_CONFIG") or env["LLVM_CONFIG"]
+    env.MergeFlags("-DHAMMER_LLVM_BACKEND")
 
 if GetOption("coverage"):
     env.Append(CFLAGS=["--coverage"],
@@ -114,8 +128,11 @@ if GetOption("coverage"):
     if env["CC"] == "gcc":
         env.Append(LIBS=['gcov'])
     else:
-        env.ParseConfig('%s --cflags --ldflags --libs core executionengine mcjit analysis x86codegen x86info' % \
-                        env["LLVM_CONFIG"])
+        # XXX Why do we need this with --coverage when we're doing it anyway?
+        if GetOption("use_llvm"):
+            env.ParseConfig('%s --cflags --ldflags --libs core executionengine mcjit analysis x86codegen x86info' % \
+                            env["LLVM_CONFIG"])
+
 
 if os.getenv("CC") == "clang" or env['PLATFORM'] == 'darwin':
     env.Replace(CC="clang",
@@ -126,113 +143,122 @@ env["ENV"].update(x for x in os.environ.items() if x[0].startswith("CCC_"))
 #rootpath = env['ROOTPATH'] = os.path.abspath('.')
 #env.Append(CPPPATH=os.path.join('#', "hammer"))
 
+if GetOption("use_llvm"):
 # Set up LLVM config stuff to export
 
 # some llvm versions are old and will not work; some require --system-libs
 # with llvm-config, and some will break if given it
-llvm_config_version = subprocess.Popen('%s --version' % env["LLVM_CONFIG"], \
-                                       shell=True, \
-                                       stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()
-if LooseVersion(llvm_config_version[0]) < LooseVersion("3.6"):
-   print "This LLVM version %s is too old" % llvm_config_version
-   Exit(1)
-
-if LooseVersion(llvm_config_version[0]) < LooseVersion("3.9") and \
-   LooseVersion(llvm_config_version[0]) >= LooseVersion("3.5"):
-    llvm_system_libs_flag = "--system-libs"
-else:
-    llvm_system_libs_flag = ""
-
-# Only keep one copy of this
-llvm_required_components = "core executionengine mcjit analysis x86codegen x86info"
-# Stubbing this out so we can implement static-only mode if needed later
-llvm_use_shared = True
-# Can we ask for shared/static from llvm-config?
-if LooseVersion(llvm_config_version[0]) < LooseVersion("3.9"):
-    # Nope
-    llvm_linkage_type_flag = ""
-    llvm_use_computed_shared_lib_name = True
-else:
-    # Woo, they finally fixed the dumb
-    llvm_use_computed_shared_lib_name = False
-    if llvm_use_shared:
-        llvm_linkage_type_flag = "--link-shared"
+    llvm_config_version = subprocess.Popen('%s --version' % env["LLVM_CONFIG"], \
+                                           shell=True, \
+                                           stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()
+    if LooseVersion(llvm_config_version[0]) < LooseVersion("3.6"):
+        print "This LLVM version %s is too old" % llvm_config_version[0].strip()
+        Exit(1)
+
+    if LooseVersion(llvm_config_version[0]) < LooseVersion("3.9") and \
+        LooseVersion(llvm_config_version[0]) >= LooseVersion("3.5"):
+        llvm_system_libs_flag = "--system-libs"
     else:
-        llvm_linkage_type_flag = "--link-static"
-
-if llvm_use_computed_shared_lib_name:
-    # Okay, pull out the major and minor version numbers (barf barf)
-    p = re.compile("^(\d+)\.(\d+).*$")
-    m = p.match(llvm_config_version[0])
-    if m:
-        llvm_computed_shared_lib_name = "LLVM-%d.%d" % ((int)(m.group(1)), (int)(m.group(2)))
+        llvm_system_libs_flag = ""
+
+    # Only keep one copy of this
+    llvm_required_components = "core executionengine mcjit analysis x86codegen x86info"
+    # Stubbing this out so we can implement static-only mode if needed later
+    llvm_use_shared = True
+    # Can we ask for shared/static from llvm-config?
+    if LooseVersion(llvm_config_version[0]) < LooseVersion("3.9"):
+        # Nope
+        llvm_linkage_type_flag = ""
+        llvm_use_computed_shared_lib_name = True
     else:
-        print "Couldn't compute shared library name from LLVM version '%s', but needed to" % \
-            llvm_config_version[0]
-        Exit(1)
-else:
-    # We won't be needing it
-    llvm_computed_shared_lib_name = None
-
-# llvm-config 'helpfully' supplies -g and -O flags; educate it with this
-# custom ParseConfig function arg; make it a class with a method so we can
-# pass it around with scons export/import
-
-class LLVMConfigSanitizer:
-    def sanitize(self, env, cmd, unique=1):
-        # cmd is output from llvm-config
-        flags = cmd.split()
-        # match -g or -O flags
-        p = re.compile("^-[gO].*$")
-        filtered_flags = [flag for flag in flags if not p.match(flag)]
-        filtered_cmd = ' '.join(filtered_flags)
-        # print "llvm_config_sanitize: \"%s\" => \"%s\"" % (cmd, filtered_cmd)
-        env.MergeFlags(filtered_cmd, unique)
-llvm_config_sanitizer = LLVMConfigSanitizer()
-
-# LLVM defines, which the python bindings need
-try:
-    llvm_config_cflags = subprocess.Popen('%s --cflags' % env["LLVM_CONFIG"], \
-                                          shell=True, \
-                                          stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()
-    flags = llvm_config_cflags[0].split()
-    # get just the -D ones
-    p = re.compile("^-D(.*)$")
-    llvm_defines = [p.match(flag).group(1) for flag in flags if p.match(flag)]
-except:
-    print "%s failed. Make sure you have LLVM and clang installed." % env["LLVM_CONFIG"]
-    Exit(1)
-
-# Get the llvm includedir, which the python bindings need
-try:
-    llvm_config_includes = subprocess.Popen('%s --includedir' % env["LLVM_CONFIG"], \
-                                            shell=True, \
-                                            stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()
-    llvm_includes = llvm_config_includes[0].splitlines()
-except:
-    print "%s failed. Make sure you have LLVM and clang installed." % env["LLVM_CONFIG"]
-    Exit(1)
-
-# This goes here so we already know all the LLVM crap
-# Make a fresh environment to parse the config into, to read out just LLVM stuff
-llvm_dummy_env = Environment()
-# Get LLVM stuff into LIBS/LDFLAGS
-llvm_dummy_env.ParseConfig('%s --ldflags %s %s %s' % \
-                           (env["LLVM_CONFIG"], llvm_system_libs_flag, llvm_linkage_type_flag, \
-                            llvm_required_components), \
-                           function=llvm_config_sanitizer.sanitize)
-# Get the right -l lines in
-if llvm_use_shared:
+        # Woo, they finally fixed the dumb
+        llvm_use_computed_shared_lib_name = False
+        if llvm_use_shared:
+            llvm_linkage_type_flag = "--link-shared"
+        else:
+            llvm_linkage_type_flag = "--link-static"
+
     if llvm_use_computed_shared_lib_name:
-        llvm_dummy_env.Append(LIBS=[llvm_computed_shared_lib_name, ])
+        # Okay, pull out the major and minor version numbers (barf barf)
+        p = re.compile("^(\d+)\.(\d+).*$")
+        m = p.match(llvm_config_version[0])
+        if m:
+            llvm_computed_shared_lib_name = "LLVM-%d.%d" % ((int)(m.group(1)), (int)(m.group(2)))
+        else:
+            print "Couldn't compute shared library name from LLVM version '%s', but needed to" % \
+                llvm_config_version[0]
+            Exit(1)
     else:
-        llvm_dummy_env.ParseConfig('%s %s --libs %s' % \
-                                   (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
-                                   function=llvm_config_sanitizer.sanitize)
-llvm_dummy_env.Append(LIBS=['stdc++', ], )
+        # We won't be needing it
+        llvm_computed_shared_lib_name = None
+
+    # llvm-config 'helpfully' supplies -g and -O flags; educate it with this
+    # custom ParseConfig function arg; make it a class with a method so we can
+    # pass it around with scons export/import
+
+    class LLVMConfigSanitizer:
+        def sanitize(self, env, cmd, unique=1):
+            # cmd is output from llvm-config
+            flags = cmd.split()
+            # match -g or -O flags
+            p = re.compile("^-[gO].*$")
+            filtered_flags = [flag for flag in flags if not p.match(flag)]
+            filtered_cmd = ' '.join(filtered_flags)
+            # print "llvm_config_sanitize: \"%s\" => \"%s\"" % (cmd, filtered_cmd)
+            env.MergeFlags(filtered_cmd, unique)
+    llvm_config_sanitizer = LLVMConfigSanitizer()
+
+    # LLVM defines, which the python bindings need
+    try:
+        llvm_config_cflags = subprocess.Popen('%s --cflags' % env["LLVM_CONFIG"], \
+                                              shell=True, \
+                                              stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()
+        flags = llvm_config_cflags[0].split()
+        # get just the -D ones
+        p = re.compile("^-D(.*)$")
+        llvm_defines = [p.match(flag).group(1) for flag in flags if p.match(flag)]
+    except:
+        print "%s failed. Make sure you have LLVM and clang installed." % env["LLVM_CONFIG"]
+        Exit(1)
+
+    # Get the llvm includedir, which the python bindings need
+    try:
+        llvm_config_includes = subprocess.Popen('%s --includedir' % env["LLVM_CONFIG"], \
+                                                shell=True, \
+                                                stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate()
+        llvm_includes = llvm_config_includes[0].splitlines()
+    except:
+        print "%s failed. Make sure you have LLVM and clang installed." % env["LLVM_CONFIG"]
+        Exit(1)
+
+    # This goes here so we already know all the LLVM crap
+    # Make a fresh environment to parse the config into, to read out just LLVM stuff
+    llvm_dummy_env = Environment()
+    # Get LLVM stuff into LIBS/LDFLAGS
+    llvm_dummy_env.ParseConfig('%s --ldflags %s %s %s' % \
+                               (env["LLVM_CONFIG"], llvm_system_libs_flag, llvm_linkage_type_flag, \
+                                llvm_required_components), \
+                               function=llvm_config_sanitizer.sanitize)
+    # Get the right -l lines in
+    if llvm_use_shared:
+        if llvm_use_computed_shared_lib_name:
+            llvm_dummy_env.Append(LIBS=[llvm_computed_shared_lib_name, ])
+        else:
+            llvm_dummy_env.ParseConfig('%s %s --libs %s' % \
+                                       (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
+                                       function=llvm_config_sanitizer.sanitize)
+    llvm_dummy_env.Append(LIBS=['stdc++', ], )
+#endif GetOption("use_llvm")
+
+# The .pc.in file has substs for llvm_lib_flags and llvm_libdir_flags, so if
+# we aren't using LLVM, set them to the empty string
+if GetOption("use_llvm"):
+    env['llvm_libdir_flags'] = llvm_dummy_env.subst('$_LIBDIRFLAGS')
+    env['llvm_lib_flags'] = llvm_dummy_env.subst('$_LIBFLAGS')
+else:
+    env['llvm_libdir_flags'] = ""
+    env['llvm_lib_flags'] = ""
 
-env['llvm_libdir_flags'] = llvm_dummy_env.subst('$_LIBDIRFLAGS')
-env['llvm_lib_flags'] = llvm_dummy_env.subst('$_LIBFLAGS')
 pkgconfig = env.ScanReplace('libhammer.pc.in')
 Default(pkgconfig)
 env.Install("$pkgconfigpath", pkgconfig)
@@ -249,16 +275,17 @@ Export('env')
 Export('testruns')
 Export('targets')
 # LLVM-related flags
-Export('llvm_computed_shared_lib_name')
-Export('llvm_config_sanitizer')
-Export('llvm_config_version')
-Export('llvm_defines')
-Export('llvm_includes')
-Export('llvm_linkage_type_flag')
-Export('llvm_required_components')
-Export('llvm_system_libs_flag')
-Export('llvm_use_computed_shared_lib_name')
-Export('llvm_use_shared')
+if GetOption("use_llvm"):
+    Export('llvm_computed_shared_lib_name')
+    Export('llvm_config_sanitizer')
+    Export('llvm_config_version')
+    Export('llvm_defines')
+    Export('llvm_includes')
+    Export('llvm_linkage_type_flag')
+    Export('llvm_required_components')
+    Export('llvm_system_libs_flag')
+    Export('llvm_use_computed_shared_lib_name')
+    Export('llvm_use_shared')
 
 if not GetOption("in_place"):
     env['BUILD_BASE'] = 'build/$VARIANT'
diff --git a/src/SConscript b/src/SConscript
index 80d96bf45c85822f888c896dfa479417ae298edc..9b89730d9b7015dad78e40f790a414cdab00fb44 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -6,14 +6,15 @@ import subprocess
 
 Import('env testruns')
 # LLVM-related flags
-Import('llvm_computed_shared_lib_name')
-Import('llvm_config_sanitizer')
-Import('llvm_config_version')
-Import('llvm_linkage_type_flag')
-Import('llvm_required_components')
-Import('llvm_system_libs_flag')
-Import('llvm_use_computed_shared_lib_name')
-Import('llvm_use_shared')
+if GetOption("use_llvm"):
+    Import('llvm_computed_shared_lib_name')
+    Import('llvm_config_sanitizer')
+    Import('llvm_config_version')
+    Import('llvm_linkage_type_flag')
+    Import('llvm_required_components')
+    Import('llvm_system_libs_flag')
+    Import('llvm_use_computed_shared_lib_name')
+    Import('llvm_use_shared')
 
 dist_headers = [
     "hammer.h",
@@ -31,6 +32,7 @@ parsers_headers = [
 backends_headers = [
     "backends/regex.h",
-    "backends/contextfree.h"
+    "backends/contextfree.h",
+    "backends/missing.h"
 ]
 
 parsers = ['parsers/%s.c'%s for s in
@@ -61,10 +63,15 @@ parsers = ['parsers/%s.c'%s for s in
             'unimplemented',
             'whitespace',
             'xor',
-            'value']] 
+            'value']]
 
 backends = ['backends/%s.c' % s for s in
-            ['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0', 'llvm']]
+            ['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0', "missing"]]
+
+# Add LLVM backend if enabled
+if GetOption("use_llvm"):
+    llvm_backend_files = ['llvm.c', 'llvm_charset.c', 'llvm_suint.c']
+    backends = backends + ['backends/llvm/%s' % s for s in llvm_backend_files]
 
 misc_hammer_parts = [
     'allocator.c',
@@ -89,29 +96,33 @@ ctests = ['t_benchmark.c',
           't_misc.c',
 	  't_regression.c']
 
-env.ParseConfig('%s --cflags --ldflags' % env["LLVM_CONFIG"], function=llvm_config_sanitizer.sanitize)
+if GetOption("use_llvm"):
+    env.ParseConfig('%s --cflags --ldflags' % env["LLVM_CONFIG"], function=llvm_config_sanitizer.sanitize)
 libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts)
 
 # Use a cloned env for the shared library so we can have library dependencies
 shared_env = env.Clone()
-# Get LLVM stuff into LIBS/LDFLAGS
-shared_env.ParseConfig('%s --ldflags %s %s %s' % \
-                       (env["LLVM_CONFIG"], llvm_system_libs_flag, llvm_linkage_type_flag, llvm_required_components), \
-                       function=llvm_config_sanitizer.sanitize)
-# Get the right -l lines in
-if llvm_use_shared:
-    if llvm_use_computed_shared_lib_name:
-        shared_env.Append(LIBS=[llvm_computed_shared_lib_name, ])
+if GetOption("use_llvm"):
+    # Get LLVM stuff into LIBS/LDFLAGS
+    shared_env.ParseConfig('%s --ldflags %s %s %s' % \
+                           (env["LLVM_CONFIG"], llvm_system_libs_flag, \
+                            llvm_linkage_type_flag, llvm_required_components), \
+                           function=llvm_config_sanitizer.sanitize)
+    # Get the right -l lines in
+    if llvm_use_shared:
+        if llvm_use_computed_shared_lib_name:
+            shared_env.Append(LIBS=[llvm_computed_shared_lib_name, ])
+        else:
+            shared_env.ParseConfig('%s %s --libs %s' % \
+                                   (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
+                                   function=llvm_config_sanitizer.sanitize)
     else:
+        # Just grab the statics regardless of version
         shared_env.ParseConfig('%s %s --libs %s' % \
                                (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
                                function=llvm_config_sanitizer.sanitize)
-else:
-    # Just grab the statics regardless of version
-    shared_env.ParseConfig('%s %s --libs %s' % \
-                           (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
-                           function=llvm_config_sanitizer.sanitize)
-shared_env.Append(LIBS=['stdc++', ], LIBPATH=['.'])
+    shared_env.Append(LIBS=['stdc++', ], LIBPATH=['.'])
+
 libhammer_shared = shared_env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts)
 
 Default(libhammer_shared, libhammer_static)
@@ -124,24 +135,27 @@ env.Install("$backendsincpath", backends_headers)
 testenv = env.Clone()
 testenv.Append(LIBS=['hammer'], LIBPATH=['.'])
 testenv.ParseConfig('pkg-config --cflags --libs glib-2.0')
-# Get LLVM stuff into LIBS/LDFLAGS
-testenv.ParseConfig('%s --ldflags %s %s %s' % \
-                    (env["LLVM_CONFIG"], llvm_system_libs_flag, llvm_linkage_type_flag, llvm_required_components), \
-                    function=llvm_config_sanitizer.sanitize)
-# Get the right -l lines in
-if llvm_use_shared:
-    if llvm_use_computed_shared_lib_name:
-        testenv.Append(LIBS=[llvm_computed_shared_lib_name, ])
+if GetOption("use_llvm"):
+    # Get LLVM stuff into LIBS/LDFLAGS
+    testenv.ParseConfig('%s --ldflags %s %s %s' % \
+                        (env["LLVM_CONFIG"], llvm_system_libs_flag, \
+                         llvm_linkage_type_flag, llvm_required_components), \
+                        function=llvm_config_sanitizer.sanitize)
+    # Get the right -l lines in
+    if llvm_use_shared:
+        if llvm_use_computed_shared_lib_name:
+            testenv.Append(LIBS=[llvm_computed_shared_lib_name, ])
+        else:
+            testenv.ParseConfig('%s %s --libs %s' % \
+                                (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
+                                function=llvm_config_sanitizer.sanitize)
     else:
+        # Just grab the statics regardless of version
         testenv.ParseConfig('%s %s --libs %s' % \
                             (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
                             function=llvm_config_sanitizer.sanitize)
-else:
-    # Just grab the statics regardless of version
-    testenv.ParseConfig('%s %s --libs %s' % \
-                        (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
-                        function=llvm_config_sanitizer.sanitize)
-testenv.Append(LIBS=['stdc++'], LIBPATH=['.'])
+    testenv.Append(LIBS=['stdc++'], LIBPATH=['.'])
+
 ctestexec = testenv.Program('test_suite', ctests + ['test_suite.c'], LINKFLAGS="--coverage" if testenv.GetOption("coverage") else None)
 ctest = Alias('testc', [ctestexec], "".join(["env LD_LIBRARY_PATH=", os.path.dirname(ctestexec[0].path), " ", ctestexec[0].path]))
 AlwaysBuild(ctest)
diff --git a/src/backends/llvm.c b/src/backends/llvm.c
deleted file mode 100644
index 79f91eafab27dac54b544601363e39e2c4567276..0000000000000000000000000000000000000000
--- a/src/backends/llvm.c
+++ /dev/null
@@ -1,317 +0,0 @@
-#include <llvm-c/Analysis.h>
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wpedantic"
-#include <llvm-c/Core.h>
-#pragma GCC diagnostic pop
-#include <llvm-c/ExecutionEngine.h>
-#include "../internal.h"
-#include "../llvm.h"
-
-typedef struct HLLVMParser_ {
-  LLVMModuleRef mod;
-  LLVMValueRef func;
-  LLVMExecutionEngineRef engine;
-  LLVMBuilderRef builder;
-} HLLVMParser;
-
-HParseResult* make_result(HArena *arena, HParsedToken *tok) {
-  HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult));
-  ret->ast = tok;
-  ret->arena = arena;
-  ret->bit_length = 0; // This way it gets overridden in h_do_parse
-  return ret;
-}
-
-void h_llvm_declare_common(LLVMModuleRef mod) {
-  llvm_inputstream = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HInputStream_");
-  LLVMTypeRef llvm_inputstream_struct_types[] = {
-    LLVMPointerType(LLVMInt8Type(), 0),
-    LLVMInt64Type(),
-    LLVMInt64Type(),
-    LLVMInt64Type(),
-    LLVMInt8Type(),
-    LLVMInt8Type(),
-    LLVMInt8Type(),
-    LLVMInt8Type(),
-    LLVMInt8Type()
-  };
-  LLVMStructSetBody(llvm_inputstream, llvm_inputstream_struct_types, 9, 0);
-  llvm_inputstreamptr = LLVMPointerType(llvm_inputstream, 0);
-  llvm_arena = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HArena_");
-  llvm_arenaptr = LLVMPointerType(llvm_arena, 0);
-  llvm_parsedtoken = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HParsedToken_");
-  LLVMTypeRef llvm_parsedtoken_struct_types[] = {
-    LLVMInt32Type(), // actually an enum value
-    LLVMInt64Type(), // actually this is a union; the largest thing in it is 64 bits
-    LLVMInt64Type(), // FIXME sizeof(size_t) will be 32 bits on 32-bit platforms
-    LLVMInt64Type(), // FIXME ditto
-    LLVMInt8Type()
-  };
-  LLVMStructSetBody(llvm_parsedtoken, llvm_parsedtoken_struct_types, 5, 0);
-  llvm_parsedtokenptr = LLVMPointerType(llvm_parsedtoken, 0);
-  llvm_parseresult = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HParseResult_");
-  LLVMTypeRef llvm_parseresult_struct_types[] = {
-    llvm_parsedtokenptr,
-    LLVMInt64Type(),
-    llvm_arenaptr
-  };
-  LLVMStructSetBody(llvm_parseresult, llvm_parseresult_struct_types, 3, 0);
-  llvm_parseresultptr = LLVMPointerType(llvm_parseresult, 0);
-  LLVMTypeRef readbits_pt[] = {
-    llvm_inputstreamptr,
-    LLVMInt32Type(),
-    LLVMInt8Type()
-  };
-  LLVMTypeRef readbits_ret = LLVMFunctionType(LLVMInt64Type(), readbits_pt, 3, 0);
-  LLVMAddFunction(mod, "h_read_bits", readbits_ret);
-
-  LLVMTypeRef amalloc_pt[] = {
-    llvm_arenaptr,
-    LLVMInt32Type()
-  };
-  LLVMTypeRef amalloc_ret = LLVMFunctionType(LLVMPointerType(LLVMVoidType(), 0), amalloc_pt, 2, 0);
-  LLVMAddFunction(mod, "h_arena_malloc", amalloc_ret);
-
-  LLVMTypeRef makeresult_pt[] = {
-    llvm_arenaptr,
-    llvm_parsedtokenptr
-  };
-  LLVMTypeRef makeresult_ret = LLVMFunctionType(llvm_parseresultptr, makeresult_pt, 2, 0);
-  LLVMAddFunction(mod, "make_result", makeresult_ret);
-}
-
-int h_llvm_compile(HAllocator* mm__, HParser* parser, const void* params) {
-  // Boilerplate to set up a translation unit, aka a module.
-  const char* name = params ? (const char*)params : "parse";
-  LLVMModuleRef mod = LLVMModuleCreateWithName(name);
-  h_llvm_declare_common(mod);
-  // Boilerplate to set up the parser function to add to the module. It takes an HInputStream* and
-  // returns an HParseResult.
-  LLVMTypeRef param_types[] = {
-    llvm_inputstreamptr,
-    llvm_arenaptr
-  };
-  LLVMTypeRef ret_type = LLVMFunctionType(llvm_parseresultptr, param_types, 2, 0);
-  LLVMValueRef parse_func = LLVMAddFunction(mod, name, ret_type);
-  // Parse function is now declared; time to define it
-  LLVMBuilderRef builder = LLVMCreateBuilder();
-  // Translate the contents of the children of `parser` into their LLVM instruction equivalents
-  if (parser->vtable->llvm(builder, parse_func, mod, parser->env)) {
-    // But first, verification
-    char *error = NULL;
-    LLVMVerifyModule(mod, LLVMAbortProcessAction, &error);
-    LLVMDisposeMessage(error);
-    error = NULL;
-    // OK, link that sonofabitch
-    LLVMLinkInMCJIT();
-    LLVMInitializeNativeTarget();
-    LLVMInitializeNativeAsmPrinter();
-    LLVMExecutionEngineRef engine = NULL;
-    LLVMCreateExecutionEngineForModule(&engine, mod, &error);
-    if (error) {
-      fprintf(stderr, "error: %s\n", error);
-      LLVMDisposeMessage(error);
-      return -1;
-    }
-    char* dump = LLVMPrintModuleToString(mod);
-    fprintf(stderr, "\n\n%s\n\n", dump);
-    // Package up the pointers that comprise the module and stash it in the original HParser
-    HLLVMParser *llvm_parser = h_new(HLLVMParser, 1);
-    llvm_parser->mod = mod;
-    llvm_parser->func = parse_func;
-    llvm_parser->engine = engine;
-    llvm_parser->builder = builder;
-    parser->backend_data = llvm_parser;
-    return 0;
-  } else {
-    return -1;
-  }
-}
-
-void h_llvm_free(HParser *parser) {
-  HLLVMParser *llvm_parser = parser->backend_data;
-  LLVMModuleRef mod_out;
-  char *err_out;
-
-  llvm_parser->func = NULL;
-  LLVMRemoveModule(llvm_parser->engine, llvm_parser->mod, &mod_out, &err_out);
-  LLVMDisposeExecutionEngine(llvm_parser->engine);
-  llvm_parser->engine = NULL;
-
-  LLVMDisposeBuilder(llvm_parser->builder);
-  llvm_parser->builder = NULL;
-
-  LLVMDisposeModule(llvm_parser->mod);
-  llvm_parser->mod = NULL;
-}
-
-/*
- * Construct LLVM IR to decide if a runtime value is a member of a compile-time
- * character set, and branch depending on the result.
- *
- * Parameters:
- *  - mod [in]: an LLVMModuleRef
- *  - func [in]: an LLVMValueRef to the function to add the new basic blocks
- *  - builder [in]: an LLVMBuilderRef, positioned appropriately
- *  - r [in]: an LLVMValueRef to the value to test
- *  - cs [in]: the HCharset to test membership in
- *  - yes [in]: the basic block to branch to if r is in cs
- *  - no [in]: the basic block to branch to if r is not in cs
- */
-
-void h_llvm_make_charset_membership_test(LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
-                                         LLVMValueRef r, HCharset cs,
-                                         LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
-  /*
-   * A charset is a 256-element bit array, 32 bytes long in total.  Ours is
-   * static at compile time, so we can try to construct minimal LLVM IR for
-   * this particular charset.  In particular, we should handle cases like
-   * only one or two bits being set, or a long consecutive range, efficiently.
-   *
-   * In LLVM IR, we can test propositions like r == x, r <= x, r >= x and their
-   * negations efficiently, so the challenge here is to turn a character map
-   * into a minimal set of such propositions.
-   *
-   * TODO: actually do this; right now for the sake of a first pass we're just
-   * testing r == x for every x in cs.
-   */
-
-  for (int i = 0; i < 256; ++i) {
-    if (charset_isset(cs, i)) {
-      char bbname[16];
-      uint8_t c = (uint8_t)i;
-      snprintf(bbname, 16, "cs_memb_%02x", c);
-      LLVMValueRef icmp = LLVMBuildICmp(builder, LLVMIntEQ,
-          LLVMConstInt(LLVMInt8Type(), c, 0), r, "c == r");
-      LLVMBasicBlockRef bb = LLVMAppendBasicBlock(func, bbname);
-      LLVMBuildCondBr(builder, icmp, yes, bb);
-      LLVMPositionBuilderAtEnd(builder, bb);
-    }
-  }
-
-  LLVMBuildBr(builder, no);
-}
-
-/*
- * Construct LLVM IR to allocate a token of type TT_SINT or TT_UINT
- *
- * Parameters:
- *  - mod [in]: an LLVMModuleRef
- *  - builder [in]: an LLVMBuilderRef, positioned appropriately
- *  - stream [in]: a value ref to an llvm_inputstreamptr, for the input stream
- *  - arena [in]: a value ref to an llvm_arenaptr to be used for the malloc
- *  - r [in]: a value ref to the value to be used to this token
- *  - mr_out [out]: the return value from make_result()
- *
- * TODO actually support TT_SINT, inputs other than 8 bit
- */
-
-void h_llvm_make_tt_suint(LLVMModuleRef mod, LLVMBuilderRef builder,
-                          LLVMValueRef stream, LLVMValueRef arena,
-                          LLVMValueRef r, LLVMValueRef *mr_out) {
-  /* Set up call to h_arena_malloc() for a new HParsedToken */
-  LLVMValueRef tok_size = LLVMConstInt(LLVMInt32Type(), sizeof(HParsedToken), 0);
-  LLVMValueRef amalloc_args[] = { arena, tok_size };
-  /* %h_arena_malloc = call void* @h_arena_malloc(%struct.HArena_.1* %1, i32 48) */
-  LLVMValueRef amalloc = LLVMBuildCall(builder, LLVMGetNamedFunction(mod, "h_arena_malloc"),
-      amalloc_args, 2, "h_arena_malloc");
-  /* %tok = bitcast void* %h_arena_malloc to %struct.HParsedToken_.2* */
-  LLVMValueRef tok = LLVMBuildBitCast(builder, amalloc, llvm_parsedtokenptr, "tok");
-
-  /*
-   * tok->token_type = TT_UINT;
-   *
-   * %token_type = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 0
-   *
-   * TODO if we handle TT_SINT too, adjust here and the zero-ext below
-   */
-  LLVMValueRef toktype = LLVMBuildStructGEP(builder, tok, 0, "token_type");
-  /* store i32 8, i32* %token_type */
-  LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 8, 0), toktype);
-
-  /*
-   * tok->uint = r;
-   *
-   * %token_data = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 1
-   */
-  LLVMValueRef tokdata = LLVMBuildStructGEP(builder, tok, 1, "token_data");
-  /*
-   * TODO
-   *
-   * This is where we'll need to adjust to handle other types (sign vs. zero extend, omit extend if
-   * r is 64-bit already
-   */
-  LLVMBuildStore(builder, LLVMBuildZExt(builder, r, LLVMInt64Type(), "r"), tokdata);
-  /*
-   * Store the index from the stream into the token
-   */
-  /* %t_index = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 2 */
-  LLVMValueRef tokindex = LLVMBuildStructGEP(builder, tok, 2, "t_index");
-  /* %s_index = getelementptr inbounds %struct.HInputStream_.0, %struct.HInputStream_.0* %0, i32 0, i32 2 */
-  LLVMValueRef streamindex = LLVMBuildStructGEP(builder, stream, 2, "s_index");
-  /* %4 = load i64, i64* %s_index */
-  /* store i64 %4, i64* %t_index */
-  LLVMBuildStore(builder, LLVMBuildLoad(builder, streamindex, ""), tokindex);
-  /* Store the bit length into the token */
-  LLVMValueRef tokbitlen = LLVMBuildStructGEP(builder, tok, 3, "bit_length");
-  /* TODO handle multiple bit lengths */
-  LLVMBuildStore(builder, LLVMConstInt(LLVMInt64Type(), 8, 0), tokbitlen);
-
-  /*
-   * Now call make_result()
-   *
-   * %make_result = call %struct.HParseResult_.3* @make_result(%struct.HArena_.1* %1, %struct.HParsedToken_.2* %3)
-   */
-  LLVMValueRef result_args[] = { arena, tok };
-  LLVMValueRef mr = LLVMBuildCall(builder, LLVMGetNamedFunction(mod, "make_result"),
-      result_args, 2, "make_result");
-
-  *mr_out = mr;
-}
-
-HParseResult *h_llvm_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
-  const HLLVMParser *llvm_parser = parser->backend_data;
-  HArena *arena = h_new_arena(mm__, 0);
-
-  // LLVMRunFunction only supports certain signatures for dumb reasons; it's this hack with
-  // memcpy and function pointers, or writing a shim in LLVM IR.
-  //
-  // LLVMGenericValueRef args[] = {
-  //   LLVMCreateGenericValueOfPointer(input_stream),
-  //   LLVMCreateGenericValueOfPointer(arena)
-  // };
-  // LLVMGenericValueRef res = LLVMRunFunction(llvm_parser->engine, llvm_parser->func, 2, args);
-  // HParseResult *ret = (HParseResult*)LLVMGenericValueToPointer(res);
-
-  void *parse_func_ptr_v;
-  HParseResult * (*parse_func_ptr)(HInputStream *input_stream, HArena *arena);
-  parse_func_ptr_v = LLVMGetPointerToGlobal(llvm_parser->engine, llvm_parser->func);
-  memcpy(&parse_func_ptr, &parse_func_ptr_v, sizeof(parse_func_ptr));
-  HParseResult *ret = parse_func_ptr(input_stream, arena);
-  if (ret) {
-    ret->arena = arena;
-    if (!input_stream->overrun) {
-      size_t bit_length = h_input_stream_pos(input_stream);
-      if (ret->bit_length == 0) {
-	ret->bit_length = bit_length;
-      }
-      if (ret->ast && ret->ast->bit_length != 0) {
-	((HParsedToken*)(ret->ast))->bit_length = bit_length;
-      }
-    } else {
-      ret->bit_length = 0;
-    }
-  } else {
-    ret = NULL;
-  }
-  if (input_stream->overrun) {
-    return NULL; // overrun is always failure.
-  }
-  return ret;
-}
-
-HParserBackendVTable h__llvm_backend_vtable = {
-  .compile = h_llvm_compile,
-  .parse = h_llvm_parse,
-  .free = h_llvm_free
-};
diff --git a/src/backends/llvm/llvm.c b/src/backends/llvm/llvm.c
new file mode 100644
index 0000000000000000000000000000000000000000..185367a2e166920ada40a3e629ab72a38b747261
--- /dev/null
+++ b/src/backends/llvm/llvm.c
@@ -0,0 +1,229 @@
+#ifdef HAMMER_LLVM_BACKEND
+
+#include <llvm-c/Analysis.h>
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpedantic"
+#include <llvm-c/Core.h>
+#pragma GCC diagnostic pop
+#include <llvm-c/ExecutionEngine.h>
+#include "../../internal.h"
+#include "llvm.h"
+
+typedef struct HLLVMParser_ { /* Per-parser LLVM backend state; h_llvm_compile() stores it in parser->backend_data */
+  LLVMModuleRef mod; /* module containing the generated parse function */
+  LLVMValueRef func; /* the generated parse function; h_llvm_parse() asks the engine for its address */
+  LLVMExecutionEngineRef engine; /* execution engine created for mod */
+  LLVMBuilderRef builder; /* builder used while generating IR; disposed by h_llvm_free() */
+  HLLVMParserCompileContext *compile_ctxt; /* context built by h_llvm_compile(); freed by h_llvm_free() */
+} HLLVMParser;
+
+HParseResult* make_result(HArena *arena, HParsedToken *tok) { /* wrap tok in an arena-allocated HParseResult */
+  HParseResult *wrapped = h_arena_malloc(arena, sizeof(*wrapped));
+  wrapped->bit_length = 0; /* zero means "not set yet"; h_llvm_parse() fills it in when it sees zero */
+  wrapped->arena = arena;
+  wrapped->ast = tok;
+  return wrapped;
+}
+
+void h_llvm_declare_common(HLLVMParserCompileContext *ctxt) {
+  /*
+   * Fill in ctxt's LLVM struct/pointer typerefs and declare the runtime helpers
+   * (h_read_bits, h_arena_malloc, make_result) in ctxt->mod, which must already
+   * be set, so that generated IR can call them.  NOTE(review): the struct bodies
+   * presumably have to match the C layouts of the same-named types - confirm.
+   */
+  ctxt->llvm_inputstream = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HInputStream_");
+  LLVMTypeRef llvm_inputstream_struct_types[] = {
+    LLVMPointerType(LLVMInt8Type(), 0),
+    LLVMInt64Type(),
+    LLVMInt64Type(),
+    LLVMInt64Type(),
+    LLVMInt8Type(),
+    LLVMInt8Type(),
+    LLVMInt8Type(),
+    LLVMInt8Type(),
+    LLVMInt8Type()
+  };
+  LLVMStructSetBody(ctxt->llvm_inputstream, llvm_inputstream_struct_types, 9, 0);
+  ctxt->llvm_inputstreamptr = LLVMPointerType(ctxt->llvm_inputstream, 0);
+  ctxt->llvm_arena = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HArena_"); /* no body set: opaque */
+  ctxt->llvm_arenaptr = LLVMPointerType(ctxt->llvm_arena, 0);
+  ctxt->llvm_parsedtoken = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HParsedToken_");
+  LLVMTypeRef llvm_parsedtoken_struct_types[] = {
+    LLVMInt32Type(), // actually an enum value
+    LLVMInt64Type(), // actually this is a union; the largest thing in it is 64 bits
+    LLVMInt64Type(), // FIXME sizeof(size_t) will be 32 bits on 32-bit platforms
+    LLVMInt64Type(), // FIXME ditto
+    LLVMInt8Type()
+  };
+  LLVMStructSetBody(ctxt->llvm_parsedtoken, llvm_parsedtoken_struct_types, 5, 0);
+  ctxt->llvm_parsedtokenptr = LLVMPointerType(ctxt->llvm_parsedtoken, 0);
+  ctxt->llvm_parseresult = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HParseResult_");
+  LLVMTypeRef llvm_parseresult_struct_types[] = {
+    ctxt->llvm_parsedtokenptr,
+    LLVMInt64Type(),
+    ctxt->llvm_arenaptr
+  };
+  LLVMStructSetBody(ctxt->llvm_parseresult, llvm_parseresult_struct_types, 3, 0);
+  ctxt->llvm_parseresultptr = LLVMPointerType(ctxt->llvm_parseresult, 0);
+  /* Declared as: i64 h_read_bits(HInputStream*, i32, i8) */
+  LLVMTypeRef readbits_pt[] = { ctxt->llvm_inputstreamptr, LLVMInt32Type(), LLVMInt8Type() };
+  LLVMTypeRef readbits_ret = LLVMFunctionType(LLVMInt64Type(), readbits_pt, 3, 0);
+  LLVMAddFunction(ctxt->mod, "h_read_bits", readbits_ret);
+
+  /* Declared as: i8* h_arena_malloc(HArena*, i32).  The return type is i8* since LLVM IR does not
+   * permit pointers to void; callers are expected to bitcast the result to the type they need. */
+  LLVMTypeRef amalloc_pt[] = { ctxt->llvm_arenaptr, LLVMInt32Type() };
+  LLVMTypeRef amalloc_ret = LLVMFunctionType(LLVMPointerType(LLVMInt8Type(), 0), amalloc_pt, 2, 0);
+  LLVMAddFunction(ctxt->mod, "h_arena_malloc", amalloc_ret);
+
+  /* Declared as: HParseResult* make_result(HArena*, HParsedToken*) */
+  LLVMTypeRef makeresult_pt[] = { ctxt->llvm_arenaptr, ctxt->llvm_parsedtokenptr };
+  LLVMTypeRef makeresult_ret = LLVMFunctionType(ctxt->llvm_parseresultptr, makeresult_pt, 2, 0);
+  LLVMAddFunction(ctxt->mod, "make_result", makeresult_ret);
+}
+
+int h_llvm_compile(HAllocator* mm__, HParser* parser, const void* params) {
+  /* Compile `parser` to LLVM IR, JIT it, and store the resulting HLLVMParser in
+   * parser->backend_data.  `params`, if non-NULL, is a C string naming both the
+   * module and the generated function (default "parse").  Returns 0 on success
+   * and -1 on failure, in which case the partially built state is released. */
+  HLLVMParserCompileContext *ctxt;
+  HLLVMParser *llvm_parser;
+  LLVMExecutionEngineRef engine = NULL;
+  char *error = NULL, *dump;
+  const char* name = params ? (const char*)params : "parse";
+
+  /* Build a parser compilation context around a new module */
+  ctxt = h_new(HLLVMParserCompileContext, 1);
+  memset(ctxt, 0, sizeof(*ctxt));
+  ctxt->mm__ = mm__;
+  ctxt->mod = LLVMModuleCreateWithName(name);
+  h_llvm_declare_common(ctxt);
+  /* Declare the parse function: takes HInputStream* and HArena*, returns HParseResult* */
+  LLVMTypeRef param_types[] = { ctxt->llvm_inputstreamptr, ctxt->llvm_arenaptr };
+  LLVMTypeRef ret_type = LLVMFunctionType(ctxt->llvm_parseresultptr, param_types, 2, 0);
+  ctxt->func = LLVMAddFunction(ctxt->mod, name, ret_type);
+  // Parse function is now declared; time to define it
+  ctxt->builder = LLVMCreateBuilder();
+  LLVMBasicBlockRef preamble = LLVMAppendBasicBlock(ctxt->func, "preamble");
+  LLVMPositionBuilderAtEnd(ctxt->builder, preamble);
+  /* First thing it needs to do is get its stream and arena args and stick
+   * value refs in the context.
+   * XXX do we always need arena?  Can we make a dummy valueref the generated
+   * IR refers to, and then fill in arena if we need it after we know whether
+   * we need it?  Similar concerns apply to setting up storage needed for, e.g.
+   * memoizing charsets. */
+  ctxt->stream = LLVMBuildBitCast(ctxt->builder, LLVMGetFirstParam(ctxt->func),
+      ctxt->llvm_inputstreamptr, "stream");
+  ctxt->arena = LLVMGetLastParam(ctxt->func);
+  // Translate the contents of the children of `parser` into their LLVM instruction equivalents
+  if (!parser->vtable->llvm(ctxt, parser->env)) goto fail;
+  // But first, verification (LLVMAbortProcessAction: an invalid module aborts the process)
+  LLVMVerifyModule(ctxt->mod, LLVMAbortProcessAction, &error);
+  LLVMDisposeMessage(error);
+  error = NULL;
+  // OK, link it: MCJIT for the native target
+  LLVMLinkInMCJIT();
+  LLVMInitializeNativeTarget();
+  LLVMInitializeNativeAsmPrinter();
+  LLVMCreateExecutionEngineForModule(&engine, ctxt->mod, &error);
+  if (error) {
+    fprintf(stderr, "error: %s\n", error);
+    LLVMDisposeMessage(error);
+    /* NOTE(review): assumes the failed engine creation consumed the module - confirm */
+    ctxt->mod = NULL;
+    goto fail;
+  }
+  /* Dump the IR to stderr; the string is allocated by LLVM and must be disposed */
+  dump = LLVMPrintModuleToString(ctxt->mod);
+  fprintf(stderr, "\n\n%s\n\n", dump);
+  LLVMDisposeMessage(dump);
+  // Package up the pointers that comprise the module and stash it in the original HParser
+  llvm_parser = h_new(HLLVMParser, 1);
+  llvm_parser->mod = ctxt->mod;
+  llvm_parser->func = ctxt->func;
+  llvm_parser->engine = engine;
+  llvm_parser->builder = ctxt->builder;
+  llvm_parser->compile_ctxt = ctxt;
+  parser->backend_data = llvm_parser;
+  return 0;
+ fail:
+  /* Nothing outside this function refers to these yet, so release them here */
+  LLVMDisposeBuilder(ctxt->builder);
+  if (ctxt->mod) LLVMDisposeModule(ctxt->mod);
+  h_free(ctxt);
+  return -1;
+}
+
+void h_llvm_free(HParser *parser) {
+  /* Tear down everything h_llvm_compile() attached to `parser`; does nothing if nothing is attached. */
+  HLLVMParser *llvm_parser = parser->backend_data;
+  HAllocator *mm__;
+  LLVMModuleRef mod_out = NULL;
+  char *err_out = NULL;
+
+  if (!llvm_parser) return;
+  /* h_free() presumably releases through the local mm__; use the allocator recorded at compile time */
+  mm__ = llvm_parser->compile_ctxt->mm__;
+  h_free(llvm_parser->compile_ctxt);
+
+  /* Take the module back out of the engine before disposing the engine, then dispose each piece */
+  LLVMRemoveModule(llvm_parser->engine, llvm_parser->mod, &mod_out, &err_out);
+  if (err_out) LLVMDisposeMessage(err_out);
+  LLVMDisposeExecutionEngine(llvm_parser->engine);
+  LLVMDisposeBuilder(llvm_parser->builder);
+  LLVMDisposeModule(llvm_parser->mod);
+  /* llvm_parser itself was h_new()'d in h_llvm_compile() with this same allocator */
+  h_free(llvm_parser);
+  parser->backend_data = NULL; /* do not leave a dangling pointer behind */
+}
+
+HParseResult *h_llvm_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
+  /*
+   * Run the JIT-compiled parse function for `parser` over `input_stream` and
+   * return its result, or NULL if the parse failed or overran the input.  The
+   * result (if any) is tagged with a fresh arena allocated here from mm__.
+   *
+   * NOTE(review): the arena does not appear to be released on the NULL-return
+   * paths - confirm whether the caller or this function should free it.
+   */
+  typedef HParseResult * (*jit_parse_fn)(HInputStream *input_stream, HArena *arena);
+  const HLLVMParser *backend = parser->backend_data;
+  HArena *result_arena = h_new_arena(mm__, 0);
+  jit_parse_fn entry;
+  void *raw_entry;
+  HParseResult *res;
+
+  /*
+   * LLVMRunFunction only supports certain signatures, so instead fetch the
+   * address of the generated function and call it directly; memcpy is used
+   * to turn the void* into a function pointer.
+   */
+  raw_entry = LLVMGetPointerToGlobal(backend->engine, backend->func);
+  memcpy(&entry, &raw_entry, sizeof(entry));
+  res = entry(input_stream, result_arena);
+
+  /* No result at all: nothing to fix up */
+  if (!res) return NULL;
+  res->arena = result_arena;
+  /* overrun is always failure. */
+  if (input_stream->overrun) {
+    res->bit_length = 0;
+    return NULL;
+  }
+
+  /* Fill in bit lengths from the final stream position */
+  size_t consumed = h_input_stream_pos(input_stream);
+  if (res->bit_length == 0) res->bit_length = consumed;
+  if (res->ast && res->ast->bit_length != 0) ((HParsedToken*)(res->ast))->bit_length = consumed;
+  return res;
+}
+
+HParserBackendVTable h__llvm_backend_vtable = { /* entry points of the LLVM backend defined in this file */
+  .compile = h_llvm_compile, /* builds and JITs the parse function */
+  .parse = h_llvm_parse, /* runs the JITted parse function on an input stream */
+  .free = h_llvm_free /* releases what compile attached to the parser */
+};
+
+#endif /* defined(HAMMER_LLVM_BACKEND) */
diff --git a/src/backends/llvm/llvm.h b/src/backends/llvm/llvm.h
new file mode 100644
index 0000000000000000000000000000000000000000..0721c3733b818877090af2e420d611902b375e5b
--- /dev/null
+++ b/src/backends/llvm/llvm.h
@@ -0,0 +1,45 @@
+#ifdef HAMMER_LLVM_BACKEND
+
+#ifndef HAMMER_LLVM__H
+#define HAMMER_LLVM__H
+
+#include "../../internal.h"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpedantic"
+#include <llvm-c/Core.h>
+#pragma GCC diagnostic pop
+
+/* The typedef is in internal.h */
+
+struct HLLVMParserCompileContext_ { /* state built by h_llvm_compile() and handed to the parsers' llvm hooks */
+  /* Allocator passed to h_llvm_compile(); h_llvm_free() reuses it to free this context */
+  HAllocator* mm__;
+  /* Module being built, the parse function being defined in it, and the IR builder */
+  LLVMModuleRef mod;
+  LLVMValueRef func;
+  LLVMBuilderRef builder;
+  /* Typerefs, filled in by h_llvm_declare_common(): each named struct type and a pointer to it */
+  LLVMTypeRef llvm_inputstream;
+  LLVMTypeRef llvm_inputstreamptr;
+  LLVMTypeRef llvm_arena;
+  LLVMTypeRef llvm_arenaptr;
+  LLVMTypeRef llvm_parsedtoken;
+  LLVMTypeRef llvm_parsedtokenptr;
+  LLVMTypeRef llvm_parseresult;
+  LLVMTypeRef llvm_parseresultptr;
+  /* Set up in function preamble by h_llvm_compile(): func's first (stream) and last (arena) parameters */
+  LLVMValueRef stream;
+  LLVMValueRef arena;
+};
+
+bool h_llvm_make_charset_membership_test(HLLVMParserCompileContext *ctxt,
+                                         LLVMValueRef r, HCharset cs,
+                                         LLVMBasicBlockRef yes, LLVMBasicBlockRef no);
+void h_llvm_make_tt_suint(HLLVMParserCompileContext *ctxt,
+                          uint8_t length, uint8_t signedp,
+                          LLVMValueRef r, LLVMValueRef *mr_out);
+
+#endif // #ifndef HAMMER_LLVM__H
+
+#endif /* defined(HAMMER_LLVM_BACKEND) */
diff --git a/src/backends/llvm/llvm_charset.c b/src/backends/llvm/llvm_charset.c
new file mode 100644
index 0000000000000000000000000000000000000000..56e3e80c1d421f37d6bf32bd5a1ba20e393d6676
--- /dev/null
+++ b/src/backends/llvm/llvm_charset.c
@@ -0,0 +1,1117 @@
+#ifdef HAMMER_LLVM_BACKEND
+
+#include <llvm-c/Analysis.h>
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpedantic"
+#include <llvm-c/Core.h>
+#pragma GCC diagnostic pop
+#include <llvm-c/ExecutionEngine.h>
+#include "../../internal.h"
+#include "llvm.h"
+
+/*
+ * Set this #define to enable some debug logging and internal consistency
+ * checking.
+ */
+#define HAMMER_LLVM_CHARSET_DEBUG
+
+typedef enum {
+  /*
+   * Accept action; this entire range is in the charset.  This action type
+   * has no children and terminates handling the input character.
+   */
+  CHARSET_ACTION_ACCEPT,
+  /*
+   * Scan action; test input char against each set character in the charset.
+   * This action type has no children and terminates handling the input
+   * character.
+   */
+  CHARSET_ACTION_SCAN,
+  /*
+   * Bitmap action; test input char against a bitmap in the IR at fixed
+   * cost.
+   */
+  CHARSET_ACTION_BITMAP,
+  /*
+   * Complement action; invert the sense of the charset.  This action type
+   * has one child node, with the bounds unchanged and the portion of the
+   * charset within the bounds complemented.
+   */
+  CHARSET_ACTION_COMPLEMENT,
+  /*
+   * Split action; compare the input char against a split point and branch into
+   * one of two children: [idx_start, split_point] or [split_point + 1, idx_end].
+   */
+  CHARSET_ACTION_SPLIT
+} llvm_charset_exec_plan_action_t;
+
+typedef struct llvm_charset_exec_plan_s llvm_charset_exec_plan_t;
+struct llvm_charset_exec_plan_s {
+  /*
+   * The charset at this node, with transforms such as range restriction
+   * or complementation applied.
+   */
+  HCharset cs;
+  /*
+   * Inclusive char range [idx_start, idx_end] of this node, and for
+   * CHARSET_ACTION_SPLIT the split point (the last index of the left child)
+   */
+  uint8_t idx_start, idx_end, split_point;
+  /* Action to take at this node */
+  llvm_charset_exec_plan_action_t action;
+  /* Estimated cost metric; for a split, 1 + the larger of the two children's costs */
+  int cost;
+  /* Depth in exec plan (presumably 0 at the root and parent depth + 1 below it) */
+  int depth;
+  /* Children, if any (zero, one or two depending on action); for a split, [0] is the left range, [1] the right */
+  llvm_charset_exec_plan_t *children[2];
+};
+
+/* Forward prototypes for charset llvm stuff */
+static int h_llvm_build_charset_exec_plan_impl(HAllocator* mm__, HCharset cs,
+    llvm_charset_exec_plan_t *parent, llvm_charset_exec_plan_t *cep,
+    int allow_complement, uint8_t *split_point);
+static llvm_charset_exec_plan_t * h_llvm_build_charset_exec_plan_impl_alloc(
+    HAllocator* mm__, llvm_charset_exec_plan_t *parent, HCharset cs,
+    uint8_t idx_start, uint8_t idx_end, int allow_complement);
+static void h_llvm_free_charset_exec_plan(HAllocator* mm__,
+                                          llvm_charset_exec_plan_t *cep);
+
+/*
+ * Check if this charset is eligible for CHARSET_ACTION_ACCEPT on a range
+ */
+
+static int h_llvm_charset_eligible_for_accept(HCharset cs, uint8_t idx_start, uint8_t idx_end) {
+  /* Returns 1 if every char in the inclusive range [idx_start, idx_end] is set in cs, else 0 */
+  int ch = idx_start; /* int rather than uint8_t so ++ch can step past idx_end == UINT8_MAX */
+
+  while (ch <= idx_end) {
+    /* A single unset char rules the whole range out */
+    if (!(charset_isset(cs, (uint8_t)ch))) return 0;
+    ++ch;
+  }
+
+  return 1;
+}
+
+/*
+ * Estimate cost of CHARSET_ACTION_SCAN for this charset (~proportional to number of set chars, min 1)
+ */
+
+static int h_llvm_charset_estimate_scan_cost(HCharset cs, uint8_t idx_start, uint8_t idx_end) {
+  int total = 1; /* floor of 1, even when no char in the range is set */
+  int ch;
+
+  /* One extra unit for each char in [idx_start, idx_end] that is set in cs */
+  for (ch = idx_start; ch <= idx_end; ++ch) {
+    total += charset_isset(cs, (uint8_t)ch) ? 1 : 0;
+  }
+  return total;
+}
+
+/*
+ * Given a skeletal CHARSET_ACTION_SPLIT node from h_llvm_build_charset_exec_plan_impl(),
+ * pick a split point, attach the two child plans, and return the resulting
+ * cost metric (-1 on error).  Unfortunately the search space is quite large, so
+ * rather than searching it we only try two heuristic candidates: the midpoint
+ * of the range, and a split that peels off the longest run of present or
+ * absent chars at one end of the charset.  It may be possible to do better.
+ */
+
+static int h_llvm_find_best_split(HAllocator* mm__, llvm_charset_exec_plan_t *split) {
+  int rv, best_end_run, i, contiguous;
+  uint8_t best_end_run_split, midpoint;
+  llvm_charset_exec_plan_t *best_left, *best_right, *left, *right;
+  int best_cost, cost;
+
+  /* Sanity-check: we should be a split with a range at least two indices long */
+  if (!split || split->action != CHARSET_ACTION_SPLIT) return -1;
+  if (split->idx_end <= split->idx_start) return -1;
+
+  /* Find the longest end run; split a run of length 1 at the left end as a
+   * fallback, since there's always a run of length 1 at each end. */
+  best_end_run = 1;
+  best_end_run_split = split->idx_start;
+  contiguous = 0;
+  /* Try the low end: count leading chars whose membership matches that of idx_start */
+  i = 0;
+  while (i <= split->idx_end - split->idx_start &&
+         (charset_isset(split->cs, split->idx_start + i) ==
+          charset_isset(split->cs, split->idx_start))) ++i;
+  if (i <= split->idx_end - split->idx_start) {
+    /* This run has length i */
+    if (i > best_end_run) {
+      best_end_run = i;
+      /*
+       * -1 since split points are last index of left child, and i
+       * is first index that wasn't in the run
+       */
+      best_end_run_split = split->idx_start + i - 1;
+    }
+
+    /* Now the same thing from the high end */
+    i = 0;
+    while (i <= split->idx_end - split->idx_start &&
+           (charset_isset(split->cs, split->idx_end - i) ==
+            charset_isset(split->cs, split->idx_end))) ++i;
+    if (i <= split->idx_end - split->idx_start && i > best_end_run) {
+      best_end_run = i;
+      best_end_run_split = split->idx_end - i; /* left child ends just below the high-end run */
+    }
+  } else {
+    /* Wow, contiguous - any split will turn out well - just use the midpoint */
+    contiguous = 1;
+  }
+
+  /* Initialize, start trying things */
+  best_left = best_right = left = right = NULL;
+  rv = -1; /* stays -1 unless the success path below is reached */
+
+  /* Try a midpoint split */
+  midpoint = split->idx_start + (split->idx_end - split->idx_start) / 2;
+  left = h_llvm_build_charset_exec_plan_impl_alloc(mm__, split, split->cs,
+      split->idx_start, midpoint, 1);
+  right = h_llvm_build_charset_exec_plan_impl_alloc(mm__, split, split->cs,
+      midpoint + 1, split->idx_end, 1);
+  if (left && right) {
+    /* Cost of the split == 1 + max(left->cost, right->cost) */
+    cost = left->cost;
+    if (right->cost > cost) cost = right->cost;
+    ++cost;
+    /* We haven't tried the end-run one yet, so always accept this */
+    best_left = left;
+    best_right = right;
+    best_cost = cost;
+    left = right = NULL;
+  } else goto err;
+
+  /*
+   * Try an end-run split; if we decided we had a contiguous run earlier,
+   * all are equally good, so don't bother and just use the midpoint
+   */
+
+  if (!contiguous) {
+    /*
+     * Sanity-check the indices; error out if the scanner gave us
+     * something silly
+     */
+    if (best_end_run_split < split->idx_start ||
+        best_end_run_split >= split->idx_end) goto err;
+    left = h_llvm_build_charset_exec_plan_impl_alloc(mm__, split, split->cs,
+        split->idx_start, best_end_run_split, 1);
+    right = h_llvm_build_charset_exec_plan_impl_alloc(mm__, split, split->cs,
+       best_end_run_split + 1, split->idx_end, 1);
+    if (left && right) {
+      /* Cost of the split == 1 + max(left->cost, right->cost) */
+      cost = left->cost;
+      if (right->cost > cost) cost = right->cost;
+      ++cost;
+      /* Check it against what we already have (the midpoint split); keep the cheaper one */
+      if (cost < best_cost) {
+        if (best_left) h_llvm_free_charset_exec_plan(mm__, best_left);
+        if (best_right) h_llvm_free_charset_exec_plan(mm__, best_right);
+        best_left = left;
+        best_right = right;
+        best_cost = cost;
+        left = right = NULL;
+      } /* otherwise left/right stay set and are freed by the cleanup at err: */
+    } else goto err;
+  }
+
+  /* Set up the split node with our best results */
+  split->cost = best_cost;
+  split->children[0] = best_left;
+  split->children[1] = best_right;
+  split->split_point = best_left->idx_end;
+  best_left = best_right = NULL; /* ownership moved to split; keep the cleanup below from freeing them */
+  rv = split->cost;
+
+ err:
+  /* Error/cleanup case; also reached on success, when only rejected candidates are still non-NULL */
+  if (left) h_llvm_free_charset_exec_plan(mm__, left);
+  if (right) h_llvm_free_charset_exec_plan(mm__, right);
+  if (best_left) h_llvm_free_charset_exec_plan(mm__, best_left);
+  if (best_right) h_llvm_free_charset_exec_plan(mm__, best_right);
+
+  return rv;
+}
+
+/*
+ * Setup call to h_llvm_build_charset_exec_plan_impl(), while allocating a new
+ * llvm_charset_exec_plan_t.
+ */
+static llvm_charset_exec_plan_t * h_llvm_build_charset_exec_plan_impl_alloc(
+    HAllocator* mm__, llvm_charset_exec_plan_t *parent, HCharset cs,
+    uint8_t idx_start, uint8_t idx_end, int allow_complement) {
+  /* Allocate a plan node for [idx_start, idx_end], let
+   * h_llvm_build_charset_exec_plan_impl() fill it in, and return it with its
+   * cost recorded.  Returns NULL on bad arguments or if planning fails. */
+  llvm_charset_exec_plan_t *node;
+  int plan_cost;
+
+  if (!mm__ || !cs || idx_start > idx_end) return NULL;
+
+  node = h_new(llvm_charset_exec_plan_t, 1);
+  memset(node, 0, sizeof(*node));
+  node->cs = NULL;
+  /*
+   * The range must be set before the call: when the parent is a
+   * CHARSET_ACTION_SPLIT, h_llvm_build_charset_exec_plan_impl() reads it
+   * back from the node to learn which child range it is planning.
+   */
+  node->idx_start = idx_start;
+  node->idx_end = idx_end;
+  plan_cost = h_llvm_build_charset_exec_plan_impl(mm__, cs, parent, node,
+      allow_complement, NULL);
+  if (plan_cost < 0) {
+    h_llvm_free_charset_exec_plan(mm__, node);
+    return NULL;
+  }
+  node->cost = plan_cost;
+  return node;
+}
+
+/*
+ * Given a charset, optionally its parent containing range restrictions, and
+ * an allow_complement parameter, search for the best exec plan and write it
+ * to another (skeletal) charset which will receive an action and range.  If
+ * the action is CHARSET_ACTION_SPLIT, the split point is stored in
+ * cep->split_point.  Return a cost estimate.
+ *
+ * Parameters:
+ *  - mm__ [in]: allocator used for every charset copy and child node
+ *  - cs [in]: the charset to plan for; plan nodes hold their own copies made
+ *    with copy_charset()
+ *  - parent [in]: the node above this one, or NULL for the root (range
+ *    0..UINT8_MAX, depth 0)
+ *  - cep [in/out]: the node to fill in; when parent is a CHARSET_ACTION_SPLIT
+ *    its idx_start/idx_end must already hold the child's half of the range
+ *  - allow_complement [in]: non-zero to consider CHARSET_ACTION_COMPLEMENT
+ *  - split_point [out]: not referenced anywhere in the body; the callers
+ *    visible here pass NULL
+ *
+ * Returns: the cost of the chosen plan (>= 0), or -1 if mm__ or cep is NULL
+ * or no action could be chosen.
+ */
+
+static int h_llvm_build_charset_exec_plan_impl(HAllocator* mm__, HCharset cs,
+    llvm_charset_exec_plan_t *parent, llvm_charset_exec_plan_t *cep,
+    int allow_complement, uint8_t *split_point) {
+  int eligible_for_accept, best_cost, depth;
+  int estimated_complement_cost, estimated_scan_cost, estimated_split_cost;
+  int estimated_bitmap_cost;
+  uint8_t idx_start, idx_end;
+  /* Stack-resident candidates; the winner is memcpy()d into cep below */
+  llvm_charset_exec_plan_t complement_cep, split_cep;
+  llvm_charset_exec_plan_action_t chosen_action;
+
+  /* Check args */
+  if (!(mm__ && cep)) return -1;
+
+  /*
+   * The index bounds come from either the parent or maximal bounds by
+   * default.  Exception is the case that we are a child of a split, in
+   * which case h_llvm_find_best_split() should have set bounds in cep.
+   */
+  if (parent && parent->action == CHARSET_ACTION_SPLIT &&
+      ((cep->idx_start == parent->idx_start &&
+        cep->idx_end < parent->idx_end) ||
+       (cep->idx_start > parent->idx_start &&
+        cep->idx_end == parent->idx_end))) {
+    /* cep is a proper left or right part of the parent's range */
+    idx_start = cep->idx_start;
+    idx_end = cep->idx_end;
+  } else if (parent) {
+    idx_start = parent->idx_start;
+    idx_end = parent->idx_end;
+  } else {
+    idx_start = 0;
+    idx_end = UINT8_MAX;
+  }
+
+  /* Get the depth in the exec plan */
+  if (parent) depth = parent->depth + 1;
+  else depth = 0;
+
+  eligible_for_accept = h_llvm_charset_eligible_for_accept(cs, idx_start, idx_end);
+  if (eligible_for_accept) {
+    /* if we can use CHARSET_ACTION_ACCEPT, always do so */
+    cep->cs = copy_charset(mm__, cs);
+    charset_restrict_to_range(cep->cs, idx_start, idx_end);
+    cep->idx_start = idx_start;
+    cep->idx_end = idx_end;
+    cep->split_point = 0;
+    /* Acceptance (or rejection, under a complement) is free */
+    cep->cost = 0;
+    cep->depth = depth;
+    cep->action = CHARSET_ACTION_ACCEPT;
+    cep->children[0] = NULL;
+    cep->children[1] = NULL;
+
+    /* No temporaries have been created yet, so return directly */
+    return cep->cost;
+  } else {
+    /*
+     * Estimate cost for CHARSET_ACTION_SCAN, and for the tree below
+     * CHARSET_ACTION_COMPLEMENT if we are eligible to use it.
+     */
+    estimated_scan_cost = h_llvm_charset_estimate_scan_cost(cs, idx_start, idx_end);
+    /*
+     * We can always use CHARSET_ACTION_BITMAP; this constant controls how
+     * strongly we prefer it over the compare-and-branch approach.
+     */
+    estimated_bitmap_cost = 6;
+    /* >= 0 is a flag we have a complement we may need to free later */
+    estimated_complement_cost = -1;
+    if (allow_complement) {
+      HCharset child_cs;
+
+      /* Complement the charset within the range */
+      memset(&complement_cep, 0, sizeof(complement_cep));
+      complement_cep.cs = copy_charset(mm__, cs);
+      charset_restrict_to_range(complement_cep.cs, idx_start, idx_end);
+      child_cs = copy_charset(mm__, complement_cep.cs);
+      charset_complement(child_cs);
+      charset_restrict_to_range(child_cs, idx_start, idx_end);
+      complement_cep.idx_start = idx_start;
+      complement_cep.idx_end = idx_end;
+      complement_cep.split_point = 0;
+      complement_cep.depth = depth;
+      complement_cep.action = CHARSET_ACTION_COMPLEMENT;
+      complement_cep.children[0] = h_new(llvm_charset_exec_plan_t, 1);
+      memset(complement_cep.children[0], 0, sizeof(llvm_charset_exec_plan_t));
+      complement_cep.children[1] = NULL;
+      /*
+       * Find the child; the complement has cost 0 since it just swizzles success
+       * and fail output basic blocks; it's important we test for complement last
+       * below then, so we break ties in favor of not stacking complements up.  We
+       * set allow_complement = 0 so we never stack two complements.
+       */
+      complement_cep.cost = h_llvm_build_charset_exec_plan_impl(mm__, child_cs, &complement_cep,
+          complement_cep.children[0], 0, NULL);
+      estimated_complement_cost = complement_cep.cost;
+      /*
+       * Every branch of the recursive call stores its own copy_charset()
+       * copy, so the temporary complemented charset can be released here.
+       */
+      h_free(child_cs);
+    }
+
+    /*
+     * Set up split node if it makes sense; the depth cutoff here limits the
+     * cost of the search for complex charsets.
+     */
+    if (idx_start < idx_end && depth < 5) {
+      split_cep.cs = copy_charset(mm__, cs);
+      charset_restrict_to_range(split_cep.cs, idx_start, idx_end);
+      split_cep.idx_start = idx_start;
+      split_cep.idx_end = idx_end;
+      split_cep.split_point = 0;
+      split_cep.action = CHARSET_ACTION_SPLIT;
+      split_cep.cost = -1;
+      split_cep.depth = depth;
+      split_cep.children[0] = NULL;
+      split_cep.children[1] = NULL;
+      /* h_llvm_find_best_split() sets split_cep.cost */
+      estimated_split_cost = h_llvm_find_best_split(mm__, &split_cep);
+      if (estimated_split_cost < 0) {
+        /* This shouldn't happen, but make sure we free the charset */
+        h_free(split_cep.cs);
+      }
+    } else {
+      estimated_split_cost = -1;
+    }
+
+    /*
+     * Pick the action type with the lowest cost.  The comparisons are strict,
+     * so on a tie the candidate tested earlier wins: scan, then bitmap, then
+     * split, then complement.
+     */
+    best_cost = -1;
+    if (estimated_scan_cost >= 0 &&
+        (best_cost < 0 || estimated_scan_cost < best_cost)) {
+      chosen_action = CHARSET_ACTION_SCAN;
+      best_cost = estimated_scan_cost;
+    }
+
+    if (estimated_bitmap_cost >= 0 &&
+        (best_cost < 0 || estimated_bitmap_cost < best_cost)) {
+      chosen_action = CHARSET_ACTION_BITMAP;
+      best_cost = estimated_bitmap_cost;
+    }
+
+    if (estimated_split_cost >= 0 &&
+        (best_cost < 0 || estimated_split_cost < best_cost)) {
+      chosen_action = CHARSET_ACTION_SPLIT;
+      best_cost = estimated_split_cost;
+    }
+
+    if (allow_complement && estimated_complement_cost >= 0 &&
+        (best_cost < 0 || estimated_complement_cost < best_cost)) {
+      chosen_action = CHARSET_ACTION_COMPLEMENT;
+      best_cost = estimated_complement_cost;
+    }
+
+    /*
+     * Fill out cep based on the chosen action.  estimated_bitmap_cost is a
+     * non-negative constant, so chosen_action has always been assigned by the
+     * time we get here.
+     */
+    switch (chosen_action) {
+      case CHARSET_ACTION_SCAN:
+        /* Set up a scan */
+        cep->cs = copy_charset(mm__, cs);
+        charset_restrict_to_range(cep->cs, idx_start, idx_end);
+        cep->idx_start = idx_start;
+        cep->idx_end = idx_end;
+        cep->split_point = 0;
+        cep->action = CHARSET_ACTION_SCAN;
+        cep->cost = estimated_scan_cost;
+        cep->depth = depth;
+        cep->children[0] = NULL;
+        cep->children[1] = NULL;
+        break;
+      case CHARSET_ACTION_BITMAP:
+        /* Set up a bitmap */
+        cep->cs = copy_charset(mm__, cs);
+        charset_restrict_to_range(cep->cs, idx_start, idx_end);
+        cep->idx_start = idx_start;
+        cep->idx_end = idx_end;
+        cep->split_point = 0;
+        cep->action = CHARSET_ACTION_BITMAP;
+        cep->cost = estimated_bitmap_cost;
+        cep->depth = depth;
+        cep->children[0] = NULL;
+        cep->children[1] = NULL;
+        break;
+      case CHARSET_ACTION_COMPLEMENT:
+        /*
+         * We have a CEP filled out we can just copy over; be sure to set
+         * estimated_complement_cost = -1 so we know not to free it on the
+         * way out.
+         */
+        memcpy(cep, &complement_cep, sizeof(complement_cep));
+        memset(&complement_cep, 0, sizeof(complement_cep));
+        estimated_complement_cost = -1;
+        break;
+      case CHARSET_ACTION_SPLIT:
+        /*
+         * We have a CEP filled out we can just copy over; be sure to set
+         * estimated_split_cost = -1 so we know not to free it on the way
+         * out.
+         */
+        memcpy(cep, &split_cep, sizeof(split_cep));
+        memset(&split_cep, 0, sizeof(split_cep));
+        estimated_split_cost = -1;
+        break;
+      default:
+        /* Not supported */
+        best_cost = -1;
+        memset(cep, 0, sizeof(*cep));
+        break;
+    }
+  }
+
+  /*
+   * Free temporary CEPs if needed.  A cost that is still >= 0 here marks a
+   * candidate that was built but not moved into cep.
+   */
+
+  if (estimated_complement_cost >= 0) {
+    /*
+     * We have a complement_cep we ended up not using; free its child and
+     * charset
+     */
+    h_llvm_free_charset_exec_plan(mm__, complement_cep.children[0]);
+    h_free(complement_cep.cs);
+    memset(&complement_cep, 0, sizeof(complement_cep));
+    estimated_complement_cost = -1;
+  }
+
+  if (estimated_split_cost >= 0) {
+    /*
+     * We have a split_cep we ended up not using; free its children and
+     * charset.
+     */
+    h_llvm_free_charset_exec_plan(mm__, split_cep.children[0]);
+    h_llvm_free_charset_exec_plan(mm__, split_cep.children[1]);
+    h_free(split_cep.cs);
+    memset(&split_cep, 0, sizeof(split_cep));
+    estimated_split_cost = -1;
+  }
+
+  return best_cost;
+}
+
+/*
+ * Build a charset exec plan for a charset
+ *
+ * Parameters:
+ *  - mm__ [in]: allocator for the plan; must not be NULL
+ *  - cs [in]: the charset to plan for; must not be NULL
+ *
+ * Returns: the root of a newly allocated plan covering the full 0..UINT8_MAX
+ * range with complements allowed, to be released with
+ * h_llvm_free_charset_exec_plan(); NULL on bad arguments or if planning
+ * fails.
+ */
+
+static llvm_charset_exec_plan_t * h_llvm_build_charset_exec_plan(
+    HAllocator* mm__, HCharset cs) {
+  llvm_charset_exec_plan_t *cep = NULL;
+  int best_cost;
+
+  /*
+   * Validate up front, as h_llvm_build_charset_exec_plan_impl_alloc() does:
+   * the mm__ check inside h_llvm_build_charset_exec_plan_impl() only runs
+   * after the allocation below (which presumably goes through mm__), and cs
+   * is not checked there at all.
+   */
+  if (!mm__) return NULL;
+  if (!cs) return NULL;
+
+  cep = h_new(llvm_charset_exec_plan_t, 1);
+  /* Start from a fully zeroed node so no field is ever read uninitialized */
+  memset(cep, 0, sizeof(*cep));
+  best_cost = h_llvm_build_charset_exec_plan_impl(mm__, cs, NULL, cep, 1, NULL);
+
+  if (best_cost < 0) {
+    /* h_llvm_build_charset_exec_plan_impl() failed */
+    h_free(cep);
+    cep = NULL;
+  }
+
+  return cep;
+}
+
+/*
+ * Consistency-check a charset exec plan
+ *
+ * Returns true only if cep is non-NULL, has a charset that is empty outside
+ * [idx_start, idx_end], has a non-negative cost, and satisfies the rules for
+ * its action type; nodes with children are checked recursively.
+ */
+
+static bool h_llvm_check_charset_exec_plan(llvm_charset_exec_plan_t *cep) {
+  llvm_charset_exec_plan_t *lhs, *rhs;
+  int ch, lo_cost, hi_cost;
+
+  /* A missing node or a node without a charset is never consistent */
+  if (!cep) return false;
+  if (!(cep->cs)) return false;
+
+  /* The range has to be non-empty */
+  if (cep->idx_start > cep->idx_end) return false;
+
+  /* Nothing may be set below the range... */
+  for (ch = 0; ch < cep->idx_start; ++ch) {
+    if (charset_isset(cep->cs, (uint8_t)ch)) return false;
+  }
+
+  /* ...nor above it; an int counter needs no wraparound guard */
+  for (ch = cep->idx_end + 1; ch <= UINT8_MAX; ++ch) {
+    if (charset_isset(cep->cs, (uint8_t)ch)) return false;
+  }
+
+  /* Minimum cost estimate is 0; complements and accepts can be free */
+  if (cep->cost < 0) return false;
+
+  /* No split point unless we're CHARSET_ACTION_SPLIT */
+  if (cep->action != CHARSET_ACTION_SPLIT && cep->split_point != 0) return false;
+
+  /* Action type dependent part */
+  switch (cep->action) {
+    case CHARSET_ACTION_ACCEPT:
+    case CHARSET_ACTION_SCAN:
+    case CHARSET_ACTION_BITMAP:
+      /* Leaf actions: consistent exactly when there are no children */
+      return !(cep->children[0] || cep->children[1]);
+
+    case CHARSET_ACTION_COMPLEMENT:
+      /* Exactly one child, covering the same range */
+      lhs = cep->children[0];
+      if (cep->children[1] || !lhs) return false;
+      if (lhs->idx_start != cep->idx_start ||
+          lhs->idx_end != cep->idx_end) return false;
+      /* The cost cannot be lower than the child */
+      if (cep->cost < lhs->cost) return false;
+      /* Consistent if the child node is */
+      return h_llvm_check_charset_exec_plan(lhs);
+
+    case CHARSET_ACTION_SPLIT:
+      /* Two children which partition the range between them */
+      lhs = cep->children[0];
+      rhs = cep->children[1];
+      if (!lhs || !rhs) return false;
+      if (lhs->idx_start != cep->idx_start ||
+          lhs->idx_end + 1 != rhs->idx_start ||
+          rhs->idx_end != cep->idx_end) return false;
+      /* The split point must match the children */
+      if (cep->split_point != lhs->idx_end) return false;
+      /*
+       * The cost must lie between the cheaper child and the dearer child
+       * plus one for the comparison.
+       */
+      lo_cost = (lhs->cost < rhs->cost) ? lhs->cost : rhs->cost;
+      hi_cost = (lhs->cost > rhs->cost) ? lhs->cost : rhs->cost;
+      if (cep->cost < lo_cost || cep->cost > hi_cost + 1) return false;
+      /* Consistent if both children are */
+      return h_llvm_check_charset_exec_plan(lhs) &&
+             h_llvm_check_charset_exec_plan(rhs);
+
+    default:
+      break;
+  }
+
+  /* Unknown action type */
+  return false;
+}
+
+/*
+ * Free a charset exec plan using the supplied allocator
+ *
+ * Releases the children that the node's action type owns (one for
+ * CHARSET_ACTION_COMPLEMENT, two for CHARSET_ACTION_SPLIT, none otherwise),
+ * then the node's charset, then the node itself.  A NULL cep is a no-op.
+ */
+
+static void h_llvm_free_charset_exec_plan(HAllocator* mm__,
+                                          llvm_charset_exec_plan_t *cep) {
+  if (!cep) return;
+
+  /* Subtrees go first, in child order */
+  if (cep->action == CHARSET_ACTION_SPLIT) {
+    h_llvm_free_charset_exec_plan(mm__, cep->children[0]);
+    h_llvm_free_charset_exec_plan(mm__, cep->children[1]);
+  } else if (cep->action == CHARSET_ACTION_COMPLEMENT) {
+    h_llvm_free_charset_exec_plan(mm__, cep->children[0]);
+  }
+
+  /* Then this node's own storage */
+  h_free(cep->cs);
+  h_free(cep);
+}
+
+/*
+ * Pretty-print a charset exec plan to stdout
+ *
+ * Parameters:
+ *  - mm__ [in]: allocator used for the temporary prefix strings built for
+ *    child nodes
+ *  - cep [in]: the node to print; NULL prints just an action line reading
+ *    "NULL"
+ *  - pfx_on_action_line [in]: prefix for the line naming the action
+ *  - pfx [in]: prefix for every other line belonging to this node
+ *  - depth [in]: nesting level; 0 with an empty pfx_on_action_line selects
+ *    the narrower root layout
+ */
+
+static void h_llvm_pretty_print_charset_exec_plan_impl(HAllocator *mm__, llvm_charset_exec_plan_t *cep,
+                                                       const char *pfx_on_action_line, const char *pfx,
+                                                       int depth) {
+  const char *action_string = NULL, *pfx_incr = NULL;
+  const char *pfx_incr_child_action = NULL, *pfx_incr_last_child = NULL;
+  char *next_pfx = NULL, *next_pfx_child_action_line = NULL, *next_pfx_last_child = NULL;
+  int n_children = 0, i, j, next_pfx_len;
+  int open = 0, close = 0, split = 0;
+  uint8_t ch;
+
+  if (!cep) {
+    action_string = "NULL";
+  } else {
+    switch (cep->action) {
+      case CHARSET_ACTION_ACCEPT:
+        action_string = "CHARSET_ACTION_ACCEPT";
+        break;
+      case CHARSET_ACTION_SCAN:
+        action_string = "CHARSET_ACTION_SCAN";
+        break;
+      case CHARSET_ACTION_BITMAP:
+        action_string = "CHARSET_ACTION_BITMAP";
+        break;
+      case CHARSET_ACTION_COMPLEMENT:
+        action_string = "CHARSET_ACTION_COMPLEMENT";
+        n_children = 1;
+        break;
+      case CHARSET_ACTION_SPLIT:
+        action_string = "CHARSET_ACTION_SPLIT";
+        n_children = 2;
+        break;
+      default:
+        action_string = "UNKNOWN";
+        break;
+    }
+  }
+
+  /* Print the action line and pick the prefix increments for this layout */
+  if (depth > 0 || strlen(pfx_on_action_line) > 0) {
+    printf("%s-%s\n", pfx_on_action_line, action_string);
+    pfx_incr = (n_children > 0) ? " | " : "   ";
+    pfx_incr_child_action = " +-";
+    pfx_incr_last_child = "   ";
+  } else {
+    printf("%s\n", action_string);
+    pfx_incr = (n_children > 0) ? "| " : "  ";
+    pfx_incr_child_action = "+-";
+    pfx_incr_last_child = "  ";
+  }
+
+  /*
+   * A NULL node has no charset, range, cost or children to show; everything
+   * below dereferences cep, so stop after the action line.
+   */
+  if (!cep) return;
+
+  /*
+   * Now do the charset, 8 lines of 32 bits with spaces in between to
+   * fit [] range markers and | split point marker.
+   */
+  for (ch = 0, i = 0; i < 8; ++i) {
+    /* Special case: [ should go before first char on line */
+    if (ch == cep->idx_start) {
+      printf("%s%s [", pfx, pfx_incr);
+    } else {
+      printf("%s%s  ", pfx, pfx_incr);
+    }
+    /* ch wraps back to 0 after the last line, by which time i has hit 8 */
+    for (j = 0; j < 32; ++j, ++ch) {
+      open = close = split = 0;
+      /* Figure out markers, avoid wraparound */
+      if (cep->idx_start != 0 && ch + 1 == cep->idx_start) {
+        /* There should be a [ right after this char */
+        open = 1;
+      } else if (ch == cep->idx_end) {
+        /* There should be a ] right after this char */
+        close = 1;
+      } else if (ch == cep->split_point &&
+                 cep->action == CHARSET_ACTION_SPLIT) {
+        /* There should be a | right after this char */
+        split = 1;
+      }
+
+      if (charset_isset(cep->cs, ch)) printf("X");
+      else printf(".");
+
+      if (open) printf("[");
+      else if (close) printf("]");
+      else if (split) printf("|");
+      else printf(" ");
+    }
+    printf("\n");
+  }
+
+  if (cep->action == CHARSET_ACTION_SPLIT) {
+    printf("%s%s idx_start = %u, split_point = %u, idx_end = %u\n",
+           pfx, pfx_incr, cep->idx_start, cep->split_point, cep->idx_end);
+  } else {
+    printf("%s%s idx_start = %u, idx_end = %u\n",
+           pfx, pfx_incr, cep->idx_start, cep->idx_end);
+  }
+
+  printf("%s%s cost = %d, depth = %d\n", pfx, pfx_incr, cep->cost, cep->depth);
+
+  if (n_children > 0) {
+    if (n_children > 1) {
+      /* Prefix for the lines of every child except the last */
+      next_pfx_len = strlen(pfx) + strlen(pfx_incr) + 1;
+      next_pfx = h_new(char, next_pfx_len);
+      snprintf(next_pfx, next_pfx_len, "%s%s", pfx, pfx_incr);
+    } else {
+      /* Won't be needed */
+      next_pfx = NULL;
+    }
+    /* Prefix for each child's action line */
+    next_pfx_len = strlen(pfx) + strlen(pfx_incr_child_action) + 1;
+    next_pfx_child_action_line = h_new(char, next_pfx_len);
+    snprintf(next_pfx_child_action_line, next_pfx_len,
+             "%s%s", pfx, pfx_incr_child_action);
+    /* Prefix for the lines of the last child */
+    next_pfx_len = strlen(pfx) + strlen(pfx_incr_last_child) + 1;
+    next_pfx_last_child = h_new(char, next_pfx_len);
+    snprintf(next_pfx_last_child, next_pfx_len,
+             "%s%s", pfx, pfx_incr_last_child);
+
+    for (i = 0; i < n_children; ++i) {
+      /* Space things out */
+      printf("%s%s\n", pfx, pfx_incr);
+      h_llvm_pretty_print_charset_exec_plan_impl(mm__, cep->children[i],
+          next_pfx_child_action_line, (i + 1 == n_children) ? next_pfx_last_child : next_pfx,
+          depth + 1);
+    }
+
+    if (next_pfx) h_free(next_pfx);
+    h_free(next_pfx_last_child);
+    h_free(next_pfx_child_action_line);
+  }
+}
+
+static void h_llvm_pretty_print_charset_exec_plan(HAllocator *mm__, llvm_charset_exec_plan_t *cep) {
+  /* Lead with a blank line, then print the tree from its root at depth 0 */
+  putchar('\n');
+  h_llvm_pretty_print_charset_exec_plan_impl(mm__, cep, "", "", 0);
+}
+
+/*
+ * Forward declares for IR-emission functions.  These are needed because
+ * h_llvm_build_ir_for_split() and h_llvm_cep_to_ir() call each other.
+ */
+static bool h_llvm_build_ir_for_bitmap(HLLVMParserCompileContext *ctxt,
+                                       HCharset cs, uint8_t idx_start, uint8_t idx_end,
+                                       LLVMValueRef r,
+                                       LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no);
+static bool h_llvm_build_ir_for_scan(HLLVMParserCompileContext *ctxt,
+                                     HCharset cs, uint8_t idx_start, uint8_t idx_end,
+                                     LLVMValueRef r,
+                                     LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no);
+static bool h_llvm_build_ir_for_split(HLLVMParserCompileContext *ctxt,
+                                      llvm_charset_exec_plan_t *cep, LLVMValueRef r,
+                                      LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no);
+static bool h_llvm_cep_to_ir(HLLVMParserCompileContext *ctxt,
+                             LLVMValueRef r, llvm_charset_exec_plan_t *cep,
+                             LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no);
+
+/*
+ * Build IR for a CHARSET_ACTION_BITMAP
+ *
+ * Parameters:
+ *  - ctxt [in]: compile context; ctxt->builder and ctxt->mod are used
+ *  - cs [in]: the charset to test membership in; all 256 bits go into the
+ *    bitmap
+ *  - idx_start, idx_end [in]: range of the plan node; only validated here
+ *    (idx_start <= idx_end), not used to trim the bitmap
+ *  - r [in]: the value to test (the shift and mask constants below are i8)
+ *  - in [in]: basic block to emit the test into
+ *  - yes [in]: basic block to branch to if r is in cs
+ *  - no [in]: basic block to branch to if r is not in cs
+ *
+ * Returns: true on success, false if cs or ctxt is NULL or the range is
+ * inverted
+ */
+
+static bool h_llvm_build_ir_for_bitmap(HLLVMParserCompileContext *ctxt,
+                                       HCharset cs, uint8_t idx_start, uint8_t idx_end,
+                                       LLVMValueRef r,
+                                       LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
+  int i, j;
+  uint32_t bitmap_entry;
+
+  if (!cs) return false;
+  if (!ctxt) return false;
+  if (idx_start > idx_end) return false;
+
+  /*
+   * Embed a 8x32 bitmap in the IR, turn the input value into an index by
+   * right-shifting 5 bits, load the relevant 32-bit bitmap word, then derive
+   * a mask from the low-order 5 bits of the input value.  & the mask with the
+   * bitmap word, and compare.  If non-zero, accept, otherwise reject.
+   */
+  LLVMPositionBuilderAtEnd(ctxt->builder, in);
+
+  /* Construct the bitmap */
+  LLVMValueRef bitmap_entries[8];
+  for (i = 0; i < 8; ++i) {
+    bitmap_entry = 0x0;
+    /*
+     * Bit order; LSB is lowest-numbered char index 32*i, MSB is 32*i + 31.
+     * and then the mask we need is just 1 << (r & 0x1f).
+     */
+    for (j = 0; j < 32; ++j) {
+      /* Set the bit if necessary */
+      if (charset_isset(cs, (uint8_t)(32*i + j))) {
+        bitmap_entry |= ((uint32_t)(0x1) << j);
+      }
+    }
+
+    /* Make an LLVMValueRef for it */
+    bitmap_entries[i] = LLVMConstInt(LLVMInt32Type(), bitmap_entry, 0);
+  }
+  /* Now make an array out of them */
+  LLVMValueRef bitmap_initializer = LLVMConstArray(LLVMInt32Type(), bitmap_entries, 8);
+  /* ...and we need a global variable to stick it in to GEP it */
+  LLVMValueRef bitmap = LLVMAddGlobal(ctxt->mod, LLVMTypeOf(bitmap_initializer), "bitmap");
+  LLVMSetInitializer(bitmap, bitmap_initializer);
+  /*
+   * The table is only ever read by the code emitted below, so mark it
+   * constant; that lets LLVM treat loads from it as loads of known values.
+   */
+  LLVMSetGlobalConstant(bitmap, 1);
+
+  /* Compute the index into the bitmap */
+  LLVMValueRef word_index = LLVMBuildLShr(ctxt->builder, r,
+      LLVMConstInt(LLVMInt8Type(), 5, 0), "word_index");
+
+  /* Get a pointer to that word in the bitmap */
+  LLVMValueRef gep_indices[2];
+  gep_indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0);
+  gep_indices[1] = word_index;
+  LLVMValueRef bitmap_word_p =
+    LLVMBuildInBoundsGEP(ctxt->builder, bitmap, gep_indices, 2, "bitmap_word_p");
+  LLVMValueRef bitmap_word =
+    LLVMBuildLoad(ctxt->builder, bitmap_word_p, "bitmap_word");
+  /*
+   * Extract the low-order 5 bits of r, and expand to a 32-bit int for the
+   * mask
+   */
+  LLVMValueRef bit_index = LLVMBuildAnd(ctxt->builder, r,
+      LLVMConstInt(LLVMInt8Type(), 0x1f, 0), "bit_index");
+  LLVMValueRef bit_index_zext = LLVMBuildZExt(ctxt->builder, bit_index,
+      LLVMInt32Type(), "bit_index_zext");
+  /* Compute mask */
+  LLVMValueRef mask = LLVMBuildShl(ctxt->builder, LLVMConstInt(LLVMInt32Type(), 1, 0),
+      bit_index_zext, "mask");
+  /* AND the mask with the bitmap word */
+  LLVMValueRef masked_bitmap_word = LLVMBuildAnd(ctxt->builder, bitmap_word, mask,
+      "masked_bitmap_word");
+  /* Compare it to zero */
+  LLVMValueRef bitmap_icmp = LLVMBuildICmp(ctxt->builder, LLVMIntNE,
+      masked_bitmap_word, LLVMConstInt(LLVMInt32Type(), 0, 0), "bitmap_icmp");
+  /* If not zero, the char is in the set */
+  LLVMBuildCondBr(ctxt->builder, bitmap_icmp, yes, no);
+
+  return true;
+}
+
+/*
+ * Build IR for a CHARSET_ACTION_SCAN
+ *
+ * Emits one equality test per member of cs within [idx_start, idx_end],
+ * chained through freshly appended basic blocks: a match branches to yes,
+ * and falling off the end of the chain branches to no.
+ *
+ * Returns: true on success, false if cs or ctxt is NULL or the range is
+ * inverted
+ */
+
+static bool h_llvm_build_ir_for_scan(HLLVMParserCompileContext *ctxt,
+                                     HCharset cs, uint8_t idx_start, uint8_t idx_end,
+                                     LLVMValueRef r,
+                                     LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
+  char bbname[16];
+  int pos;
+
+  if (!cs || !ctxt || idx_start > idx_end) return false;
+
+  /* Emission begins in the caller-supplied input block */
+  LLVMPositionBuilderAtEnd(ctxt->builder, in);
+
+  /* An int counter lets the loop terminate even when idx_end is UINT8_MAX */
+  for (pos = idx_start; pos <= idx_end; ++pos) {
+    uint8_t member = (uint8_t)pos;
+    LLVMValueRef is_member;
+    LLVMBasicBlockRef next_bb;
+
+    /* Non-members need no comparison */
+    if (!charset_isset(cs, pos)) continue;
+
+    snprintf(bbname, sizeof(bbname), "cs_memb_%02x", member);
+    is_member = LLVMBuildICmp(ctxt->builder, LLVMIntEQ,
+        LLVMConstInt(LLVMInt8Type(), member, 0), r, "c == r");
+    /* On a miss, fall through to a new block holding the next comparison */
+    next_bb = LLVMAppendBasicBlock(ctxt->func, bbname);
+    LLVMBuildCondBr(ctxt->builder, is_member, yes, next_bb);
+    LLVMPositionBuilderAtEnd(ctxt->builder, next_bb);
+  }
+
+  /* Nothing matched */
+  LLVMBuildBr(ctxt->builder, no);
+
+  return true;
+}
+
+/*
+ * Build IR for a CHARSET_ACTION_SPLIT
+ *
+ * Emits an unsigned r <= split_point comparison in the input block and then
+ * builds each child's subtree in its own new basic block.
+ *
+ * Returns: true if the node is a well-formed split and both subtrees were
+ * built successfully, false otherwise
+ */
+
+static bool h_llvm_build_ir_for_split(HLLVMParserCompileContext *ctxt,
+                                      llvm_charset_exec_plan_t *cep, LLVMValueRef r,
+                                      LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
+  char name[18];
+  bool built_left, built_right;
+  LLVMBasicBlockRef left_bb, right_bb;
+  LLVMValueRef goes_left;
+
+  /* Split validation: arguments and action type */
+  if (!ctxt || !cep) return false;
+  if (cep->action != CHARSET_ACTION_SPLIT) return false;
+
+  /* The split point must fall inside a range of at least two indices */
+  if (cep->idx_start >= cep->idx_end ||
+      cep->split_point < cep->idx_start ||
+      cep->split_point >= cep->idx_end) return false;
+
+  /* Both children must exist and tile the range at the split point */
+  if (!(cep->children[0] && cep->children[1])) return false;
+  if (cep->idx_start != cep->children[0]->idx_start ||
+      cep->split_point != cep->children[0]->idx_end ||
+      cep->split_point + 1 != cep->children[1]->idx_start ||
+      cep->idx_end != cep->children[1]->idx_end) return false;
+
+  /* One output block per side of the comparison */
+  snprintf(name, sizeof(name), "cs_split_left_%02X", cep->split_point);
+  left_bb = LLVMAppendBasicBlock(ctxt->func, name);
+  snprintf(name, sizeof(name), "cs_split_right_%02X", cep->split_point);
+  right_bb = LLVMAppendBasicBlock(ctxt->func, name);
+
+  /* Left child if r <= split point, right child if r > split point */
+  LLVMPositionBuilderAtEnd(ctxt->builder, in);
+  snprintf(name, sizeof(name), "r <= %02X", cep->split_point);
+  goes_left = LLVMBuildICmp(ctxt->builder, LLVMIntULE,
+      r, LLVMConstInt(LLVMInt8Type(), cep->split_point, 0), name);
+  LLVMBuildCondBr(ctxt->builder, goes_left, left_bb, right_bb);
+
+  /* Both subtrees are always built, even if the first one fails */
+  built_left = h_llvm_cep_to_ir(ctxt, r, cep->children[0], left_bb, yes, no);
+  built_right = h_llvm_cep_to_ir(ctxt, r, cep->children[1], right_bb, yes, no);
+
+  return built_left && built_right;
+}
+
+/*
+ * Turn an llvm_charset_exec_plan_t into IR
+ *
+ * Dispatches on cep->action to the matching emitter, starting in basic block
+ * in and branching to yes or no.
+ *
+ * Returns: true on success, false if ctxt or cep is NULL, the action type is
+ * unknown, or the emitter fails
+ */
+
+static bool h_llvm_cep_to_ir(HLLVMParserCompileContext *ctxt,
+                             LLVMValueRef r, llvm_charset_exec_plan_t *cep,
+                             LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
+  if (!ctxt || !cep) return false;
+
+  switch (cep->action) {
+    case CHARSET_ACTION_ACCEPT:
+      /* Nothing to test: jump straight to the yes output */
+      LLVMPositionBuilderAtEnd(ctxt->builder, in);
+      LLVMBuildBr(ctxt->builder, yes);
+      return true;
+
+    case CHARSET_ACTION_SCAN:
+      return h_llvm_build_ir_for_scan(ctxt, cep->cs,
+          cep->idx_start, cep->idx_end, r, in, yes, no);
+
+    case CHARSET_ACTION_BITMAP:
+      return h_llvm_build_ir_for_bitmap(ctxt, cep->cs,
+          cep->idx_start, cep->idx_end, r, in, yes, no);
+
+    case CHARSET_ACTION_COMPLEMENT:
+      /* Build the child with the yes and no outputs exchanged */
+      return h_llvm_cep_to_ir(ctxt, r, cep->children[0], in, no, yes);
+
+    case CHARSET_ACTION_SPLIT:
+      return h_llvm_build_ir_for_split(ctxt, cep, r, in, yes, no);
+
+    default:
+      break;
+  }
+
+  /* Only an unknown action type reaches this point */
+#ifdef HAMMER_LLVM_CHARSET_DEBUG
+  fprintf(stderr,
+          "cep %p has unknown action type\n",
+          (void *)cep);
+#endif /* defined(HAMMER_LLVM_CHARSET_DEBUG) */
+
+  return false;
+}
+
+/*
+ * Construct LLVM IR to decide if a runtime value is a member of a compile-time
+ * character set, and branch depending on the result.
+ *
+ * Parameters:
+ *  - ctxt [in]: the compile context; its mm__ allocator is used for the
+ *    temporary exec plan, new basic blocks are appended to ctxt->func, and
+ *    ctxt->builder must be positioned where the jump into the test belongs
+ *  - r [in]: an LLVMValueRef to the value to test
+ *  - cs [in]: the HCharset to test membership in
+ *  - yes [in]: the basic block to branch to if r is in cs
+ *  - no [in]: the basic block to branch to if r is not in cs
+ *
+ * Returns: true on success; false if ctxt is NULL, no exec plan could be
+ * built, the plan fails its consistency check (only checked when
+ * HAMMER_LLVM_CHARSET_DEBUG is defined), or IR emission fails
+ */
+
+bool h_llvm_make_charset_membership_test(HLLVMParserCompileContext *ctxt,
+                                         LLVMValueRef r, HCharset cs,
+                                         LLVMBasicBlockRef yes, LLVMBasicBlockRef no) {
+  /*
+   * A charset is a 256-element bit array, 32 bytes long in total.  Ours is
+   * static at compile time, so we can try to construct minimal LLVM IR for
+   * this particular charset.  In particular, we should handle cases like
+   * only one or two bits being set, or a long consecutive range, efficiently.
+   *
+   * In LLVM IR, we can test propositions like r == x, r <= x, r >= x and their
+   * negations efficiently, so the challenge here is to turn a character map
+   * into a minimal set of such propositions.
+   *
+   * We achieve this by building a tree of actions to minimize a cost metric,
+   * and then transforming the tree into IR.
+   */
+
+  HAllocator *mm__;
+  bool rv;
+
+  if (!ctxt) return false;
+  mm__ = ctxt->mm__;
+
+  /* Try building a charset exec plan */
+  llvm_charset_exec_plan_t *cep = h_llvm_build_charset_exec_plan(mm__, cs);
+  if (!cep) {
+    fprintf(stderr, "got null from h_llvm_build_charset_exec_plan()\n");
+    return false;
+  }
+
+#ifdef HAMMER_LLVM_CHARSET_DEBUG
+  /* Debug builds verify and dump the plan, and refuse to emit a bad one */
+  bool ok = h_llvm_check_charset_exec_plan(cep);
+  if (ok) fprintf(stderr, "cep %p passes consistency check\n", (void *)cep);
+  else fprintf(stderr, "cep %p fails consistency check\n", (void *)cep);
+  h_llvm_pretty_print_charset_exec_plan(mm__, cep);
+  if (!ok) {
+    fprintf(stderr, "h_llvm_make_charset_membership_test() error-exiting "
+            "because consistency check failed\n");
+    h_llvm_free_charset_exec_plan(mm__, cep);
+    cep = NULL;
+    return false;
+  }
+#endif /* defined(HAMMER_LLVM_CHARSET_DEBUG) */
+
+  /*
+   * XXX Note on memoization:
+   *
+   * How common is it for this to occur multiple times in a parser with the
+   * same charset?  If so, we will end up emitting code which differs only in
+   * its yes and no output basic blocks each time.  Is there a significant
+   * performance penalty for LLVMBuildIndirectBr() vs. LLVMBuildBr()?  If no,
+   * we should consider memoizing by charset using it and building a wrapper
+   * around it that just varies the output blocks to reduce emitted code size.
+   */
+
+  /* Create input block */
+  LLVMBasicBlockRef start = LLVMAppendBasicBlock(ctxt->func, "cs_start");
+  /*
+   * Make unconditional branch into input block from wherever our caller
+   * had us positioned.
+   */
+  LLVMBuildBr(ctxt->builder, start);
+
+  rv = h_llvm_cep_to_ir(ctxt, r, cep, start, yes, no);
+
+  /* The plan is only needed while emitting; release it either way */
+  h_llvm_free_charset_exec_plan(mm__, cep);
+  cep = NULL;
+
+  return rv;
+}
+
+#endif /* defined(HAMMER_LLVM_BACKEND) */
diff --git a/src/backends/llvm/llvm_suint.c b/src/backends/llvm/llvm_suint.c
new file mode 100644
index 0000000000000000000000000000000000000000..571d6b00d39cbfc30f7b66d65c44e6554372ec0b
--- /dev/null
+++ b/src/backends/llvm/llvm_suint.c
@@ -0,0 +1,97 @@
+#ifdef HAMMER_LLVM_BACKEND
+
+#include <llvm-c/Analysis.h>
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpedantic"
+#include <llvm-c/Core.h>
+#pragma GCC diagnostic pop
+#include <llvm-c/ExecutionEngine.h>
+#include "../../internal.h"
+#include "llvm.h"
+
+/*
+ * Construct LLVM IR to allocate a token of type TT_SINT or TT_UINT
+ *
+ * Parameters:
+ *  - ctxt [in]: compile context; supplies the module, the builder
+ *    (positioned appropriately), the input stream and the arena
+ *  - length [in]: bit length recorded in the token; r is extended if < 64
+ *  - signedp [in]: nonzero for TT_SINT (sign-extend), zero for TT_UINT
+ *  - r [in]: a value ref to the value to be used for this token
+ *  - mr_out [out]: the return value from make_result()
+ *
+ * TODO actually support TT_SINT, inputs other than 8 bit
+ */
+
+void h_llvm_make_tt_suint(HLLVMParserCompileContext *ctxt,
+                          uint8_t length, uint8_t signedp,
+                          LLVMValueRef r, LLVMValueRef *mr_out) {
+  /* Set up call to h_arena_malloc() for a new HParsedToken */
+  LLVMValueRef tok_size = LLVMConstInt(LLVMInt32Type(), sizeof(HParsedToken), 0);
+  LLVMValueRef amalloc_args[] = { ctxt->arena, tok_size };
+  /* %h_arena_malloc = call void* @h_arena_malloc(%struct.HArena_.1* %1, i32 48) */
+  LLVMValueRef amalloc = LLVMBuildCall(ctxt->builder,
+      LLVMGetNamedFunction(ctxt->mod, "h_arena_malloc"),
+      amalloc_args, 2, "h_arena_malloc");
+  /* %tok = bitcast void* %h_arena_malloc to %struct.HParsedToken_.2* */
+  LLVMValueRef tok = LLVMBuildBitCast(ctxt->builder, amalloc, ctxt->llvm_parsedtokenptr, "tok");
+
+  /*
+   * tok->token_type = signedp ? TT_SINT : TT_UINT;
+   *
+   * %token_type = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 0
+   */
+  LLVMValueRef toktype = LLVMBuildStructGEP(ctxt->builder, tok, 0, "token_type");
+  /* store i32 8, i32* %token_type */
+  LLVMBuildStore(ctxt->builder, LLVMConstInt(LLVMInt32Type(),
+        signedp ? TT_SINT : TT_UINT, 0), toktype);
+
+  /*
+   * tok->sint = r;
+   * or
+   * tok->uint = r;
+   *
+   * %token_data = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 1
+   */
+  LLVMValueRef tokdata = LLVMBuildStructGEP(ctxt->builder, tok, 1, "token_data");
+  /*
+   * the token_data field is a union, but either an int64_t or a uint64_t in the
+   * cases we can be called for.
+   */
+  if (length < 64) {
+    /* Extend needed */
+    LLVMValueRef r_ext;
+    if (signedp) r_ext = LLVMBuildSExt(ctxt->builder, r, LLVMInt64Type(), "r_sext");
+    else r_ext = LLVMBuildZExt(ctxt->builder, r, LLVMInt64Type(), "r_zext");
+    LLVMBuildStore(ctxt->builder, r_ext, tokdata);
+  } else {
+    LLVMBuildStore(ctxt->builder, r, tokdata);
+  }
+  /*
+   * Store the index from the stream into the token
+   */
+  /* %t_index = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 2 */
+  LLVMValueRef tokindex = LLVMBuildStructGEP(ctxt->builder, tok, 2, "t_index");
+  /* %s_index = getelementptr inbounds %struct.HInputStream_.0, %struct.HInputStream_.0* %0, i32 0, i32 2 */
+  LLVMValueRef streamindex = LLVMBuildStructGEP(ctxt->builder, ctxt->stream, 2, "s_index");
+  /* %4 = load i64, i64* %s_index */
+  /* store i64 %4, i64* %t_index */
+  LLVMBuildStore(ctxt->builder, LLVMBuildLoad(ctxt->builder, streamindex, ""), tokindex);
+  /* Store the bit length into the token */
+  LLVMValueRef tokbitlen = LLVMBuildStructGEP(ctxt->builder, tok, 3, "bit_length");
+  LLVMBuildStore(ctxt->builder, LLVMConstInt(LLVMInt64Type(), length, 0), tokbitlen);
+
+  /*
+   * Now call make_result()
+   *
+   * %make_result = call %struct.HParseResult_.3* @make_result(%struct.HArena_.1* %1, %struct.HParsedToken_.2* %3)
+   */
+  LLVMValueRef result_args[] = { ctxt->arena, tok };
+  LLVMValueRef mr = LLVMBuildCall(ctxt->builder,
+      LLVMGetNamedFunction(ctxt->mod, "make_result"),
+      result_args, 2, "make_result");
+
+  *mr_out = mr;
+}
+
+#endif /* defined(HAMMER_LLVM_BACKEND) */
diff --git a/src/backends/missing.c b/src/backends/missing.c
new file mode 100644
index 0000000000000000000000000000000000000000..2a46b57429b08e82d895fb8c7a56ef315e8e6d46
--- /dev/null
+++ b/src/backends/missing.c
@@ -0,0 +1,23 @@
+#include "missing.h"
+
+int h_missing_compile(HAllocator* mm__, HParser* parser, const void* params) {
+  /* Stub compile hook: ignores its arguments and always fails with -1 */
+
+  return -1;
+}
+
+HParseResult *h_missing_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) {
+  /* Stub parse hook: ignores its arguments and always returns NULL */
+
+  return NULL;
+}
+
+void h_missing_free(HParser *parser) {
+  /* No-op: h_missing_compile() stores nothing that would need freeing */
+}
+
+HParserBackendVTable h__missing_backend_vtable = {
+  .compile = h_missing_compile, /* TODO: stub, always returns -1 */
+  .parse = h_missing_parse, /* TODO: stub, always returns NULL */
+  .free = h_missing_free, /* TODO: stub, does nothing */
+};
diff --git a/src/backends/missing.h b/src/backends/missing.h
new file mode 100644
index 0000000000000000000000000000000000000000..4efe5f350331a1dcc75dcd894bf1075e8f62bbd8
--- /dev/null
+++ b/src/backends/missing.h
@@ -0,0 +1,7 @@
+#ifndef HAMMER_BACKENDS_MISSING__H
+#define HAMMER_BACKENDS_MISSING__H
+
+#include "../hammer.h"
+#include "../internal.h"
+
+#endif /* !defined(HAMMER_BACKENDS_MISSING__H) */
diff --git a/src/bindings/dotnet/SConscript b/src/bindings/dotnet/SConscript
index 94f874ee41cc4741cff950ef4a88478dcfc06b31..1636c6633546c6aa46a376af65af726f6c0d2e98 100644
--- a/src/bindings/dotnet/SConscript
+++ b/src/bindings/dotnet/SConscript
@@ -11,6 +11,7 @@ dotnetenv.Append(CCFLAGS=["-fpic", '-DSWIG', '-Wno-all',
               LIBS=['hammer'],
               LIBPATH=["../.."],
               SWIGFLAGS=["-DHAMMER_INTERNAL__NO_STDARG_H",
+                         "-DSWIG2_CSHARP",
                          "-Isrc/", "-csharp",
                          "-dllimport","hammer_dotnet",
                          "-namespace", "Hammer.Internal"])
@@ -27,7 +28,7 @@ csfiles = os.path.join(thisdir, "*.cs")
 # target to stand in for.
 hammer_wrap = AlwaysBuild(dotnetenv.Command(['hammer_wrap.c'], swig,
                                             ["rm %s/*.cs || true" % (thisdir,),
-                                             "swig $SWIGFLAGS $SOURCE"]))
+                                             "swig3.0 $SWIGFLAGS $SOURCE"]))
 libhammer_dotnet = dotnetenv.SharedLibrary(['hammer_dotnet'], hammer_wrap)
 hammer_dll = AlwaysBuild(dotnetenv.Command(['hammer.dll'], Glob('ext/*.cs'),
                                            '$CSC -t:library -unsafe -out:$TARGET %s/*.cs $SOURCE' %(thisdir,)))
diff --git a/src/bindings/perl/SConscript b/src/bindings/perl/SConscript
index 49b693a7035cabfe1914c0a2fc172d31a07e23dd..8a192a5a3ac05e5b1f83473f13fa3631d252b300 100644
--- a/src/bindings/perl/SConscript
+++ b/src/bindings/perl/SConscript
@@ -20,7 +20,7 @@ if 'PERL5LIB' in os.environ:
 
 swig = ['hammer.i']
 
-hammer_wrap = perlenv.Command(['hammer_wrap.c', 'hammer.pm'], swig, "swig $SWIGFLAGS $SOURCE")
+hammer_wrap = perlenv.Command(['hammer_wrap.c', 'hammer.pm'], swig, "swig3.0 $SWIGFLAGS $SOURCE")
 makefile = perlenv.Command(['Makefile'], ['Makefile.PL'], "perl $SOURCE CC=" + perlenv['ENV']['CC'])
 
 targetdir = os.path.dirname(str(hammer_wrap[0].path))
diff --git a/src/bindings/php/SConscript b/src/bindings/php/SConscript
index 34728af238c9a1b3ad478737e997921e8a0ff0b8..6791cbcc46d6c4f67fda5c756d46570ee8347c29 100644
--- a/src/bindings/php/SConscript
+++ b/src/bindings/php/SConscript
@@ -11,7 +11,7 @@ phpenv.Append(LIBS = ['hammer'])
 phpenv.Append(LIBPATH = ['../../']) 
 
 swig = ['hammer.i']
-bindings_src = phpenv.Command(['hammer.php', 'hammer_wrap.c', 'php_hammer.h'], swig, 'swig -php -DHAMMER_INTERNAL__NO_STDARG_H -Isrc/ $SOURCE')
+bindings_src = phpenv.Command(['hammer.php', 'hammer_wrap.c', 'php_hammer.h'], swig, 'swig3.0 -php -DHAMMER_INTERNAL__NO_STDARG_H -Isrc/ $SOURCE')
 libhammer_php = phpenv.SharedLibrary('hammer', ['hammer_wrap.c'])
 Default(swig, bindings_src, libhammer_php)
 
diff --git a/src/bindings/python/SConscript b/src/bindings/python/SConscript
index 5619347ccee631c3142f7a6cd4b2be8a608118a2..383e1c60d63e2f77ad97f122a78d70290b4cdea8 100644
--- a/src/bindings/python/SConscript
+++ b/src/bindings/python/SConscript
@@ -1,20 +1,34 @@
 # -*- python -*-
 import os, os.path
 Import('env libhammer_shared testruns targets')
-Import('llvm_defines')
-Import('llvm_includes')
+# LLVM-related flags
+if GetOption("use_llvm"):
+    Import('llvm_defines')
+    Import('llvm_includes')
 
 pythonenv = env.Clone(IMPLICIT_COMMAND_DEPENDENCIES = 0)
 
 swig = pythonenv.Command("hammer.i", "../swig/hammer.i", Copy("$TARGET", "$SOURCE"))
 setup = ['setup.py']
 pydir = os.path.join(env['BUILD_BASE'], 'src/bindings/python')
-define_list = ','.join(llvm_defines)
-inc_list = ' '.join(['-I' + e for e in llvm_includes])
-swig_opt_list = '-DHAMMER_INTERNAL__NO_STDARG_H -I../../ ' + inc_list
-libhammer_python = pythonenv.Command(['hammer.py', 'hammer_wrap.c'], [swig, setup], \
-    'python ' + os.path.join(pydir, 'setup.py') + ' build_ext --inplace ' + inc_list + \
-    ' --define=\"' + define_list + '\" --swig-opts=\"' + swig_opt_list + '\"')
+if GetOption("use_llvm"):
+    define_list = ','.join(llvm_defines + ['HAMMER_LLVM_BACKEND'])
+    inc_list = ' '.join(['-I' + e for e in llvm_includes])
+else:
+    define_list = None
+    inc_list = None
+
+swig_opt_list = '-DHAMMER_INTERNAL__NO_STDARG_H -I../../'
+arg_list = 'python ' + os.path.join(pydir, 'setup.py') + ' build_ext --swig=swig3.0 --inplace '
+if inc_list:
+    arg_list = arg_list + inc_list
+    swig_opt_list = swig_opt_list + ' ' + inc_list
+if define_list:
+    arg_list = arg_list + ' --define=\"' + define_list + '\"'
+arg_list = arg_list + ' --swig-opts=\"' + swig_opt_list + '\"'
+
+libhammer_python = pythonenv.Command(['hammer.py', 'hammer_wrap.c'], [swig, setup], arg_list)
+
 Default(libhammer_python)
 
 pytestenv = pythonenv.Clone()
diff --git a/src/hammer.c b/src/hammer.c
index 2a7d5bc5be51059115b1af20a40c6a5de6633f2a..6b881554256414eccb13454b3ce773e133dfd54a 100644
--- a/src/hammer.c
+++ b/src/hammer.c
@@ -31,7 +31,23 @@ static HParserBackendVTable *backends[PB_MAX + 1] = {
   &h__llk_backend_vtable,
   &h__lalr_backend_vtable,
   &h__glr_backend_vtable,
+  /*
+   * Brittleness warning!
+   *
+   * We're using an enum as an index into this array (don't blame me...)
+   * so it's important that this array have the same size and order as
+   * the corresponding enum values in HParserBackend of src/hammer.h.
+   * Since callers use those enums as numeric constants to select a
+   * backend, dropping/reordering them breaks binary compatibility.
+   * If anyone adds any more optional backends in the future, don't
+   * #ifdef out those enum values in hammer.h, and do provide the
+   * 'missing' stub backend as an alternative here.
+   */
+#ifdef HAMMER_LLVM_BACKEND
   &h__llvm_backend_vtable,
+#else
+  &h__missing_backend_vtable,
+#endif
 };
 
 
diff --git a/src/hammer.h b/src/hammer.h
index 821924fa7e267009df54885c31a6a03628528de7..b86e4ae04016b10548b4bb92eef02e2a2a4a9b59 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -46,6 +46,10 @@ typedef enum HParserBackend_ {
   PB_LLk,
   PB_LALR,
   PB_GLR,
+  /*
+   * PB_LLVM stays even if no LLVM backend is compiled in, since these
+   * constants are exposed to callers.
+   */
   PB_LLVM,
   PB_MAX = PB_LLVM
 } HParserBackend;
diff --git a/src/internal.h b/src/internal.h
index 69e27a2763c9eeed22473c34d6cc8f780d4493ad..2b2d6004266680b5a4a8ae7d20da80506d7082da 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -24,11 +24,14 @@
 #define HAMMER_INTERNAL__H
 #include <stdint.h>
 #include <assert.h>
+#include <limits.h>
 #include <string.h>
+#ifdef HAMMER_LLVM_BACKEND
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wpedantic"
 #include <llvm-c/Core.h>
 #pragma GCC diagnostic pop
+#endif
 #include "hammer.h"
 #include "platform.h"
 
@@ -156,23 +159,90 @@ static inline void h_sarray_clear(HSArray *arr) {
 
 // }}}
 
-typedef unsigned int *HCharset;
+typedef unsigned int HCharsetWord;
+#define CHARSET_WHOLE_WORD_MASK UINT_MAX
+
+typedef HCharsetWord *HCharset;
+
+#define CHARSET_BITS_PER_WORD (sizeof(HCharsetWord) * 8)
+#define CHARSET_WORDS (256 / CHARSET_BITS_PER_WORD)
+#define CHARSET_SIZE (CHARSET_WORDS * sizeof(HCharsetWord))
+#define CHARSET_BIT_IDX_TO_WORD(idx) \
+  (((unsigned int)(idx)) / CHARSET_BITS_PER_WORD)
+#define CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx) \
+  (((unsigned int)(idx)) % CHARSET_BITS_PER_WORD)
+#define CHART_WORD_AND_BIT_TO_BIT_IDX(word,bit) \
+    ((uint8_t)(CHARSET_BITS_PER_WORD * ((unsigned int)(word)) + \
+      ((unsigned int)(bit))))
+#define CHARSET_BIT_POS_IN_WORD_MASK(bit) \
+  ((((HCharsetWord)(1)) << (bit)) & CHARSET_WHOLE_WORD_MASK)
+/* Mask for all bits below a position */
+#define CHARSET_BIT_MASK_UP_TO_POS(bit) \
+  ((CHARSET_BIT_POS_IN_WORD_MASK((bit)) - 1) & CHARSET_WHOLE_WORD_MASK)
+/* Mask for all bits at and above a position (clears the bits below it) */
+#define CHARSET_BIT_MASK_FROM_POS(bit) \
+  ((~CHARSET_BIT_MASK_UP_TO_POS((bit))) & CHARSET_WHOLE_WORD_MASK)
+
+static inline HCharset copy_charset(HAllocator *mm__, HCharset in) {
+  HCharset cs = h_new(HCharsetWord, CHARSET_WORDS);
+  memcpy(cs, in, CHARSET_SIZE);
+  return cs;
+}
 
 static inline HCharset new_charset(HAllocator* mm__) {
-  HCharset cs = h_new(unsigned int, 256 / (sizeof(unsigned int) * 8));
-  memset(cs, 0, 32);  // 32 bytes = 256 bits
+  HCharset cs = h_new(HCharsetWord, CHARSET_WORDS);
+  memset(cs, 0, CHARSET_SIZE);
   return cs;
 }
 
+static inline void charset_complement(HCharset cs) {
+  for (unsigned int i = 0; i < CHARSET_WORDS; ++i) cs[i] = ~(cs[i]);
+}
+
 static inline int charset_isset(HCharset cs, uint8_t pos) {
-  return !!(cs[pos / (sizeof(*cs)*8)] & (1 << (pos % (sizeof(*cs)*8))));
+  return !!(cs[CHARSET_BIT_IDX_TO_WORD(pos)] &
+      CHARSET_BIT_POS_IN_WORD_MASK(CHARSET_BIT_IDX_TO_BIT_IN_WORD(pos)));
+}
+
+static inline void charset_restrict_to_range(HCharset cs, uint8_t idx_start, uint8_t idx_end) {
+  HCharsetWord mask;
+
+  if (idx_end < idx_start) {
+    /* Range is empty, clear the charset */
+    memset(cs, 0, CHARSET_SIZE);
+  } else {
+    /* Clear below, if any */
+    if (CHARSET_BIT_IDX_TO_WORD(idx_start) > 0) {
+      memset(cs, 0, CHARSET_BIT_IDX_TO_WORD(idx_start) * sizeof(HCharsetWord));
+    }
+    /* Note this partial start/ending word code still works if they are the same word */
+    /* Mask partial starting word, if any */
+    if (CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_start) != 0) {
+      mask = CHARSET_BIT_MASK_FROM_POS(CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_start));
+      cs[CHARSET_BIT_IDX_TO_WORD(idx_start)] &= mask;
+    }
+    /* Mask partial ending word, if any */
+    if (CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_end) != CHARSET_BITS_PER_WORD - 1) {
+      mask = CHARSET_BIT_MASK_UP_TO_POS(CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_end));
+      mask |= CHARSET_BIT_POS_IN_WORD_MASK(CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_end));
+      cs[CHARSET_BIT_IDX_TO_WORD(idx_end)] &= mask;
+    }
+    /* Clear above, if any */
+    if (CHARSET_BIT_IDX_TO_WORD(idx_end) + 1 < CHARSET_WORDS) {
+      memset(cs + CHARSET_BIT_IDX_TO_WORD(idx_end) + 1, 0,
+             (CHARSET_WORDS - (CHARSET_BIT_IDX_TO_WORD(idx_end) + 1)) *
+             sizeof(HCharsetWord));
+    }
+  }
 }
 
 static inline void charset_set(HCharset cs, uint8_t pos, int val) {
-  cs[pos / (sizeof(*cs)*8)] =
+  cs[CHARSET_BIT_IDX_TO_WORD(pos)] =
     val
-    ? cs[pos / (sizeof(*cs)*8)] |  (1 << (pos % (sizeof(*cs)*8)))
-    : cs[pos / (sizeof(*cs)*8)] & ~(1 << (pos % (sizeof(*cs)*8)));
+    ? cs[CHARSET_BIT_IDX_TO_WORD(pos)] |  CHARSET_BIT_POS_IN_WORD_MASK(
+        CHARSET_BIT_IDX_TO_BIT_IN_WORD(pos))
+    : cs[CHARSET_BIT_IDX_TO_WORD(pos)] & ~CHARSET_BIT_POS_IN_WORD_MASK(
+        CHARSET_BIT_IDX_TO_BIT_IN_WORD(pos));
 }
 
 typedef unsigned int HHashValue;
@@ -326,7 +396,10 @@ extern HParserBackendVTable h__packrat_backend_vtable;
 extern HParserBackendVTable h__llk_backend_vtable;
 extern HParserBackendVTable h__lalr_backend_vtable;
 extern HParserBackendVTable h__glr_backend_vtable;
+extern HParserBackendVTable h__missing_backend_vtable;
+#ifdef HAMMER_LLVM_BACKEND
 extern HParserBackendVTable h__llvm_backend_vtable;
+#endif
 // }}}
 
 // TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
@@ -418,13 +491,19 @@ struct HCFSequence_ {
   HCFChoice **items; // last one is NULL
 };
 
+#ifdef HAMMER_LLVM_BACKEND
+typedef struct HLLVMParserCompileContext_ HLLVMParserCompileContext;
+#endif
+
 struct HParserVtable_ {
   HParseResult* (*parse)(void *env, HParseState *state);
   bool (*isValidRegular)(void *env);
   bool (*isValidCF)(void *env);
   bool (*compile_to_rvm)(HRVMProg *prog, void* env); // FIXME: forgot what the bool return value was supposed to mean.
   void (*desugar)(HAllocator *mm__, HCFStack *stk__, void *env);
-  bool (*llvm)(LLVMBuilderRef builder, LLVMValueRef func, LLVMModuleRef mod, void *env);
+#ifdef HAMMER_LLVM_BACKEND
+  bool (*llvm)(HLLVMParserCompileContext *ctxt, void *env);
+#endif
   bool higher; // false if primitive
 };
 
diff --git a/src/llvm.h b/src/llvm.h
deleted file mode 100644
index 369f5729d54c0c0f3e2babec784a887cb0bc824e..0000000000000000000000000000000000000000
--- a/src/llvm.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef HAMMER_LLVM__H
-#define HAMMER_LLVM__H
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wpedantic"
-#include <llvm-c/Core.h>
-#pragma GCC diagnostic pop
-
-LLVMTypeRef llvm_inputstream, llvm_inputstreamptr, llvm_arena, llvm_arenaptr;
-LLVMTypeRef llvm_parsedtoken, llvm_parsedtokenptr, llvm_parseresult, llvm_parseresultptr;
-
-void h_llvm_make_charset_membership_test(LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
-                                         LLVMValueRef r, HCharset cs,
-                                         LLVMBasicBlockRef yes, LLVMBasicBlockRef no);
-void h_llvm_make_tt_suint(LLVMModuleRef mod, LLVMBuilderRef builder,
-                          LLVMValueRef stream, LLVMValueRef arena, 
-                          LLVMValueRef r, LLVMValueRef *mr_out);
-
-#endif // #ifndef HAMMER_LLVM__H
diff --git a/src/parsers/bits.c b/src/parsers/bits.c
index ae3243006c96fafc973bd1ca1e00be17cefab113..bd9fa8e064e7431a1b84a3de520ce35fc43fd7f7 100644
--- a/src/parsers/bits.c
+++ b/src/parsers/bits.c
@@ -1,10 +1,12 @@
 #include <assert.h>
+#ifdef HAMMER_LLVM_BACKEND
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wpedantic"
 #include <llvm-c/Core.h>
 #pragma GCC diagnostic pop
+#include "../backends/llvm/llvm.h"
+#endif
 #include "parser_internal.h"
-#include "../llvm.h"
 
 struct bits_env {
   uint8_t length;
@@ -22,76 +24,46 @@ static HParseResult* parse_bits(void* env, HParseState *state) {
   return make_result(state->arena, result);
 }
 
-static bool bits_llvm(LLVMBuilderRef builder, LLVMValueRef func, LLVMModuleRef mod, void* env) {
-  /*   %result = alloca %struct.HParsedToken_*, align 8 */
-  #pragma GCC diagnostic push
-  #pragma GCC diagnostic ignored "-Wunused-variable"
-  LLVMValueRef result = LLVMBuildAlloca(builder, llvm_parsedtoken, "result");
-  #pragma GCC diagnostic pop
-  /*   store i8* %env, i8** %1, align 8 */
-  /*   store %struct.HParseState_* %state, %struct.HParseState_** %2, align 8 */
-  /*   %3 = load i8** %1, align 8 */
-  /*   %4 = bitcast i8* %3 to %struct.bits_env* */
-  /*   store %struct.bits_env* %4, %struct.bits_env** %env, align 8 */
-  /*   %5 = load %struct.HParseState_** %2, align 8 */
-  /*   %6 = getelementptr inbounds %struct.HParseState_* %5, i32 0, i32 2 */
-  /*   %7 = load %struct.HArena_** %6, align 8 */
-  /*   %8 = call noalias i8* @h_arena_malloc(%struct.HArena_* %7, i64 48) */
-  /*   %9 = bitcast i8* %8 to %struct.HParsedToken_* */
-  /*   store %struct.HParsedToken_* %9, %struct.HParsedToken_** %result, align 8 */
-  /*   %10 = load %struct.bits_env** %env_, align 8 */
-  /*   %11 = getelementptr inbounds %struct.bits_env* %10, i32 0, i32 1 */
-  /*   %12 = load i8* %11, align 1 */
-  /*   %13 = zext i8 %12 to i32 */
-  /*   %14 = icmp ne i32 %13, 0 */
-  /*   %15 = select i1 %14, i32 4, i32 8 */
-  /*   %16 = load %struct.HParsedToken_** %result, align 8 */
-  /*   %17 = getelementptr inbounds %struct.HParsedToken_* %16, i32 0, i32 0 */
-  /*   store i32 %15, i32* %17, align 4 */
-  /*   %18 = load %struct.bits_env** %env_, align 8 */
-  /*   %19 = getelementptr inbounds %struct.bits_env* %18, i32 0, i32 1 */
-  /*   %20 = load i8* %19, align 1 */
-  /*   %21 = icmp ne i8 %20, 0 */
-  /*   br i1 %21, label %22, label %33 */
-
-  /* ; <label>:22                                      ; preds = %0 */
-  /*   %23 = load %struct.HParseState_** %2, align 8 */
-  /*   %24 = getelementptr inbounds %struct.HParseState_* %23, i32 0, i32 1 */
-  /*   %25 = load %struct.bits_env** %env_, align 8 */
-  /*   %26 = getelementptr inbounds %struct.bits_env* %25, i32 0, i32 0 */
-  /*   %27 = load i8* %26, align 1 */
-  /*   %28 = zext i8 %27 to i32 */
-  /*   %29 = call i64 @h_read_bits(%struct.HInputStream_* %24, i32 %28, i8 signext 1) */
-  /*   %30 = load %struct.HParsedToken_** %result, align 8 */
-  /*   %31 = getelementptr inbounds %struct.HParsedToken_* %30, i32 0, i32 1 */
-  /*   %32 = bitcast %union.anon* %31 to i64* */
-  /*   store i64 %29, i64* %32, align 8 */
-  /*   br label %44 */
-
-  /* ; <label>:33                                      ; preds = %0 */
-  /*   %34 = load %struct.HParseState_** %2, align 8 */
-  /*   %35 = getelementptr inbounds %struct.HParseState_* %34, i32 0, i32 1 */
-  /*   %36 = load %struct.bits_env** %env_, align 8 */
-  /*   %37 = getelementptr inbounds %struct.bits_env* %36, i32 0, i32 0 */
-  /*   %38 = load i8* %37, align 1 */
-  /*   %39 = zext i8 %38 to i32 */
-  /*   %40 = call i64 @h_read_bits(%struct.HInputStream_* %35, i32 %39, i8 signext 0) */
-  /*   %41 = load %struct.HParsedToken_** %result, align 8 */
-  /*   %42 = getelementptr inbounds %struct.HParsedToken_* %41, i32 0, i32 1 */
-  /*   %43 = bitcast %union.anon* %42 to i64* */
-  /*   store i64 %40, i64* %43, align 8 */
-  /*   br label %44 */
-  
-  /* ; <label>:44                                      ; preds = %33, %22 */
-  /*   %45 = load %struct.HParseState_** %2, align 8 */
-  /*   %46 = getelementptr inbounds %struct.HParseState_* %45, i32 0, i32 2 */
-  /*   %47 = load %struct.HArena_** %46, align 8 */
-  /*   %48 = load %struct.HParsedToken_** %result, align 8 */
-  /*   %49 = call %struct.HParseResult_* @make_result(%struct.HArena_* %47, %struct.HParsedToken_* %48) */
-  /*   ret %struct.HParseResult_* %49 */
+#ifdef HAMMER_LLVM_BACKEND
+
+static bool bits_llvm(HLLVMParserCompileContext *ctxt, void* env) {
+  /* Emit LLVM IR to parse ((struct bits_env *)env)->length bits */
+
+  if (!ctxt) return false;
+
+  struct bits_env *env_ = env;
+  /* Error out on unsupported length (only 1..64 bits are accepted) */
+  if (env_->length > 64 || env_->length == 0) return false;
+  /* Set up params for call to h_read_bits */
+  LLVMValueRef bits_args[3];
+  bits_args[0] = ctxt->stream;
+  bits_args[1] = LLVMConstInt(LLVMInt32Type(), env_->length, 0);
+  bits_args[2] = LLVMConstInt(LLVMInt8Type(), env_->signedp ? 1 : 0, 0);
+
+  /* Set up the single basic block ("bits") that all of this code goes into */
+  LLVMBasicBlockRef bits_bb = LLVMAppendBasicBlock(ctxt->func, "bits");
+
+  /* Branch from the caller's position into it, then build there */
+  LLVMBuildBr(ctxt->builder, bits_bb);
+  LLVMPositionBuilderAtEnd(ctxt->builder, bits_bb);
+
+  /* Call to h_read_bits() */
+  // %read_bits = call i64 @h_read_bits(%struct.HInputStream_* %8, i32 env_->length, i8 signext env_->signedp)
+  LLVMValueRef bits = LLVMBuildCall(ctxt->builder,
+      LLVMGetNamedFunction(ctxt->mod, "h_read_bits"), bits_args, 3, "read_bits");
+
+  /* Make an HParseResult out of it */
+  LLVMValueRef mr;
+  h_llvm_make_tt_suint(ctxt, env_->length, env_->signedp, bits, &mr);
+
+  /* Return mr; no separate failure path is emitted here */
+  LLVMBuildRet(ctxt->builder, mr);
+
+  return true;
+}
 
+#endif
+
 static HParsedToken *reshape_bits(const HParseResult *p, void* signedp_p) {
   // signedp == NULL iff unsigned
   bool signedp = (signedp_p != NULL);
@@ -177,7 +149,9 @@ static const HParserVtable bits_vt = {
   .isValidCF = h_true,
   .desugar = desugar_bits,
   .compile_to_rvm = bits_ctrvm,
+#ifdef HAMMER_LLVM_BACKEND
   .llvm = bits_llvm,
+#endif
   .higher = false,
 };
 
diff --git a/src/parsers/ch.c b/src/parsers/ch.c
index 1c396a2f3c8c2e2e8a7433964c397f8776688462..e22ed8c105673fd7b989b60e623e83ee9bead050 100644
--- a/src/parsers/ch.c
+++ b/src/parsers/ch.c
@@ -1,11 +1,13 @@
 #include <stdint.h>
 #include <assert.h>
+#ifdef HAMMER_LLVM_BACKEND
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wpedantic"
 #include <llvm-c/Core.h>
 #pragma GCC diagnostic pop
+#include "../backends/llvm/llvm.h"
+#endif
 #include "parser_internal.h"
-#include "../llvm.h"
 
 static HParseResult* parse_ch(void* env, HParseState *state) {
   uint8_t c = (uint8_t)(uintptr_t)(env);
@@ -46,77 +48,83 @@ static bool ch_ctrvm(HRVMProg *prog, void* env) {
   return true;
 }
 
-static bool ch_llvm(LLVMBuilderRef builder, LLVMValueRef func, LLVMModuleRef mod, void* env) {
+#ifdef HAMMER_LLVM_BACKEND
+
+static bool ch_llvm(HLLVMParserCompileContext *ctxt, void* env) {
   // Build a new LLVM function to parse a character
 
   // Set up params for calls to h_read_bits() and h_arena_malloc()
   LLVMValueRef bits_args[3];
-  LLVMValueRef stream = LLVMGetFirstParam(func);
-  stream = LLVMBuildBitCast(builder, stream, llvm_inputstreamptr, "stream");
-  bits_args[0] = stream;
+  bits_args[0] = ctxt->stream;
   bits_args[1] = LLVMConstInt(LLVMInt32Type(), 8, 0);
   bits_args[2] = LLVMConstInt(LLVMInt8Type(), 0, 0);
-  LLVMValueRef arena = LLVMGetLastParam(func);
 
   // Set up basic blocks: entry, success and failure branches, then exit
-  LLVMBasicBlockRef entry = LLVMAppendBasicBlock(func, "ch_entry");
-  LLVMBasicBlockRef success = LLVMAppendBasicBlock(func, "ch_success");
-  LLVMBasicBlockRef end = LLVMAppendBasicBlock(func, "ch_end");
+  LLVMBasicBlockRef entry = LLVMAppendBasicBlock(ctxt->func, "ch_entry");
+  LLVMBasicBlockRef success = LLVMAppendBasicBlock(ctxt->func, "ch_success");
+  LLVMBasicBlockRef end = LLVMAppendBasicBlock(ctxt->func, "ch_end");
 
   // Basic block: entry
-  LLVMPositionBuilderAtEnd(builder, entry);
+  LLVMBuildBr(ctxt->builder, entry);
+  LLVMPositionBuilderAtEnd(ctxt->builder, entry);
 
   // Call to h_read_bits()
   // %read_bits = call i64 @h_read_bits(%struct.HInputStream_* %8, i32 8, i8 signext 0)
-  LLVMValueRef bits = LLVMBuildCall(builder, LLVMGetNamedFunction(mod, "h_read_bits"), bits_args, 3, "read_bits");
+  LLVMValueRef bits = LLVMBuildCall(ctxt->builder,
+      LLVMGetNamedFunction(ctxt->mod, "h_read_bits"), bits_args, 3, "read_bits");
   // %2 = trunc i64 %read_bits to i8
-  LLVMValueRef r = LLVMBuildTrunc(builder, bits, LLVMInt8Type(), ""); // do we actually need this?
+  LLVMValueRef r = LLVMBuildTrunc(ctxt->builder,
+      bits, LLVMInt8Type(), ""); // do we actually need this?
 
   // Check if h_read_bits succeeded
   // %"c == r" = icmp eq i8 -94, %2 ; the -94 comes from c_
   uint8_t c_ = (uint8_t)(uintptr_t)(env);
   LLVMValueRef c = LLVMConstInt(LLVMInt8Type(), c_, 0);
-  LLVMValueRef icmp = LLVMBuildICmp(builder, LLVMIntEQ, c, r, "c == r");
+  LLVMValueRef icmp = LLVMBuildICmp(ctxt->builder, LLVMIntEQ, c, r, "c == r");
 
   // Branch so success or failure basic block, as appropriate
   // br i1 %"c == r", label %ch_success, label %ch_fail
-  LLVMBuildCondBr(builder, icmp, success, end);
+  LLVMBuildCondBr(ctxt->builder, icmp, success, end);
 
   // Basic block: success
-  LLVMPositionBuilderAtEnd(builder, success);
+  LLVMPositionBuilderAtEnd(ctxt->builder, success);
 
   /* Make a token */
   LLVMValueRef mr;
-  h_llvm_make_tt_suint(mod, builder, stream, arena, r, &mr);
+  h_llvm_make_tt_suint(ctxt, 8, 0, r, &mr);
 
   // br label %ch_end
-  LLVMBuildBr(builder, end);
+  LLVMBuildBr(ctxt->builder, end);
   
   // Basic block: end
-  LLVMPositionBuilderAtEnd(builder, end);
+  LLVMPositionBuilderAtEnd(ctxt->builder, end);
   // %rv = phi %struct.HParseResult_.3* [ %make_result, %ch_success ], [ null, %ch_entry ]
-  LLVMValueRef rv = LLVMBuildPhi(builder, llvm_parseresultptr, "rv");
+  LLVMValueRef rv = LLVMBuildPhi(ctxt->builder, ctxt->llvm_parseresultptr, "rv");
   LLVMBasicBlockRef rv_phi_incoming_blocks[] = {
     success,
     entry
     };
   LLVMValueRef rv_phi_incoming_values[] = {
     mr,
-    LLVMConstNull(llvm_parseresultptr)
+    LLVMConstNull(ctxt->llvm_parseresultptr)
     };
   LLVMAddIncoming(rv, rv_phi_incoming_values, rv_phi_incoming_blocks, 2);
   // ret %struct.HParseResult_.3* %rv
-  LLVMBuildRet(builder, rv);
+  LLVMBuildRet(ctxt->builder, rv);
   return true;
 }
 
+#endif /* defined(HAMMER_LLVM_BACKEND) */
+
 static const HParserVtable ch_vt = {
   .parse = parse_ch,
   .isValidRegular = h_true,
   .isValidCF = h_true,
   .desugar = desugar_ch,
   .compile_to_rvm = ch_ctrvm,
+#ifdef HAMMER_LLVM_BACKEND
   .llvm = ch_llvm,
+#endif
   .higher = false,
 };
 
diff --git a/src/parsers/charset.c b/src/parsers/charset.c
index 2f73da722408c1575eab883afd0242eef63eacd2..907fe927a79184c3c292a96630a49fc83fc0b1e3 100644
--- a/src/parsers/charset.c
+++ b/src/parsers/charset.c
@@ -1,12 +1,14 @@
 #include <assert.h>
 #include <string.h>
 #include "../internal.h"
+#ifdef HAMMER_LLVM_BACKEND
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wpedantic"
 #include <llvm-c/Core.h>
 #pragma GCC diagnostic pop
+#include "../backends/llvm/llvm.h"
+#endif /* defined(HAMMER_LLVM_BACKEND) */
 #include "parser_internal.h"
-#include "../llvm.h"
 
 static HParseResult* parse_charset(void *env, HParseState *state) {
   uint8_t in = h_read_bits(&state->input_stream, 8, false);
@@ -75,82 +77,89 @@ static bool cs_ctrvm(HRVMProg *prog, void *env) {
   return true;
 }
 
-static bool cs_llvm(LLVMBuilderRef builder, LLVMValueRef func,
-                    LLVMModuleRef mod, void* env) {
+#ifdef HAMMER_LLVM_BACKEND
+
+static bool cs_llvm(HLLVMParserCompileContext *ctxt, void* env) {
   /*
    * LLVM to build a function to parse a charset; the args are a stream and an
    * arena.
    */
+  bool ok;
 
-  LLVMValueRef stream = LLVMGetFirstParam(func);
-  stream = LLVMBuildBitCast(builder, stream, llvm_inputstreamptr, "stream");
-  LLVMValueRef arena = LLVMGetLastParam(func);
+  if (!ctxt) return false;
 
   /* Set up our basic blocks */
-  LLVMBasicBlockRef entry = LLVMAppendBasicBlock(func, "cs_entry");
-  LLVMBasicBlockRef success = LLVMAppendBasicBlock(func, "cs_success");
-  LLVMBasicBlockRef fail = LLVMAppendBasicBlock(func, "cs_fail");
-  LLVMBasicBlockRef end = LLVMAppendBasicBlock(func, "cs_end");
+  LLVMBasicBlockRef entry = LLVMAppendBasicBlock(ctxt->func, "cs_entry");
+  LLVMBasicBlockRef success = LLVMAppendBasicBlock(ctxt->func, "cs_success");
+  LLVMBasicBlockRef fail = LLVMAppendBasicBlock(ctxt->func, "cs_fail");
+  LLVMBasicBlockRef end = LLVMAppendBasicBlock(ctxt->func, "cs_end");
 
   /* Basic block: entry */
-  LLVMPositionBuilderAtEnd(builder, entry);
+  LLVMBuildBr(ctxt->builder, entry);
+  LLVMPositionBuilderAtEnd(ctxt->builder, entry);
   /* First we read the char */
   LLVMValueRef bits_args[3];
-  bits_args[0] = stream;
+  bits_args[0] = ctxt->stream;
   bits_args[1] = LLVMConstInt(LLVMInt32Type(), 8, 0);
   bits_args[2] = LLVMConstInt(LLVMInt8Type(), 0, 0);
-  LLVMValueRef bits = LLVMBuildCall(builder, LLVMGetNamedFunction(mod, "h_read_bits"), bits_args, 3, "read_bits");
-  LLVMValueRef r = LLVMBuildTrunc(builder, bits, LLVMInt8Type(), ""); // TODO Necessary? (same question in ch_llvm())
+  LLVMValueRef bits = LLVMBuildCall(ctxt->builder,
+      LLVMGetNamedFunction(ctxt->mod, "h_read_bits"), bits_args, 3, "read_bits");
+  LLVMValueRef r =
+    LLVMBuildTrunc(ctxt->builder, bits, LLVMInt8Type(), ""); // TODO Necessary? (same question in ch_llvm())
 
   /* We have a char, need to check if it's in the charset */
   HCharset cs = (HCharset)env;
-  /* Branch to either success or end, conditional on whether r is in cs */
-  h_llvm_make_charset_membership_test(mod, func, builder, r, cs, success, fail);
+  /* Branch to either success or fail, conditional on whether r is in cs */
+  ok = h_llvm_make_charset_membership_test(ctxt, r, cs, success, fail);
 
   /* Basic block: success */
-  LLVMPositionBuilderAtEnd(builder, success);
+  LLVMPositionBuilderAtEnd(ctxt->builder, success);
 
   LLVMValueRef mr;
-  h_llvm_make_tt_suint(mod, builder, stream, arena, r, &mr);
+  h_llvm_make_tt_suint(ctxt, 8, 0, r, &mr);
 
-  /* br label %ch_end */
-  LLVMBuildBr(builder, end);
+  /* br label %cs_end */
+  LLVMBuildBr(ctxt->builder, end);
 
   /* Basic block: fail */
-  LLVMPositionBuilderAtEnd(builder, fail);
+  LLVMPositionBuilderAtEnd(ctxt->builder, fail);
   /*
    * We just branch straight to end; this exists so that the phi node in 
    * end knows where all the incoming edges are from, rather than needing
    * some basic block constructed in h_llvm_make_charset_membership_test()
    */
-  LLVMBuildBr(builder, end);
+  LLVMBuildBr(ctxt->builder, end);
 
   /* Basic block: end */
-  LLVMPositionBuilderAtEnd(builder, end);
+  LLVMPositionBuilderAtEnd(ctxt->builder, end);
-  // %rv = phi %struct.HParseResult_.3* [ %make_result, %ch_success ], [ null, %ch_entry ]
-  LLVMValueRef rv = LLVMBuildPhi(builder, llvm_parseresultptr, "rv");
+  // %rv = phi %struct.HParseResult_.3* [ %make_result, %cs_success ], [ null, %cs_fail ]
+  LLVMValueRef rv = LLVMBuildPhi(ctxt->builder, ctxt->llvm_parseresultptr, "rv");
   LLVMBasicBlockRef rv_phi_incoming_blocks[] = {
     success,
     fail
   };
   LLVMValueRef rv_phi_incoming_values[] = {
     mr,
-    LLVMConstNull(llvm_parseresultptr)
+    LLVMConstNull(ctxt->llvm_parseresultptr)
   };
   LLVMAddIncoming(rv, rv_phi_incoming_values, rv_phi_incoming_blocks, 2);
   // ret %struct.HParseResult_.3* %rv
-  LLVMBuildRet(builder, rv);
+  LLVMBuildRet(ctxt->builder, rv);
 
-  return true;
+  return ok;
 }
 
+#endif /* defined(HAMMER_LLVM_BACKEND) */
+
 static const HParserVtable charset_vt = {
   .parse = parse_charset,
   .isValidRegular = h_true,
   .isValidCF = h_true,
   .desugar = desugar_charset,
   .compile_to_rvm = cs_ctrvm,
+#ifdef HAMMER_LLVM_BACKEND
   .llvm = cs_llvm,
+#endif
   .higher = false,
 };
 
diff --git a/src/t_parser.c b/src/t_parser.c
index f7c4baf7c0b59342949b3e5b0a5ce1d1b913ac2b..304362e49c61775cdef9edb8c459365695e5af5e 100644
--- a/src/t_parser.c
+++ b/src/t_parser.c
@@ -21,10 +21,19 @@ static void test_ch(gconstpointer backend) {
 }
 
 static void test_ch_range(gconstpointer backend) {
-  const HParser *range_ = h_ch_range('a', 'c');
-
-  g_check_parse_match(range_, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1, "u0x62");
-  g_check_parse_failed(range_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1);
+  const HParser *range_1 = h_ch_range('a', 'c');
+  const HParser *range_2 = h_ch_range('a', 'z');
+  const HParser *range_3 = h_ch_range('A', 'z');
+  const HParser *range_all = h_ch_range(0, 255);
+
+  g_check_parse_match(range_1, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1, "u0x62");
+  g_check_parse_failed(range_1, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1);
+  g_check_parse_match(range_2, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1, "u0x62");
+  g_check_parse_failed(range_2, (HParserBackend)GPOINTER_TO_INT(backend), "C", 1);
+  g_check_parse_match(range_3, (HParserBackend)GPOINTER_TO_INT(backend), "B", 1, "u0x42");
+  g_check_parse_failed(range_3, (HParserBackend)GPOINTER_TO_INT(backend), "2", 1);
+  /* range_all spans every byte value (0-255), so it can never reject a one-byte input */
+  g_check_parse_match(range_all, (HParserBackend)GPOINTER_TO_INT(backend), "B", 1, "u0x42");
 }
 
 //@MARK_START
@@ -213,18 +222,71 @@ static void test_action(gconstpointer backend) {
 
 static void test_in(gconstpointer backend) {
   uint8_t options[3] = { 'a', 'b', 'c' };
+  uint8_t odds[128];
+  uint8_t _1_mod_4[64];
+  uint8_t scattered[3] = { 'A', 'b', 'z' };
+  int i;
+
   const HParser *in_ = h_in(options, 3);
   g_check_parse_match(in_, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1, "u0x62");
   g_check_parse_failed(in_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1);
 
+  for (i = 0; i < 128; ++i) odds[i] = (uint8_t)(2*i + 1);
+  const HParser *odds_ = h_in(odds, 128);
+  g_check_parse_match(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "c", 1, "u0x63");
+  g_check_parse_match(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "E", 1, "u0x45");
+  g_check_parse_failed(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1);
+  g_check_parse_failed(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "F", 1);
+
+  for (i = 0; i < 64; ++i) _1_mod_4[i] = (uint8_t)(4*i + 1);
+  const HParser *_1_mod_4_ = h_in(_1_mod_4, 64);
+  g_check_parse_match(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61");
+  g_check_parse_match(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "E", 1, "u0x45");
+  g_check_parse_failed(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1);
+  g_check_parse_failed(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "c", 1);
+  g_check_parse_failed(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "F", 1);
+
+  const HParser *scattered_ = h_in(scattered, 3);
+  g_check_parse_match(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "A", 1, "u0x41");
+  g_check_parse_match(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1, "u0x62");
+  g_check_parse_match(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "z", 1, "u0x7a");
+  g_check_parse_failed(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "y", 1);
+  g_check_parse_failed(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "F", 1);
 }
 
 static void test_not_in(gconstpointer backend) {
   uint8_t options[3] = { 'a', 'b', 'c' };
+  uint8_t odds[128];
+  uint8_t _1_mod_4[64];
+  uint8_t scattered[3] = { 'A', 'b', 'z' };
+  int i;
+
   const HParser *not_in_ = h_not_in(options, 3);
   g_check_parse_match(not_in_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1, "u0x64");
   g_check_parse_failed(not_in_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1);
 
+  for (i = 0; i < 128; ++i) odds[i] = (uint8_t)(2*i + 1);
+  const HParser *odds_ = h_not_in(odds, 128);
+  g_check_parse_match(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1, "u0x64");
+  g_check_parse_match(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "F", 1, "u0x46");
+  g_check_parse_failed(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "c", 1);
+  g_check_parse_failed(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "E", 1);
+
+  for (i = 0; i < 64; ++i) _1_mod_4[i] = (uint8_t)(4*i + 1);
+  const HParser *_1_mod_4_ = h_not_in(_1_mod_4, 64);
+  g_check_parse_match(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1, "u0x62");
+  g_check_parse_match(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "F", 1, "u0x46");
+  g_check_parse_failed(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "e", 1);
+  g_check_parse_failed(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "A", 1);
+
+  const HParser *scattered_ = h_not_in(scattered, 3);
+  g_check_parse_match(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "B", 1, "u0x42");
+  g_check_parse_match(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61");
+  g_check_parse_match(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "y", 1, "u0x79");
+  g_check_parse_failed(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "A", 1);
+  g_check_parse_failed(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1);
+  g_check_parse_failed(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "z", 1);
+
 }
 
 static void test_end_p(gconstpointer backend) {
@@ -962,6 +1024,18 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/glr/result_length", GINT_TO_POINTER(PB_GLR), test_result_length);
   g_test_add_data_func("/core/parser/glr/token_position", GINT_TO_POINTER(PB_GLR), test_token_position);
 
+#ifdef HAMMER_LLVM_BACKEND
   g_test_add_data_func("/core/parser/llvm/ch", GINT_TO_POINTER(PB_LLVM), test_ch);
   g_test_add_data_func("/core/parser/llvm/ch_range", GINT_TO_POINTER(PB_LLVM), test_ch_range);
+  g_test_add_data_func("/core/parser/llvm/int64", GINT_TO_POINTER(PB_LLVM), test_int64);
+  g_test_add_data_func("/core/parser/llvm/int32", GINT_TO_POINTER(PB_LLVM), test_int32);
+  g_test_add_data_func("/core/parser/llvm/int16", GINT_TO_POINTER(PB_LLVM), test_int16);
+  g_test_add_data_func("/core/parser/llvm/int8", GINT_TO_POINTER(PB_LLVM), test_int8);
+  g_test_add_data_func("/core/parser/llvm/uint64", GINT_TO_POINTER(PB_LLVM), test_uint64);
+  g_test_add_data_func("/core/parser/llvm/uint32", GINT_TO_POINTER(PB_LLVM), test_uint32);
+  g_test_add_data_func("/core/parser/llvm/uint16", GINT_TO_POINTER(PB_LLVM), test_uint16);
+  g_test_add_data_func("/core/parser/llvm/uint8", GINT_TO_POINTER(PB_LLVM), test_uint8);
+  g_test_add_data_func("/core/parser/llvm/in", GINT_TO_POINTER(PB_LLVM), test_in);
+  g_test_add_data_func("/core/parser/llvm/not_in", GINT_TO_POINTER(PB_LLVM), test_not_in);
+#endif /* defined(HAMMER_LLVM_BACKEND) */
 }