diff --git a/.gitignore b/.gitignore index ed8fd180d2eeedc877681b2c8ea025f2fd3e8274..f14c6e9ee356cf57c2f0a01ac717a90318b3dee2 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ build/ *.pyc *.ll libhammer.pc +TestResult.xml diff --git a/.travis.yml b/.travis.yml index 01d0f512a906642fbcf016b3b285350c91a1b528..566fdbfc87108c23cad6411c47097d813d690a87 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,100 +8,176 @@ env: global: - LLVM_CONFIG=llvm-config-3.8 matrix: - - BINDINGS=none + - BINDINGS=none LLVM_BACKEND=0 + - BINDINGS=none LLVM_BACKEND=1 matrix: include: - compiler: gcc language: ruby rvm: ruby-2.2.5 - env: BINDINGS=ruby + env: BINDINGS=ruby LLVM_BACKEND=0 + - compiler: gcc + language: ruby + rvm: ruby-2.2.5 + env: BINDINGS=ruby LLVM_BACKEND=1 - compiler: clang language: ruby rvm: ruby-2.2.5 - env: BINDINGS=ruby CC=clang + env: BINDINGS=ruby CC=clang LLVM_BACKEND=0 + - compiler: clang + language: ruby + rvm: ruby-2.2.5 + env: BINDINGS=ruby CC=clang LLVM_BACKEND=1 + - compiler: gcc + language: ruby + rvm: ruby-2.3.1 + env: BINDINGS=ruby LLVM_BACKEND=0 - compiler: gcc language: ruby rvm: ruby-2.3.1 - env: BINDINGS=ruby + env: BINDINGS=ruby LLVM_BACKEND=1 - compiler: clang language: ruby rvm: ruby-2.3.1 - env: BINDINGS=ruby CC=clang + env: BINDINGS=ruby CC=clang LLVM_BACKEND=0 + - compiler: clang + language: ruby + rvm: ruby-2.3.1 + env: BINDINGS=ruby CC=clang LLVM_BACKEND=1 + - compiler: gcc + language: python + python: "2.7.10" + env: BINDINGS=python LLVM_BACKEND=0 - compiler: gcc language: python - python: "2.7" - env: BINDINGS=python + python: "2.7.10" + env: BINDINGS=python LLVM_BACKEND=1 - compiler: clang language: python - python: "2.7" - env: BINDINGS=python CC=clang + python: "2.7.10" + env: BINDINGS=python CC=clang LLVM_BACKEND=0 + - compiler: clang + language: python + python: "2.7.10" + env: BINDINGS=python CC=clang LLVM_BACKEND=1 - compiler: gcc language: perl perl: "5.18" - env: BINDINGS=perl + env: BINDINGS=perl LLVM_BACKEND=0 + - 
compiler: gcc + language: perl + perl: "5.18" + env: BINDINGS=perl LLVM_BACKEND=1 - compiler: clang language: perl perl: "5.18" - env: BINDINGS=perl CC=clang + env: BINDINGS=perl CC=clang LLVM_BACKEND=0 + - compiler: clang + language: perl + perl: "5.18" + env: BINDINGS=perl CC=clang LLVM_BACKEND=1 + - compiler: gcc + language: perl + perl: "5.14" + env: BINDINGS=perl LLVM_BACKEND=0 - compiler: gcc language: perl perl: "5.14" - env: BINDINGS=perl + env: BINDINGS=perl LLVM_BACKEND=1 + - compiler: clang + language: perl + perl: "5.14" + env: BINDINGS=perl CC=clang LLVM_BACKEND=0 - compiler: clang language: perl perl: "5.14" - env: BINDINGS=perl CC=clang + env: BINDINGS=perl CC=clang LLVM_BACKEND=1 + - compiler: gcc + language: perl + perl: "5.10" + env: BINDINGS=perl LLVM_BACKEND=0 - compiler: gcc language: perl perl: "5.10" - env: BINDINGS=perl + env: BINDINGS=perl LLVM_BACKEND=1 + - compiler: clang + language: perl + perl: "5.10" + env: BINDINGS=perl CC=clang LLVM_BACKEND=0 - compiler: clang language: perl perl: "5.10" - env: BINDINGS=perl CC=clang + env: BINDINGS=perl CC=clang LLVM_BACKEND=1 - compiler: gcc language: php php: "5.5" - env: BINDINGS=php + env: BINDINGS=php LLVM_BACKEND=0 + - compiler: gcc + language: php + php: "5.5" + env: BINDINGS=php LLVM_BACKEND=1 - compiler: clang language: php php: "5.5" - env: BINDINGS=php CC=clang + env: BINDINGS=php CC=clang LLVM_BACKEND=0 + - compiler: clang + language: php + php: "5.5" + env: BINDINGS=php CC=clang LLVM_BACKEND=1 - compiler: gcc language: php php: "5.4" - env: BINDINGS=php + env: BINDINGS=php LLVM_BACKEND=0 + - compiler: gcc + language: php + php: "5.4" + env: BINDINGS=php LLVM_BACKEND=1 - compiler: clang language: php php: "5.4" - env: BINDINGS=php CC=clang + env: BINDINGS=php CC=clang LLVM_BACKEND=0 + - compiler: clang + language: php + php: "5.4" + env: BINDINGS=php CC=clang LLVM_BACKEND=1 + - compiler: gcc + language: dotnet + env: BINDINGS=dotnet LLVM_BACKEND=0 - compiler: gcc language: dotnet - env: 
BINDINGS=dotnet + env: BINDINGS=dotnet LLVM_BACKEND=1 - compiler: clang language: dotnet - env: BINDINGS=dotnet CC=clang + env: BINDINGS=dotnet CC=clang LLVM_BACKEND=0 + - compiler: clang + language: dotnet + env: BINDINGS=dotnet CC=clang LLVM_BACKEND=1 - compiler: gcc language: cpp - env: BINDINGS=cpp + env: BINDINGS=cpp LLVM_BACKEND=0 - compiler: gcc language: cpp - env: BINDINGS=cpp CC=clang + env: BINDINGS=cpp LLVM_BACKEND=1 + - compiler: clang + language: cpp + env: BINDINGS=cpp CC=clang LLVM_BACKEND=0 + - compiler: clang + language: cpp + env: BINDINGS=cpp CC=clang LLVM_BACKEND=1 before_install: - sudo apt-get update -qq - - sudo apt-get install lcov + - sudo apt-get install -y lcov - gem install coveralls-lcov - if [ "$CC" == "gcc" ]; then sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y; sudo apt-get update -qq; sudo apt-get install gcc-5; fi - sudo apt-get install llvm-3.8 llvm-3.8-dev clang-3.8 - - if [ "$BINDINGS" != "none" ]; then sudo apt-get install -qq swig; fi - - if [ "$BINDINGS" == "perl" ]; then sudo apt-get install -t trusty-backports swig3.0; fi + - if [ "$BINDINGS" != "none" ]; then sudo apt-get install -qq swig; sudo apt-get install -t trusty-backports swig3.0; fi - if [ "$BINDINGS" == "python" ]; then sudo apt-get install -qq python-dev; fi - if [ "$BINDINGS" == "dotnet" ]; then sudo apt-get install -qq mono-devel mono-mcs nunit nunit-console; fi install: true before_script: - if [ "$BINDINGS" == "php" ]; then phpenv config-add src/bindings/php/hammer.ini; fi script: - - if [ "$BINDINGS" == "none" ]; then scons test --variant=debug --coverage; else scons bindings=$BINDINGS test; fi + - if [ "$LLVM_BACKEND" == "1" ]; then LLVM_FLAG="--enable-llvm-backend"; else LLVM_FLAG="--disable-llvm-backend"; fi; if [ "$BINDINGS" == "none" ]; then scons test --variant=debug $LLVM_FLAG --coverage; else scons $LLVM_FLAG bindings=$BINDINGS test; fi after_success: - if [ "$BINDINGS" == "none" ]; then if [ "$CC" == "clang" ]; then llvm-cov gcov -o 
coverage.info build/debug/src/test_suite.gcda; else lcov --capture --directory build/debug/src --output-file coverage.info; fi; fi - coveralls-lcov coverage.info diff --git a/SConstruct b/SConstruct index 43ba87ea10730be4731e79ce412e06b944e0758c..8dc2a8dc7f32b84b5d18868273466424ddf74d43 100644 --- a/SConstruct +++ b/SConstruct @@ -46,7 +46,6 @@ if 'DESTDIR' in env: print >>sys.stderr, "--!!-- you want; files will be installed in" print >>sys.stderr, "--!!-- %s" % (calcInstallPath("$prefix"),) -env['LLVM_CONFIG'] = "llvm-config" if 'includedir' in env: env['incpath'] = calcInstallPath("$includedir", "hammer") else: @@ -62,7 +61,7 @@ else: env['parsersincpath'] = calcInstallPath("$includedir", "hammer", "parsers") env['backendsincpath'] = calcInstallPath("$includedir", "hammer", "backends") -env.MergeFlags("-std=gnu11 -Wno-unused-parameter -Wno-attributes -Wno-unused-variable -Wall -Wextra -Werror") +env.MergeFlags("-std=gnu11 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable") if env['PLATFORM'] == 'darwin': env.Append(SHLINKFLAGS = '-install_name ' + env["libpath"] + '/${TARGET.file}') @@ -91,6 +90,16 @@ AddOption("--in-place", action="store_true", help="Build in-place, rather than in the build/<variant> tree") +AddOption("--disable-llvm-backend", + dest="use_llvm", + default=False, + action="store_false", + help="Disable the LLVM backend (and don't require LLVM library dependencies)") +AddOption("--enable-llvm-backend", + dest="use_llvm", + default=False, + action="store_true", + help="Enable the LLVM backend (and require LLVM library dependencies)") dbg = env.Clone(VARIANT='debug') dbg.MergeFlags("-g -O0") @@ -105,7 +114,12 @@ else: env["CC"] = os.getenv("CC") or env["CC"] env["CXX"] = os.getenv("CXX") or env["CXX"] -env["LLVM_CONFIG"] = os.getenv("LLVM_CONFIG") or env["LLVM_CONFIG"] + +if GetOption("use_llvm"): + # Overridable default path to llvm-config + env['LLVM_CONFIG'] = "llvm-config" + env["LLVM_CONFIG"] = 
os.getenv("LLVM_CONFIG") or env["LLVM_CONFIG"] + env.MergeFlags("-DHAMMER_LLVM_BACKEND") if GetOption("coverage"): env.Append(CFLAGS=["--coverage"], @@ -114,8 +128,11 @@ if GetOption("coverage"): if env["CC"] == "gcc": env.Append(LIBS=['gcov']) else: - env.ParseConfig('%s --cflags --ldflags --libs core executionengine mcjit analysis x86codegen x86info' % \ - env["LLVM_CONFIG"]) + # XXX Why do we need this with --coverage when we're doing it anyway? + if GetOption("use_llvm"): + env.ParseConfig('%s --cflags --ldflags --libs core executionengine mcjit analysis x86codegen x86info' % \ + env["LLVM_CONFIG"]) + if os.getenv("CC") == "clang" or env['PLATFORM'] == 'darwin': env.Replace(CC="clang", @@ -126,113 +143,122 @@ env["ENV"].update(x for x in os.environ.items() if x[0].startswith("CCC_")) #rootpath = env['ROOTPATH'] = os.path.abspath('.') #env.Append(CPPPATH=os.path.join('#', "hammer")) +if GetOption("use_llvm"): # Set up LLVM config stuff to export # some llvm versions are old and will not work; some require --system-libs # with llvm-config, and some will break if given it -llvm_config_version = subprocess.Popen('%s --version' % env["LLVM_CONFIG"], \ - shell=True, \ - stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate() -if LooseVersion(llvm_config_version[0]) < LooseVersion("3.6"): - print "This LLVM version %s is too old" % llvm_config_version - Exit(1) - -if LooseVersion(llvm_config_version[0]) < LooseVersion("3.9") and \ - LooseVersion(llvm_config_version[0]) >= LooseVersion("3.5"): - llvm_system_libs_flag = "--system-libs" -else: - llvm_system_libs_flag = "" - -# Only keep one copy of this -llvm_required_components = "core executionengine mcjit analysis x86codegen x86info" -# Stubbing this out so we can implement static-only mode if needed later -llvm_use_shared = True -# Can we ask for shared/static from llvm-config? 
-if LooseVersion(llvm_config_version[0]) < LooseVersion("3.9"): - # Nope - llvm_linkage_type_flag = "" - llvm_use_computed_shared_lib_name = True -else: - # Woo, they finally fixed the dumb - llvm_use_computed_shared_lib_name = False - if llvm_use_shared: - llvm_linkage_type_flag = "--link-shared" + llvm_config_version = subprocess.Popen('%s --version' % env["LLVM_CONFIG"], \ + shell=True, \ + stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate() + if LooseVersion(llvm_config_version[0]) < LooseVersion("3.6"): + print "This LLVM version %s is too old" % llvm_config_version[0].strip() + Exit(1) + + if LooseVersion(llvm_config_version[0]) < LooseVersion("3.9") and \ + LooseVersion(llvm_config_version[0]) >= LooseVersion("3.5"): + llvm_system_libs_flag = "--system-libs" else: - llvm_linkage_type_flag = "--link-static" - -if llvm_use_computed_shared_lib_name: - # Okay, pull out the major and minor version numbers (barf barf) - p = re.compile("^(\d+)\.(\d+).*$") - m = p.match(llvm_config_version[0]) - if m: - llvm_computed_shared_lib_name = "LLVM-%d.%d" % ((int)(m.group(1)), (int)(m.group(2))) + llvm_system_libs_flag = "" + + # Only keep one copy of this + llvm_required_components = "core executionengine mcjit analysis x86codegen x86info" + # Stubbing this out so we can implement static-only mode if needed later + llvm_use_shared = True + # Can we ask for shared/static from llvm-config? 
+ if LooseVersion(llvm_config_version[0]) < LooseVersion("3.9"): + # Nope + llvm_linkage_type_flag = "" + llvm_use_computed_shared_lib_name = True else: - print "Couldn't compute shared library name from LLVM version '%s', but needed to" % \ - llvm_config_version[0] - Exit(1) -else: - # We won't be needing it - llvm_computed_shared_lib_name = None - -# llvm-config 'helpfully' supplies -g and -O flags; educate it with this -# custom ParseConfig function arg; make it a class with a method so we can -# pass it around with scons export/import - -class LLVMConfigSanitizer: - def sanitize(self, env, cmd, unique=1): - # cmd is output from llvm-config - flags = cmd.split() - # match -g or -O flags - p = re.compile("^-[gO].*$") - filtered_flags = [flag for flag in flags if not p.match(flag)] - filtered_cmd = ' '.join(filtered_flags) - # print "llvm_config_sanitize: \"%s\" => \"%s\"" % (cmd, filtered_cmd) - env.MergeFlags(filtered_cmd, unique) -llvm_config_sanitizer = LLVMConfigSanitizer() - -# LLVM defines, which the python bindings need -try: - llvm_config_cflags = subprocess.Popen('%s --cflags' % env["LLVM_CONFIG"], \ - shell=True, \ - stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate() - flags = llvm_config_cflags[0].split() - # get just the -D ones - p = re.compile("^-D(.*)$") - llvm_defines = [p.match(flag).group(1) for flag in flags if p.match(flag)] -except: - print "%s failed. Make sure you have LLVM and clang installed." % env["LLVM_CONFIG"] - Exit(1) - -# Get the llvm includedir, which the python bindings need -try: - llvm_config_includes = subprocess.Popen('%s --includedir' % env["LLVM_CONFIG"], \ - shell=True, \ - stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate() - llvm_includes = llvm_config_includes[0].splitlines() -except: - print "%s failed. Make sure you have LLVM and clang installed." 
% env["LLVM_CONFIG"] - Exit(1) - -# This goes here so we already know all the LLVM crap -# Make a fresh environment to parse the config into, to read out just LLVM stuff -llvm_dummy_env = Environment() -# Get LLVM stuff into LIBS/LDFLAGS -llvm_dummy_env.ParseConfig('%s --ldflags %s %s %s' % \ - (env["LLVM_CONFIG"], llvm_system_libs_flag, llvm_linkage_type_flag, \ - llvm_required_components), \ - function=llvm_config_sanitizer.sanitize) -# Get the right -l lines in -if llvm_use_shared: + # Woo, they finally fixed the dumb + llvm_use_computed_shared_lib_name = False + if llvm_use_shared: + llvm_linkage_type_flag = "--link-shared" + else: + llvm_linkage_type_flag = "--link-static" + if llvm_use_computed_shared_lib_name: - llvm_dummy_env.Append(LIBS=[llvm_computed_shared_lib_name, ]) + # Okay, pull out the major and minor version numbers (barf barf) + p = re.compile("^(\d+)\.(\d+).*$") + m = p.match(llvm_config_version[0]) + if m: + llvm_computed_shared_lib_name = "LLVM-%d.%d" % ((int)(m.group(1)), (int)(m.group(2))) + else: + print "Couldn't compute shared library name from LLVM version '%s', but needed to" % \ + llvm_config_version[0] + Exit(1) else: - llvm_dummy_env.ParseConfig('%s %s --libs %s' % \ - (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \ - function=llvm_config_sanitizer.sanitize) -llvm_dummy_env.Append(LIBS=['stdc++', ], ) + # We won't be needing it + llvm_computed_shared_lib_name = None + + # llvm-config 'helpfully' supplies -g and -O flags; educate it with this + # custom ParseConfig function arg; make it a class with a method so we can + # pass it around with scons export/import + + class LLVMConfigSanitizer: + def sanitize(self, env, cmd, unique=1): + # cmd is output from llvm-config + flags = cmd.split() + # match -g or -O flags + p = re.compile("^-[gO].*$") + filtered_flags = [flag for flag in flags if not p.match(flag)] + filtered_cmd = ' '.join(filtered_flags) + # print "llvm_config_sanitize: \"%s\" => \"%s\"" % (cmd, 
filtered_cmd) + env.MergeFlags(filtered_cmd, unique) + llvm_config_sanitizer = LLVMConfigSanitizer() + + # LLVM defines, which the python bindings need + try: + llvm_config_cflags = subprocess.Popen('%s --cflags' % env["LLVM_CONFIG"], \ + shell=True, \ + stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate() + flags = llvm_config_cflags[0].split() + # get just the -D ones + p = re.compile("^-D(.*)$") + llvm_defines = [p.match(flag).group(1) for flag in flags if p.match(flag)] + except: + print "%s failed. Make sure you have LLVM and clang installed." % env["LLVM_CONFIG"] + Exit(1) + + # Get the llvm includedir, which the python bindings need + try: + llvm_config_includes = subprocess.Popen('%s --includedir' % env["LLVM_CONFIG"], \ + shell=True, \ + stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate() + llvm_includes = llvm_config_includes[0].splitlines() + except: + print "%s failed. Make sure you have LLVM and clang installed." % env["LLVM_CONFIG"] + Exit(1) + + # This goes here so we already know all the LLVM crap + # Make a fresh environment to parse the config into, to read out just LLVM stuff + llvm_dummy_env = Environment() + # Get LLVM stuff into LIBS/LDFLAGS + llvm_dummy_env.ParseConfig('%s --ldflags %s %s %s' % \ + (env["LLVM_CONFIG"], llvm_system_libs_flag, llvm_linkage_type_flag, \ + llvm_required_components), \ + function=llvm_config_sanitizer.sanitize) + # Get the right -l lines in + if llvm_use_shared: + if llvm_use_computed_shared_lib_name: + llvm_dummy_env.Append(LIBS=[llvm_computed_shared_lib_name, ]) + else: + llvm_dummy_env.ParseConfig('%s %s --libs %s' % \ + (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \ + function=llvm_config_sanitizer.sanitize) + llvm_dummy_env.Append(LIBS=['stdc++', ], ) +#endif GetOption("use_llvm") + +# The .pc.in file has substs for llvm_lib_flags and llvm_libdir_flags, so if +# we aren't using LLVM, set them to the empty string +if GetOption("use_llvm"): + env['llvm_libdir_flags'] 
= llvm_dummy_env.subst('$_LIBDIRFLAGS') + env['llvm_lib_flags'] = llvm_dummy_env.subst('$_LIBFLAGS') +else: + env['llvm_libdir_flags'] = "" + env['llvm_lib_flags'] = "" -env['llvm_libdir_flags'] = llvm_dummy_env.subst('$_LIBDIRFLAGS') -env['llvm_lib_flags'] = llvm_dummy_env.subst('$_LIBFLAGS') pkgconfig = env.ScanReplace('libhammer.pc.in') Default(pkgconfig) env.Install("$pkgconfigpath", pkgconfig) @@ -249,16 +275,17 @@ Export('env') Export('testruns') Export('targets') # LLVM-related flags -Export('llvm_computed_shared_lib_name') -Export('llvm_config_sanitizer') -Export('llvm_config_version') -Export('llvm_defines') -Export('llvm_includes') -Export('llvm_linkage_type_flag') -Export('llvm_required_components') -Export('llvm_system_libs_flag') -Export('llvm_use_computed_shared_lib_name') -Export('llvm_use_shared') +if GetOption("use_llvm"): + Export('llvm_computed_shared_lib_name') + Export('llvm_config_sanitizer') + Export('llvm_config_version') + Export('llvm_defines') + Export('llvm_includes') + Export('llvm_linkage_type_flag') + Export('llvm_required_components') + Export('llvm_system_libs_flag') + Export('llvm_use_computed_shared_lib_name') + Export('llvm_use_shared') if not GetOption("in_place"): env['BUILD_BASE'] = 'build/$VARIANT' diff --git a/src/SConscript b/src/SConscript index 80d96bf45c85822f888c896dfa479417ae298edc..9b89730d9b7015dad78e40f790a414cdab00fb44 100644 --- a/src/SConscript +++ b/src/SConscript @@ -6,14 +6,15 @@ import subprocess Import('env testruns') # LLVM-related flags -Import('llvm_computed_shared_lib_name') -Import('llvm_config_sanitizer') -Import('llvm_config_version') -Import('llvm_linkage_type_flag') -Import('llvm_required_components') -Import('llvm_system_libs_flag') -Import('llvm_use_computed_shared_lib_name') -Import('llvm_use_shared') +if GetOption("use_llvm"): + Import('llvm_computed_shared_lib_name') + Import('llvm_config_sanitizer') + Import('llvm_config_version') + Import('llvm_linkage_type_flag') + 
Import('llvm_required_components') + Import('llvm_system_libs_flag') + Import('llvm_use_computed_shared_lib_name') + Import('llvm_use_shared') dist_headers = [ "hammer.h", @@ -31,6 +32,7 @@ parsers_headers = [ backends_headers = [ "backends/regex.h", - "backends/contextfree.h" + "backends/contextfree.h", + "backends/missing.h" ] parsers = ['parsers/%s.c'%s for s in @@ -61,10 +63,15 @@ parsers = ['parsers/%s.c'%s for s in 'unimplemented', 'whitespace', 'xor', - 'value']] + 'value']] backends = ['backends/%s.c' % s for s in - ['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0', 'llvm']] + ['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0', "missing"]] + +# Add LLVM backend if enabled +if GetOption("use_llvm"): + llvm_backend_files = ['llvm.c', 'llvm_charset.c', 'llvm_suint.c'] + backends = backends + ['backends/llvm/%s' % s for s in llvm_backend_files] misc_hammer_parts = [ 'allocator.c', @@ -89,29 +96,33 @@ ctests = ['t_benchmark.c', 't_misc.c', 't_regression.c'] -env.ParseConfig('%s --cflags --ldflags' % env["LLVM_CONFIG"], function=llvm_config_sanitizer.sanitize) +if GetOption("use_llvm"): + env.ParseConfig('%s --cflags --ldflags' % env["LLVM_CONFIG"], function=llvm_config_sanitizer.sanitize) libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts) # Use a cloned env for the shared library so we can have library dependencies shared_env = env.Clone() -# Get LLVM stuff into LIBS/LDFLAGS -shared_env.ParseConfig('%s --ldflags %s %s %s' % \ - (env["LLVM_CONFIG"], llvm_system_libs_flag, llvm_linkage_type_flag, llvm_required_components), \ - function=llvm_config_sanitizer.sanitize) -# Get the right -l lines in -if llvm_use_shared: - if llvm_use_computed_shared_lib_name: - shared_env.Append(LIBS=[llvm_computed_shared_lib_name, ]) +if GetOption("use_llvm"): + # Get LLVM stuff into LIBS/LDFLAGS + shared_env.ParseConfig('%s --ldflags %s %s %s' % \ + (env["LLVM_CONFIG"], llvm_system_libs_flag, \ + llvm_linkage_type_flag, llvm_required_components), \ + 
function=llvm_config_sanitizer.sanitize) + # Get the right -l lines in + if llvm_use_shared: + if llvm_use_computed_shared_lib_name: + shared_env.Append(LIBS=[llvm_computed_shared_lib_name, ]) + else: + shared_env.ParseConfig('%s %s --libs %s' % \ + (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \ + function=llvm_config_sanitizer.sanitize) else: + # Just grab the statics regardless of version shared_env.ParseConfig('%s %s --libs %s' % \ (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \ function=llvm_config_sanitizer.sanitize) -else: - # Just grab the statics regardless of version - shared_env.ParseConfig('%s %s --libs %s' % \ - (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \ - function=llvm_config_sanitizer.sanitize) -shared_env.Append(LIBS=['stdc++', ], LIBPATH=['.']) + shared_env.Append(LIBS=['stdc++', ], LIBPATH=['.']) + libhammer_shared = shared_env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts) Default(libhammer_shared, libhammer_static) @@ -124,24 +135,27 @@ env.Install("$backendsincpath", backends_headers) testenv = env.Clone() testenv.Append(LIBS=['hammer'], LIBPATH=['.']) testenv.ParseConfig('pkg-config --cflags --libs glib-2.0') -# Get LLVM stuff into LIBS/LDFLAGS -testenv.ParseConfig('%s --ldflags %s %s %s' % \ - (env["LLVM_CONFIG"], llvm_system_libs_flag, llvm_linkage_type_flag, llvm_required_components), \ - function=llvm_config_sanitizer.sanitize) -# Get the right -l lines in -if llvm_use_shared: - if llvm_use_computed_shared_lib_name: - testenv.Append(LIBS=[llvm_computed_shared_lib_name, ]) +if GetOption("use_llvm"): + # Get LLVM stuff into LIBS/LDFLAGS + testenv.ParseConfig('%s --ldflags %s %s %s' % \ + (env["LLVM_CONFIG"], llvm_system_libs_flag, \ + llvm_linkage_type_flag, llvm_required_components), \ + function=llvm_config_sanitizer.sanitize) + # Get the right -l lines in + if llvm_use_shared: + if llvm_use_computed_shared_lib_name: + 
testenv.Append(LIBS=[llvm_computed_shared_lib_name, ]) + else: + testenv.ParseConfig('%s %s --libs %s' % \ + (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \ + function=llvm_config_sanitizer.sanitize) else: + # Just grab the statics regardless of version testenv.ParseConfig('%s %s --libs %s' % \ (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \ function=llvm_config_sanitizer.sanitize) -else: - # Just grab the statics regardless of version - testenv.ParseConfig('%s %s --libs %s' % \ - (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \ - function=llvm_config_sanitizer.sanitize) -testenv.Append(LIBS=['stdc++'], LIBPATH=['.']) + testenv.Append(LIBS=['stdc++'], LIBPATH=['.']) + ctestexec = testenv.Program('test_suite', ctests + ['test_suite.c'], LINKFLAGS="--coverage" if testenv.GetOption("coverage") else None) ctest = Alias('testc', [ctestexec], "".join(["env LD_LIBRARY_PATH=", os.path.dirname(ctestexec[0].path), " ", ctestexec[0].path])) AlwaysBuild(ctest) diff --git a/src/backends/llvm.c b/src/backends/llvm.c deleted file mode 100644 index 79f91eafab27dac54b544601363e39e2c4567276..0000000000000000000000000000000000000000 --- a/src/backends/llvm.c +++ /dev/null @@ -1,317 +0,0 @@ -#include <llvm-c/Analysis.h> -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpedantic" -#include <llvm-c/Core.h> -#pragma GCC diagnostic pop -#include <llvm-c/ExecutionEngine.h> -#include "../internal.h" -#include "../llvm.h" - -typedef struct HLLVMParser_ { - LLVMModuleRef mod; - LLVMValueRef func; - LLVMExecutionEngineRef engine; - LLVMBuilderRef builder; -} HLLVMParser; - -HParseResult* make_result(HArena *arena, HParsedToken *tok) { - HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult)); - ret->ast = tok; - ret->arena = arena; - ret->bit_length = 0; // This way it gets overridden in h_do_parse - return ret; -} - -void h_llvm_declare_common(LLVMModuleRef mod) { - llvm_inputstream = 
LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HInputStream_"); - LLVMTypeRef llvm_inputstream_struct_types[] = { - LLVMPointerType(LLVMInt8Type(), 0), - LLVMInt64Type(), - LLVMInt64Type(), - LLVMInt64Type(), - LLVMInt8Type(), - LLVMInt8Type(), - LLVMInt8Type(), - LLVMInt8Type(), - LLVMInt8Type() - }; - LLVMStructSetBody(llvm_inputstream, llvm_inputstream_struct_types, 9, 0); - llvm_inputstreamptr = LLVMPointerType(llvm_inputstream, 0); - llvm_arena = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HArena_"); - llvm_arenaptr = LLVMPointerType(llvm_arena, 0); - llvm_parsedtoken = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HParsedToken_"); - LLVMTypeRef llvm_parsedtoken_struct_types[] = { - LLVMInt32Type(), // actually an enum value - LLVMInt64Type(), // actually this is a union; the largest thing in it is 64 bits - LLVMInt64Type(), // FIXME sizeof(size_t) will be 32 bits on 32-bit platforms - LLVMInt64Type(), // FIXME ditto - LLVMInt8Type() - }; - LLVMStructSetBody(llvm_parsedtoken, llvm_parsedtoken_struct_types, 5, 0); - llvm_parsedtokenptr = LLVMPointerType(llvm_parsedtoken, 0); - llvm_parseresult = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HParseResult_"); - LLVMTypeRef llvm_parseresult_struct_types[] = { - llvm_parsedtokenptr, - LLVMInt64Type(), - llvm_arenaptr - }; - LLVMStructSetBody(llvm_parseresult, llvm_parseresult_struct_types, 3, 0); - llvm_parseresultptr = LLVMPointerType(llvm_parseresult, 0); - LLVMTypeRef readbits_pt[] = { - llvm_inputstreamptr, - LLVMInt32Type(), - LLVMInt8Type() - }; - LLVMTypeRef readbits_ret = LLVMFunctionType(LLVMInt64Type(), readbits_pt, 3, 0); - LLVMAddFunction(mod, "h_read_bits", readbits_ret); - - LLVMTypeRef amalloc_pt[] = { - llvm_arenaptr, - LLVMInt32Type() - }; - LLVMTypeRef amalloc_ret = LLVMFunctionType(LLVMPointerType(LLVMVoidType(), 0), amalloc_pt, 2, 0); - LLVMAddFunction(mod, "h_arena_malloc", amalloc_ret); - - LLVMTypeRef makeresult_pt[] = { - llvm_arenaptr, - llvm_parsedtokenptr 
- }; - LLVMTypeRef makeresult_ret = LLVMFunctionType(llvm_parseresultptr, makeresult_pt, 2, 0); - LLVMAddFunction(mod, "make_result", makeresult_ret); -} - -int h_llvm_compile(HAllocator* mm__, HParser* parser, const void* params) { - // Boilerplate to set up a translation unit, aka a module. - const char* name = params ? (const char*)params : "parse"; - LLVMModuleRef mod = LLVMModuleCreateWithName(name); - h_llvm_declare_common(mod); - // Boilerplate to set up the parser function to add to the module. It takes an HInputStream* and - // returns an HParseResult. - LLVMTypeRef param_types[] = { - llvm_inputstreamptr, - llvm_arenaptr - }; - LLVMTypeRef ret_type = LLVMFunctionType(llvm_parseresultptr, param_types, 2, 0); - LLVMValueRef parse_func = LLVMAddFunction(mod, name, ret_type); - // Parse function is now declared; time to define it - LLVMBuilderRef builder = LLVMCreateBuilder(); - // Translate the contents of the children of `parser` into their LLVM instruction equivalents - if (parser->vtable->llvm(builder, parse_func, mod, parser->env)) { - // But first, verification - char *error = NULL; - LLVMVerifyModule(mod, LLVMAbortProcessAction, &error); - LLVMDisposeMessage(error); - error = NULL; - // OK, link that sonofabitch - LLVMLinkInMCJIT(); - LLVMInitializeNativeTarget(); - LLVMInitializeNativeAsmPrinter(); - LLVMExecutionEngineRef engine = NULL; - LLVMCreateExecutionEngineForModule(&engine, mod, &error); - if (error) { - fprintf(stderr, "error: %s\n", error); - LLVMDisposeMessage(error); - return -1; - } - char* dump = LLVMPrintModuleToString(mod); - fprintf(stderr, "\n\n%s\n\n", dump); - // Package up the pointers that comprise the module and stash it in the original HParser - HLLVMParser *llvm_parser = h_new(HLLVMParser, 1); - llvm_parser->mod = mod; - llvm_parser->func = parse_func; - llvm_parser->engine = engine; - llvm_parser->builder = builder; - parser->backend_data = llvm_parser; - return 0; - } else { - return -1; - } -} - -void h_llvm_free(HParser 
*parser) { - HLLVMParser *llvm_parser = parser->backend_data; - LLVMModuleRef mod_out; - char *err_out; - - llvm_parser->func = NULL; - LLVMRemoveModule(llvm_parser->engine, llvm_parser->mod, &mod_out, &err_out); - LLVMDisposeExecutionEngine(llvm_parser->engine); - llvm_parser->engine = NULL; - - LLVMDisposeBuilder(llvm_parser->builder); - llvm_parser->builder = NULL; - - LLVMDisposeModule(llvm_parser->mod); - llvm_parser->mod = NULL; -} - -/* - * Construct LLVM IR to decide if a runtime value is a member of a compile-time - * character set, and branch depending on the result. - * - * Parameters: - * - mod [in]: an LLVMModuleRef - * - func [in]: an LLVMValueRef to the function to add the new basic blocks - * - builder [in]: an LLVMBuilderRef, positioned appropriately - * - r [in]: an LLVMValueRef to the value to test - * - cs [in]: the HCharset to test membership in - * - yes [in]: the basic block to branch to if r is in cs - * - no [in]: the basic block to branch to if r is not in cs - */ - -void h_llvm_make_charset_membership_test(LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder, - LLVMValueRef r, HCharset cs, - LLVMBasicBlockRef yes, LLVMBasicBlockRef no) { - /* - * A charset is a 256-element bit array, 32 bytes long in total. Ours is - * static at compile time, so we can try to construct minimal LLVM IR for - * this particular charset. In particular, we should handle cases like - * only one or two bits being set, or a long consecutive range, efficiently. - * - * In LLVM IR, we can test propositions like r == x, r <= x, r >= x and their - * negations efficiently, so the challenge here is to turn a character map - * into a minimal set of such propositions. - * - * TODO: actually do this; right now for the sake of a first pass we're just - * testing r == x for every x in cs. 
- */ - - for (int i = 0; i < 256; ++i) { - if (charset_isset(cs, i)) { - char bbname[16]; - uint8_t c = (uint8_t)i; - snprintf(bbname, 16, "cs_memb_%02x", c); - LLVMValueRef icmp = LLVMBuildICmp(builder, LLVMIntEQ, - LLVMConstInt(LLVMInt8Type(), c, 0), r, "c == r"); - LLVMBasicBlockRef bb = LLVMAppendBasicBlock(func, bbname); - LLVMBuildCondBr(builder, icmp, yes, bb); - LLVMPositionBuilderAtEnd(builder, bb); - } - } - - LLVMBuildBr(builder, no); -} - -/* - * Construct LLVM IR to allocate a token of type TT_SINT or TT_UINT - * - * Parameters: - * - mod [in]: an LLVMModuleRef - * - builder [in]: an LLVMBuilderRef, positioned appropriately - * - stream [in]: a value ref to an llvm_inputstreamptr, for the input stream - * - arena [in]: a value ref to an llvm_arenaptr to be used for the malloc - * - r [in]: a value ref to the value to be used to this token - * - mr_out [out]: the return value from make_result() - * - * TODO actually support TT_SINT, inputs other than 8 bit - */ - -void h_llvm_make_tt_suint(LLVMModuleRef mod, LLVMBuilderRef builder, - LLVMValueRef stream, LLVMValueRef arena, - LLVMValueRef r, LLVMValueRef *mr_out) { - /* Set up call to h_arena_malloc() for a new HParsedToken */ - LLVMValueRef tok_size = LLVMConstInt(LLVMInt32Type(), sizeof(HParsedToken), 0); - LLVMValueRef amalloc_args[] = { arena, tok_size }; - /* %h_arena_malloc = call void* @h_arena_malloc(%struct.HArena_.1* %1, i32 48) */ - LLVMValueRef amalloc = LLVMBuildCall(builder, LLVMGetNamedFunction(mod, "h_arena_malloc"), - amalloc_args, 2, "h_arena_malloc"); - /* %tok = bitcast void* %h_arena_malloc to %struct.HParsedToken_.2* */ - LLVMValueRef tok = LLVMBuildBitCast(builder, amalloc, llvm_parsedtokenptr, "tok"); - - /* - * tok->token_type = TT_UINT; - * - * %token_type = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 0 - * - * TODO if we handle TT_SINT too, adjust here and the zero-ext below - */ - LLVMValueRef toktype = LLVMBuildStructGEP(builder, 
tok, 0, "token_type"); - /* store i32 8, i32* %token_type */ - LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), 8, 0), toktype); - - /* - * tok->uint = r; - * - * %token_data = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 1 - */ - LLVMValueRef tokdata = LLVMBuildStructGEP(builder, tok, 1, "token_data"); - /* - * TODO - * - * This is where we'll need to adjust to handle other types (sign vs. zero extend, omit extend if - * r is 64-bit already - */ - LLVMBuildStore(builder, LLVMBuildZExt(builder, r, LLVMInt64Type(), "r"), tokdata); - /* - * Store the index from the stream into the token - */ - /* %t_index = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 2 */ - LLVMValueRef tokindex = LLVMBuildStructGEP(builder, tok, 2, "t_index"); - /* %s_index = getelementptr inbounds %struct.HInputStream_.0, %struct.HInputStream_.0* %0, i32 0, i32 2 */ - LLVMValueRef streamindex = LLVMBuildStructGEP(builder, stream, 2, "s_index"); - /* %4 = load i64, i64* %s_index */ - /* store i64 %4, i64* %t_index */ - LLVMBuildStore(builder, LLVMBuildLoad(builder, streamindex, ""), tokindex); - /* Store the bit length into the token */ - LLVMValueRef tokbitlen = LLVMBuildStructGEP(builder, tok, 3, "bit_length"); - /* TODO handle multiple bit lengths */ - LLVMBuildStore(builder, LLVMConstInt(LLVMInt64Type(), 8, 0), tokbitlen); - - /* - * Now call make_result() - * - * %make_result = call %struct.HParseResult_.3* @make_result(%struct.HArena_.1* %1, %struct.HParsedToken_.2* %3) - */ - LLVMValueRef result_args[] = { arena, tok }; - LLVMValueRef mr = LLVMBuildCall(builder, LLVMGetNamedFunction(mod, "make_result"), - result_args, 2, "make_result"); - - *mr_out = mr; -} - -HParseResult *h_llvm_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) { - const HLLVMParser *llvm_parser = parser->backend_data; - HArena *arena = h_new_arena(mm__, 0); - - // LLVMRunFunction only supports certain 
signatures for dumb reasons; it's this hack with - // memcpy and function pointers, or writing a shim in LLVM IR. - // - // LLVMGenericValueRef args[] = { - // LLVMCreateGenericValueOfPointer(input_stream), - // LLVMCreateGenericValueOfPointer(arena) - // }; - // LLVMGenericValueRef res = LLVMRunFunction(llvm_parser->engine, llvm_parser->func, 2, args); - // HParseResult *ret = (HParseResult*)LLVMGenericValueToPointer(res); - - void *parse_func_ptr_v; - HParseResult * (*parse_func_ptr)(HInputStream *input_stream, HArena *arena); - parse_func_ptr_v = LLVMGetPointerToGlobal(llvm_parser->engine, llvm_parser->func); - memcpy(&parse_func_ptr, &parse_func_ptr_v, sizeof(parse_func_ptr)); - HParseResult *ret = parse_func_ptr(input_stream, arena); - if (ret) { - ret->arena = arena; - if (!input_stream->overrun) { - size_t bit_length = h_input_stream_pos(input_stream); - if (ret->bit_length == 0) { - ret->bit_length = bit_length; - } - if (ret->ast && ret->ast->bit_length != 0) { - ((HParsedToken*)(ret->ast))->bit_length = bit_length; - } - } else { - ret->bit_length = 0; - } - } else { - ret = NULL; - } - if (input_stream->overrun) { - return NULL; // overrun is always failure. 
- } - return ret; -} - -HParserBackendVTable h__llvm_backend_vtable = { - .compile = h_llvm_compile, - .parse = h_llvm_parse, - .free = h_llvm_free -}; diff --git a/src/backends/llvm/llvm.c b/src/backends/llvm/llvm.c new file mode 100644 index 0000000000000000000000000000000000000000..185367a2e166920ada40a3e629ab72a38b747261 --- /dev/null +++ b/src/backends/llvm/llvm.c @@ -0,0 +1,229 @@ +#ifdef HAMMER_LLVM_BACKEND + +#include <llvm-c/Analysis.h> +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#include <llvm-c/Core.h> +#pragma GCC diagnostic pop +#include <llvm-c/ExecutionEngine.h> +#include "../../internal.h" +#include "llvm.h" + +typedef struct HLLVMParser_ { + LLVMModuleRef mod; + LLVMValueRef func; + LLVMExecutionEngineRef engine; + LLVMBuilderRef builder; + HLLVMParserCompileContext *compile_ctxt; +} HLLVMParser; + +HParseResult* make_result(HArena *arena, HParsedToken *tok) { + HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult)); + ret->ast = tok; + ret->arena = arena; + ret->bit_length = 0; // This way it gets overridden in h_do_parse + return ret; +} + +void h_llvm_declare_common(HLLVMParserCompileContext *ctxt) { + ctxt->llvm_inputstream = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HInputStream_"); + LLVMTypeRef llvm_inputstream_struct_types[] = { + LLVMPointerType(LLVMInt8Type(), 0), + LLVMInt64Type(), + LLVMInt64Type(), + LLVMInt64Type(), + LLVMInt8Type(), + LLVMInt8Type(), + LLVMInt8Type(), + LLVMInt8Type(), + LLVMInt8Type() + }; + LLVMStructSetBody(ctxt->llvm_inputstream, llvm_inputstream_struct_types, 9, 0); + ctxt->llvm_inputstreamptr = LLVMPointerType(ctxt->llvm_inputstream, 0); + ctxt->llvm_arena = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HArena_"); + ctxt->llvm_arenaptr = LLVMPointerType(ctxt->llvm_arena, 0); + ctxt->llvm_parsedtoken = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HParsedToken_"); + LLVMTypeRef llvm_parsedtoken_struct_types[] = { + LLVMInt32Type(), // actually 
an enum value + LLVMInt64Type(), // actually this is a union; the largest thing in it is 64 bits + LLVMInt64Type(), // FIXME sizeof(size_t) will be 32 bits on 32-bit platforms + LLVMInt64Type(), // FIXME ditto + LLVMInt8Type() + }; + LLVMStructSetBody(ctxt->llvm_parsedtoken, llvm_parsedtoken_struct_types, 5, 0); + ctxt->llvm_parsedtokenptr = LLVMPointerType(ctxt->llvm_parsedtoken, 0); + ctxt->llvm_parseresult = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HParseResult_"); + LLVMTypeRef llvm_parseresult_struct_types[] = { + ctxt->llvm_parsedtokenptr, + LLVMInt64Type(), + ctxt->llvm_arenaptr + }; + LLVMStructSetBody(ctxt->llvm_parseresult, llvm_parseresult_struct_types, 3, 0); + ctxt->llvm_parseresultptr = LLVMPointerType(ctxt->llvm_parseresult, 0); + LLVMTypeRef readbits_pt[] = { + ctxt->llvm_inputstreamptr, + LLVMInt32Type(), + LLVMInt8Type() + }; + LLVMTypeRef readbits_ret = LLVMFunctionType(LLVMInt64Type(), readbits_pt, 3, 0); + LLVMAddFunction(ctxt->mod, "h_read_bits", readbits_ret); + + LLVMTypeRef amalloc_pt[] = { + ctxt->llvm_arenaptr, + LLVMInt32Type() + }; + LLVMTypeRef amalloc_ret = LLVMFunctionType(LLVMPointerType(LLVMVoidType(), 0), amalloc_pt, 2, 0); + LLVMAddFunction(ctxt->mod, "h_arena_malloc", amalloc_ret); + + LLVMTypeRef makeresult_pt[] = { + ctxt->llvm_arenaptr, + ctxt->llvm_parsedtokenptr + }; + LLVMTypeRef makeresult_ret = LLVMFunctionType(ctxt->llvm_parseresultptr, makeresult_pt, 2, 0); + LLVMAddFunction(ctxt->mod, "make_result", makeresult_ret); +} + +int h_llvm_compile(HAllocator* mm__, HParser* parser, const void* params) { + HLLVMParserCompileContext *ctxt; + // Boilerplate to set up a translation unit, aka a module. + const char* name = params ? 
(const char*)params : "parse"; + + /* Build a parser compilation context */ + ctxt = h_new(HLLVMParserCompileContext, 1); + memset(ctxt, 0, sizeof(*ctxt)); + ctxt->mm__ = mm__; + ctxt->mod = LLVMModuleCreateWithName(name); + h_llvm_declare_common(ctxt); + + // Boilerplate to set up the parser function to add to the module. It takes an HInputStream* and + // returns an HParseResult. + LLVMTypeRef param_types[] = { + ctxt->llvm_inputstreamptr, + ctxt->llvm_arenaptr + }; + LLVMTypeRef ret_type = LLVMFunctionType(ctxt->llvm_parseresultptr, param_types, 2, 0); + ctxt->func = LLVMAddFunction(ctxt->mod, name, ret_type); + + // Parse function is now declared; time to define it + ctxt->builder = LLVMCreateBuilder(); + LLVMBasicBlockRef preamble = LLVMAppendBasicBlock(ctxt->func, "preamble"); + LLVMPositionBuilderAtEnd(ctxt->builder, preamble); + + /* + * First thing it needs to do is get its stream and arena args and stick + * value refs in the context. + * + * XXX do we always need arena? Can we make a dummy valueref the generated + * IR refers to, and then fill in arena if we need it after we know whether + * we need it? Similar concerns apply to setting up storage needed for, e.g. + * memoizing charsets. 
+ */ + ctxt->stream = LLVMBuildBitCast(ctxt->builder, LLVMGetFirstParam(ctxt->func), + ctxt->llvm_inputstreamptr, "stream"); + ctxt->arena = LLVMGetLastParam(ctxt->func); + + // Translate the contents of the children of `parser` into their LLVM instruction equivalents + if (parser->vtable->llvm(ctxt, parser->env)) { + // But first, verification + char *error = NULL; + LLVMVerifyModule(ctxt->mod, LLVMAbortProcessAction, &error); + LLVMDisposeMessage(error); + error = NULL; + // OK, link that sonofabitch + LLVMLinkInMCJIT(); + LLVMInitializeNativeTarget(); + LLVMInitializeNativeAsmPrinter(); + LLVMExecutionEngineRef engine = NULL; + LLVMCreateExecutionEngineForModule(&engine, ctxt->mod, &error); + if (error) { + fprintf(stderr, "error: %s\n", error); + LLVMDisposeMessage(error); + return -1; + } + char* dump = LLVMPrintModuleToString(ctxt->mod); + fprintf(stderr, "\n\n%s\n\n", dump); + // Package up the pointers that comprise the module and stash it in the original HParser + HLLVMParser *llvm_parser = h_new(HLLVMParser, 1); + llvm_parser->mod = ctxt->mod; + llvm_parser->func = ctxt->func; + llvm_parser->engine = engine; + llvm_parser->builder = ctxt->builder; + llvm_parser->compile_ctxt = ctxt; + parser->backend_data = llvm_parser; + return 0; + } else { + return -1; + } +} + +void h_llvm_free(HParser *parser) { + HAllocator *mm__; + HLLVMParser *llvm_parser = parser->backend_data; + LLVMModuleRef mod_out; + char *err_out; + + mm__ = llvm_parser->compile_ctxt->mm__; + h_free(llvm_parser->compile_ctxt); + llvm_parser->compile_ctxt = NULL; + mm__ = NULL; + + llvm_parser->func = NULL; + LLVMRemoveModule(llvm_parser->engine, llvm_parser->mod, &mod_out, &err_out); + LLVMDisposeExecutionEngine(llvm_parser->engine); + llvm_parser->engine = NULL; + + LLVMDisposeBuilder(llvm_parser->builder); + llvm_parser->builder = NULL; + + LLVMDisposeModule(llvm_parser->mod); + llvm_parser->mod = NULL; +} + +HParseResult *h_llvm_parse(HAllocator* mm__, const HParser* parser, HInputStream 
*input_stream) { + const HLLVMParser *llvm_parser = parser->backend_data; + HArena *arena = h_new_arena(mm__, 0); + + // LLVMRunFunction only supports certain signatures for dumb reasons; it's this hack with + // memcpy and function pointers, or writing a shim in LLVM IR. + // + // LLVMGenericValueRef args[] = { + // LLVMCreateGenericValueOfPointer(input_stream), + // LLVMCreateGenericValueOfPointer(arena) + // }; + // LLVMGenericValueRef res = LLVMRunFunction(llvm_parser->engine, llvm_parser->func, 2, args); + // HParseResult *ret = (HParseResult*)LLVMGenericValueToPointer(res); + + void *parse_func_ptr_v; + HParseResult * (*parse_func_ptr)(HInputStream *input_stream, HArena *arena); + parse_func_ptr_v = LLVMGetPointerToGlobal(llvm_parser->engine, llvm_parser->func); + memcpy(&parse_func_ptr, &parse_func_ptr_v, sizeof(parse_func_ptr)); + HParseResult *ret = parse_func_ptr(input_stream, arena); + if (ret) { + ret->arena = arena; + if (!input_stream->overrun) { + size_t bit_length = h_input_stream_pos(input_stream); + if (ret->bit_length == 0) { + ret->bit_length = bit_length; + } + if (ret->ast && ret->ast->bit_length != 0) { + ((HParsedToken*)(ret->ast))->bit_length = bit_length; + } + } else { + ret->bit_length = 0; + } + } else { + ret = NULL; + } + if (input_stream->overrun) { + return NULL; // overrun is always failure. 
+ } + return ret; +} + +HParserBackendVTable h__llvm_backend_vtable = { + .compile = h_llvm_compile, + .parse = h_llvm_parse, + .free = h_llvm_free +}; + +#endif /* defined(HAMMER_LLVM_BACKEND) */ diff --git a/src/backends/llvm/llvm.h b/src/backends/llvm/llvm.h new file mode 100644 index 0000000000000000000000000000000000000000..0721c3733b818877090af2e420d611902b375e5b --- /dev/null +++ b/src/backends/llvm/llvm.h @@ -0,0 +1,45 @@ +#ifdef HAMMER_LLVM_BACKEND + +#ifndef HAMMER_LLVM__H +#define HAMMER_LLVM__H + +#include "../../internal.h" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#include <llvm-c/Core.h> +#pragma GCC diagnostic pop + +/* The typedef is in internal.h */ + +struct HLLVMParserCompileContext_ { + /* Allocator */ + HAllocator* mm__; + /* Module/function/builder */ + LLVMModuleRef mod; + LLVMValueRef func; + LLVMBuilderRef builder; + /* Typerefs */ + LLVMTypeRef llvm_inputstream; + LLVMTypeRef llvm_inputstreamptr; + LLVMTypeRef llvm_arena; + LLVMTypeRef llvm_arenaptr; + LLVMTypeRef llvm_parsedtoken; + LLVMTypeRef llvm_parsedtokenptr; + LLVMTypeRef llvm_parseresult; + LLVMTypeRef llvm_parseresultptr; + /* Set up in function preamble */ + LLVMValueRef stream; + LLVMValueRef arena; +}; + +bool h_llvm_make_charset_membership_test(HLLVMParserCompileContext *ctxt, + LLVMValueRef r, HCharset cs, + LLVMBasicBlockRef yes, LLVMBasicBlockRef no); +void h_llvm_make_tt_suint(HLLVMParserCompileContext *ctxt, + uint8_t length, uint8_t signedp, + LLVMValueRef r, LLVMValueRef *mr_out); + +#endif // #ifndef HAMMER_LLVM__H + +#endif /* defined(HAMMER_LLVM_BACKEND) */ diff --git a/src/backends/llvm/llvm_charset.c b/src/backends/llvm/llvm_charset.c new file mode 100644 index 0000000000000000000000000000000000000000..56e3e80c1d421f37d6bf32bd5a1ba20e393d6676 --- /dev/null +++ b/src/backends/llvm/llvm_charset.c @@ -0,0 +1,1117 @@ +#ifdef HAMMER_LLVM_BACKEND + +#include <llvm-c/Analysis.h> +#pragma GCC diagnostic push +#pragma GCC diagnostic 
ignored "-Wpedantic" +#include <llvm-c/Core.h> +#pragma GCC diagnostic pop +#include <llvm-c/ExecutionEngine.h> +#include "../../internal.h" +#include "llvm.h" + +/* + * Set this #define to enable some debug logging and internal consistency + * checking. + */ +#define HAMMER_LLVM_CHARSET_DEBUG + +typedef enum { + /* + * Accept action; this entire range is in the charset. This action type + * has no children and terminates handling the input character. + */ + CHARSET_ACTION_ACCEPT, + /* + * Scan action; test input char against each set character in the charset. + * This action type has no children and terminates handling the input + * character. + */ + CHARSET_ACTION_SCAN, + /* + * Bitmap action; test input char against a bitmap in the IR at fixed + * cost. + */ + CHARSET_ACTION_BITMAP, + /* + * Complement action; invert the sense of the charset. This action type + * has one child node, with the bounds unchanged and the portion of the + * charset within the bounds complemented. + */ + CHARSET_ACTION_COMPLEMENT, + /* + * Split action; check whether the input char is above or below a split + * point, and branch into one of two children depending. + */ + CHARSET_ACTION_SPLIT +} llvm_charset_exec_plan_action_t; + +typedef struct llvm_charset_exec_plan_s llvm_charset_exec_plan_t; +struct llvm_charset_exec_plan_s { + /* + * The charset at this node, with transforms such as range restriction + * or complementation applied. 
+ */ + HCharset cs; + /* + * Char values for the range of this node, and the split point if this + * is CHARSET_ACTION_SPLIT + */ + uint8_t idx_start, idx_end, split_point; + /* Action to take at this node */ + llvm_charset_exec_plan_action_t action; + /* Estimated cost metric */ + int cost; + /* Depth in exec plan */ + int depth; + /* Children, if any (zero, one or two depending on action) */ + llvm_charset_exec_plan_t *children[2]; +}; + +/* Forward prototypes for charset llvm stuff */ +static int h_llvm_build_charset_exec_plan_impl(HAllocator* mm__, HCharset cs, + llvm_charset_exec_plan_t *parent, llvm_charset_exec_plan_t *cep, + int allow_complement, uint8_t *split_point); +static llvm_charset_exec_plan_t * h_llvm_build_charset_exec_plan_impl_alloc( + HAllocator* mm__, llvm_charset_exec_plan_t *parent, HCharset cs, + uint8_t idx_start, uint8_t idx_end, int allow_complement); +static void h_llvm_free_charset_exec_plan(HAllocator* mm__, + llvm_charset_exec_plan_t *cep); + +/* + * Check if this charset is eligible for CHARSET_ACTION_ACCEPT on a range + */ + +static int h_llvm_charset_eligible_for_accept(HCharset cs, uint8_t idx_start, uint8_t idx_end) { + int eligible = 1, i; + + for (i = idx_start; i <= idx_end; ++i) { + if (!(charset_isset(cs, (uint8_t)i))) { + eligible = 0; + break; + } + } + + return eligible; +} + +/* + * Estimate cost of CHARSET_ACTION_SCAN for this charset (~proportional to number of set chars, min 1) + */ + +static int h_llvm_charset_estimate_scan_cost(HCharset cs, uint8_t idx_start, uint8_t idx_end) { + int i, cost; + + cost = 1; + for (i = idx_start; i <= idx_end; ++i) { + if (charset_isset(cs, (uint8_t)i)) ++cost; + } + + return cost; +} + +/* + * Given a skeletal CHARSET_ACTION_SPLIT node from h_llvm_build_charset_exec_plan_impl(), + * binary search for the best split point we can find and return the cost metric. 
+ * Unfortunately the search space is quite large, so we're going to use some silly + * heuristics here such as looking for the longest run of present or absent chars at + * one end of a charset, and proposing it as a split, or just trying the midpoint. + * It may be possible to do better. + */ + +static int h_llvm_find_best_split(HAllocator* mm__, llvm_charset_exec_plan_t *split) { + int rv, best_end_run, i, contiguous; + uint8_t best_end_run_split, midpoint; + llvm_charset_exec_plan_t *best_left, *best_right, *left, *right; + int best_cost, cost; + + /* Sanity-check: we should be a split with a range at least two indices long */ + if (!split || split->action != CHARSET_ACTION_SPLIT) return -1; + if (split->idx_end <= split->idx_start) return -1; + + /* Find the longest end run; split a run of length 1 at the left end as a + * fallback, since there's always a run of length 1 at each end. */ + best_end_run = 1; + best_end_run_split = split->idx_start; + contiguous = 0; + /* Try the low end */ + i = 0; + while (i <= split->idx_end - split->idx_start && + (charset_isset(split->cs, split->idx_start + i) == + charset_isset(split->cs, split->idx_start))) ++i; + if (i <= split->idx_end - split->idx_start) { + /* This run has length i */ + if (i > best_end_run) { + best_end_run = i; + /* + * -1 since split points are last index of left child, and i + * is first index that wasn't in the run + */ + best_end_run_split = split->idx_start + i - 1; + } + + /* Now the same thing from the high end */ + i = 0; + while (i <= split->idx_end - split->idx_start && + (charset_isset(split->cs, split->idx_end - i) == + charset_isset(split->cs, split->idx_end))) ++i; + if (i <= split->idx_end - split->idx_start && i > best_end_run) { + best_end_run = i; + best_end_run_split = split->idx_end - i; + } + } else { + /* Wow, contiguous - any split will turn out well - just use the midpoint */ + contiguous = 1; + } + + /* Initialize, start trying things */ + best_left = best_right = left = 
right = NULL; + rv = -1; + + /* Try a midpoint split */ + midpoint = split->idx_start + (split->idx_end - split->idx_start) / 2; + left = h_llvm_build_charset_exec_plan_impl_alloc(mm__, split, split->cs, + split->idx_start, midpoint, 1); + right = h_llvm_build_charset_exec_plan_impl_alloc(mm__, split, split->cs, + midpoint + 1, split->idx_end, 1); + if (left && right) { + /* Cost of the split == 1 + max(left->cost, right->cost) */ + cost = left->cost; + if (right->cost > cost) cost = right->cost; + ++cost; + /* We haven't tried the end-run one yet, so always accept this */ + best_left = left; + best_right = right; + best_cost = cost; + left = right = NULL; + } else goto err; + + /* + * Try an end-run split; if we decided we had a contiguous run earlier, + * all are equally good, so don't bother and just use the midpoint + */ + + if (!contiguous) { + /* + * Sanity-check the indices; error out if the scanner gave us + * something silly + */ + if (best_end_run_split < split->idx_start || + best_end_run_split >= split->idx_end) goto err; + left = h_llvm_build_charset_exec_plan_impl_alloc(mm__, split, split->cs, + split->idx_start, best_end_run_split, 1); + right = h_llvm_build_charset_exec_plan_impl_alloc(mm__, split, split->cs, + best_end_run_split + 1, split->idx_end, 1); + if (left && right) { + /* Cost of the split == 1 + max(left->cost, right->cost) */ + cost = left->cost; + if (right->cost > cost) cost = right->cost; + ++cost; + /* Check if against what we already have */ + if (cost < best_cost) { + if (best_left) h_llvm_free_charset_exec_plan(mm__, best_left); + if (best_right) h_llvm_free_charset_exec_plan(mm__, best_right); + best_left = left; + best_right = right; + best_cost = cost; + left = right = NULL; + } + } else goto err; + } + + /* Set up the split node with our best results */ + split->cost = best_cost; + split->children[0] = best_left; + split->children[1] = best_right; + split->split_point = best_left->idx_end; + best_left = best_right = NULL; + rv 
= split->cost; + + err: + /* Error/cleanup case */ + if (left) h_llvm_free_charset_exec_plan(mm__, left); + if (right) h_llvm_free_charset_exec_plan(mm__, right); + if (best_left) h_llvm_free_charset_exec_plan(mm__, best_left); + if (best_right) h_llvm_free_charset_exec_plan(mm__, best_right); + + return rv; +} + +/* + * Setup call to h_llvm_build_charset_exec_plan_impl(), while allocating a new + * llvm_charset_exec_plan_t. + */ +static llvm_charset_exec_plan_t * h_llvm_build_charset_exec_plan_impl_alloc( + HAllocator* mm__, llvm_charset_exec_plan_t *parent, HCharset cs, + uint8_t idx_start, uint8_t idx_end, int allow_complement) { + int cost; + llvm_charset_exec_plan_t *cep; + + if (!mm__) return NULL; + if (!cs) return NULL; + if (idx_start > idx_end) return NULL; + + cep = h_new(llvm_charset_exec_plan_t, 1); + memset(cep, 0, sizeof(*cep)); + cep->cs = NULL; + /* + * Initializing these is important; if the parent is CHARSET_ACTION_SPLIT, + * these are how h_llvm_build_charset_exec_plan_impl() knows the range for + * the child it's constructing. + */ + cep->idx_start = idx_start; + cep->idx_end = idx_end; + cost = h_llvm_build_charset_exec_plan_impl(mm__, cs, parent, cep, + allow_complement, NULL); + if (cost >= 0) cep->cost = cost; + else { + h_llvm_free_charset_exec_plan(mm__, cep); + cep = NULL; + } + + return cep; +} + +/* + * Given a charset, optionally its parent containing range restrictions, and + * an allow_complement parameter, search for the best exec plan and write it + * to another (skeletal) charset which will receive an action and range. If + * the action is CHARSET_ACTION_SPLIT, also output a split point. Return a + * cost estimate. 
+ */ + +static int h_llvm_build_charset_exec_plan_impl(HAllocator* mm__, HCharset cs, + llvm_charset_exec_plan_t *parent, llvm_charset_exec_plan_t *cep, + int allow_complement, uint8_t *split_point) { + int eligible_for_accept, best_cost, depth; + int estimated_complement_cost, estimated_scan_cost, estimated_split_cost; + int estimated_bitmap_cost; + uint8_t idx_start, idx_end; + llvm_charset_exec_plan_t complement_cep, split_cep; + llvm_charset_exec_plan_action_t chosen_action; + + /* Check args */ + if (!(mm__ && cep)) return -1; + + /* + * The index bounds come from either the parent or maximal bounds by + * default. Exception is the case that we are a child of a split, in + * which case h_llvm_find_best_split() should have set bounds in cep. + */ + if (parent && parent->action == CHARSET_ACTION_SPLIT && + ((cep->idx_start == parent->idx_start && + cep->idx_end < parent->idx_end) || + (cep->idx_start > parent->idx_start && + cep->idx_end == parent->idx_end))) { + idx_start = cep->idx_start; + idx_end = cep->idx_end; + } else if (parent) { + idx_start = parent->idx_start; + idx_end = parent->idx_end; + } else { + idx_start = 0; + idx_end = UINT8_MAX; + } + + /* Get the depth in the exec plan */ + if (parent) depth = parent->depth + 1; + else depth = 0; + + eligible_for_accept = h_llvm_charset_eligible_for_accept(cs, idx_start, idx_end); + if (eligible_for_accept) { + /* if we can use CHARSET_ACTION_ACCEPT, always do so */ + cep->cs = copy_charset(mm__, cs); + charset_restrict_to_range(cep->cs, idx_start, idx_end); + cep->idx_start = idx_start; + cep->idx_end = idx_end; + cep->split_point = 0; + /* Acceptance (or rejection, under a complement) is free */ + cep->cost = 0; + cep->depth = depth; + cep->action = CHARSET_ACTION_ACCEPT; + cep->children[0] = NULL; + cep->children[1] = NULL; + + return cep->cost; + } else { + /* + * Estimate cost for CHARSET_ACTION_SCAN, and for the tree below + * CHARSET_ACTION_COMPLEMENT if we are eligible to use it. 
+ */ + estimated_scan_cost = h_llvm_charset_estimate_scan_cost(cs, idx_start, idx_end); + /* + * We can always use CHARSET_ACTION_BITMAP; this constant controls how + * strongly we prefer it over the compare-and-branch approach. + */ + estimated_bitmap_cost = 6; + /* >= 0 is a flag we have a complement we may need to free later */ + estimated_complement_cost = -1; + if (allow_complement) { + HCharset child_cs; + + /* Complement the charset within the range */ + memset(&complement_cep, 0, sizeof(complement_cep)); + complement_cep.cs = copy_charset(mm__, cs); + charset_restrict_to_range(complement_cep.cs, idx_start, idx_end); + child_cs = copy_charset(mm__, complement_cep.cs); + charset_complement(child_cs); + charset_restrict_to_range(child_cs, idx_start, idx_end); + complement_cep.idx_start = idx_start; + complement_cep.idx_end = idx_end; + complement_cep.split_point = 0; + complement_cep.depth = depth; + complement_cep.action = CHARSET_ACTION_COMPLEMENT; + complement_cep.children[0] = h_new(llvm_charset_exec_plan_t, 1); + memset(complement_cep.children[0], 0, sizeof(llvm_charset_exec_plan_t)); + complement_cep.children[1] = NULL; + /* + * Find the child; the complement has cost 0 since it just swizzles success + * and fail output basic blocks; it's important we test for complement last + * below then, so we break ties in favor of not stacking complements up. We + * set allow_complement = 0 so we never stack two complements. + */ + complement_cep.cost = h_llvm_build_charset_exec_plan_impl(mm__, child_cs, &complement_cep, + complement_cep.children[0], 0, NULL); + estimated_complement_cost = complement_cep.cost; + h_free(child_cs); + } + + /* + * Set up split node if it makes sense; the depth cutoff here limits the + * cost of the search for complex charsets. 
+ */ + if (idx_start < idx_end && depth < 5) { + split_cep.cs = copy_charset(mm__, cs); + charset_restrict_to_range(split_cep.cs, idx_start, idx_end); + split_cep.idx_start = idx_start; + split_cep.idx_end = idx_end; + split_cep.split_point = 0; + split_cep.action = CHARSET_ACTION_SPLIT; + split_cep.cost = -1; + split_cep.depth = depth; + split_cep.children[0] = NULL; + split_cep.children[1] = NULL; + /* h_llvm_find_best_split() sets split_cep.cost */ + estimated_split_cost = h_llvm_find_best_split(mm__, &split_cep); + if (estimated_split_cost < 0) { + /* This shouldn't happen, but make sure we free the charset */ + h_free(split_cep.cs); + } + } else { + estimated_split_cost = -1; + } + + /* Pick the action type with the lowest cost */ + best_cost = -1; + if (estimated_scan_cost >= 0 && + (best_cost < 0 || estimated_scan_cost < best_cost)) { + chosen_action = CHARSET_ACTION_SCAN; + best_cost = estimated_scan_cost; + } + + if (estimated_bitmap_cost >= 0 && + (best_cost < 0 || estimated_bitmap_cost < best_cost)) { + chosen_action = CHARSET_ACTION_BITMAP; + best_cost = estimated_bitmap_cost; + } + + if (estimated_split_cost >= 0 && + (best_cost < 0 || estimated_split_cost < best_cost)) { + chosen_action = CHARSET_ACTION_SPLIT; + best_cost = estimated_split_cost; + } + + if (allow_complement && estimated_complement_cost >= 0 && + (best_cost < 0 || estimated_complement_cost < best_cost)) { + chosen_action = CHARSET_ACTION_COMPLEMENT; + best_cost = estimated_complement_cost; + } + + /* Fill out cep based on the chosen action */ + switch (chosen_action) { + case CHARSET_ACTION_SCAN: + /* Set up a scan */ + cep->cs = copy_charset(mm__, cs); + charset_restrict_to_range(cep->cs, idx_start, idx_end); + cep->idx_start = idx_start; + cep->idx_end = idx_end; + cep->split_point = 0; + cep->action = CHARSET_ACTION_SCAN; + cep->cost = estimated_scan_cost; + cep->depth = depth; + cep->children[0] = NULL; + cep->children[1] = NULL; + break; + case CHARSET_ACTION_BITMAP: + /* Set up a 
bitmap */ + cep->cs = copy_charset(mm__, cs); + charset_restrict_to_range(cep->cs, idx_start, idx_end); + cep->idx_start = idx_start; + cep->idx_end = idx_end; + cep->split_point = 0; + cep->action = CHARSET_ACTION_BITMAP; + cep->cost = estimated_bitmap_cost; + cep->depth = depth; + cep->children[0] = NULL; + cep->children[1] = NULL; + break; + case CHARSET_ACTION_COMPLEMENT: + /* + * We have a CEP filled out we can just copy over; be sure to set + * estimated_complement_cost = -1 so we know not to free it on the + * way out. + */ + memcpy(cep, &complement_cep, sizeof(complement_cep)); + memset(&complement_cep, 0, sizeof(complement_cep)); + estimated_complement_cost = -1; + break; + case CHARSET_ACTION_SPLIT: + /* + * We have a CEP filled out we can just copy over; be sure to set + * estimated_split_cost = -1 so we know not to free it on the way + * out. + */ + memcpy(cep, &split_cep, sizeof(split_cep)); + memset(&split_cep, 0, sizeof(split_cep)); + estimated_split_cost = -1; + break; + default: + /* Not supported */ + best_cost = -1; + memset(cep, 0, sizeof(*cep)); + break; + } + } + + /* Free temporary CEPs if needed */ + + if (estimated_complement_cost >= 0) { + /* + * We have a complement_cep we ended up not using; free its child and + * charset + */ + h_llvm_free_charset_exec_plan(mm__, complement_cep.children[0]); + h_free(complement_cep.cs); + memset(&complement_cep, 0, sizeof(complement_cep)); + estimated_complement_cost = -1; + } + + if (estimated_split_cost >= 0) { + /* + * We have a split_cep we ended up not using; free its children and + * charset. 
+ */ + h_llvm_free_charset_exec_plan(mm__, split_cep.children[0]); + h_llvm_free_charset_exec_plan(mm__, split_cep.children[1]); + h_free(split_cep.cs); + memset(&split_cep, 0, sizeof(split_cep)); + estimated_split_cost = -1; + } + + return best_cost; +} + +/* + * Build a charset exec plan for a charset + */ + +static llvm_charset_exec_plan_t * h_llvm_build_charset_exec_plan( + HAllocator* mm__, HCharset cs) { + llvm_charset_exec_plan_t *cep = NULL; + int best_cost; + + cep = h_new(llvm_charset_exec_plan_t, 1); + best_cost = h_llvm_build_charset_exec_plan_impl(mm__, cs, NULL, cep, 1, NULL); + + if (best_cost < 0) { + /* h_llvm_build_charset_exec_plan_impl() failed */ + h_free(cep); + cep = NULL; + } + + return cep; +} + +/* + * Consistency-check a charset exec plan + */ + +static bool h_llvm_check_charset_exec_plan(llvm_charset_exec_plan_t *cep) { + bool consistent = false; + uint8_t i; + + if (cep) { + /* Check that we have a charset */ + if (!(cep->cs)) goto done; + /* Check that the range makes sense */ + if (cep->idx_start > cep->idx_end) goto done; + /* Check that the charset is empty outside the range */ + for (i = 0; i < cep->idx_start; ++i) { + /* Failed check */ + if (charset_isset(cep->cs, i)) goto done; + /* Prevent wraparound */ + if (i == UINT8_MAX) break; + } + + if (cep->idx_end < UINT8_MAX) { + /* We break at the end */ + for (i = cep->idx_end + 1; ; ++i) { + /* Failed check */ + if (charset_isset(cep->cs, i)) goto done; + /* Prevent wraparound */ + if (i == UINT8_MAX) break; + } + } + + /* Minimum cost estimate is 0; complements and accepts can be free */ + if (cep->cost < 0) goto done; + + /* No split point unlesswe're CHARSET_ACTION_SPLIT */ + if (cep->action != CHARSET_ACTION_SPLIT && cep->split_point != 0) goto done; + + /* Action type dependent part */ + switch (cep->action) { + case CHARSET_ACTION_ACCEPT: + case CHARSET_ACTION_SCAN: + case CHARSET_ACTION_BITMAP: + /* These are always okay and have no children */ + if (cep->children[0] || 
cep->children[1]) goto done; + consistent = true; + break; + case CHARSET_ACTION_COMPLEMENT: + /* This has one child, which should have the same range */ + if (cep->children[1]) goto done; + if (cep->children[0]) { + if (cep->children[0]->idx_start == cep->idx_start && + cep->children[0]->idx_end == cep->idx_end) { + /* The cost cannot be lower than the child */ + if (cep->cost < cep->children[0]->cost) goto done; + /* Okay, we're consistent if the child node is */ + consistent = h_llvm_check_charset_exec_plan(cep->children[0]); + } + } + break; + case CHARSET_ACTION_SPLIT: + /* This has two children, which should split the range */ + if (cep->children[0] && cep->children[1]) { + if (cep->children[0]->idx_start == cep->idx_start && + cep->children[0]->idx_end + 1 == cep->children[1]->idx_start && + cep->children[1]->idx_end == cep->idx_end) { + /* The split point must match the children */ + if (cep->split_point != cep->children[0]->idx_end) goto done; + /* + * The cost must be in the range defined by the children, + 1 for + * the comparison at most + */ + int child_min_cost = (cep->children[0]->cost < cep->children[1]->cost) ? + cep->children[0]->cost : cep->children[1]->cost; + int child_max_cost = (cep->children[0]->cost > cep->children[1]->cost) ? 
+ cep->children[0]->cost : cep->children[1]->cost; + if ((cep->cost < child_min_cost) || (cep->cost > child_max_cost + 1)) goto done; + /* Okay, we're consistent if both children are */ + consistent = h_llvm_check_charset_exec_plan(cep->children[0]) && + h_llvm_check_charset_exec_plan(cep->children[1]); + } + } + break; + default: + break; + } + } + + done: + return consistent; +} + +/* + * Free a charset exec plan using the supplied allocator + */ + +static void h_llvm_free_charset_exec_plan(HAllocator* mm__, + llvm_charset_exec_plan_t *cep) { + int n_children, i; + + if (cep) { + n_children = 0; + switch (cep->action) { + case CHARSET_ACTION_COMPLEMENT: + n_children = 1; + break; + case CHARSET_ACTION_SPLIT: + n_children = 2; + break; + default: + break; + } + + for (i = 0; i < n_children; ++i) { + h_llvm_free_charset_exec_plan(mm__, cep->children[i]); + } + h_free(cep->cs); + h_free(cep); + } +} + +/* + * Pretty-print a charset exec plan to stdout + */ + +static void h_llvm_pretty_print_charset_exec_plan_impl(HAllocator *mm__, llvm_charset_exec_plan_t *cep, + const char *pfx_on_action_line, const char *pfx, + int depth) { + const char *action_string = NULL, *pfx_incr = NULL; + const char *pfx_incr_child_action = NULL, *pfx_incr_last_child = NULL; + char *next_pfx = NULL, *next_pfx_child_action_line = NULL, *next_pfx_last_child = NULL; + int n_children = 0, i, j, next_pfx_len; + uint8_t ch; + + if (!cep) { + action_string = "NULL"; + } else { + switch (cep->action) { + case CHARSET_ACTION_ACCEPT: + action_string = "CHARSET_ACTION_ACCEPT"; + break; + case CHARSET_ACTION_SCAN: + action_string = "CHARSET_ACTION_SCAN"; + break; + case CHARSET_ACTION_BITMAP: + action_string = "CHARSET_ACTION_BITMAP"; + break; + case CHARSET_ACTION_COMPLEMENT: + action_string = "CHARSET_ACTION_COMPLEMENT"; + n_children = 1; + break; + case CHARSET_ACTION_SPLIT: + action_string = "CHARSET_ACTION_SPLIT"; + n_children = 2; + break; + default: + action_string = "UNKNOWN"; + break; + } + } 
+ + if (n_children > 0) { + pfx_incr = " | "; + } else { + pfx_incr = " "; + } + + + if (depth > 0 || strlen(pfx_on_action_line) > 0) { + printf("%s-%s\n", pfx_on_action_line, action_string); + pfx_incr = (n_children > 0) ? " | " : " "; + pfx_incr_child_action = " +-"; + pfx_incr_last_child = " "; + } else { + printf("%s\n", action_string); + pfx_incr = (n_children > 0) ? "| " : " "; + pfx_incr_child_action = "+-"; + pfx_incr_last_child = " "; + } + + /* + * Now do the charset, 8 lines of 32 bits with spaces in between to + * fit [] range markers and | split point marker. + */ + int open = 0, close = 0, split = 0; + for (ch = 0, i = 0; i < 8; ++i) { + /* Special case: [ should go before first char on line */ + if (ch == cep->idx_start) { + printf("%s%s [", pfx, pfx_incr); + } else { + printf("%s%s ", pfx, pfx_incr); + } + for (j = 0; j < 32; ++j, ++ch) { + open = close = split = 0; + /* Figure out markers, avoid wraparound */ + if (cep->idx_start != 0 && ch + 1 == cep->idx_start) { + /* There should be a [ right after this char */ + open = 1; + } else if (ch == cep->idx_end) { + /* There should be a ] right after this char */ + close = 1; + } else if (ch == cep->split_point && + cep->action == CHARSET_ACTION_SPLIT) { + /* There should be a | right after this char */ + split = 1; + } + + if (charset_isset(cep->cs, ch)) printf("X"); + else printf("."); + + if (open) printf("["); + else if (close) printf("]"); + else if (split) printf("|"); + else printf(" "); + } + printf("\n"); + } + + if (cep->action == CHARSET_ACTION_SPLIT) { + printf("%s%s idx_start = %u, split_point = %u, idx_end = %u\n", + pfx, pfx_incr, cep->idx_start, cep->split_point, cep->idx_end); + } else { + printf("%s%s idx_start = %u, idx_end = %u\n", + pfx, pfx_incr, cep->idx_start, cep->idx_end); + } + + printf("%s%s cost = %d, depth = %d\n", pfx, pfx_incr, cep->cost, cep->depth); + + if (n_children > 0) { + if (n_children > 1) { + next_pfx_len = strlen(pfx) + strlen(pfx_incr) + 1; + next_pfx = 
h_new(char, next_pfx_len); + snprintf(next_pfx, next_pfx_len, "%s%s", pfx, pfx_incr); + } else { + /* Won't be needed */ + next_pfx = NULL; + } + next_pfx_len = strlen(pfx) + strlen(pfx_incr_child_action) + 1; + next_pfx_child_action_line = h_new(char, next_pfx_len); + snprintf(next_pfx_child_action_line, next_pfx_len, + "%s%s", pfx, pfx_incr_child_action); + next_pfx_len = strlen(pfx) + strlen(pfx_incr_last_child) + 1; + next_pfx_last_child = h_new(char, next_pfx_len); + snprintf(next_pfx_last_child, next_pfx_len, + "%s%s", pfx, pfx_incr_last_child); + + for (i = 0; i < n_children; ++i) { + /* Space things out */ + printf("%s%s\n", pfx, pfx_incr); + h_llvm_pretty_print_charset_exec_plan_impl(mm__, cep->children[i], + next_pfx_child_action_line, (i + 1 == n_children) ? next_pfx_last_child : next_pfx, + depth + 1); + } + + if (next_pfx) h_free(next_pfx); + h_free(next_pfx_last_child); + h_free(next_pfx_child_action_line); + } +} + +static void h_llvm_pretty_print_charset_exec_plan(HAllocator *mm__, llvm_charset_exec_plan_t *cep) { + /* Start at depth 0, and always emit an initial newline */ + printf("\n"); + h_llvm_pretty_print_charset_exec_plan_impl(mm__, cep, "", "", 0); +} + +/* Forward declares for IR-emission functions */ +static bool h_llvm_build_ir_for_bitmap(HLLVMParserCompileContext *ctxt, + HCharset cs, uint8_t idx_start, uint8_t idx_end, + LLVMValueRef r, + LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no); +static bool h_llvm_build_ir_for_scan(HLLVMParserCompileContext *ctxt, + HCharset cs, uint8_t idx_start, uint8_t idx_end, + LLVMValueRef r, + LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no); +static bool h_llvm_build_ir_for_split(HLLVMParserCompileContext *ctxt, + llvm_charset_exec_plan_t *cep, LLVMValueRef r, + LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no); +static bool h_llvm_cep_to_ir(HLLVMParserCompileContext *ctxt, + LLVMValueRef r, llvm_charset_exec_plan_t *cep, + LLVMBasicBlockRef in, 
LLVMBasicBlockRef yes, LLVMBasicBlockRef no); + +/* + * Build IR for a CHARSET_ACTION_BITMAP + */ + +static bool h_llvm_build_ir_for_bitmap(HLLVMParserCompileContext *ctxt, + HCharset cs, uint8_t idx_start, uint8_t idx_end, + LLVMValueRef r, + LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no) { + int i, j; + uint32_t bitmap_entry; + + if (!cs) return false; + if (!ctxt) return false; + if (idx_start > idx_end) return false; + + /* + * Embed a 8x32 bitmap in the IR, turn the input value into an index by + * right-shifting 5 bits, load the relevant bitmap byte, then derive a mask + * from the low-order 5 bits of the input value. & the mask with the bitmap + * byte, and compare. If non-zero, accept, otherwise reject. + */ + LLVMPositionBuilderAtEnd(ctxt->builder, in); + + /* Construct the bitmap */ + LLVMValueRef bitmap_entries[8]; + for (i = 0; i < 8; ++i) { + bitmap_entry = 0x0; + /* + * Bit order; LSB is lowest-numbered char index 32*i, MSB is 32*i + 31. + * and then the mask we need is just 1 << (r & 0x1f). 
+ */ + for (j = 0; j < 32; ++j) { + /* Set the bit if necessary */ + if (charset_isset(cs, (uint8_t)(32*i + j))) { + bitmap_entry |= ((uint32_t)(0x1) << j); + } + } + + /* Make an LLVMValueRef for it */ + bitmap_entries[i] = LLVMConstInt(LLVMInt32Type(), bitmap_entry, 0); + } + /* Now make an array out of them */ + LLVMValueRef bitmap_initializer = LLVMConstArray(LLVMInt32Type(), bitmap_entries, 8); + /* ...and we need a global variable to stick it in to GEP it */ + LLVMValueRef bitmap = LLVMAddGlobal(ctxt->mod, LLVMTypeOf(bitmap_initializer), "bitmap"); + LLVMSetInitializer(bitmap, bitmap_initializer); + + /* Compute the index into the bitmap */ + LLVMValueRef word_index = LLVMBuildLShr(ctxt->builder, r, + LLVMConstInt(LLVMInt8Type(), 5, 0), "word_index"); + + /* Get a pointer to that word in the bitmap */ + LLVMValueRef gep_indices[2]; + gep_indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + gep_indices[1] = word_index; + LLVMValueRef bitmap_word_p = + LLVMBuildInBoundsGEP(ctxt->builder, bitmap, gep_indices, 2, "bitmap_word_p"); + LLVMValueRef bitmap_word = + LLVMBuildLoad(ctxt->builder, bitmap_word_p, "bitmap_word"); + /* + * Extract the low-order 5 bits of r, and expand to a 32-bit int for the + * mask + */ + LLVMValueRef bit_index = LLVMBuildAnd(ctxt->builder, r, + LLVMConstInt(LLVMInt8Type(), 0x1f, 0), "bit_index"); + LLVMValueRef bit_index_zext = LLVMBuildZExt(ctxt->builder, bit_index, + LLVMInt32Type(), "bit_index_zext"); + /* Compute mask */ + LLVMValueRef mask = LLVMBuildShl(ctxt->builder, LLVMConstInt(LLVMInt32Type(), 1, 0), + bit_index_zext, "mask"); + /* AND the mask with the bitmap word */ + LLVMValueRef masked_bitmap_word = LLVMBuildAnd(ctxt->builder, bitmap_word, mask, + "masked_bitmap_word"); + /* Compare it to zero */ + LLVMValueRef bitmap_icmp = LLVMBuildICmp(ctxt->builder, LLVMIntNE, + masked_bitmap_word, LLVMConstInt(LLVMInt32Type(), 0, 0), "bitmap_icmp"); + /* If not zero, the char is in the set */ + LLVMBuildCondBr(ctxt->builder, 
bitmap_icmp, yes, no); + + return true; +} + +/* + * Build IR for a CHARSET_ACTION_SCAN + */ + +static bool h_llvm_build_ir_for_scan(HLLVMParserCompileContext *ctxt, + HCharset cs, uint8_t idx_start, uint8_t idx_end, + LLVMValueRef r, + LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no) { + if (!cs) return false; + if (!ctxt) return false; + if (idx_start > idx_end) return false; + + /* + * Scan the range of indices, and for each thing in the charset, + * compare and conditional branch. + */ + LLVMPositionBuilderAtEnd(ctxt->builder, in); + + for (int i = idx_start; i <= idx_end; ++i) { + if (charset_isset(cs, i)) { + char bbname[16]; + uint8_t c = (uint8_t)i; + snprintf(bbname, 16, "cs_memb_%02x", c); + LLVMValueRef icmp = LLVMBuildICmp(ctxt->builder, LLVMIntEQ, + LLVMConstInt(LLVMInt8Type(), c, 0), r, "c == r"); + LLVMBasicBlockRef bb = LLVMAppendBasicBlock(ctxt->func, bbname); + LLVMBuildCondBr(ctxt->builder, icmp, yes, bb); + LLVMPositionBuilderAtEnd(ctxt->builder, bb); + } + } + + LLVMBuildBr(ctxt->builder, no); + + return true; +} + +/* + * Build IR for a CHARSET_ACTION_SPLIT + */ + +static bool h_llvm_build_ir_for_split(HLLVMParserCompileContext *ctxt, + llvm_charset_exec_plan_t *cep, LLVMValueRef r, + LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no) { + char name[18]; + bool left_ok, right_ok; + + /* Split validation */ + if (!ctxt) return false; + if (!cep) return false; + if (cep->action != CHARSET_ACTION_SPLIT) return false; + if (cep->idx_start >= cep->idx_end) return false; + if (cep->split_point < cep->idx_start) return false; + if (cep->split_point >= cep->idx_end) return false; + if (!(cep->children[0] && cep->children[1])) return false; + if (cep->idx_start != cep->children[0]->idx_start) return false; + if (cep->split_point != cep->children[0]->idx_end) return false; + if (cep->split_point + 1 != cep->children[1]->idx_start) return false; + if (cep->idx_end != cep->children[1]->idx_end) return false; + + /* + * 
Compare the value against the split point, and branch to the left + * child if <=, right child if >. + */ + snprintf(name, 18, "cs_split_left_%02X", cep->split_point); + LLVMBasicBlockRef left = LLVMAppendBasicBlock(ctxt->func, name); + snprintf(name, 18, "cs_split_right_%02X", cep->split_point); + LLVMBasicBlockRef right = LLVMAppendBasicBlock(ctxt->func, name); + LLVMPositionBuilderAtEnd(ctxt->builder, in); + snprintf(name, 18, "r <= %02X", cep->split_point); + LLVMValueRef icmp = LLVMBuildICmp(ctxt->builder, LLVMIntULE, + r, LLVMConstInt(LLVMInt8Type(), cep->split_point, 0), name); + LLVMBuildCondBr(ctxt->builder, icmp, left, right); + + /* + * Now build the subtrees starting from each of the output basic blocks + * of the comparison. + */ + left_ok = h_llvm_cep_to_ir(ctxt, r, cep->children[0], left, yes, no); + right_ok = h_llvm_cep_to_ir(ctxt, r, cep->children[1], right, yes, no); + + return left_ok && right_ok; +} + +/* + * Turn an llvm_charset_exec_plan_t into IR + */ + +static bool h_llvm_cep_to_ir(HLLVMParserCompileContext *ctxt, + LLVMValueRef r, llvm_charset_exec_plan_t *cep, + LLVMBasicBlockRef in, LLVMBasicBlockRef yes, LLVMBasicBlockRef no) { + bool rv; + + if (!ctxt) return false; + if (!cep) return false; + + switch (cep->action) { + case CHARSET_ACTION_SCAN: + rv = h_llvm_build_ir_for_scan(ctxt, cep->cs, + cep->idx_start, cep->idx_end, r, in, yes, no); + break; + case CHARSET_ACTION_ACCEPT: + /* Easy case; just unconditionally branch to the yes output */ + LLVMPositionBuilderAtEnd(ctxt->builder, in); + LLVMBuildBr(ctxt->builder, yes); + rv = true; + break; + case CHARSET_ACTION_BITMAP: + rv = h_llvm_build_ir_for_bitmap(ctxt, cep->cs, + cep->idx_start, cep->idx_end, r, in, yes, no); + break; + case CHARSET_ACTION_COMPLEMENT: + /* This is trivial; just swap the 'yes' and 'no' outputs and build the child */ + rv = h_llvm_cep_to_ir(ctxt, r, cep->children[0], in, no, yes); + break; + case CHARSET_ACTION_SPLIT: + rv = h_llvm_build_ir_for_split(ctxt, cep, 
r, in, yes, no); + break; + default: + /* Unknown action type */ +#ifdef HAMMER_LLVM_CHARSET_DEBUG + fprintf(stderr, + "cep %p has unknown action type\n", + (void *)cep); +#endif /* defined(HAMMER_LLVM_CHARSET_DEBUG) */ + rv = false; + break; + } + + return rv; +} + +/* + * Construct LLVM IR to decide if a runtime value is a member of a compile-time + * character set, and branch depending on the result. + * + * Parameters: + * - mod [in]: an LLVMModuleRef + * - func [in]: an LLVMValueRef to the function to add the new basic blocks + * - builder [in]: an LLVMBuilderRef, positioned appropriately + * - r [in]: an LLVMValueRef to the value to test + * - cs [in]: the HCharset to test membership in + * - yes [in]: the basic block to branch to if r is in cs + * - no [in]: the basic block to branch to if r is not in cs + * + * Returns: true on success, false on failure + */ + +bool h_llvm_make_charset_membership_test(HLLVMParserCompileContext *ctxt, + LLVMValueRef r, HCharset cs, + LLVMBasicBlockRef yes, LLVMBasicBlockRef no) { + /* + * A charset is a 256-element bit array, 32 bytes long in total. Ours is + * static at compile time, so we can try to construct minimal LLVM IR for + * this particular charset. In particular, we should handle cases like + * only one or two bits being set, or a long consecutive range, efficiently. + * + * In LLVM IR, we can test propositions like r == x, r <= x, r >= x and their + * negations efficiently, so the challenge here is to turn a character map + * into a minimal set of such propositions. + * + * We achieve this by building a tree of actions to minimize a cost metric, + * and then transforming the tree into IR. 
+ */ + + HAllocator *mm__; + bool rv; + + if (!ctxt) return false; + mm__ = ctxt->mm__; + + /* Try building a charset exec plan */ + llvm_charset_exec_plan_t *cep = h_llvm_build_charset_exec_plan(mm__, cs); + if (!cep) { + fprintf(stderr, "got null from h_llvm_build_charset_exec_plan()\n"); + return false; + } + +#ifdef HAMMER_LLVM_CHARSET_DEBUG + bool ok = h_llvm_check_charset_exec_plan(cep); + if (ok) fprintf(stderr, "cep %p passes consistency check\n", (void *)cep); + else fprintf(stderr, "cep %p fails consistency check\n", (void *)cep); + h_llvm_pretty_print_charset_exec_plan(mm__, cep); + if (!ok) { + fprintf(stderr, "h_llvm_make_charset_membership_test() error-exiting " + "because consistency check failed\n"); + h_llvm_free_charset_exec_plan(mm__, cep); + cep = NULL; + return false; + } +#endif /* defined(HAMMER_LLVM_CHARSET_DEBUG) */ + + /* + * XXX Note on memoization: + * + * How common is it for this to occur multiple times in a parser with the + * same charset? If so, we will end up emitting code which differs only in + * its yes and no output basic blocks each time. Is there a significant + * performance penalty for LLVMBuildIndirectBr() vs. LLVMBuildBr()? If no, + * we should consider memoizing by charset using it and building a wrapper + * around it that just varies the output blocks to reduce emitted code size. + * + * + */ + + /* Create input block */ + LLVMBasicBlockRef start = LLVMAppendBasicBlock(ctxt->func, "cs_start"); + /* + * Make unconditional branch into input block from wherever our caller + * had us positioned. 
+ */ + LLVMBuildBr(ctxt->builder, start); + + rv = h_llvm_cep_to_ir(ctxt, r, cep, start, yes, no); + + h_llvm_free_charset_exec_plan(mm__, cep); + cep = NULL; + + return rv; +} + +#endif /* defined(HAMMER_LLVM_BACKEND) */ diff --git a/src/backends/llvm/llvm_suint.c b/src/backends/llvm/llvm_suint.c new file mode 100644 index 0000000000000000000000000000000000000000..571d6b00d39cbfc30f7b66d65c44e6554372ec0b --- /dev/null +++ b/src/backends/llvm/llvm_suint.c @@ -0,0 +1,97 @@ +#ifdef HAMMER_LLVM_BACKEND + +#include <llvm-c/Analysis.h> +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" +#include <llvm-c/Core.h> +#pragma GCC diagnostic pop +#include <llvm-c/ExecutionEngine.h> +#include "../../internal.h" +#include "llvm.h" + +/* + * Construct LLVM IR to allocate a token of type TT_SINT or TT_UINT + * + * Parameters: + * - mod [in]: an LLVMModuleRef + * - builder [in]: an LLVMBuilderRef, positioned appropriately + * - stream [in]: a value ref to an llvm_inputstreamptr, for the input stream + * - arena [in]: a value ref to an llvm_arenaptr to be used for the malloc + * - r [in]: a value ref to the value to be used to this token + * - mr_out [out]: the return value from make_result() + * + * TODO actually support TT_SINT, inputs other than 8 bit + */ + +void h_llvm_make_tt_suint(HLLVMParserCompileContext *ctxt, + uint8_t length, uint8_t signedp, + LLVMValueRef r, LLVMValueRef *mr_out) { + /* Set up call to h_arena_malloc() for a new HParsedToken */ + LLVMValueRef tok_size = LLVMConstInt(LLVMInt32Type(), sizeof(HParsedToken), 0); + LLVMValueRef amalloc_args[] = { ctxt->arena, tok_size }; + /* %h_arena_malloc = call void* @h_arena_malloc(%struct.HArena_.1* %1, i32 48) */ + LLVMValueRef amalloc = LLVMBuildCall(ctxt->builder, + LLVMGetNamedFunction(ctxt->mod, "h_arena_malloc"), + amalloc_args, 2, "h_arena_malloc"); + /* %tok = bitcast void* %h_arena_malloc to %struct.HParsedToken_.2* */ + LLVMValueRef tok = LLVMBuildBitCast(ctxt->builder, amalloc, 
ctxt->llvm_parsedtokenptr, "tok"); + + /* + * tok->token_type = signedp ? TT_SINT : TT_UINT; + * + * %token_type = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 0 + */ + LLVMValueRef toktype = LLVMBuildStructGEP(ctxt->builder, tok, 0, "token_type"); + /* store i32 8, i32* %token_type */ + LLVMBuildStore(ctxt->builder, LLVMConstInt(LLVMInt32Type(), + signedp ? TT_SINT : TT_UINT, 0), toktype); + + /* + * tok->sint = r; + * or + * tok->uint = r; + * + * %token_data = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 1 + */ + LLVMValueRef tokdata = LLVMBuildStructGEP(ctxt->builder, tok, 1, "token_data"); + /* + * the token_data field is a union, but either an int64_t or a uint64_t in the + * cases we can be called for. + */ + if (length < 64) { + /* Extend needed */ + LLVMValueRef r_ext; + if (signedp) r_ext = LLVMBuildSExt(ctxt->builder, r, LLVMInt64Type(), "r_sext"); + else r_ext = LLVMBuildZExt(ctxt->builder, r, LLVMInt64Type(), "r_zext"); + LLVMBuildStore(ctxt->builder, r_ext, tokdata); + } else { + LLVMBuildStore(ctxt->builder, r, tokdata); + } + /* + * Store the index from the stream into the token + */ + /* %t_index = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 2 */ + LLVMValueRef tokindex = LLVMBuildStructGEP(ctxt->builder, tok, 2, "t_index"); + /* %s_index = getelementptr inbounds %struct.HInputStream_.0, %struct.HInputStream_.0* %0, i32 0, i32 2 */ + LLVMValueRef streamindex = LLVMBuildStructGEP(ctxt->builder, ctxt->stream, 2, "s_index"); + /* %4 = load i64, i64* %s_index */ + /* store i64 %4, i64* %t_index */ + LLVMBuildStore(ctxt->builder, LLVMBuildLoad(ctxt->builder, streamindex, ""), tokindex); + /* Store the bit length into the token */ + LLVMValueRef tokbitlen = LLVMBuildStructGEP(ctxt->builder, tok, 3, "bit_length"); + LLVMBuildStore(ctxt->builder, LLVMConstInt(LLVMInt64Type(), length, 0), tokbitlen); + + /* + * Now call 
make_result() + * + * %make_result = call %struct.HParseResult_.3* @make_result(%struct.HArena_.1* %1, %struct.HParsedToken_.2* %3) + */ + LLVMValueRef result_args[] = { ctxt->arena, tok }; + LLVMValueRef mr = LLVMBuildCall(ctxt->builder, + LLVMGetNamedFunction(ctxt->mod, "make_result"), + result_args, 2, "make_result"); + + *mr_out = mr; +} + +#endif /* defined(HAMMER_LLVM_BACKEND) */ diff --git a/src/backends/missing.c b/src/backends/missing.c new file mode 100644 index 0000000000000000000000000000000000000000..2a46b57429b08e82d895fb8c7a56ef315e8e6d46 --- /dev/null +++ b/src/backends/missing.c @@ -0,0 +1,23 @@ +#include "missing.h" + +int h_missing_compile(HAllocator* mm__, HParser* parser, const void* params) { + /* Always fail */ + + return -1; +} + +HParseResult *h_missing_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) { + /* Always fail */ + + return NULL; +} + +void h_missing_free(HParser *parser) { + /* No-op */ +} + +HParserBackendVTable h__missing_backend_vtable = { + .compile = h_missing_compile, /* TODO */ + .parse = h_missing_parse, /* TODO */ + .free = h_missing_free, /* TODO */ +}; diff --git a/src/backends/missing.h b/src/backends/missing.h new file mode 100644 index 0000000000000000000000000000000000000000..4efe5f350331a1dcc75dcd894bf1075e8f62bbd8 --- /dev/null +++ b/src/backends/missing.h @@ -0,0 +1,7 @@ +#ifndef HAMMER_BACKENDS_MISSING__H +#define HAMMER_BACKENDS_MISSING__H + +#include "../hammer.h" +#include "../internal.h" + +#endif /* !defined(HAMMER_BACKENDS_MISSING__H) */ diff --git a/src/bindings/dotnet/SConscript b/src/bindings/dotnet/SConscript index 94f874ee41cc4741cff950ef4a88478dcfc06b31..1636c6633546c6aa46a376af65af726f6c0d2e98 100644 --- a/src/bindings/dotnet/SConscript +++ b/src/bindings/dotnet/SConscript @@ -11,6 +11,7 @@ dotnetenv.Append(CCFLAGS=["-fpic", '-DSWIG', '-Wno-all', LIBS=['hammer'], LIBPATH=["../.."], SWIGFLAGS=["-DHAMMER_INTERNAL__NO_STDARG_H", + "-DSWIG2_CSHARP", "-Isrc/", "-csharp", 
"-dllimport","hammer_dotnet", "-namespace", "Hammer.Internal"]) @@ -27,7 +28,7 @@ csfiles = os.path.join(thisdir, "*.cs") # target to stand in for. hammer_wrap = AlwaysBuild(dotnetenv.Command(['hammer_wrap.c'], swig, ["rm %s/*.cs || true" % (thisdir,), - "swig $SWIGFLAGS $SOURCE"])) + "swig3.0 $SWIGFLAGS $SOURCE"])) libhammer_dotnet = dotnetenv.SharedLibrary(['hammer_dotnet'], hammer_wrap) hammer_dll = AlwaysBuild(dotnetenv.Command(['hammer.dll'], Glob('ext/*.cs'), '$CSC -t:library -unsafe -out:$TARGET %s/*.cs $SOURCE' %(thisdir,))) diff --git a/src/bindings/perl/SConscript b/src/bindings/perl/SConscript index 49b693a7035cabfe1914c0a2fc172d31a07e23dd..8a192a5a3ac05e5b1f83473f13fa3631d252b300 100644 --- a/src/bindings/perl/SConscript +++ b/src/bindings/perl/SConscript @@ -20,7 +20,7 @@ if 'PERL5LIB' in os.environ: swig = ['hammer.i'] -hammer_wrap = perlenv.Command(['hammer_wrap.c', 'hammer.pm'], swig, "swig $SWIGFLAGS $SOURCE") +hammer_wrap = perlenv.Command(['hammer_wrap.c', 'hammer.pm'], swig, "swig3.0 $SWIGFLAGS $SOURCE") makefile = perlenv.Command(['Makefile'], ['Makefile.PL'], "perl $SOURCE CC=" + perlenv['ENV']['CC']) targetdir = os.path.dirname(str(hammer_wrap[0].path)) diff --git a/src/bindings/php/SConscript b/src/bindings/php/SConscript index 34728af238c9a1b3ad478737e997921e8a0ff0b8..6791cbcc46d6c4f67fda5c756d46570ee8347c29 100644 --- a/src/bindings/php/SConscript +++ b/src/bindings/php/SConscript @@ -11,7 +11,7 @@ phpenv.Append(LIBS = ['hammer']) phpenv.Append(LIBPATH = ['../../']) swig = ['hammer.i'] -bindings_src = phpenv.Command(['hammer.php', 'hammer_wrap.c', 'php_hammer.h'], swig, 'swig -php -DHAMMER_INTERNAL__NO_STDARG_H -Isrc/ $SOURCE') +bindings_src = phpenv.Command(['hammer.php', 'hammer_wrap.c', 'php_hammer.h'], swig, 'swig3.0 -php -DHAMMER_INTERNAL__NO_STDARG_H -Isrc/ $SOURCE') libhammer_php = phpenv.SharedLibrary('hammer', ['hammer_wrap.c']) Default(swig, bindings_src, libhammer_php) diff --git a/src/bindings/python/SConscript 
b/src/bindings/python/SConscript index 5619347ccee631c3142f7a6cd4b2be8a608118a2..383e1c60d63e2f77ad97f122a78d70290b4cdea8 100644 --- a/src/bindings/python/SConscript +++ b/src/bindings/python/SConscript @@ -1,20 +1,34 @@ # -*- python -*- import os, os.path Import('env libhammer_shared testruns targets') -Import('llvm_defines') -Import('llvm_includes') +# LLVM-related flags +if GetOption("use_llvm"): + Import('llvm_defines') + Import('llvm_includes') pythonenv = env.Clone(IMPLICIT_COMMAND_DEPENDENCIES = 0) swig = pythonenv.Command("hammer.i", "../swig/hammer.i", Copy("$TARGET", "$SOURCE")) setup = ['setup.py'] pydir = os.path.join(env['BUILD_BASE'], 'src/bindings/python') -define_list = ','.join(llvm_defines) -inc_list = ' '.join(['-I' + e for e in llvm_includes]) -swig_opt_list = '-DHAMMER_INTERNAL__NO_STDARG_H -I../../ ' + inc_list -libhammer_python = pythonenv.Command(['hammer.py', 'hammer_wrap.c'], [swig, setup], \ - 'python ' + os.path.join(pydir, 'setup.py') + ' build_ext --inplace ' + inc_list + \ - ' --define=\"' + define_list + '\" --swig-opts=\"' + swig_opt_list + '\"') +if GetOption("use_llvm"): + define_list = ','.join(llvm_defines + ['HAMMER_LLVM_BACKEND']) + inc_list = ' '.join(['-I' + e for e in llvm_includes]) +else: + define_list = None + inc_list = None + +swig_opt_list = '-DHAMMER_INTERNAL__NO_STDARG_H -I../../' +arg_list = 'python ' + os.path.join(pydir, 'setup.py') + ' build_ext --swig=swig3.0 --inplace ' +if inc_list: + arg_list = arg_list + inc_list + swig_opt_list = swig_opt_list + ' ' + inc_list +if define_list: + arg_list = arg_list + ' --define=\"' + define_list + '\"' +arg_list = arg_list + ' --swig-opts=\"' + swig_opt_list + '\"' + +libhammer_python = pythonenv.Command(['hammer.py', 'hammer_wrap.c'], [swig, setup], arg_list) + Default(libhammer_python) pytestenv = pythonenv.Clone() diff --git a/src/hammer.c b/src/hammer.c index 2a7d5bc5be51059115b1af20a40c6a5de6633f2a..6b881554256414eccb13454b3ce773e133dfd54a 100644 --- a/src/hammer.c 
+++ b/src/hammer.c @@ -31,7 +31,23 @@ static HParserBackendVTable *backends[PB_MAX + 1] = { &h__llk_backend_vtable, &h__lalr_backend_vtable, &h__glr_backend_vtable, + /* + * Brittleness warning! + * + * We're using an enum as an index into this array (don't blame me...) + * so it's important that this array have the same size and order as + * the corresponding enum values in HParserBackend of src/hammer.h. + * Since callers use those enums as numeric constants to select a + * backend, dropping/reordering them breaks binary compatibility. + * If anyone adds any more optional backends in the future, don't + * #ifdef out those enum values in hammer.h, and do provide the + * 'missing' stub backend as an alternative here. + */ +#ifdef HAMMER_LLVM_BACKEND &h__llvm_backend_vtable, +#else + &h__missing_backend_vtable, +#endif }; diff --git a/src/hammer.h b/src/hammer.h index 821924fa7e267009df54885c31a6a03628528de7..b86e4ae04016b10548b4bb92eef02e2a2a4a9b59 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -46,6 +46,10 @@ typedef enum HParserBackend_ { PB_LLk, PB_LALR, PB_GLR, + /* + * PB_LLVM stays even if no LLVM backend compiled in, since these constants + * are exposed to callers. 
+ */ PB_LLVM, PB_MAX = PB_LLVM } HParserBackend; diff --git a/src/internal.h b/src/internal.h index 69e27a2763c9eeed22473c34d6cc8f780d4493ad..2b2d6004266680b5a4a8ae7d20da80506d7082da 100644 --- a/src/internal.h +++ b/src/internal.h @@ -24,11 +24,14 @@ #define HAMMER_INTERNAL__H #include <stdint.h> #include <assert.h> +#include <limits.h> #include <string.h> +#ifdef HAMMER_LLVM_BACKEND #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpedantic" #include <llvm-c/Core.h> #pragma GCC diagnostic pop +#endif #include "hammer.h" #include "platform.h" @@ -156,23 +159,90 @@ static inline void h_sarray_clear(HSArray *arr) { // }}} -typedef unsigned int *HCharset; +typedef unsigned int HCharsetWord; +#define CHARSET_WHOLE_WORD_MASK UINT_MAX + +typedef HCharsetWord *HCharset; + +#define CHARSET_BITS_PER_WORD (sizeof(HCharsetWord) * 8) +#define CHARSET_WORDS (256 / CHARSET_BITS_PER_WORD) +#define CHARSET_SIZE (CHARSET_WORDS * sizeof(HCharsetWord)) +#define CHARSET_BIT_IDX_TO_WORD(idx) \ + (((unsigned int)(idx)) / CHARSET_BITS_PER_WORD) +#define CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx) \ + (((unsigned int)(idx)) % CHARSET_BITS_PER_WORD) +#define CHART_WORD_AND_BIT_TO_BIT_IDX(word,bit) \ + ((uint8_t)(CHARSET_BITS_PER_WORD * ((unsigned int)(word)) + \ + ((unsigned int)(bit)))) +#define CHARSET_BIT_POS_IN_WORD_MASK(bit) \ + ((((HCharsetWord)(1)) << (bit)) & CHARSET_WHOLE_WORD_MASK) +/* Mask for all bits below a position */ +#define CHARSET_BIT_MASK_UP_TO_POS(bit) \ + ((CHARSET_BIT_POS_IN_WORD_MASK((bit)) - 1) & CHARSET_WHOLE_WORD_MASK) +/* Mask off all bits above and including a position */ +#define CHARSET_BIT_MASK_FROM_POS(bit) \ + ((~CHARSET_BIT_MASK_UP_TO_POS((bit))) & CHARSET_WHOLE_WORD_MASK) + +static inline HCharset copy_charset(HAllocator *mm__, HCharset in) { + HCharset cs = h_new(HCharsetWord, CHARSET_WORDS); + memcpy(cs, in, CHARSET_SIZE); + return cs; +} static inline HCharset new_charset(HAllocator* mm__) { - HCharset cs = h_new(unsigned int, 256 / 
(sizeof(unsigned int) * 8)); - memset(cs, 0, 32); // 32 bytes = 256 bits + HCharset cs = h_new(HCharsetWord, CHARSET_WORDS); + memset(cs, 0, CHARSET_SIZE); return cs; } +static inline void charset_complement(HCharset cs) { + for (unsigned int i = 0; i < CHARSET_WORDS; ++i) cs[i] = ~(cs[i]); +} + static inline int charset_isset(HCharset cs, uint8_t pos) { - return !!(cs[pos / (sizeof(*cs)*8)] & (1 << (pos % (sizeof(*cs)*8)))); + return !!(cs[CHARSET_BIT_IDX_TO_WORD(pos)] & + CHARSET_BIT_POS_IN_WORD_MASK(CHARSET_BIT_IDX_TO_BIT_IN_WORD(pos))); +} + +static inline void charset_restrict_to_range(HCharset cs, uint8_t idx_start, uint8_t idx_end) { + HCharsetWord mask; + + if (idx_end < idx_start) { + /* Range is empty, clear the charset */ + memset(cs, 0, CHARSET_SIZE); + } else { + /* Clear below, if any */ + if (CHARSET_BIT_IDX_TO_WORD(idx_start) > 0) { + memset(cs, 0, CHARSET_BIT_IDX_TO_WORD(idx_start) * sizeof(HCharsetWord)); + } + /* Note this partial start/ending word code still works if they are the same word */ + /* Mask partial starting word, if any */ + if (CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_start) != 0) { + mask = CHARSET_BIT_MASK_FROM_POS(CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_start)); + cs[CHARSET_BIT_IDX_TO_WORD(idx_start)] &= mask; + } + /* Mask partial ending word, if any */ + if (CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_end) != CHARSET_BITS_PER_WORD - 1) { + mask = CHARSET_BIT_MASK_UP_TO_POS(CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_end)); + mask |= CHARSET_BIT_POS_IN_WORD_MASK(CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_end)); + cs[CHARSET_BIT_IDX_TO_WORD(idx_end)] &= mask; + } + /* Clear above, if any */ + if (CHARSET_BIT_IDX_TO_WORD(idx_end) + 1 < CHARSET_WORDS) { + memset(cs + CHARSET_BIT_IDX_TO_WORD(idx_end) + 1, 0, + (CHARSET_WORDS - (CHARSET_BIT_IDX_TO_WORD(idx_end) + 1)) * + sizeof(HCharsetWord)); + } + } } static inline void charset_set(HCharset cs, uint8_t pos, int val) { - cs[pos / (sizeof(*cs)*8)] = + cs[CHARSET_BIT_IDX_TO_WORD(pos)] = val - ? 
cs[pos / (sizeof(*cs)*8)] | (1 << (pos % (sizeof(*cs)*8))) - : cs[pos / (sizeof(*cs)*8)] & ~(1 << (pos % (sizeof(*cs)*8))); + ? cs[CHARSET_BIT_IDX_TO_WORD(pos)] | CHARSET_BIT_POS_IN_WORD_MASK( + CHARSET_BIT_IDX_TO_BIT_IN_WORD(pos)) + : cs[CHARSET_BIT_IDX_TO_WORD(pos)] & ~CHARSET_BIT_POS_IN_WORD_MASK( + CHARSET_BIT_IDX_TO_BIT_IN_WORD(pos)); } typedef unsigned int HHashValue; @@ -326,7 +396,10 @@ extern HParserBackendVTable h__packrat_backend_vtable; extern HParserBackendVTable h__llk_backend_vtable; extern HParserBackendVTable h__lalr_backend_vtable; extern HParserBackendVTable h__glr_backend_vtable; +extern HParserBackendVTable h__missing_backend_vtable; +#ifdef HAMMER_LLVM_BACKEND extern HParserBackendVTable h__llvm_backend_vtable; +#endif // }}} // TODO(thequux): Set symbol visibility for these functions so that they aren't exported. @@ -418,13 +491,19 @@ struct HCFSequence_ { HCFChoice **items; // last one is NULL }; +#ifdef HAMMER_LLVM_BACKEND +typedef struct HLLVMParserCompileContext_ HLLVMParserCompileContext; +#endif + struct HParserVtable_ { HParseResult* (*parse)(void *env, HParseState *state); bool (*isValidRegular)(void *env); bool (*isValidCF)(void *env); bool (*compile_to_rvm)(HRVMProg *prog, void* env); // FIXME: forgot what the bool return value was supposed to mean. 
void (*desugar)(HAllocator *mm__, HCFStack *stk__, void *env); - bool (*llvm)(LLVMBuilderRef builder, LLVMValueRef func, LLVMModuleRef mod, void *env); +#ifdef HAMMER_LLVM_BACKEND + bool (*llvm)(HLLVMParserCompileContext *ctxt, void *env); +#endif bool higher; // false if primitive }; diff --git a/src/llvm.h b/src/llvm.h deleted file mode 100644 index 369f5729d54c0c0f3e2babec784a887cb0bc824e..0000000000000000000000000000000000000000 --- a/src/llvm.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef HAMMER_LLVM__H -#define HAMMER_LLVM__H - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpedantic" -#include <llvm-c/Core.h> -#pragma GCC diagnostic pop - -LLVMTypeRef llvm_inputstream, llvm_inputstreamptr, llvm_arena, llvm_arenaptr; -LLVMTypeRef llvm_parsedtoken, llvm_parsedtokenptr, llvm_parseresult, llvm_parseresultptr; - -void h_llvm_make_charset_membership_test(LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder, - LLVMValueRef r, HCharset cs, - LLVMBasicBlockRef yes, LLVMBasicBlockRef no); -void h_llvm_make_tt_suint(LLVMModuleRef mod, LLVMBuilderRef builder, - LLVMValueRef stream, LLVMValueRef arena, - LLVMValueRef r, LLVMValueRef *mr_out); - -#endif // #ifndef HAMMER_LLVM__H diff --git a/src/parsers/bits.c b/src/parsers/bits.c index ae3243006c96fafc973bd1ca1e00be17cefab113..bd9fa8e064e7431a1b84a3de520ce35fc43fd7f7 100644 --- a/src/parsers/bits.c +++ b/src/parsers/bits.c @@ -1,10 +1,12 @@ #include <assert.h> +#ifdef HAMMER_LLVM_BACKEND #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpedantic" #include <llvm-c/Core.h> #pragma GCC diagnostic pop +#include "../backends/llvm/llvm.h" +#endif #include "parser_internal.h" -#include "../llvm.h" struct bits_env { uint8_t length; @@ -22,76 +24,46 @@ static HParseResult* parse_bits(void* env, HParseState *state) { return make_result(state->arena, result); } -static bool bits_llvm(LLVMBuilderRef builder, LLVMValueRef func, LLVMModuleRef mod, void* env) { - /* %result = alloca 
%struct.HParsedToken_*, align 8 */ - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wunused-variable" - LLVMValueRef result = LLVMBuildAlloca(builder, llvm_parsedtoken, "result"); - #pragma GCC diagnostic pop - /* store i8* %env, i8** %1, align 8 */ - /* store %struct.HParseState_* %state, %struct.HParseState_** %2, align 8 */ - /* %3 = load i8** %1, align 8 */ - /* %4 = bitcast i8* %3 to %struct.bits_env* */ - /* store %struct.bits_env* %4, %struct.bits_env** %env, align 8 */ - /* %5 = load %struct.HParseState_** %2, align 8 */ - /* %6 = getelementptr inbounds %struct.HParseState_* %5, i32 0, i32 2 */ - /* %7 = load %struct.HArena_** %6, align 8 */ - /* %8 = call noalias i8* @h_arena_malloc(%struct.HArena_* %7, i64 48) */ - /* %9 = bitcast i8* %8 to %struct.HParsedToken_* */ - /* store %struct.HParsedToken_* %9, %struct.HParsedToken_** %result, align 8 */ - /* %10 = load %struct.bits_env** %env_, align 8 */ - /* %11 = getelementptr inbounds %struct.bits_env* %10, i32 0, i32 1 */ - /* %12 = load i8* %11, align 1 */ - /* %13 = zext i8 %12 to i32 */ - /* %14 = icmp ne i32 %13, 0 */ - /* %15 = select i1 %14, i32 4, i32 8 */ - /* %16 = load %struct.HParsedToken_** %result, align 8 */ - /* %17 = getelementptr inbounds %struct.HParsedToken_* %16, i32 0, i32 0 */ - /* store i32 %15, i32* %17, align 4 */ - /* %18 = load %struct.bits_env** %env_, align 8 */ - /* %19 = getelementptr inbounds %struct.bits_env* %18, i32 0, i32 1 */ - /* %20 = load i8* %19, align 1 */ - /* %21 = icmp ne i8 %20, 0 */ - /* br i1 %21, label %22, label %33 */ - - /* ; <label>:22 ; preds = %0 */ - /* %23 = load %struct.HParseState_** %2, align 8 */ - /* %24 = getelementptr inbounds %struct.HParseState_* %23, i32 0, i32 1 */ - /* %25 = load %struct.bits_env** %env_, align 8 */ - /* %26 = getelementptr inbounds %struct.bits_env* %25, i32 0, i32 0 */ - /* %27 = load i8* %26, align 1 */ - /* %28 = zext i8 %27 to i32 */ - /* %29 = call i64 @h_read_bits(%struct.HInputStream_* %24, i32 %28, 
i8 signext 1) */ - /* %30 = load %struct.HParsedToken_** %result, align 8 */ - /* %31 = getelementptr inbounds %struct.HParsedToken_* %30, i32 0, i32 1 */ - /* %32 = bitcast %union.anon* %31 to i64* */ - /* store i64 %29, i64* %32, align 8 */ - /* br label %44 */ - - /* ; <label>:33 ; preds = %0 */ - /* %34 = load %struct.HParseState_** %2, align 8 */ - /* %35 = getelementptr inbounds %struct.HParseState_* %34, i32 0, i32 1 */ - /* %36 = load %struct.bits_env** %env_, align 8 */ - /* %37 = getelementptr inbounds %struct.bits_env* %36, i32 0, i32 0 */ - /* %38 = load i8* %37, align 1 */ - /* %39 = zext i8 %38 to i32 */ - /* %40 = call i64 @h_read_bits(%struct.HInputStream_* %35, i32 %39, i8 signext 0) */ - /* %41 = load %struct.HParsedToken_** %result, align 8 */ - /* %42 = getelementptr inbounds %struct.HParsedToken_* %41, i32 0, i32 1 */ - /* %43 = bitcast %union.anon* %42 to i64* */ - /* store i64 %40, i64* %43, align 8 */ - /* br label %44 */ - - /* ; <label>:44 ; preds = %33, %22 */ - /* %45 = load %struct.HParseState_** %2, align 8 */ - /* %46 = getelementptr inbounds %struct.HParseState_* %45, i32 0, i32 2 */ - /* %47 = load %struct.HArena_** %46, align 8 */ - /* %48 = load %struct.HParsedToken_** %result, align 8 */ - /* %49 = call %struct.HParseResult_* @make_result(%struct.HArena_* %47, %struct.HParsedToken_* %48) */ - /* ret %struct.HParseResult_* %49 */ +#ifdef HAMMER_LLVM_BACKEND + +static bool bits_llvm(HLLVMParserCompileContext *ctxt, void* env) { + /* Emit LLVM IR to parse ((struct bits_env *)env)->length bits */ + + if (!ctxt) return false; + + struct bits_env *env_ = env; + /* Error out on unsupported length */ + if (env_->length > 64 || env_->length == 0) return false; + /* Set up params for call to h_read_bits */ + LLVMValueRef bits_args[3]; + bits_args[0] = ctxt->stream; + bits_args[1] = LLVMConstInt(LLVMInt32Type(), env_->length, 0); + bits_args[2] = LLVMConstInt(LLVMInt8Type(), env_->signedp ? 
1 : 0, 0); + + /* Set up basic blocks: entry, success and failure branches, then exit */ + LLVMBasicBlockRef bits_bb = LLVMAppendBasicBlock(ctxt->func, "bits"); + + /* Basic block: entry */ + LLVMBuildBr(ctxt->builder, bits_bb); + LLVMPositionBuilderAtEnd(ctxt->builder, bits_bb); + + /* Call to h_read_bits() */ + // %read_bits = call i64 @h_read_bits(%struct.HInputStream_* %8, i32 env_->length, i8 signext env_->signedp) + LLVMValueRef bits = LLVMBuildCall(ctxt->builder, + LLVMGetNamedFunction(ctxt->mod, "h_read_bits"), bits_args, 3, "read_bits"); + + /* Make an HParseResult out of it */ + LLVMValueRef mr; + h_llvm_make_tt_suint(ctxt, env_->length, env_->signedp, bits, &mr); + + /* Return mr */ + LLVMBuildRet(ctxt->builder, mr); + return true; } +#endif + static HParsedToken *reshape_bits(const HParseResult *p, void* signedp_p) { // signedp == NULL iff unsigned bool signedp = (signedp_p != NULL); @@ -177,7 +149,9 @@ static const HParserVtable bits_vt = { .isValidCF = h_true, .desugar = desugar_bits, .compile_to_rvm = bits_ctrvm, +#ifdef HAMMER_LLVM_BACKEND .llvm = bits_llvm, +#endif .higher = false, }; diff --git a/src/parsers/ch.c b/src/parsers/ch.c index 1c396a2f3c8c2e2e8a7433964c397f8776688462..e22ed8c105673fd7b989b60e623e83ee9bead050 100644 --- a/src/parsers/ch.c +++ b/src/parsers/ch.c @@ -1,11 +1,13 @@ #include <stdint.h> #include <assert.h> +#ifdef HAMMER_LLVM_BACKEND #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpedantic" #include <llvm-c/Core.h> #pragma GCC diagnostic pop +#include "../backends/llvm/llvm.h" +#endif #include "parser_internal.h" -#include "../llvm.h" static HParseResult* parse_ch(void* env, HParseState *state) { uint8_t c = (uint8_t)(uintptr_t)(env); @@ -46,77 +48,83 @@ static bool ch_ctrvm(HRVMProg *prog, void* env) { return true; } -static bool ch_llvm(LLVMBuilderRef builder, LLVMValueRef func, LLVMModuleRef mod, void* env) { +#ifdef HAMMER_LLVM_BACKEND + +static bool ch_llvm(HLLVMParserCompileContext *ctxt, void* env) { // 
Build a new LLVM function to parse a character // Set up params for calls to h_read_bits() and h_arena_malloc() LLVMValueRef bits_args[3]; - LLVMValueRef stream = LLVMGetFirstParam(func); - stream = LLVMBuildBitCast(builder, stream, llvm_inputstreamptr, "stream"); - bits_args[0] = stream; + bits_args[0] = ctxt->stream; bits_args[1] = LLVMConstInt(LLVMInt32Type(), 8, 0); bits_args[2] = LLVMConstInt(LLVMInt8Type(), 0, 0); - LLVMValueRef arena = LLVMGetLastParam(func); // Set up basic blocks: entry, success and failure branches, then exit - LLVMBasicBlockRef entry = LLVMAppendBasicBlock(func, "ch_entry"); - LLVMBasicBlockRef success = LLVMAppendBasicBlock(func, "ch_success"); - LLVMBasicBlockRef end = LLVMAppendBasicBlock(func, "ch_end"); + LLVMBasicBlockRef entry = LLVMAppendBasicBlock(ctxt->func, "ch_entry"); + LLVMBasicBlockRef success = LLVMAppendBasicBlock(ctxt->func, "ch_success"); + LLVMBasicBlockRef end = LLVMAppendBasicBlock(ctxt->func, "ch_end"); // Basic block: entry - LLVMPositionBuilderAtEnd(builder, entry); + LLVMBuildBr(ctxt->builder, entry); + LLVMPositionBuilderAtEnd(ctxt->builder, entry); // Call to h_read_bits() // %read_bits = call i64 @h_read_bits(%struct.HInputStream_* %8, i32 8, i8 signext 0) - LLVMValueRef bits = LLVMBuildCall(builder, LLVMGetNamedFunction(mod, "h_read_bits"), bits_args, 3, "read_bits"); + LLVMValueRef bits = LLVMBuildCall(ctxt->builder, + LLVMGetNamedFunction(ctxt->mod, "h_read_bits"), bits_args, 3, "read_bits"); // %2 = trunc i64 %read_bits to i8 - LLVMValueRef r = LLVMBuildTrunc(builder, bits, LLVMInt8Type(), ""); // do we actually need this? + LLVMValueRef r = LLVMBuildTrunc(ctxt->builder, + bits, LLVMInt8Type(), ""); // do we actually need this? 
// Check if h_read_bits succeeded // %"c == r" = icmp eq i8 -94, %2 ; the -94 comes from c_ uint8_t c_ = (uint8_t)(uintptr_t)(env); LLVMValueRef c = LLVMConstInt(LLVMInt8Type(), c_, 0); - LLVMValueRef icmp = LLVMBuildICmp(builder, LLVMIntEQ, c, r, "c == r"); + LLVMValueRef icmp = LLVMBuildICmp(ctxt->builder, LLVMIntEQ, c, r, "c == r"); // Branch so success or failure basic block, as appropriate // br i1 %"c == r", label %ch_success, label %ch_fail - LLVMBuildCondBr(builder, icmp, success, end); + LLVMBuildCondBr(ctxt->builder, icmp, success, end); // Basic block: success - LLVMPositionBuilderAtEnd(builder, success); + LLVMPositionBuilderAtEnd(ctxt->builder, success); /* Make a token */ LLVMValueRef mr; - h_llvm_make_tt_suint(mod, builder, stream, arena, r, &mr); + h_llvm_make_tt_suint(ctxt, 8, 0, r, &mr); // br label %ch_end - LLVMBuildBr(builder, end); + LLVMBuildBr(ctxt->builder, end); // Basic block: end - LLVMPositionBuilderAtEnd(builder, end); + LLVMPositionBuilderAtEnd(ctxt->builder, end); // %rv = phi %struct.HParseResult_.3* [ %make_result, %ch_success ], [ null, %ch_entry ] - LLVMValueRef rv = LLVMBuildPhi(builder, llvm_parseresultptr, "rv"); + LLVMValueRef rv = LLVMBuildPhi(ctxt->builder, ctxt->llvm_parseresultptr, "rv"); LLVMBasicBlockRef rv_phi_incoming_blocks[] = { success, entry }; LLVMValueRef rv_phi_incoming_values[] = { mr, - LLVMConstNull(llvm_parseresultptr) + LLVMConstNull(ctxt->llvm_parseresultptr) }; LLVMAddIncoming(rv, rv_phi_incoming_values, rv_phi_incoming_blocks, 2); // ret %struct.HParseResult_.3* %rv - LLVMBuildRet(builder, rv); + LLVMBuildRet(ctxt->builder, rv); return true; } +#endif /* defined(HAMMER_LLVM_BACKEND) */ + static const HParserVtable ch_vt = { .parse = parse_ch, .isValidRegular = h_true, .isValidCF = h_true, .desugar = desugar_ch, .compile_to_rvm = ch_ctrvm, +#ifdef HAMMER_LLVM_BACKEND .llvm = ch_llvm, +#endif .higher = false, }; diff --git a/src/parsers/charset.c b/src/parsers/charset.c index 
2f73da722408c1575eab883afd0242eef63eacd2..907fe927a79184c3c292a96630a49fc83fc0b1e3 100644 --- a/src/parsers/charset.c +++ b/src/parsers/charset.c @@ -1,12 +1,14 @@ #include <assert.h> #include <string.h> #include "../internal.h" +#ifdef HAMMER_LLVM_BACKEND #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpedantic" #include <llvm-c/Core.h> #pragma GCC diagnostic pop +#include "../backends/llvm/llvm.h" +#endif /* defined(HAMMER_LLVM_BACKEND) */ #include "parser_internal.h" -#include "../llvm.h" static HParseResult* parse_charset(void *env, HParseState *state) { uint8_t in = h_read_bits(&state->input_stream, 8, false); @@ -75,82 +77,89 @@ static bool cs_ctrvm(HRVMProg *prog, void *env) { return true; } -static bool cs_llvm(LLVMBuilderRef builder, LLVMValueRef func, - LLVMModuleRef mod, void* env) { +#ifdef HAMMER_LLVM_BACKEND + +static bool cs_llvm(HLLVMParserCompileContext *ctxt, void* env) { /* * LLVM to build a function to parse a charset; the args are a stream and an * arena. 
*/ + bool ok; - LLVMValueRef stream = LLVMGetFirstParam(func); - stream = LLVMBuildBitCast(builder, stream, llvm_inputstreamptr, "stream"); - LLVMValueRef arena = LLVMGetLastParam(func); + if (!ctxt) return false; /* Set up our basic blocks */ - LLVMBasicBlockRef entry = LLVMAppendBasicBlock(func, "cs_entry"); - LLVMBasicBlockRef success = LLVMAppendBasicBlock(func, "cs_success"); - LLVMBasicBlockRef fail = LLVMAppendBasicBlock(func, "cs_fail"); - LLVMBasicBlockRef end = LLVMAppendBasicBlock(func, "cs_end"); + LLVMBasicBlockRef entry = LLVMAppendBasicBlock(ctxt->func, "cs_entry"); + LLVMBasicBlockRef success = LLVMAppendBasicBlock(ctxt->func, "cs_success"); + LLVMBasicBlockRef fail = LLVMAppendBasicBlock(ctxt->func, "cs_fail"); + LLVMBasicBlockRef end = LLVMAppendBasicBlock(ctxt->func, "cs_end"); /* Basic block: entry */ - LLVMPositionBuilderAtEnd(builder, entry); + LLVMBuildBr(ctxt->builder, entry); + LLVMPositionBuilderAtEnd(ctxt->builder, entry); /* First we read the char */ LLVMValueRef bits_args[3]; - bits_args[0] = stream; + bits_args[0] = ctxt->stream; bits_args[1] = LLVMConstInt(LLVMInt32Type(), 8, 0); bits_args[2] = LLVMConstInt(LLVMInt8Type(), 0, 0); - LLVMValueRef bits = LLVMBuildCall(builder, LLVMGetNamedFunction(mod, "h_read_bits"), bits_args, 3, "read_bits"); - LLVMValueRef r = LLVMBuildTrunc(builder, bits, LLVMInt8Type(), ""); // TODO Necessary? (same question in ch_llvm()) + LLVMValueRef bits = LLVMBuildCall(ctxt->builder, + LLVMGetNamedFunction(ctxt->mod, "h_read_bits"), bits_args, 3, "read_bits"); + LLVMValueRef r = + LLVMBuildTrunc(ctxt->builder, bits, LLVMInt8Type(), ""); // TODO Necessary? 
(same question in ch_llvm()) /* We have a char, need to check if it's in the charset */ HCharset cs = (HCharset)env; /* Branch to either success or end, conditional on whether r is in cs */ - h_llvm_make_charset_membership_test(mod, func, builder, r, cs, success, fail); + ok = h_llvm_make_charset_membership_test(ctxt, r, cs, success, fail); /* Basic block: success */ - LLVMPositionBuilderAtEnd(builder, success); + LLVMPositionBuilderAtEnd(ctxt->builder, success); LLVMValueRef mr; - h_llvm_make_tt_suint(mod, builder, stream, arena, r, &mr); + h_llvm_make_tt_suint(ctxt, 8, 0, r, &mr); /* br label %ch_end */ - LLVMBuildBr(builder, end); + LLVMBuildBr(ctxt->builder, end); /* Basic block: fail */ - LLVMPositionBuilderAtEnd(builder, fail); + LLVMPositionBuilderAtEnd(ctxt->builder, fail); /* * We just branch straight to end; this exists so that the phi node in * end knows where all the incoming edges are from, rather than needing * some basic block constructed in h_llvm_make_charset_membership_test() */ - LLVMBuildBr(builder, end); + LLVMBuildBr(ctxt->builder, end); /* Basic block: end */ - LLVMPositionBuilderAtEnd(builder, end); + LLVMPositionBuilderAtEnd(ctxt->builder, end); // %rv = phi %struct.HParseResult_.3* [ %make_result, %ch_success ], [ null, %ch_entry ] - LLVMValueRef rv = LLVMBuildPhi(builder, llvm_parseresultptr, "rv"); + LLVMValueRef rv = LLVMBuildPhi(ctxt->builder, ctxt->llvm_parseresultptr, "rv"); LLVMBasicBlockRef rv_phi_incoming_blocks[] = { success, fail }; LLVMValueRef rv_phi_incoming_values[] = { mr, - LLVMConstNull(llvm_parseresultptr) + LLVMConstNull(ctxt->llvm_parseresultptr) }; LLVMAddIncoming(rv, rv_phi_incoming_values, rv_phi_incoming_blocks, 2); // ret %struct.HParseResult_.3* %rv - LLVMBuildRet(builder, rv); + LLVMBuildRet(ctxt->builder, rv); - return true; + return ok; } +#endif /* defined(HAMMER_LLVM_BACKEND) */ + static const HParserVtable charset_vt = { .parse = parse_charset, .isValidRegular = h_true, .isValidCF = h_true, .desugar = 
desugar_charset, .compile_to_rvm = cs_ctrvm, +#ifdef HAMMER_LLVM_BACKEND .llvm = cs_llvm, +#endif .higher = false, }; diff --git a/src/t_parser.c b/src/t_parser.c index f7c4baf7c0b59342949b3e5b0a5ce1d1b913ac2b..304362e49c61775cdef9edb8c459365695e5af5e 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -21,10 +21,19 @@ static void test_ch(gconstpointer backend) { } static void test_ch_range(gconstpointer backend) { - const HParser *range_ = h_ch_range('a', 'c'); - - g_check_parse_match(range_, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1, "u0x62"); - g_check_parse_failed(range_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1); + const HParser *range_1 = h_ch_range('a', 'c'); + const HParser *range_2 = h_ch_range('a', 'z'); + const HParser *range_3 = h_ch_range('A', 'z'); + const HParser *range_all = h_ch_range(0, 255); + + g_check_parse_match(range_1, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1, "u0x62"); + g_check_parse_failed(range_1, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1); + g_check_parse_match(range_2, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1, "u0x62"); + g_check_parse_failed(range_2, (HParserBackend)GPOINTER_TO_INT(backend), "C", 1); + g_check_parse_match(range_3, (HParserBackend)GPOINTER_TO_INT(backend), "B", 1, "u0x42"); + g_check_parse_failed(range_3, (HParserBackend)GPOINTER_TO_INT(backend), "2", 1); + /* range_all never fails anything */ + g_check_parse_match(range_all, (HParserBackend)GPOINTER_TO_INT(backend), "B", 1, "u0x42"); } //@MARK_START @@ -213,18 +222,71 @@ static void test_action(gconstpointer backend) { static void test_in(gconstpointer backend) { uint8_t options[3] = { 'a', 'b', 'c' }; + uint8_t odds[128]; + uint8_t _1_mod_4[64]; + uint8_t scattered[3] = { 'A', 'b', 'z' }; + int i; + const HParser *in_ = h_in(options, 3); g_check_parse_match(in_, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1, "u0x62"); g_check_parse_failed(in_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1); + for (i = 0; i < 128; ++i) 
odds[i] = (uint8_t)(2*i + 1); + const HParser *odds_ = h_in(odds, 128); + g_check_parse_match(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "c", 1, "u0x63"); + g_check_parse_match(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "E", 1, "u0x45"); + g_check_parse_failed(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1); + g_check_parse_failed(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "F", 1); + + for (i = 0; i < 64; ++i) _1_mod_4[i] = (uint8_t)(4*i + 1); + const HParser *_1_mod_4_ = h_in(_1_mod_4, 64); + g_check_parse_match(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61"); + g_check_parse_match(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "E", 1, "u0x45"); + g_check_parse_failed(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1); + g_check_parse_failed(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "c", 1); + g_check_parse_failed(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "F", 1); + + const HParser *scattered_ = h_in(scattered, 3); + g_check_parse_match(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "A", 1, "u0x41"); + g_check_parse_match(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1, "u0x62"); + g_check_parse_match(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "z", 1, "u0x7a"); + g_check_parse_failed(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "y", 1); + g_check_parse_failed(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "F", 1); } static void test_not_in(gconstpointer backend) { uint8_t options[3] = { 'a', 'b', 'c' }; + uint8_t odds[128]; + uint8_t _1_mod_4[64]; + uint8_t scattered[3] = { 'A', 'b', 'z' }; + int i; + const HParser *not_in_ = h_not_in(options, 3); g_check_parse_match(not_in_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1, "u0x64"); g_check_parse_failed(not_in_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1); + for (i = 0; i < 128; ++i) odds[i] = (uint8_t)(2*i + 1); + const HParser *odds_ = h_not_in(odds, 128); + 
g_check_parse_match(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "d", 1, "u0x64"); + g_check_parse_match(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "F", 1, "u0x46"); + g_check_parse_failed(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "c", 1); + g_check_parse_failed(odds_, (HParserBackend)GPOINTER_TO_INT(backend), "E", 1); + + for (i = 0; i < 64; ++i) _1_mod_4[i] = (uint8_t)(4*i + 1); + const HParser *_1_mod_4_ = h_not_in(_1_mod_4, 64); + g_check_parse_match(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1, "u0x62"); + g_check_parse_match(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "F", 1, "u0x46"); + g_check_parse_failed(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "e", 1); + g_check_parse_failed(_1_mod_4_, (HParserBackend)GPOINTER_TO_INT(backend), "A", 1); + + const HParser *scattered_ = h_not_in(scattered, 3); + g_check_parse_match(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "B", 1, "u0x42"); + g_check_parse_match(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61"); + g_check_parse_match(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "y", 1, "u0x79"); + g_check_parse_failed(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "A", 1); + g_check_parse_failed(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "b", 1); + g_check_parse_failed(scattered_, (HParserBackend)GPOINTER_TO_INT(backend), "z", 1); + } static void test_end_p(gconstpointer backend) { @@ -962,6 +1024,18 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/glr/result_length", GINT_TO_POINTER(PB_GLR), test_result_length); g_test_add_data_func("/core/parser/glr/token_position", GINT_TO_POINTER(PB_GLR), test_token_position); +#ifdef HAMMER_LLVM_BACKEND g_test_add_data_func("/core/parser/llvm/ch", GINT_TO_POINTER(PB_LLVM), test_ch); g_test_add_data_func("/core/parser/llvm/ch_range", GINT_TO_POINTER(PB_LLVM), test_ch_range); + g_test_add_data_func("/core/parser/llvm/int64", GINT_TO_POINTER(PB_LLVM), 
test_int64); + g_test_add_data_func("/core/parser/llvm/int32", GINT_TO_POINTER(PB_LLVM), test_int32); + g_test_add_data_func("/core/parser/llvm/int16", GINT_TO_POINTER(PB_LLVM), test_int16); + g_test_add_data_func("/core/parser/llvm/int8", GINT_TO_POINTER(PB_LLVM), test_int8); + g_test_add_data_func("/core/parser/llvm/uint64", GINT_TO_POINTER(PB_LLVM), test_uint64); + g_test_add_data_func("/core/parser/llvm/uint32", GINT_TO_POINTER(PB_LLVM), test_uint32); + g_test_add_data_func("/core/parser/llvm/uint16", GINT_TO_POINTER(PB_LLVM), test_uint16); + g_test_add_data_func("/core/parser/llvm/uint8", GINT_TO_POINTER(PB_LLVM), test_uint8); + g_test_add_data_func("/core/parser/llvm/in", GINT_TO_POINTER(PB_LLVM), test_in); + g_test_add_data_func("/core/parser/llvm/not_in", GINT_TO_POINTER(PB_LLVM), test_not_in); +#endif /* defined(HAMMER_LLVM_BACKEND) */ }