Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • hammer/hammer
  • mlp/hammer
  • xentrac/hammer
  • pesco/hammer
  • letitiali/hammer
  • nobody/hammer
  • kia/hammer-sandbox
  • vyrus001/hammer
  • denleylam/hammer
9 results
Show changes
Commits on Source (104)
Showing
with 2044 additions and 28 deletions
......@@ -25,3 +25,6 @@ libhammer.pc
*.os
*.pyc
*.gem
*.ll
libhammer.pc
TestResult.xml
......@@ -102,6 +102,7 @@ matrix:
env: BINDINGS=cpp CC=clang
before_install:
- sudo apt-get update -qq
- sudo apt-get install llvm-3.8
- sudo apt-get install -y lcov
- gem install coveralls-lcov
- if [ "$BINDINGS" != "none" ]; then sudo sh -c 'echo "deb http://archive.ubuntu.com/ubuntu trusty-backports main restricted universe multiverse" >> /etc/apt/sources.list'; sudo apt-get update -qq; sudo apt-get install -yqq swig3.0/trusty-backports; fi
......
......@@ -6,14 +6,19 @@ import os
import os.path
import platform
import sys
from distutils.version import LooseVersion
import re
import subprocess
default_install_dir='/usr/local'
if platform.system() == 'Windows':
default_install_dir = 'build' # no obvious place for installation on Windows
vars = Variables(None, ARGUMENTS)
vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packaging scripts)", None, PathVariable.PathIsDirCreate))
vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept))
vars.Add(PathVariable('DESTDIR', 'Root directory to install in (useful for packaging scripts)', None, PathVariable.PathIsDirCreate))
vars.Add(PathVariable('prefix', 'Where to install in the FHS', default_install_dir, PathVariable.PathAccept))
vars.Add(PathVariable('libdir', "Where to install libraries", None, PathVariable.PathAccept))
vars.Add(PathVariable('includedir', "Where to install headers", None, PathVariable.PathAccept))
vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['cpp', 'dotnet', 'perl', 'php', 'python', 'ruby']))
vars.Add('python', 'Python interpreter', 'python')
......@@ -51,13 +56,20 @@ if 'DESTDIR' in env:
print('--!!-- you want; files will be installed in', file=sys.stderr)
print('--!!-- %s' % (calcInstallPath('$prefix'),), file=sys.stderr)
env['libpath'] = calcInstallPath('$prefix', 'lib')
env['incpath'] = calcInstallPath('$prefix', 'include', 'hammer')
env['parsersincpath'] = calcInstallPath('$prefix', 'include', 'hammer', 'parsers')
env['backendsincpath'] = calcInstallPath('$prefix', 'include', 'hammer', 'backends')
env['pkgconfigpath'] = calcInstallPath('$prefix', 'lib', 'pkgconfig')
env.ScanReplace('libhammer.pc.in')
if 'includedir' in env:
env['incpath'] = calcInstallPath("$includedir", "hammer")
else:
env['includedir'] = os.path.abspath(os.path.join(*map(env.subst, ["$prefix", "include"])))
env['incpath'] = calcInstallPath("$prefix", "include", "hammer")
if 'libdir' in env:
env['libpath'] = calcInstallPath("$libdir")
env['pkgconfigpath'] = calcInstallPath("$libdir", "pkgconfig")
else:
env['libpath'] = calcInstallPath("$prefix", "lib")
env['pkgconfigpath'] = calcInstallPath("$prefix", "lib", "pkgconfig")
env['libdir'] = os.path.abspath(os.path.join(*map(env.subst, ["$prefix", "lib"])))
env['parsersincpath'] = calcInstallPath("$includedir", "hammer", "parsers")
env['backendsincpath'] = calcInstallPath("$includedir", "hammer", "backends")
AddOption('--variant',
dest='variant',
......@@ -85,6 +97,18 @@ AddOption('--tests',
action='store_true',
help='Build tests')
# Command-line switches for the optional LLVM backend. Both options write to
# the same destination ("use_llvm"), which the rest of the build reads back
# with GetOption("use_llvm"). Both default to False, so the backend is only
# built when --enable-llvm-backend is given.
AddOption("--disable-llvm-backend",
dest="use_llvm",
default=False,
action="store_false",
help="Disable the LLVM backend (and don't require LLVM library dependencies)")
AddOption("--enable-llvm-backend",
dest="use_llvm",
default=False,
action="store_true",
help="Enable the LLVM backend (and require LLVM library dependencies)")
env['CC'] = os.getenv('CC') or env['CC']
env['CXX'] = os.getenv('CXX') or env['CXX']
......@@ -119,20 +143,24 @@ elif env['PLATFORM'] == 'win32':
else:
env.MergeFlags('-lrt')
if GetOption("use_llvm"):
# Overridable default path to llvm-config
env['LLVM_CONFIG'] = "llvm-config"
env["LLVM_CONFIG"] = os.getenv("LLVM_CONFIG") or env["LLVM_CONFIG"]
env.MergeFlags("-DHAMMER_LLVM_BACKEND")
if GetOption('coverage'):
env.Append(CFLAGS=['--coverage'],
CXXFLAGS=['--coverage'],
LDFLAGS=['--coverage'])
if env['CC'] == 'gcc':
env.Append(LIBS=['gcov'])
else:
env.ParseConfig('llvm-config --ldflags')
dbg = env.Clone(VARIANT='debug')
if env['CC'] == 'cl':
dbg.Append(CCFLAGS=['/Z7'])
else:
dbg.Append(CCFLAGS=['-g'])
dbg.MergeFlags("-g -O0")
opt = env.Clone(VARIANT='opt')
if env['CC'] == 'cl':
......@@ -150,17 +178,148 @@ env['ENV'].update(x for x in os.environ.items() if x[0].startswith('CCC_'))
#rootpath = env['ROOTPATH'] = os.path.abspath('.')
#env.Append(CPPPATH=os.path.join('#', 'hammer'))
if GetOption("use_llvm"):
    # Set up LLVM config stuff to export.
    # Some llvm versions are old and will not work; some require --system-libs
    # with llvm-config, and some will break if given it.
    #
    # universal_newlines=True makes communicate() hand back text instead of
    # bytes on Python 3, so the output can be fed to LooseVersion and to str
    # regexes; on Python 2 the result is still a str.
    llvm_config_proc = subprocess.Popen('%s --version' % env["LLVM_CONFIG"],
                                        shell=True,
                                        stdin=subprocess.PIPE,
                                        stdout=subprocess.PIPE,
                                        universal_newlines=True)
    # Kept as the (stdout, stderr) pair because this name is exported to the
    # SConscripts further down.
    llvm_config_version = llvm_config_proc.communicate()
    llvm_version_string = llvm_config_version[0].strip()

    # With shell=True a missing llvm-config does not raise; it just yields a
    # non-zero exit status and empty output. Report that as what it is rather
    # than as "version too old".
    if llvm_config_proc.returncode != 0 or not llvm_version_string:
        print("%s failed. Make sure you have LLVM and clang installed." % env["LLVM_CONFIG"])
        Exit(1)

    llvm_version = LooseVersion(llvm_version_string)

    if llvm_version < LooseVersion("3.6"):
        print("This LLVM version %s is too old" % llvm_version_string)
        Exit(1)

    if llvm_version < LooseVersion("3.9") and llvm_version >= LooseVersion("3.5"):
        llvm_system_libs_flag = "--system-libs"
    else:
        llvm_system_libs_flag = ""

    # Only keep one copy of this
    llvm_required_components = "core executionengine mcjit analysis x86codegen x86info"

    # Stubbing this out so we can implement static-only mode if needed later
    llvm_use_shared = True

    # Can we ask for shared/static from llvm-config?
    if llvm_version < LooseVersion("3.9"):
        # No: there is no linkage flag, so the shared library name has to be
        # derived from the version number instead.
        llvm_linkage_type_flag = ""
        llvm_use_computed_shared_lib_name = True
    else:
        # Yes: llvm-config can be told which linkage we want.
        llvm_use_computed_shared_lib_name = False
        if llvm_use_shared:
            llvm_linkage_type_flag = "--link-shared"
        else:
            llvm_linkage_type_flag = "--link-static"

    if llvm_use_computed_shared_lib_name:
        # Pull out the major and minor version numbers to form "LLVM-X.Y"
        m = re.match(r"^(\d+)\.(\d+).*$", llvm_version_string)
        if m:
            llvm_computed_shared_lib_name = "LLVM-%d.%d" % (int(m.group(1)), int(m.group(2)))
        else:
            print("Couldn't compute shared library name from LLVM version '%s', but needed to" %
                  llvm_config_version[0])
            Exit(1)
    else:
        # We won't be needing it
        llvm_computed_shared_lib_name = None
# llvm-config 'helpfully' supplies -g and -O flags; this class strips them.
# It is meant to be passed as the custom `function` argument of ParseConfig,
# and is a class with a method (rather than a bare function) so the instance
# can be passed around with scons Export/Import.
class LLVMConfigSanitizer:
    """Filter debug and optimisation flags out of llvm-config output."""

    # Matches any flag that starts with -g or -O (e.g. -g, -ggdb, -O2).
    # Compiled once here instead of on every sanitize() call.
    _DROPPED_FLAG = re.compile(r"^-[gO]")

    def sanitize(self, env, cmd, unique=1):
        """Merge the flags in `cmd` into `env`, minus any -g/-O flags.

        env    -- object whose MergeFlags(flags, unique) receives the result
        cmd    -- whitespace-separated flag string (output from llvm-config)
        unique -- passed through unchanged to env.MergeFlags
        """
        kept_flags = [flag for flag in cmd.split()
                      if not self._DROPPED_FLAG.match(flag)]
        env.MergeFlags(' '.join(kept_flags), unique)
llvm_config_sanitizer = LLVMConfigSanitizer()


def _llvm_config_output(option):
    """Run ``$LLVM_CONFIG <option>`` and return its standard output as text.

    Prints a diagnostic and calls Exit(1) if the command cannot be started or
    exits with a non-zero status. With shell=True a missing llvm-config does
    not raise, so the exit status has to be checked explicitly.
    universal_newlines=True keeps the output a str on Python 3 as well as 2.
    """
    output = None
    try:
        proc = subprocess.Popen('%s %s' % (env["LLVM_CONFIG"], option),
                                shell=True,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        stdout_text = proc.communicate()[0]
    except OSError:
        pass
    else:
        if proc.returncode == 0:
            output = stdout_text
    if output is None:
        print("%s failed. Make sure you have LLVM and clang installed." % env["LLVM_CONFIG"])
        Exit(1)
    return output


# LLVM defines, which the python bindings need: keep just the -D flags, with
# the leading "-D" removed.
llvm_defines = [flag[2:] for flag in _llvm_config_output('--cflags').split()
                if flag.startswith('-D')]

# Get the llvm includedir, which the python bindings need
llvm_includes = _llvm_config_output('--includedir').splitlines()

# This goes here so we already know all the LLVM settings.
# Make a fresh environment to parse the config into, to read out just LLVM stuff
llvm_dummy_env = Environment()
# Get LLVM stuff into LIBS/LDFLAGS
llvm_dummy_env.ParseConfig('%s --ldflags %s %s' %
                           (env["LLVM_CONFIG"], llvm_system_libs_flag, llvm_linkage_type_flag),
                           function=llvm_config_sanitizer.sanitize)
# Get the right -l lines in
if llvm_use_shared:
    if llvm_use_computed_shared_lib_name:
        llvm_dummy_env.Append(LIBS=[llvm_computed_shared_lib_name, ])
    else:
        llvm_dummy_env.ParseConfig('%s %s --libs %s' %
                                   (env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components),
                                   function=llvm_config_sanitizer.sanitize)
llvm_dummy_env.Append(LIBS=['stdc++', ], )
#endif GetOption("use_llvm")
# The .pc.in file has substs for llvm_lib_flags and llvm_libdir_flags, so if
# we aren't using LLVM, set them to the empty string
# When LLVM is in use, the values are the expanded $_LIBDIRFLAGS / $_LIBFLAGS
# of llvm_dummy_env, i.e. only the library settings that were parsed into that
# scratch environment.
if GetOption("use_llvm"):
env['llvm_libdir_flags'] = llvm_dummy_env.subst('$_LIBDIRFLAGS')
env['llvm_lib_flags'] = llvm_dummy_env.subst('$_LIBFLAGS')
else:
env['llvm_libdir_flags'] = ""
env['llvm_lib_flags'] = ""
# Generate libhammer.pc from its template, build it by default and install it
# into $pkgconfigpath.
pkgconfig = env.ScanReplace('libhammer.pc.in')
Default(pkgconfig)
env.Install("$pkgconfigpath", pkgconfig)
testruns = []
targets = ['$libpath',
'$incpath',
'$parsersincpath',
'$backendsincpath',
'$pkgconfigpath']
targets = ["$libpath",
"$incpath",
"$parsersincpath",
"$backendsincpath",
"$pkgconfigpath"]
Export('env')
Export('testruns')
Export('targets')
# LLVM-related flags
if GetOption("use_llvm"):
Export('llvm_computed_shared_lib_name')
Export('llvm_config_sanitizer')
Export('llvm_config_version')
Export('llvm_defines')
Export('llvm_includes')
Export('llvm_linkage_type_flag')
Export('llvm_required_components')
Export('llvm_system_libs_flag')
Export('llvm_use_computed_shared_lib_name')
Export('llvm_use_shared')
if not GetOption('in_place'):
env['BUILD_BASE'] = 'build/$VARIANT'
......
prefix=${prefix}
exec_prefix=${prefix}
includedir=${prefix}/include
libdir=${exec_prefix}/lib
libdir=${libdir}
Name: libhammer
Description: The Hammer parsing library
Version: 0.9.0
Cflags: -I${includedir}
Libs: -L${libdir} -lhammer
Libs.private: ${llvm_libdir_flags} ${llvm_lib_flags}
......@@ -3,8 +3,21 @@
from __future__ import absolute_import, division, print_function
import os.path
from distutils.version import LooseVersion
import re
import subprocess
Import('env testruns')
# LLVM-related flags
if GetOption("use_llvm"):
Import('llvm_computed_shared_lib_name')
Import('llvm_config_sanitizer')
Import('llvm_config_version')
Import('llvm_linkage_type_flag')
Import('llvm_required_components')
Import('llvm_system_libs_flag')
Import('llvm_use_computed_shared_lib_name')
Import('llvm_use_shared')
dist_headers = [
'hammer.h',
......@@ -21,7 +34,8 @@ parsers_headers = [
backends_headers = [
'backends/regex.h',
'backends/contextfree.h'
'backends/contextfree.h',
'backends/missing.h'
]
parsers = ['parsers/%s.c'%s for s in
......@@ -55,7 +69,12 @@ parsers = ['parsers/%s.c'%s for s in
'value']]
backends = ['backends/%s.c' % s for s in
['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']]
['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0', "missing"]]
# Add LLVM backend if enabled
if GetOption("use_llvm"):
llvm_backend_files = ['llvm.c', 'llvm_bytes.c', 'llvm_charset.c', 'llvm_suint.c']
backends = backends + ['backends/llvm/%s' % s for s in llvm_backend_files]
misc_hammer_parts = [
'allocator.c',
......@@ -89,7 +108,6 @@ ctests = ['t_benchmark.c',
't_mm.c',
't_regression.c']
static_library_name = 'hammer'
build_shared_library=True
if env['PLATFORM'] == 'win32':
......@@ -98,8 +116,35 @@ if env['PLATFORM'] == 'win32':
# prevent collision between .lib from dll and .lib for static lib
static_library_name = 'hammer_s'
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts)
if GetOption("use_llvm"):
env.ParseConfig('%s --cflags --ldflags' % env["LLVM_CONFIG"], function=llvm_config_sanitizer.sanitize)
libhammer_static = env.StaticLibrary(static_library_name, parsers + backends + misc_hammer_parts)
# Use a cloned env for the shared library so we can have library dependencies
# (the LLVM link settings below are applied to the clone, not to env itself)
shared_env = env.Clone()
if GetOption("use_llvm"):
# Get LLVM stuff into LIBS/LDFLAGS
# (-g/-O flags are filtered out by llvm_config_sanitizer.sanitize)
shared_env.ParseConfig('%s --ldflags %s %s' % \
(env["LLVM_CONFIG"], llvm_system_libs_flag, \
llvm_linkage_type_flag), \
function=llvm_config_sanitizer.sanitize)
# Get the right -l lines in
if llvm_use_shared:
if llvm_use_computed_shared_lib_name:
# Link against the single shared library whose name was computed by the
# importing script's caller
shared_env.Append(LIBS=[llvm_computed_shared_lib_name, ])
else:
# Let llvm-config list the libraries for the required components
shared_env.ParseConfig('%s %s --libs %s' % \
(env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
function=llvm_config_sanitizer.sanitize)
else:
# Just grab the statics regardless of version
shared_env.ParseConfig('%s %s --libs %s' % \
(env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
function=llvm_config_sanitizer.sanitize)
shared_env.Append(LIBS=['stdc++', ], LIBPATH=['.'])
# The shared library is built from the same source lists as the static one
libhammer_shared = shared_env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts)
if build_shared_library:
Default(libhammer_shared, libhammer_static)
env.Install('$libpath', [libhammer_static, libhammer_shared])
......@@ -110,13 +155,31 @@ else:
env.Install('$incpath', dist_headers)
env.Install('$parsersincpath', parsers_headers)
env.Install('$backendsincpath', backends_headers)
env.Install('$pkgconfigpath', '../../../libhammer.pc')
if GetOption('with_tests'):
testenv = env.Clone()
testenv.ParseConfig('pkg-config --cflags --libs glib-2.0')
testenv.Append(LIBS=['hammer'])
testenv.Prepend(LIBPATH=['.'])
testenv.ParseConfig('pkg-config --cflags --libs glib-2.0')
if GetOption("use_llvm"):
# Get LLVM stuff into LIBS/LDFLAGS
testenv.ParseConfig('%s --ldflags %s %s' % \
(env["LLVM_CONFIG"], llvm_system_libs_flag, \
llvm_linkage_type_flag), \
function=llvm_config_sanitizer.sanitize)
# Get the right -l lines in
if llvm_use_shared:
if llvm_use_computed_shared_lib_name:
testenv.Append(LIBS=[llvm_computed_shared_lib_name, ])
else:
testenv.ParseConfig('%s %s --libs %s' % \
(env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
function=llvm_config_sanitizer.sanitize)
else:
# Just grab the statics regardless of version
testenv.ParseConfig('%s %s --libs %s' % \
(env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
function=llvm_config_sanitizer.sanitize)
ctestexec = testenv.Program('test_suite', ctests + ['test_suite.c'], LINKFLAGS='--coverage' if testenv.GetOption('coverage') else None)
ctest = Alias('testc', [ctestexec], ''.join(['env LD_LIBRARY_PATH=', os.path.dirname(ctestexec[0].path), ' ', ctestexec[0].path]))
AlwaysBuild(ctest)
......
#ifdef HAMMER_LLVM_BACKEND
#include <llvm-c/Analysis.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#include <llvm-c/Core.h>
#pragma GCC diagnostic pop
#include <llvm-c/ExecutionEngine.h>
#include "../../internal.h"
#include "llvm.h"
/*
 * Per-parser state of the LLVM backend. h_llvm_compile() fills one of these
 * in and stores it in HParser.backend_data; h_llvm_parse() and h_llvm_free()
 * read it back from there.
 */
typedef struct HLLVMParser_ {
/* Module the parse function was built in */
LLVMModuleRef mod;
/* The generated top-level parse function */
LLVMValueRef func;
/* Execution engine created for mod */
LLVMExecutionEngineRef engine;
/* IR builder used during compilation */
LLVMBuilderRef builder;
/* Compilation context; kept until h_llvm_free() releases it */
HLLVMParserCompileContext *compile_ctxt;
} HLLVMParser;
/*
 * Allocate an HParseResult from `arena` that wraps the token `tok`.
 *
 * The generated IR calls this function by name ("make_result"), so it must
 * keep external linkage.
 */
HParseResult* make_result(HArena *arena, HParsedToken *tok) {
  HParseResult *result = h_arena_malloc(arena, sizeof(*result));

  /* Zero means "not set yet", so the caller can fill in the real length later */
  result->bit_length = 0;
  result->arena = arena;
  result->ast = tok;

  return result;
}
/* Element count of a fixed-size array, as the unsigned count the LLVM C API takes */
#define H_LLVM_NELEMS(a) ((unsigned)(sizeof(a) / sizeof((a)[0])))

/*
 * Fill in the LLVM types shared by all generated code and declare the
 * external C functions the generated IR calls.
 *
 * On return ctxt holds LLVM equivalents of size_t and intptr_t, named struct
 * types (and pointers to them) for the input stream, arena, parsed token,
 * HBytes and parse result, and ctxt->mod contains declarations of
 * h_read_bits, h_arena_malloc and make_result.
 *
 * ctxt->mod must already be set by the caller.
 */
void h_llvm_declare_common(HLLVMParserCompileContext *ctxt) {
  /* Integer types matching the host's size_t and uintptr_t widths */
#if SIZE_MAX == UINT32_MAX
  ctxt->llvm_size_t = LLVMInt32Type();
#elif SIZE_MAX == UINT64_MAX
  ctxt->llvm_size_t = LLVMInt64Type();
#else
#error "SIZE_MAX is not consistent with either 64 or 32-bit platform, couldn't guess LLVM type for size_t"
#endif
#if UINTPTR_MAX == UINT32_MAX
  ctxt->llvm_intptr_t = LLVMInt32Type();
#elif UINTPTR_MAX == UINT64_MAX
  ctxt->llvm_intptr_t = LLVMInt64Type();
#else
#error "UINTPTR_MAX is not consistent with either 64 or 32-bit platform, couldn't guess LLVM type for intptr"
#endif

  /* Input stream: one byte pointer, three 64-bit integers, five 8-bit fields */
  ctxt->llvm_inputstream = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HInputStream_");
  LLVMTypeRef inputstream_fields[] = {
    LLVMPointerType(LLVMInt8Type(), 0),
    LLVMInt64Type(),
    LLVMInt64Type(),
    LLVMInt64Type(),
    LLVMInt8Type(),
    LLVMInt8Type(),
    LLVMInt8Type(),
    LLVMInt8Type(),
    LLVMInt8Type()
  };
  LLVMStructSetBody(ctxt->llvm_inputstream, inputstream_fields,
                    H_LLVM_NELEMS(inputstream_fields), 0);
  ctxt->llvm_inputstreamptr = LLVMPointerType(ctxt->llvm_inputstream, 0);

  /* Arena: left opaque, generated code only ever passes pointers to it */
  ctxt->llvm_arena = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HArena_");
  ctxt->llvm_arenaptr = LLVMPointerType(ctxt->llvm_arena, 0);

  /* Parsed token */
  ctxt->llvm_parsedtoken = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HParsedToken_");
  LLVMTypeRef parsedtoken_fields[] = {
    LLVMInt32Type(), /* actually an enum value */
    LLVMInt64Type(), /* actually a union; the largest thing in it is 64 bits */
    ctxt->llvm_size_t,
    ctxt->llvm_size_t,
    LLVMInt8Type()
  };
  LLVMStructSetBody(ctxt->llvm_parsedtoken, parsedtoken_fields,
                    H_LLVM_NELEMS(parsedtoken_fields), 0);
  ctxt->llvm_parsedtokenptr = LLVMPointerType(ctxt->llvm_parsedtoken, 0);

  /* HBytes, one of the cases of the union inside HParsedToken */
  ctxt->llvm_hbytes = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HBytes_");
  LLVMTypeRef hbytes_fields[] = {
    LLVMPointerType(LLVMInt8Type(), 0), /* HBytes.token */
    ctxt->llvm_size_t                   /* HBytes.len */
  };
  LLVMStructSetBody(ctxt->llvm_hbytes, hbytes_fields, H_LLVM_NELEMS(hbytes_fields), 0);
  ctxt->llvm_hbytesptr = LLVMPointerType(ctxt->llvm_hbytes, 0);

  /* Parse result: token pointer, 64-bit integer, arena pointer */
  ctxt->llvm_parseresult = LLVMStructCreateNamed(LLVMGetGlobalContext(), "struct.HParseResult_");
  LLVMTypeRef parseresult_fields[] = {
    ctxt->llvm_parsedtokenptr,
    LLVMInt64Type(),
    ctxt->llvm_arenaptr
  };
  LLVMStructSetBody(ctxt->llvm_parseresult, parseresult_fields,
                    H_LLVM_NELEMS(parseresult_fields), 0);
  ctxt->llvm_parseresultptr = LLVMPointerType(ctxt->llvm_parseresult, 0);

  /* i64 h_read_bits(stream*, i32, i8) */
  LLVMTypeRef readbits_params[] = {
    ctxt->llvm_inputstreamptr,
    LLVMInt32Type(),
    LLVMInt8Type()
  };
  LLVMTypeRef readbits_type = LLVMFunctionType(LLVMInt64Type(), readbits_params,
                                               H_LLVM_NELEMS(readbits_params), 0);
  LLVMAddFunction(ctxt->mod, "h_read_bits", readbits_type);

  /*
   * void* h_arena_malloc(arena*, i32)
   * NOTE(review): the size argument is declared as i32 here - confirm that
   * this matches the C prototype of h_arena_malloc on 64-bit targets.
   */
  LLVMTypeRef amalloc_params[] = {
    ctxt->llvm_arenaptr,
    LLVMInt32Type()
  };
  LLVMTypeRef amalloc_type = LLVMFunctionType(LLVMPointerType(LLVMVoidType(), 0),
                                              amalloc_params,
                                              H_LLVM_NELEMS(amalloc_params), 0);
  LLVMAddFunction(ctxt->mod, "h_arena_malloc", amalloc_type);

  /* parseresult* make_result(arena*, parsedtoken*) */
  LLVMTypeRef makeresult_params[] = {
    ctxt->llvm_arenaptr,
    ctxt->llvm_parsedtokenptr
  };
  LLVMTypeRef makeresult_type = LLVMFunctionType(ctxt->llvm_parseresultptr,
                                                 makeresult_params,
                                                 H_LLVM_NELEMS(makeresult_params), 0);
  LLVMAddFunction(ctxt->mod, "make_result", makeresult_type);
}

#undef H_LLVM_NELEMS
/*
 * Compile `parser` to native code through LLVM and attach the result to it.
 *
 * Parameters:
 *  - mm__ [in]: allocator used for the compile context and backend data
 *  - parser [in/out]: parser to compile; on success parser->backend_data
 *    points to a freshly allocated HLLVMParser
 *  - params [in]: optional C string naming the module and the generated
 *    function; "parse" is used when NULL
 *
 * Returns 0 on success and -1 on failure. On failure nothing is stored in
 * `parser`, and the compile context and IR builder are released again.
 */
int h_llvm_compile(HAllocator* mm__, HParser* parser, const void* params) {
  /* Globals for the LLVM build process */
  HLLVMParserCompileContext *ctxt;
  /* Return result from the top-level parser */
  LLVMValueRef res;
  /* Execution engine, eventually */
  LLVMExecutionEngineRef engine = NULL;
  /* Non-zero once the module has been offered to an execution engine */
  int module_handed_off = 0;
  /* Non-zero if creating the execution engine reported failure */
  int engine_failed = 0;
  /* LLVM error message, if any */
  char *error = NULL;
  /* LLVM IR dump, if any */
  char *dump = NULL;
  /* Finished LLVM parser, eventually */
  HLLVMParser *llvm_parser = NULL;
  /* LLVM parse function return type */
  LLVMTypeRef ret_type;
  /* Preamble and postamble blocks */
  LLVMBasicBlockRef preamble, postamble;
  char *preamble_name = NULL, *postamble_name = NULL;
  int preamble_name_len = 0, postamble_name_len = 0;
  /* Boilerplate to set up a translation unit, aka a module. */
  const char *name = params ? (const char*)params : "parse";
  /* Types of parameters to use in the function declaration */
  LLVMTypeRef parser_param_types[2];
  /* Return value; stays -1 unless everything succeeds */
  int rv = -1;

  /* Build a parser compilation context */
  ctxt = h_new(HLLVMParserCompileContext, 1);
  memset(ctxt, 0, sizeof(*ctxt));
  ctxt->mm__ = mm__;
  ctxt->mod = LLVMModuleCreateWithName(name);
  h_llvm_declare_common(ctxt);

  /*
   * Boilerplate to set up the parser function to add to the module.  It
   * takes an HInputStream* and an HArena*, and returns an HParseResult*.
   */
  parser_param_types[0] = ctxt->llvm_inputstreamptr;
  parser_param_types[1] = ctxt->llvm_arenaptr;
  ret_type = LLVMFunctionType(ctxt->llvm_parseresultptr,
                              parser_param_types, 2, 0);
  ctxt->func = LLVMAddFunction(ctxt->mod, name, ret_type);

  /* Parse function is now declared; time to define it */
  ctxt->builder = LLVMCreateBuilder();

  /*
   * In case we need to build more for internal parsers, we use the address
   * of the HParser for forming names.
   */
  preamble_name_len = snprintf(NULL, 0, "preamble_%p", (void *)parser);
  preamble_name = h_new(char, preamble_name_len + 1);
  snprintf(preamble_name, preamble_name_len + 1, "preamble_%p",
           (void *)parser);
  preamble = LLVMAppendBasicBlock(ctxt->func, preamble_name);
  LLVMPositionBuilderAtEnd(ctxt->builder, preamble);

  /*
   * First thing it needs to do is get its stream and arena args and stick
   * value refs in the context.
   *
   * XXX do we always need arena?  Can we make a dummy valueref the generated
   * IR refers to, and then fill in arena if we need it after we know whether
   * we need it?  Similar concerns apply to setting up storage needed for, e.g.
   * memoizing charsets.
   */
  ctxt->stream = LLVMBuildBitCast(ctxt->builder, LLVMGetFirstParam(ctxt->func),
                                  ctxt->llvm_inputstreamptr, "stream");
  ctxt->arena = LLVMGetLastParam(ctxt->func);

  /*
   * Translate the contents of the children of `parser` into their LLVM
   * instruction equivalents
   */
  if (parser->vtable->llvm(ctxt, parser, &res)) {
    /* Okay, we got it - set up a postamble basic block */
    postamble_name_len = snprintf(NULL, 0, "postamble_%p", (void *)parser);
    postamble_name = h_new(char, postamble_name_len + 1);
    snprintf(postamble_name, postamble_name_len + 1, "postamble_%p",
             (void *)parser);
    postamble = LLVMAppendBasicBlock(ctxt->func, postamble_name);
    /* Branch from last basic block in the parser to the postamble */
    LLVMBuildBr(ctxt->builder, postamble);
    LLVMPositionBuilderAtEnd(ctxt->builder, postamble);
    /* ...and return the result, since this is the top level */
    LLVMBuildRet(ctxt->builder, res);

    /* Finished building the function, now we compile the module */
    /* First, verification (aborts the process if the module is invalid) */
    LLVMVerifyModule(ctxt->mod, LLVMAbortProcessAction, &error);
    LLVMDisposeMessage(error);
    error = NULL;

    /* Then create an MCJIT execution engine for it */
    LLVMLinkInMCJIT();
    LLVMInitializeNativeTarget();
    LLVMInitializeNativeAsmPrinter();
    module_handed_off = 1;
    engine_failed =
      LLVMCreateExecutionEngineForModule(&engine, ctxt->mod, &error) != 0;

    if (!engine_failed && !error) {
      /* Do IR dump; the string is owned by us and must be disposed */
      dump = LLVMPrintModuleToString(ctxt->mod);
      if (dump) {
        fprintf(stderr, "\n\n%s\n\n", dump);
        LLVMDisposeMessage(dump);
        dump = NULL;
      }

      /*
       * Package up the pointers that comprise the module and stash it in
       * the original HParser
       */
      llvm_parser = h_new(HLLVMParser, 1);
      llvm_parser->mod = ctxt->mod;
      llvm_parser->func = ctxt->func;
      llvm_parser->engine = engine;
      llvm_parser->builder = ctxt->builder;
      llvm_parser->compile_ctxt = ctxt;
      parser->backend_data = llvm_parser;
      rv = 0;
    } else {
      fprintf(stderr, "error: %s\n",
              error ? error : "could not create LLVM execution engine");
      if (error) {
        LLVMDisposeMessage(error);
        error = NULL;
      }
    }
  }

  /* Cleanup */
  if (rv != 0) {
    /*
     * Nothing was handed to the parser, so release what was built so far
     * instead of leaking it.
     */
    LLVMDisposeBuilder(ctxt->builder);
    if (!module_handed_off) {
      LLVMDisposeModule(ctxt->mod);
    }
    /*
     * NOTE(review): when engine creation fails the module is deliberately
     * not disposed here, on the assumption that the engine constructor took
     * ownership of it - confirm for the LLVM versions supported.
     */
    h_free(ctxt);
    ctxt = NULL;
  }
  if (preamble_name) {
    h_free(preamble_name);
    preamble_name = NULL;
  }
  if (postamble_name) {
    h_free(postamble_name);
    postamble_name = NULL;
  }

  return rv;
}
/*
 * Release everything h_llvm_compile() attached to `parser`: the compile
 * context, the execution engine, the IR builder, the module and the
 * HLLVMParser record itself. parser->backend_data is reset to NULL.
 *
 * Does nothing if the parser carries no LLVM backend data.
 */
void h_llvm_free(HParser *parser) {
  HAllocator *mm__;
  HLLVMParser *llvm_parser = parser->backend_data;
  LLVMModuleRef mod_out = NULL;
  char *err_out = NULL;
  int module_removed;

  if (!llvm_parser) {
    return;
  }

  /* h_free() goes through mm__, so keep it until the very last free */
  mm__ = llvm_parser->compile_ctxt->mm__;
  h_free(llvm_parser->compile_ctxt);
  llvm_parser->compile_ctxt = NULL;
  llvm_parser->func = NULL;

  /*
   * Detach the module from the engine first so the two can be disposed
   * independently. If removal fails the engine still owns the module, and
   * disposing the engine takes care of it.
   */
  module_removed =
    LLVMRemoveModule(llvm_parser->engine, llvm_parser->mod, &mod_out, &err_out) == 0;
  if (err_out) {
    LLVMDisposeMessage(err_out);
    err_out = NULL;
  }

  LLVMDisposeExecutionEngine(llvm_parser->engine);
  llvm_parser->engine = NULL;
  LLVMDisposeBuilder(llvm_parser->builder);
  llvm_parser->builder = NULL;
  if (module_removed) {
    LLVMDisposeModule(llvm_parser->mod);
  }
  llvm_parser->mod = NULL;

  /* The record was allocated with the same allocator in h_llvm_compile() */
  h_free(llvm_parser);
  parser->backend_data = NULL;
}
/*
 * Run the JIT-compiled parse function of `parser` over `input_stream`.
 *
 * Parameters:
 *  - mm__ [in]: allocator the result arena is created from
 *  - parser [in]: parser previously compiled with h_llvm_compile()
 *  - input_stream [in/out]: stream to parse from
 *
 * Returns the parse result, whose arena field owns all result memory, or
 * NULL if the parse failed or ran past the end of the input. On failure the
 * arena is released before returning.
 */
HParseResult *h_llvm_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
  const HLLVMParser *llvm_parser = parser->backend_data;
  HArena *arena = h_new_arena(mm__, 0);
  HParseResult *ret = NULL;

  // LLVMRunFunction only supports certain signatures; it's this hack with
  // memcpy and function pointers, or writing a shim in LLVM IR.
  //
  // LLVMGenericValueRef args[] = {
  //   LLVMCreateGenericValueOfPointer(input_stream),
  //   LLVMCreateGenericValueOfPointer(arena)
  // };
  // LLVMGenericValueRef res = LLVMRunFunction(llvm_parser->engine, llvm_parser->func, 2, args);
  // HParseResult *ret = (HParseResult*)LLVMGenericValueToPointer(res);
  void *parse_func_ptr_v;
  HParseResult * (*parse_func_ptr)(HInputStream *input_stream, HArena *arena);

  parse_func_ptr_v = LLVMGetPointerToGlobal(llvm_parser->engine, llvm_parser->func);
  if (parse_func_ptr_v) {
    /* memcpy avoids a direct object-pointer to function-pointer cast */
    memcpy(&parse_func_ptr, &parse_func_ptr_v, sizeof(parse_func_ptr));
    ret = parse_func_ptr(input_stream, arena);
  }

  if (!ret || input_stream->overrun) {
    /*
     * Failure (overrun is always failure). Whatever was allocated lives in
     * the arena and is not handed to the caller, so free it here rather
     * than leaking it.
     */
    h_delete_arena(arena);
    return NULL;
  }

  ret->arena = arena;
  size_t bit_length = h_input_stream_pos(input_stream);
  if (ret->bit_length == 0) {
    /* make_result() leaves this at zero so it can be filled in here */
    ret->bit_length = bit_length;
  }
  if (ret->ast && ret->ast->bit_length != 0) {
    ((HParsedToken*)(ret->ast))->bit_length = bit_length;
  }

  return ret;
}
/*
 * Entry points of the LLVM backend: compile a parser, run the compiled
 * parser over an input stream, and release the compiled state again.
 */
HParserBackendVTable h__llvm_backend_vtable = {
.compile = h_llvm_compile,
.parse = h_llvm_parse,
.free = h_llvm_free
};
#endif /* defined(HAMMER_LLVM_BACKEND) */
#ifdef HAMMER_LLVM_BACKEND
#ifndef HAMMER_LLVM__H
#define HAMMER_LLVM__H
#include "../../internal.h"
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#include <llvm-c/Core.h>
#pragma GCC diagnostic pop
/*
 * LLVM methods use these to generate names for labels, etc.
 *
 * For a prefix `s` the three macros share two variables, s_name and
 * s_name_len. Each macro expands to several statements and already ends in
 * ';' or '}', so they are not safe as the unbraced body of an if/else.
 * COMPUTE and FREE use h_new/h_free; presumably an allocator named mm__ must
 * be in scope at the point of use - verify against allocator macros.
 */
/* Declare s_name (NULL) and s_name_len (0) */
#define H_LLVM_DECLARE_NAME(s) \
char * s##_name = NULL; \
int s##_name_len = 0;
/*
 * Allocate s_name and format it as "<s>_<p>", where p is printed with %p;
 * the first snprintf only measures the length needed
 */
#define H_LLVM_COMPUTE_NAME(s, p) \
s##_name_len = snprintf(NULL, 0, #s "_%p", (p)); \
s##_name = h_new(char, s##_name_len + 1); \
snprintf( s##_name, s##_name_len + 1, #s "_%p", (p));
/* Free s_name if it is set, and reset both variables */
#define H_LLVM_FREE_NAME(s) \
if ( s##_name ) { \
h_free( s##_name); \
s##_name = NULL; \
s##_name_len = 0; \
}
/*
 * State shared by all code-generation routines while one parser is being
 * compiled to LLVM IR. It is created in h_llvm_compile(), and the type
 * fields are filled in by h_llvm_declare_common().
 */
/* The typedef is in internal.h */
struct HLLVMParserCompileContext_ {
/* Allocator */
HAllocator* mm__;
/* Module/function/builder */
LLVMModuleRef mod;
LLVMValueRef func;
LLVMBuilderRef builder;
/* Typerefs */
/* We determine typerefs for some standard C types we'll need later up front */
LLVMTypeRef llvm_size_t;
LLVMTypeRef llvm_intptr_t;
/* LLVM types for Hammer structs and pointers */
/* Each struct type is paired with the pointer-to-struct type built from it */
LLVMTypeRef llvm_inputstream;
LLVMTypeRef llvm_inputstreamptr;
LLVMTypeRef llvm_arena;
LLVMTypeRef llvm_arenaptr;
LLVMTypeRef llvm_parsedtoken;
LLVMTypeRef llvm_parsedtokenptr;
LLVMTypeRef llvm_parseresult;
LLVMTypeRef llvm_parseresultptr;
LLVMTypeRef llvm_hbytes;
LLVMTypeRef llvm_hbytesptr;
/* Set up in function preamble */
/* Value refs for the generated function's stream and arena arguments */
LLVMValueRef stream;
LLVMValueRef arena;
};
/*
 * Helpers shared by the per-parser LLVM code generators.
 */
/*
 * NOTE(review): implementation not visible here; from the signature this
 * presumably emits a test of value `r` against charset `cs` that branches to
 * `yes` or `no` - confirm against the definition.
 */
bool h_llvm_make_charset_membership_test(HLLVMParserCompileContext *ctxt,
LLVMValueRef r, HCharset cs,
LLVMBasicBlockRef yes, LLVMBasicBlockRef no);
/*
 * Emit IR that allocates a TT_BYTES token with the compile-time constant
 * contents bytes[0..len) and stores the make_result() call in *mr_out.
 */
void h_llvm_make_tt_bytes_fixed(HLLVMParserCompileContext *ctxt,
const uint8_t *bytes, size_t len,
LLVMValueRef *mr_out);
/*
 * Emit IR that allocates a TT_SINT (signedp non-zero) or TT_UINT token of
 * `length` bits holding value `r`, and stores the make_result() call in
 * *mr_out.
 */
void h_llvm_make_tt_suint(HLLVMParserCompileContext *ctxt,
uint8_t length, uint8_t signedp,
LLVMValueRef r, LLVMValueRef *mr_out);
#endif // #ifndef HAMMER_LLVM__H
#endif /* defined(HAMMER_LLVM_BACKEND) */
#ifdef HAMMER_LLVM_BACKEND
#include <llvm-c/Analysis.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#include <llvm-c/Core.h>
#pragma GCC diagnostic pop
#include <llvm-c/ExecutionEngine.h>
#include "../../internal.h"
#include "llvm.h"
/*
 * Emit LLVM IR that builds a TT_BYTES token whose contents are known at
 * compile time, and wraps it in a parse result.
 *
 * Parameters:
 *  - ctxt [in]: an HLLVMParserCompileContext; its builder must be positioned
 *    where the instructions should be appended
 *  - bytes [in]: the byte array the token refers to; only its address is
 *    embedded in the IR, the bytes themselves are not copied
 *  - len [in]: number of bytes in `bytes`
 *  - mr_out [out]: receives the value of the generated make_result() call
 */
void h_llvm_make_tt_bytes_fixed(HLLVMParserCompileContext *ctxt,
                                const uint8_t *bytes, size_t len,
                                LLVMValueRef *mr_out) {
  LLVMTypeRef i32 = LLVMInt32Type();
  LLVMTypeRef byte_ptr = LLVMPointerType(LLVMInt8Type(), 0);

  /* Constants used below; building these emits no instructions */
  LLVMValueRef token_bytes = LLVMConstInt(i32, sizeof(HParsedToken), 0);
  LLVMValueRef type_tag = LLVMConstInt(i32, TT_BYTES, 0);
  LLVMValueRef len_const = LLVMConstInt(ctxt->llvm_size_t, len, 0);
  /*
   * There seems to be no direct way to create a non-NULL pointer constant,
   * so the address goes through an integer constant of pointer width.
   */
  LLVMValueRef addr_const =
    LLVMConstIntToPtr(LLVMConstInt(ctxt->llvm_intptr_t, (uintptr_t)bytes, 0),
                      byte_ptr);

  /* tok = (HParsedToken *)h_arena_malloc(arena, sizeof(HParsedToken)); */
  LLVMValueRef alloc_fn = LLVMGetNamedFunction(ctxt->mod, "h_arena_malloc");
  LLVMValueRef alloc_args[] = { ctxt->arena, token_bytes };
  LLVMValueRef raw_mem = LLVMBuildCall(ctxt->builder, alloc_fn,
                                       alloc_args, 2, "h_arena_malloc");
  LLVMValueRef tok = LLVMBuildBitCast(ctxt->builder, raw_mem,
                                      ctxt->llvm_parsedtokenptr, "tok");

  /* tok->token_type = TT_BYTES; */
  LLVMValueRef type_slot = LLVMBuildStructGEP(ctxt->builder, tok, 0, "token_type");
  LLVMBuildStore(ctxt->builder, type_tag, type_slot);

  /*
   * The token's payload is a union, which LLVM IR has no notion of: the
   * struct type only reserves room for the largest member. To write the
   * HBytes case, take the address of the union slot and reinterpret it as a
   * pointer to the HBytes struct type. This relies on the layouts declared
   * for these types agreeing with the C compiler's ABI; if that ever breaks,
   * the fallback is a small C helper that sets the fields, called from IR.
   *
   * Equivalent C:
   *
   *   tok->bytes.token = bytes;
   *   tok->bytes.len = len;
   */
  LLVMValueRef union_slot = LLVMBuildStructGEP(ctxt->builder, tok, 1, "tok_union");
  LLVMValueRef as_hbytes = LLVMBuildBitCast(ctxt->builder, union_slot,
                                            ctxt->llvm_hbytesptr, "hbytes");

  LLVMValueRef token_slot =
    LLVMBuildStructGEP(ctxt->builder, as_hbytes, 0, "hbytes_token");
  LLVMBuildStore(ctxt->builder, addr_const, token_slot);

  LLVMValueRef len_slot =
    LLVMBuildStructGEP(ctxt->builder, as_hbytes, 1, "hbytes_len");
  LLVMBuildStore(ctxt->builder, len_const, len_slot);

  /* return make_result(arena, tok) to the caller through mr_out */
  LLVMValueRef wrap_fn = LLVMGetNamedFunction(ctxt->mod, "make_result");
  LLVMValueRef wrap_args[] = { ctxt->arena, tok };
  *mr_out = LLVMBuildCall(ctxt->builder, wrap_fn, wrap_args, 2, "make_result");
}
#endif /* defined(HAMMER_LLVM_BACKEND) */
This diff is collapsed.
#ifdef HAMMER_LLVM_BACKEND
#include <llvm-c/Analysis.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#include <llvm-c/Core.h>
#pragma GCC diagnostic pop
#include <llvm-c/ExecutionEngine.h>
#include "../../internal.h"
#include "llvm.h"
/*
 * Emit LLVM IR that allocates an HParsedToken from the arena, fills it in as
 * a TT_SINT or TT_UINT token, and wraps it with make_result().
 *
 * Parameters:
 *  - ctxt [in]: an HLLVMParserCompileContext; its builder, module, arena and
 *    stream values are used
 *  - length [in]: length of the integer in bits; values narrower than 64 bits
 *    are extended to 64 bits before being stored
 *  - signedp [in]: non-zero for TT_SINT (sign-extend), zero for TT_UINT
 *    (zero-extend)
 *  - r [in]: value ref of the integer to store in the token
 *  - mr_out [out]: receives the value ref of the make_result() call
 */
void h_llvm_make_tt_suint(HLLVMParserCompileContext *ctxt,
                          uint8_t length, uint8_t signedp,
                          LLVMValueRef r, LLVMValueRef *mr_out) {
  LLVMBuilderRef builder = ctxt->builder;
  LLVMTypeRef i32 = LLVMInt32Type();
  LLVMTypeRef i64 = LLVMInt64Type();

  /*
   * Allocate the token:
   *
   * %h_arena_malloc = call void* @h_arena_malloc(%struct.HArena_.1* %1, i32 48)
   * %tok = bitcast void* %h_arena_malloc to %struct.HParsedToken_.2*
   */
  LLVMValueRef malloc_fn = LLVMGetNamedFunction(ctxt->mod, "h_arena_malloc");
  LLVMValueRef malloc_args[] = {
    ctxt->arena,
    LLVMConstInt(i32, sizeof(HParsedToken), 0)
  };
  LLVMValueRef raw_tok = LLVMBuildCall(builder, malloc_fn, malloc_args, 2,
                                       "h_arena_malloc");
  LLVMValueRef tok = LLVMBuildBitCast(builder, raw_tok,
                                      ctxt->llvm_parsedtokenptr, "tok");

  /*
   * tok->token_type = signedp ? TT_SINT : TT_UINT;
   *
   * %token_type = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 0
   * store i32 8, i32* %token_type
   */
  LLVMValueRef type_ptr = LLVMBuildStructGEP(builder, tok, 0, "token_type");
  LLVMValueRef type_tag;
  if (signedp) {
    type_tag = LLVMConstInt(i32, TT_SINT, 0);
  } else {
    type_tag = LLVMConstInt(i32, TT_UINT, 0);
  }
  LLVMBuildStore(builder, type_tag, type_ptr);

  /*
   * tok->sint = r;   or   tok->uint = r;
   *
   * %token_data = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 1
   *
   * The token_data field is a union, but for the two token types handled
   * here it is always an int64_t or a uint64_t.
   */
  LLVMValueRef data_ptr = LLVMBuildStructGEP(builder, tok, 1, "token_data");
  LLVMValueRef payload = r;
  if (length < 64) {
    /* Narrower than the slot: widen according to signedness first */
    payload = signedp ? LLVMBuildSExt(builder, r, i64, "r_sext")
                      : LLVMBuildZExt(builder, r, i64, "r_zext");
  }
  LLVMBuildStore(builder, payload, data_ptr);

  /*
   * Copy the stream's index into the token:
   *
   * %t_index = getelementptr inbounds %struct.HParsedToken_.2, %struct.HParsedToken_.2* %3, i32 0, i32 2
   * %s_index = getelementptr inbounds %struct.HInputStream_.0, %struct.HInputStream_.0* %0, i32 0, i32 2
   * %4 = load i64, i64* %s_index
   * store i64 %4, i64* %t_index
   */
  LLVMValueRef tok_index_ptr = LLVMBuildStructGEP(builder, tok, 2, "t_index");
  LLVMValueRef stream_index_ptr =
    LLVMBuildStructGEP(builder, ctxt->stream, 2, "s_index");
  LLVMValueRef stream_index = LLVMBuildLoad(builder, stream_index_ptr, "");
  LLVMBuildStore(builder, stream_index, tok_index_ptr);

  /* Record the bit length in the token */
  LLVMValueRef bitlen_ptr = LLVMBuildStructGEP(builder, tok, 3, "bit_length");
  LLVMBuildStore(builder, LLVMConstInt(i64, length, 0), bitlen_ptr);

  /*
   * Finally wrap the token in a parse result:
   *
   * %make_result = call %struct.HParseResult_.3* @make_result(%struct.HArena_.1* %1, %struct.HParsedToken_.2* %3)
   */
  LLVMValueRef make_result_fn = LLVMGetNamedFunction(ctxt->mod, "make_result");
  LLVMValueRef make_result_args[] = { ctxt->arena, tok };
  *mr_out = LLVMBuildCall(builder, make_result_fn, make_result_args, 2,
                          "make_result");
}
#endif /* defined(HAMMER_LLVM_BACKEND) */
#include "missing.h"
/*
 * Compile hook of the 'missing' stub backend: never succeeds.
 *
 * All arguments are ignored; the return value is always -1.
 */
int h_missing_compile(HAllocator* mm__, HParser* parser, const void* params) {
  (void)mm__;
  (void)parser;
  (void)params;
  return -1;
}
/*
 * Parse hook of the 'missing' stub backend: never produces a result.
 *
 * All arguments are ignored; the return value is always NULL.
 */
HParseResult *h_missing_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) {
  (void)mm__;
  (void)parser;
  (void)stream;
  return NULL;
}
/*
 * Free hook of the 'missing' stub backend: does nothing, since the stub's
 * compile hook never succeeds.
 */
void h_missing_free(HParser *parser) {
  (void)parser;
}
/*
 * Vtable of the 'missing' stub backend. Every hook is a deliberate stub:
 * compile and parse always fail and free does nothing.
 */
HParserBackendVTable h__missing_backend_vtable = {
.compile = h_missing_compile, /* always fails (returns -1) */
.parse = h_missing_parse, /* always fails (returns NULL) */
.free = h_missing_free, /* no-op */
};
#ifndef HAMMER_BACKENDS_MISSING__H
#define HAMMER_BACKENDS_MISSING__H
#include "../hammer.h"
#include "../internal.h"
#endif /* !defined(HAMMER_BACKENDS_MISSING__H) */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <string.h>
#include <assert.h>
#include "../internal.h"
......
......@@ -9,7 +9,7 @@ cppenv = env.Clone()
cppenv.Append(CPPPATH=[".", "vendor/gtest-1.7.0/fused-src", "../.."],
LIBS=["hammer"],
LIBPATH=["../../"])
cppenv.MergeFlags("-Wno-missing-field-initializers -DGTEST_HAS_PTHREAD=0")
cppenv.MergeFlags("-std=c++11 -Wno-missing-field-initializers -DGTEST_HAS_PTHREAD=0")
gtest = cppenv.Object("vendor/gtest-1.7.0/fused-src/gtest/gtest-all.cc")
......
......@@ -356,7 +356,7 @@ namespace {
EXPECT_TRUE(ParsesTo(p, "aaa", "(u0x61 (u0x61 (u0x61)))"));
}
};
}
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
......
......@@ -93,7 +93,7 @@ namespace hammer {
// Wrap the C-level h_token(buf, len) parser in a Parser object.
// buf/len are forwarded unchanged; presumably the resulting parser matches
// exactly the len bytes at buf (confirm against h_token's documentation).
static inline Parser Token(const uint8_t *buf, size_t len) {
return Parser(h_token(buf, len));
}
static inline Parser Ch(char ch) {
static inline Parser Ch(uint8_t ch) {
return Parser(h_ch(ch));
}
static inline Parser ChRange(uint8_t lower, uint8_t upper) {
......
......@@ -14,6 +14,7 @@ dotnetenv.Append(CCFLAGS=["-fpic", '-DSWIG', '-Wno-all',
LIBS=['hammer'],
LIBPATH=["../.."],
SWIGFLAGS=["-DHAMMER_INTERNAL__NO_STDARG_H",
"-DSWIG2_CSHARP",
"-Isrc/", "-csharp",
"-dllimport","hammer_dotnet",
"-namespace", "Hammer.Internal"])
......
......@@ -4,6 +4,10 @@ from __future__ import absolute_import, division, print_function
import os, os.path
Import('env libhammer_shared testruns targets')
# Pull in the LLVM preprocessor defines and include paths, but only when the
# build was configured with the use_llvm option.
if GetOption("use_llvm"):
    Import('llvm_defines llvm_includes')
pythonenv = env.Clone(IMPLICIT_COMMAND_DEPENDENCIES = 0)
......@@ -11,7 +15,25 @@ swig = pythonenv.Command("hammer.i", "../swig/hammer.i", Copy("$TARGET", "$SOURC
# setup.py is listed as a source of the binding build command, next to hammer.i
setup = ['setup.py']
# Location of the Python bindings underneath the build root
pydir = os.path.join(env['BUILD_BASE'], 'src/bindings/python')
# Path of the setup.py script inside that directory
pysetup = os.path.join(pydir, 'setup.py')
libhammer_python = pythonenv.Command(['hammer.py', 'hammer_wrap.c'], [swig, setup], '%s %s build_ext --inplace' % (env['python'], pysetup))
# Assemble the setup.py command line (arg_list) and the options forwarded to
# swig (swig_opt_list). With LLVM enabled, the LLVM defines (plus
# HAMMER_LLVM_BACKEND) and include paths are added to both.
# NOTE(review): arg_list only takes effect if the Command that builds
# libhammer_python actually runs it -- confirm.
if GetOption("use_llvm"):
    define_list = ','.join(llvm_defines + ['HAMMER_LLVM_BACKEND'])
    inc_list = ' '.join('-I' + inc_dir for inc_dir in llvm_includes)
else:
    define_list = inc_list = None

swig_opt_list = '-DHAMMER_INTERNAL__NO_STDARG_H -I../../'
arg_list = '%s %s build_ext --inplace ' % (env['python'], pysetup)

if inc_list:
    # Include paths go both to build_ext and to swig
    arg_list += inc_list
    swig_opt_list += ' ' + inc_list

if define_list:
    arg_list += ' --define="' + define_list + '"'

# The swig options are always passed, quoted as a single argument
arg_list += ' --swig-opts="' + swig_opt_list + '"'
libhammer_python = pythonenv.Command(['hammer.py', 'hammer_wrap.c'], [swig, setup], 'python ' + os.path.join(pydir, 'setup.py') + ' build_ext --swig=swig3.0 --inplace')
Default(libhammer_python)
pytestenv = pythonenv.Clone()
......
......@@ -387,6 +387,23 @@ HHashValue h_hash_ptr(const void *p) {
return (uintptr_t)p >> 4;
}
/*
 * Hash function for keys that are NUL-terminated C strings: the key is
 * hashed by content with h_djbhash_str().
 */
HHashValue h_hash_str(const void *p) {
  return h_djbhash_str((const char *)p);
}
/*
 * Equality function for keys that are NUL-terminated C strings.
 *
 * Two non-NULL keys are equal when strcmp() reports identical contents.
 * If either key is NULL the decision is delegated to h_eq_ptr().
 */
bool h_eq_str(const void *p, const void *q) {
  if (!p || !q) {
    /* Nothing to compare by content; fall back to pointer comparison */
    return h_eq_ptr(p, q);
  }
  return strcmp((const char *)p, (const char *)q) == 0;
}
uint32_t h_djbhash(const uint8_t *buf, size_t len) {
uint32_t hash = 5381;
while (len--) {
......@@ -395,6 +412,14 @@ uint32_t h_djbhash(const uint8_t *buf, size_t len) {
return hash;
}
/*
 * djb-style hash (seed 5381, multiplier 33) of a NUL-terminated string.
 *
 * Parameters:
 *  - s [in]: the string to hash; may be NULL
 *
 * Returns the 32-bit hash. A NULL string hashes like the empty string
 * (the bare seed), so that string-keyed tables whose equality function
 * accepts NULL keys (see h_eq_str()) do not crash while hashing one.
 */
uint32_t h_djbhash_str(const char *s) {
  uint32_t hash = 5381;
  if (!s) {
    return hash;
  }
  for (; *s != '\0'; s++) {
    /* Go through uint8_t so bytes >= 0x80 are not sign-extended */
    hash = hash * 33 + (uint8_t)*s;
  }
  return hash;
}
void h_symbol_put(HParseState *state, const char* key, void *value) {
if (!state->symbol_table) {
state->symbol_table = h_slist_new(state->arena);
......
......@@ -31,6 +31,23 @@ static HParserBackendVTable *backends[PB_MAX + 1] = {
&h__llk_backend_vtable,
&h__lalr_backend_vtable,
&h__glr_backend_vtable,
/*
* Brittleness warning!
*
* We're using an enum as an index into this array (don't blame me...)
* so it's important that this array have the same size and order as
* the corresponding enum values in HParserBackend of src/hammer.h.
* Since callers use those enums as numeric constants to select a
* backend, dropping/reordering them breaks binary compatibility.
* If anyone adds any more optional backends in the future, don't
* #ifdef out those enum values in hammer.h, and do provide the
* 'missing' stub backend as an alternative here.
*/
#ifdef HAMMER_LLVM_BACKEND
&h__llvm_backend_vtable,
#else
&h__missing_backend_vtable,
#endif
};
......