Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • hammer/hammer
  • mlp/hammer
  • xentrac/hammer
  • pesco/hammer
  • letitiali/hammer
  • nobody/hammer
  • kia/hammer-sandbox
  • vyrus001/hammer
  • denleylam/hammer
9 results
Show changes
Commits on Source (69)
Showing with 1717 additions and 43 deletions
......@@ -23,3 +23,5 @@ build/
.sconsign.dblite
*.os
*.pyc
*.ll
libhammer.pc
dist: trusty
sudo: required
language: c
compiler:
- gcc
- clang
env:
- BINDINGS=none
global:
- LLVM_CONFIG=llvm-config-3.8
matrix:
- BINDINGS=none
matrix:
include:
- compiler: gcc
language: ruby
rvm: ruby-1.9.3-p484
rvm: ruby-2.2.5
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-1.9.3-p484
rvm: ruby-2.2.5
env: BINDINGS=ruby CC=clang
- compiler: gcc
language: ruby
rvm: ruby-2.0.0-p353
rvm: ruby-2.3.1
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-2.0.0-p353
env: BINDINGS=ruby CC=clang
- compiler: gcc
language: ruby
rvm: ruby-2.1.0
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-2.1.0
rvm: ruby-2.3.1
env: BINDINGS=ruby CC=clang
- compiler: gcc
language: python
......@@ -94,10 +91,12 @@ before_install:
- sudo apt-get update -qq
- sudo apt-get install lcov
- gem install coveralls-lcov
- if [ "$CC" == "gcc" ]; then sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y; sudo apt-get update -qq; sudo apt-get install gcc-5; fi
- sudo apt-get install llvm-3.8 llvm-3.8-dev clang-3.8
- if [ "$BINDINGS" != "none" ]; then sudo apt-get install -qq swig; fi
- if [ "$BINDINGS" == "perl" ]; then sudo add-apt-repository ppa:dns/irc -y; sudo apt-get update -qq; sudo apt-get install -qq swig=2.0.8-1irc1~12.04; fi
- if [ "$BINDINGS" == "perl" ]; then sudo apt-get install -t trusty-backports swig3.0; fi
- if [ "$BINDINGS" == "python" ]; then sudo apt-get install -qq python-dev; fi
- if [ "$BINDINGS" == "dotnet" ]; then sudo add-apt-repository ppa:directhex/monoxide -y; sudo apt-get update -qq; sudo apt-get install -qq mono-devel mono-mcs nunit nunit-console; mozroots --import --sync; fi
- if [ "$BINDINGS" == "dotnet" ]; then sudo apt-get install -qq mono-devel mono-mcs nunit nunit-console; fi
install: true
before_script:
- if [ "$BINDINGS" == "php" ]; then phpenv config-add src/bindings/php/hammer.ini; fi
......
......@@ -3,11 +3,16 @@ import os
import os.path
import platform
import sys
from distutils.version import LooseVersion
import re
import subprocess
vars = Variables(None, ARGUMENTS)
vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packaging scripts)", None, PathVariable.PathIsDirCreate))
vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept))
vars.Add(PathVariable('libdir', "Where to install libraries", None, PathVariable.PathAccept))
vars.Add(PathVariable('includedir', "Where to install headers", None, PathVariable.PathAccept))
vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['cpp', 'dotnet', 'perl', 'php', 'python', 'ruby']))
tools = ['default', 'scanreplace']
......@@ -41,15 +46,22 @@ if 'DESTDIR' in env:
print >>sys.stderr, "--!!-- you want; files will be installed in"
print >>sys.stderr, "--!!-- %s" % (calcInstallPath("$prefix"),)
if 'includedir' in env:
env['incpath'] = calcInstallPath("$includedir", "hammer")
else:
env['includedir'] = os.path.abspath(os.path.join(*map(env.subst, ["$prefix", "include"])))
env['incpath'] = calcInstallPath("$prefix", "include", "hammer")
if 'libdir' in env:
env['libpath'] = calcInstallPath("$libdir")
env['pkgconfigpath'] = calcInstallPath("$libdir", "pkgconfig")
else:
env['libpath'] = calcInstallPath("$prefix", "lib")
env['pkgconfigpath'] = calcInstallPath("$prefix", "lib", "pkgconfig")
env['libdir'] = os.path.abspath(os.path.join(*map(env.subst, ["$prefix", "lib"])))
env['parsersincpath'] = calcInstallPath("$includedir", "hammer", "parsers")
env['backendsincpath'] = calcInstallPath("$includedir", "hammer", "backends")
env['libpath'] = calcInstallPath("$prefix", "lib")
env['incpath'] = calcInstallPath("$prefix", "include", "hammer")
env['parsersincpath'] = calcInstallPath("$prefix", "include", "hammer", "parsers")
env['backendsincpath'] = calcInstallPath("$prefix", "include", "hammer", "backends")
env['pkgconfigpath'] = calcInstallPath("$prefix", "lib", "pkgconfig")
env.ScanReplace('libhammer.pc.in')
env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable")
env.MergeFlags("-std=gnu11 -Wno-unused-parameter -Wno-attributes -Wno-unused-variable -Wall -Wextra -Werror")
if env['PLATFORM'] == 'darwin':
env.Append(SHLINKFLAGS = '-install_name ' + env["libpath"] + '/${TARGET.file}')
......@@ -78,9 +90,19 @@ AddOption("--in-place",
action="store_true",
help="Build in-place, rather than in the build/<variant> tree")
AddOption("--disable-llvm-backend",
dest="use_llvm",
default=False,
action="store_false",
help="Disable the LLVM backend (and don't require LLVM library dependencies)")
AddOption("--enable-llvm-backend",
dest="use_llvm",
default=False,
action="store_true",
help="Enable the LLVM backend (and require LLVM library dependencies)")
dbg = env.Clone(VARIANT='debug')
dbg.Append(CCFLAGS=['-g'])
dbg.MergeFlags("-g -O0")
opt = env.Clone(VARIANT='opt')
opt.Append(CCFLAGS=["-O3"])
......@@ -93,6 +115,11 @@ else:
env["CC"] = os.getenv("CC") or env["CC"]
env["CXX"] = os.getenv("CXX") or env["CXX"]
if GetOption("use_llvm"):
# Overridable default path to llvm-config
env['LLVM_CONFIG'] = "llvm-config"
env["LLVM_CONFIG"] = os.getenv("LLVM_CONFIG") or env["LLVM_CONFIG"]
if GetOption("coverage"):
env.Append(CFLAGS=["--coverage"],
CXXFLAGS=["--coverage"],
......@@ -100,7 +127,8 @@ if GetOption("coverage"):
if env["CC"] == "gcc":
env.Append(LIBS=['gcov'])
else:
env.ParseConfig('llvm-config --ldflags')
env.ParseConfig('%s --cflags --ldflags --libs core executionengine mcjit analysis x86codegen x86info' % \
env["LLVM_CONFIG"])
if os.getenv("CC") == "clang" or env['PLATFORM'] == 'darwin':
env.Replace(CC="clang",
......@@ -111,6 +139,126 @@ env["ENV"].update(x for x in os.environ.items() if x[0].startswith("CCC_"))
#rootpath = env['ROOTPATH'] = os.path.abspath('.')
#env.Append(CPPPATH=os.path.join('#', "hammer"))
if GetOption("use_llvm"):
# Set up LLVM config stuff to export
# Some LLVM versions are too old to work at all; some require --system-libs
# with llvm-config, and some will break if given it.
#
# With shell=True a missing or broken llvm-config does not raise; it only
# shows up as a non-zero exit status and/or empty output. Check for that
# explicitly so the user is not told that an empty version is "too old".
llvm_config_version_proc = subprocess.Popen('%s --version' % env["LLVM_CONFIG"],
                                            shell=True,
                                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
# Keep llvm_config_version as the (stdout, stderr) tuple from communicate();
# it is exported in that shape further down.
llvm_config_version = llvm_config_version_proc.communicate()
if llvm_config_version_proc.returncode != 0 or not llvm_config_version[0].strip():
    print("%s failed. Make sure you have LLVM and clang installed." % env["LLVM_CONFIG"])
    Exit(1)
if LooseVersion(llvm_config_version[0]) < LooseVersion("3.6"):
    print("This LLVM version %s is too old" % llvm_config_version[0].strip())
    Exit(1)
# Anything older than 3.6 has already been rejected above, so only the upper
# bound decides whether --system-libs is passed.
if LooseVersion(llvm_config_version[0]) < LooseVersion("3.9"):
    llvm_system_libs_flag = "--system-libs"
else:
    llvm_system_libs_flag = ""
# Only keep one copy of this
llvm_required_components = "core executionengine mcjit analysis x86codegen x86info"
# Stubbing this out so we can implement static-only mode if needed later
llvm_use_shared = True
# Can we ask for shared/static from llvm-config?
if LooseVersion(llvm_config_version[0]) < LooseVersion("3.9"):
# Nope
llvm_linkage_type_flag = ""
llvm_use_computed_shared_lib_name = True
else:
# Woo, they finally fixed the dumb
llvm_use_computed_shared_lib_name = False
if llvm_use_shared:
llvm_linkage_type_flag = "--link-shared"
else:
llvm_linkage_type_flag = "--link-static"
if llvm_use_computed_shared_lib_name:
    # Pull the major and minor version numbers out of the llvm-config version
    # string; the library name is built as LLVM-<major>.<minor>.
    # Raw string so the \d and \. escapes reach the regex engine untouched.
    p = re.compile(r"^(\d+)\.(\d+).*$")
    m = p.match(llvm_config_version[0])
    if m:
        llvm_computed_shared_lib_name = "LLVM-%d.%d" % (int(m.group(1)), int(m.group(2)))
    else:
        # Strip the trailing newline so it does not end up inside the quotes
        print("Couldn't compute shared library name from LLVM version '%s', but needed to" %
              llvm_config_version[0].strip())
        Exit(1)
else:
    # We won't be needing it
    llvm_computed_shared_lib_name = None
# llvm-config 'helpfully' supplies -g and -O flags; educate it with this
# custom ParseConfig function arg; make it a class with a method so we can
# pass it around with scons export/import
class LLVMConfigSanitizer:
    """ParseConfig callback that drops -g... and -O... flags before merging."""

    def sanitize(self, env, cmd, unique=1):
        """Merge the flags in cmd into env, leaving out any -g/-O option.

        env    -- object whose MergeFlags() receives the filtered flag string
        cmd    -- whitespace-separated flags (the output of llvm-config)
        unique -- handed through unchanged to env.MergeFlags()
        """
        debug_or_opt = re.compile("^-[gO].*$")
        kept = []
        for token in cmd.split():
            if debug_or_opt.match(token) is None:
                kept.append(token)
        env.MergeFlags(' '.join(kept), unique)
llvm_config_sanitizer = LLVMConfigSanitizer()
def _llvm_config_query(option):
    """Run `$LLVM_CONFIG <option>` and return communicate()'s (stdout, stderr).

    With shell=True, Popen does not raise when llvm-config is missing or
    fails; the failure only shows up in the exit status. Both that and a
    failure to start the shell end the build with an explanatory message.
    """
    proc = None
    try:
        proc = subprocess.Popen('%s %s' % (env["LLVM_CONFIG"], option),
                                shell=True,
                                stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        output = proc.communicate()
    except OSError:
        output = None
    if output is None or proc.returncode != 0:
        print("%s failed. Make sure you have LLVM and clang installed." % env["LLVM_CONFIG"])
        Exit(1)
    return output

# LLVM defines, which the python bindings need
llvm_config_cflags = _llvm_config_query('--cflags')
flags = llvm_config_cflags[0].split()
# get just the -D ones, without the leading -D
p = re.compile("^-D(.*)$")
llvm_defines = [found.group(1) for found in map(p.match, flags) if found]
# Get the llvm includedir, which the python bindings need
llvm_config_includes = _llvm_config_query('--includedir')
llvm_includes = llvm_config_includes[0].splitlines()
# This goes here so we already know all the LLVM crap
# Make a fresh environment to parse the config into, to read out just LLVM stuff
llvm_dummy_env = Environment()
# Get LLVM stuff into LIBS/LDFLAGS
llvm_dummy_env.ParseConfig('%s --ldflags %s %s %s' % \
(env["LLVM_CONFIG"], llvm_system_libs_flag, llvm_linkage_type_flag, \
llvm_required_components), \
function=llvm_config_sanitizer.sanitize)
# Get the right -l lines in
if llvm_use_shared:
if llvm_use_computed_shared_lib_name:
llvm_dummy_env.Append(LIBS=[llvm_computed_shared_lib_name, ])
else:
llvm_dummy_env.ParseConfig('%s %s --libs %s' % \
(env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
function=llvm_config_sanitizer.sanitize)
llvm_dummy_env.Append(LIBS=['stdc++', ], )
#endif GetOption("use_llvm")
# The .pc.in file has substs for llvm_lib_flags and llvm_libdir_flags, so if
# we aren't using LLVM, set them to the empty string
if GetOption("use_llvm"):
env['llvm_libdir_flags'] = llvm_dummy_env.subst('$_LIBDIRFLAGS')
env['llvm_lib_flags'] = llvm_dummy_env.subst('$_LIBFLAGS')
else:
env['llvm_libdir_flags'] = ""
env['llvm_lib_flags'] = ""
pkgconfig = env.ScanReplace('libhammer.pc.in')
Default(pkgconfig)
env.Install("$pkgconfigpath", pkgconfig)
testruns = []
targets = ["$libpath",
......@@ -122,6 +270,18 @@ targets = ["$libpath",
Export('env')
Export('testruns')
Export('targets')
# LLVM-related flags
if GetOption("use_llvm"):
Export('llvm_computed_shared_lib_name')
Export('llvm_config_sanitizer')
Export('llvm_config_version')
Export('llvm_defines')
Export('llvm_includes')
Export('llvm_linkage_type_flag')
Export('llvm_required_components')
Export('llvm_system_libs_flag')
Export('llvm_use_computed_shared_lib_name')
Export('llvm_use_shared')
if not GetOption("in_place"):
env['BUILD_BASE'] = 'build/$VARIANT'
......
prefix=${prefix}
exec_prefix=${prefix}
includedir=${prefix}/include
libdir=${exec_prefix}/lib
libdir=${libdir}
Name: libhammer
Description: The Hammer parsing library
Version: 0.9.0
Cflags: -I${includedir}
Libs: -L${libdir} -lhammer
Libs.private: ${llvm_libdir_flags} ${llvm_lib_flags}
# -*- python -*-
import os.path
from distutils.version import LooseVersion
import re
import subprocess
Import('env testruns')
# LLVM-related flags
if GetOption("use_llvm"):
Import('llvm_computed_shared_lib_name')
Import('llvm_config_sanitizer')
Import('llvm_config_version')
Import('llvm_linkage_type_flag')
Import('llvm_required_components')
Import('llvm_system_libs_flag')
Import('llvm_use_computed_shared_lib_name')
Import('llvm_use_shared')
dist_headers = [
"hammer.h",
......@@ -48,11 +62,16 @@ parsers = ['parsers/%s.c'%s for s in
'unimplemented',
'whitespace',
'xor',
'value']]
'value']]
backends = ['backends/%s.c' % s for s in
['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']]
# Add LLVM backend if enabled
if GetOption("use_llvm"):
llvm_backend_files = ['llvm.c']
backends = backends + ['backends/llvm/%s' % s for s in llvm_backend_files]
misc_hammer_parts = [
'allocator.c',
'benchmark.c',
......@@ -76,19 +95,66 @@ ctests = ['t_benchmark.c',
't_misc.c',
't_regression.c']
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts)
if GetOption("use_llvm"):
env.ParseConfig('%s --cflags --ldflags' % env["LLVM_CONFIG"], function=llvm_config_sanitizer.sanitize)
libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts)
# Use a cloned env for the shared library so we can have library dependencies
# (the LLVM link dependencies added below go into the clone, not into env)
shared_env = env.Clone()
if GetOption("use_llvm"):
# Get LLVM stuff into LIBS/LDFLAGS
# (the llvm_* names used here are the ones imported at the top of this file)
shared_env.ParseConfig('%s --ldflags %s %s %s' % \
(env["LLVM_CONFIG"], llvm_system_libs_flag, \
llvm_linkage_type_flag, llvm_required_components), \
function=llvm_config_sanitizer.sanitize)
# Get the right -l lines in
if llvm_use_shared:
# Shared linkage: either name the single LLVM library directly, or ask
# llvm-config for --libs with the linkage-type flag
if llvm_use_computed_shared_lib_name:
shared_env.Append(LIBS=[llvm_computed_shared_lib_name, ])
else:
shared_env.ParseConfig('%s %s --libs %s' % \
(env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
function=llvm_config_sanitizer.sanitize)
else:
# Just grab the statics regardless of version
shared_env.ParseConfig('%s %s --libs %s' % \
(env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
function=llvm_config_sanitizer.sanitize)
shared_env.Append(LIBS=['stdc++', ], LIBPATH=['.'])
# The shared library is built from the clone; same sources as the static one
libhammer_shared = shared_env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts)
Default(libhammer_shared, libhammer_static)
env.Install("$libpath", [libhammer_static, libhammer_shared])
env.Install("$incpath", dist_headers)
env.Install("$parsersincpath", parsers_headers)
env.Install("$backendsincpath", backends_headers)
env.Install("$pkgconfigpath", "../../../libhammer.pc")
testenv = env.Clone()
testenv.ParseConfig('pkg-config --cflags --libs glib-2.0')
testenv.Append(LIBS=['hammer'], LIBPATH=['.'])
testenv.ParseConfig('pkg-config --cflags --libs glib-2.0')
# When the LLVM backend is enabled, give the test environment the same LLVM
# link setup that is applied to shared_env for the shared library
if GetOption("use_llvm"):
# Get LLVM stuff into LIBS/LDFLAGS
testenv.ParseConfig('%s --ldflags %s %s %s' % \
(env["LLVM_CONFIG"], llvm_system_libs_flag, \
llvm_linkage_type_flag, llvm_required_components), \
function=llvm_config_sanitizer.sanitize)
# Get the right -l lines in
if llvm_use_shared:
# Shared linkage: either name the single LLVM library directly, or ask
# llvm-config for --libs with the linkage-type flag
if llvm_use_computed_shared_lib_name:
testenv.Append(LIBS=[llvm_computed_shared_lib_name, ])
else:
testenv.ParseConfig('%s %s --libs %s' % \
(env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
function=llvm_config_sanitizer.sanitize)
else:
# Just grab the statics regardless of version
testenv.ParseConfig('%s %s --libs %s' % \
(env["LLVM_CONFIG"], llvm_linkage_type_flag, llvm_required_components), \
function=llvm_config_sanitizer.sanitize)
testenv.Append(LIBS=['stdc++'], LIBPATH=['.'])
ctestexec = testenv.Program('test_suite', ctests + ['test_suite.c'], LINKFLAGS="--coverage" if testenv.GetOption("coverage") else None)
ctest = Alias('testc', [ctestexec], "".join(["env LD_LIBRARY_PATH=", os.path.dirname(ctestexec[0].path), " ", ctestexec[0].path]))
AlwaysBuild(ctest)
......
This diff is collapsed.
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <string.h>
#include <assert.h>
#include "../internal.h"
......
......@@ -6,7 +6,7 @@ cppenv = env.Clone()
cppenv.Append(CPPPATH=[".", "vendor/gtest-1.7.0/fused-src", "../.."],
LIBS=["hammer"],
LIBPATH=["../../"])
cppenv.MergeFlags("-Wno-missing-field-initializers -DGTEST_HAS_PTHREAD=0")
cppenv.MergeFlags("-std=c++11 -Wno-missing-field-initializers -DGTEST_HAS_PTHREAD=0")
gtest = cppenv.Object("vendor/gtest-1.7.0/fused-src/gtest/gtest-all.cc")
......
......@@ -356,7 +356,7 @@ namespace {
EXPECT_TRUE(ParsesTo(p, "aaa", "(u0x61 (u0x61 (u0x61)))"));
}
};
}
int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv);
......
......@@ -93,7 +93,7 @@ namespace hammer {
static inline Parser Token(const uint8_t *buf, size_t len) {
return Parser(h_token(buf, len));
}
static inline Parser Ch(char ch) {
static inline Parser Ch(uint8_t ch) {
return Parser(h_ch(ch));
}
static inline Parser ChRange(uint8_t lower, uint8_t upper) {
......
# -*- python -*-
import os, os.path
Import('env libhammer_shared testruns targets')
Import('llvm_defines')
Import('llvm_includes')
pythonenv = env.Clone(IMPLICIT_COMMAND_DEPENDENCIES = 0)
swig = pythonenv.Command("hammer.i", "../swig/hammer.i", Copy("$TARGET", "$SOURCE"))
setup = ['setup.py']
pydir = os.path.join(env['BUILD_BASE'], 'src/bindings/python')
libhammer_python = pythonenv.Command(['hammer.py', 'hammer_wrap.c'], [swig, setup], 'python ' + os.path.join(pydir, 'setup.py') + ' build_ext --inplace')
define_list = ','.join(llvm_defines)
inc_list = ' '.join(['-I' + e for e in llvm_includes])
swig_opt_list = '-DHAMMER_INTERNAL__NO_STDARG_H -I../../ ' + inc_list
libhammer_python = pythonenv.Command(['hammer.py', 'hammer_wrap.c'], [swig, setup], \
'python ' + os.path.join(pydir, 'setup.py') + ' build_ext --inplace ' + inc_list + \
' --define=\"' + define_list + '\" --swig-opts=\"' + swig_opt_list + '\"')
Default(libhammer_python)
pytestenv = pythonenv.Clone()
......
......@@ -31,6 +31,7 @@ static HParserBackendVTable *backends[PB_MAX + 1] = {
&h__llk_backend_vtable,
&h__lalr_backend_vtable,
&h__glr_backend_vtable,
&h__llvm_backend_vtable,
};
......
......@@ -46,7 +46,8 @@ typedef enum HParserBackend_ {
PB_LLk,
PB_LALR,
PB_GLR,
PB_MAX = PB_GLR
PB_LLVM,
PB_MAX = PB_LLVM
} HParserBackend;
typedef enum HTokenType_ {
......@@ -177,7 +178,7 @@ typedef HParser* (*HContinuation)(HAllocator *mm__, const HParsedToken *x, void
typedef struct HParserTestcase_ {
unsigned char* input;
size_t length;
char* output_unambiguous;
const char* output_unambiguous;
} HParserTestcase;
#ifdef SWIG
......
......@@ -24,7 +24,12 @@
#define HAMMER_INTERNAL__H
#include <stdint.h>
#include <assert.h>
#include <limits.h>
#include <string.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#include <llvm-c/Core.h>
#pragma GCC diagnostic pop
#include "hammer.h"
#include "platform.h"
......@@ -152,23 +157,90 @@ static inline void h_sarray_clear(HSArray *arr) {
// }}}
typedef unsigned int *HCharset;
typedef unsigned int HCharsetWord;
#define CHARSET_WHOLE_WORD_MASK UINT_MAX
typedef HCharsetWord *HCharset;
#define CHARSET_BITS_PER_WORD (sizeof(HCharsetWord) * 8)
#define CHARSET_WORDS (256 / CHARSET_BITS_PER_WORD)
#define CHARSET_SIZE (CHARSET_WORDS * sizeof(HCharsetWord))
#define CHARSET_BIT_IDX_TO_WORD(idx) \
(((unsigned int)(idx)) / CHARSET_BITS_PER_WORD)
#define CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx) \
(((unsigned int)(idx)) % CHARSET_BITS_PER_WORD)
#define CHART_WORD_AND_BIT_TO_BIT_IDX(word,bit) \
((uint8_t)(CHARSET_BITS_PER_WORD * ((unsigned int)(word)) + \
((unsigned int)(bit))))
#define CHARSET_BIT_POS_IN_WORD_MASK(bit) \
((((HCharsetWord)(1)) << (bit)) & CHARSET_WHOLE_WORD_MASK)
/* Mask for all bits below a position */
#define CHARSET_BIT_MASK_UP_TO_POS(bit) \
((CHARSET_BIT_POS_IN_WORD_MASK((bit)) - 1) & CHARSET_WHOLE_WORD_MASK)
/* Mask off all bits above and including a position */
#define CHARSET_BIT_MASK_FROM_POS(bit) \
((~CHARSET_BIT_MASK_UP_TO_POS((bit))) & CHARSET_WHOLE_WORD_MASK)
/*
 * Duplicate a charset: allocate a fresh CHARSET_WORDS-word bitmap with h_new()
 * and copy all CHARSET_SIZE bytes of `in` into it. Returns the new charset.
 */
static inline HCharset copy_charset(HAllocator *mm__, HCharset in) {
  HCharset duplicate = h_new(HCharsetWord, CHARSET_WORDS);

  memcpy(duplicate, in, CHARSET_SIZE);

  return duplicate;
}
static inline HCharset new_charset(HAllocator* mm__) {
HCharset cs = h_new(unsigned int, 256 / (sizeof(unsigned int) * 8));
memset(cs, 0, 32); // 32 bytes = 256 bits
HCharset cs = h_new(HCharsetWord, CHARSET_WORDS);
memset(cs, 0, CHARSET_SIZE);
return cs;
}
/* Invert the charset in place by flipping every bit of every word. */
static inline void charset_complement(HCharset cs) {
  unsigned int word = 0;

  while (word < CHARSET_WORDS) {
    cs[word] = ~(cs[word]);
    ++word;
  }
}
static inline int charset_isset(HCharset cs, uint8_t pos) {
return !!(cs[pos / (sizeof(*cs)*8)] & (1 << (pos % (sizeof(*cs)*8))));
return !!(cs[CHARSET_BIT_IDX_TO_WORD(pos)] &
CHARSET_BIT_POS_IN_WORD_MASK(CHARSET_BIT_IDX_TO_BIT_IN_WORD(pos)));
}
/*
 * Restrict a charset, in place, to the inclusive range [idx_start, idx_end]:
 * every bit outside the range is cleared, bits inside it are left as they are.
 * If idx_end < idx_start the range is empty and the whole charset is cleared.
 */
static inline void charset_restrict_to_range(HCharset cs, uint8_t idx_start, uint8_t idx_end) {
HCharsetWord mask;
if (idx_end < idx_start) {
/* Range is empty, clear the charset */
memset(cs, 0, CHARSET_SIZE);
} else {
/* Clear below, if any: zero every whole word before the one holding idx_start */
if (CHARSET_BIT_IDX_TO_WORD(idx_start) > 0) {
memset(cs, 0, CHARSET_BIT_IDX_TO_WORD(idx_start) * sizeof(HCharsetWord));
}
/* Note this partial start/ending word code still works if they are the same word */
/* Mask partial starting word, if any (bit 0 means the whole word is in range) */
if (CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_start) != 0) {
/* keep idx_start's bit and everything above it in this word */
mask = CHARSET_BIT_MASK_FROM_POS(CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_start));
cs[CHARSET_BIT_IDX_TO_WORD(idx_start)] &= mask;
}
/* Mask partial ending word, if any (the top bit means the whole word is in range) */
if (CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_end) != CHARSET_BITS_PER_WORD - 1) {
/* keep everything below idx_end's bit, plus that bit itself (range is inclusive) */
mask = CHARSET_BIT_MASK_UP_TO_POS(CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_end));
mask |= CHARSET_BIT_POS_IN_WORD_MASK(CHARSET_BIT_IDX_TO_BIT_IN_WORD(idx_end));
cs[CHARSET_BIT_IDX_TO_WORD(idx_end)] &= mask;
}
/* Clear above, if any: zero every whole word after the one holding idx_end */
if (CHARSET_BIT_IDX_TO_WORD(idx_end) + 1 < CHARSET_WORDS) {
memset(cs + CHARSET_BIT_IDX_TO_WORD(idx_end) + 1, 0,
(CHARSET_WORDS - (CHARSET_BIT_IDX_TO_WORD(idx_end) + 1)) *
sizeof(HCharsetWord));
}
}
}
static inline void charset_set(HCharset cs, uint8_t pos, int val) {
cs[pos / (sizeof(*cs)*8)] =
cs[CHARSET_BIT_IDX_TO_WORD(pos)] =
val
? cs[pos / (sizeof(*cs)*8)] | (1 << (pos % (sizeof(*cs)*8)))
: cs[pos / (sizeof(*cs)*8)] & ~(1 << (pos % (sizeof(*cs)*8)));
? cs[CHARSET_BIT_IDX_TO_WORD(pos)] | CHARSET_BIT_POS_IN_WORD_MASK(
CHARSET_BIT_IDX_TO_BIT_IN_WORD(pos))
: cs[CHARSET_BIT_IDX_TO_WORD(pos)] & ~CHARSET_BIT_POS_IN_WORD_MASK(
CHARSET_BIT_IDX_TO_BIT_IN_WORD(pos));
}
typedef unsigned int HHashValue;
......@@ -322,6 +394,7 @@ extern HParserBackendVTable h__packrat_backend_vtable;
extern HParserBackendVTable h__llk_backend_vtable;
extern HParserBackendVTable h__lalr_backend_vtable;
extern HParserBackendVTable h__glr_backend_vtable;
extern HParserBackendVTable h__llvm_backend_vtable;
// }}}
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
......@@ -419,6 +492,7 @@ struct HParserVtable_ {
bool (*isValidCF)(void *env);
bool (*compile_to_rvm)(HRVMProg *prog, void* env); // FIXME: forgot what the bool return value was supposed to mean.
void (*desugar)(HAllocator *mm__, HCFStack *stk__, void *env);
bool (*llvm)(HAllocator *mm__, LLVMBuilderRef builder, LLVMValueRef func, LLVMModuleRef mod, void *env);
bool higher; // false if primitive
};
......
/*
 * Shared declarations for the LLVM code generators in the individual parsers.
 *
 * This header uses HAllocator and HCharset without including their
 * definitions; the parsers that use it include "parser_internal.h" before
 * "../llvm.h".
 */
#ifndef HAMMER_LLVM__H
#define HAMMER_LLVM__H
/* Silence -Wpedantic for the LLVM C API header only */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#include <llvm-c/Core.h>
#pragma GCC diagnostic pop
/*
 * LLVM types for the Hammer structures the generated code works with, each
 * paired with its pointer type.
 * NOTE(review): these are declared without `extern`, so every translation
 * unit that includes this header gets its own tentative definition. That
 * links only when the compiler merges common symbols (-fcommon); GCC 10 and
 * later default to -fno-common. Presumably they should be `extern` here with
 * one definition in the backend source -- confirm against that file.
 */
LLVMTypeRef llvm_inputstream, llvm_inputstreamptr, llvm_arena, llvm_arenaptr;
LLVMTypeRef llvm_parsedtoken, llvm_parsedtokenptr, llvm_parseresult, llvm_parseresultptr;
/*
 * Emit a test of whether the value `r` is a member of charset `cs`, ending in
 * a branch to `yes` or `no`.
 */
void h_llvm_make_charset_membership_test(HAllocator* mm__,
LLVMModuleRef mod, LLVMValueRef func, LLVMBuilderRef builder,
LLVMValueRef r, HCharset cs,
LLVMBasicBlockRef yes, LLVMBasicBlockRef no);
/*
 * Emit code that turns the value `r` into a token and parse result; the
 * resulting value is stored through `mr_out`.
 * NOTE(review): the exact token type is not visible from this header -- the
 * name suggests a signed/unsigned integer token; confirm in the definition.
 */
void h_llvm_make_tt_suint(LLVMModuleRef mod, LLVMBuilderRef builder,
LLVMValueRef stream, LLVMValueRef arena,
LLVMValueRef r, LLVMValueRef *mr_out);
#endif // #ifndef HAMMER_LLVM__H
......@@ -21,7 +21,10 @@ static void *aa_alloc(HAllocator *allocator, size_t size)
/*
 * realloc entry for the arena allocator: not implemented.
 * Calling it trips the assert below; if asserts are compiled out it returns
 * NULL without touching `ptr`.
 */
static void *aa_realloc(HAllocator *allocator, void *ptr, size_t size)
{
/* `arena` is fetched but not used yet; silence -Wunused-variable for this one declaration */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
HArena *arena = ((ArenaAllocator *)allocator)->arena;
#pragma GCC diagnostic pop
/* The string literal is only there to show up in the assertion failure message */
assert(((void)"XXX need realloc for arena allocator", 0));
return NULL;
}
......
#include <assert.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#include <llvm-c/Core.h>
#pragma GCC diagnostic pop
#include "parser_internal.h"
#include "../llvm.h"
struct bits_env {
uint8_t length;
......@@ -17,6 +22,78 @@ static HParseResult* parse_bits(void* env, HParseState *state) {
return make_result(state->arena, result);
}
/*
 * LLVM code generation hook for the bits parser -- currently a stub.
 *
 * The only instruction emitted is an alloca named "result"; the long comment
 * in the body is reference IR for parse_bits that has not been translated
 * into builder calls. mm__, func, mod and env are unused. Always returns true.
 */
static bool bits_llvm(HAllocator *mm__,
LLVMBuilderRef builder, LLVMValueRef func, LLVMModuleRef mod,
void* env) {
/* %result = alloca %struct.HParsedToken_*, align 8 */
/*
 * NOTE(review): the reference IR above allocas a pointer to HParsedToken_,
 * while the call below passes llvm_parsedtoken rather than
 * llvm_parsedtokenptr -- confirm which is intended.
 * `result` is not used yet, hence the -Wunused-variable suppression.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
LLVMValueRef result = LLVMBuildAlloca(builder, llvm_parsedtoken, "result");
#pragma GCC diagnostic pop
/* --- Reference IR for the rest of the function; nothing below is emitted yet --- */
/* store i8* %env, i8** %1, align 8 */
/* store %struct.HParseState_* %state, %struct.HParseState_** %2, align 8 */
/* %3 = load i8** %1, align 8 */
/* %4 = bitcast i8* %3 to %struct.bits_env* */
/* store %struct.bits_env* %4, %struct.bits_env** %env, align 8 */
/* %5 = load %struct.HParseState_** %2, align 8 */
/* %6 = getelementptr inbounds %struct.HParseState_* %5, i32 0, i32 2 */
/* %7 = load %struct.HArena_** %6, align 8 */
/* %8 = call noalias i8* @h_arena_malloc(%struct.HArena_* %7, i64 48) */
/* %9 = bitcast i8* %8 to %struct.HParsedToken_* */
/* store %struct.HParsedToken_* %9, %struct.HParsedToken_** %result, align 8 */
/* %10 = load %struct.bits_env** %env_, align 8 */
/* %11 = getelementptr inbounds %struct.bits_env* %10, i32 0, i32 1 */
/* %12 = load i8* %11, align 1 */
/* %13 = zext i8 %12 to i32 */
/* %14 = icmp ne i32 %13, 0 */
/* %15 = select i1 %14, i32 4, i32 8 */
/* %16 = load %struct.HParsedToken_** %result, align 8 */
/* %17 = getelementptr inbounds %struct.HParsedToken_* %16, i32 0, i32 0 */
/* store i32 %15, i32* %17, align 4 */
/* %18 = load %struct.bits_env** %env_, align 8 */
/* %19 = getelementptr inbounds %struct.bits_env* %18, i32 0, i32 1 */
/* %20 = load i8* %19, align 1 */
/* %21 = icmp ne i8 %20, 0 */
/* br i1 %21, label %22, label %33 */
/* ; <label>:22 ; preds = %0 */
/* %23 = load %struct.HParseState_** %2, align 8 */
/* %24 = getelementptr inbounds %struct.HParseState_* %23, i32 0, i32 1 */
/* %25 = load %struct.bits_env** %env_, align 8 */
/* %26 = getelementptr inbounds %struct.bits_env* %25, i32 0, i32 0 */
/* %27 = load i8* %26, align 1 */
/* %28 = zext i8 %27 to i32 */
/* %29 = call i64 @h_read_bits(%struct.HInputStream_* %24, i32 %28, i8 signext 1) */
/* %30 = load %struct.HParsedToken_** %result, align 8 */
/* %31 = getelementptr inbounds %struct.HParsedToken_* %30, i32 0, i32 1 */
/* %32 = bitcast %union.anon* %31 to i64* */
/* store i64 %29, i64* %32, align 8 */
/* br label %44 */
/* ; <label>:33 ; preds = %0 */
/* %34 = load %struct.HParseState_** %2, align 8 */
/* %35 = getelementptr inbounds %struct.HParseState_* %34, i32 0, i32 1 */
/* %36 = load %struct.bits_env** %env_, align 8 */
/* %37 = getelementptr inbounds %struct.bits_env* %36, i32 0, i32 0 */
/* %38 = load i8* %37, align 1 */
/* %39 = zext i8 %38 to i32 */
/* %40 = call i64 @h_read_bits(%struct.HInputStream_* %35, i32 %39, i8 signext 0) */
/* %41 = load %struct.HParsedToken_** %result, align 8 */
/* %42 = getelementptr inbounds %struct.HParsedToken_* %41, i32 0, i32 1 */
/* %43 = bitcast %union.anon* %42 to i64* */
/* store i64 %40, i64* %43, align 8 */
/* br label %44 */
/* ; <label>:44 ; preds = %33, %22 */
/* %45 = load %struct.HParseState_** %2, align 8 */
/* %46 = getelementptr inbounds %struct.HParseState_* %45, i32 0, i32 2 */
/* %47 = load %struct.HArena_** %46, align 8 */
/* %48 = load %struct.HParsedToken_** %result, align 8 */
/* %49 = call %struct.HParseResult_* @make_result(%struct.HArena_* %47, %struct.HParsedToken_* %48) */
/* ret %struct.HParseResult_* %49 */
return true;
}
static HParsedToken *reshape_bits(const HParseResult *p, void* signedp_p) {
// signedp == NULL iff unsigned
bool signedp = (signedp_p != NULL);
......@@ -102,6 +179,7 @@ static const HParserVtable bits_vt = {
.isValidCF = h_true,
.desugar = desugar_bits,
.compile_to_rvm = bits_ctrvm,
.llvm = bits_llvm,
.higher = false,
};
......
#include <stdint.h>
#include <assert.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#include <llvm-c/Core.h>
#pragma GCC diagnostic pop
#include "parser_internal.h"
#include "../llvm.h"
static HParseResult* parse_ch(void* env, HParseState *state) {
uint8_t c = (uint8_t)(uintptr_t)(env);
......@@ -41,12 +46,77 @@ static bool ch_ctrvm(HRVMProg *prog, void* env) {
return true;
}
/*
 * LLVM code generation for the single-character parser.
 *
 * Emits, into `func`, code that reads 8 bits from the input stream, compares
 * the byte with the expected character and returns either a parse result
 * (match) or a null HParseResult pointer (mismatch).
 *
 *   mm__    - unused here
 *   builder - instruction builder used for everything that is emitted
 *   func    - function being built; its first parameter is used as the input
 *             stream and its last parameter as the arena
 *   mod     - module in which h_read_bits is looked up by name
 *   env     - the expected character, stored as an integer in the pointer
 *
 * Always returns true.
 */
static bool ch_llvm(HAllocator *mm__, LLVMBuilderRef builder, LLVMValueRef func, LLVMModuleRef mod, void* env) {
// Emit the body of func that parses a single character
// Set up the arguments for the call to h_read_bits(), and fetch the arena
// that is handed to h_llvm_make_tt_suint()
// NOTE(review): this bitcast is emitted wherever the builder is positioned on
// entry, i.e. before ch_entry below exists, and nothing here branches into
// ch_entry -- presumably the caller sets that up; confirm.
LLVMValueRef bits_args[3];
LLVMValueRef stream = LLVMGetFirstParam(func);
stream = LLVMBuildBitCast(builder, stream, llvm_inputstreamptr, "stream");
bits_args[0] = stream;
bits_args[1] = LLVMConstInt(LLVMInt32Type(), 8, 0);
bits_args[2] = LLVMConstInt(LLVMInt8Type(), 0, 0);
LLVMValueRef arena = LLVMGetLastParam(func);
// Set up basic blocks: entry, success, and end. There is no separate failure
// block; a mismatch branches from entry straight to end.
LLVMBasicBlockRef entry = LLVMAppendBasicBlock(func, "ch_entry");
LLVMBasicBlockRef success = LLVMAppendBasicBlock(func, "ch_success");
LLVMBasicBlockRef end = LLVMAppendBasicBlock(func, "ch_end");
// Basic block: entry
LLVMPositionBuilderAtEnd(builder, entry);
// Call to h_read_bits()
// %read_bits = call i64 @h_read_bits(%struct.HInputStream_* %8, i32 8, i8 signext 0)
LLVMValueRef bits = LLVMBuildCall(builder, LLVMGetNamedFunction(mod, "h_read_bits"), bits_args, 3, "read_bits");
// %2 = trunc i64 %read_bits to i8
LLVMValueRef r = LLVMBuildTrunc(builder, bits, LLVMInt8Type(), ""); // do we actually need this?
// Compare the byte that was read against the expected character
// %"c == r" = icmp eq i8 -94, %2 ; the -94 comes from c_
uint8_t c_ = (uint8_t)(uintptr_t)(env);
LLVMValueRef c = LLVMConstInt(LLVMInt8Type(), c_, 0);
LLVMValueRef icmp = LLVMBuildICmp(builder, LLVMIntEQ, c, r, "c == r");
// Branch to the success block on a match, otherwise straight to end
// br i1 %"c == r", label %ch_success, label %ch_end
LLVMBuildCondBr(builder, icmp, success, end);
// Basic block: success
LLVMPositionBuilderAtEnd(builder, success);
/* Make a token */
LLVMValueRef mr;
h_llvm_make_tt_suint(mod, builder, stream, arena, r, &mr);
// br label %ch_end
LLVMBuildBr(builder, end);
// Basic block: end
LLVMPositionBuilderAtEnd(builder, end);
// The phi picks the result made in ch_success, or null when coming directly
// from ch_entry (mismatch)
// %rv = phi %struct.HParseResult_.3* [ %make_result, %ch_success ], [ null, %ch_entry ]
LLVMValueRef rv = LLVMBuildPhi(builder, llvm_parseresultptr, "rv");
LLVMBasicBlockRef rv_phi_incoming_blocks[] = {
success,
entry
};
LLVMValueRef rv_phi_incoming_values[] = {
mr,
LLVMConstNull(llvm_parseresultptr)
};
LLVMAddIncoming(rv, rv_phi_incoming_values, rv_phi_incoming_blocks, 2);
// ret %struct.HParseResult_.3* %rv
LLVMBuildRet(builder, rv);
return true;
}
static const HParserVtable ch_vt = {
.parse = parse_ch,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_ch,
.compile_to_rvm = ch_ctrvm,
.llvm = ch_llvm,
.higher = false,
};
......
#include <assert.h>
#include <string.h>
#include "../internal.h"
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
#include <llvm-c/Core.h>
#pragma GCC diagnostic pop
#include "parser_internal.h"
#include "../llvm.h"
static HParseResult* parse_charset(void *env, HParseState *state) {
uint8_t in = h_read_bits(&state->input_stream, 8, false);
......@@ -70,12 +75,82 @@ static bool cs_ctrvm(HRVMProg *prog, void *env) {
return true;
}
/*
 * LLVM code generation for the charset parser.
 *
 * Emits, into `func`, code that reads 8 bits from the input stream, tests the
 * byte for membership in the charset and returns either a parse result
 * (member) or a null HParseResult pointer (not a member).
 *
 *   mm__    - passed on to h_llvm_make_charset_membership_test()
 *   builder - instruction builder used for everything that is emitted
 *   func    - function being built; its first parameter is used as the input
 *             stream and its last parameter as the arena
 *   mod     - module in which h_read_bits is looked up by name
 *   env     - the HCharset to test against
 *
 * Always returns true.
 */
static bool cs_llvm(HAllocator *mm__, LLVMBuilderRef builder, LLVMValueRef func,
LLVMModuleRef mod, void* env) {
/*
* LLVM to build a function to parse a charset; the args are a stream and an
* arena.
*/
/*
 * NOTE(review): this bitcast is emitted wherever the builder is positioned on
 * entry, i.e. before cs_entry below exists, and nothing here branches into
 * cs_entry -- presumably the caller sets that up; confirm.
 */
LLVMValueRef stream = LLVMGetFirstParam(func);
stream = LLVMBuildBitCast(builder, stream, llvm_inputstreamptr, "stream");
LLVMValueRef arena = LLVMGetLastParam(func);
/* Set up our basic blocks */
LLVMBasicBlockRef entry = LLVMAppendBasicBlock(func, "cs_entry");
LLVMBasicBlockRef success = LLVMAppendBasicBlock(func, "cs_success");
LLVMBasicBlockRef fail = LLVMAppendBasicBlock(func, "cs_fail");
LLVMBasicBlockRef end = LLVMAppendBasicBlock(func, "cs_end");
/* Basic block: entry */
LLVMPositionBuilderAtEnd(builder, entry);
/* First we read the char: same arguments as h_read_bits(..., 8, false) in parse_charset() */
LLVMValueRef bits_args[3];
bits_args[0] = stream;
bits_args[1] = LLVMConstInt(LLVMInt32Type(), 8, 0);
bits_args[2] = LLVMConstInt(LLVMInt8Type(), 0, 0);
LLVMValueRef bits = LLVMBuildCall(builder, LLVMGetNamedFunction(mod, "h_read_bits"), bits_args, 3, "read_bits");
LLVMValueRef r = LLVMBuildTrunc(builder, bits, LLVMInt8Type(), ""); // TODO Necessary? (same question in ch_llvm())
/* We have a char, need to check if it's in the charset */
HCharset cs = (HCharset)env;
/* Branch to either success or fail, conditional on whether r is in cs */
h_llvm_make_charset_membership_test(mm__, mod, func, builder, r, cs, success, fail);
/* Basic block: success */
LLVMPositionBuilderAtEnd(builder, success);
LLVMValueRef mr;
h_llvm_make_tt_suint(mod, builder, stream, arena, r, &mr);
/* br label %cs_end */
LLVMBuildBr(builder, end);
/* Basic block: fail */
LLVMPositionBuilderAtEnd(builder, fail);
/*
* We just branch straight to end; this exists so that the phi node in
* end knows where all the incoming edges are from, rather than needing
* some basic block constructed in h_llvm_make_charset_membership_test()
*/
LLVMBuildBr(builder, end);
/* Basic block: end */
LLVMPositionBuilderAtEnd(builder, end);
// The phi picks the result made in cs_success, or null when coming from cs_fail
// %rv = phi %struct.HParseResult_.3* [ %make_result, %cs_success ], [ null, %cs_fail ]
LLVMValueRef rv = LLVMBuildPhi(builder, llvm_parseresultptr, "rv");
LLVMBasicBlockRef rv_phi_incoming_blocks[] = {
success,
fail
};
LLVMValueRef rv_phi_incoming_values[] = {
mr,
LLVMConstNull(llvm_parseresultptr)
};
LLVMAddIncoming(rv, rv_phi_incoming_values, rv_phi_incoming_blocks, 2);
// ret %struct.HParseResult_.3* %rv
LLVMBuildRet(builder, rv);
return true;
}
static const HParserVtable charset_vt = {
.parse = parse_charset,
.isValidRegular = h_true,
.isValidCF = h_true,
.desugar = desugar_charset,
.compile_to_rvm = cs_ctrvm,
.llvm = cs_llvm,
.higher = false,
};
......
#ifndef _GNU_SOURCE
#define _GNU_SOURCE // to obtain asprintf/vasprintf
#endif
#include "platform.h"
#include <stdio.h>
......