Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.


Select target project
No results found


Select target project
  • hammer/hammer
  • mlp/hammer
  • xentrac/hammer
  • pesco/hammer
  • letitiali/hammer
  • nobody/hammer
  • kia/hammer-sandbox
  • vyrus001/hammer
  • denleylam/hammer
9 results
Show changes
Commits on Source (96)
......@@ -44,7 +44,10 @@ Installing
* mono-devel and mono-mcs (>= 3.0.6) (for .NET bindings)
* nunit (for testing .NET bindings)
To build, type `scons`. To run the built-in test suite, type `scons test`. For a debug build, add `--variant=debug`.
To build, type `scons`.
To run the built-in test suite, type `scons test`.
To avoid the test dependencies, add `--no-tests`.
For a debug build, add `--variant=debug`.
To build bindings, pass a "bindings" argument to scons, e.g. `scons bindings=python`. `scons bindings=python test` will build Python bindings and run tests for both C and Python. `--variant=debug` is valid here too. You can build more than one set of bindings at a time; just separate them with commas, e.g. `scons bindings=python,perl`.
......@@ -5,6 +5,7 @@ from __future__ import absolute_import, division, print_function
import os
import os.path
import platform
import subprocess
import sys
......@@ -21,6 +22,17 @@ tools = ['default', 'scanreplace']
if 'dotnet' in ARGUMENTS.get('bindings', []):
# add the clang tool if necessary
if os.getenv('CC') == 'clang' or platform.system() == 'Darwin':
# Try to detect whether cc happens to be clang by inspecting the output of
# `<cc> --version`. CC from the environment wins; otherwise fall back to the
# conventional 'cc' driver name.
cc = os.getenv('CC') or 'cc'
# capture_output=True makes .stdout a bytes object, hence the b'clang' below.
ver = subprocess.run([cc, '--version'], capture_output=True).stdout
if b'clang' in ver.split():
    os.environ['CC'] = cc # make sure we call it as we saw it
envvars = {'PATH' : os.environ['PATH']}
if 'PKG_CONFIG_PATH' in os.environ:
envvars['PKG_CONFIG_PATH'] = os.environ['PKG_CONFIG_PATH']
......@@ -91,18 +103,15 @@ AddOption('--in-place',
help='Build in-place, rather than in the build/<variant> tree')
default=env['PLATFORM'] != 'win32',
help='Build tests')
help='Do not build tests')
env['CC'] = os.getenv('CC') or env['CC']
env['CXX'] = os.getenv('CXX') or env['CXX']
if os.getenv('CC') == 'clang' or env['PLATFORM'] == 'darwin':
env['CFLAGS'] = os.getenv('CFLAGS') or env['CFLAGS']
# Language standard and warnings
if env['CC'] == 'cl':
......@@ -118,8 +127,15 @@ if env['CC'] == 'cl':
# -Wno-clobbered only really works with gcc >= 4.2.x, but ... scons
env.MergeFlags('-std=c99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable')
if env['PLATFORM'] == 'darwin':
# It's reported -D_POSIX_C_SOURCE breaks the Mac OS build; I think we
# may need _DARWIN_C_SOURCE instead/in addition to, but let's wait to
# have access to a Mac to test/repo
env.MergeFlags('-std=c99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable')
# Defining -D_POSIX_C_SOURCE=200809L here, rather than ad hoc at each
# #include site, is important
env.MergeFlags('-std=c99 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable')
# Linker options
if env['PLATFORM'] == 'darwin':
......@@ -6,6 +6,9 @@ import os.path
Import('env testruns')
# Bump this if you break binary compatibility (e.g. renumber backends)
hammer_shlib_version = "1.0.0"
dist_headers = [
......@@ -21,7 +24,9 @@ parsers_headers = [
backends_headers = [
parsers = ['parsers/%s.c'%s for s in
......@@ -30,6 +35,7 @@ parsers = ['parsers/%s.c'%s for s in
......@@ -56,7 +62,7 @@ parsers = ['parsers/%s.c'%s for s in
backends = ['backends/%s.c' % s for s in
['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']]
['missing', 'packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0', 'params']]
misc_hammer_parts = [
......@@ -88,6 +94,7 @@ ctests = ['t_benchmark.c',
......@@ -109,7 +116,8 @@ libhammer_static = None
libhammer_shared = None
if build_shared_library:
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts)
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts, \
libhammer_static = env.StaticLibrary(static_library_name, parsers + backends + misc_hammer_parts)
if libhammer_shared is not None:
......@@ -61,7 +61,14 @@ static void * h_arena_malloc_raw(HArena *arena, size_t size, bool need_zero);
void* h_alloc(HAllocator* mm__, size_t size) {
void *p = mm__->alloc(mm__, size);
h_platform_errx(1, "memory allocation failed (%uB requested)\n", (unsigned int)size);
h_platform_errx(1, "memory allocation failed (%zuB requested)\n", size);
return p;
void* h_realloc(HAllocator* mm__, void* ptr, size_t size) {
void *p = mm__->realloc(mm__, ptr, size);
h_platform_errx(1, "memory reallocation failed (%zuB requested)\n", size);
return p;
......@@ -267,3 +274,32 @@ void h_allocator_stats(HArena *arena, HArenaStats *stats) {
stats->arena_li_malloc_bytes = arena->arena_li_malloc_bytes;
/*
 * Reallocate an arena allocation: obtain a fresh n-byte region from the arena
 * and copy the old contents into it.
 *
 * arena - arena whose block list is searched for ptr
 * ptr   - existing allocation; must lie within one of the arena's blocks
 *         (enforced by assert)
 * n     - size in bytes of the new allocation
 *
 * Returns the new allocation (asserted non-NULL). The old pointer is passed
 * to h_arena_free() before returning.
 *
 * NOTE(review): the pointer arithmetic below is done on void *, which is a
 * GNU extension rather than standard C - confirm this is acceptable for all
 * supported compilers.
 */
void* h_arena_realloc(HArena *arena, void* ptr, size_t n) {
struct arena_link *link;
void* ret;
size_t ncopy;
// XXX this is really wasteful, but maybe better than nothing?
// first, we walk the blocks to find our ptr. since we don't know how large
// the original allocation was, we must always make a new one and copy as
// much data from the old block as there could have been.
for (link = arena->head; link; link = link->next) {
if (ptr >= (void *)link->rest && ptr <= (void *)link->rest + link->used)
break; /* found it */
assert(link != NULL);
// upper bound on the old allocation's size: everything from ptr up to the
// end of the used part of its block
ncopy = (void *)link->rest + link->used - ptr;
// never copy more than the new allocation can hold
if (n < ncopy)
ncopy = n;
ret = h_arena_malloc_noinit(arena, n);
assert(ret != NULL);
memcpy(ret, ptr, ncopy);
h_arena_free(arena, ptr);
return ret;
......@@ -48,6 +48,7 @@ typedef struct HAllocator_ {
} HAllocator;
void* h_alloc(HAllocator* allocator, size_t size) ATTR_MALLOC(2);
void* h_realloc(HAllocator* allocator, void* ptr, size_t size);
typedef struct HArena_ HArena ; // hidden implementation
......@@ -55,6 +56,7 @@ HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for def
void* h_arena_malloc_noinit(HArena *arena, size_t count) ATTR_MALLOC(2);
void* h_arena_malloc(HArena *arena, size_t count) ATTR_MALLOC(2);
void* h_arena_realloc(HArena *arena, void* ptr, size_t count);
void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers.
void h_delete_arena(HArena *arena);
void h_arena_set_except(HArena *arena, jmp_buf *except);
#include <assert.h>
#include "lr.h"
#include "params.h"
static bool glr_step(HParseResult **result, HSlist *engines,
HLREngine *engine, const HLRAction *action);
......@@ -174,9 +175,9 @@ static bool glr_step(HParseResult **result, HSlist *engines,
HSlistNode *x;
for(x=engines->head; x; x=x->next) {
HLREngine *eng = x->elem;
if(eng->state == engine->state) {
x->elem = lrengine_merge(eng, engine);
if(eng->state == engine->state && eng->input.index == engine->input.index) {
x->elem = lrengine_merge(eng, engine);
if(!x) // no merge happened
......@@ -241,12 +242,54 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
return result;
/*
 * Build the descriptive text for the GLR backend with its parameter.
 *
 * mm__  - allocator handed to h_format_description_with_param_k()
 * be    - not referenced in this function
 * param - backend parameter, decoded to k by h_get_param_k()
 *
 * Returns whatever h_format_description_with_param_k() returns for the
 * backend name "GLR" and the decoded k.
 */
char * h_glr_get_description(HAllocator *mm__,
HParserBackend be, void *param) {
const char *backend_name = "GLR";
size_t k;
char *descr = NULL;
k = h_get_param_k(param);
descr = h_format_description_with_param_k(mm__, backend_name, k);
return descr;
/*
 * Build the short name for the GLR backend with its parameter.
 *
 * mm__  - allocator handed to h_format_name_with_param_k()
 * be    - not referenced in this function
 * param - backend parameter, decoded to k by h_get_param_k()
 *
 * Returns whatever h_format_name_with_param_k() returns for the backend
 * name "GLR" and the decoded k.
 */
char * h_glr_get_short_name(HAllocator *mm__,
HParserBackend be, void *param) {
const char *backend_name = "GLR";
size_t k;
char *name = NULL;
k = h_get_param_k(param);
name = h_format_name_with_param_k(mm__, backend_name, k);
return name;
/*
 * GLR parameter extraction: delegates unchanged to h_extract_param_k() and
 * returns its result.
 */
int h_glr_extract_params(HParserBackendWithParams * be_with_params, backend_with_params_t * be_with_params_t) {
return h_extract_param_k(be_with_params, be_with_params_t);
HParserBackendVTable h__glr_backend_vtable = {
.compile = h_glr_compile,
.parse = h_glr_parse,
.free = h_glr_free
.free = h_glr_free,
.copy_params = h_copy_numeric_param,
/* No free_param needed, since it's not actually allocated */
/* Name/param resolution functions */
.backend_short_name = "glr",
.backend_description = "GLR(k) parser backend",
.get_description_with_params = h_glr_get_description,
.get_short_name_with_params = h_glr_get_short_name,
.extract_params = h_glr_extract_params
#include <assert.h>
#include "contextfree.h"
#include "lr.h"
#include "params.h"
/* LALR-via-SLR grammar transformation */
......@@ -275,6 +275,7 @@ HCFChoice *h_desugar_augmented(HAllocator *mm__, HParser *parser)
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
size_t k = params? (uintptr_t)params : DEFAULT_KMAX;
// generate (augmented) CFG from parser
// construct LR(0) DFA
// build LR(0) table
......@@ -335,7 +336,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
if(match_any_production(table, eg, lhs, item->rhs, state)) {
// the left-hand symbol's follow set is this production's
// contribution to the lookahead
const HStringMap *fs = h_follow(1, eg->grammar, lhs);
const HStringMap *fs = h_follow(k, eg->grammar, lhs);
assert(fs != NULL);
assert(fs->epsilon_branch == NULL);
// NB: there is a case where fs can be empty: when reducing by lhs
......@@ -369,10 +370,43 @@ void h_lalr_free(HParser *parser)
HLRTable *table = parser->backend_data;
parser->backend_data = NULL;
parser->backend = PB_PACKRAT;
parser->backend_vtable = h_get_default_backend_vtable();
parser->backend = h_get_default_backend();
/*
 * Build the descriptive text for the LALR backend with its parameter.
 *
 * mm__  - allocator handed to h_format_description_with_param_k()
 * be    - not referenced in this function
 * param - backend parameter, decoded to k by h_get_param_k()
 *
 * Returns whatever h_format_description_with_param_k() returns for the
 * backend name "LALR" and the decoded k.
 */
char * h_lalr_get_description(HAllocator *mm__,
HParserBackend be, void *param) {
const char *backend_name = "LALR";
size_t k;
char *descr = NULL;
k = h_get_param_k(param);
descr = h_format_description_with_param_k(mm__, backend_name, k);
return descr;
/*
 * Build the short name for the LALR backend with its parameter.
 *
 * mm__  - allocator handed to h_format_name_with_param_k()
 * be    - not referenced in this function
 * param - backend parameter, decoded to k by h_get_param_k()
 *
 * Returns whatever h_format_name_with_param_k() returns for the backend
 * name "LALR" and the decoded k.
 */
char * h_lalr_get_short_name(HAllocator *mm__,
HParserBackend be, void *param) {
const char *backend_name = "LALR";
size_t k;
char *name = NULL;
k = h_get_param_k(param);
name = h_format_name_with_param_k(mm__, backend_name, k);
return name;
/*
 * LALR parameter extraction: delegates unchanged to h_extract_param_k() and
 * returns its result.
 */
int h_lalr_extract_params(HParserBackendWithParams * be_with_params, backend_with_params_t * be_with_params_t) {
return h_extract_param_k(be_with_params, be_with_params_t);
HParserBackendVTable h__lalr_backend_vtable = {
.compile = h_lalr_compile,
......@@ -380,11 +414,18 @@ HParserBackendVTable h__lalr_backend_vtable = {
.free = h_lalr_free,
.parse_start = h_lr_parse_start,
.parse_chunk = h_lr_parse_chunk,
.parse_finish = h_lr_parse_finish
.parse_finish = h_lr_parse_finish,
.copy_params = h_copy_numeric_param,
/* No free_param needed, since it's not actually allocated */
/* Name/param resolution functions */
.backend_short_name = "lalr",
.backend_description = "LALR(k) parser backend",
.get_description_with_params = h_lalr_get_description,
.get_short_name_with_params = h_lalr_get_short_name,
.extract_params = h_lalr_extract_params
// dummy!
int test_lalr(void)
......@@ -2,8 +2,7 @@
#include "../internal.h"
#include "../cfgrammar.h"
#include "../parsers/parser_internal.h"
static const size_t DEFAULT_KMAX = 1;
#include "params.h"
/* Generating the LL(k) parse table */
......@@ -254,7 +253,8 @@ void h_llk_free(HParser *parser)
HLLkTable *table = parser->backend_data;
parser->backend_data = NULL;
parser->backend = PB_PACKRAT;
parser->backend_vtable = h_get_default_backend_vtable();
parser->backend = h_get_default_backend();
......@@ -606,6 +606,38 @@ HParseResult *h_llk_parse_finish(HSuspendedParser *s)
return llk_parse_finish_(s->mm__, s->backend_state);
/*
 * Build the descriptive text for the LL(k) backend with its parameter.
 *
 * mm__  - allocator handed to h_format_description_with_param_k()
 * be    - not referenced in this function
 * param - backend parameter, decoded to k by h_get_param_k()
 *
 * Returns whatever h_format_description_with_param_k() returns for the
 * backend name "LL" and the decoded k.
 *
 * NOTE(review): the local `len` is declared but never used here.
 */
char * h_llk_get_description(HAllocator *mm__,
HParserBackend be, void *param) {
const char *backend_name = "LL";
size_t k, len;
char *descr = NULL;
k = h_get_param_k(param);
descr = h_format_description_with_param_k(mm__, backend_name, k);
return descr;
/*
 * Build the short name for the LL(k) backend with its parameter.
 *
 * mm__  - allocator handed to h_format_name_with_param_k()
 * be    - not referenced in this function
 * param - backend parameter, decoded to k by h_get_param_k()
 *
 * Returns whatever h_format_name_with_param_k() returns for the backend
 * name "LL" and the decoded k.
 */
char * h_llk_get_short_name(HAllocator *mm__,
HParserBackend be, void *param) {
const char *backend_name = "LL";
size_t k;
char *name = NULL;
k = h_get_param_k(param);
name = h_format_name_with_param_k(mm__, backend_name, k);
return name;
/*
 * LL(k) parameter extraction: delegates unchanged to h_extract_param_k() and
 * returns its result.
 */
int h_llk_extract_params(HParserBackendWithParams * be_with_params, backend_with_params_t *be_with_params_t) {
return h_extract_param_k(be_with_params, be_with_params_t);
HParserBackendVTable h__llk_backend_vtable = {
.compile = h_llk_compile,
......@@ -614,7 +646,19 @@ HParserBackendVTable h__llk_backend_vtable = {
.parse_start = h_llk_parse_start,
.parse_chunk = h_llk_parse_chunk,
.parse_finish = h_llk_parse_finish
.parse_finish = h_llk_parse_finish,
.copy_params = h_copy_numeric_param,
/* No free_param needed, since it's not actually allocated */
/* Name/param resolution functions */
.backend_short_name = "llk",
.backend_description = "LL(k) parser backend",
.get_description_with_params = h_llk_get_description,
.get_short_name_with_params = h_llk_get_short_name,
/*extraction of params from string*/
.extract_params = h_llk_extract_params
#include "missing.h"
/* Placeholder backend that always fails */
/*
 * Compile hook of the placeholder ("missing") backend.
 * Ignores all of its arguments and unconditionally returns -1.
 */
int h_missing_compile(HAllocator* mm__, HParser* parser, const void* params) {
/* Always fail */
return -1;
/*
 * Parse hook of the placeholder ("missing") backend.
 * Ignores all of its arguments and unconditionally returns NULL.
 */
HParseResult *h_missing_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) {
/* Always fail */
return NULL;
/*
 * Free hook of the placeholder ("missing") backend; does nothing.
 */
void h_missing_free(HParser *parser) {
/* No-op */
/*
 * Vtable of the placeholder backend: wires up the always-failing compile and
 * parse hooks and the no-op free hook. No other members are set in the lines
 * shown here.
 */
HParserBackendVTable h__missing_backend_vtable = {
.compile = h_missing_compile,
.parse = h_missing_parse,
.free = h_missing_free,
#include "../hammer.h"
#include "../internal.h"
#endif /* !defined(HAMMER_BACKENDS_MISSING__H) */
......@@ -34,37 +34,39 @@ HParserCacheValue *cached_lr(HParseState *state, HLeftRec *lr) {
return ret;
// Really library-internal tool to perform an uncached parse, and handle any common error-handling.
static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HParser *parser) {
// TODO(thequux): these nested conditions are ugly. Factor this appropriately, so that it is clear which code is executed when.
HParseResult *tmp_res;
if (parser) {
HInputStream bak = state->input_stream;
tmp_res = parser->vtable->parse(parser->env, state);
if (tmp_res) {
tmp_res->arena = state->arena;
if (!state->input_stream.overrun) {
size_t bit_length = h_input_stream_pos(&state->input_stream) - h_input_stream_pos(&bak);
if (tmp_res->bit_length == 0) { // Don't modify if forwarding.
tmp_res->bit_length = bit_length;
if (tmp_res->ast && tmp_res->ast->bit_length != 0) {
((HParsedToken*)(tmp_res->ast))->bit_length = bit_length;
} else
tmp_res->bit_length = 0;
} else
tmp_res = NULL;
if (state->input_stream.overrun)
return NULL; // overrun is always failure.
if (!tmp_res) {
state->input_stream = INVALID;
state->input_stream.input = key->input_pos.input;
// internal helper to perform an uncached parse and common error-handling
static inline
HParseResult *perform_lowlevel_parse(HParseState *state, const HParser *parser)
HParseResult *res;
HInputStream bak;
size_t len;
if (!parser)
return NULL;
bak = state->input_stream;
res = parser->vtable->parse(parser->env, state);
if (!res)
return NULL; // NB: input position is considered invalid on failure
// combinators' parse functions by design do not have to check for overrun.
// turn such bogus successes into parse failure.
if (state->input_stream.overrun) {
res->bit_length = 0;
return NULL;
return tmp_res;
// update result length
res->arena = state->arena;
len = h_input_stream_pos(&state->input_stream) - h_input_stream_pos(&bak);
if (res->bit_length == 0) // Don't modify if forwarding.
res->bit_length = len;
if (res->ast && res->ast->bit_length != 0)
((HParsedToken *)(res->ast))->bit_length = len;
return res;
HParserCacheValue* recall(HParserCacheKey *k, HParseState *state, HHashValue keyhash) {
......@@ -240,8 +242,10 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
* so we check to see if we have one
if (!base || NULL == base->head) {
h_hashtable_put_precomp(state->cache, key,
cached_result(state, tmp_res), keyhash);
if (parser->vtable->higher) {
h_hashtable_put_precomp(state->cache, key,
cached_result(state, tmp_res), keyhash);
return tmp_res;
} else {
base->seed = tmp_res;
......@@ -261,13 +265,15 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
int h_packrat_compile(HAllocator* mm__, HParser* parser, const void* params) {
parser->backend_vtable = &h__packrat_backend_vtable;
parser->backend = PB_PACKRAT;
return 0; // No compilation necessary, and everything should work
// out of the box.
void h_packrat_free(HParser *parser) {
parser->backend = PB_PACKRAT; // revert to default, oh that's us
parser->backend_vtable = h_get_default_backend_vtable();
parser->backend = h_get_default_backend();
static uint32_t cache_key_hash(const void* key) {
......@@ -322,6 +328,7 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr
parse_state->arena = arena;
parse_state->symbol_table = NULL;
HParseResult *res = h_do_parse(parser, parse_state);
*input_stream = parse_state->input_stream;
// tear down the parse state
......@@ -332,8 +339,121 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr
return res;
// The following naive implementation of the iterative (chunked) parsing API
// concatenates chunks and blindly re-runs the full parse on every call to
// h_packrat_parse_chunk.
// NB: A full implementation will still have to concatenate the chunks to
// support arbitrary backtracking, but should be able to save much, if not all, of
// the HParseState between calls.
// Cutting unneeded past input should also be possible but is complicated by
// the fact that only higher-order combinators are saved to the packrat cache,
// so former input to bare primitive combinators must remain available.
// Note: The iterative API expects us to always consume an entire input chunk
// when we suspend, even if packrat later backtracks into it. We will produce
// the correct parse result and accurately consume from a final chunk, but all
// earlier chunks will be reported as fully consumed and as being part of the
// HParseResult in terms of its bit_length field.
/*
 * Start hook of the chunked (iterative) packrat parse.
 * Intentionally does no work; s is not touched here.
 */
void h_packrat_parse_start(HSuspendedParser *s)
// nothing to do here, we allocate lazily below
/*
 * Feed one input chunk to a suspended packrat parse.
 *
 * s     - suspended parser; s->backend_state is NULL before the first chunk,
 *         points to the concatenated HInputStream while suspended, and holds
 *         the HParseResult once the parse is finished
 * input - the new chunk; its index/bit_offset/margin (and overrun) are
 *         updated to report how much of the chunk was consumed
 *
 * Returns true when the parse is finished (result stored in
 * s->backend_state), false when more input is needed.
 *
 * Each call re-runs h_packrat_parse() from the start of all input gathered
 * so far.
 *
 * NOTE(review): on completion only cat->input is freed in the lines shown;
 * the HInputStream allocated with h_new() on the first chunk does not appear
 * to be released - confirm whether that is a leak.
 */
bool h_packrat_parse_chunk(HSuspendedParser *s, HInputStream *input)
HAllocator *mm__ = s->mm__;
HParseResult *res;
HInputStream *cat;
size_t newlen;
if (s->backend_state == NULL) { // this is the first chunk
// attempt to finish the parse on just the given input.
res = h_packrat_parse(mm__, s->parser, input);
if (input->last_chunk || !input->overrun) {
s->backend_state = res; // pass on the result
return true; // and signal we're done
// we ran out of input and are expecting more
// allocate and initialize an input stream to concatenate the chunks
cat = h_new(HInputStream, 1);
*cat = *input;
// take a private copy of the chunk's bytes; the struct copy above only
// duplicated the pointer
cat->input = h_alloc(mm__, input->length);
memcpy((void *)cat->input, input->input, input->length);
s->backend_state = cat;
goto suspend;
// we have received additional input - append it to the saved stream
cat = s->backend_state;
// the new chunk must start exactly where the saved input ends
assert(input->pos == cat->length);
// guard the size_t addition below against wrap-around
if (input->length > SIZE_MAX - cat->length)
h_platform_errx(1, "input length would overflow");
newlen = cat->length + input->length;
cat->input = h_realloc(mm__, (void *)cat->input, newlen);
memcpy((void *)cat->input + cat->length, input->input, input->length);
cat->length = newlen;
cat->last_chunk = input->last_chunk;
// reset our input stream and call the parser on it (again)
cat->index = 0;
cat->bit_offset = 0;
cat->margin = 0;
cat->endianness = DEFAULT_ENDIANNESS;
cat->overrun = false;
res = h_packrat_parse(mm__, s->parser, cat);
assert(cat->index <= cat->length);
input->overrun = cat->overrun;
// suspend if the parser still needs more input
if (input->overrun && !input->last_chunk)
goto suspend;
// otherwise the parse is finished...
// report final input position
if (cat->index < input->pos) { // parser just needed some lookahead
input->index = 0; // don't consume this last chunk
input->bit_offset = 0;
input->margin = 0;
} else {
// translate the position in the concatenated stream into an offset
// within this chunk
input->index = cat->index - input->pos;
input->bit_offset = cat->bit_offset;
input->margin = cat->margin;
input->endianness = cat->endianness;
// clean up and return the result
h_free((void *)cat->input);
s->backend_state = res;
return true; // don't call me again.
input->index = input->length; // consume the entire chunk on suspend
input->margin = 0;
input->bit_offset = 0;
return false; // come back with more input.
/*
 * Finish hook of the chunked packrat parse: hands back s->backend_state,
 * which h_packrat_parse_chunk() sets to the parse result before reporting
 * that it is done.
 */
HParseResult *h_packrat_parse_finish(HSuspendedParser *s)
return s->backend_state;
HParserBackendVTable h__packrat_backend_vtable = {
.compile = h_packrat_compile,
.parse = h_packrat_parse,
.free = h_packrat_free
.free = h_packrat_free,
.parse_start = h_packrat_parse_start,
.parse_chunk = h_packrat_parse_chunk,
.parse_finish = h_packrat_parse_finish,
/* Name/param resolution functions */
.backend_short_name = "packrat",
.backend_description = "Packrat parser with Warth's recursion",
.get_description_with_params = h_get_description_with_no_params,
.get_short_name_with_params = h_get_short_name_with_no_params
#include "params.h"
/*
 * Decode a backend parameter that carries an integer k in the pointer value
 * itself: the pointer is converted to uintptr_t and returned as size_t.
 * The pointer is never dereferenced.
 */
size_t h_get_param_k(void *param) {
uintptr_t params_int;
params_int = (uintptr_t)param;
return (size_t)params_int;
/*
 * Format a human-readable backend description.
 *
 * mm__         - allocator (not named directly in the lines shown;
 *                presumably picked up by the h_new() macro - TODO confirm)
 * backend_name - name inserted for the %s conversion
 * k            - lookahead parameter; 0 selects the generic text
 *
 * For k > 0 the result is "<name>(<k>) parser backend"; otherwise it is
 * "<name>(k) parser backend (default k is <DEFAULT_KMAX>)".
 *
 * Returns the buffer obtained from h_new() (descr is only written when it is
 * non-NULL, so a NULL from h_new() is returned as-is).
 */
char * h_format_description_with_param_k(HAllocator *mm__, const char *backend_name, size_t k){
const char *format_str = "%s(%zu) parser backend";
const char *generic_descr_format_str =
"%s(k) parser backend (default k is %zu)";
size_t len;
char *descr = NULL;
if (k > 0) {
/* A specific k was given */
/* Measure how big a buffer we need */
len = snprintf(NULL, 0, format_str, backend_name, k);
/* Allocate it and do the real snprintf */
descr = h_new(char, len + 1);
if (descr) {
snprintf(descr, len + 1, format_str, backend_name, k);
} else {
* No specific k, would use DEFAULT_KMAX. We say what DEFAULT_KMAX
* was compiled in in the description.
len = snprintf(NULL, 0, generic_descr_format_str, backend_name, DEFAULT_KMAX);
/* Allocate and do the real snprintf */
descr = h_new(char, len + 1);
if (descr) {
snprintf(descr, len + 1, generic_descr_format_str, backend_name, DEFAULT_KMAX);
return descr;
/*
 * Format a short backend name.
 *
 * mm__         - allocator (not named directly in the lines shown;
 *                presumably picked up by the h_new() macro - TODO confirm)
 * backend_name - name inserted for the %s conversion
 * k            - lookahead parameter; 0 selects the generic name
 *
 * For k > 0 the result is "<name>(<k>)"; otherwise it is "<name>(k)" with a
 * literal letter k.
 *
 * Returns the buffer obtained from h_new() (name is only written when it is
 * non-NULL, so a NULL from h_new() is returned as-is).
 */
char * h_format_name_with_param_k(HAllocator *mm__, const char *backend_name, size_t k){
const char *format_str = "%s(%zu)", *generic_name = "%s(k)";
size_t len;
char *name = NULL;
if (k > 0) {
/* A specific k was given */
/* Measure how big a buffer we need */
len = snprintf(NULL, 0, format_str, backend_name, k);
/* Allocate it and do the real snprintf */
name = h_new(char, len + 1);
if (name) {
snprintf(name, len + 1, format_str, backend_name, k);
} else {
/* No specific k */
/* NOTE(review): generic_name has no conversion for k, so the extra
 * argument in this measuring call is ignored; the real snprintf below
 * omits it. */
len = snprintf(NULL, 0, generic_name, backend_name, k);
name = h_new(char, len + 1);
if (name) {
snprintf(name, len + 1, generic_name, backend_name);
return name;
/*TODO better error handling*/
/*
 * Extract the integer parameter k from a parsed "backend(params)" record.
 *
 * be_with_params   - output; its params field is first reset to NULL and,
 *                    if a value is read, set to the integer cast to void *
 * be_with_params_t - parsed record; only the first entry of its params
 *                    array is examined
 *
 * Returns the sscanf() result for the first parameter, or 0 when the record
 * holds no parameters.
 *
 * NOTE(review): sscanf() returns EOF (a negative, hence truthy, value) on
 * input failure, so `if(success)` would then store the untouched param_0
 * (-1) converted to uintptr_t - confirm whether `success == 1` was intended.
 * NOTE(review): assumes params[0].param.param is NUL-terminated - TODO
 * confirm against the code that builds backend_with_params_t.
 */
int h_extract_param_k(HParserBackendWithParams * be_with_params, backend_with_params_t * be_with_params_t) {
be_with_params->params = NULL;
int param_0 = -1;
int success = 0;
uintptr_t param;
size_t expected_params_len = 1;
backend_params_t params_t = be_with_params_t->params;
size_t actual_params_len = params_t.len;
if(actual_params_len >= expected_params_len) {
backend_param_with_name_t param_t = params_t.params[0];
success = sscanf((char*)param_t.param.param, "%d", &param_0);
if(success) {
param = (uintptr_t) param_0;
be_with_params->params = (void *)param;
return success;
#include "../hammer.h"
#include "../internal.h"
static const size_t DEFAULT_KMAX = 1;
size_t h_get_param_k(void *param);
char * h_format_description_with_param_k(HAllocator *mm__, const char *backend_name, size_t k);
char * h_format_name_with_param_k(HAllocator *mm__, const char *backend_name, size_t k);
int h_extract_param_k(HParserBackendWithParams * be_with_params, backend_with_params_t * be_with_params_t);
#endif /* !defined(HAMMER_BACKENDS_PARAMS__H) */
......@@ -417,7 +417,8 @@ static void h_regex_free(HParser *parser) {
parser->backend_data = NULL;
parser->backend = PB_PACKRAT;
parser->backend_vtable = h_get_default_backend_vtable();
parser->backend = h_get_default_backend();
static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) {
......@@ -452,7 +453,12 @@ static HParseResult *h_regex_parse(HAllocator* mm__, const HParser* parser, HInp
HParserBackendVTable h__regex_backend_vtable = {
.compile = h_regex_compile,
.parse = h_regex_parse,
.free = h_regex_free
.free = h_regex_free,
/* Name/param resolution functions */
.backend_short_name = "regex",
.backend_description = "Regular expression matcher (broken)",
.get_description_with_params = h_get_description_with_no_params,
.get_short_name_with_params = h_get_short_name_with_no_params
#ifndef NDEBUG
......@@ -258,7 +258,7 @@ static void remove_productions_with(HCFGrammar *g, const HCFChoice *x)
static void eliminate_dead_rules(HCFGrammar *g)
HHashTableEntry *hte;
const HCFChoice *symbol;
const HCFChoice *symbol = NULL;
size_t i;
bool found;
This diff is collapsed.
......@@ -41,7 +41,12 @@ typedef struct HParseState_ HParseState;
typedef enum HParserBackend_ {
PB_MIN = 0,
PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
* Have a backend that always fails to pass around "no such backend"
* indications
......@@ -49,6 +54,26 @@ typedef enum HParserBackend_ {
} HParserBackend;
typedef struct HParserBackendVTable_ HParserBackendVTable;
typedef struct HParserBackendWithParams_ {
/* Name of backend extracted from a string if the choice of backend was specified in a call using a string */
char *requested_name;
/* The backend (set to invalid (?) if the backend is to be loaded from an external module) */
HParserBackend backend;
/* Backend vtable (TODO: use this instead of the enum so we can get rid of that) */
HParserBackendVTable * backend_vtable;
* Backend-specific parameters - if this needs to be freed, the backend
* should provide a free_params method in its vtable; currently no backends
* do this - PB_PACKRAT and PB_REGULAR take no params, and PB_LLk, PB_LALR
* and PB_GLR take an integer cast to void *
void *params;
/* Allocator to use to free this (and the params if necessary) */
HAllocator *mm__;
} HParserBackendWithParams;
typedef enum HTokenType_ {
// Before you change the explicit values of these, think of the poor bindings ;_;
......@@ -137,6 +162,7 @@ typedef struct HParserVtable_ HParserVtable;
typedef struct HParser_ {
const HParserVtable *vtable;
HParserBackend backend;
HParserBackendVTable * backend_vtable;
void* backend_data;
void *env;
HCFChoice *desugared; /* if the parser can be desugared, its desugared form */
......@@ -175,6 +201,53 @@ typedef bool (*HPredicate)(HParseResult *p, void* user_data);
typedef HParser* (*HContinuation)(HAllocator *mm__, const HParsedToken *x, void *env);
* For parser used when extracting name and params for backend by name
* TODO: possibly move to its own file?
enum BackendTokenType_ {
TT_backend_with_params_t = TT_USER,
/*
 * Plain records describing a backend name and its parameters.
 * NOTE(review): the field meanings below are inferred from names and from
 * h_extract_param_k() only - confirm against the code that fills them in.
 */

/* One parameter value; h_extract_param_k() reads `param` as a C string. */
typedef struct backend_param {
size_t len; /* presumably the length of `param` - TODO confirm */
uint8_t *param;
uint8_t *param_name;
} backend_param_t;
/* The name part of one parameter. */
typedef struct backend_param_name {
size_t len; /* presumably the length of `param_name` - TODO confirm */
uint8_t *param_name;
size_t param_id;
} backend_param_name_t;
/* A parameter name paired with its value. */
typedef struct backend_param_with_name {
backend_param_name_t param_name;
backend_param_t param;
} backend_param_with_name_t;
/* A backend name. */
typedef struct {
uint8_t *name;
size_t len; /* presumably the length of `name` - TODO confirm */
} backend_name_t;
/* Array of parameters; h_extract_param_k() treats `len` as the entry count. */
typedef struct backend_params {
backend_param_with_name_t *params;
size_t len;
} backend_params_t;
/* A backend name together with its parameter list. */
typedef struct backend_with_params {
backend_name_t name;
backend_params_t params;
} backend_with_params_t;
// {{{ Stuff for benchmarking
typedef struct HParserTestcase_ {
unsigned char* input;
......@@ -262,6 +335,89 @@ typedef struct HBenchmarkResults_ {
#endif // SWIG
// }}}
* Ask if this backend is available
int h_is_backend_available(HParserBackend backend);
* Ask what the default backend is (currently always PB_PACKRAT)
HParserBackend h_get_default_backend(void);
HParserBackendVTable * h_get_default_backend_vtable(void);
* Copy a backend+params, using the backend-supplied copy method; the
* allocator used is the one passed in, or call the __m version with
* a NULL allocator to use the one from the source HParserBackendWithParams
HAMMER_FN_DECL(HParserBackendWithParams *, h_copy_backend_with_params,
HParserBackendWithParams *be_with_params);
* Free a backend+params
void h_free_backend_with_params(HParserBackendWithParams *be_with_params);
* Get a name string for a backend; this is constant per backend and so
* need not be freed; it will resolve to the backend under
* h_get_backend_by_name().
const char * h_get_name_for_backend(HParserBackend be);
* Get a name string for a backend with parameters; it is the caller's
* responsibility to free it later. This will resolve to the same
* backend and parameters under h_get_backend_with_params_by_name().
HAMMER_FN_DECL(char *, h_get_name_for_backend_with_params,
HParserBackendWithParams *be_with_params);
* Get a human-readable descriptive string for a backend; this is constant
* per backend and so need not be freed.
const char * h_get_descriptive_text_for_backend(HParserBackend be);
* Get a human-readable descriptive string for a backend with params; it is
* the caller's responsibility to free it later. Sorry, but it's allowed
* to depend on the params, and keeping the buffer elsewhere and
* replacing it on the next call wouldn't be thread-safe.
HAMMER_FN_DECL(char *, h_get_descriptive_text_for_backend_with_params,
HParserBackendWithParams *be_with_params);
* Look up an HParserBackend by name; this should round-trip with
* h_get_name_for_backend().
HParserBackend h_query_backend_by_name(const char *name);
* Get a Hammer Backend with params from a string of the form
* backend_name(params) for example "lalr(1)".
* If the backend is one of the existing backends in the HParserBackend enum,
* the backend field will be populated in the result.
* Otherwise the result will save the name for use in attempts later at
* loading the named module.
HAMMER_FN_DECL(HParserBackendWithParams *, h_get_backend_with_params_by_name, const char *name_with_params);
* Top-level function to call a parser that has been built over some
......@@ -331,6 +487,14 @@ HAMMER_FN_DECL(HParser*, h_int_range, const HParser *p, const int64_t lower, con
HAMMER_FN_DECL(HParser*, h_bits, size_t len, bool sign);
* Returns a parser that parses the specified number of octets.
* The input does not have to be aligned to a byte boundary.
* Result token type: TT_BYTES
HAMMER_FN_DECL(HParser*, h_bytes, size_t len);
* Returns a parser that parses a signed 8-byte integer value.
......@@ -795,6 +959,8 @@ void h_pprintln(FILE* stream, const HParsedToken* tok);
* Consult each backend for details.
HAMMER_FN_DECL(int, h_compile_for_backend_with_params, HParser* parser, HParserBackendWithParams *be_with_params);
HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params);
......@@ -69,6 +69,8 @@ extern HAllocator system_allocator;
typedef struct HCFStack_ HCFStack;
typedef struct HInputStream_ {
// This should be considered to be a really big value type.
const uint8_t *input;
......@@ -238,6 +240,33 @@ typedef struct HParserBackendVTable_ {
HParseResult *(*parse_finish)(HSuspendedParser *s);
// parse_finish must free s->backend_state.
// parse_finish will not be called before parse_chunk reports done.
/* The backend knows how to free its params */
void (*free_params)(HAllocator *mm__, void *p);
* ..and how to copy them
* Since the backend params need not actually be an allocated object,
* (and in fact no current backends use this, although it is permissible),
* but might (as in PB_GLR) be some numeric constant cast to void * which
* copy_params() should just pass through, we can't use returning NULL
* to signal allocation failure. Hence, passing the result out in a
* void ** and returning a status code (0 indicates success).
int (*copy_params)(HAllocator *mm__, void **out, void *in);
/* Description/name handling */
const char *backend_short_name;
const char *backend_description;
char * (*get_description_with_params)(HAllocator *mm__,
HParserBackend be,
void *params);
char * (*get_short_name_with_params)(HAllocator *mm__,
HParserBackend be,
void *params);
/* extract params from the input string */
int (*extract_params)(HParserBackendWithParams * be_with_params, backend_with_params_t *be_with_params_t);
} HParserBackendVTable;
......@@ -318,6 +347,7 @@ struct HBitWriter_ {
// Backends {{{
extern HParserBackendVTable h__missing_backend_vtable;
extern HParserBackendVTable h__packrat_backend_vtable;
extern HParserBackendVTable h__llk_backend_vtable;
extern HParserBackendVTable h__lalr_backend_vtable;
......@@ -326,27 +356,65 @@ extern HParserBackendVTable h__glr_backend_vtable;
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
* Helper functions for the backend-with-params names and descriptions,
* for use with backends which take no params.
char * h_get_description_with_no_params(HAllocator *mm__,
HParserBackend be, void *params);
char * h_get_short_name_with_no_params(HAllocator *mm__,
HParserBackend be, void *params);
int64_t h_read_bits(HInputStream* state, int count, char signed_p);
void h_skip_bits(HInputStream* state, size_t count);
void h_seek_bits(HInputStream* state, size_t pos);
static inline size_t h_input_stream_pos(HInputStream* state) {
assert(state->index < SIZE_MAX / 8);
return state->index * 8 + state->bit_offset + state->margin;
assert(state->pos <= SIZE_MAX - state->index);
assert(state->pos + state->index < SIZE_MAX / 8);
return (state->pos + state->index) * 8 + state->bit_offset + state->margin;
static inline size_t h_input_stream_length(HInputStream *state) {
assert(state->length <= SIZE_MAX / 8);
return state->length * 8;
assert(state->pos <= SIZE_MAX - state->length);
assert(state->pos + state->length <= SIZE_MAX / 8);
return (state->pos + state->length) * 8;
// need to decide if we want to make this public.
HParseResult* h_do_parse(const HParser* parser, HParseState *state);
void put_cached(HParseState *ps, const HParser *p, HParseResult *cached);
* Inline this for benefit of h_new_parser() below, then make
* the API h_get_default_backend() call it.
static inline HParserBackend h_get_default_backend__int(void) {
return PB_PACKRAT;
static inline HParserBackendVTable * h_get_default_backend_vtable__int(void) {
return &h__packrat_backend_vtable;
static inline HParserBackendVTable * h_get_missing_backend_vtable__int(void) {
return &h__missing_backend_vtable;
/* copy_params for backends where the parameter is not actually a pointer */
int h_copy_numeric_param(HAllocator *mm__, void **out, void *in);
static inline
HParser *h_new_parser(HAllocator *mm__, const HParserVtable *vt, void *env) {
HParser *p = h_new(HParser, 1);
memset(p, 0, sizeof(HParser));
p->vtable = vt;
p->env = env;
* Current limitation: if we specify backends solely by HParserBackend, we
* can't set a default backend that requires any parameters to h_compile()
p->backend = h_get_default_backend__int();
p->backend_vtable = h_get_default_backend_vtable__int();
return p;
......@@ -3,18 +3,18 @@
static HParseResult *parse_and(void* env, HParseState* state) {
HInputStream bak = state->input_stream;
HParseResult *res = h_do_parse((HParser*)env, state);
if (!res)
return NULL; // propagate failed input state, esp. overrun
state->input_stream = bak;
if (res)
return make_result(state->arena, NULL);
return NULL;
return make_result(state->arena, NULL);
static const HParserVtable and_vt = {
.parse = parse_and,
.isValidRegular = h_false, /* TODO: strictly speaking this should be regular,
but it will be a huge amount of work and difficult
to get right, so we're leaving it for a future
revision. --mlp, 18/12/12 */
but it will be a huge amount of work and
difficult to get right, so we're leaving it for
a future revision. --mlp, 18/12/12 */
.isValidCF = h_false, /* despite TODO above, this remains false. */
.compile_to_rvm = h_not_regular,
.higher = true,
......@@ -21,8 +21,7 @@ static void *aa_alloc(HAllocator *allocator, size_t size)
static void *aa_realloc(HAllocator *allocator, void *ptr, size_t size)
HArena *arena = ((ArenaAllocator *)allocator)->arena;
assert(((void)"XXX need realloc for arena allocator", 0));
return NULL;
return h_arena_realloc(arena, ptr, size);
static void aa_free(HAllocator *allocator, void *ptr)