
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (151)
Showing with 724 additions and 84 deletions
......@@ -3,7 +3,7 @@
# and kick off a recursive make
# Also, "make src/all" turns into "make -C src all"
SUBDIRS = src examples jni
SUBDIRS = src examples src/bindings/jni
include config.mk
TOPLEVEL=.
......
......@@ -44,7 +44,10 @@ Installing
* mono-devel and mono-mcs (>= 3.0.6) (for .NET bindings)
* [nunit](http://www.nunit.org/) (for testing .NET bindings)
To build, type `scons`. To run the built-in test suite, type `scons test`. For a debug build, add `--variant=debug`.
To build, type `scons`.
To run the built-in test suite, type `scons test`.
To avoid the test dependencies, add `--no-tests`.
For a debug build, add `--variant=debug`.
To build bindings, pass a "bindings" argument to scons, e.g. `scons bindings=python`. `scons bindings=python test` will build Python bindings and run tests for both C and Python. `--variant=debug` is valid here too. You can build more than one set of bindings at a time; just separate them with commas, e.g. `scons bindings=python,perl`.
......
......@@ -5,6 +5,7 @@ from __future__ import absolute_import, division, print_function
import os
import os.path
import platform
import subprocess
import sys
default_install_dir='/usr/local'
......@@ -14,13 +15,24 @@ if platform.system() == 'Windows':
vars = Variables(None, ARGUMENTS)
vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packaging scripts)", None, PathVariable.PathIsDirCreate))
vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept))
vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['cpp', 'dotnet', 'perl', 'php', 'python', 'ruby']))
vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['cpp', 'dotnet', 'jni', 'perl', 'php', 'python', 'ruby']))
vars.Add('python', 'Python interpreter', 'python')
tools = ['default', 'scanreplace']
if 'dotnet' in ARGUMENTS.get('bindings', []):
tools.append('csharp/mono')
# add the clang tool if necessary
if os.getenv('CC') == 'clang' or platform.system() == 'Darwin':
tools.append('clang')
else:
# try to detect if cc happens to be clang by inspecting --version
cc = os.getenv('CC') or 'cc'
ver = subprocess.run([cc, '--version'], capture_output=True).stdout
if b'clang' in ver.split():
tools.append('clang')
os.environ['CC'] = cc # make sure we call it as we saw it
envvars = {'PATH' : os.environ['PATH']}
if 'PKG_CONFIG_PATH' in os.environ:
envvars['PKG_CONFIG_PATH'] = os.environ['PKG_CONFIG_PATH']
......@@ -91,18 +103,15 @@ AddOption('--in-place',
action='store_true',
help='Build in-place, rather than in the build/<variant> tree')
AddOption('--tests',
AddOption('--no-tests',
dest='with_tests',
default=env['PLATFORM'] != 'win32',
action='store_true',
help='Build tests')
action='store_false',
help='Do not build tests')
env['CC'] = os.getenv('CC') or env['CC']
env['CXX'] = os.getenv('CXX') or env['CXX']
if os.getenv('CC') == 'clang' or env['PLATFORM'] == 'darwin':
env.Replace(CC='clang',
CXX='clang++')
env['CFLAGS'] = os.getenv('CFLAGS') or env['CFLAGS']
# Language standard and warnings
if env['CC'] == 'cl':
......@@ -118,8 +127,15 @@ if env['CC'] == 'cl':
]
)
else:
# -Wno-clobbered only really works with gcc >= 4.2.x, but ... scons
env.MergeFlags('-std=c99 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable -Wno-clobbered')
if env['PLATFORM'] == 'darwin':
# It's reported -D_POSIX_C_SOURCE breaks the Mac OS build; I think we
# may need _DARWIN_C_SOURCE instead of, or in addition to, it; let's wait until we
# have access to a Mac to test/repro
env.MergeFlags('-std=c99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable')
else:
# Using -D_POSIX_C_SOURCE=200809L here, rather than on an ad-hoc basis
# when #including, is important
env.MergeFlags('-std=c99 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable')
# Linker options
if env['PLATFORM'] == 'darwin':
......
......@@ -6,6 +6,9 @@ import os.path
Import('env testruns')
# Bump this if you break binary compatibility (e.g. renumber backends)
hammer_shlib_version = "1.0.0"
dist_headers = [
'hammer.h',
'allocator.h',
......@@ -21,7 +24,9 @@ parsers_headers = [
backends_headers = [
'backends/regex.h',
'backends/contextfree.h'
'backends/contextfree.h',
'backends/missing.h',
'backends/params.h'
]
parsers = ['parsers/%s.c'%s for s in
......@@ -30,6 +35,7 @@ parsers = ['parsers/%s.c'%s for s in
'attr_bool',
'bind',
'bits',
'bytes',
'butnot',
'ch',
'charset',
......@@ -56,7 +62,7 @@ parsers = ['parsers/%s.c'%s for s in
'seek']]
backends = ['backends/%s.c' % s for s in
['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']]
['missing', 'packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0', 'params']]
misc_hammer_parts = [
'allocator.c',
......@@ -88,6 +94,7 @@ ctests = ['t_benchmark.c',
't_grammar.c',
't_misc.c',
't_mm.c',
't_names.c',
't_regression.c']
......@@ -109,7 +116,8 @@ libhammer_static = None
libhammer_shared = None
if build_shared_library:
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts)
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts, \
SHLIBVERSION=hammer_shlib_version)
libhammer_static = env.StaticLibrary(static_library_name, parsers + backends + misc_hammer_parts)
if libhammer_shared is not None:
......
......@@ -61,7 +61,14 @@ static void * h_arena_malloc_raw(HArena *arena, size_t size, bool need_zero);
void* h_alloc(HAllocator* mm__, size_t size) {
void *p = mm__->alloc(mm__, size);
if(!p)
h_platform_errx(1, "memory allocation failed (%uB requested)\n", (unsigned int)size);
h_platform_errx(1, "memory allocation failed (%zuB requested)\n", size);
return p;
}
void* h_realloc(HAllocator* mm__, void* ptr, size_t size) {
void *p = mm__->realloc(mm__, ptr, size);
if(!p)
h_platform_errx(1, "memory reallocation failed (%zuB requested)\n", size);
return p;
}
......@@ -267,3 +274,32 @@ void h_allocator_stats(HArena *arena, HArenaStats *stats) {
stats->arena_li_malloc_bytes = arena->arena_li_malloc_bytes;
#endif
}
void* h_arena_realloc(HArena *arena, void* ptr, size_t n) {
struct arena_link *link;
void* ret;
size_t ncopy;
// XXX this is really wasteful, but maybe better than nothing?
//
// first, we walk the blocks to find our ptr. since we don't know how large
// the original allocation was, we must always make a new one and copy as
// much data from the old block as there could have been.
for (link = arena->head; link; link = link->next) {
if (ptr >= (void *)link->rest && ptr <= (void *)link->rest + link->used)
break; /* found it */
}
assert(link != NULL);
ncopy = (void *)link->rest + link->used - ptr;
if (n < ncopy)
ncopy = n;
ret = h_arena_malloc_noinit(arena, n);
assert(ret != NULL);
memcpy(ret, ptr, ncopy);
h_arena_free(arena, ptr);
return ret;
}
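
Since h_arena_realloc() is new in this change, a minimal usage sketch may help (not part of the diff; it only uses the arena API declared in allocator.h, and the allocator argument is a placeholder):

#include <string.h>
#include "allocator.h"

static void grow_buffer_example(HAllocator *mm__) {
    HArena *arena = h_new_arena(mm__, 0);       /* 0 = default block size */
    char *buf = h_arena_malloc(arena, 16);
    memcpy(buf, "hello", 6);
    /* h_arena_realloc allocates a fresh block, copies as many bytes as could
     * have belonged to the old allocation, then frees the old pointer */
    buf = h_arena_realloc(arena, buf, 64);
    h_delete_arena(arena);
}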
......@@ -48,6 +48,7 @@ typedef struct HAllocator_ {
} HAllocator;
void* h_alloc(HAllocator* allocator, size_t size) ATTR_MALLOC(2);
void* h_realloc(HAllocator* allocator, void* ptr, size_t size);
typedef struct HArena_ HArena ; // hidden implementation
......@@ -55,6 +56,7 @@ HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for def
void* h_arena_malloc_noinit(HArena *arena, size_t count) ATTR_MALLOC(2);
void* h_arena_malloc(HArena *arena, size_t count) ATTR_MALLOC(2);
void* h_arena_realloc(HArena *arena, void* ptr, size_t count);
void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers.
void h_delete_arena(HArena *arena);
void h_arena_set_except(HArena *arena, jmp_buf *except);
......
#include <assert.h>
#include "lr.h"
#include "params.h"
static bool glr_step(HParseResult **result, HSlist *engines,
HLREngine *engine, const HLRAction *action);
......@@ -174,9 +175,9 @@ static bool glr_step(HParseResult **result, HSlist *engines,
HSlistNode *x;
for(x=engines->head; x; x=x->next) {
HLREngine *eng = x->elem;
if(eng->state == engine->state) {
x->elem = lrengine_merge(eng, engine);
break;
if(eng->state == engine->state && eng->input.index == engine->input.index) {
x->elem = lrengine_merge(eng, engine);
break;
}
}
if(!x) // no merge happened
......@@ -225,6 +226,8 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
HLREngine *engine = h_slist_pop(engines);
const HLRAction *action = h_lrengine_action(engine);
glr_step(&result, engback, engine, action);
// XXX detect ambiguous results - two engines terminating at the same pos
// -> kill both engines, i.e. ignore if there is a later unamb. success
}
// swap the lists
......@@ -239,12 +242,54 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
return result;
}
char * h_glr_get_description(HAllocator *mm__,
HParserBackend be, void *param) {
const char *backend_name = "GLR";
size_t k;
char *descr = NULL;
k = h_get_param_k(param);
descr = h_format_description_with_param_k(mm__, backend_name, k);
return descr;
}
char * h_glr_get_short_name(HAllocator *mm__,
HParserBackend be, void *param) {
const char *backend_name = "GLR";
size_t k;
char *name = NULL;
k = h_get_param_k(param);
name = h_format_name_with_param_k(mm__, backend_name, k);
return name;
}
int h_glr_extract_params(HParserBackendWithParams * be_with_params, backend_with_params_t * be_with_params_t) {
return h_extract_param_k(be_with_params, be_with_params_t);
}
HParserBackendVTable h__glr_backend_vtable = {
.compile = h_glr_compile,
.parse = h_glr_parse,
.free = h_glr_free
.free = h_glr_free,
.copy_params = h_copy_numeric_param,
/* No free_param needed, since it's not actually allocated */
/* Name/param resolution functions */
.backend_short_name = "glr",
.backend_description = "GLR(k) parser backend",
.get_description_with_params = h_glr_get_description,
.get_short_name_with_params = h_glr_get_short_name,
.extract_params = h_glr_extract_params
};
......
#include <assert.h>
#include "contextfree.h"
#include "lr.h"
#include "params.h"
/* LALR-via-SLR grammar transformation */
......@@ -31,18 +31,24 @@ static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A)
{
HLRAction *action = lrtable_lookup(table, x, A);
assert(action != NULL);
// we are interested in a transition out of state x, i.e. a shift action.
// while there could also be reduce actions associated with A in state x,
// those are not what we are here for. so if action is a conflict, search it
// for the shift. there will only be one and it will be the bottom element.
if(action->type == HLR_CONFLICT) {
HSlistNode *x;
for(x=action->branches->head; x; x=x->next) {
action = x->elem;
assert(action->type != HLR_CONFLICT); // no nesting of conflicts
if(action->type == HLR_SHIFT)
break;
}
assert(x != NULL && x->next == NULL); // shift found at the bottom
}
assert(action->type == HLR_SHIFT);
return action->nextstate;
}
static inline HLRTransition *transition(HArena *arena,
size_t x, const HCFChoice *A, size_t y)
{
HLRTransition *t = h_arena_malloc(arena, sizeof(HLRTransition));
t->from = x;
t->symbol = A;
t->to = y;
return t;
return action->nextstate;
}
// no-op on terminal symbols
......@@ -69,8 +75,8 @@ static void transform_productions(const HLRTable *table, HLREnhGrammar *eg,
HCFChoice **iBj = items;
for(; *B; B++, iBj++) {
size_t j = follow_transition(table, i, *B);
HLRTransition *i_B_j = transition(arena, i, *B, j);
*iBj = h_hashtable_get(eg->tmap, i_B_j);
HLRTransition i_B_j = {i, *B, j};
*iBj = h_hashtable_get(eg->tmap, &i_B_j);
assert(*iBj != NULL);
i = j;
}
......@@ -269,6 +275,7 @@ HCFChoice *h_desugar_augmented(HAllocator *mm__, HParser *parser)
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
{
size_t k = params? (uintptr_t)params : DEFAULT_KMAX;
// generate (augmented) CFG from parser
// construct LR(0) DFA
// build LR(0) table
......@@ -329,10 +336,14 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
if(match_any_production(table, eg, lhs, item->rhs, state)) {
// the left-hand symbol's follow set is this production's
// contribution to the lookahead
const HStringMap *fs = h_follow(1, eg->grammar, lhs);
const HStringMap *fs = h_follow(k, eg->grammar, lhs);
assert(fs != NULL);
assert(fs->epsilon_branch == NULL);
assert(!h_stringmap_empty(fs));
// NB: there is a case where fs can be empty: when reducing by lhs
// would lead to certain parse failure, by means of h_nothing_p()
// for instance. in that case, the below code correctly adds no
// reduce action.
assert(!h_stringmap_empty(fs)); // XXX
// for each lookahead symbol, put action into table cell
if(terminals_put(table->tmap[state], fs, action) < 0)
......@@ -345,6 +356,8 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
h_slist_push(table->inadeq, (void *)(uintptr_t)state);
}
}
h_cfgrammar_free(eg->grammar);
}
h_cfgrammar_free(g);
......@@ -357,10 +370,43 @@ void h_lalr_free(HParser *parser)
HLRTable *table = parser->backend_data;
h_lrtable_free(table);
parser->backend_data = NULL;
parser->backend = PB_PACKRAT;
parser->backend_vtable = h_get_default_backend_vtable();
parser->backend = h_get_default_backend();
}
char * h_lalr_get_description(HAllocator *mm__,
HParserBackend be, void *param) {
const char *backend_name = "LALR";
size_t k;
char *descr = NULL;
k = h_get_param_k(param);
descr = h_format_description_with_param_k(mm__, backend_name, k);
return descr;
}
char * h_lalr_get_short_name(HAllocator *mm__,
HParserBackend be, void *param) {
const char *backend_name = "LALR";
size_t k;
char *name = NULL;
k = h_get_param_k(param);
name = h_format_name_with_param_k(mm__, backend_name, k);
return name;
}
int h_lalr_extract_params(HParserBackendWithParams * be_with_params, backend_with_params_t * be_with_params_t) {
return h_extract_param_k(be_with_params, be_with_params_t);
}
HParserBackendVTable h__lalr_backend_vtable = {
.compile = h_lalr_compile,
......@@ -368,11 +414,18 @@ HParserBackendVTable h__lalr_backend_vtable = {
.free = h_lalr_free,
.parse_start = h_lr_parse_start,
.parse_chunk = h_lr_parse_chunk,
.parse_finish = h_lr_parse_finish
};
.parse_finish = h_lr_parse_finish,
.copy_params = h_copy_numeric_param,
/* No free_param needed, since it's not actually allocated */
/* Name/param resolution functions */
.backend_short_name = "lalr",
.backend_description = "LALR(k) parser backend",
.get_description_with_params = h_lalr_get_description,
.get_short_name_with_params = h_lalr_get_short_name,
.extract_params = h_lalr_extract_params
};
// dummy!
int test_lalr(void)
......
......@@ -2,8 +2,7 @@
#include "../internal.h"
#include "../cfgrammar.h"
#include "../parsers/parser_internal.h"
static const size_t DEFAULT_KMAX = 1;
#include "params.h"
/* Generating the LL(k) parse table */
......@@ -254,7 +253,8 @@ void h_llk_free(HParser *parser)
HLLkTable *table = parser->backend_data;
h_llktable_free(table);
parser->backend_data = NULL;
parser->backend = PB_PACKRAT;
parser->backend_vtable = h_get_default_backend_vtable();
parser->backend = h_get_default_backend();
}
......@@ -606,6 +606,38 @@ HParseResult *h_llk_parse_finish(HSuspendedParser *s)
return llk_parse_finish_(s->mm__, s->backend_state);
}
char * h_llk_get_description(HAllocator *mm__,
HParserBackend be, void *param) {
const char *backend_name = "LL";
size_t k, len;
char *descr = NULL;
k = h_get_param_k(param);
descr = h_format_description_with_param_k(mm__, backend_name, k);
return descr;
}
char * h_llk_get_short_name(HAllocator *mm__,
HParserBackend be, void *param) {
const char *backend_name = "LL";
size_t k;
char *name = NULL;
k = h_get_param_k(param);
name = h_format_name_with_param_k(mm__, backend_name, k);
return name;
}
int h_llk_extract_params(HParserBackendWithParams * be_with_params, backend_with_params_t *be_with_params_t) {
return h_extract_param_k(be_with_params, be_with_params_t);
}
HParserBackendVTable h__llk_backend_vtable = {
.compile = h_llk_compile,
......@@ -614,7 +646,19 @@ HParserBackendVTable h__llk_backend_vtable = {
.parse_start = h_llk_parse_start,
.parse_chunk = h_llk_parse_chunk,
.parse_finish = h_llk_parse_finish
.parse_finish = h_llk_parse_finish,
.copy_params = h_copy_numeric_param,
/* No free_param needed, since it's not actually allocated */
/* Name/param resolution functions */
.backend_short_name = "llk",
.backend_description = "LL(k) parser backend",
.get_description_with_params = h_llk_get_description,
.get_short_name_with_params = h_llk_get_short_name,
/*extraction of params from string*/
.extract_params = h_llk_extract_params
};
......
#include "missing.h"
/* Placeholder backend that always fails */
int h_missing_compile(HAllocator* mm__, HParser* parser, const void* params) {
/* Always fail */
return -1;
}
HParseResult *h_missing_parse(HAllocator* mm__, const HParser* parser, HInputStream* stream) {
/* Always fail */
return NULL;
}
void h_missing_free(HParser *parser) {
/* No-op */
}
HParserBackendVTable h__missing_backend_vtable = {
.compile = h_missing_compile,
.parse = h_missing_parse,
.free = h_missing_free,
};
#ifndef HAMMER_BACKENDS_MISSING__H
#define HAMMER_BACKENDS_MISSING__H
#include "../hammer.h"
#include "../internal.h"
#endif /* !defined(HAMMER_BACKENDS_MISSING__H) */
......@@ -34,37 +34,39 @@ HParserCacheValue *cached_lr(HParseState *state, HLeftRec *lr) {
return ret;
}
// Really library-internal tool to perform an uncached parse, and handle any common error-handling.
static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HParser *parser) {
// TODO(thequux): these nested conditions are ugly. Factor this appropriately, so that it is clear which codes is executed when.
HParseResult *tmp_res;
if (parser) {
HInputStream bak = state->input_stream;
tmp_res = parser->vtable->parse(parser->env, state);
if (tmp_res) {
tmp_res->arena = state->arena;
if (!state->input_stream.overrun) {
size_t bit_length = h_input_stream_pos(&state->input_stream) - h_input_stream_pos(&bak);
if (tmp_res->bit_length == 0) { // Don't modify if forwarding.
tmp_res->bit_length = bit_length;
}
if (tmp_res->ast && tmp_res->ast->bit_length != 0) {
((HParsedToken*)(tmp_res->ast))->bit_length = bit_length;
}
} else
tmp_res->bit_length = 0;
}
} else
tmp_res = NULL;
if (state->input_stream.overrun)
return NULL; // overrun is always failure.
#ifdef CONSISTENCY_CHECK
if (!tmp_res) {
state->input_stream = INVALID;
state->input_stream.input = key->input_pos.input;
// internal helper to perform an uncached parse and common error-handling
static inline
HParseResult *perform_lowlevel_parse(HParseState *state, const HParser *parser)
{
HParseResult *res;
HInputStream bak;
size_t len;
if (!parser)
return NULL;
bak = state->input_stream;
res = parser->vtable->parse(parser->env, state);
if (!res)
return NULL; // NB: input position is considered invalid on failure
// combinators' parse functions by design do not have to check for overrun.
// turn such bogus successes into parse failure.
if (state->input_stream.overrun) {
res->bit_length = 0;
return NULL;
}
#endif
return tmp_res;
// update result length
res->arena = state->arena;
len = h_input_stream_pos(&state->input_stream) - h_input_stream_pos(&bak);
if (res->bit_length == 0) // Don't modify if forwarding.
res->bit_length = len;
if (res->ast && res->ast->bit_length != 0)
((HParsedToken *)(res->ast))->bit_length = len;
return res;
}
HParserCacheValue* recall(HParserCacheKey *k, HParseState *state, HHashValue keyhash) {
......@@ -240,8 +242,10 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
* so we check to see if we have one
*/
if (!base || NULL == base->head) {
h_hashtable_put_precomp(state->cache, key,
cached_result(state, tmp_res), keyhash);
if (parser->vtable->higher) {
h_hashtable_put_precomp(state->cache, key,
cached_result(state, tmp_res), keyhash);
}
return tmp_res;
} else {
base->seed = tmp_res;
......@@ -261,13 +265,15 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
}
int h_packrat_compile(HAllocator* mm__, HParser* parser, const void* params) {
parser->backend_vtable = &h__packrat_backend_vtable;
parser->backend = PB_PACKRAT;
return 0; // No compilation necessary, and everything should work
// out of the box.
}
void h_packrat_free(HParser *parser) {
parser->backend = PB_PACKRAT; // revert to default, oh that's us
parser->backend_vtable = h_get_default_backend_vtable();
parser->backend = h_get_default_backend();
}
static uint32_t cache_key_hash(const void* key) {
......@@ -322,6 +328,7 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr
parse_state->arena = arena;
parse_state->symbol_table = NULL;
HParseResult *res = h_do_parse(parser, parse_state);
*input_stream = parse_state->input_stream;
h_slist_free(parse_state->lr_stack);
h_hashtable_free(parse_state->recursion_heads);
// tear down the parse state
......@@ -332,8 +339,121 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr
return res;
}
// The following naive implementation of the iterative (chunked) parsing API
// concatenates chunks and blindly re-runs the full parse on every call to
// h_packrat_parse_chunk.
//
// NB: A full implementation will still have to concatenate the chunks to
// support arbitrary backtracking, but should be able save much, if not all, of
// the HParseState between calls.
// Cutting unneeded past input should also be possible but is complicated by
// the fact that only higher-order combinators are saved to the packrat cache,
// so former input to bare primitive combinators must remain available.
//
// Note: The iterative API expects us to always consume an entire input chunk
// when we suspend, even if packrat later backtracks into it. We will produce
// the correct parse result and accurately consume from a final chunk, but all
// earlier chunks will be reported as fully consumed and as being part of the
// HParseResult in terms of its bit_length field.
void h_packrat_parse_start(HSuspendedParser *s)
{
// nothing to do here, we allocate lazily below
}
bool h_packrat_parse_chunk(HSuspendedParser *s, HInputStream *input)
{
HAllocator *mm__ = s->mm__;
HParseResult *res;
HInputStream *cat;
size_t newlen;
if (s->backend_state == NULL) { // this is the first chunk
// attempt to finish the parse on just the given input.
res = h_packrat_parse(mm__, s->parser, input);
if (input->last_chunk || !input->overrun) {
s->backend_state = res; // pass on the result
return true; // and signal we're done
}
// we ran out of input and are expecting more
// allocate and initialize an input stream to concatenate the chunks
cat = h_new(HInputStream, 1);
*cat = *input;
cat->input = h_alloc(mm__, input->length);
memcpy((void *)cat->input, input->input, input->length);
s->backend_state = cat;
goto suspend;
}
// we have received additional input - append it to the saved stream
cat = s->backend_state;
assert(input->pos == cat->length);
if (input->length > SIZE_MAX - cat->length)
h_platform_errx(1, "input length would overflow");
newlen = cat->length + input->length;
cat->input = h_realloc(mm__, (void *)cat->input, newlen);
memcpy((void *)cat->input + cat->length, input->input, input->length);
cat->length = newlen;
cat->last_chunk = input->last_chunk;
// reset our input stream and call the parser on it (again)
cat->index = 0;
cat->bit_offset = 0;
cat->margin = 0;
cat->endianness = DEFAULT_ENDIANNESS;
cat->overrun = false;
res = h_packrat_parse(mm__, s->parser, cat);
assert(cat->index <= cat->length);
input->overrun = cat->overrun;
// suspend if the parser still needs more input
if (input->overrun && !input->last_chunk)
goto suspend;
// otherwise the parse is finished...
// report final input position
if (cat->index < input->pos) { // parser just needed some lookahead
input->index = 0; // don't consume this last chunk
input->bit_offset = 0;
input->margin = 0;
} else {
input->index = cat->index - input->pos;
input->bit_offset = cat->bit_offset;
input->margin = cat->margin;
input->endianness = cat->endianness;
}
// clean up and return the result
h_free((void *)cat->input);
h_free(cat);
s->backend_state = res;
return true; // don't call me again.
suspend:
input->index = input->length; // consume the entire chunk on suspend
input->margin = 0;
input->bit_offset = 0;
return false; // come back with more input.
}
HParseResult *h_packrat_parse_finish(HSuspendedParser *s)
{
return s->backend_state;
}
HParserBackendVTable h__packrat_backend_vtable = {
.compile = h_packrat_compile,
.parse = h_packrat_parse,
.free = h_packrat_free
.free = h_packrat_free,
.parse_start = h_packrat_parse_start,
.parse_chunk = h_packrat_parse_chunk,
.parse_finish = h_packrat_parse_finish,
/* Name/param resolution functions */
.backend_short_name = "packrat",
.backend_description = "Packrat parser with Warth's recursion",
.get_description_with_params = h_get_description_with_no_params,
.get_short_name_with_params = h_get_short_name_with_no_params
};
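
For orientation, here is a hedged sketch of how a caller might drive the chunked parsing API that this naive packrat implementation now backs; the h_parse_start/h_parse_chunk/h_parse_finish entry points and their exact signatures are assumed from hammer.h and are not shown in this diff:

#include "hammer.h"

static HParseResult *parse_in_two_chunks(const HParser *p,
                                         const uint8_t *a, size_t alen,
                                         const uint8_t *b, size_t blen) {
    HSuspendedParser *s = h_parse_start(p);     /* assumed entry point */
    if (!s)
        return NULL;                            /* backend lacks chunked support */
    if (!h_parse_chunk(s, a, alen))             /* false: parser suspended, wants more input */
        h_parse_chunk(s, b, blen);              /* feed the final chunk */
    return h_parse_finish(s);                   /* NULL on parse failure */
}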
#include "params.h"
size_t h_get_param_k(void *param) {
uintptr_t params_int;
params_int = (uintptr_t)param;
return (size_t)params_int;
}
char * h_format_description_with_param_k(HAllocator *mm__, const char *backend_name, size_t k){
const char *format_str = "%s(%zu) parser backend";
const char *generic_descr_format_str =
"%s(k) parser backend (default k is %zu)";
size_t len;
char *descr = NULL;
if (k > 0) {
/* A specific k was given */
/* Measure how big a buffer we need */
len = snprintf(NULL, 0, format_str, backend_name, k);
/* Allocate it and do the real snprintf */
descr = h_new(char, len + 1);
if (descr) {
snprintf(descr, len + 1, format_str, backend_name, k);
}
} else {
/*
* No specific k given; DEFAULT_KMAX will be used. Mention the compiled-in
* DEFAULT_KMAX value in the description.
*/
len = snprintf(NULL, 0, generic_descr_format_str, backend_name, DEFAULT_KMAX);
/* Allocate and do the real snprintf */
descr = h_new(char, len + 1);
if (descr) {
snprintf(descr, len + 1, generic_descr_format_str, backend_name, DEFAULT_KMAX);
}
}
return descr;
}
char * h_format_name_with_param_k(HAllocator *mm__, const char *backend_name, size_t k){
const char *format_str = "%s(%zu)", *generic_name = "%s(k)";
size_t len;
char *name = NULL;
if (k > 0) {
/* A specific k was given */
/* Measure how big a buffer we need */
len = snprintf(NULL, 0, format_str, backend_name, k);
/* Allocate it and do the real snprintf */
name = h_new(char, len + 1);
if (name) {
snprintf(name, len + 1, format_str, backend_name, k);
}
} else {
/* No specific k */
len = snprintf(NULL, 0, generic_name, backend_name, k);
name = h_new(char, len + 1);
if (name) {
snprintf(name, len + 1, generic_name, backend_name);
}
}
return name;
}
/*TODO better error handling*/
int h_extract_param_k(HParserBackendWithParams * be_with_params, backend_with_params_t * be_with_params_t) {
be_with_params->params = NULL;
int param_0 = -1;
int success = 0;
uintptr_t param;
size_t expected_params_len = 1;
backend_params_t params_t = be_with_params_t->params;
size_t actual_params_len = params_t.len;
if(actual_params_len >= expected_params_len) {
backend_param_with_name_t param_t = params_t.params[0];
success = sscanf((char*)param_t.param.param, "%d", &param_0);
}
if(success) {
param = (uintptr_t) param_0;
be_with_params->params = (void *)param;
}
return success;
}
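
A brief, hedged illustration of the parameter plumbing above (the allocator handling is an assumption, not taken from the diff): a backend's k is stored directly in the void *param as an integer, so decoding and formatting look like this:

#include <stdint.h>
#include "backends/params.h"

static void show_lalr_description(HAllocator *mm__) {
    void *param = (void *)(uintptr_t)2;          /* k = 2, packed into the pointer */
    size_t k = h_get_param_k(param);             /* recovers 2 */
    char *descr = h_format_description_with_param_k(mm__, "LALR", k);
    /* descr now reads "LALR(2) parser backend" */
    mm__->free(mm__, descr);                     /* release with the same allocator */
}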
#ifndef HAMMER_BACKENDS_PARAMS__H
#define HAMMER_BACKENDS_PARAMS__H
#include "../hammer.h"
#include "../internal.h"
static const size_t DEFAULT_KMAX = 1;
size_t h_get_param_k(void *param);
char * h_format_description_with_param_k(HAllocator *mm__, const char *backend_name, size_t k);
char * h_format_name_with_param_k(HAllocator *mm__, const char *backend_name, size_t k);
int h_extract_param_k(HParserBackendWithParams * be_with_params, backend_with_params_t * be_with_params_t);
#endif /* !defined(HAMMER_BACKENDS_PARAMS__H) */
......@@ -417,7 +417,8 @@ static void h_regex_free(HParser *parser) {
h_free(prog->actions);
h_free(prog);
parser->backend_data = NULL;
parser->backend = PB_PACKRAT;
parser->backend_vtable = h_get_default_backend_vtable();
parser->backend = h_get_default_backend();
}
static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params) {
......@@ -452,7 +453,12 @@ static HParseResult *h_regex_parse(HAllocator* mm__, const HParser* parser, HInp
HParserBackendVTable h__regex_backend_vtable = {
.compile = h_regex_compile,
.parse = h_regex_parse,
.free = h_regex_free
.free = h_regex_free,
/* Name/param resolution functions */
.backend_short_name = "regex",
.backend_description = "Regular expression matcher (broken)",
.get_description_with_params = h_get_description_with_no_params,
.get_short_name_with_params = h_get_short_name_with_no_params
};
#ifndef NDEBUG
......
#!python
from __future__ import absolute_import, division, print_function
import os
import sys
def walkDirs(path):
    """helper function to get a list of all subdirectories"""
    # os.walk() does not take a callback (that was os.path.walk in Python 2),
    # so collect the subdirectories directly from its generator
    pathlist = [path]
    for dirname, dirs, _files in os.walk(path):
        for d in dirs:
            pathlist.append(os.path.join(dirname, d))
    return pathlist
def ConfigureJNI(env):
"""Configure the given environment for compiling Java Native Interface
c or c++ language files."""
print( "Configuring JNI includes")
if not env.get('JAVAC'):
print( "The Java compiler must be installed and in the current path.")
return 0
# first look for a shell variable called JAVA_HOME
java_base = os.environ.get('JAVA_HOME')
if not java_base:
if sys.platform == 'darwin':
# Apple's OS X has its own special java base directory
java_base = '/System/Library/Frameworks/JavaVM.framework'
else:
# Search for the java compiler
print ("JAVA_HOME environment variable is not set. Searching for java... ")
jcdir = os.path.dirname(env.WhereIs('javac'))
if not jcdir:
print( "not found.")
return 0
# assuming the compiler found is in some directory like
# /usr/jdkX.X/bin/javac, java's home directory is /usr/jdkX.X
java_base = os.path.join(jcdir, "..")
print( "found.")
if sys.platform == 'cygwin':
# Cygwin and Sun Java have different ideas of how path names
# are defined. Use cygpath to convert the windows path to
# a cygwin path. i.e. C:\jdkX.X to /cygdrive/c/jdkX.X
java_base = os.popen("cygpath -up '"+java_base+"'").read().replace( \
'\n', '')
if sys.platform == 'darwin':
# Apple does not use Sun's naming convention
java_headers = [os.path.join(java_base, 'Headers')]
java_libs = [os.path.join(java_base, 'Libraries')]
else:
# windows and linux
java_headers = [os.path.join(java_base, 'include')]
java_libs = [os.path.join(java_base, 'lib')]
# Sun's windows and linux JDKs keep system-specific header
# files in a sub-directory of include
if java_base == '/usr' or java_base == '/usr/local':
# too many possible subdirectories. Just use defaults
java_headers.append(os.path.join(java_headers[0], 'win32'))
java_headers.append(os.path.join(java_headers[0], 'linux'))
java_headers.append(os.path.join(java_headers[0], 'solaris'))
else:
# add all subdirs of 'include'. The system specific headers
# should be in there somewhere
java_headers = walkDirs(java_headers[0])
if not any(os.path.exists(os.path.join(path, 'jni.h'))
for path in java_headers):
print("Can't find jni.h in %s" % java_headers)
return 0
# add Java's include and lib directory to the environment
java_headers.append(os.path.join(java_headers[0], 'linux'))
env.Append(CPPPATH = java_headers)
env.Append(LIBPATH = java_libs)
# add any special platform-specific compilation or linking flags
if sys.platform == 'darwin':
env.Append(SHLINKFLAGS = '-dynamiclib -framework JavaVM')
env['SHLIBSUFFIX'] = '.jnilib'
elif sys.platform == 'cygwin':
env.Append(CCFLAGS = '-mno-cygwin')
env.Append(SHLINKFLAGS = '-mno-cygwin -Wl,--kill-at')
# Add extra potentially useful environment variables
env['JAVA_HOME'] = java_base
env['JNI_CPPPATH'] = java_headers
env['JNI_LIBPATH'] = java_libs
return 1
\ No newline at end of file
......@@ -84,6 +84,20 @@ public static void main(String args[])
Parser i3parsers[] = {Hammer.ch((byte)'i'), Hammer.uInt8(), Hammer.int8()};
handle(Hammer.parse(Hammer.sequence(Hammer.ch((byte)'i'), Hammer.uInt8(), Hammer.int8()), i3, i3.length));
out("permutation");
byte ch3[] = {(byte) 'a', (byte) 'b', (byte) 'c'};
handle(Hammer.parse(Hammer.permutation(Hammer.ch((byte)'a'), Hammer.ch((byte)'b'), Hammer.ch((byte)'c')), ch3, ch3.length));
handle(Hammer.parse(Hammer.permutation(Hammer.ch((byte)'b'), Hammer.ch((byte)'a'), Hammer.ch((byte)'c')), ch3, ch3.length));
out("skip");
byte ch6[] = {(byte) 'a', (byte) 'b', (byte) 'c', (byte) 'd', (byte) 'e', (byte) 'f'};
handle(Hammer.parse(Hammer.sequence(Hammer.ch((byte)'a'), Hammer.skip((int)32), Hammer.ch((byte)'f')), ch6, ch6.length));
out("seek");
final int SEEK_SET = 0; /* Seek from beginning of file. */
//final int SEEK_CUR = 1; /* Seek from current position. */
//final int SEEK_END = 2; /* Seek from end of file. */
handle(Hammer.parse(Hammer.sequence(Hammer.ch((byte)'a'), Hammer.seek((int)40, (int)SEEK_SET), Hammer.ch((byte)'f')), ch6, ch6.length));
}
......
......@@ -5,17 +5,18 @@ CSOURCES := com_upstandinghackers_hammer_Hammer.c com_upstandinghackers_hammer_P
# ls *.h *.o *.so com/upstandinghackers/hammer/*.class | grep -v jhammer.h | tr '\n' ' '; replace single $ with $$
OUTPUTS := com/upstandinghackers/hammer/Action.class com/upstandinghackers/hammer/Hammer.class com_upstandinghackers_hammer_Hammer.h com_upstandinghackers_hammer_Hammer.o com/upstandinghackers/hammer/Hammer\$TokenType.class com_upstandinghackers_hammer_Hammer_TokenType.h com/upstandinghackers/hammer/ParsedToken.class com_upstandinghackers_hammer_ParsedToken.h com_upstandinghackers_hammer_ParsedToken.o com/upstandinghackers/hammer/Parser.class com/upstandinghackers/hammer/ParseResult.class com_upstandinghackers_hammer_ParseResult.h com_upstandinghackers_hammer_ParseResult.o com_upstandinghackers_hammer_Parser.h com_upstandinghackers_hammer_Parser.o com/upstandinghackers/hammer/Predicate.class libjhammer.so
TOPLEVEL := ../
TOPLEVEL := ../../../
JC=javac
JH=javah
CP=com/upstandinghackers/hammer
PACKAGE=com.upstandinghackers.hammer
include ../common.mk
include ../../../common.mk
JNI_INCLUDE := /usr/lib/jvm/java-6-openjdk/include/
CFLAGS += -fPIC -I. -I $(TOPLEVEL)/src/ -I jni -I $(JNI_INCLUDE)
JNI_INCLUDE := /usr/lib/jvm/java-8-oracle/include/
JNI_INCLUDE_LINUX := /usr/lib/jvm/java-8-oracle/include/linux
CFLAGS += -fPIC -I. -I $(TOPLEVEL)/src/ -I jni -I $(JNI_INCLUDE) -I $(JNI_INCLUDE_LINUX)
%.java: $(call ifsilent,| $(HUSH))
$(call hush, "Compiling Java source $@") $(JC) $(CP)/$@
......@@ -23,7 +24,7 @@ CFLAGS += -fPIC -I. -I $(TOPLEVEL)/src/ -I jni -I $(JNI_INCLUDE)
all: javacc prepare compile link
link: compile
$(call hush, "Generating libjhammer.so") $(CC) -shared $(CFLAGS) -o libjhammer.so *.o ../src/*.o ../src/backends/*.o ../src/parsers/*.o
$(call hush, "Generating libjhammer.so") $(CC) -shared $(CFLAGS) -o libjhammer.so *.o ../../../src/*.o ../../../src/backends/*.o ../../../src/parsers/*.o
$(CSOURCES): prepare
$(call hush, "Compiling $@") $(CC) -c $(CFLAGS) $@
......
File moved
# -*- python -*-
from __future__ import absolute_import, division, print_function
import os, os.path
import sys
Import('env libhammer_shared testruns targets')
from src.bindings.jni.ConfigureJNI import ConfigureJNI
javaenv = env.Clone()
if not ConfigureJNI(javaenv):
print("Java Native Interface is required... Exiting")
Exit(0)
javaenv.Append(CPPPATH=[".", "../.."],
LIBS=['hammer'],
LIBPATH=["../.."])
# compile java classes
jni_classes = javaenv.Java(".", "#src/bindings/jni/com")
print(jni_classes)
jni_headers = javaenv.JavaH(".", jni_classes)
print(jni_headers)
Default(jni_classes)
Default(jni_headers)
#print(javaenv.Dump())
shlib_env = env.Clone(CPPPATH=javaenv['JNI_CPPPATH'] + ['../..'],
LIBS=['hammer'],
LIBPATH=["../.."])
csources = ['com_upstandinghackers_hammer_Hammer.c',
'com_upstandinghackers_hammer_ParsedToken.c',
'com_upstandinghackers_hammer_Parser.c',
'com_upstandinghackers_hammer_ParseResult.c']
libjhammer_shared = shlib_env.SharedLibrary('libjhammer', csources)
Default(libjhammer_shared)