diff --git a/.travis.yml b/.travis.yml index 246a3d639be8c4ab437a8c37c3d409aaba801a6d..8c5af523ada62766b6fdc18fb47fd92f7a613964 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,8 +2,23 @@ language: c compiler: - gcc - clang +env: + - BINDINGS=none +matrix: + include: + - compiler: gcc + language: python + python: 2.7 + env: BINDINGS=python + - compiler: clang + language: python + python: 2.7 + env: BINDINGS=python +before_install: + - sudo apt-get update -qq + - if [ "$BINDINGS" == "python" ]; then sudo apt-get install -qq swig python-dev; fi script: - - scons + - scons bindings=$BINDINGS test notifications: irc: channels: diff --git a/SConstruct b/SConstruct index 50c185be815a8777c72f9cd550d00a70e6d0744e..e30f6df284eb488fa55e7edb4ed8bcbe666705d5 100644 --- a/SConstruct +++ b/SConstruct @@ -7,9 +7,13 @@ import sys vars = Variables(None, ARGUMENTS) vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packaging scripts)", None, PathVariable.PathIsDirCreate)) vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept)) +vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['python'])) env = Environment(ENV = {'PATH' : os.environ['PATH']}, variables = vars, tools=['default', 'scanreplace'], toolpath=['tools']) +if not 'bindings' in env: + env['bindings'] = [] + def calcInstallPath(*elements): path = os.path.abspath(os.path.join(*map(env.subst, elements))) if 'DESTDIR' in env: @@ -90,18 +94,23 @@ env["ENV"].update(x for x in os.environ.items() if x[0].startswith("CCC_")) #rootpath = env['ROOTPATH'] = os.path.abspath('.') #env.Append(CPPPATH=os.path.join('#', "hammer")) +testruns = [] + Export('env') +Export('testruns') if not GetOption("in_place"): env['BUILD_BASE'] = 'build/$VARIANT' - env.SConscript(["src/SConscript"], variant_dir='$BUILD_BASE/src') - env.SConscript(["examples/SConscript"], variant_dir='$BUILD_BASE/examples') + lib = env.SConscript(["src/SConscript"], 
variant_dir='$BUILD_BASE/src') + env.Alias("examples", env.SConscript(["examples/SConscript"], variant_dir='$BUILD_BASE/examples')) else: env['BUILD_BASE'] = '.' - env.SConscript(["src/SConscript"]) - env.SConscript(["examples/SConscript"]) + lib = env.SConscript(["src/SConscript"]) + env.Alias("examples", env.SConscript(["examples/SConscript"])) + +#env.Command('test', '$BUILD_BASE/src/test_suite', 'env LD_LIBRARY_PATH=$BUILD_BASE/src $SOURCE') -env.Command('test', '$BUILD_BASE/src/test_suite', 'env LD_LIBRARY_PATH=$BUILD_BASE/src $SOURCE') +env.Alias("test", testruns) env.Alias("install", "$libpath") env.Alias("install", "$incpath") diff --git a/src/SConscript b/src/SConscript index 1d9ca7621c1983d32cae4d86490d5d95c6582834..038839320cc6f65b0ea5a6a0140442923bd0db00 100644 --- a/src/SConscript +++ b/src/SConscript @@ -1,7 +1,6 @@ # -*- python -*- -Import('env') - -bindings = [] +import os.path +Import('env testruns') dist_headers = [ "hammer.h", @@ -62,15 +61,16 @@ misc_hammer_parts = [ 'registry.c', 'system_allocator.c'] -tests = ['t_benchmark.c', - 't_bitreader.c', - 't_bitwriter.c', - 't_parser.c', - 't_grammar.c', - 't_misc.c'] +ctests = ['t_benchmark.c', + 't_bitreader.c', + 't_bitwriter.c', + 't_parser.c', + 't_grammar.c', + 't_misc.c'] libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts) libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts) +Default(libhammer_shared, libhammer_static) env.Install("$libpath", [libhammer_static, libhammer_shared]) env.Install("$incpath", dist_headers) @@ -81,9 +81,12 @@ env.Install("$pkgconfigpath", "../../../libhammer.pc") testenv = env.Clone() testenv.ParseConfig('pkg-config --cflags --libs glib-2.0') testenv.Append(LIBS=['hammer'], LIBPATH=['.']) -testenv.Program('test_suite', tests + ['test_suite.c']) +ctestexec = testenv.Program('test_suite', ctests + ['test_suite.c']) +ctest = Alias('testc', [ctestexec], "".join(["env LD_LIBRARY_PATH=", 
os.path.dirname(ctestexec[0].path), " ", ctestexec[0].path])) +AlwaysBuild(ctest) +testruns.append(ctest) Export("libhammer_static libhammer_shared") -for b in bindings: +for b in env['bindings']: env.SConscript(["bindings/%s/SConscript" % b]) diff --git a/src/allocator.h b/src/allocator.h index 2dfc14e689f825efabc0d7c46b515217ccd90abb..a02d97cd51873c7ae979c0dfa208bb3bd975f986 100644 --- a/src/allocator.h +++ b/src/allocator.h @@ -29,7 +29,11 @@ typedef struct HAllocator_ { typedef struct HArena_ HArena ; // hidden implementation HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default... +#ifndef SWIG void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) )); +#else +void* h_arena_malloc(HArena *arena, size_t count); +#endif void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers. void h_delete_arena(HArena *arena); diff --git a/src/backends/packrat.c b/src/backends/packrat.c index 8aa1f8ed670502f4b59e9be6498d22eaa74723ad..87f166de95e8fb2b58bd8455622ca8f7cf007115 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -83,7 +83,7 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { some->eval_set = NULL; rec_detect->head = some; } - assert(state->lr_stack->head != NULL); + //assert(state->lr_stack->head != NULL); HSlistNode *head = state->lr_stack->head; HLeftRec *lr; while (head && (lr = head->elem)->rule != p) { diff --git a/src/bindings/desugar-header.pl b/src/bindings/desugar-header.pl new file mode 100644 index 0000000000000000000000000000000000000000..5bdd11e665b86af623583a94002551795d7b9ade --- /dev/null +++ b/src/bindings/desugar-header.pl @@ -0,0 +1,22 @@ +#!/usr/bin/perl -w + + +my $arg = qr/[^,]*/; + +while(<>) { + chomp; + if (/^HAMMER_FN_DECL_NOARG\(([^,]*), ([^,]*)\);/) { + print "$1 $2(void);\n"; + print "$1 $2__m(HAllocator* mm__);\n"; + } elsif (/^HAMMER_FN_DECL\(([^,]*), ([^,]*), ([^)]*)\);/) { + print "$1 
$2($3);\n"; + print "$1 $2__m(HAllocator* mm__, $3);\n"; + } elsif (/^HAMMER_FN_DECL_VARARGS_ATTR\((__attribute__\(\([^)]*\)\)), ([^,]*), ([^,]*), ([^)]*)\);/) { + print "$2 $3($4, ...);\n"; + print "$2 $3__m(HAllocator *mm__, $4, ...);\n"; + print "$2 $3__a(void* args);\n"; + print "$2 $3__ma(HAllocator* mm__, void* args);\n"; + } elsif (/^HAMMER_FN_DECL/) { + print "\e[1;31m!!!\e[0m " . $_ . "\n"; + } +} diff --git a/src/bindings/python/SConscript b/src/bindings/python/SConscript new file mode 100644 index 0000000000000000000000000000000000000000..0f1e9a92e5215fcb03547a3e075411e36bf2a949 --- /dev/null +++ b/src/bindings/python/SConscript @@ -0,0 +1,29 @@ +# -*- python -*- +import os, os.path +Import('env libhammer_shared testruns') + +pythonenv = env.Clone(IMPLICIT_COMMAND_DEPENDENCIES = 0) + +pythonenv.Append(CPPPATH = ['../../']) +pythonenv.Append(CCFLAGS = ['-fpic', '-DSWIG', '-Wno-all', '-Wno-extra', '-Wno-error']) +pythonenv.ParseConfig("pkg-config --cflags python") +pythonenv.Append(LIBS = ['hammer']) +pythonenv.Append(LIBPATH = ['../../']) +pythonenv.Append(SWIGFLAGS = ['-DHAMMER_INTERNAL__NO_STDARG_H', '-Isrc/', '-python']) + +pythonenv.Command("hammer.i", "../swig/hammer.i", Copy("$TARGET", "$SOURCE")) + +swig = ['hammer.i'] + +libhammer_python = pythonenv.SharedLibrary('hammer', swig, SHLIBPREFIX='_') +Default(libhammer_python) + +pytestenv = pythonenv.Clone() +pytestenv['ENV']['LD_LIBRARY_PATH'] = os.path.dirname(str(libhammer_shared[0])) +pytests = ['hammer_tests.py'] +pytestexec = pytestenv.Command(['hammer.pyc', 'hammer_tests.pyc'], pytests + libhammer_python, "nosetests -vv $SOURCE") +pytest = Alias("testpython", [pytestexec], pytestexec) +AlwaysBuild(pytest) +testruns.append(pytest) + + diff --git a/src/bindings/python/hammer.py b/src/bindings/python/hammer.py new file mode 100644 index 0000000000000000000000000000000000000000..36b78c8c8d3408b68e7df1e92a70be53e159f877 --- /dev/null +++ b/src/bindings/python/hammer.py @@ -0,0 +1,488 @@ +from cffi 
import FFI +import threading +import sys + +_ffi = FFI() + +# {{{ Types + +_ffi.cdef("typedef struct HAllocator_ HAllocator;") +_ffi.cdef("typedef struct HArena_ HArena;") +_ffi.cdef("typedef int bool;") +_ffi.cdef("typedef struct HParseState_ HParseState;") +_ffi.cdef(""" +typedef enum HParserBackend_ { + PB_MIN = 0, + PB_PACKRAT = 0, // PB_MIN is always the default. + PB_REGULAR, + PB_LLk, + PB_LALR, + PB_GLR +// TODO: support PB_MAX +} HParserBackend; +""") +_ffi.cdef(""" +typedef enum HTokenType_ { + // Before you change the explicit values of these, think of the poor bindings ;_; + TT_NONE = 1, + TT_BYTES = 2, + TT_SINT = 4, + TT_UINT = 8, + TT_SEQUENCE = 16, + TT_RESERVED_1, // reserved for backend-specific internal use + TT_ERR = 32, + TT_USER = 64, + TT_MAX +} HTokenType; +""") +_ffi.cdef(""" +typedef struct HCountedArray_ { + size_t capacity; + size_t used; + HArena * arena; + struct HParsedToken_ **elements; +} HCountedArray; +""") +_ffi.cdef(""" +typedef struct HBytes_ { + const uint8_t *token; + size_t len; +} HBytes; +""") +_ffi.cdef(""" +typedef struct HParsedToken_ { + HTokenType token_type; + union { + HBytes bytes; + int64_t sint; + uint64_t uint; + double dbl; + float flt; + HCountedArray *seq; // a sequence of HParsedToken's + void *user; + }; + size_t index; + char bit_offset; +} HParsedToken; +""") +_ffi.cdef(""" +typedef struct HParseResult_ { + const HParsedToken *ast; + long long bit_length; + HArena * arena; +} HParseResult; +""") + +_ffi.cdef("""typedef HParsedToken* (*HAction)(const HParseResult *p);""") +_ffi.cdef("""typedef bool (*HPredicate)(HParseResult *p);""") +_ffi.cdef(""" +typedef struct HCFChoice_ HCFChoice; +typedef struct HRVMProg_ HRVMProg; +typedef struct HParserVtable_ HParserVtable; +""") + +_ffi.cdef("typedef struct HParser_ HParser;") +_ffi.cdef(""" +typedef struct HParserTestcase_ { + unsigned char* input; + size_t length; + char* output_unambiguous; +} HParserTestcase; + +typedef struct HCaseResult_ { + bool success; + 
union { + const char* actual_results; // on failure, filled in with the results of h_write_result_unamb + size_t parse_time; // on success, filled in with time for a single parse, in nsec + }; +} HCaseResult; + +typedef struct HBackendResults_ { + HParserBackend backend; + bool compile_success; + size_t n_testcases; + size_t failed_testcases; // actually a count... + HCaseResult *cases; +} HBackendResults; + +typedef struct HBenchmarkResults_ { + size_t len; + HBackendResults *results; +} HBenchmarkResults; +""") + +# }}} +# {{{ Arena functions +_ffi.cdef("void* h_arena_malloc(HArena *arena, size_t count);") +_ffi.cdef("void h_arena_free(HArena *arena, void* ptr);") +# }}} +# {{{ cdefs +## The following section was generated by +## $ perl ../desugar-header.pl <../../hammer.h |sed -e 's/.*/_ffi.cdef("&")/' +_ffi.cdef("HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length);") +_ffi.cdef("HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length);") +_ffi.cdef("HParser* h_token(const uint8_t *str, const size_t len);") +_ffi.cdef("HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len);") +_ffi.cdef("HParser* h_ch(const uint8_t c);") +_ffi.cdef("HParser* h_ch__m(HAllocator* mm__, const uint8_t c);") +_ffi.cdef("HParser* h_ch_range(const uint8_t lower, const uint8_t upper);") +_ffi.cdef("HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper);") +_ffi.cdef("HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper);") +_ffi.cdef("HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper);") +_ffi.cdef("HParser* h_bits(size_t len, bool sign);") +_ffi.cdef("HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign);") +_ffi.cdef("HParser* h_int64(void);") +_ffi.cdef("HParser* h_int64__m(HAllocator* mm__);") +_ffi.cdef("HParser* h_int32(void);") +_ffi.cdef("HParser* h_int32__m(HAllocator* 
mm__);") +_ffi.cdef("HParser* h_int16(void);") +_ffi.cdef("HParser* h_int16__m(HAllocator* mm__);") +_ffi.cdef("HParser* h_int8(void);") +_ffi.cdef("HParser* h_int8__m(HAllocator* mm__);") +_ffi.cdef("HParser* h_uint64(void);") +_ffi.cdef("HParser* h_uint64__m(HAllocator* mm__);") +_ffi.cdef("HParser* h_uint32(void);") +_ffi.cdef("HParser* h_uint32__m(HAllocator* mm__);") +_ffi.cdef("HParser* h_uint16(void);") +_ffi.cdef("HParser* h_uint16__m(HAllocator* mm__);") +_ffi.cdef("HParser* h_uint8(void);") +_ffi.cdef("HParser* h_uint8__m(HAllocator* mm__);") +_ffi.cdef("HParser* h_whitespace(const HParser* p);") +_ffi.cdef("HParser* h_whitespace__m(HAllocator* mm__, const HParser* p);") +_ffi.cdef("HParser* h_left(const HParser* p, const HParser* q);") +_ffi.cdef("HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q);") +_ffi.cdef("HParser* h_right(const HParser* p, const HParser* q);") +_ffi.cdef("HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q);") +_ffi.cdef("HParser* h_middle(const HParser* p, const HParser* x, const HParser* q);") +_ffi.cdef("HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q);") +_ffi.cdef("HParser* h_action(const HParser* p, const HAction a);") +_ffi.cdef("HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a);") +_ffi.cdef("HParser* h_in(const uint8_t *charset, size_t length);") +_ffi.cdef("HParser* h_in__m(HAllocator* mm__, const uint8_t *charset, size_t length);") +_ffi.cdef("HParser* h_not_in(const uint8_t *charset, size_t length);") +_ffi.cdef("HParser* h_not_in__m(HAllocator* mm__, const uint8_t *charset, size_t length);") +_ffi.cdef("HParser* h_end_p(void);") +_ffi.cdef("HParser* h_end_p__m(HAllocator* mm__);") +_ffi.cdef("HParser* h_nothing_p(void);") +_ffi.cdef("HParser* h_nothing_p__m(HAllocator* mm__);") +_ffi.cdef("HParser* h_sequence(HParser* p, ...);") +_ffi.cdef("HParser* h_sequence__m(HAllocator *mm__, HParser* p, ...);") 
+_ffi.cdef("HParser* h_sequence__a(void* args);") +_ffi.cdef("HParser* h_sequence__ma(HAllocator* mm__, void* args);") +_ffi.cdef("HParser* h_choice(HParser* p, ...);") +_ffi.cdef("HParser* h_choice__m(HAllocator *mm__, HParser* p, ...);") +_ffi.cdef("HParser* h_choice__a(void* args);") +_ffi.cdef("HParser* h_choice__ma(HAllocator* mm__, void* args);") +_ffi.cdef("HParser* h_butnot(const HParser* p1, const HParser* p2);") +_ffi.cdef("HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2);") +_ffi.cdef("HParser* h_difference(const HParser* p1, const HParser* p2);") +_ffi.cdef("HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2);") +_ffi.cdef("HParser* h_xor(const HParser* p1, const HParser* p2);") +_ffi.cdef("HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2);") +_ffi.cdef("HParser* h_many(const HParser* p);") +_ffi.cdef("HParser* h_many__m(HAllocator* mm__, const HParser* p);") +_ffi.cdef("HParser* h_many1(const HParser* p);") +_ffi.cdef("HParser* h_many1__m(HAllocator* mm__, const HParser* p);") +_ffi.cdef("HParser* h_repeat_n(const HParser* p, const size_t n);") +_ffi.cdef("HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n);") +_ffi.cdef("HParser* h_optional(const HParser* p);") +_ffi.cdef("HParser* h_optional__m(HAllocator* mm__, const HParser* p);") +_ffi.cdef("HParser* h_ignore(const HParser* p);") +_ffi.cdef("HParser* h_ignore__m(HAllocator* mm__, const HParser* p);") +_ffi.cdef("HParser* h_sepBy(const HParser* p, const HParser* sep);") +_ffi.cdef("HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep);") +_ffi.cdef("HParser* h_sepBy1(const HParser* p, const HParser* sep);") +_ffi.cdef("HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep);") +_ffi.cdef("HParser* h_epsilon_p(void);") +_ffi.cdef("HParser* h_epsilon_p__m(HAllocator* mm__);") +_ffi.cdef("HParser* h_length_value(const HParser* length, const HParser* 
value);") +_ffi.cdef("HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value);") +_ffi.cdef("HParser* h_attr_bool(const HParser* p, HPredicate pred);") +_ffi.cdef("HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred);") +_ffi.cdef("HParser* h_and(const HParser* p);") +_ffi.cdef("HParser* h_and__m(HAllocator* mm__, const HParser* p);") +_ffi.cdef("HParser* h_not(const HParser* p);") +_ffi.cdef("HParser* h_not__m(HAllocator* mm__, const HParser* p);") +_ffi.cdef("HParser* h_indirect(void);") +_ffi.cdef("HParser* h_indirect__m(HAllocator* mm__);") +_ffi.cdef("void h_bind_indirect(HParser* indirect, const HParser* inner);") +_ffi.cdef("void h_bind_indirect__m(HAllocator* mm__, HParser* indirect, const HParser* inner);") +_ffi.cdef("void h_parse_result_free(HParseResult *result);") +_ffi.cdef("void h_parse_result_free__m(HAllocator* mm__, HParseResult *result);") +_ffi.cdef("void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta);") +_ffi.cdef("int h_compile(HParser* parser, HParserBackend backend, const void* params);") +_ffi.cdef("int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, const void* params);") +_ffi.cdef("HBenchmarkResults * h_benchmark(HParser* parser, HParserTestcase* testcases);") +_ffi.cdef("HBenchmarkResults * h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTestcase* testcases);") + +_lib = _ffi.verify("#include <hammer/hammer.h>", + libraries=['hammer']) + +_lib.TT_PYTHON = _lib.TT_USER # TODO: Use the token type allocator from #45 +# }}} +class _DynamicScopeHolder(threading.local): + """A dynamically-scoped holder of python objects, which may or may not + otherwise appear in the object graph. 
Intended for use with CFFI """ + def __init__(self): + self._ctxstack = [] + def __enter__(self): + self._ctxstack.append([]) + def __exit__(self, exc_type, exc_value, traceback): + self._ctxstack.pop() + return False + def stash(self, *objs): + if len(self._ctxstack) < 1: + raise Exception("Not in any dynamic scope") + for obj in objs: + self._ctxstack[-1].append(obj) +def _fromHParsedToken(cobj): + # TODO: Free the toplevel parser + if cobj == _ffi.NULL: return None # a NULL token carries no value; never dereference it + + if cobj.token_type == _lib.TT_BYTES: + return _ffi.buffer(cobj.bytes.token, cobj.bytes.len)[:] + elif cobj.token_type == _lib.TT_ERR: + # I have no idea what this is for + pass + elif cobj.token_type == _lib.TT_NONE: + return None + elif cobj.token_type == _lib.TT_SEQUENCE: + return [_fromHParsedToken(cobj.seq.elements[i]) + for i in range(cobj.seq.used)] + elif cobj.token_type == _lib.TT_SINT: + return cobj.sint + elif cobj.token_type == _lib.TT_UINT: + return cobj.uint + elif cobj.token_type == _lib.TT_PYTHON: + return _ffi.from_handle(cobj.user) + +_parser_result_holder = _DynamicScopeHolder() +def _toHParsedToken(arena, pyobj): + if pyobj is None: + return _ffi.NULL + cobj = _ffi.new_handle(pyobj) + _parser_result_holder.stash(cobj) + + hpt = _ffi.cast("HParsedToken*", _lib.h_arena_malloc(arena, _ffi.sizeof("HParsedToken"))) + hpt.token_type = _lib.TT_PYTHON + hpt.user = cobj + hpt.bit_offset = chr(127) + hpt.index = 0 + return hpt + +def _fromParseResult(cobj): + ret = _fromHParsedToken(cobj.ast) + _lib.h_parse_result_free(cobj) + return ret + +def _to_haction(fn): + """Turn a function that transforms a parsed value into an HAction""" + def action(parse_result): + res = _toHParsedToken(parse_result.arena, fn(_fromHParsedToken(parse_result.ast))) # convert only: the result is still read below, so it must not be freed here + if res != _ffi.NULL and parse_result.ast != _ffi.NULL: + res.index = parse_result.ast.index + res.bit_offset = parse_result.ast.bit_offset + return res + return _ffi.callback("HParsedToken*(HParseResult*)", action) + +def _to_hpredicate(fn): + """Turn a function that 
tests a parsed value into an HPredicate""" + def predicate(parse_result): + res = fn(_fromHParsedToken(parse_result.ast)) # convert only: a callback must not free the result it was handed + # TODO: Handle exceptions; parse should fail. + if type(res) != bool: + raise TypeError("Predicates should return a bool") + return res + return _ffi.callback("bool(HParseResult*)", predicate) + +class Parser(object): + # TODO: Map these to individually garbage-collected blocks of + # memory. Perhaps with an arena allocator with block size of 1? + # There has to be something more efficient than that, though. + + # TODO: How do we handle encodings? By default, we're using UTF-8 + def __init__(self, internal, deps): + """Create a new parser from an FFI object. Not for user code""" + self._parser = internal + self._deps = deps + + def parse(self, string): + with _parser_result_holder: + pres = _lib.h_parse(self._parser, string, len(string)) + if pres: + return _fromParseResult(pres) + else: + return None + + def __mul__(self, count): + return repeat_n(self, count) + + + +class IndirectParser(Parser): + def bind(self, inner): + _lib.h_bind_indirect(self._parser, inner._parser) + self._deps = (inner,) + +class BitsParser(Parser): + pass + +def token(token): + # TODO: Does not clone argument. 
+ if isinstance(token, unicode): + token = token.encode("utf-8") + return Parser(_lib.h_token(token, len(token)), ()) + +def ch(char): + """Return a parser built with h_ch when the argument is an int; + any other argument is handed to token()""" + if isinstance(char, int): + return Parser(_lib.h_ch(char), ()) + else: + return token(char) + +def ch_range(chr1, chr2): + if not isinstance(chr1, str) or not isinstance(chr2, str): + raise TypeError("ch_range can't handle unicode") + def my_action(pr): + # print "In action: ", pr + return pr + return action(Parser(_lib.h_ch_range(ord(chr1), ord(chr2)), ()), my_action) + +def int_range(parser, i1, i2): + if type(parser) != BitsParser: + raise TypeError("int_range is only valid when used with a bits parser") + return Parser(_lib.h_int_range(parser._parser, i1, i2), (parser,)) + +def bits(length, signedp): + return BitsParser(_lib.h_bits(length, signedp), ()) + +def int64(): return bits(64, True) +def int32(): return bits(32, True) +def int16(): return bits(16, True) +def int8 (): return bits(8, True) +def uint64(): return bits(64, False) +def uint32(): return bits(32, False) +def uint16(): return bits(16, False) +def uint8 (): return bits(8, False) + +def whitespace(p): + return Parser(_lib.h_whitespace(p._parser), (p,)) +def left(p1, p2): + return Parser(_lib.h_left(p1._parser, p2._parser), (p1, p2)) +def right(p1, p2): + return Parser(_lib.h_right(p1._parser, p2._parser), (p1, p2)) +def middle(p1, p2, p3): + return Parser(_lib.h_middle(p1._parser, p2._parser, p3._parser), (p1, p2, p3)) +def action(parser, action): + caction = _to_haction(action) + return Parser(_lib.h_action(parser._parser, caction), (parser, caction)) + +def in_(charset): + if not isinstance(charset, str): + # TODO/Python3: change str to bytes + raise TypeError("in_ can't deal with unicode") + return Parser(_lib.h_in(charset, len(charset)), ()) +def not_in(charset): + if not isinstance(charset, str): + # TODO/Python3: change str to bytes + raise TypeError("not_in can't deal with 
unicode") + return Parser(_lib.h_not_in(charset, len(charset)), ()) +def end_p(): + return Parser(_lib.h_end_p(), ()) +def nothing_p(): + return Parser(_lib.h_nothing_p(), ()) +def sequence(*parsers): + plist = [p._parser for p in parsers] + plist.append(_ffi.NULL) + return Parser(_lib.h_sequence(*plist), (plist, parsers)) # keep the child wrappers (and the callbacks they own) alive +def choice(*parsers): + plist = [p._parser for p in parsers] + plist.append(_ffi.NULL) + return Parser(_lib.h_choice(*plist), (plist, parsers)) # keep the child wrappers (and the callbacks they own) alive +def butnot(p1, p2): + return Parser(_lib.h_butnot(p1._parser, p2._parser), (p1, p2)) +def difference(p1, p2): + return Parser(_lib.h_difference(p1._parser, p2._parser), (p1, p2)) +def xor(p1, p2): + return Parser(_lib.h_xor(p1._parser, p2._parser), (p1, p2)) +def many(p1): + return Parser(_lib.h_many(p1._parser), (p1,)) +def many1(p1): + return Parser(_lib.h_many1(p1._parser), (p1,)) +def repeat_n(p1, n): + return Parser(_lib.h_repeat_n(p1._parser, n), (p1,)) +def optional(p1): + return Parser(_lib.h_optional(p1._parser), (p1,)) +def ignore(p1): + return Parser(_lib.h_ignore(p1._parser), (p1,)) +def sepBy(p, sep): + return Parser(_lib.h_sepBy(p._parser, sep._parser), (p, sep)) +def sepBy1(p, sep): + return Parser(_lib.h_sepBy1(p._parser, sep._parser), (p, sep)) +def epsilon_p(): + return Parser(_lib.h_epsilon_p(), ()) +def length_value(p_len, p_value): + return Parser(_lib.h_length_value(p_len._parser, p_value._parser), (p_len, p_value)) +def attr_bool(parser, predicate): + cpredicate = _to_hpredicate(predicate) + return Parser(_lib.h_attr_bool(parser._parser, cpredicate), (parser, cpredicate)) +def and_(parser): + return Parser(_lib.h_and(parser._parser), (parser,)) +def not_(parser): + return Parser(_lib.h_not(parser._parser), (parser,)) +def indirect(): + return IndirectParser(_lib.h_indirect(), ()) +def bind_indirect(indirect, inner): + indirect.bind(inner) + +def parse(parser, string): + return parser.parse(string) # function form of Parser.parse, which requires the input string + +# Unfortunately, "in", "and", and "not" are keywords. 
This makes them +# show up in the module namespace for the use of automated tools. Do +# not attempt to use them by hand; only use the mangled forms (with +# the '_') +sys.modules[__name__].__dict__["in"] = in_ +sys.modules[__name__].__dict__["and"] = and_ +sys.modules[__name__].__dict__["not"] = not_ + +def run_test(): + p_test = sepBy1(choice(ch('1'), + ch('2'), + ch('3')), + ch(',')) + return p_test.parse("1,2,3") + +# {{{ Automatic parser construction... python specific + +# TODO: Implement Parsable metaclass, which requires the existence of +# a "parse" method. + +# This is expected to be extended by user code. As a general rule, +# only provide auto-parsers for your own types. +AUTO_PARSERS = { + str: token, + unicode: token, +} + +def _auto_seq(lst): + return sequence(*(auto_1(p, default_method=_auto_choice) + for p in lst)) + +def _auto_choice(lst): + return choice(*(auto_1(p, default_method=_auto_seq) + for p in lst)) + +def auto_1(arg, default_method=_auto_choice): + if isinstance(arg, Parser): + return arg + elif type(arg) in AUTO_PARSERS: + return AUTO_PARSERS[type(arg)](arg) + else: + return default_method(arg) + +def auto(*args): + return auto_1(args, default_method=_auto_choice) + +# }}} diff --git a/src/bindings/python/hammer_tests.py b/src/bindings/python/hammer_tests.py new file mode 100644 index 0000000000000000000000000000000000000000..45a63f49ffe42384a1f3025b0c02eb85264462e5 --- /dev/null +++ b/src/bindings/python/hammer_tests.py @@ -0,0 +1,509 @@ +import unittest +import hammer as h + +class TestTokenParser(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.token("95\xa2") + def test_success(self): + self.assertEqual(self.parser.parse("95\xa2"), "95\xa2") + def test_partial_fails(self): + self.assertEqual(self.parser.parse("95"), None) + +class TestChParser(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser_int = h.ch(0xa2) + cls.parser_chr = h.ch("\xa2") + def test_success(self): + 
self.assertEqual(self.parser_int.parse("\xa2"), 0xa2) + self.assertEqual(self.parser_chr.parse("\xa2"), "\xa2") + def test_failure(self): + self.assertEqual(self.parser_int.parse("\xa3"), None) + self.assertEqual(self.parser_chr.parse("\xa3"), None) + +class TestChRange(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.ch_range("a", "c") + def test_success(self): + self.assertEqual(self.parser.parse("b"), "b") + def test_failure(self): + self.assertEqual(self.parser.parse("d"), None) + +class TestInt64(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.int64() + def test_success(self): + self.assertEqual(self.parser.parse("\xff\xff\xff\xfe\x00\x00\x00\x00"), -0x200000000) + def test_failure(self): + self.assertEqual(self.parser.parse("\xff\xff\xff\xfe\x00\x00\x00"), None) + +class TestInt32(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.int32() + def test_success(self): + self.assertEqual(self.parser.parse("\xff\xfe\x00\x00"), -0x20000) + self.assertEqual(self.parser.parse("\x00\x02\x00\x00"), 0x20000) + def test_failure(self): + self.assertEqual(self.parser.parse("\xff\xfe\x00"), None) + self.assertEqual(self.parser.parse("\x00\x02\x00"), None) + +class TestInt16(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.int16() + def test_success(self): + self.assertEqual(self.parser.parse("\xfe\x00"), -0x200) + self.assertEqual(self.parser.parse("\x02\x00"), 0x200) + def test_failure(self): + self.assertEqual(self.parser.parse("\xfe"), None) + self.assertEqual(self.parser.parse("\x02"), None) + +class TestInt8(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.int8() + def test_success(self): + self.assertEqual(self.parser.parse("\x88"), -0x78) + def test_failure(self): + self.assertEqual(self.parser.parse(""), None) + +class TestUint64(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.uint64() + def test_success(self): + 
self.assertEqual(self.parser.parse("\x00\x00\x00\x02\x00\x00\x00\x00"), 0x200000000) + def test_failure(self): + self.assertEqual(self.parser.parse("\x00\x00\x00\x02\x00\x00\x00"), None) + +class TestUint32(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.uint32() + def test_success(self): + self.assertEqual(self.parser.parse("\x00\x02\x00\x00"), 0x20000) + def test_failure(self): + self.assertEqual(self.parser.parse("\x00\x02\x00"), None) + +class TestUint16(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.uint16() + def test_success(self): + self.assertEqual(self.parser.parse("\x02\x00"), 0x200) + def test_failure(self): + self.assertEqual(self.parser.parse("\x02"), None) + +class TestUint8(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.uint8() + def test_success(self): + self.assertEqual(self.parser.parse("\x78"), 0x78) + def test_failure(self): + self.assertEqual(self.parser.parse(""), None) + +class TestIntRange(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.int_range(h.uint8(), 3, 10) + def test_success(self): + self.assertEqual(self.parser.parse("\x05"), 5) + def test_failure(self): + self.assertEqual(self.parser.parse("\x0b"), None) + +class TestWhitespace(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.whitespace(h.ch("a")) + def test_success(self): + self.assertEqual(self.parser.parse("a"), "a") + self.assertEqual(self.parser.parse(" a"), "a") + self.assertEqual(self.parser.parse(" a"), "a") + self.assertEqual(self.parser.parse("\ta"), "a") + def test_failure(self): + self.assertEqual(self.parser.parse("_a"), None) + +class TestWhitespaceEnd(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.whitespace(h.end_p()) + def test_success(self): + self.assertEqual(self.parser.parse(""), None) # empty string + self.assertEqual(self.parser.parse(" "), None) # empty string + def test_failure(self): + 
self.assertEqual(self.parser.parse(" x"), None) + +class TestLeft(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.left(h.ch("a"), h.ch(" ")) + def test_success(self): + self.assertEqual(self.parser.parse("a "), "a") + def test_failure(self): + self.assertEqual(self.parser.parse("a"), None) + self.assertEqual(self.parser.parse(" "), None) + self.assertEqual(self.parser.parse("ab"), None) + +class TestRight(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.right(h.ch(" "), h.ch("a")) + def test_success(self): + self.assertEqual(self.parser.parse(" a"), "a") + def test_failure(self): + self.assertEqual(self.parser.parse("a"), None) + self.assertEqual(self.parser.parse(" "), None) + self.assertEqual(self.parser.parse("ba"), None) + +class TestMiddle(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.middle(h.ch(" "), h.ch("a"), h.ch(" ")) + def test_success(self): + self.assertEqual(self.parser.parse(" a "), "a") + def test_failure(self): + self.assertEqual(self.parser.parse("a"), None) + self.assertEqual(self.parser.parse(" "), None) + self.assertEqual(self.parser.parse(" a"), None) + self.assertEqual(self.parser.parse("a "), None) + self.assertEqual(self.parser.parse(" b "), None) + self.assertEqual(self.parser.parse("ba "), None) + self.assertEqual(self.parser.parse(" ab"), None) + +class TestAction(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.action(h.sequence(h.choice(h.ch("a"), h.ch("A")), + h.choice(h.ch("b"), h.ch("B"))), + lambda x: [y.upper() for y in x]) + def test_success(self): + self.assertEqual(self.parser.parse("ab"), ["A", "B"]) + self.assertEqual(self.parser.parse("AB"), ["A", "B"]) + def test_failure(self): + self.assertEqual(self.parser.parse("XX"), None) + +class TestIn(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.in_("abc") + def test_success(self): + self.assertEqual(self.parser.parse("b"), "b") + def 
test_failure(self): + self.assertEqual(self.parser.parse("d"), None) + +class TestNotIn(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.not_in("abc") + def test_success(self): + self.assertEqual(self.parser.parse("d"), "d") + def test_failure(self): + self.assertEqual(self.parser.parse("a"), None) + +class TestEndP(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch("a"), h.end_p()) + def test_success(self): + self.assertEqual(self.parser.parse("a"), ("a",)) + def test_failure(self): + self.assertEqual(self.parser.parse("aa"), None) + +class TestNothingP(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.nothing_p() + def test_success(self): + pass + def test_failure(self): + self.assertEqual(self.parser.parse("a"), None) + +class TestSequence(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch("a"), h.ch("b")) + def test_success(self): + self.assertEqual(self.parser.parse("ab"), ('a','b')) + def test_failure(self): + self.assertEqual(self.parser.parse("a"), None) + self.assertEqual(self.parser.parse("b"), None) + +class TestSequenceWhitespace(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch("a"), h.whitespace(h.ch("b"))) + def test_success(self): + self.assertEqual(self.parser.parse("ab"), ('a','b')) + self.assertEqual(self.parser.parse("a b"), ('a','b')) + self.assertEqual(self.parser.parse("a b"), ('a','b')) + def test_failure(self): + self.assertEqual(self.parser.parse("a c"), None) + +class TestChoice(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.choice(h.ch("a"), h.ch("b")) + def test_success(self): + self.assertEqual(self.parser.parse("a"), "a") + self.assertEqual(self.parser.parse("b"), "b") + def test_failure(self): + self.assertEqual(self.parser.parse("c"), None) + +class TestButNot(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = 
h.butnot(h.ch("a"), h.token("ab")) + def test_success(self): + self.assertEqual(self.parser.parse("a"), "a") + self.assertEqual(self.parser.parse("aa"), "a") + def test_failure(self): + self.assertEqual(self.parser.parse("ab"), None) + +class TestButNotRange(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.butnot(h.ch_range("0", "9"), h.ch("6")) + def test_success(self): + self.assertEqual(self.parser.parse("4"), "4") + def test_failure(self): + self.assertEqual(self.parser.parse("6"), None) + +class TestDifference(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.difference(h.token("ab"), h.ch("a")) + def test_success(self): + self.assertEqual(self.parser.parse("ab"), "ab") + def test_failure(self): + self.assertEqual(self.parser.parse("a"), None) + +class TestXor(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.xor(h.ch_range("0", "6"), h.ch_range("5", "9")) + def test_success(self): + self.assertEqual(self.parser.parse("0"), "0") + self.assertEqual(self.parser.parse("9"), "9") + def test_failure(self): + self.assertEqual(self.parser.parse("5"), None) + self.assertEqual(self.parser.parse("a"), None) + +class TestMany(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.many(h.choice(h.ch("a"), h.ch("b"))) + def test_success(self): + self.assertEqual(self.parser.parse(""), ()) + self.assertEqual(self.parser.parse("a"), ('a',)) + self.assertEqual(self.parser.parse("b"), ('b',)) + self.assertEqual(self.parser.parse("aabbaba"), ('a','a','b','b','a','b','a')) + def test_failure(self): + pass + +class TestMany1(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.many1(h.choice(h.ch("a"), h.ch("b"))) + def test_success(self): + self.assertEqual(self.parser.parse("a"), ("a",)) + self.assertEqual(self.parser.parse("b"), ("b",)) + self.assertEqual(self.parser.parse("aabbaba"), ("a", "a", "b", "b", "a", "b", "a")) + def test_failure(self): + 
self.assertEqual(self.parser.parse(""), None) + self.assertEqual(self.parser.parse("daabbabadef"), None) + +class TestRepeatN(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.repeat_n(h.choice(h.ch("a"), h.ch("b")), 2) + def test_success(self): + self.assertEqual(self.parser.parse("abdef"), ('a', 'b')) + def test_failure(self): + self.assertEqual(self.parser.parse("adef"), None) + self.assertEqual(self.parser.parse("dabdef"), None) + +class TestOptional(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch("a"), h.optional(h.choice(h.ch("b"), h.ch("c"))), h.ch("d")) + def test_success(self): + self.assertEqual(self.parser.parse("abd"), ('a','b','d')) + self.assertEqual(self.parser.parse("acd"), ('a','c','d')) + self.assertEqual(self.parser.parse("ad"), ('a',h.Placeholder(), 'd')) + def test_failure(self): + self.assertEqual(self.parser.parse("aed"), None) + self.assertEqual(self.parser.parse("ab"), None) + self.assertEqual(self.parser.parse("ac"), None) + +class TestIgnore(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch("a"), h.ignore(h.ch("b")), h.ch("c")) + def test_success(self): + self.assertEqual(self.parser.parse("abc"), ("a","c")) + def test_failure(self): + self.assertEqual(self.parser.parse("ac"), None) + +class TestSepBy(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sepBy(h.choice(h.ch("1"), h.ch("2"), h.ch("3")), h.ch(",")) + def test_success(self): + self.assertEqual(self.parser.parse("1,2,3"), ('1','2','3')) + self.assertEqual(self.parser.parse("1,3,2"), ('1','3','2')) + self.assertEqual(self.parser.parse("1,3"), ('1','3')) + self.assertEqual(self.parser.parse("3"), ('3',)) + self.assertEqual(self.parser.parse(""), ()) + def test_failure(self): + pass + +class TestSepBy1(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sepBy1(h.choice(h.ch("1"), h.ch("2"), h.ch("3")), h.ch(",")) + def 
test_success(self): + self.assertEqual(self.parser.parse("1,2,3"), ('1','2','3')) + self.assertEqual(self.parser.parse("1,3,2"), ('1','3','2')) + self.assertEqual(self.parser.parse("1,3"), ('1','3')) + self.assertEqual(self.parser.parse("3"), ('3',)) + def test_failure(self): + self.assertEqual(self.parser.parse(""), None) + +class TestEpsilonP1(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch("a"), h.epsilon_p(), h.ch("b")) + def test_success(self): + self.assertEqual(self.parser.parse("ab"), ("a", "b")) + def test_failure(self): + pass + +class TestEpsilonP2(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.epsilon_p(), h.ch("a")) + def test_success(self): + self.assertEqual(self.parser.parse("a"), ("a",)) + def test_failure(self): + pass + +class TestEpsilonP3(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch("a"), h.epsilon_p()) + def test_success(self): + self.assertEqual(self.parser.parse("a"), ("a",)) + def test_failure(self): + pass + +class TestAttrBool(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.attr_bool(h.many1(h.choice(h.ch("a"), h.ch("b"))), + lambda x: x[0] == x[1]) + def test_success(self): + self.assertEqual(self.parser.parse("aa"), ("a", "a")) + self.assertEqual(self.parser.parse("bb"), ("b", "b")) + def test_failure(self): + self.assertEqual(self.parser.parse("ab"), None) + +class TestAnd1(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.and_(h.ch("0")), h.ch("0")) + def test_success(self): + self.assertEqual(self.parser.parse("0"), ("0",)) + def test_failure(self): + pass + +class TestAnd2(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.and_(h.ch("0")), h.ch("1")) + def test_success(self): + pass + def test_failure(self): + self.assertEqual(self.parser.parse("0"), None) + +class TestAnd3(unittest.TestCase): + @classmethod + def 
setUpClass(cls): + cls.parser = h.sequence(h.ch("1"), h.and_(h.ch("2"))) + def test_success(self): + self.assertEqual(self.parser.parse("12"), ('1',)) + def test_failure(self): + pass + +class TestNot1(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch("a"), + h.choice(h.ch("+"), h.token("++")), + h.ch("b")) + def test_success(self): + self.assertEqual(self.parser.parse("a+b"), ("a", "+", "b")) + def test_failure(self): + self.assertEqual(self.parser.parse("a++b"), None) + +class TestNot2(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.parser = h.sequence(h.ch("a"), h.choice(h.sequence(h.ch("+"), h.not_(h.ch("+"))), + h.token("++")), + h.ch("b")) + def test_success(self): + self.assertEqual(self.parser.parse("a+b"), ('a', ('+',), 'b')) + self.assertEqual(self.parser.parse("a++b"), ('a', "++", 'b')) + def test_failure(self): + pass + +# ### this is commented out for packrat in C ... +# #class TestLeftrec(unittest.TestCase): +# # @classmethod +# # def setUpClass(cls): +# # cls.parser = h.indirect() +# # a = h.ch("a") +# # h.bind_indirect(cls.parser, h.choice(h.sequence(cls.parser, a), a)) +# # def test_success(self): +# # self.assertEqual(self.parser.parse("a"), "a") +# # self.assertEqual(self.parser.parse("aa"), ["a", "a"]) +# # self.assertEqual(self.parser.parse("aaa"), ["a", "a", "a"]) +# # def test_failure(self): +# # pass + + +class TestRightrec(unittest.TestCase): + @classmethod + def setUpClass(cls): + #raise unittest.SkipTest("Bind doesn't work right now") + cls.parser = h.indirect() + a = h.ch("a") + cls.parser.bind(h.choice(h.sequence(a, cls.parser), + h.epsilon_p())) + def test_success(self): + self.assertEqual(self.parser.parse("a"), ('a',)) + self.assertEqual(self.parser.parse("aa"), ('a', ('a',))) + self.assertEqual(self.parser.parse("aaa"), ('a', ('a', ('a',)))) + def test_failure(self): + pass + +# ### this is just for GLR +# #class TestAmbiguous(unittest.TestCase): +# # @classmethod +# # def 
setUpClass(cls): +# # cls.parser = h.indirect() +# # d = h.ch("d") +# # p = h.ch("+") +# # h.bind_indirect(cls.parser, h.choice(h.sequence(cls.parser, p, cls.parser), d)) +# # # this is supposed to be flattened +# # def test_success(self): +# # self.assertEqual(self.parser.parse("d"), ["d"]) +# # self.assertEqual(self.parser.parse("d+d"), ["d", "+", "d"]) +# # self.assertEqual(self.parser.parse("d+d+d"), ["d", "+", "d", "+", "d"]) +# # def test_failure(self): +# # self.assertEqual(self.parser.parse("d+"), None) diff --git a/src/bindings/swig/hammer.i b/src/bindings/swig/hammer.i new file mode 100644 index 0000000000000000000000000000000000000000..5ac8c3767b2c639e2a5444de532ca1e59c8a450a --- /dev/null +++ b/src/bindings/swig/hammer.i @@ -0,0 +1,343 @@ +%module hammer + +%nodefaultctor; + +%include "stdint.i" + +#if defined(SWIGPYTHON) +%ignore HCountedArray_; +%apply (char *STRING, size_t LENGTH) {(uint8_t* str, size_t len)} +%apply (uint8_t* str, size_t len) {(const uint8_t* input, size_t length)} +%apply (uint8_t* str, size_t len) {(const uint8_t* str, const size_t len)} +%apply (uint8_t* str, size_t len) {(const uint8_t* charset, size_t length)} + + +%rename("_%s") ""; +// %rename(_h_ch) h_ch; + +%inline { + static PyObject *_helper_Placeholder = NULL, *_helper_ParseError = NULL; + + static void register_helpers(PyObject* parse_error, PyObject *placeholder) { + _helper_ParseError = parse_error; + _helper_Placeholder = placeholder; + } + } + +%pythoncode %{ + class Placeholder(object): + """The python equivalent of TT_NONE""" + def __str__(self): + return "Placeholder" + def __repr__(self): + return "Placeholder" + def __eq__(self, other): + return type(self) == type(other) + class ParseError(Exception): + """The parse failed; the message may have more information""" + pass + + _hammer._register_helpers(ParseError, + Placeholder) + %} + +%typemap(in) void*[] { + if (PyList_Check($input)) { + Py_INCREF($input); + int size = PyList_Size($input); + int i = 0; + int 
res = 0; + $1 = (void**)malloc((size+1)*sizeof(HParser*)); + for (i=0; i<size; i++) { + PyObject *o = PyList_GetItem($input, i); + res = SWIG_ConvertPtr(o, &($1[i]), SWIGTYPE_p_HParser_, 0 | 0); + if (!SWIG_IsOK(res)) { + SWIG_exception_fail(SWIG_ArgError(res), "that wasn't an HParser" ); + } + } + $1[size] = NULL; + } else { + PyErr_SetString(PyExc_TypeError, "__a functions take lists of parsers as their argument"); + return NULL; + } + } +%typemap(in) uint8_t { + if (PyInt_Check($input)) { + $1 = PyInt_AsLong($input); + } + else if (!PyString_Check($input)) { + PyErr_SetString(PyExc_ValueError, "Expecting a string"); + return NULL; + } else { + $1 = *(uint8_t*)PyString_AsString($input); + } + } +%typemap(out) HBytes* { + $result = PyString_FromStringAndSize((char*)$1->token, $1->len); + } +%typemap(out) struct HCountedArray_* { + int i; + $result = PyList_New($1->used); + for (i=0; i<$1->used; i++) { + HParsedToken *t = $1->elements[i]; + PyObject *o = SWIG_NewPointerObj(SWIG_as_voidptr(t), SWIGTYPE_p_HParsedToken_, 0 | 0); + PyList_SetItem($result, i, o); + } + } +%typemap(out) struct HParseResult_* { + if ($1 == NULL) { + // TODO: raise parse failure + Py_INCREF(Py_None); + $result = Py_None; + } else { + $result = hpt_to_python($1->ast); + } + } +%typemap(newfree) struct HParseResult_* { + h_parse_result_free($input); + } +%inline %{ + static int h_tt_python; + %} +%init %{ + h_tt_python = h_allocate_token_type("com.upstandinghackers.hammer.python"); + %} + + + + +%typemap(in) (HPredicate pred, void* user_data) { + Py_INCREF($input); + $2 = $input; + $1 = call_predicate; + } + +%typemap(in) (const HAction a, void* user_data) { + Py_INCREF($input); + $2 = $input; + $1 = call_action; + } + +%inline %{ + + struct HParsedToken_; + struct HParseResult_; + static PyObject* hpt_to_python(const struct HParsedToken_ *token); + + static struct HParsedToken_* call_action(const struct HParseResult_ *p, void* user_data); + static int call_predicate(const struct 
HParseResult_ *p, void* user_data); + %} +#else + #warning no uint8_t* typemaps defined +#endif + + // All the include paths are relative to the build, i.e., ../../. If you need to build these manually (i.e., not with scons), keep that in mind. +%{ +#include "allocator.h" +#include "hammer.h" +#include "internal.h" +#include "glue.h" +%} +%include "allocator.h" +%include "hammer.h" + +%extend HArena_ { + ~HArena_() { + h_delete_arena($self); + } + }; +%extend HParseResult_ { + ~HParseResult_() { + h_parse_result_free($self); + } +}; + +%newobject h_parse; +%delobject h_parse_result_free; +%newobject h_new_arena; +%delobject h_delete_arena; + +#ifdef SWIGPYTHON +%inline { + static PyObject* hpt_to_python(const HParsedToken *token) { + // Caller holds a reference to returned object + PyObject *ret; + if (token == NULL) { + Py_RETURN_NONE; + } + switch (token->token_type) { + case TT_NONE: + return PyObject_CallFunctionObjArgs(_helper_Placeholder, NULL); + break; + case TT_BYTES: + return PyString_FromStringAndSize((char*)token->token_data.bytes.token, token->token_data.bytes.len); + case TT_SINT: + // TODO: return PyINT if appropriate + return PyLong_FromLong(token->token_data.sint); + case TT_UINT: + // TODO: return PyINT if appropriate + return PyLong_FromUnsignedLong(token->token_data.uint); + case TT_SEQUENCE: + ret = PyTuple_New(token->token_data.seq->used); + for (int i = 0; i < token->token_data.seq->used; i++) { + PyTuple_SET_ITEM(ret, i, hpt_to_python(token->token_data.seq->elements[i])); + } + return ret; + default: + if (token->token_type == h_tt_python) { + ret = (PyObject*)token->token_data.user; + Py_INCREF(ret); + return ret; + } else { + return SWIG_NewPointerObj((void*)token, SWIGTYPE_p_HParsedToken_, 0 | 0); + // TODO: support registry + } + + } + } + static struct HParsedToken_* call_action(const struct HParseResult_ *p, void* user_data) { + PyObject *callable = user_data; + PyObject *ret = PyObject_CallFunctionObjArgs(callable, + 
hpt_to_python(p->ast), + NULL); + if (ret == NULL) { + PyErr_Print(); + assert(ret != NULL); + } + // TODO: add reference to ret to parse-local data + // For now, just hold onto reference + HParsedToken *tok = h_make(p->arena, h_tt_python, ret); + return tok; + } + + static int call_predicate(const struct HParseResult_ *p, void* user_data) { + PyObject *callable = user_data; + PyObject *ret = PyObject_CallFunctionObjArgs(callable, + hpt_to_python(p->ast), + NULL); + int rret = 0; + if (ret == NULL) { + // TODO: throw exception + PyErr_Print(); + assert(ret != NULL); + } + // TODO: add reference to ret to parse-local data + rret = PyObject_IsTrue(ret); + Py_DECREF(ret); + return rret; + } + + } + +%rename("%s") ""; + +%extend HParser_ { + HParseResult* parse(const uint8_t* input, size_t length) { + return h_parse($self, input, length); + } + bool compile(HParserBackend backend) { + return h_compile($self, backend, NULL) == 0; + } + PyObject* __dir__() { + PyObject* ret = PyList_New(2); + PyList_SET_ITEM(ret, 0, PyString_FromString("parse")); + PyList_SET_ITEM(ret, 1, PyString_FromString("compile")); + return ret; + } +} + +%pythoncode %{ + +def action(p, act): + return _h_action(p, act) +def attr_bool(p, pred): + return _h_attr_bool(p, pred) + +def ch(ch): + if isinstance(ch, str) or isinstance(ch, unicode): + return token(ch) + else: + return _h_ch(ch) + +def ch_range(c1, c2): + dostr = isinstance(c1, str) + dostr2 = isinstance(c2, str) + if isinstance(c1, unicode) or isinstance(c2, unicode): + raise TypeError("ch_range only works on bytes") + if dostr != dostr2: + raise TypeError("Both arguments to ch_range must be the same type") + if dostr: + return action(_h_ch_range(c1, c2), chr) + else: + return _h_ch_range(c1, c2) +def epsilon_p(): return _h_epsilon_p() +def end_p(): + return _h_end_p() +def in_(charset): + return action(_h_in(charset), chr) +def not_in(charset): + return action(_h_not_in(charset), chr) +def not_(p): return _h_not(p) +def int_range(p, i1, 
i2): + return _h_int_range(p, i1, i2) +def token(string): + return _h_token(string) +def whitespace(p): + return _h_whitespace(p) +def xor(p1, p2): + return _h_xor(p1, p2) +def butnot(p1, p2): + return _h_butnot(p1, p2) +def and_(p1): + return _h_and(p1) +def difference(p1, p2): + return _h_difference(p1, p2) + +def sepBy(p, sep): return _h_sepBy(p, sep) +def sepBy1(p, sep): return _h_sepBy1(p, sep) +def many(p): return _h_many(p) +def many1(p): return _h_many1(p) +def repeat_n(p, n): return _h_repeat_n(p, n) +def choice(*args): return _h_choice__a(list(args)) +def sequence(*args): return _h_sequence__a(list(args)) + +def optional(p): return _h_optional(p) +def nothing_p(): return _h_nothing_p() +def ignore(p): return _h_ignore(p) + +def left(p1, p2): return _h_left(p1, p2) +def middle(p1, p2, p3): return _h_middle(p1, p2, p3) +def right(p1, p2): return _h_right(p1, p2) + + +class HIndirectParser(_HParser_): + def __init__(self): + # Shoves the guts of an _HParser_ into a HIndirectParser. 
+ tret = _h_indirect() + self.__dict__.clear() + self.__dict__.update(tret.__dict__) + + def __dir__(self): + return super(HIndirectParser, self).__dir__() + ['bind'] + def bind(self, parser): + _h_bind_indirect(self, parser) + +def indirect(): + return HIndirectParser() + +def bind_indirect(indirect, new_parser): + indirect.bind(new_parser) + +def uint8(): return _h_uint8() +def uint16(): return _h_uint16() +def uint32(): return _h_uint32() +def uint64(): return _h_uint64() +def int8(): return _h_int8() +def int16(): return _h_int16() +def int32(): return _h_int32() +def int64(): return _h_int64() + + +%} + +#endif diff --git a/src/hammer.c b/src/hammer.c index 7fc80dba0c86ec76a2376d0d69914f235bf08afc..2456bdcedb7c9c7a0b4e374e8b8146bf19603179 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -62,6 +62,10 @@ HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* return backends[parser->backend]->parse(mm__, parser, &input_stream); } +void h_parse_result_free__m(HAllocator *alloc, HParseResult *result) { + h_parse_result_free(result); +} + void h_parse_result_free(HParseResult *result) { if(result == NULL) return; h_delete_arena(result->arena); diff --git a/src/hammer.h b/src/hammer.h index 541e38df13f4c0f044faebd058de6c70590d9a61..6756c0cc584951c905b9abdc918d5587b3ced28e 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -29,7 +29,15 @@ #define BIT_LITTLE_ENDIAN 0x0 #define BYTE_LITTLE_ENDIAN 0x0 +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef __cplusplus +#ifndef HAMMER_INTERNAL__NO_STDARG_H typedef int bool; +#endif // HAMMER_INTERNAL__NO_STDARG_H +#endif typedef struct HParseState_ HParseState; @@ -68,8 +76,21 @@ typedef struct HBytes_ { size_t len; } HBytes; +#ifdef SWIG +typedef union { + HBytes bytes; + int64_t sint; + uint64_t uint; + double dbl; + float flt; + HCountedArray *seq; + void *user; +} HTokenData; +#endif + typedef struct HParsedToken_ { HTokenType token_type; +#ifndef SWIG union { HBytes bytes; int64_t sint; @@ -79,6 
+100,9 @@ typedef struct HParsedToken_ { HCountedArray *seq; // a sequence of HParsedToken's void *user; }; +#else + HTokenData token_data; +#endif size_t index; char bit_offset; } HParsedToken; @@ -126,6 +150,7 @@ typedef struct HCFChoice_ HCFChoice; typedef struct HRVMProg_ HRVMProg; typedef struct HParserVtable_ HParserVtable; +// TODO: Make this internal typedef struct HParser_ { const HParserVtable *vtable; HParserBackend backend; @@ -141,12 +166,23 @@ typedef struct HParserTestcase_ { char* output_unambiguous; } HParserTestcase; +#ifdef SWIG +typedef union { + const char* actual_results; + size_t parse_time; +} HResultTiming; +#endif + typedef struct HCaseResult_ { bool success; +#ifndef SWIG union { const char* actual_results; // on failure, filled in with the results of h_write_result_unamb size_t parse_time; // on success, filled in with time for a single parse, in nsec }; +#else + HResultTiming timestamp; +#endif } HCaseResult; typedef struct HBackendResults_ { @@ -176,7 +212,7 @@ typedef struct HBenchmarkResults_ { rtype_t name(__VA_ARGS__) attr; \ rtype_t name##__m(HAllocator* mm__, __VA_ARGS__) attr -#ifndef HAMMER_INTERNAL__NO_STDARG_H +#ifndef SWIG #define HAMMER_FN_DECL_VARARGS(rtype_t, name, ...) \ rtype_t name(__VA_ARGS__, ...); \ rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...); \ @@ -194,17 +230,17 @@ typedef struct HBenchmarkResults_ { rtype_t name##__a(void *args[]); \ rtype_t name##__ma(HAllocator *mm__, void *args[]) #else -#define HAMMER_FN_DECL_VARARGS(rtype_t, name, ...) \ - rtype_t name(__VA_ARGS__, ...); \ - rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...); \ - rtype_t name##__a(void *args[]); \ +#define HAMMER_FN_DECL_VARARGS(rtype_t, name, params...) 
\ + rtype_t name(params, ...); \ + rtype_t name##__m(HAllocator* mm__, params, ...); \ + rtype_t name##__a(void *args[]); \ rtype_t name##__ma(HAllocator *mm__, void *args[]) // Note: this drops the attributes on the floor for the __v versions -#define HAMMER_FN_DECL_VARARGS_ATTR(attr, rtype_t, name, ...) \ - rtype_t name(__VA_ARGS__, ...) attr; \ - rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...) attr; \ - rtype_t name##__a(void *args[]); \ +#define HAMMER_FN_DECL_VARARGS_ATTR(attr, rtype_t, name, params...) \ + rtype_t name(params, ...); \ + rtype_t name##__m(HAllocator* mm__, params, ...); \ + rtype_t name##__a(void *args[]); \ rtype_t name##__ma(HAllocator *mm__, void *args[]) #endif // HAMMER_INTERNAL__NO_STDARG_H // }}} @@ -586,7 +622,7 @@ char* h_write_result_unamb(const HParsedToken* tok); * Format token to the given output stream. Indent starting at * [indent] spaces, with [delta] spaces between levels. */ -HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent, int delta); +void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta); /** * Build parse tables for the given parser backend. 
See the @@ -630,7 +666,7 @@ HParsedToken *h_act_ignore(const HParseResult *p, void* userdata); // {{{ Benchmark functions HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, HParser* parser, HParserTestcase* testcases); void h_benchmark_report(FILE* stream, HBenchmarkResults* results); -void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results); +//void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results); // }}} // {{{ Token type registry @@ -644,4 +680,8 @@ int h_get_token_type_number(const char* name); const char* h_get_token_type_name(int token_type); // }}} +#ifdef __cplusplus +} +#endif + #endif // #ifndef HAMMER_HAMMER__H diff --git a/src/parsers/choice.c b/src/parsers/choice.c index bb55fa8dd96c32323b5fe201625c2fc4ca70c9b5..bfc3f904f19a6d88ddc0bd77702b2c45f89c2b0f 100644 --- a/src/parsers/choice.c +++ b/src/parsers/choice.c @@ -143,7 +143,7 @@ HParser* h_choice__ma(HAllocator* mm__, void *args[]) { s->len = len; HParser *ret = h_new(HParser, 1); - ret->vtable = &choice_vt; + ret->vtable = &choice_vt; ret->env = (void*)s; ret->backend = PB_MIN; return ret; diff --git a/src/parsers/indirect.c b/src/parsers/indirect.c index 2217a202968f2a11306c60ccea34c9e3126186c1..c91eaab5905229b178fa0c888dce1fa056babd88 100644 --- a/src/parsers/indirect.c +++ b/src/parsers/indirect.c @@ -21,6 +21,10 @@ static const HParserVtable indirect_vt = { .compile_to_rvm = h_not_regular, }; +void h_bind_indirect__m(HAllocator *mm__, HParser* indirect, const HParser* inner) { + h_bind_indirect(indirect, inner); +} + void h_bind_indirect(HParser* indirect, const HParser* inner) { assert_message(indirect->vtable == &indirect_vt, "You can only bind an indirect parser"); indirect->env = (void*)inner; diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c index 42c0913d91e16c973299506976ddd882e993b957..93c0cfb983200a33b7909fd1b2c73114711beac5 100644 --- a/src/parsers/sequence.c +++ b/src/parsers/sequence.c @@ -160,8 +160,8 @@ HParser* 
h_sequence__ma(HAllocator* mm__, void *args[]) { s->len = len; HParser *ret = h_new(HParser, 1); - ret->vtable = &sequence_vt; - ret->env = (void*)s; + ret->vtable = &sequence_vt; + ret->env = (void*)s; ret->backend = PB_MIN; return ret; } diff --git a/src/parsers/token.c b/src/parsers/token.c index 0a43f8d497180fe3a82e2b86cb4de6826ed1a2cc..d36ec54be4c07a35b729da71455c5bc3b3555cbc 100644 --- a/src/parsers/token.c +++ b/src/parsers/token.c @@ -80,6 +80,8 @@ HParser* h_token(const uint8_t *str, const size_t len) { } HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len) { HToken *t = h_new(HToken, 1); - t->str = (uint8_t*)str, t->len = len; + uint8_t *str_cpy = h_new(uint8_t, len); + memcpy(str_cpy, str, len); + t->str = str_cpy, t->len = len; return h_new_parser(mm__, &token_vt, t); } diff --git a/src/t_parser.c b/src/t_parser.c index 12edba93b6b65d94dd2d11bf8a9c1d854de28376..e2eca978e9e96b341b04d55ba8a850d95dacae54 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -412,11 +412,11 @@ static void test_leftrec(gconstpointer backend) { HParser *a_ = h_ch('a'); HParser *lr_ = h_indirect(); - h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL)); + h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), h_epsilon_p(), NULL)); - g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61"); - g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 u0x61)"); - g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "((u0x61 u0x61) u0x61)"); + g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "(u0x61)"); + g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "((u0x61) u0x61)"); + g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(((u0x61) u0x61) u0x61)"); } static void test_rightrec(gconstpointer backend) { @@ -485,7 +485,7 @@ void register_parser_tests(void) { 
g_test_add_data_func("/core/parser/packrat/and", GINT_TO_POINTER(PB_PACKRAT), test_and); g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not); g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore); - // g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec); + //g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec); g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);