"""CFFI bindings for the Hammer parser library."""

# Provenance: recovered from a git patch whose newlines had been collapsed.
#   Commit:  f8604f5c7658d1bc051267ed36c0b12e67350da1
#   Author:  Dan Hirsch <thequux@upstandinghackers.com>
#   Date:    Fri, 1 Nov 2013 18:01:44 -0400
#   Subject: [PATCH] Add CFFI python bindings
#   Path:    src/bindings/python/hammer.py (new file, 244 lines)

from cffi import FFI

ffi = FFI()

## Types
ffi.cdef("typedef struct HAllocator_ HAllocator;")
ffi.cdef("typedef struct HArena_ HArena;")
# NOTE(review): this declares `bool` as a plain int. If hammer.h takes `bool`
# from <stdbool.h> the real type is narrower, which would matter for the
# HPredicate callback signature -- confirm against the header.
ffi.cdef("typedef int bool;")
ffi.cdef("typedef struct HParseState_ HParseState;")
ffi.cdef("""
typedef enum HParserBackend_ {
  PB_MIN = 0,
  PB_PACKRAT = 0, // PB_MIN is always the default.
  PB_REGULAR,
  PB_LLk,
  PB_LALR,
  PB_GLR
// TODO: support PB_MAX
} HParserBackend;
""")
ffi.cdef("""
typedef enum HTokenType_ {
  // Before you change the explicit values of these, think of the poor bindings ;_;
  TT_NONE = 1,
  TT_BYTES = 2,
  TT_SINT = 4,
  TT_UINT = 8,
  TT_SEQUENCE = 16,
  TT_RESERVED_1, // reserved for backend-specific internal use
  TT_ERR = 32,
  TT_USER = 64,
  TT_MAX
} HTokenType;
""")
ffi.cdef("""
typedef struct HCountedArray_ {
  size_t capacity;
  size_t used;
  HArena * arena;
  struct HParsedToken_ **elements;
} HCountedArray;
""")
ffi.cdef("""
typedef struct HBytes_ {
  const uint8_t *token;
  size_t len;
} HBytes;
""")
ffi.cdef("""
typedef struct HParsedToken_ {
  HTokenType token_type;
  union {
    HBytes bytes;
    int64_t sint;
    uint64_t uint;
    double dbl;
    float flt;
    HCountedArray *seq; // a sequence of HParsedToken's
    void *user;
  };
  size_t index;
  char bit_offset;
} HParsedToken;
""")
ffi.cdef("""
typedef struct HParseResult_ {
  const HParsedToken *ast;
  long long bit_length;
  HArena * arena;
} HParseResult;
""")

ffi.cdef("""typedef HParsedToken* (*HAction)(const HParseResult *p);""")
ffi.cdef("""typedef bool (*HPredicate)(HParseResult *p);""")
ffi.cdef("""
typedef struct HCFChoice_ HCFChoice;
typedef struct HRVMProg_ HRVMProg;
typedef struct HParserVtable_ HParserVtable;
""")

ffi.cdef("typedef struct HParser_ HParser;")
ffi.cdef("""
typedef struct HParserTestcase_ {
  unsigned char* input;
  size_t length;
  char* output_unambiguous;
} HParserTestcase;

typedef struct HCaseResult_ {
  bool success;
  union {
    const char* actual_results; // on failure, filled in with the results of h_write_result_unamb
    size_t parse_time; // on success, filled in with time for a single parse, in nsec
  };
} HCaseResult;

typedef struct HBackendResults_ {
  HParserBackend backend;
  bool compile_success;
  size_t n_testcases;
  size_t failed_testcases; // actually a count...
  HCaseResult *cases;
} HBackendResults;

typedef struct HBenchmarkResults_ {
  size_t len;
  HBackendResults *results;
} HBenchmarkResults;
""")

## The following section was generated by
## $ perl ../desugar-header.pl <../../hammer.h |sed -e 's/.*/ffi.cdef("&")/'
ffi.cdef("HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length);")
ffi.cdef("HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length);")
ffi.cdef("HParser* h_token(const uint8_t *str, const size_t len);")
ffi.cdef("HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len);")
ffi.cdef("HParser* h_ch(const uint8_t c);")
ffi.cdef("HParser* h_ch__m(HAllocator* mm__, const uint8_t c);")
ffi.cdef("HParser* h_ch_range(const uint8_t lower, const uint8_t upper);")
ffi.cdef("HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper);")
ffi.cdef("HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper);")
ffi.cdef("HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper);")
ffi.cdef("HParser* h_bits(size_t len, bool sign);")
ffi.cdef("HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign);")
ffi.cdef("HParser* h_int64(void);")
ffi.cdef("HParser* h_int64__m(HAllocator* mm__);")
ffi.cdef("HParser* h_int32(void);")
ffi.cdef("HParser* h_int32__m(HAllocator* mm__);")
ffi.cdef("HParser* h_int16(void);")
ffi.cdef("HParser* h_int16__m(HAllocator* mm__);")
ffi.cdef("HParser* h_int8(void);")
ffi.cdef("HParser* h_int8__m(HAllocator* mm__);")
ffi.cdef("HParser* h_uint64(void);")
ffi.cdef("HParser* h_uint64__m(HAllocator* mm__);")
ffi.cdef("HParser* h_uint32(void);")
ffi.cdef("HParser* h_uint32__m(HAllocator* mm__);")
ffi.cdef("HParser* h_uint16(void);")
ffi.cdef("HParser* h_uint16__m(HAllocator* mm__);")
ffi.cdef("HParser* h_uint8(void);")
ffi.cdef("HParser* h_uint8__m(HAllocator* mm__);")
ffi.cdef("HParser* h_whitespace(const HParser* p);")
ffi.cdef("HParser* h_whitespace__m(HAllocator* mm__, const HParser* p);")
ffi.cdef("HParser* h_left(const HParser* p, const HParser* q);")
ffi.cdef("HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q);")
ffi.cdef("HParser* h_right(const HParser* p, const HParser* q);")
ffi.cdef("HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q);")
ffi.cdef("HParser* h_middle(const HParser* p, const HParser* x, const HParser* q);")
ffi.cdef("HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q);")
ffi.cdef("HParser* h_action(const HParser* p, const HAction a);")
ffi.cdef("HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a);")
ffi.cdef("HParser* h_in(const uint8_t *charset, size_t length);")
ffi.cdef("HParser* h_in__m(HAllocator* mm__, const uint8_t *charset, size_t length);")
ffi.cdef("HParser* h_not_in(const uint8_t *charset, size_t length);")
ffi.cdef("HParser* h_not_in__m(HAllocator* mm__, const uint8_t *charset, size_t length);")
ffi.cdef("HParser* h_end_p(void);")
ffi.cdef("HParser* h_end_p__m(HAllocator* mm__);")
ffi.cdef("HParser* h_nothing_p(void);")
ffi.cdef("HParser* h_nothing_p__m(HAllocator* mm__);")
ffi.cdef("HParser* h_sequence(HParser* p, ...);")
ffi.cdef("HParser* h_sequence__m(HAllocator *mm__, HParser* p, ...);")
ffi.cdef("HParser* h_sequence__a(void* args);")
ffi.cdef("HParser* h_sequence__ma(HAllocator* mm__, void* args);")
ffi.cdef("HParser* h_choice(HParser* p, ...);")
ffi.cdef("HParser* h_choice__m(HAllocator *mm__, HParser* p, ...);")
ffi.cdef("HParser* h_choice__a(void* args);")
ffi.cdef("HParser* h_choice__ma(HAllocator* mm__, void* args);")
ffi.cdef("HParser* h_butnot(const HParser* p1, const HParser* p2);")
ffi.cdef("HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2);")
ffi.cdef("HParser* h_difference(const HParser* p1, const HParser* p2);")
ffi.cdef("HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2);")
ffi.cdef("HParser* h_xor(const HParser* p1, const HParser* p2);")
ffi.cdef("HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2);")
ffi.cdef("HParser* h_many(const HParser* p);")
ffi.cdef("HParser* h_many__m(HAllocator* mm__, const HParser* p);")
ffi.cdef("HParser* h_many1(const HParser* p);")
ffi.cdef("HParser* h_many1__m(HAllocator* mm__, const HParser* p);")
ffi.cdef("HParser* h_repeat_n(const HParser* p, const size_t n);")
ffi.cdef("HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n);")
ffi.cdef("HParser* h_optional(const HParser* p);")
ffi.cdef("HParser* h_optional__m(HAllocator* mm__, const HParser* p);")
ffi.cdef("HParser* h_ignore(const HParser* p);")
ffi.cdef("HParser* h_ignore__m(HAllocator* mm__, const HParser* p);")
ffi.cdef("HParser* h_sepBy(const HParser* p, const HParser* sep);")
ffi.cdef("HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep);")
ffi.cdef("HParser* h_sepBy1(const HParser* p, const HParser* sep);")
ffi.cdef("HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep);")
ffi.cdef("HParser* h_epsilon_p(void);")
ffi.cdef("HParser* h_epsilon_p__m(HAllocator* mm__);")
ffi.cdef("HParser* h_length_value(const HParser* length, const HParser* value);")
ffi.cdef("HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value);")
ffi.cdef("HParser* h_attr_bool(const HParser* p, HPredicate pred);")
ffi.cdef("HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred);")
ffi.cdef("HParser* h_and(const HParser* p);")
ffi.cdef("HParser* h_and__m(HAllocator* mm__, const HParser* p);")
ffi.cdef("HParser* h_not(const HParser* p);")
ffi.cdef("HParser* h_not__m(HAllocator* mm__, const HParser* p);")
ffi.cdef("HParser* h_indirect(void);")
ffi.cdef("HParser* h_indirect__m(HAllocator* mm__);")
ffi.cdef("void h_bind_indirect(HParser* indirect, const HParser* inner);")
ffi.cdef("void h_bind_indirect__m(HAllocator* mm__, HParser* indirect, const HParser* inner);")
ffi.cdef("void h_parse_result_free(HParseResult *result);")
ffi.cdef("void h_parse_result_free__m(HAllocator* mm__, HParseResult *result);")
ffi.cdef("void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta);")
ffi.cdef("int h_compile(HParser* parser, HParserBackend backend, const void* params);")
ffi.cdef("int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, const void* params);")
ffi.cdef("HBenchmarkResults * h_benchmark(HParser* parser, HParserTestcase* testcases);")
ffi.cdef("HBenchmarkResults * h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTestcase* testcases);")

# Compiles a small extension against the installed hammer headers and links
# it with libhammer; this runs at import time.
lib = ffi.verify("#include <hammer/hammer.h>",
                 libraries=['hammer'])


# Quick test
def fromCobj(cobj):
    """Convert an ``HParsedToken*`` cdata object into a plain Python value.

    Mapping, by ``token_type``:

    * ``TT_BYTES``    -> a copy of the token's bytes
    * ``TT_SEQUENCE`` -> a list of the recursively converted elements
    * ``TT_SINT``     -> the ``sint`` field
    * ``TT_UINT``     -> the ``uint`` field
    * ``TT_NONE``, ``TT_ERR`` and any other type -> ``None``

    A NULL pointer is also converted to ``None`` rather than dereferenced.
    """
    # TODO: Free the toplevel parser
    if cobj == ffi.NULL:
        # Reading a field through a NULL struct pointer is not safe.
        return None

    tt = cobj.token_type
    if tt == lib.TT_BYTES:
        # Slicing the buffer copies the data, so the returned value does not
        # alias memory owned by the parse result.
        return ffi.buffer(cobj.bytes.token, cobj.bytes.len)[:]
    if tt == lib.TT_SEQUENCE:
        seq = cobj.seq
        return [fromCobj(seq.elements[i]) for i in range(seq.used)]
    if tt == lib.TT_SINT:
        return cobj.sint
    if tt == lib.TT_UINT:
        return cobj.uint
    # TT_NONE, TT_ERR (purpose was unclear to the original author) and token
    # types without a conversion (e.g. TT_USER) all map to None.
    return None


def fromParseResult(cobj):
    """Convert an ``HParseResult*`` to a Python value, then free it.

    Returns ``None`` when ``cobj`` is NULL, without touching the C library.
    NOTE(review): h_parse presumably returns NULL when the input does not
    parse -- confirm against hammer.h. A ``None`` return therefore does not
    distinguish a NULL result from a result whose AST converts to ``None``.

    The result is released with ``h_parse_result_free`` even if conversion
    raises, so ``cobj`` must not be used after this call.
    """
    if cobj == ffi.NULL:
        return None
    try:
        return fromCobj(cobj.ast)
    finally:
        lib.h_parse_result_free(cobj)


def run_test():
    """Parse ``b"1,2,3"`` as comma-separated ``1``/``2``/``3`` characters.

    Returns whatever ``fromParseResult`` yields for that parse.
    """
    digit = lib.h_choice(lib.h_ch(ord('1')),
                         lib.h_ch(ord('2')),
                         lib.h_ch(ord('3')),
                         ffi.NULL)  # NULL terminates the variadic list
    p_test = lib.h_sepBy1(digit, lib.h_ch(ord(',')))
    # A bytes literal is valid on Python 2 and 3; the length is derived from
    # the data instead of being hard-coded.
    data = b"1,2,3"
    return fromParseResult(lib.h_parse(p_test, data, len(data)))