diff --git a/src/SConscript b/src/SConscript
index 38ace12a179f34bf540f7c9bf2322ce449583772..b7e6159bb5d78bdef4d2b1e4c19e63fc1370f105 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -22,6 +22,7 @@ parsers = ['parsers/%s.c'%s for s in
            ['action',
             'and',
             'attr_bool',
+            'bind',
             'bits',
             'butnot',
             'ch',
diff --git a/src/hammer.h b/src/hammer.h
index b0ce75d20d74d65b8a64e5a25694ef8696acf4ad..716ab6d01aad890217e6c5583186bc8773aeff70 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -122,6 +122,19 @@ typedef struct HParseResult_ {
  */
 typedef struct HBitWriter_ HBitWriter;
 
+typedef struct HCFChoice_ HCFChoice;
+typedef struct HRVMProg_ HRVMProg;
+typedef struct HParserVtable_ HParserVtable;
+
+// TODO: Make this internal
+typedef struct HParser_ {
+  const HParserVtable *vtable;
+  HParserBackend backend;
+  void* backend_data;
+  void *env;
+  HCFChoice *desugared; /* if the parser can be desugared, its desugared form */
+} HParser;
+
 /**
  * Type of an action to apply to an AST, used in the action() parser.
  * It can be any (user-defined) function that takes a HParseResult*
@@ -141,18 +154,12 @@ typedef HParsedToken* (*HAction)(const HParseResult *p, void* user_data);
  */
 typedef bool (*HPredicate)(HParseResult *p, void* user_data);
 
-typedef struct HCFChoice_ HCFChoice;
-typedef struct HRVMProg_ HRVMProg;
-typedef struct HParserVtable_ HParserVtable;
-
-// TODO: Make this internal
-typedef struct HParser_ {
-  const HParserVtable *vtable;
-  HParserBackend backend;
-  void* backend_data;
-  void *env;
-  HCFChoice *desugared; /* if the parser can be desugared, its desugared form */
-} HParser;
+/**
+ * Type of a parser that depends on the result of a previous parser,
+ * used in h_bind(). The void* argument is passed through from h_bind() and can
+ * be used to arbitrarily parameterize the function further.
+ */
+typedef HParser* (*HContinuation)(const HParsedToken *x, void *env);
 
 // {{{ Stuff for benchmarking
 typedef struct HParserTestcase_ {
@@ -663,6 +670,17 @@ HAMMER_FN_DECL(HParser*, h_put_value, const HParser *p, const char* name);
  */
 HAMMER_FN_DECL(HParser*, h_get_value, const char* name);
 
+/**
+ * Monadic bind for HParsers, i.e.:
+ * Sequencing where later parsers may depend on the result(s) of earlier ones.
+ *
+ * Run p and call the result x. Then run k(x,env). Fail if p fails, if
+ * k(x,env) returns NULL, or if the parser returned by k(x,env) fails.
+ *
+ * Result: the result of the parser k(x,env).
+ */
+HAMMER_FN_DECL(HParser*, h_bind, const HParser *p, HContinuation k, void *env);
+
 /**
  * Free the memory allocated to an HParseResult when it is no longer needed.
  */
diff --git a/src/parsers/bind.c b/src/parsers/bind.c
new file mode 100644
index 0000000000000000000000000000000000000000..ccbf6da8cd5aa067fd6413e67e320bab8e92a49a
--- /dev/null
+++ b/src/parsers/bind.c
@@ -0,0 +1,50 @@
+#include "parser_internal.h"
+
+// State captured by h_bind(): the first parser, the continuation, and its env.
+typedef struct {
+    const HParser *p;
+    HContinuation k;
+    void *env;
+} BindEnv;
+
+// Run be->p, pass its AST to the continuation, then run the parser it returns.
+static HParseResult *parse_bind(void *be_, HParseState *state) {
+    BindEnv *be = be_;
+
+    HParseResult *res = h_do_parse(be->p, state);
+    if(!res)
+        return NULL;
+
+    // A continuation that returns no parser makes the bind fail, rather than
+    // passing a NULL parser on to h_do_parse().
+    HParser *kx = be->k(res->ast, be->env);
+    if(!kx)
+        return NULL;
+
+    return h_do_parse(kx, state);
+}
+
+// Marked as neither regular nor context-free.
+static const HParserVtable bind_vt = {
+    .parse = parse_bind,
+    .isValidRegular = h_false,
+    .isValidCF = h_false,
+    .compile_to_rvm = h_not_regular,
+};
+
+HParser *h_bind(const HParser *p, HContinuation k, void *env)
+{
+    return h_bind__m(&system_allocator, p, k, env);
+}
+
+HParser *h_bind__m(HAllocator *mm__,
+                   const HParser *p, HContinuation k, void *env)
+{
+    BindEnv *be = h_new(BindEnv, 1);
+
+    be->p = p;
+    be->k = k;
+    be->env = env;
+
+    return h_new_parser(mm__, &bind_vt, be);
+}
diff --git a/src/t_parser.c b/src/t_parser.c
index 191996cc838191dd07030fd045d56069af29892e..25495e34194b785cd3d713344e026aa4f0c4d2fc 100644
--- a/src/t_parser.c
+++ b/src/t_parser.c
@@ -568,6 +568,44 @@ static void test_permutation(gconstpointer backend) {
   g_check_parse_failed(po2, be, "ccc", 3);
 }
 
+// Continuation for test_bind: reads the matched digits as a decimal number v
+// and returns a parser for the v-th character counting from env (1-based).
+static HParser *f_test_bind(const HParsedToken *p, void *env) {
+  uint8_t one = (uintptr_t)env;
+
+  assert(p);
+  assert(p->token_type == TT_SEQUENCE);
+
+  int v=0;
+  for(size_t i=0; i<p->seq->used; i++) {
+    assert(p->seq->elements[i]->token_type == TT_UINT);
+    v = v*10 + p->seq->elements[i]->uint - '0';
+  }
+
+  return h_ch(one - 1 + v);
+}
+// Continuation that never yields a parser; h_bind must treat this as failure.
+static HParser *f_test_bind_null(const HParsedToken *p, void *env) {
+  (void)p;
+  (void)env;
+  return NULL;
+}
+static void test_bind(gconstpointer backend) {
+  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
+  const HParser *digit = h_ch_range('0', '9');
+  const HParser *nat = h_many1(digit);
+  const HParser *p = h_bind(nat, f_test_bind, (void *)(uintptr_t)'a');
+  const HParser *q = h_bind(nat, f_test_bind_null, NULL);
+
+  g_check_parse_match(p, be, "1a", 2, "u0x61");
+  g_check_parse_match(p, be, "2b", 2, "u0x62");
+  g_check_parse_match(p, be, "26z", 3, "u0x7a");
+  g_check_parse_failed(p, be, "1x", 2);
+  g_check_parse_failed(p, be, "29y", 3);
+  g_check_parse_failed(p, be, "@", 1);
+  g_check_parse_failed(q, be, "1a", 2);
+}
+
 void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
   g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
@@ -617,6 +655,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness);
   g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get);
   g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation);
+  g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind);
 
   g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
   g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);