diff --git a/src/SConscript b/src/SConscript
index 155a6218b26cd03704c2a7a922bef9aea61bbf13..49d43eb7adc18da8f8e6282c2e03ab6b49e548b0 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -43,7 +43,8 @@ parsers = ['parsers/%s.c'%s for s in
             'token',
             'unimplemented',
             'whitespace',
-            'xor']]
+            'xor',
+            'value']]
 
 backends = ['backends/%s.c' % s for s in
             ['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']]
diff --git a/src/datastructures.c b/src/datastructures.c
index 141adcd5ffa9df4d9a4a81269704a2361432266e..0feeb2176b0422471f65a053a7ef9a716af1821c 100644
--- a/src/datastructures.c
+++ b/src/datastructures.c
@@ -1,6 +1,7 @@
 #include "internal.h"
 #include "hammer.h"
 #include "allocator.h"
+#include "parsers/parser_internal.h"
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
@@ -393,6 +394,47 @@ uint32_t h_djbhash(const uint8_t *buf, size_t len) {
   return hash;
 }
 
+/* Symbol names are NUL-terminated strings, so the symbol table compares
+   and hashes keys by content: equal names stored at different addresses
+   must denote the same symbol. */
+static bool symbol_key_eq(const void *p, const void *q) {
+  return strcmp((const char*)p, (const char*)q) == 0;
+}
+
+static HHashValue symbol_key_hash(const void *p) {
+  const char *name = (const char*)p;
+  return h_djbhash((const uint8_t*)name, strlen(name));
+}
+
+/* Bind key to value in the top table of state's symbol-table stack,
+   creating the stack (with one table) in the parse arena on first use.
+   The key must not already be bound there; this is checked by assert
+   only, so callers should test with h_symbol_get first. */
+void h_symbol_put(HParseState *state, const char* key, void *value) {
+  if (!state->symbol_table) {
+    state->symbol_table = h_slist_new(state->arena);
+    h_slist_push(state->symbol_table, h_hashtable_new(state->arena,
+                                                      symbol_key_eq,
+                                                      symbol_key_hash));
+  }
+  HHashTable *head = h_slist_top(state->symbol_table);
+  assert(!h_hashtable_present(head, key));
+  h_hashtable_put(head, key, value);
+}
+
+/* Look up key in the top table of state's symbol-table stack. Returns
+   NULL when nothing has been stashed yet; otherwise returns whatever
+   h_hashtable_get yields for key. */
+void* h_symbol_get(HParseState *state, const char* key) {
+  if (state->symbol_table) {
+    HHashTable *head = h_slist_top(state->symbol_table);
+    if (head) {
+      return h_hashtable_get(head, key);
+    }
+  }
+  return NULL;
+}
+
 HSArray *h_sarray_new(HAllocator *mm__, size_t size) {
   HSArray *ret = h_new(HSArray, 1);
   ret->capacity = size;
diff --git a/src/hammer.h b/src/hammer.h
index 778087366add8560ccdca702cacbe0876302e545..947456d8330bad55f5a657c65910f045ce9a212c 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -621,6 +621,37 @@ HAMMER_FN_DECL(void, h_bind_indirect, HParser* indirect, const HParser* inner);
  */
 HAMMER_FN_DECL(HParser*, h_with_endianness, char endianness, const HParser* p);
 
+/**
+ * The 'h_put_value' combinator stashes the result of the parser
+ * it wraps in a symbol table in the parse state, so that non-
+ * local actions and predicates can access this value.
+ *
+ * Try not to use this combinator if you can avoid it.
+ *
+ * The name is stored by pointer, not copied, so the string must
+ * outlive the returned parser.
+ *
+ * Result token type: p's token type if name was not already in
+ * the symbol table. It is an error, and thus a NULL result (and
+ * parse failure), to attempt to rebind a name that is already in
+ * the symbol table.
+ */
+HAMMER_FN_DECL(HParser*, h_put_value, const HParser *p, const char* name);
+
+/**
+ * The 'h_get_value' combinator retrieves a named HParseResult that
+ * was previously stashed in the parse state.
+ *
+ * Try not to use this combinator if you can avoid it.
+ *
+ * The name is stored by pointer, not copied, so the string must
+ * outlive the returned parser.
+ *
+ * Result token type: whatever the stashed HParseResult is, if
+ * present. If absent, NULL (and thus parse failure).
+ */
+HAMMER_FN_DECL(HParser*, h_get_value, const char* name);
+
 /**
  * Free the memory allocated to an HParseResult when it is no longer needed.
  */
diff --git a/src/internal.h b/src/internal.h
index 85cd4dbc3407c74f9b8e055b56adc5fcad0a1b61..6c721eb03e3f790308b7539ea0abd3b9ae59f805 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -190,6 +190,7 @@ typedef struct HHashTable_ {
  * arena - the arena that has been allocated for the parse this state is in.
  * lr_stack - a stack of HLeftRec's, used in Warth's recursion
  * recursion_heads - table of recursion heads. Keys are HParserCacheKey's with only an HInputStream (parser can be NULL), values are HRecursionHead's.
+ * symbol_table - stack of tables of values that have been stashed in the context of this parse.
  *
  */
 
@@ -199,6 +200,7 @@ struct HParseState_ {
   HArena * arena;
   HSlist *lr_stack;
   HHashTable *recursion_heads;
+  HSlist *symbol_table; // its contents are HHashTables
 };
 
 typedef struct HParserBackendVTable_ {
@@ -316,6 +318,8 @@ HSlist* h_slist_new(HArena *arena);
 HSlist* h_slist_copy(HSlist *slist);
 void* h_slist_pop(HSlist *slist);
 void* h_slist_drop(HSlist *slist);
+/* Peek at the top element without popping it; NULL if the list is empty. */
+static inline void* h_slist_top(HSlist *sl) { return sl->head ? sl->head->elem : NULL; }
 void h_slist_push(HSlist *slist, void* item);
 bool h_slist_find(HSlist *slist, const void* item);
 HSlist* h_slist_remove_all(HSlist *slist, const void* item);
@@ -347,8 +351,10 @@ bool h_eq_ptr(const void *p, const void *q);
 HHashValue h_hash_ptr(const void *p);
 uint32_t h_djbhash(const uint8_t *buf, size_t len);
 
-typedef struct HCFSequence_ HCFSequence;
+void h_symbol_put(HParseState *state, const char* key, void *value);
+void* h_symbol_get(HParseState *state, const char* key);
 
+typedef struct HCFSequence_ HCFSequence;
 
 struct HCFChoice_ {
   enum HCFChoiceType {
diff --git a/src/parsers/value.c b/src/parsers/value.c
new file mode 100644
index 0000000000000000000000000000000000000000..531db7cb5274c30d3d482ee5bc84add58c1e9af7
--- /dev/null
+++ b/src/parsers/value.c
@@ -0,0 +1,72 @@
+#include "parser_internal.h"
+
+/* Environment shared by the put and get parsers: p is the wrapped
+   parser (NULL for h_get_value), key is the symbol name. */
+typedef struct {
+  const HParser* p;
+  const char* key;
+} HStoredValue;
+
+/* Stash an HParseResult into a symbol table, so that it can be
+   retrieved and used later. Fails (returns NULL) without running
+   the wrapped parser if the key is already bound. */
+
+static HParseResult* parse_put(void *env, HParseState* state) {
+  HStoredValue *s = (HStoredValue*)env;
+  if (s->p && s->key && !h_symbol_get(state, s->key)) {
+    HParseResult *tmp = h_do_parse(s->p, state);
+    if (tmp) {
+      h_symbol_put(state, s->key, tmp);
+    }
+    return tmp;
+  }
+  // otherwise there's no parser, no key, or key's stored already
+  return NULL;
+}
+
+static const HParserVtable put_vt = {
+  .parse = parse_put,
+  .isValidRegular = h_false,
+  .isValidCF = h_false,
+  .compile_to_rvm = h_not_regular,
+};
+
+HParser* h_put_value(const HParser* p, const char* name) {
+  return h_put_value__m(&system_allocator, p, name);
+}
+
+HParser* h_put_value__m(HAllocator* mm__, const HParser* p, const char* name) {
+  HStoredValue *env = h_new(HStoredValue, 1);
+  env->p = p;
+  env->key = name;
+  return h_new_parser(mm__, &put_vt, env);
+}
+
+/* Retrieve a stashed result from the symbol table. */
+
+static HParseResult* parse_get(void *env, HParseState* state) {
+  HStoredValue *s = (HStoredValue*)env;
+  if (!s->p && s->key) {
+    return h_symbol_get(state, s->key);
+  } else { // either there's no key, or there was a parser here
+    return NULL;
+  }
+}
+
+static const HParserVtable get_vt = {
+  .parse = parse_get,
+  .isValidRegular = h_false,
+  .isValidCF = h_false,
+  .compile_to_rvm = h_not_regular,
+};
+
+HParser* h_get_value(const char* name) {
+  return h_get_value__m(&system_allocator, name);
+}
+
+HParser* h_get_value__m(HAllocator* mm__, const char* name) {
+  HStoredValue *env = h_new(HStoredValue, 1);
+  env->p = NULL;
+  env->key = name;
+  return h_new_parser(mm__, &get_vt, env);
+}
diff --git a/src/t_parser.c b/src/t_parser.c
index a98eb11f491c0c5d9121df21be4c10730e415fc0..2b66bffba923fc391dcab2b078e96662c854d6c1 100644
--- a/src/t_parser.c
+++ b/src/t_parser.c
@@ -495,6 +495,31 @@ static void test_endianness(gconstpointer backend) {
   g_check_parse_match(lb_u5_, be, "abcd", 4, "u0xc");
 }
 
+// Turn the stashed uint s into the element count 3 * 2^s.
+HParsedToken* act_get(const HParseResult *p, void* user_data) {
+  HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
+  ret->token_type = TT_UINT;
+  ret->uint = 3 * (1 << p->ast->uint);
+  return ret;
+}
+
+static void test_put_get(gconstpointer backend) {
+  // Two distinct buffers holding the same name: symbols must be matched
+  // by name, not by the address of the string passed in.
+  static const char put_name[] = "size";
+  static const char get_name[] = "size";
+  HParser *p = h_sequence(h_put_value(h_uint8(), put_name),
+                          h_token((const uint8_t*)"foo", 3),
+                          h_length_value(h_action(h_get_value(get_name),
+                                                  act_get, NULL),
+                                         h_uint8()),
+                          NULL);
+  // Yes, the quotes in the next line look weird. Leave them alone,
+  // this is to deal with how C strings handle hex-formatted chars.
+  g_check_parse_match(p, (HParserBackend)GPOINTER_TO_INT(backend), "\x01""fooabcdef", 10, "(u0x1 <66.6f.6f> (u0x61 u0x62 u0x63 u0x64 u0x65 u0x66))");
+  g_check_parse_failed(p, (HParserBackend)GPOINTER_TO_INT(backend), "\x01""fooabcde", 9);
+}
+
 void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
   g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
@@ -542,6 +567,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/leftrec-ne", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_ne);
   g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec);
   g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness);
+  g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get);
 
   g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
   g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);