Packrat backend: store the resume position (input_stream) directly in each
HParserCacheValue, drop the HCachedResult wrapper, and key the
recursion_heads table by input position instead of by the full cache key.
Adds a non-empty left-recursion test (leftrec-ne) for the packrat, LALR and
GLR backends.

NOTE(review): the line structure of this patch was reconstructed from a
whitespace-collapsed copy. Record boundaries agree with every hunk header's
line counts, but leading indentation (including any tabs) is a best-effort
reconstruction -- apply with whitespace-insensitive matching if a strict
apply rejects a hunk.

diff --git a/src/backends/packrat.c b/src/backends/packrat.c
index 87f166de95e8fb2b58bd8455622ca8f7cf007115..c1e422ed6e9fa42fe4130c11ad8a1f7e7c22c2a2 100644
--- a/src/backends/packrat.c
+++ b/src/backends/packrat.c
@@ -3,10 +3,22 @@
 #include "../internal.h"
 #include "../parsers/parser_internal.h"
 
-// short-hand for constructing HCachedResult's
-static HCachedResult *cached_result(const HParseState *state, HParseResult *result) {
-  HCachedResult *ret = a_new(HCachedResult, 1);
-  ret->result = result;
+// short-hand for creating cache values (regular case)
+static
+HParserCacheValue * cached_result(HParseState *state, HParseResult *result) {
+  HParserCacheValue *ret = a_new(HParserCacheValue, 1);
+  ret->value_type = PC_RIGHT;
+  ret->right = result;
+  ret->input_stream = state->input_stream;
+  return ret;
+}
+
+// short-hand for caching parse results (left recursion case)
+static
+HParserCacheValue *cached_lr(HParseState *state, HLeftRec *lr) {
+  HParserCacheValue *ret = a_new(HParserCacheValue, 1);
+  ret->value_type = PC_LEFT;
+  ret->left = lr;
   ret->input_stream = state->input_stream;
   return ret;
 }
@@ -44,27 +56,28 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa
 HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
   HParserCacheValue *cached = h_hashtable_get(state->cache, k);
-  HRecursionHead *head = h_hashtable_get(state->recursion_heads, k);
+  HRecursionHead *head = h_hashtable_get(state->recursion_heads, &k->input_pos);
   if (!head) { // No heads found
     return cached;
   } else { // Some heads found
     if (!cached && head->head_parser != k->parser && !h_slist_find(head->involved_set, k->parser)) {
       // Nothing in the cache, and the key parser is not involved
-      HParseResult *tmp = a_new(HParseResult, 1);
-      tmp->ast = NULL; tmp->arena = state->arena;
-      HParserCacheValue *ret = a_new(HParserCacheValue, 1);
-      ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp);
-      return ret;
+      cached = cached_result(state, NULL);
+      cached->input_stream = k->input_pos;
     }
     if (h_slist_find(head->eval_set, k->parser)) {
       // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head.
       head->eval_set = h_slist_remove_all(head->eval_set, k->parser);
       HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
-      // we know that cached has an entry here, modify it
-      if (!cached)
-        cached = a_new(HParserCacheValue, 1);
-      cached->value_type = PC_RIGHT;
-      cached->right = cached_result(state, tmp_res);
+      // update the cache
+      if (!cached) {
+        cached = cached_result(state, tmp_res);
+        h_hashtable_put(state->cache, k, cached);
+      } else {
+        cached->value_type = PC_RIGHT;
+        cached->right = tmp_res;
+        cached->input_stream = state->input_stream;
+      }
     }
     return cached;
   }
 }
@@ -83,51 +96,62 @@ void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) {
     some->eval_set = NULL;
     rec_detect->head = some;
   }
-  //assert(state->lr_stack->head != NULL);
-  HSlistNode *head = state->lr_stack->head;
-  HLeftRec *lr;
-  while (head && (lr = head->elem)->rule != p) {
+
+  HSlistNode *it;
+  for(it=state->lr_stack->head; it; it=it->next) {
+    HLeftRec *lr = it->elem;
+
+    if(lr->rule == p)
+      break;
+
     lr->head = rec_detect->head;
     h_slist_push(lr->head->involved_set, (void*)lr->rule);
-    head = head->next;
   }
 }
 
+// helper: true iff pos1 is less than pos2
+static inline bool pos_lt(HInputStream pos1, HInputStream pos2) {
+  return ((pos1.index < pos2.index) ||
+          (pos1.index == pos2.index && pos1.bit_offset < pos2.bit_offset));
+}
+
 /* If recall() returns NULL, we need to store a dummy failure in the cache and compute the
  * future parse.
  */
 
 HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) {
   // Store the head into the recursion_heads
-  h_hashtable_put(state->recursion_heads, k, head);
+  h_hashtable_put(state->recursion_heads, &k->input_pos, head);
   HParserCacheValue *old_cached = h_hashtable_get(state->cache, k);
   if (!old_cached || PC_LEFT == old_cached->value_type)
     errx(1, "impossible match");
-  HParseResult *old_res = old_cached->right->result;
+  HParseResult *old_res = old_cached->right;
+
+  // rewind the input
+  state->input_stream = k->input_pos;
 
   // reset the eval_set of the head of the recursion at each beginning of growth
   head->eval_set = h_slist_copy(head->involved_set);
   HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);
 
   if (tmp_res) {
-    if ((old_res->ast->index < tmp_res->ast->index) ||
-        (old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) {
-      HParserCacheValue *v = a_new(HParserCacheValue, 1);
-      v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res);
-      h_hashtable_put(state->cache, k, v);
+    if (pos_lt(old_cached->input_stream, state->input_stream)) {
+      h_hashtable_put(state->cache, k, cached_result(state, tmp_res));
       return grow(k, state, head);
     } else {
       // we're done with growing, we can remove data from the recursion head
-      h_hashtable_del(state->recursion_heads, k);
+      h_hashtable_del(state->recursion_heads, &k->input_pos);
       HParserCacheValue *cached = h_hashtable_get(state->cache, k);
       if (cached && PC_RIGHT == cached->value_type) {
-        return cached->right->result;
+        state->input_stream = cached->input_stream;
+        return cached->right;
       } else {
         errx(1, "impossible match");
       }
     }
   } else {
-    h_hashtable_del(state->recursion_heads, k);
+    h_hashtable_del(state->recursion_heads, &k->input_pos);
+    state->input_stream = old_cached->input_stream;
     return old_res;
   }
 }
@@ -140,9 +164,7 @@ HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growab
     }
     else {
       // update cache
-      HParserCacheValue *v = a_new(HParserCacheValue, 1);
-      v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed);
-      h_hashtable_put(state->cache, k, v);
+      h_hashtable_put(state->cache, k, cached_result(state, growable->seed));
       if (!growable->seed)
         return NULL;
       else
@@ -165,18 +187,18 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
     base->seed = NULL; base->rule = parser; base->head = NULL;
     h_slist_push(state->lr_stack, base);
     // cache it
-    HParserCacheValue *dummy = a_new(HParserCacheValue, 1);
-    dummy->value_type = PC_LEFT; dummy->left = base;
-    h_hashtable_put(state->cache, key, dummy);
+    h_hashtable_put(state->cache, key, cached_lr(state, base));
     // parse the input
     HParseResult *tmp_res = perform_lowlevel_parse(state, parser);
     // the base variable has passed equality tests with the cache
     h_slist_pop(state->lr_stack);
+    // update the cached value to our new position
+    HParserCacheValue *cached = h_hashtable_get(state->cache, key);
+    assert(cached != NULL);
+    cached->input_stream = state->input_stream;
     // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one
     if (NULL == base->head) {
-      HParserCacheValue *right = a_new(HParserCacheValue, 1);
-      right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res);
-      h_hashtable_put(state->cache, key, right);
+      h_hashtable_put(state->cache, key, cached_result(state, tmp_res));
       return tmp_res;
     } else {
       base->seed = tmp_res;
@@ -185,12 +207,12 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
     }
   } else {
     // it exists!
+    state->input_stream = m->input_stream;
     if (PC_LEFT == m->value_type) {
       setupLR(parser, state, m->left);
-      return m->left->seed; // BUG: this might not be correct
+      return m->left->seed;
     } else {
-      state->input_stream = m->right->input_stream;
-      return m->right->result;
+      return m->right;
     }
   }
 }
@@ -212,6 +234,14 @@ static bool cache_key_equal(const void* key1, const void* key2) {
   return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0;
 }
 
+static uint32_t pos_hash(const void* key) {
+  return h_djbhash(key, sizeof(HInputStream));
+}
+
+static bool pos_equal(const void* key1, const void* key2) {
+  return memcmp(key1, key2, sizeof(HInputStream)) == 0;
+}
+
 HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStream *input_stream) {
   HArena * arena = h_new_arena(mm__, 0);
   HParseState *parse_state = a_new_(arena, HParseState, 1);
@@ -219,8 +249,7 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr
                                          cache_key_hash); // hash_func
   parse_state->input_stream = *input_stream;
   parse_state->lr_stack = h_slist_new(arena);
-  parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal,
-                                                 cache_key_hash);
+  parse_state->recursion_heads = h_hashtable_new(arena, pos_equal, pos_hash);
   parse_state->arena = arena;
   HParseResult *res = h_do_parse(parser, parse_state);
   h_slist_free(parse_state->lr_stack);
diff --git a/src/internal.h b/src/internal.h
index 056a5afc6a2edcf0b8927e40d0ae12df60a93e81..85cd4dbc3407c74f9b8e055b56adc5fcad0a1b61 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -255,21 +255,17 @@ typedef struct HLeftRec_ {
   HRecursionHead *head;
 } HLeftRec;
 
-/* Result and remaining input, for rerunning from a cached position. */
-typedef struct HCachedResult_ {
-  HParseResult *result;
-  HInputStream input_stream;
-} HCachedResult;
-
 /* Tagged union for values in the cache: either HLeftRec's (Left) or
  * HParseResult's (Right).
+ * Includes the position (input_stream) to advance to after using this value.
  */
 typedef struct HParserCacheValue_t {
   HParserCacheValueType value_type;
   union {
     HLeftRec *left;
-    HCachedResult *right;
+    HParseResult *right;
   };
+  HInputStream input_stream;
 } HParserCacheValue;
 
 // This file provides the logical inverse of bitreader.c
diff --git a/src/t_parser.c b/src/t_parser.c
index e2eca978e9e96b341b04d55ba8a850d95dacae54..4260a7c99447d4dbf1cbe1e58e782f93f3c5e397 100644
--- a/src/t_parser.c
+++ b/src/t_parser.c
@@ -414,11 +414,24 @@ static void test_leftrec(gconstpointer backend) {
   HParser *lr_ = h_indirect();
   h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), h_epsilon_p(), NULL));
 
+  g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0, "NULL");
   g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "(u0x61)");
   g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "((u0x61) u0x61)");
   g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "(((u0x61) u0x61) u0x61)");
 }
 
+static void test_leftrec_ne(gconstpointer backend) {
+  HParser *a_ = h_ch('a');
+
+  HParser *lr_ = h_indirect();
+  h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL));
+
+  g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61");
+  g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 u0x61)");
+  g_check_parse_match(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "((u0x61 u0x61) u0x61)");
+  g_check_parse_failed(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "", 0);
+}
+
 static void test_rightrec(gconstpointer backend) {
   HParser *a_ = h_ch('a');
 
@@ -485,7 +498,9 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/and", GINT_TO_POINTER(PB_PACKRAT), test_and);
   g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not);
   g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore);
+  // XXX(pesco) it seems to me Warth's algorithm just doesn't work for this case
   //g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec);
+  g_test_add_data_func("/core/parser/packrat/leftrec-ne", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_ne);
   g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec);
 
   g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
@@ -599,6 +614,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/lalr/attr_bool", GINT_TO_POINTER(PB_LALR), test_attr_bool);
   g_test_add_data_func("/core/parser/lalr/ignore", GINT_TO_POINTER(PB_LALR), test_ignore);
   g_test_add_data_func("/core/parser/lalr/leftrec", GINT_TO_POINTER(PB_LALR), test_leftrec);
+  g_test_add_data_func("/core/parser/lalr/leftrec-ne", GINT_TO_POINTER(PB_LALR), test_leftrec_ne);
   g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec);
 
   g_test_add_data_func("/core/parser/glr/token", GINT_TO_POINTER(PB_GLR), test_token);
@@ -637,6 +653,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/glr/attr_bool", GINT_TO_POINTER(PB_GLR), test_attr_bool);
   g_test_add_data_func("/core/parser/glr/ignore", GINT_TO_POINTER(PB_GLR), test_ignore);
   g_test_add_data_func("/core/parser/glr/leftrec", GINT_TO_POINTER(PB_GLR), test_leftrec);
+  g_test_add_data_func("/core/parser/glr/leftrec-ne", GINT_TO_POINTER(PB_GLR), test_leftrec_ne);
   g_test_add_data_func("/core/parser/glr/rightrec", GINT_TO_POINTER(PB_GLR), test_rightrec);
   g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous);
 }