From 127600425054788c121fa4be3831cf09d2c636d5 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" <pesco@khjk.org> Date: Fri, 4 Sep 2015 21:05:56 +0200 Subject: [PATCH] handle suspend on lookahead at the very end of the chunk --- src/backends/llk.c | 27 ++++++++++++++++----------- src/cfgrammar.c | 11 ++++++++--- src/cfgrammar.h | 3 +++ 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/backends/llk.c b/src/backends/llk.c index 9acf67ec..95289975 100644 --- a/src/backends/llk.c +++ b/src/backends/llk.c @@ -296,7 +296,7 @@ static HLLkState *llk_parse_start_(HAllocator* mm__, const HParser* parser) return s; } -// returns partial result or NULL +// returns partial result or NULL (no parse) static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, HInputStream* stream) { @@ -316,6 +316,8 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, // when we empty the stack, the parse is complete. while(!h_slist_empty(stack)) { + tok = NULL; + // pop top of stack for inspection x = h_slist_pop(stack); assert(x != NULL); @@ -323,22 +325,24 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, if(x != MARK && x->type == HCF_CHOICE) { // x is a nonterminal; apply the appropriate production and continue - // push stack frame - h_slist_push(stack, seq); // save current partial value - h_slist_push(stack, x); // save the nonterminal - h_slist_push(stack, MARK); // frame delimiter - - // open a fresh result sequence - seq = h_carray_new(arena); - // look up applicable production in parse table const HCFSequence *p = h_llk_lookup(table, x, stream); if(p == NULL) goto no_parse; + if(p == H_NEED_INPUT) + goto need_input; // an infinite loop case that shouldn't happen assert(!p->items[0] || p->items[0] != x); + // push stack frame + h_slist_push(stack, seq); // save current partial value + h_slist_push(stack, x); // save the nonterminal + h_slist_push(stack, MARK); // frame delimiter + + // open a fresh result sequence + seq = h_carray_new(arena); + // push production's rhs onto the stack (in reverse order) HCFChoice **s; for(s = p->items; *s; s++); @@ -433,8 +437,9 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, need_input: if(stream->last_chunk) goto no_parse; - h_arena_free(arena, tok); // no result, yet - h_slist_push(stack, x); // try this symbol again next time + if(tok) + h_arena_free(arena, tok); // no result, yet + h_slist_push(stack, x); // try this symbol again next time return seq; } diff --git a/src/cfgrammar.c b/src/cfgrammar.c index a8761b8d..117009a5 100644 --- a/src/cfgrammar.c +++ b/src/cfgrammar.c @@ -349,6 +349,7 @@ void *h_stringmap_get(const HStringMap *m, const uint8_t *str, size_t n, bool en return m->epsilon_branch; } +// A NULL result means no parse. H_NEED_INPUT means lookahead is too short. void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead) { while(m) { @@ -362,9 +363,13 @@ void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead) // reading bits from it does not consume them from the real input. uint8_t c = h_read_bits(&lookahead, 8, false); - if (lookahead.overrun) { // end of input - // XXX assumption of byte-wise grammar and input - return m->end_branch; + if (lookahead.overrun) { // end of chunk + if (lookahead.last_chunk) { // end of input + // XXX assumption of byte-wise grammar and input + return m->end_branch; + } else { + return H_NEED_INPUT; + } } // no match yet, descend diff --git a/src/cfgrammar.h b/src/cfgrammar.h index 9cefc62e..2294d445 100644 --- a/src/cfgrammar.h +++ b/src/cfgrammar.h @@ -56,6 +56,9 @@ bool h_stringmap_empty(const HStringMap *m); static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c) { return h_hashtable_get(m->char_branches, (void *)char_key(c)); } +// dummy return value used by h_stringmap_get_lookahead when out of input +#define H_NEED_INPUT ((void *)&h_stringmap_get_lookahead) + /* Convert 'parser' into CFG representation by desugaring and compiling the set * of nonterminals. -- GitLab