// Patch summary: record input positions (index, bit_offset) on parsed tokens
// in the LL(k) and LR backends, and add a test that checks them.
//
// src/backends/llk.c (llk_parse_chunk_):
//   - tok->index / tok->bit_offset are no longer assigned right after the
//     token is allocated; they are assigned in the terminal/charset branch,
//     before h_read_bits() consumes the input byte.
//   - The MARK branch (finished sequence) only gains an XXX comment; this
//     patch assigns no position to the TT_SEQUENCE token there.
//   - The reshape result is held in a temporary: when it is non-NULL it takes
//     index and bit_offset from the pre-reshape token; when it is NULL the
//     pre-reshape token is handed to h_arena_free(). tok then becomes the
//     reshape result (possibly NULL).
//   NOTE(review): for a token built in the MARK branch, the reshape path
//   appears to copy index/bit_offset values that this patch never assigns;
//   whether h_arena_malloc() zero-fills them is not visible here -- confirm.
//
// src/backends/lr.c:
//   - consume_input(): the new TT_UINT token gets
//     index = input.pos + input.index - 1 and bit_offset = input.bit_offset.
//     NOTE(review): the "- 1" presumably accounts for the byte having already
//     been read at this point -- confirm against the code above the hunk.
//   - h_lrengine_step(): the else branch that previously had no position now
//     uses the current input position (the patch itself marks this "XXX ?").
//     Reshape handling mirrors the llk.c change, and the (HParsedToken *)
//     casts on the reshape and action results are dropped.
//
// src/t_parser.c:
//   - includes "glue.h" (presumably for H_INDEX_TOKEN -- confirm).
//   - adds test_token_position(): compiles a sequence of the tokens "foo" and
//     "bar" for the given backend, parses "foobar", and checks that the two
//     child tokens report index 0 and 3, each with bit_offset 0. A compile or
//     parse failure is reported with g_test_message() + g_test_fail().
//   - registers the test for the regex, lalr and glr backends; the packrat
//     and llk registrations are added commented out.
//
// NOTE(review): the hunks below appear with their line breaks collapsed onto
// a few long lines; the original line structure has to be restored before
// this patch can be applied.
diff --git a/src/backends/llk.c b/src/backends/llk.c index 865c30e65f64f7c56802f6f486d5b421fa8fcfd6..0ab4610a29a1fcdefd1ca163ea2be8785b3ed0e6 100644 --- a/src/backends/llk.c +++ b/src/backends/llk.c @@ -438,13 +438,12 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, // the top of stack is such that there will be a result... tok = h_arena_malloc(arena, sizeof(HParsedToken)); - tok->index = stream->pos + stream->index; - tok->bit_offset = stream->bit_offset; if(x == MARK) { // hit stack frame boundary... // wrap the accumulated parse result, this sequence is finished tok->token_type = TT_SEQUENCE; tok->seq = seq; + // XXX would have to set token pos but we've forgotten pos of seq // recover original nonterminal and result sequence x = h_slist_pop(stack); @@ -454,6 +453,9 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, else { // x is a terminal or simple charset; match against input + tok->index = stream->pos + stream->index; + tok->bit_offset = stream->bit_offset; + // consume the input token uint8_t input = h_read_bits(stream, 8, false); @@ -500,8 +502,16 @@ static HCountedArray *llk_parse_chunk_(HLLkState *s, const HParser* parser, // 'tok' has been parsed; process it // perform token reshape if indicated - if(x->reshape) - tok = (HParsedToken *)x->reshape(make_result(arena, tok), x->user_data); + if(x->reshape) { + HParsedToken *t = x->reshape(make_result(arena, tok), x->user_data); + if(t) { + t->index = tok->index; + t->bit_offset = tok->bit_offset; + } else { + h_arena_free(arena, tok); + } + tok = t; + } // call validation and semantic action, if present if(x->pred && !x->pred(make_result(tarena, tok), x->user_data)) diff --git a/src/backends/lr.c b/src/backends/lr.c index 3f99eb513ad59a0ba0bd59e886ed67413f01f08e..d9aaee72dcca6b290c486680511b0f1100d80069 100644 --- a/src/backends/lr.c +++ b/src/backends/lr.c @@ -267,6 +267,8 @@ static HParsedToken *consume_input(HLREngine *engine) v = 
h_arena_malloc(engine->arena, sizeof(HParsedToken)); v->token_type = TT_UINT; v->uint = c; + v->index = engine->input.pos + engine->input.index - 1; + v->bit_offset = engine->input.bit_offset; } return v; @@ -309,18 +311,28 @@ bool h_lrengine_step(HLREngine *engine, const HLRAction *action) value->index = v->index; value->bit_offset = v->bit_offset; } else { - // XXX how to get the position in this case? + // result position is current input position XXX ? + value->index = engine->input.pos + engine->input.index; + value->bit_offset = engine->input.bit_offset; } // perform token reshape if indicated - if(symbol->reshape) - value = (HParsedToken *)symbol->reshape(make_result(arena, value), symbol->user_data); + if(symbol->reshape) { + v = symbol->reshape(make_result(arena, value), symbol->user_data); + if(v) { + v->index = value->index; + v->bit_offset = value->bit_offset; + } else { + h_arena_free(arena, value); + } + value = v; + } // call validation and semantic action, if present if(symbol->pred && !symbol->pred(make_result(tarena, value), symbol->user_data)) return false; // validation failed -> no parse; terminate if(symbol->action) - value = (HParsedToken *)symbol->action(make_result(arena, value), symbol->user_data); + value = symbol->action(make_result(arena, value), symbol->user_data); // this is LR, building a right-most derivation bottom-up, so no reduce can // follow a reduce. 
we can also assume no conflict follows for GLR if we diff --git a/src/t_parser.c b/src/t_parser.c index 3d54ff6bf3bd6d75fcb9c73732a7a04025d5bcf3..c42eca91321c241a1987b99116c8c90deefbdf64 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -1,6 +1,7 @@ #include <glib.h> #include <string.h> #include "hammer.h" +#include "glue.h" #include "internal.h" #include "test_suite.h" #include "parsers/parser_internal.h" @@ -552,6 +553,34 @@ static void test_result_length(gconstpointer backend) { g_check_cmp_int64(r->bit_length, ==, 24); } +static void test_token_position(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + HParser *p = h_sequence(h_token((uint8_t*)"foo",3), + h_token((uint8_t*)"bar",3), NULL); + + if(h_compile(p, be, NULL) != 0) { + g_test_message("Compile failed"); + g_test_fail(); + return; + } + + HParseResult *r = h_parse(p, (uint8_t*)"foobar", 6); + if(!r) { + g_test_message("Parse failed"); + g_test_fail(); + return; + } + + assert(r->ast != NULL); + HParsedToken *foo = H_INDEX_TOKEN(r->ast, 0); + HParsedToken *bar = H_INDEX_TOKEN(r->ast, 1); + + g_check_cmp_uint64(foo->index, ==, 0); + g_check_cmp_uint64(foo->bit_offset, ==, 0); + g_check_cmp_uint64(bar->index, ==, 3); + g_check_cmp_uint64(bar->bit_offset, ==, 0); +} + static void test_ambiguous(gconstpointer backend) { HParser *d_ = h_ch('d'); HParser *p_ = h_ch('+'); @@ -763,6 +792,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation); g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind); g_test_add_data_func("/core/parser/packrat/result_length", GINT_TO_POINTER(PB_PACKRAT), test_result_length); + //g_test_add_data_func("/core/parser/packrat/token_position", GINT_TO_POINTER(PB_PACKRAT), test_token_position); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token); g_test_add_data_func("/core/parser/llk/ch", 
GINT_TO_POINTER(PB_LLk), test_ch); @@ -802,6 +832,7 @@ void register_parser_tests(void) { //g_test_add_data_func("/core/parser/llk/leftrec", GINT_TO_POINTER(PB_LLk), test_leftrec); g_test_add_data_func("/core/parser/llk/rightrec", GINT_TO_POINTER(PB_LLk), test_rightrec); g_test_add_data_func("/core/parser/llk/result_length", GINT_TO_POINTER(PB_LLk), test_result_length); + //g_test_add_data_func("/core/parser/llk/token_position", GINT_TO_POINTER(PB_LLk), test_token_position); g_test_add_data_func("/core/parser/llk/iterative", GINT_TO_POINTER(PB_LLk), test_iterative); g_test_add_data_func("/core/parser/llk/iterative/lookahead", GINT_TO_POINTER(PB_LLk), test_iterative_lookahead); g_test_add_data_func("/core/parser/llk/iterative/result_length", GINT_TO_POINTER(PB_LLk), test_iterative_result_length); @@ -843,6 +874,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/regex/attr_bool", GINT_TO_POINTER(PB_REGULAR), test_attr_bool); g_test_add_data_func("/core/parser/regex/ignore", GINT_TO_POINTER(PB_REGULAR), test_ignore); g_test_add_data_func("/core/parser/regex/result_length", GINT_TO_POINTER(PB_REGULAR), test_result_length); + g_test_add_data_func("/core/parser/regex/token_position", GINT_TO_POINTER(PB_REGULAR), test_token_position); g_test_add_data_func("/core/parser/lalr/token", GINT_TO_POINTER(PB_LALR), test_token); g_test_add_data_func("/core/parser/lalr/ch", GINT_TO_POINTER(PB_LALR), test_ch); @@ -883,6 +915,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/lalr/leftrec-ne", GINT_TO_POINTER(PB_LALR), test_leftrec_ne); g_test_add_data_func("/core/parser/lalr/rightrec", GINT_TO_POINTER(PB_LALR), test_rightrec); g_test_add_data_func("/core/parser/lalr/result_length", GINT_TO_POINTER(PB_LALR), test_result_length); + g_test_add_data_func("/core/parser/lalr/token_position", GINT_TO_POINTER(PB_LALR), test_token_position); g_test_add_data_func("/core/parser/lalr/iterative", GINT_TO_POINTER(PB_LALR), test_iterative); 
g_test_add_data_func("/core/parser/lalr/iterative/lookahead", GINT_TO_POINTER(PB_LALR), test_iterative_lookahead); g_test_add_data_func("/core/parser/lalr/iterative/result_length", GINT_TO_POINTER(PB_LALR), test_iterative_result_length); @@ -927,4 +960,5 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/glr/rightrec", GINT_TO_POINTER(PB_GLR), test_rightrec); g_test_add_data_func("/core/parser/glr/ambiguous", GINT_TO_POINTER(PB_GLR), test_ambiguous); g_test_add_data_func("/core/parser/glr/result_length", GINT_TO_POINTER(PB_GLR), test_result_length); + g_test_add_data_func("/core/parser/glr/token_position", GINT_TO_POINTER(PB_GLR), test_token_position); }