diff --git a/SConstruct b/SConstruct index 7979b3393d58e2fef09992d8626d3ba2960f6839..c7e3ba5371c8e5419086e2373511bc5478ae0828 100644 --- a/SConstruct +++ b/SConstruct @@ -119,7 +119,7 @@ if env['CC'] == 'cl': ) else: # -Wno-clobbered only really works with gcc >= 4.2.x, but ... scons - env.MergeFlags('-std=c99 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable -Wno-clobbered') + env.MergeFlags('-std=c99 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable') # Linker options if env['PLATFORM'] == 'darwin': diff --git a/src/backends/glr.c b/src/backends/glr.c index 535dc2860c59018324893da1450cfc4ff4fadf8b..44b0c50cafd08486866eedf17e29c50236434f9b 100644 --- a/src/backends/glr.c +++ b/src/backends/glr.c @@ -14,7 +14,7 @@ int h_glr_compile(HAllocator* mm__, HParser* parser, const void* params) } int result = h_lalr_compile(mm__, parser, params); - if(result == -1 && parser->backend_data) { + if(result == -2 && parser->backend_data) { // table is there, just has conflicts? nevermind, that's okay. 
result = 0; } diff --git a/src/backends/lalr.c b/src/backends/lalr.c index b82ef71c477128728db39d4ac72ef8d4ab0dc56c..db9b88ae28caf5d39bf134df454add37577d86c8 100644 --- a/src/backends/lalr.c +++ b/src/backends/lalr.c @@ -279,18 +279,18 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) } HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser)); if(g == NULL) // backend not suitable (language not context-free) - return -1; + return 2; HLRDFA *dfa = h_lr0_dfa(g); if (dfa == NULL) { // this should normally not happen h_cfgrammar_free(g); - return -1; + return 3; } HLRTable *table = h_lr0_table(g, dfa); if (table == NULL) { // this should normally not happen h_cfgrammar_free(g); - return -1; + return 4; } if(has_conflicts(table)) { @@ -300,7 +300,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) if(eg == NULL) { // this should normally not happen h_cfgrammar_free(g); h_lrtable_free(table); - return -1; + return 5; } // go through the inadequate states; replace inadeq with a new list @@ -349,7 +349,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params) h_cfgrammar_free(g); parser->backend_data = table; - return has_conflicts(table)? -1 : 0; + return has_conflicts(table)? 
-2 : 0; } void h_lalr_free(HParser *parser) diff --git a/src/backends/llk.c b/src/backends/llk.c index 4e8209b30f4aa7bd97f5df1c49202643d4efedd4..19944a20930eef5fcb71efd3d055aebeb93e9cfa 100644 --- a/src/backends/llk.c +++ b/src/backends/llk.c @@ -238,7 +238,7 @@ int h_llk_compile(HAllocator* mm__, HParser* parser, const void* params) // the table was ambiguous h_cfgrammar_free(grammar); h_llktable_free(table); - return -1; + return -2; } parser->backend_data = table; diff --git a/src/backends/regex.c b/src/backends/regex.c index f26abfda67af76900010053c6a6003fad1df55e7..0337949f948e06f332bbbada27170c4e99fae2ef 100644 --- a/src/backends/regex.c +++ b/src/backends/regex.c @@ -430,9 +430,10 @@ static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params prog->actions = NULL; prog->allocator = mm__; if (setjmp(prog->except)) { - return false; + return 3; } if (!h_compile_regex(prog, parser)) { + // this shouldn't normally fail when isValidRegular() returned true h_free(prog->insns); h_free(prog->actions); h_free(prog); diff --git a/src/benchmark.c b/src/benchmark.c index b6a2876fa0a1a85711c610b1d2bc5f1143c77f87..7d56c32e7c17bd204fd76b1cdd5b8d6680aeea57 100644 --- a/src/benchmark.c +++ b/src/benchmark.c @@ -46,7 +46,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest for (backend = PB_MIN; backend <= PB_MAX; backend++) { ret->results[backend].backend = backend; // Step 1: Compile grammar for given parser... - if (h_compile(parser, backend, NULL) == -1) { + if (h_compile(parser, backend, NULL)) { // backend inappropriate for grammar... 
fprintf(stderr, "Compiling for %s failed\n", HParserBackendNames[backend]); ret->results[backend].compile_success = false; diff --git a/src/glue.c b/src/glue.c index 9bca467a86344a360aeeba0458b1bff21cbf42e8..79e106c8bd902d6fe13ac485781222d202ea9dcc 100644 --- a/src/glue.c +++ b/src/glue.c @@ -60,16 +60,8 @@ static void act_flatten_(HCountedArray *seq, const HParsedToken *tok) { } HParsedToken *h_act_flatten(const HParseResult *p, void* user_data) { - HCountedArray *seq = h_carray_new(p->arena); - - act_flatten_(seq, p->ast); - - HParsedToken *res = a_new_(p->arena, HParsedToken, 1); - res->token_type = TT_SEQUENCE; - res->seq = seq; - res->index = p->ast->index; - res->bit_offset = p->ast->bit_offset; - res->bit_length = p->bit_length; + HParsedToken *res = h_make_seq(p->arena); + act_flatten_(res->seq, p->ast); return res; } diff --git a/src/glue.h b/src/glue.h index 38cafa28fd06d14efab178939c671996b3e73fee..08e5255ab2572d5d828943cd98331081ab02fc02 100644 --- a/src/glue.h +++ b/src/glue.h @@ -257,7 +257,7 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va); #define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) #define H_INDEX_DOUBLE(SEQ, ...) H_CAST_DOUBLE(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) #define H_INDEX_FLOAT(SEQ, ...) H_CAST_FLOAT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) -#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1) +#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(H_ASSERT_SEQ(SEQ), __VA_ARGS__, -1) // Standard short-hand to access and cast elements on a sequence token. #define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__) @@ -267,6 +267,7 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va); #define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__) #define H_FIELD_DOUBLE(...) H_INDEX_DOUBLE(p->ast, __VA_ARGS__) #define H_FIELD_FLOAT(...) H_INDEX_FLOAT(p->ast, __VA_ARGS__) +#define H_FIELD_TOKEN(...) 
H_INDEX_TOKEN(p->ast, __VA_ARGS__) // Lower-level helper for h_seq_index. HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal diff --git a/src/hammer.h b/src/hammer.h index f425d00d4e7ec93e73b8d9e7aec4542ba1f5c514..787af0b254a969226eeda985fc75d470796bd7cc 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -787,7 +787,13 @@ void h_pprintln(FILE* stream, const HParsedToken* tok); * documentation for the parser backend in question for information * about the [params] parameter, or just pass in NULL for the defaults. * - * Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise. + * Returns a nonzero value on error; 0 otherwise. Common return codes include: + * + * -1: parser uses a combinator that is incompatible with the chosen backend. + * -2: parser could not be compiled with the chosen parameters. + * >0: unexpected internal errors. + * + * Consult each backend for details. */ HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params); diff --git a/src/t_regression.c b/src/t_regression.c index 4e8ad0cdfc9df5c8ba2df32e4faaead50218ee7a..771582cfd835b348e0ed2b4da567ad40fc27800f 100644 --- a/src/t_regression.c +++ b/src/t_regression.c @@ -187,7 +187,7 @@ static void test_charset_bits(void) { .free = NULL, }; test_charset_bits__buf[32] = 0xAB; - HCharset cs = new_charset(&alloc); + new_charset(&alloc); for(size_t i=0; i<32; i++) g_check_cmp_uint32(test_charset_bits__buf[i], ==, 0); g_check_cmp_uint32(test_charset_bits__buf[32], ==, 0xAB); @@ -270,6 +270,102 @@ static void test_bug_19() { g_assert_true(1); } +static void test_flatten_null() { + // h_act_flatten() produces a flat sequence from a nested sequence. it also + // happens to produce a one-element sequence when given a non-sequence token. + // but given a null token (as from h_epsilon_p() or h_ignore()), it would + // previously segfault.
+ // + // let's make sure the behavior is consistent and a singular null token + // produces the same thing as a sequence around h_epsilon_p() or h_ignore(). + + HParser *A = h_many(h_ch('a')); + HParser *B = h_ch('b'); + HParser *C = h_sequence(h_ch('c'), NULL); + + HParser *V = h_action(h_epsilon_p(), h_act_flatten, NULL); + HParser *W = h_action(B, h_act_flatten, NULL); + HParser *X = h_action(h_sequence(h_ignore(A), NULL), h_act_flatten, NULL); + HParser *Y = h_action(h_sequence(h_epsilon_p(), NULL), h_act_flatten, NULL); + HParser *Z = h_action(h_sequence(A, B, C, NULL), h_act_flatten, NULL); + + g_check_parse_match(V, PB_PACKRAT, "", 0, "()"); + g_check_parse_match(W, PB_PACKRAT, "b", 1, "(u0x62)"); + g_check_parse_match(X, PB_PACKRAT, "", 0, "()"); + g_check_parse_match(Y, PB_PACKRAT, "", 0, "()"); + g_check_parse_match(Z, PB_PACKRAT, "aabc", 4, "(u0x61 u0x61 u0x62 u0x63)"); + +#if 0 // XXX ast->bit_length and ast->index are currently not set + // let's also check that position and length info get attached correctly... 
+ + HParseResult *p = h_parse(h_sequence(A,V,B, NULL), (uint8_t *)"aaab", 4); + + // top-level token + assert(p != NULL); + assert(p->ast != NULL); + g_check_cmp_int64(p->bit_length, ==, 32); + g_check_cmp_size(p->ast->bit_length, ==, 32); + g_check_cmp_size(p->ast->index, ==, 0); + g_check_cmp_int((int)p->ast->bit_offset, ==, 0); + + // the empty sequence + HParsedToken *tok = H_INDEX_TOKEN(p->ast, 1); + assert(tok != NULL); + assert(tok->token_type == TT_SEQUENCE); + assert(tok->seq->used == 0); + g_check_cmp_size(tok->bit_length, ==, 0); + g_check_cmp_size(tok->index, ==, 2); + g_check_cmp_int((int)tok->bit_offset, ==, 0); +#endif // 0 +} + +#if 0 // XXX ast->bit_length and ast->index are currently not set +static void test_ast_length_index() { + HParser *A = h_many(h_ch('a')); + HParser *B = h_ch('b'); + HParser *C = h_sequence(h_ch('c'), NULL); + + const uint8_t input[] = "aabc"; + size_t len = sizeof input - 1; // sans null + HParseResult *p = h_parse(h_sequence(A,B,C, NULL), input, len); + assert(p != NULL); + assert(p->ast != NULL); + + // top-level token + g_check_cmp_int64(p->bit_length, ==, (int64_t)(8 * len)); + g_check_cmp_size(p->ast->bit_length, ==, 8 * len); + g_check_cmp_size(p->ast->index, ==, 0); + + HParsedToken *tok; + + // "aa" + tok = H_INDEX_TOKEN(p->ast, 0); + g_check_cmp_size(tok->bit_length, ==, 16); + g_check_cmp_size(tok->index, ==, 0); + + // "a", "a" + tok = H_INDEX_TOKEN(p->ast, 0, 0); + g_check_cmp_size(tok->bit_length, ==, 8); + g_check_cmp_size(tok->index, ==, 0); + tok = H_INDEX_TOKEN(p->ast, 0, 1); + g_check_cmp_size(tok->bit_length, ==, 8); + g_check_cmp_size(tok->index, ==, 1); + + // "b" + tok = H_INDEX_TOKEN(p->ast, 1); + g_check_cmp_size(tok->bit_length, ==, 8); + g_check_cmp_size(tok->index, ==, 2); + + // "c" + tok = H_INDEX_TOKEN(p->ast, 2); + g_check_cmp_size(tok->bit_length, ==, 8); + g_check_cmp_size(tok->index, ==, 3); + tok = H_INDEX_TOKEN(p->ast, 2, 0); + g_check_cmp_size(tok->bit_length, ==, 8); + 
g_check_cmp_size(tok->index, ==, 3); +} +#endif // 0 + void register_regression_tests(void) { g_test_add_func("/core/regression/bug118", test_bug118); g_test_add_func("/core/regression/seq_index_path", test_seq_index_path); @@ -280,4 +376,6 @@ void register_regression_tests(void) { g_test_add_func("/core/regression/cfg_many_seq", test_cfg_many_seq); g_test_add_func("/core/regression/charset_bits", test_charset_bits); g_test_add_func("/core/regression/bug19", test_bug_19); + g_test_add_func("/core/regression/flatten_null", test_flatten_null); + //XXX g_test_add_func("/core/regression/ast_length_index", test_ast_length_index); } diff --git a/src/test_suite.h b/src/test_suite.h index ed640fd8a9dc378701ed815f0c553ccd074dfe52..e59f1c47cd1c45088583ffdde73f533745ec03e1 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -321,6 +321,7 @@ #define g_check_cmp_int64(n1, op, n2) g_check_inttype("%" PRId64, int64_t, n1, op, n2) #define g_check_cmp_uint32(n1, op, n2) g_check_inttype("%u", uint32_t, n1, op, n2) #define g_check_cmp_uint64(n1, op, n2) g_check_inttype("%" PRIu64, uint64_t, n1, op, n2) +#define g_check_cmp_size(n1, op, n2) g_check_inttype("%zu", size_t, n1, op, n2) #define g_check_cmp_ptr(n1, op, n2) g_check_inttype("%p", void *, n1, op, n2) #define g_check_cmpfloat(n1, op, n2) g_check_inttype("%g", float, n1, op, n2) #define g_check_cmpdouble(n1, op, n2) g_check_inttype("%g", double, n1, op, n2)