From 6ab2adf1cb82c1875350dc7ef8493843941063e1 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" <pesco@khjk.org> Date: Sun, 9 Feb 2020 17:56:09 +0100 Subject: [PATCH] avoid a segfault when h_act_flatten() is called on a null token also includes commented-out tests for functionality that i discovered we never provided, namely position and length information at the granularity of HParsedTokens. at the moment, these fields always seem to be 0. we should probably fix that or get rid of them. in the latter case it might make sense to move this information to HParseResult, so the user can get access to it via h_action() as needed. --- src/glue.c | 12 +----- src/t_regression.c | 100 ++++++++++++++++++++++++++++++++++++++++++++- src/test_suite.h | 1 + 3 files changed, 102 insertions(+), 11 deletions(-) diff --git a/src/glue.c b/src/glue.c index da2f3af3..1df81731 100644 --- a/src/glue.c +++ b/src/glue.c @@ -60,16 +60,8 @@ static void act_flatten_(HCountedArray *seq, const HParsedToken *tok) { } HParsedToken *h_act_flatten(const HParseResult *p, void* user_data) { - HCountedArray *seq = h_carray_new(p->arena); - - act_flatten_(seq, p->ast); - - HParsedToken *res = a_new_(p->arena, HParsedToken, 1); - res->token_type = TT_SEQUENCE; - res->seq = seq; - res->index = p->ast->index; - res->bit_offset = p->ast->bit_offset; - res->bit_length = p->bit_length; + HParsedToken *res = h_make_seq(p->arena); + act_flatten_(res->seq, p->ast); return res; } diff --git a/src/t_regression.c b/src/t_regression.c index 4e8ad0cd..771582cf 100644 --- a/src/t_regression.c +++ b/src/t_regression.c @@ -187,7 +187,7 @@ static void test_charset_bits(void) { .free = NULL, }; test_charset_bits__buf[32] = 0xAB; - HCharset cs = new_charset(&alloc); + new_charset(&alloc); for(size_t i=0; i<32; i++) g_check_cmp_uint32(test_charset_bits__buf[i], ==, 0); g_check_cmp_uint32(test_charset_bits__buf[32], ==, 0xAB); @@ -270,6 +270,102 @@ static void test_bug_19() { g_assert_true(1); } +static void 
test_flatten_null() { + // h_act_flatten() produces a flat sequence from a nested sequence. it also + // happens to produce a one-element sequence when given a non-sequence token. + // but given a null token (as from h_epsilon_p() or h_ignore()), it would + // previously segfault. + // + // let's make sure the behavior is consistent and a singular null token + // produces the same thing as a sequence around h_epsilon_p() or h_ignore(). + + HParser *A = h_many(h_ch('a')); + HParser *B = h_ch('b'); + HParser *C = h_sequence(h_ch('c'), NULL); + + HParser *V = h_action(h_epsilon_p(), h_act_flatten, NULL); + HParser *W = h_action(B, h_act_flatten, NULL); + HParser *X = h_action(h_sequence(h_ignore(A), NULL), h_act_flatten, NULL); + HParser *Y = h_action(h_sequence(h_epsilon_p(), NULL), h_act_flatten, NULL); + HParser *Z = h_action(h_sequence(A, B, C, NULL), h_act_flatten, NULL); + + g_check_parse_match(V, PB_PACKRAT, "", 0, "()"); + g_check_parse_match(W, PB_PACKRAT, "b", 1, "(u0x62)"); + g_check_parse_match(X, PB_PACKRAT, "", 0, "()"); + g_check_parse_match(Y, PB_PACKRAT, "", 0, "()"); + g_check_parse_match(Z, PB_PACKRAT, "aabc", 4, "(u0x61 u0x61 u0x62 u0x63)"); + +#if 0 // XXX ast->bit_length and ast->index are currently not set + // let's also check that position and length info get attached correctly... 
+ + HParseResult *p = h_parse(h_sequence(A,V,B, NULL), (uint8_t *)"aaab", 4); + + // top-level token + assert(p != NULL); + assert(p->ast != NULL); + g_check_cmp_int64(p->bit_length, ==, 32); + g_check_cmp_size(p->ast->bit_length, ==, 32); + g_check_cmp_size(p->ast->index, ==, 0); + g_check_cmp_int((int)p->ast->bit_offset, ==, 0); + + // the empty sequence + HParsedToken *tok = H_INDEX_TOKEN(p->ast, 1); + assert(tok != NULL); + assert(tok->token_type == TT_SEQUENCE); + assert(tok->seq->used == 0); + g_check_cmp_size(tok->bit_length, ==, 0); + g_check_cmp_size(tok->index, ==, 2); + g_check_cmp_int((int)tok->bit_offset, ==, 0); +#endif // 0 +} + +#if 0 // XXX ast->bit_length and ast->index are currently not set +static void test_ast_length_index() { + HParser *A = h_many(h_ch('a')); + HParser *B = h_ch('b'); + HParser *C = h_sequence(h_ch('c'), NULL); + + const uint8_t input[] = "aabc"; + size_t len = sizeof input - 1; // sans null + HParseResult *p = h_parse(h_sequence(A,B,C, NULL), input, len); + assert(p != NULL); + assert(p->ast != NULL); + + // top-level token + g_check_cmp_int64(p->bit_length, ==, (int64_t)(8 * len)); + g_check_cmp_size(p->ast->bit_length, ==, 8 * len); + g_check_cmp_size(p->ast->index, ==, 0); + + HParsedToken *tok; + + // "aa" + tok = H_INDEX_TOKEN(p->ast, 0); + g_check_cmp_size(tok->bit_length, ==, 16); + g_check_cmp_size(tok->index, ==, 0); + + // "a", "a" + tok = H_INDEX_TOKEN(p->ast, 0, 0); + g_check_cmp_size(tok->bit_length, ==, 8); + g_check_cmp_size(tok->index, ==, 0); + tok = H_INDEX_TOKEN(p->ast, 0, 1); + g_check_cmp_size(tok->bit_length, ==, 8); + g_check_cmp_size(tok->index, ==, 1); + + // "b" + tok = H_INDEX_TOKEN(p->ast, 1); + g_check_cmp_size(tok->bit_length, ==, 8); + g_check_cmp_size(tok->index, ==, 2); + + // "c" + tok = H_INDEX_TOKEN(p->ast, 2); + g_check_cmp_size(tok->bit_length, ==, 8); + g_check_cmp_size(tok->index, ==, 3); + tok = H_INDEX_TOKEN(p->ast, 2, 0); + g_check_cmp_size(tok->bit_length, ==, 8); + 
g_check_cmp_size(tok->index, ==, 3); +} +#endif // 0 + void register_regression_tests(void) { g_test_add_func("/core/regression/bug118", test_bug118); g_test_add_func("/core/regression/seq_index_path", test_seq_index_path); @@ -280,4 +376,6 @@ void register_regression_tests(void) { g_test_add_func("/core/regression/cfg_many_seq", test_cfg_many_seq); g_test_add_func("/core/regression/charset_bits", test_charset_bits); g_test_add_func("/core/regression/bug19", test_bug_19); + g_test_add_func("/core/regression/flatten_null", test_flatten_null); + //XXX g_test_add_func("/core/regression/ast_length_index", test_ast_length_index); } diff --git a/src/test_suite.h b/src/test_suite.h index ed640fd8..e59f1c47 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -321,6 +321,7 @@ #define g_check_cmp_int64(n1, op, n2) g_check_inttype("%" PRId64, int64_t, n1, op, n2) #define g_check_cmp_uint32(n1, op, n2) g_check_inttype("%u", uint32_t, n1, op, n2) #define g_check_cmp_uint64(n1, op, n2) g_check_inttype("%" PRIu64, uint64_t, n1, op, n2) +#define g_check_cmp_size(n1, op, n2) g_check_inttype("%zu", size_t, n1, op, n2) #define g_check_cmp_ptr(n1, op, n2) g_check_inttype("%p", void *, n1, op, n2) #define g_check_cmpfloat(n1, op, n2) g_check_inttype("%g", float, n1, op, n2) #define g_check_cmpdouble(n1, op, n2) g_check_inttype("%g", double, n1, op, n2) -- GitLab