diff --git a/SConstruct b/SConstruct index 3120df44a885b706cc57534e9a0f548bcd1a0562..149951b026e8d86f9e298da7f8d59084763d8b0a 100644 --- a/SConstruct +++ b/SConstruct @@ -119,7 +119,7 @@ if env['CC'] == 'cl': ) else: # -Wno-clobbered only really works with gcc >= 4.2.x, but ... scons - env.MergeFlags('-std=c99 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable') + env.MergeFlags('-std=c99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable') # Linker options if env['PLATFORM'] == 'darwin': diff --git a/src/cfgrammar.c b/src/cfgrammar.c index 5da26c6a48e338bb025433993f416cccf63715ec..bd69588a70395e007cd8cf83eaa5a3cce69e771e 100644 --- a/src/cfgrammar.c +++ b/src/cfgrammar.c @@ -60,6 +60,7 @@ void h_cfgrammar_free(HCFGrammar *g) // helpers static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol); static void collect_geneps(HCFGrammar *grammar); +static void eliminate_dead_rules(HCFGrammar *g); HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser) @@ -101,6 +102,9 @@ HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *desugared) g->start = desugared; } + // simplifications + eliminate_dead_rules(g); + // determine which nonterminals generate epsilon collect_geneps(g); @@ -214,6 +218,76 @@ static void collect_geneps(HCFGrammar *g) } while(g->geneps->used != prevused); } +static bool mentions_symbol(HCFChoice **s, const HCFChoice *x) +{ + for(; *s; s++) { + if (*s == x) + return true; + } + return false; +} + +static void remove_productions_with(HCFGrammar *g, const HCFChoice *x) +{ + HHashTableEntry *hte; + const HCFChoice *symbol; + size_t i; + + for(i=0; i < g->nts->capacity; i++) { + for(hte = &g->nts->contents[i]; hte; hte = hte->next) { + if (hte->key == NULL) + continue; + symbol = hte->key; + assert(symbol->type == HCF_CHOICE); + + HCFSequence **p, **q; + for(p = symbol->seq; *p != NULL; ) { + if (mentions_symbol((*p)->items, x)) { + // remove production p + 
for(q=p; *(q+1) != NULL; q++); // q = last production + *p = *q; // move q over p + *q = NULL; // delete old q + } else { + p++; + } + } + } + } +} + +static void eliminate_dead_rules(HCFGrammar *g) +{ + HHashTableEntry *hte; + const HCFChoice *symbol; + size_t i; + bool found; + + do { + found = false; + for(i=0; !found && i < g->nts->capacity; i++) { + for(hte = &g->nts->contents[i]; !found && hte; hte = hte->next) { + if (hte->key == NULL) + continue; + symbol = hte->key; + assert(symbol->type == HCF_CHOICE); + + // this NT is dead if it has no productions + if (*symbol->seq == NULL) + found = true; + } + } + if (found) { + h_hashtable_del(g->nts, symbol); + remove_productions_with(g, symbol); + } + } while(found); // until nothing left to remove + + // rebuild g->nts. there may now be symbols that no longer appear in any + // productions. we also might have removed g->start. + g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr); + collect_nts(g, g->start); +} + HStringMap *h_stringmap_new(HArena *a) { diff --git a/src/glue.c b/src/glue.c index 1df8173195c7090f3ca861f7263e15f84522766b..79e106c8bd902d6fe13ac485781222d202ea9dcc 100644 --- a/src/glue.c +++ b/src/glue.c @@ -121,6 +121,20 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val) return ret; } +HParsedToken *h_make_double(HArena *arena, double val) +{ + HParsedToken *ret = h_make_(arena, TT_DOUBLE); + ret->dbl = val; + return ret; +} + +HParsedToken *h_make_float(HArena *arena, float val) +{ + HParsedToken *ret = h_make_(arena, TT_FLOAT); + ret->flt = val; + return ret; +} + // XXX -> internal HParsedToken *h_carray_index(const HCountedArray *a, size_t i) { diff --git a/src/glue.h b/src/glue.h index 1de285823b3f2ae6fd956c04c9bc81d92445719a..08e5255ab2572d5d828943cd98331081ab02fc02 100644 --- a/src/glue.h +++ b/src/glue.h @@ -198,6 +198,8 @@ HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of HParsedToken *h_make_bytes(HArena *arena, const uint8_t *array, size_t len); 
HParsedToken *h_make_sint(HArena *arena, int64_t val); HParsedToken *h_make_uint(HArena *arena, uint64_t val); +HParsedToken *h_make_double(HArena *arena, double val); +HParsedToken *h_make_float(HArena *arena, float val); // Standard short-hands to make tokens in an action. #define H_MAKE(TYP, VAL) h_make(p->arena, (HTokenType)TT_ ## TYP, VAL) @@ -206,6 +208,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val); #define H_MAKE_BYTES(VAL, LEN) h_make_bytes(p->arena, VAL, LEN) #define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) #define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) +#define H_MAKE_DOUBLE(VAL) h_make_double(p->arena, VAL) +#define H_MAKE_FLOAT(VAL) h_make_float(p->arena, VAL) // Extract (cast) type-specific value back from HParsedTokens... @@ -218,6 +222,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val); #define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK) #define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK) #define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK) +#define H_ASSERT_DOUBLE(TOK) h_assert_type(TT_DOUBLE, TOK) +#define H_ASSERT_FLOAT(TOK) h_assert_type(TT_FLOAT, TOK) // Assert expected type and return contained value. #define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user) @@ -225,6 +231,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val); #define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes) #define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint) #define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint) +#define H_CAST_DOUBLE(TOK) (H_ASSERT_DOUBLE(TOK)->dbl) +#define H_CAST_FLOAT(TOK) (H_ASSERT_FLOAT(TOK)->flt) // Sequence access... @@ -247,6 +255,8 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va); #define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) #define H_INDEX_SINT(SEQ, ...) H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) #define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_DOUBLE(SEQ, ...) 
H_CAST_DOUBLE(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_FLOAT(SEQ, ...) H_CAST_FLOAT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) #define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(H_ASSERT_SEQ(SEQ), __VA_ARGS__, -1) // Standard short-hand to access and cast elements on a sequence token. @@ -255,6 +265,8 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va); #define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__) #define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__) #define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__) +#define H_FIELD_DOUBLE(...) H_INDEX_DOUBLE(p->ast, __VA_ARGS__) +#define H_FIELD_FLOAT(...) H_INDEX_FLOAT(p->ast, __VA_ARGS__) #define H_FIELD_TOKEN(...) H_INDEX_TOKEN(p->ast, __VA_ARGS__) // Lower-level helper for h_seq_index. diff --git a/src/hammer.h b/src/hammer.h index 6cd2660d3cfd29a9b4d34e1e054d2613ca4260a2..787af0b254a969226eeda985fc75d470796bd7cc 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -56,6 +56,8 @@ typedef enum HTokenType_ { TT_BYTES = 2, TT_SINT = 4, TT_UINT = 8, + TT_DOUBLE = 12, + TT_FLOAT = 13, TT_SEQUENCE = 16, TT_RESERVED_1, // reserved for backend-specific internal use TT_ERR = 32, diff --git a/src/pprint.c b/src/pprint.c index 145bf5237ae98e7db240aa1540bf8b242801edd1..5f6e1e2c5a6d98869be764218cc2e4f191c0e669 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -63,6 +63,12 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { case TT_UINT: fprintf(stream, "%" PRIu64, tok->uint); break; + case TT_DOUBLE: + fprintf(stream, "%f", tok->dbl); + break; + case TT_FLOAT: + fprintf(stream, "%f", (double)tok->flt); + break; case TT_SEQUENCE: if (tok->seq->used == 0) fprintf(stream, "[ ]"); @@ -183,6 +189,12 @@ static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) { case TT_UINT: h_append_buf_formatted(buf, "u%#" PRIx64, tok->uint); break; + case TT_DOUBLE: + h_append_buf_formatted(buf, "d%a", tok->dbl); + break; + case TT_FLOAT: + 
h_append_buf_formatted(buf, "f%a", (double)tok->flt); + break; case TT_ERR: h_append_buf(buf, "ERR", 3); break; diff --git a/src/registry.c b/src/registry.c index 5486fd7bdb8022c65a296205b0dfd562a20a0572..15cf41a6c6eeb1ec8b9849b06a1e1ead2a5d204a 100644 --- a/src/registry.c +++ b/src/registry.c @@ -15,7 +15,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ -#include <search.h> #include <stdlib.h> #include "hammer.h" #include "internal.h" diff --git a/src/t_parser.c b/src/t_parser.c index 2d933ef1d3a025fd15fa8a1e247dc8ced3ba63ea..356c38f1674d6d3f90e3b0da672646455437f7a2 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -129,6 +129,29 @@ static void test_uint8(gconstpointer backend) { } //@MARK_END +// XXX implement h_double() and h_float(). these just test the pretty-printer... +static HParsedToken *act_double(const HParseResult *p, void *u) { + return H_MAKE_DOUBLE((double)H_FIELD_UINT(0) + (double)H_FIELD_UINT(1)/10); +} +static void test_double(gconstpointer backend) { + HParser *b = h_uint8(); + HParser *dbl = h_action(h_sequence(b, b, NULL), act_double, NULL); + uint8_t input[] = {4,2}; + + g_check_parse_match(dbl, (HParserBackend)GPOINTER_TO_INT(backend), input, 2, "d0x1.0cccccccccccdp+2"); +} + +static HParsedToken *act_float(const HParseResult *p, void *u) { + return H_MAKE_FLOAT((float)H_FIELD_UINT(0) + (float)H_FIELD_UINT(1)/10); +} +static void test_float(gconstpointer backend) { + HParser *b = h_uint8(); + HParser *flt = h_action(h_sequence(b, b, NULL), act_float, NULL); + uint8_t input[] = {4,2}; + + g_check_parse_match(flt, (HParserBackend)GPOINTER_TO_INT(backend), input, 2, "f0x1.0cccccp+2"); +} + static void test_int_range(gconstpointer backend) { const HParser *int_range_ = h_int_range(h_uint8(), 3, 10); @@ -873,10 +896,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/uint16", GINT_TO_POINTER(PB_PACKRAT), test_uint16); g_test_add_data_func("/core/parser/packrat/uint8", 
GINT_TO_POINTER(PB_PACKRAT), test_uint8); g_test_add_data_func("/core/parser/packrat/int_range", GINT_TO_POINTER(PB_PACKRAT), test_int_range); -#if 0 - g_test_add_data_func("/core/parser/packrat/float64", GINT_TO_POINTER(PB_PACKRAT), test_float64); - g_test_add_data_func("/core/parser/packrat/float32", GINT_TO_POINTER(PB_PACKRAT), test_float32); -#endif + g_test_add_data_func("/core/parser/packrat/double", GINT_TO_POINTER(PB_PACKRAT), test_double); + g_test_add_data_func("/core/parser/packrat/float", GINT_TO_POINTER(PB_PACKRAT), test_float); g_test_add_data_func("/core/parser/packrat/whitespace", GINT_TO_POINTER(PB_PACKRAT), test_whitespace); g_test_add_data_func("/core/parser/packrat/left", GINT_TO_POINTER(PB_PACKRAT), test_left); g_test_add_data_func("/core/parser/packrat/right", GINT_TO_POINTER(PB_PACKRAT), test_right); @@ -931,10 +952,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/llk/uint16", GINT_TO_POINTER(PB_LLk), test_uint16); g_test_add_data_func("/core/parser/llk/uint8", GINT_TO_POINTER(PB_LLk), test_uint8); g_test_add_data_func("/core/parser/llk/int_range", GINT_TO_POINTER(PB_LLk), test_int_range); -#if 0 - g_test_add_data_func("/core/parser/llk/float64", GINT_TO_POINTER(PB_LLk), test_float64); - g_test_add_data_func("/core/parser/llk/float32", GINT_TO_POINTER(PB_LLk), test_float32); -#endif + g_test_add_data_func("/core/parser/llk/double", GINT_TO_POINTER(PB_LLk), test_double); + g_test_add_data_func("/core/parser/llk/float", GINT_TO_POINTER(PB_LLk), test_float); g_test_add_data_func("/core/parser/llk/whitespace", GINT_TO_POINTER(PB_LLk), test_whitespace); g_test_add_data_func("/core/parser/llk/left", GINT_TO_POINTER(PB_LLk), test_left); g_test_add_data_func("/core/parser/llk/right", GINT_TO_POINTER(PB_LLk), test_right); @@ -977,11 +996,9 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/regex/uint32", GINT_TO_POINTER(PB_REGULAR), test_uint32); g_test_add_data_func("/core/parser/regex/uint16", 
GINT_TO_POINTER(PB_REGULAR), test_uint16); g_test_add_data_func("/core/parser/regex/uint8", GINT_TO_POINTER(PB_REGULAR), test_uint8); -#if 0 - g_test_add_data_func("/core/parser/regex/int_range", GINT_TO_POINTER(PB_REGULAR), test_int_range); - g_test_add_data_func("/core/parser/regex/float64", GINT_TO_POINTER(PB_REGULAR), test_float64); - g_test_add_data_func("/core/parser/regex/float32", GINT_TO_POINTER(PB_REGULAR), test_float32); -#endif + //g_test_add_data_func("/core/parser/regex/int_range", GINT_TO_POINTER(PB_REGULAR), test_int_range); + g_test_add_data_func("/core/parser/regex/double", GINT_TO_POINTER(PB_REGULAR), test_double); + g_test_add_data_func("/core/parser/regex/float", GINT_TO_POINTER(PB_REGULAR), test_float); g_test_add_data_func("/core/parser/regex/whitespace", GINT_TO_POINTER(PB_REGULAR), test_whitespace); g_test_add_data_func("/core/parser/regex/left", GINT_TO_POINTER(PB_REGULAR), test_left); g_test_add_data_func("/core/parser/regex/right", GINT_TO_POINTER(PB_REGULAR), test_right); @@ -1020,10 +1037,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/lalr/uint16", GINT_TO_POINTER(PB_LALR), test_uint16); g_test_add_data_func("/core/parser/lalr/uint8", GINT_TO_POINTER(PB_LALR), test_uint8); g_test_add_data_func("/core/parser/lalr/int_range", GINT_TO_POINTER(PB_LALR), test_int_range); -#if 0 - g_test_add_data_func("/core/parser/lalr/float64", GINT_TO_POINTER(PB_LALR), test_float64); - g_test_add_data_func("/core/parser/lalr/float32", GINT_TO_POINTER(PB_LALR), test_float32); -#endif + g_test_add_data_func("/core/parser/lalr/double", GINT_TO_POINTER(PB_LALR), test_double); + g_test_add_data_func("/core/parser/lalr/float", GINT_TO_POINTER(PB_LALR), test_float); g_test_add_data_func("/core/parser/lalr/whitespace", GINT_TO_POINTER(PB_LALR), test_whitespace); g_test_add_data_func("/core/parser/lalr/left", GINT_TO_POINTER(PB_LALR), test_left); g_test_add_data_func("/core/parser/lalr/right", GINT_TO_POINTER(PB_LALR), test_right); @@ 
-1068,10 +1083,8 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/glr/uint16", GINT_TO_POINTER(PB_GLR), test_uint16); g_test_add_data_func("/core/parser/glr/uint8", GINT_TO_POINTER(PB_GLR), test_uint8); g_test_add_data_func("/core/parser/glr/int_range", GINT_TO_POINTER(PB_GLR), test_int_range); -#if 0 - g_test_add_data_func("/core/parser/glr/float64", GINT_TO_POINTER(PB_GLR), test_float64); - g_test_add_data_func("/core/parser/glr/float32", GINT_TO_POINTER(PB_GLR), test_float32); -#endif + g_test_add_data_func("/core/parser/glr/double", GINT_TO_POINTER(PB_GLR), test_double); + g_test_add_data_func("/core/parser/glr/float", GINT_TO_POINTER(PB_GLR), test_float); g_test_add_data_func("/core/parser/glr/whitespace", GINT_TO_POINTER(PB_GLR), test_whitespace); g_test_add_data_func("/core/parser/glr/left", GINT_TO_POINTER(PB_GLR), test_left); g_test_add_data_func("/core/parser/glr/right", GINT_TO_POINTER(PB_GLR), test_right); diff --git a/src/t_regression.c b/src/t_regression.c index 4b1f2bfc284133d03d6973e2660346137ea79a74..4ff3f96263e28ad4002cd2bfc00c58be7effc1dd 100644 --- a/src/t_regression.c +++ b/src/t_regression.c @@ -438,6 +438,30 @@ static void test_issue92() { g_check_cmp_int(r, ==, 0); } +static void test_issue83() { + HParser *p = h_sequence(h_sequence(NULL, NULL), h_nothing_p(), NULL); + /* + * A -> B + * B -> C D + * C -> "" + * D -x + * + * (S) -> 0B1 + * 0B1 -> 0C2 2D3 + * 0C2 -> "" (*) h_follow() + * 2D3 -x + */ + + /* + * similar to issue 91, this would cause the same assertion failure, but for + * a different reason. the follow set of 0C2 above is equal to the first set + * of 2D3, but 2D3 is an empty choice. The first set of an empty choice + * is legitimately empty. the assertion in h_lalr_compile() missed this case. 
+ */ + int r = h_compile(p, PB_LALR, NULL); + g_check_cmp_int(r, ==, 0); +} + void register_regression_tests(void) { g_test_add_func("/core/regression/bug118", test_bug118); g_test_add_func("/core/regression/seq_index_path", test_seq_index_path); @@ -452,4 +476,5 @@ void register_regression_tests(void) { //XXX g_test_add_func("/core/regression/ast_length_index", test_ast_length_index); g_test_add_func("/core/regression/issue91", test_issue91); g_test_add_func("/core/regression/issue92", test_issue92); + g_test_add_func("/core/regression/issue83", test_issue83); } diff --git a/src/tsearch.h b/src/tsearch.h index 7b297db7c7ea425f350f0d2c3350d55a630fb97b..7ba71d97502ca34908284560d0c156d2d29d9c37 100644 --- a/src/tsearch.h +++ b/src/tsearch.h @@ -20,6 +20,7 @@ void *tfind(const void *vkey, void * const *vrootp, int (*compar)(const void *, const void *)); #else +#define _POSIX_C_SOURCE 200809L #include <search.h> #endif