diff --git a/HACKING b/HACKING index 7bffb4c919b99ada37d3df8b9896595fbfc1e05f..d923217e8007694f45de5f99802321ac6f9b37c0 100644 --- a/HACKING +++ b/HACKING @@ -2,14 +2,16 @@ Privileged arguments ==================== As a matter of convenience, there are several identifiers that -internal macros use. Chances are that if you use these names for other -things, you're gonna have a bad time. +internal anaphoric macros use. Chances are that if you use these names +for other things, you're gonna have a bad time. In particular, these names, and the macros that use them, are: - state: Used by a_new and company. Should be an HParseState* - mm__: Used by h_new and h_free. Should be an HAllocator* +- stk__: + Used in desugaring. Should be an HCFStack* Function suffixes ================= diff --git a/Makefile b/Makefile index dfca1177fa95ac6896934ed6a83dd84ca4176063..6c8f38633fde1688eec11b88611940852922c3b3 100644 --- a/Makefile +++ b/Makefile @@ -24,17 +24,19 @@ examples/all: src/all examples/compile: src/compile define SUBDIR_TEMPLATE -$(1)/%: - $$(MAKE) -C $(1) $$* +$(1)/%: force + $(MAKE) -C $(1) $$* endef +force: + $(foreach dir,$(SUBDIRS),$(eval $(call SUBDIR_TEMPLATE,$(dir)))) #.DEFAULT: # $(if $(findstring ./,$(dir $@)),$(error No rule to make target `$@'),$(MAKE) -C $(dir $@) $(notdir $@)) -TAGS: $(shell find * -name "*.c") - etags $^ +TAGS: force + etags $(shell find * -name "*.c" -o -name "*.h") config: @printf "%30s %s\n" $(foreach var,$(CONFIG_VARS),$(var) $($(var)) ) diff --git a/src/backends/contextfree.h b/src/backends/contextfree.h new file mode 100644 index 0000000000000000000000000000000000000000..9c2ec4598cb703ba773c4d515b90a2165a73c336 --- /dev/null +++ b/src/backends/contextfree.h @@ -0,0 +1,147 @@ +// This is an internal header; it provides macros to make desugaring cleaner. +#include <assert.h> +#include "../internal.h" +#ifndef HAMMER_CONTEXTFREE__H +#define HAMMER_CONTEXTFREE__H + + +// HCFStack +struct HCFStack_ { + HCFChoice **stack; + int count; + int cap; + HCFChoice *last_completed; // Last completed choice. +}; + +#ifndef UNUSED +#define UNUSED __attribute__((unused)) +#endif + +static inline HCFChoice* h_cfstack_new_choice_raw(HAllocator *mm__, HCFStack *stk__) UNUSED; +static inline void h_cfstack_begin_choice(HAllocator *mm__, HCFStack *stk__) UNUSED; +static HCFStack* h_cfstack_new(HAllocator *mm__) UNUSED; +static HCFStack* h_cfstack_new(HAllocator *mm__) { + HCFStack *stack = h_new(HCFStack, 1); + stack->count = 0; + stack->cap = 4; + stack->stack = h_new(HCFChoice*, stack->cap); + return stack; +} + +static void h_cfstack_free(HAllocator *mm__, HCFStack *stk__) UNUSED; +static void h_cfstack_free(HAllocator *mm__, HCFStack *stk__) { + h_free(stk__->stack); + h_free(stk__); +} + +static inline void h_cfstack_add_to_seq(HAllocator *mm__, HCFStack *stk__, HCFChoice *item) UNUSED; +static inline void h_cfstack_add_to_seq(HAllocator *mm__, HCFStack *stk__, HCFChoice *item) { + HCFChoice *cur_top = stk__->stack[stk__->count-1]; + assert(cur_top->type == HCF_CHOICE); + assert(cur_top->seq[0] != NULL); // There must be at least one sequence... + stk__->last_completed = item; + for (int i = 0;; i++) { + if (cur_top->seq[i+1] == NULL) { + assert(cur_top->seq[i]->items != NULL); + for (int j = 0;; j++) { + if (cur_top->seq[i]->items[j] == NULL) { + cur_top->seq[i]->items = mm__->realloc(mm__, cur_top->seq[i]->items, sizeof(HCFChoice*) * (j+2)); + cur_top->seq[i]->items[j] = item; + cur_top->seq[i]->items[j+1] = NULL; + return; + } + } + } + } +} + +static inline HCFChoice* h_cfstack_new_choice_raw(HAllocator *mm__, HCFStack *stk__) { + HCFChoice *ret = h_new(HCFChoice, 1); + ret->reshape = NULL; + ret->action = NULL; + ret->pred = NULL; + ret->type = ~0; // invalid type + // Add it to the current sequence... + if (stk__->count > 0) { + h_cfstack_add_to_seq(mm__, stk__, ret); + } + + return ret; +} + +static inline void h_cfstack_add_charset(HAllocator *mm__, HCFStack *stk__, HCharset charset) { + HCFChoice *ni = h_cfstack_new_choice_raw(mm__, stk__); + ni->type = HCF_CHARSET; + ni->charset = charset; + stk__->last_completed = ni; +} + + +static inline void h_cfstack_add_char(HAllocator *mm__, HCFStack *stk__, uint8_t chr) { + HCFChoice *ni = h_cfstack_new_choice_raw(mm__, stk__); + ni->type = HCF_CHAR; + ni->chr = chr; + stk__->last_completed = ni; +} + +static inline void h_cfstack_add_end(HAllocator *mm__, HCFStack *stk__) { + HCFChoice *ni = h_cfstack_new_choice_raw(mm__, stk__); + ni->type = HCF_END; + stk__->last_completed = ni; +} + +static inline void h_cfstack_begin_choice(HAllocator *mm__, HCFStack *stk__) { + HCFChoice *choice = h_cfstack_new_choice_raw(mm__, stk__); + choice->type = HCF_CHOICE; + choice->seq = h_new(HCFSequence*, 1); + choice->seq[0] = NULL; + + if (stk__->count + 1 > stk__->cap) { + assert(stk__->cap > 0); + stk__->cap *= 2; + stk__->stack = mm__->realloc(mm__, stk__->stack, stk__->cap * sizeof(HCFChoice*)); + } + assert(stk__->cap >= 1); + stk__->stack[stk__->count++] = choice; +} + +static inline void h_cfstack_begin_seq(HAllocator *mm__, HCFStack *stk__) { + HCFChoice *top = stk__->stack[stk__->count-1]; + for (int i = 0;; i++) { + if (top->seq[i] == NULL) { + top->seq = mm__->realloc(mm__, top->seq, sizeof(HCFSequence*) * (i+2)); + HCFSequence *seq = top->seq[i] = h_new(HCFSequence, 1); + top->seq[i+1] = NULL; + seq->items = h_new(HCFChoice*, 1); + seq->items[0] = NULL; + return; + } + } +} + +static inline void h_cfstack_end_seq(HAllocator *mm__, HCFStack *stk__) UNUSED; +static inline void h_cfstack_end_seq(HAllocator *mm__, HCFStack *stk__) { + // do nothing. You should call this anyway. +} + +static inline void h_cfstack_end_choice(HAllocator *mm__, HCFStack *stk__) UNUSED; +static inline void h_cfstack_end_choice(HAllocator *mm__, HCFStack *stk__) { + assert(stk__->count > 0); + stk__->last_completed = stk__->stack[stk__->count-1]; + stk__->count--; +} + +#define HCFS_APPEND(choice) h_cfstack_add_to_seq(mm__, stk__, (choice)) +#define HCFS_DESUGAR(parser) h_desugar(mm__, stk__, parser) +#define HCFS_ADD_CHARSET(charset) h_cfstack_add_charset(mm__, stk__, (charset)) +#define HCFS_ADD_CHAR(chr) h_cfstack_add_char(mm__, stk__, (chr)) +#define HCFS_ADD_END() h_cfstack_add_end(mm__, stk__) +// The semicolons on BEGIN macros are intentional; pretend that they +// are control structures. +#define HCFS_BEGIN_CHOICE() h_cfstack_begin_choice(mm__, stk__); +#define HCFS_BEGIN_SEQ() h_cfstack_begin_seq(mm__, stk__); +#define HCFS_END_CHOICE() h_cfstack_end_choice(mm__, stk__) +#define HCFS_END_SEQ() h_cfstack_end_seq(mm__, stk__) +#define HCFS_THIS_CHOICE (stk__->stack[stk__->count-1]) + +#endif diff --git a/src/backends/llk.c b/src/backends/llk.c index 59ec790a367e1ea72ed1a427de76a2827329c3db..aeafd6a0cd5ba7a44734c69d5bf87a09cb17d617 100644 --- a/src/backends/llk.c +++ b/src/backends/llk.c @@ -108,7 +108,7 @@ static void stringmap_merge(HHashSet *workset, HCFStringMap *dst, HCFStringMap * if(src->epsilon_branch) { if(dst->epsilon_branch) dst->epsilon_branch = - combine_entries(workset, dst->epsilon_branch, src->epsilon_branch); + combine_entries(workset, dst->epsilon_branch, src->epsilon_branch); else dst->epsilon_branch = src->epsilon_branch; } else { @@ -122,7 +122,7 @@ static void stringmap_merge(HHashSet *workset, HCFStringMap *dst, HCFStringMap * if(src->end_branch) { if(dst->end_branch) dst->end_branch = - combine_entries(workset, dst->end_branch, src->end_branch); + combine_entries(workset, dst->end_branch, src->end_branch); else dst->end_branch = src->end_branch; } @@ -412,10 +412,10 @@ HParseResult *h_llk_parse(HAllocator* mm__, const HParser* parser, HInputStream* h_delete_arena(tarena); return make_result(arena, seq->elements[0]); - no_parse: - h_delete_arena(tarena); - h_delete_arena(arena); - return NULL; + no_parse: + h_delete_arena(tarena); + h_delete_arena(arena); + return NULL; } @@ -459,8 +459,8 @@ int test_llk(void) h_pprint_symbolset(stdout, g, g->geneps, 0); printf("first(A) = "); h_pprint_stringset(stdout, h_first(3, g, g->start), 0); - //printf("follow(C) = "); - //h_pprint_stringset(stdout, h_follow(3, g, h_desugar(&system_allocator, c)), 0); + // printf("follow(C) = "); + // h_pprint_stringset(stdout, h_follow(3, g, h_desugar(&system_allocator, NULL, c)), 0); if(h_compile(p, PB_LLk, (void *)3)) { fprintf(stderr, "does not compile\n"); diff --git a/src/backends/regex.c b/src/backends/regex.c index 4389bc9172d99a175621fc6488326d3dfc38d07e..6f069bec8726d87b7c2ca435b535af437c5513bf 100644 --- a/src/backends/regex.c +++ b/src/backends/regex.c @@ -354,6 +354,9 @@ static int h_regex_compile(HAllocator *mm__, HParser* parser, const void* params if (!parser->vtable->isValidRegular(parser->env)) return 1; HRVMProg *prog = h_new(HRVMProg, 1); + prog->length = prog->action_count = 0; + prog->insns = NULL; + prog->actions = NULL; prog->allocator = mm__; if (!h_compile_regex(prog, parser)) { h_free(prog->insns); diff --git a/src/cfgrammar.c b/src/cfgrammar.c index 0ce3cbf2ff8046d93b1a1f70ffa4379521e1489e..a69123073cb7f1cfed12355aada8ee7ad8c00d1b 100644 --- a/src/cfgrammar.c +++ b/src/cfgrammar.c @@ -46,7 +46,7 @@ static void collect_geneps(HCFGrammar *grammar); HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser) { // convert parser to CFG form ("desugar"). - HCFChoice *desugared = h_desugar(mm__, parser); + HCFChoice *desugared = h_desugar(mm__, NULL, parser); if(desugared == NULL) return NULL; // -> backend not suitable for this parser @@ -65,6 +65,8 @@ HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser) nt->seq[0]->items[0] = desugared; nt->seq[0]->items[1] = NULL; nt->seq[1] = NULL; + nt->pred = NULL; + nt->action = NULL; nt->reshape = h_act_first; h_hashset_put(g->nts, nt); g->start = nt; diff --git a/src/desugar.c b/src/desugar.c index 6117c2212af77be8cc2db4abbc8ccffa83101753..ce87ca326b1d8ad533c8b9eecbaf81e4513eb488 100644 --- a/src/desugar.c +++ b/src/desugar.c @@ -1,10 +1,20 @@ #include "hammer.h" #include "internal.h" +#include "backends/contextfree.h" -HCFChoice *h_desugar(HAllocator *mm__, const HParser *parser) { +HCFChoice *h_desugar(HAllocator *mm__, HCFStack *stk__, const HParser *parser) { + HCFStack *nstk__ = stk__; if(parser->desugared == NULL) { + if (nstk__ == NULL) { + nstk__ = h_cfstack_new(mm__); + } // we're going to do something naughty and cast away the const to memoize - ((HParser *)parser)->desugared = parser->vtable->desugar(mm__, parser->env); + parser->vtable->desugar(mm__, nstk__, parser->env); + ((HParser *)parser)->desugared = nstk__->last_completed; + if (stk__ == NULL) + h_cfstack_free(mm__, nstk__); + } else if (stk__ != NULL) { + HCFS_APPEND(parser->desugared); } return parser->desugared; diff --git a/src/glue.c b/src/glue.c index 48bd222e59aa51dd19174b7153627e4d4ccc31f1..5e3804d0e0ec266cf3185867c975dada86f785dc 100644 --- a/src/glue.c +++ b/src/glue.c @@ -207,7 +207,7 @@ const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) switch(p->token_type) { case TT_SEQUENCE: // Flatten and append all. - for(size_t i; i<p->seq->used; i++) { + for(size_t i = 0; i<p->seq->used; i++) { h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); } break; diff --git a/src/internal.h b/src/internal.h index 7655afa0a8fc62416f5fb771b55bac0f86d7be07..926bf02a6e54da52cf193443a12d0e3c7547ef35 100644 --- a/src/internal.h +++ b/src/internal.h @@ -49,6 +49,7 @@ static inline void h_generic_free(HAllocator *allocator, void* ptr) { } extern HAllocator system_allocator; +typedef struct HCFStack_ HCFStack; typedef struct HInputStream_ { @@ -236,7 +237,7 @@ HParser *h_new_parser(HAllocator *mm__, const HParserVtable *vt, void *env) { return p; } -HCFChoice *h_desugar(HAllocator *mm__, const HParser *parser); +HCFChoice *h_desugar(HAllocator *mm__, HCFStack *stk__, const HParser *parser); HCountedArray *h_carray_new_sized(HArena * arena, size_t size); HCountedArray *h_carray_new(HArena * arena); @@ -276,8 +277,9 @@ HHashValue h_hash_ptr(const void *p); typedef struct HCFSequence_ HCFSequence; -typedef struct HCFChoice_ { - enum { + +struct HCFChoice_ { + enum HCFChoiceType { HCF_END, HCF_CHOICE, HCF_CHARSET, @@ -292,7 +294,7 @@ typedef struct HCFChoice_ { // to execute before action and pred are applied. HAction action; HPredicate pred; -} HCFChoice; +}; struct HCFSequence_ { HCFChoice **items; // last one is NULL @@ -303,7 +305,7 @@ struct HParserVtable_ { bool (*isValidRegular)(void *env); bool (*isValidCF)(void *env); bool (*compile_to_rvm)(HRVMProg *prog, void* env); // FIXME: forgot what the bool return value was supposed to mean. - HCFChoice* (*desugar)(HAllocator *mm__, void *env); + void (*desugar)(HAllocator *mm__, HCFStack *stk__, void *env); }; bool h_false(void*); diff --git a/src/parsers/action.c b/src/parsers/action.c index b00426a73646bf9ecb637777c62adafd49e231de..52c9bc1e9fdb12c0274f350d546dcde4a73036f6 100644 --- a/src/parsers/action.c +++ b/src/parsers/action.c @@ -20,20 +20,16 @@ static HParseResult* parse_action(void *env, HParseState *state) { return NULL; } -static HCFChoice* desugar_action(HAllocator *mm__, void *env) { +static void desugar_action(HAllocator *mm__, HCFStack *stk__, void *env) { HParseAction *a = (HParseAction*)env; - HCFSequence *seq = h_new(HCFSequence, 1); - seq->items = h_new(HCFChoice*, 2); - seq->items[0] = h_desugar(mm__, a->p); - seq->items[1] = NULL; - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = seq; - ret->seq[1] = NULL; - ret->action = a->action; - ret->reshape = h_act_first; - return ret; + + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_DESUGAR(a->p); + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->action = a->action; + HCFS_THIS_CHOICE->reshape = h_act_first; + } HCFS_END_CHOICE(); } static bool action_isValidRegular(void *env) { diff --git a/src/parsers/and.c b/src/parsers/and.c index 49d43870be4a130655d303c66d10eb7487bb4d40..dfd91871574be6b3d5adbbb33508a0997e4e3f8f 100644 --- a/src/parsers/and.c +++ b/src/parsers/and.c @@ -9,11 +9,6 @@ static HParseResult *parse_and(void* env, HParseState* state) { return NULL; } -static HCFChoice* desugar_and(HAllocator *mm__, void *env) { - assert_message(0, "Not context-free, can't be desugared"); - return NULL; -} - static const HParserVtable and_vt = { .parse = parse_and, .isValidRegular = h_false, /* TODO: strictly speaking this should be regular, @@ -21,7 +16,6 @@ static const HParserVtable and_vt = { to get right, so we're leaving it for a future revision. --mlp, 18/12/12 */ .isValidCF = h_false, /* despite TODO above, this remains false. */ - .desugar = desugar_and, .compile_to_rvm = h_not_regular, }; diff --git a/src/parsers/attr_bool.c b/src/parsers/attr_bool.c index a05caa1f2471aab259e895df961c8402e78b4066..fc980b24f8eaff2fbf29da79cc630ac0a6cdd988 100644 --- a/src/parsers/attr_bool.c +++ b/src/parsers/attr_bool.c @@ -33,20 +33,16 @@ static bool ab_isValidCF(void *env) { return ab->p->vtable->isValidCF(ab->p->env); } -static HCFChoice* desugar_ab(HAllocator *mm__, void *env) { +static void desugar_ab(HAllocator *mm__, HCFStack *stk__, void *env) { + HAttrBool *a = (HAttrBool*)env; - HCFSequence *seq = h_new(HCFSequence, 1); - seq->items = h_new(HCFChoice*, 2); - seq->items[0] = h_desugar(mm__, a->p); - seq->items[1] = NULL; - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = seq; - ret->seq[1] = NULL; - ret->pred = a->pred; - ret->reshape = h_act_first; - return ret; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_DESUGAR(a->p); + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->pred = a->pred; + HCFS_THIS_CHOICE->reshape = h_act_first; + } HCFS_END_CHOICE(); } static bool h_svm_action_attr_bool(HArena *arena, HSVMContext *ctx, void* arg) { diff --git a/src/parsers/bits.c b/src/parsers/bits.c index e153e3a86f2aa3716583fe37bf5cf44124ec5d9e..93b4aef60e9bac7f5fa112d3087192197d3dd0ca 100644 --- a/src/parsers/bits.c +++ b/src/parsers/bits.c @@ -52,41 +52,25 @@ static HParsedToken *reshape_bits_signed(const HParseResult *p) { return reshape_bits(p, true); } -static HCFChoice* desugar_bits(HAllocator *mm__, void *env) { +static void desugar_bits(HAllocator *mm__, HCFStack *stk__, void *env) { struct bits_env *bits = (struct bits_env*)env; - if (0 != bits->length % 8) - return NULL; // can't handle non-byte-aligned for now + assert (0 == bits->length % 8); HCharset match_all = new_charset(mm__); for (int i = 0; i < 256; i++) charset_set(match_all, i, 1); - HCFChoice *match_all_choice = h_new(HCFChoice, 1); - match_all_choice->type = HCF_CHARSET; - match_all_choice->charset = match_all; - match_all_choice->action = NULL; - - size_t n = bits->length/8; - HCFSequence *seq = h_new(HCFSequence, 1); - seq->items = h_new(HCFChoice*, n+1); - for (size_t i=0; i<n; ++i) { - seq->items[i] = match_all_choice; - } - seq->items[n] = NULL; - - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = seq; - ret->seq[1] = NULL; - ret->action = NULL; - - if(bits->signedp) - ret->reshape = reshape_bits_signed; - else - ret->reshape = reshape_bits_unsigned; - - return ret; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + size_t n = bits->length/8; + for (size_t i=0; i<n; ++i) { + HCFS_ADD_CHARSET(match_all); + } + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = bits->signedp + ? reshape_bits_signed + : reshape_bits_unsigned; + } HCFS_END_CHOICE(); } static bool h_svm_action_bits(HArena *arena, HSVMContext *ctx, void* env) { diff --git a/src/parsers/butnot.c b/src/parsers/butnot.c index 1400e3652d98bd72e932725ebd83c57b63b98e0b..f114a1fa5dbff8cdbee6bdf22670c271c2044e2e 100644 --- a/src/parsers/butnot.c +++ b/src/parsers/butnot.c @@ -35,16 +35,10 @@ static HParseResult* parse_butnot(void *env, HParseState *state) { } } -static HCFChoice* desugar_butnot(HAllocator *mm__, void *env) { - assert_message(0, "'h_butnot' is not context-free, can't be desugared"); - return NULL; -} - static const HParserVtable butnot_vt = { .parse = parse_butnot, .isValidRegular = h_false, .isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF? - .desugar = desugar_butnot, .compile_to_rvm = h_not_regular, }; diff --git a/src/parsers/ch.c b/src/parsers/ch.c index 0de61e49cfa158328196217b745524b06b880228..9ee3f2930ec40e21464597dbf2f982aba159cdcf 100644 --- a/src/parsers/ch.c +++ b/src/parsers/ch.c @@ -13,12 +13,8 @@ static HParseResult* parse_ch(void* env, HParseState *state) { } } -static HCFChoice* desugar_ch(HAllocator *mm__, void *env) { - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHAR; - ret->chr = (uint8_t)(unsigned long)(env); - ret->action = NULL; - return ret; +static void desugar_ch(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_ADD_CHAR( (uint8_t)(unsigned long)(env) ); } static bool h_svm_action_ch(HArena *arena, HSVMContext *ctx, void* env) { diff --git a/src/parsers/charset.c b/src/parsers/charset.c index db4c2e777216ce798179bd930d28ea4ad4685cd1..e1a910f8df149a16cb74fd7c661c5490e6d80198 100644 --- a/src/parsers/charset.c +++ b/src/parsers/charset.c @@ -15,12 +15,8 @@ static HParseResult* parse_charset(void *env, HParseState *state) { return NULL; } -static HCFChoice* desugar_charset(HAllocator *mm__, void *env) { - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHARSET; - ret->charset = (HCharset)env; - ret->action = NULL; - return ret; +static void desugar_charset(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_ADD_CHARSET( (HCharset)env ); } static bool h_svm_action_ch(HArena *arena, HSVMContext *ctx, void* env) { diff --git a/src/parsers/choice.c b/src/parsers/choice.c index 67b3742059e869c357d473575968e9ec610f931e..6db1378f1c01dea66698b8632548808b4fbe2984 100644 --- a/src/parsers/choice.c +++ b/src/parsers/choice.c @@ -39,20 +39,16 @@ static bool choice_isValidCF(void *env) { return true; } -static HCFChoice* desugar_choice(HAllocator *mm__, void *env) { +static void desugar_choice(HAllocator *mm__, HCFStack *stk__, void *env) { HSequence *s = (HSequence*)env; - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 1+s->len); - for (size_t i=0; i<s->len; ++i) { - ret->seq[i] = h_new(HCFSequence, 1); - ret->seq[i]->items = h_new(HCFChoice*, 2); - ret->seq[i]->items[0] = h_desugar(mm__, s->p_array[i]); - ret->seq[i]->items[1] = NULL; - } - ret->seq[s->len] = NULL; - ret->reshape = h_act_first; - return ret; + HCFS_BEGIN_CHOICE() { + for (size_t i = 0; i < s->len; i++) { + HCFS_BEGIN_SEQ() { + HCFS_DESUGAR(s->p_array[i]); + } HCFS_END_SEQ(); + } + HCFS_THIS_CHOICE->reshape = h_act_first; + } HCFS_END_CHOICE(); } static bool choice_ctrvm(HRVMProg *prog, void* env) { diff --git a/src/parsers/difference.c b/src/parsers/difference.c index 4da0521495a8fc39f415389becf8ba4a3a816b2f..76a2cc447002da5a0e04119c016f7bf83fec443e 100644 --- a/src/parsers/difference.c +++ b/src/parsers/difference.c @@ -34,16 +34,10 @@ static HParseResult* parse_difference(void *env, HParseState *state) { } } -static HCFChoice* desugar_difference(HAllocator *mm__, void *env) { - assert_message(0, "'h_difference' is not context-free, can't be desugared"); - return NULL; -} - static HParserVtable difference_vt = { .parse = parse_difference, .isValidRegular = h_false, .isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF? - .desugar = desugar_difference, .compile_to_rvm = h_not_regular, }; diff --git a/src/parsers/end.c b/src/parsers/end.c index fa8ab8b35c5d3b016a79769b04f405b175532d57..30b3ba121a859b87399a59dc04dc86f3a6104a88 100644 --- a/src/parsers/end.c +++ b/src/parsers/end.c @@ -10,11 +10,8 @@ static HParseResult* parse_end(void *env, HParseState *state) { } } -static HCFChoice* desugar_end(HAllocator *mm__, void *env) { - static HCFChoice ret = { - .type = HCF_END - }; - return &ret; +static void desugar_end(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_ADD_END(); } static bool end_ctrvm(HRVMProg *prog, void *env) { diff --git a/src/parsers/epsilon.c b/src/parsers/epsilon.c index 92e394779ee007105387391d6c348ffe55da5c8a..e8ef525ff79d523ab45c6357cfb852a6c3b4dd96 100644 --- a/src/parsers/epsilon.c +++ b/src/parsers/epsilon.c @@ -25,6 +25,9 @@ HParser* h_epsilon_p() { } HParser* h_epsilon_p__m(HAllocator* mm__) { HParser *epsilon_p = h_new(HParser, 1); + epsilon_p->desugared = NULL; + epsilon_p->backend_data = NULL; + epsilon_p->backend = 0; epsilon_p->vtable = &epsilon_vt; return epsilon_p; } diff --git a/src/parsers/ignore.c b/src/parsers/ignore.c index 178d97076cf2c677e88c49e62d6dc0456e94f2ae..9aa993aecd4689fe38ac8541f5ea79ce6c17d043 100644 --- a/src/parsers/ignore.c +++ b/src/parsers/ignore.c @@ -21,22 +21,13 @@ static bool ignore_isValidCF(void *env) { return (p->vtable->isValidCF(p->env)); } -static HCFChoice* desugar_ignore(HAllocator *mm__, void *env) { - HParser *p = (HParser*)env; - - HCFChoice *ret = h_new(HCFChoice, 1); - HCFChoice *a = h_desugar(mm__, p); - - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = h_new(HCFSequence, 1); - ret->seq[0]->items = h_new(HCFChoice*, 2); - ret->seq[0]->items[0] = a; - ret->seq[0]->items[1] = NULL; - ret->seq[1] = NULL; - ret->reshape = h_act_ignore; - - return ret; +static void desugar_ignore(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_DESUGAR( (HParser*)env ); + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = h_act_ignore; + } HCFS_END_CHOICE(); } static bool h_svm_action_pop(HArena *arena, HSVMContext *ctx, void* arg) { diff --git a/src/parsers/ignoreseq.c b/src/parsers/ignoreseq.c index 8fcc143b8a4b9a6b2248d9a4de5596560593d58d..e562136fdf94eb28cf3f0796463d72f22f42932a 100644 --- a/src/parsers/ignoreseq.c +++ b/src/parsers/ignoreseq.c @@ -31,31 +31,24 @@ static HParseResult* parse_ignoreseq(void* env, HParseState *state) { return res; } -static HCFChoice* desugar_ignoreseq(HAllocator *mm__, void *env) { +static void desugar_ignoreseq(HAllocator *mm__, HCFStack *stk__, void *env) { HIgnoreSeq *seq = (HIgnoreSeq*)env; - HCFSequence *hseq = h_new(HCFSequence, 1); - hseq->items = h_new(HCFChoice*, 1+seq->len); - for (size_t i=0; i<seq->len; ++i) { - hseq->items[i] = h_desugar(mm__, seq->parsers[i]); - } - hseq->items[seq->len] = NULL; - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = hseq; - ret->seq[1] = NULL; - ret->action = NULL; - - if(seq->which == 0) - ret->reshape = h_act_first; - else if(seq->which == 1) - ret->reshape = h_act_second; // for h_middle - else if(seq->which == seq->len-1) - ret->reshape = h_act_last; - else - ret->reshape = NULL; // XXX - - return ret; + + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + for (size_t i=0; i<seq->len; ++i) + HCFS_DESUGAR(seq->parsers[i]); + } HCFS_END_SEQ(); + + if(seq->which == 0) + HCFS_THIS_CHOICE->reshape = h_act_first; + else if(seq->which == 1) + HCFS_THIS_CHOICE->reshape = h_act_second; // for h_middle + else if(seq->which == seq->len-1) + HCFS_THIS_CHOICE->reshape = h_act_last; + else + assert(!"Ignoreseq must select item 0, 1, or n-1"); + } HCFS_END_CHOICE(); } static bool is_isValidRegular(void *env) { diff --git a/src/parsers/indirect.c b/src/parsers/indirect.c index 5835d59d7197a39b4af5780f64e4df4cf51d8ad6..746f1a9ee37a0fbcdf6558cf7670290d34a76972 100644 --- a/src/parsers/indirect.c +++ b/src/parsers/indirect.c @@ -9,9 +9,8 @@ static bool indirect_isValidCF(void *env) { return p->vtable->isValidCF(p->env); } -static HCFChoice* desugar_indirect(HAllocator *mm__, void *env) { - HParser *p = (HParser*)env; - return h_desugar(mm__, p); +static void desugar_indirect(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_DESUGAR( (HParser*)env ); } static const HParserVtable indirect_vt = { diff --git a/src/parsers/int_range.c b/src/parsers/int_range.c index d67a786f4bad8314bcc643f3ee7be12999713a2a..2937993034c9b18a98f8aeeda7a8eaa6014bdd99 100644 --- a/src/parsers/int_range.c +++ b/src/parsers/int_range.c @@ -28,85 +28,54 @@ static HParseResult* parse_int_range(void *env, HParseState *state) { } } -HCFChoice* gen_int_range(HAllocator *mm__, uint64_t low, uint64_t high, uint8_t bytes) { +void gen_int_range(HAllocator *mm__, HCFStack *stk__, uint64_t low, uint64_t high, uint8_t bytes) { /* Possible FIXME: TallerThanMe */ if (1 == bytes) { - HCFChoice *cs = h_new(HCFChoice, 1); - cs->type = HCF_CHARSET; - cs->charset = new_charset(mm__); + HCharset cs = new_charset(mm__); for (uint64_t i=low; i<=high; ++i) { - charset_set(cs->charset, i, 1); + charset_set(cs, i, 1); } - cs->action = NULL; - return cs; + HCFS_ADD_CHARSET(cs); } else if (1 < bytes) { uint8_t low_head, hi_head; low_head = ((low >> (8*(bytes - 1))) & 0xFF); hi_head = ((high >> (8*(bytes - 1))) & 0xFF); if (low_head != hi_head) { - HCFChoice *root = h_new(HCFChoice, 1); - root->type = HCF_CHOICE; - root->seq = h_new(HCFSequence*, 4); - root->seq[0] = h_new(HCFSequence, 1); - root->seq[0]->items = h_new(HCFChoice*, 3); - root->seq[0]->items[0] = h_new(HCFChoice, 1); - root->seq[0]->items[0]->type = HCF_CHAR; - root->seq[0]->items[0]->chr = low_head; - root->seq[0]->items[0]->action = NULL; - root->seq[0]->items[1] = gen_int_range(mm__, low & ((1 << (8 * (bytes - 1))) - 1), ((1 << (8*(bytes-1)))-1), bytes-1); - root->seq[0]->items[2] = NULL; - root->seq[1] = h_new(HCFSequence, 1); - root->seq[1]->items = h_new(HCFChoice*, bytes+1); - root->seq[1]->items[0] = h_new(HCFChoice, 2); - root->seq[1]->items[0]->type = HCF_CHARSET; - root->seq[1]->items[0]->charset = new_charset(mm__); - root->seq[1]->items[0]->action = NULL; - root->seq[1]->items[1] = root->seq[1]->items[0] + 1; - root->seq[1]->items[1]->type = HCF_CHARSET; - root->seq[1]->items[1]->charset = new_charset(mm__); - for (int i = 0; i < 256; i++) { - charset_set(root->seq[1]->items[0]->charset, i, (i > low_head && i < hi_head)); - charset_set(root->seq[1]->items[1]->charset, i, 1); - } - root->seq[1]->items[1]->action = NULL; - for (int i = 2; i < bytes; i++) - root->seq[1]->items[i] = root->seq[1]->items[1]; - root->seq[1]->items[bytes] = NULL; - root->seq[2] = h_new(HCFSequence, 1); - root->seq[2]->items = h_new(HCFChoice*, 3); - root->seq[2]->items[0] = h_new(HCFChoice, 1); - root->seq[2]->items[0]->type = HCF_CHAR; - root->seq[2]->items[0]->type = hi_head; - root->seq[2]->items[0]->action = NULL; - root->seq[2]->items[1] = gen_int_range(mm__, 0, high & ((1 << (8 * (bytes - 1))) - 1), bytes-1); - root->seq[2]->items[2] = NULL; - root->seq[3] = NULL; - root->action = NULL; - return root; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_ADD_CHAR(low_head); + gen_int_range(mm__, stk__, low & ((1 << (8 * (bytes - 1))) - 1), ((1 << (8*(bytes-1)))-1), bytes-1); + } HCFS_END_SEQ(); + HCFS_BEGIN_SEQ() { + HCharset hd = new_charset(mm__); + HCharset rest = new_charset(mm__); + for (int i = 0; i < 256; i++) { + charset_set(hd, i, (i > low_head && i < hi_head)); + charset_set(rest, i, 1); + } + HCFS_ADD_CHARSET(hd); + for (int i = 2; i < bytes; i++) + HCFS_ADD_CHARSET(rest); + } HCFS_END_SEQ(); + HCFS_BEGIN_SEQ() { + HCFS_ADD_CHAR(hi_head); + gen_int_range(mm__, stk__, 0, high & ((1 << (8 * (bytes - 1))) - 1), bytes-1); + } HCFS_END_SEQ(); + } HCFS_END_CHOICE(); } else { - HCFChoice *root = h_new(HCFChoice, 1); - root->type = HCF_CHOICE; - root->seq = h_new(HCFSequence*, 2); - root->seq[0] = h_new(HCFSequence, 1); - root->seq[0]->items = h_new(HCFChoice*, 3); - root->seq[0]->items[0] = h_new(HCFChoice, 1); - root->seq[0]->items[0]->type = HCF_CHAR; - root->seq[0]->items[0]->chr = low_head; - root->seq[0]->items[0]->action = NULL; - root->seq[0]->items[1] = gen_int_range(mm__, - low & ((1 << (8 * (bytes - 1))) - 1), - high & ((1 << (8 * (bytes - 1))) - 1), - bytes - 1); - root->seq[0]->items[2] = NULL; - root->seq[1] = NULL; - root->action = NULL; - return root; + // TODO: find a way to merge this with the higher-up SEQ + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_ADD_CHAR(low_head); + gen_int_range(mm__, stk__, + low & ((1 << (8 * (bytes - 1))) - 1), + high & ((1 << (8 * (bytes - 1))) - 1), + bytes - 1); + } HCFS_END_SEQ(); + } HCFS_END_CHOICE(); } } - else { // idk why this would ever be <1, but whatever - return NULL; - } } struct bits_env { @@ -114,11 +83,11 @@ struct bits_env { uint8_t signedp; }; -static HCFChoice* desugar_int_range(HAllocator *mm__, void *env) { +static void desugar_int_range(HAllocator *mm__, HCFStack *stk__, void *env) { HRange *r = (HRange*)env; struct bits_env* be = (struct bits_env*)r->p->env; uint8_t bytes = be->length / 8; - return gen_int_range(mm__, r->lower, r->upper, bytes); + gen_int_range(mm__, stk__, r->lower, r->upper, bytes); } bool h_svm_action_validate_int_range(HArena *arena, HSVMContext *ctx, void* env) { diff --git a/src/parsers/many.c b/src/parsers/many.c index 8185203c907b9350e2dc577db515373ab9603e65..a095940cc0b4283d23f4ac19d8d51228719e87a2 100644 --- a/src/parsers/many.c +++ b/src/parsers/many.c @@ -59,11 +59,25 @@ static bool many_isValidCF(void *env) { repeat->sep->vtable->isValidCF(repeat->sep->env))); } -static HCFChoice* desugar_many(HAllocator *mm__, void *env) { +static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) { + // TODO: refactor this. HRepeat *repeat = (HRepeat*)env; + if (!repeat->min_p) { + assert(!"Unreachable"); + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + for (size_t i = 0; i < repeat->count; i++) { + if (i != 0 && repeat->sep != NULL) + HCFS_DESUGAR(repeat->sep); // Should be ignored. + HCFS_DESUGAR(repeat->p); + } + } HCFS_END_SEQ(); + } HCFS_END_CHOICE(); + return; + } if(repeat->count > 1) { assert_message(0, "'h_repeat_n' is not context-free, can't be desugared"); - return NULL; + return; } /* many(A) => @@ -73,53 +87,29 @@ static HCFChoice* desugar_many(HAllocator *mm__, void *env) { -> \epsilon */ - HParser *epsilon = h_epsilon_p__m(mm__); - - HCFChoice *sep = h_desugar(mm__, (repeat->sep != NULL) ? repeat->sep : epsilon); - HCFChoice *a = h_desugar(mm__, repeat->p); - HCFChoice *ma = h_new(HCFChoice, 1); - HCFChoice *mar = h_new(HCFChoice, 1); - HCFChoice *eps = desugar_epsilon(mm__, NULL); - - /* create first subrule */ - ma->type = HCF_CHOICE; - ma->seq = h_new(HCFSequence*, 3); /* enough for 2 productions */ - ma->seq[0] = h_new(HCFSequence, 1); - ma->seq[0]->items = h_new(HCFChoice*, 3); - ma->seq[0]->items[0] = a; - ma->seq[0]->items[1] = mar; - ma->seq[0]->items[2] = NULL; - ma->seq[1] = NULL; - - /* if not many1/sepBy1, attach epsilon */ - if (repeat->count == 0) { - ma->seq[1] = h_new(HCFSequence, 1); - ma->seq[1]->items = h_new(HCFChoice*, 2); - ma->seq[1]->items[0] = eps; - ma->seq[1]->items[1] = NULL; - ma->seq[2] = NULL; - } - - /* create second subrule */ - mar->type = HCF_CHOICE; - mar->seq = h_new(HCFSequence*, 3); - mar->seq[0] = h_new(HCFSequence, 1); - mar->seq[0]->items = h_new(HCFChoice*, 4); - mar->seq[0]->items[0] = sep; - mar->seq[0]->items[1] = a; - mar->seq[0]->items[2] = mar; // woo recursion! - mar->seq[0]->items[3] = NULL; - mar->seq[1] = h_new(HCFSequence, 1); - mar->seq[1]->items = h_new(HCFChoice*, 2); - mar->seq[1]->items[0] = eps; - mar->seq[1]->items[1] = NULL; - mar->seq[2] = NULL; - - /* attach reshapers */ - sep->reshape = h_act_ignore; - ma->reshape = h_act_flatten; - - return ma; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_DESUGAR(repeat->p); + HCFS_BEGIN_CHOICE() { // Mar + HCFS_BEGIN_SEQ() { + if (repeat->sep != NULL) { + HCFS_DESUGAR(h_ignore__m(mm__, repeat->sep)); + } + //stk__->last_completed->reshape = h_act_ignore; // BUG: This modifies a memoized entry. + HCFS_DESUGAR(repeat->p); + HCFS_APPEND(HCFS_THIS_CHOICE); + } HCFS_END_SEQ(); + HCFS_BEGIN_SEQ() { + } HCFS_END_SEQ(); + } HCFS_END_CHOICE(); // Mar + } + if (repeat->count == 0) { + HCFS_BEGIN_SEQ() { + //HCFS_DESUGAR(h_ignore__m(mm__, h_epsilon_p())); + } HCFS_END_SEQ(); + } + HCFS_THIS_CHOICE->reshape = h_act_flatten; + } HCFS_END_CHOICE(); } static bool many_ctrvm(HRVMProg *prog, void *env) { @@ -266,16 +256,10 @@ static HParseResult* parse_length_value(void *env, HParseState *state) { return parse_many(&repeat, state); } -static HCFChoice* desugar_length_value(HAllocator *mm__, void *env) { - assert_message(0, "'h_length_value' is not context-free, can't be desugared"); - return NULL; -} - static const HParserVtable length_value_vt = { .parse = parse_length_value, .isValidRegular = h_false, .isValidCF = h_false, - .desugar = desugar_length_value, }; HParser* h_length_value(const HParser* length, const HParser* value) { diff --git a/src/parsers/not.c b/src/parsers/not.c index 61b632214f60e7b9664e661241b507e98366a77c..8bf45dda21062699fbd744063c020ab382fa6a97 100644 --- a/src/parsers/not.c +++ b/src/parsers/not.c @@ -10,16 +10,10 @@ static HParseResult* parse_not(void* env, HParseState* state) { } } -static HCFChoice* desugar_not(HAllocator *mm__, void *env) { - assert_message(0, "'h_not' is not context-free, can't be desugared"); - return NULL; -} - static const HParserVtable not_vt = { .parse = parse_not, .isValidRegular = h_false, /* see and.c for why */ - .isValidCF = h_false, /* also see and.c for why */ - .desugar = desugar_not, + .isValidCF = h_false, .compile_to_rvm = h_not_regular, // Is actually regular, but the generation step is currently unable to handle it. TODO: fix this. }; diff --git a/src/parsers/nothing.c b/src/parsers/nothing.c index 645a2137cbdec111c94839848a2217395d58459e..120c1e01d0824ab5a70e39f96c2c19657ea0bf18 100644 --- a/src/parsers/nothing.c +++ b/src/parsers/nothing.c @@ -5,13 +5,9 @@ static HParseResult* parse_nothing() { return NULL; } -static HCFChoice *desugar_nothing(HAllocator *mm__, void *env) { - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 1); - ret->seq[0] = NULL; - ret->action = NULL; - return ret; +static void desugar_nothing(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_BEGIN_CHOICE() { + } HCFS_END_CHOICE(); } static bool nothing_ctrvm(HRVMProg *prog, void* env) { diff --git a/src/parsers/optional.c b/src/parsers/optional.c index 87ba541b91310bae7bd20b30fe2b9387501c0045..c4282a914fb81ae82b1b2ec18bd5d91ec001b5c3 100644 --- a/src/parsers/optional.c +++ b/src/parsers/optional.c @@ -26,16 +26,18 @@ static HParsedToken* reshape_optional(const HParseResult *p) { assert(p->ast); assert(p->ast->token_type == TT_SEQUENCE); - HParsedToken *res = p->ast->seq->elements[0]; - if(res) - return res; + if (p->ast->seq->used > 0) { + HParsedToken *res = p->ast->seq->elements[0]; + if(res) + return res; + } HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); ret->token_type = TT_NONE; return ret; } -static HCFChoice* desugar_optional(HAllocator *mm__, void *env) { +static void desugar_optional(HAllocator *mm__, HCFStack *stk__, void *env) { HParser *p = (HParser*) env; /* optional(A) => @@ -43,28 +45,14 @@ static HCFChoice* desugar_optional(HAllocator *mm__, void *env) { -> \epsilon */ - HCFChoice *ret = h_new(HCFChoice, 1); - HCFChoice *a = h_desugar(mm__, p); - HCFChoice *eps = desugar_epsilon(mm__, NULL); - - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 3); /* enough for 2 productions */ - - ret->seq[0] = h_new(HCFSequence, 1); - ret->seq[0]->items = h_new(HCFChoice*, 2); - ret->seq[0]->items[0] = a; - ret->seq[0]->items[1] = NULL; - - ret->seq[1] = h_new(HCFSequence, 1); - ret->seq[1]->items = h_new(HCFChoice*, 2); - ret->seq[1]->items[0] = eps; - ret->seq[1]->items[1] = NULL; - - ret->seq[2] = NULL; - - ret->reshape = reshape_optional; - - return ret; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_DESUGAR(p); + } HCFS_END_SEQ(); + HCFS_BEGIN_SEQ() { + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = reshape_optional; + } HCFS_END_CHOICE(); } static bool h_svm_action_optional(HArena *arena, HSVMContext *ctx, void *env) { diff --git a/src/parsers/parser_internal.h b/src/parsers/parser_internal.h index af5f959531c3f8c9a9d1491c8fe8ce937c6a7599..aeb202bf410a53b262e87cc3eadde603b9e11e8a 100644 --- a/src/parsers/parser_internal.h +++ b/src/parsers/parser_internal.h @@ -3,6 +3,7 @@ #include "../hammer.h" #include "../internal.h" #include "../backends/regex.h" +#include "../backends/contextfree.h" #define a_new_(arena, typ, count) ((typ*)h_arena_malloc((arena), sizeof(typ)*(count))) #define a_new(typ, count) a_new_(state->arena, typ, count) @@ -25,17 +26,12 @@ static inline size_t token_length(HParseResult *pr) { } /* Epsilon rules happen during desugaring. This handles them. */ -static inline HCFChoice* desugar_epsilon(HAllocator *mm__, void *env) { - static HCFChoice *res_seq_l[] = {NULL}; - static HCFSequence res_seq = {res_seq_l}; - static HCFSequence *res_ch_l[] = {&res_seq, NULL}; - static HCFChoice res_ch = { - .type = HCF_CHOICE, - .seq = res_ch_l, - .action = NULL, - .reshape = h_act_ignore - }; - return &res_ch; +static inline void desugar_epsilon(HAllocator *mm__, HCFStack *stk__, void *env) { + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = h_act_ignore; + } HCFS_END_CHOICE(); } #endif // HAMMER_PARSE_INTERNAL__H diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c index aa600231d06ccbf461a855c51658909f0c66e866..eff46109219caf7eaf4f178b4b5f96693d1ee85e 100644 --- a/src/parsers/sequence.c +++ b/src/parsers/sequence.c @@ -64,22 +64,15 @@ static HParsedToken *reshape_sequence(const HParseResult *p) { return res; } -static HCFChoice* desugar_sequence(HAllocator *mm__, void *env) { +static void desugar_sequence(HAllocator *mm__, HCFStack *stk__, void *env) { HSequence *s = (HSequence*)env; - HCFSequence *seq = h_new(HCFSequence, 1); - seq->items = h_new(HCFChoice*, s->len+1); - for (size_t i=0; i<s->len; ++i) { - seq->items[i] = h_desugar(mm__, s->p_array[i]); - } - seq->items[s->len] = NULL; - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = seq; - ret->seq[1] = NULL; - ret->action = NULL; - ret->reshape = reshape_sequence; - return ret; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + for (size_t i = 0; i < s->len; i++) + HCFS_DESUGAR(s->p_array[i]); + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = reshape_sequence; + } HCFS_END_CHOICE(); } static bool sequence_ctrvm(HRVMProg *prog, void *env) { diff --git a/src/parsers/token.c b/src/parsers/token.c index 2346a45e0f36d64ccf6a87df07c9ea89f067989d..97886b7afaef94124028c927e354198a705352d1 100644 --- a/src/parsers/token.c +++ b/src/parsers/token.c @@ -44,25 +44,15 @@ static HParsedToken *reshape_token(const HParseResult *p) { return tok; } -static HCFChoice* desugar_token(HAllocator *mm__, void *env) { +static void desugar_token(HAllocator *mm__, HCFStack *stk__, void *env) { HToken *tok = (HToken*)env; - HCFSequence *seq = h_new(HCFSequence, 1); - seq->items = h_new(HCFChoice*, 1+tok->len); - for (size_t i=0; i<tok->len; ++i) { - seq->items[i] = h_new(HCFChoice, 1); - seq->items[i]->type = HCF_CHAR; - seq->items[i]->chr = tok->str[i]; - } - seq->items[tok->len] = NULL; - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = seq; - ret->seq[1] = NULL; - ret->action = NULL; - ret->pred = NULL; - ret->reshape = reshape_token; - return ret; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + for (size_t i = 0; i < tok->len; i++) + HCFS_ADD_CHAR(tok->str[i]); + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = reshape_token; + } HCFS_END_CHOICE(); } static bool token_ctrvm(HRVMProg *prog, void *env) { diff --git a/src/parsers/unimplemented.c b/src/parsers/unimplemented.c index 3b8a8236f05e8a2c9912faabddfe8d46715f7e6a..e3f3039407eacaa1d24689767a4a1038fce66a93 100644 --- a/src/parsers/unimplemented.c +++ b/src/parsers/unimplemented.c @@ -12,16 +12,11 @@ static HParseResult* parse_unimplemented(void* env, HParseState *state) { return &result; } -static HCFChoice* desugar_unimplemented(HAllocator *mm__, void *env) { - assert_message(0, "'h_unimplemented' is not context-free, can't be desugared"); - return NULL; -} - static const HParserVtable unimplemented_vt = { .parse = parse_unimplemented, .isValidRegular = h_false, .isValidCF = h_false, - .desugar = desugar_unimplemented, + .desugar = NULL, .compile_to_rvm = h_not_regular, }; diff --git a/src/parsers/whitespace.c b/src/parsers/whitespace.c index 454e04ed3633d79e9f58bcd64b911a3e8ab26bdb..737affde3bad042e057b8a6c6c1f983116b18786 100644 --- a/src/parsers/whitespace.c +++ b/src/parsers/whitespace.c @@ -17,39 +17,26 @@ static HParseResult* parse_whitespace(void* env, HParseState *state) { static const char SPACE_CHRS[6] = {' ', '\f', '\n', '\r', '\t', '\v'}; -static HCFChoice* desugar_whitespace(HAllocator *mm__, void *env) { - HCFChoice *ws = h_new(HCFChoice, 1); - ws->type = HCF_CHOICE; - ws->seq = h_new(HCFSequence*, 3); - HCFSequence *nonempty = h_new(HCFSequence, 1); - nonempty->items = h_new(HCFChoice*, 3); - nonempty->items[0] = h_new(HCFChoice, 1); - nonempty->items[0]->type = HCF_CHARSET; - nonempty->items[0]->charset = new_charset(mm__); - for(size_t i=0; i<sizeof(SPACE_CHRS); i++) - charset_set(nonempty->items[0]->charset, SPACE_CHRS[i], 1); - nonempty->items[1] = ws; // yay circular pointer! - nonempty->items[2] = NULL; - ws->seq[0] = nonempty; - HCFSequence *empty = h_new(HCFSequence, 1); - empty->items = h_new(HCFChoice*, 1); - empty->items[0] = NULL; - ws->seq[1] = empty; - ws->seq[2] = NULL; - - HCFChoice *ret = h_new(HCFChoice, 1); - ret->type = HCF_CHOICE; - ret->seq = h_new(HCFSequence*, 2); - ret->seq[0] = h_new(HCFSequence, 1); - ret->seq[0]->items = h_new(HCFChoice*, 3); - ret->seq[0]->items[0] = ws; - ret->seq[0]->items[1] = h_desugar(mm__, (HParser *)env); - ret->seq[0]->items[2] = NULL; - ret->seq[1] = NULL; +static void desugar_whitespace(HAllocator *mm__, HCFStack *stk__, void *env) { - ret->reshape = h_act_last; + HCharset ws_cs = new_charset(mm__); + for(size_t i=0; i<sizeof(SPACE_CHRS); i++) + charset_set(ws_cs, SPACE_CHRS[i], 1); - return ret; + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_BEGIN_CHOICE() { + HCFS_BEGIN_SEQ() { + HCFS_ADD_CHARSET(ws_cs); + HCFS_APPEND(HCFS_THIS_CHOICE); // yay circular pointer! + } HCFS_END_SEQ(); + HCFS_BEGIN_SEQ() { + } HCFS_END_SEQ(); + } HCFS_END_CHOICE(); + HCFS_DESUGAR( (HParser*)env ); + } HCFS_END_SEQ(); + HCFS_THIS_CHOICE->reshape = h_act_last; + } HCFS_END_CHOICE(); } static bool ws_isValidRegular(void *env) { diff --git a/src/parsers/xor.c b/src/parsers/xor.c index ccd37fdcdf19e934f75f6bac1195d064ec7eba72..e031d5d542f80d345324c746e63d255e3b308655 100644 --- a/src/parsers/xor.c +++ b/src/parsers/xor.c @@ -31,16 +31,10 @@ static HParseResult* parse_xor(void *env, HParseState *state) { } } -static HCFChoice* desugar_xor(HAllocator *mm__, void *env) { - assert_message(0, "'h_xor' is not context-free, can't be desugared"); - return NULL; -} - static const HParserVtable xor_vt = { .parse = parse_xor, .isValidRegular = h_false, .isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF? - .desugar = desugar_xor, .compile_to_rvm = h_not_regular, }; diff --git a/src/system_allocator.c b/src/system_allocator.c index 5f3e48440996892d1335fbc50f6dc282099e39c4..b34810fa3ba29db6de3c0aa43e74fa29f9aed77b 100644 --- a/src/system_allocator.c +++ b/src/system_allocator.c @@ -2,9 +2,14 @@ #include <stdlib.h> #include "internal.h" +//#define DEBUG__MEMFILL 0xFF + static void* system_alloc(HAllocator *allocator, size_t size) { - void* ptr = calloc(size + sizeof(size_t), 1); + void* ptr = malloc(size + sizeof(size_t)); +#ifdef DEBUG__MEMFILL + memset(ptr, DEBUG__MEMFILL, size + sizeof(size_t)); +#endif *(size_t*)ptr = size; return ptr + sizeof(size_t); } @@ -13,10 +18,12 @@ static void* system_realloc(HAllocator *allocator, void* ptr, size_t size) { if (ptr == NULL) return system_alloc(allocator, size); ptr = realloc(ptr - sizeof(size_t), size + sizeof(size_t)); - size_t old_size = *(size_t*)ptr; *(size_t*)ptr = size; +#ifdef DEBUG__MEMFILL + size_t old_size = *(size_t*)ptr; if (size > old_size) - memset(ptr+sizeof(size_t)+old_size, 0, size - old_size); + memset(ptr+sizeof(size_t)+old_size, DEBUG__MEMFILL, size - old_size); +#endif return ptr + sizeof(size_t); } diff --git a/src/test_suite.h b/src/test_suite.h index f618712693f371340e99a0bb49aedd148f47448f..168ab641ba7968730deea69ad8aa0df09b47650c 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -162,28 +162,28 @@ #define g_check_terminal(grammar, parser) \ - g_check_hashtable_absent(grammar->nts, h_desugar(&system_allocator, parser)) + g_check_hashtable_absent(grammar->nts, h_desugar(&system_allocator, NULL, parser)) #define g_check_nonterminal(grammar, parser) \ - g_check_hashtable_present(grammar->nts, h_desugar(&system_allocator, parser)) + g_check_hashtable_present(grammar->nts, h_desugar(&system_allocator, NULL, parser)) #define g_check_derives_epsilon(grammar, parser) \ - g_check_hashtable_present(grammar->geneps, h_desugar(&system_allocator, parser)) + g_check_hashtable_present(grammar->geneps, h_desugar(&system_allocator, NULL, parser)) #define g_check_derives_epsilon_not(grammar, parser) \ - g_check_hashtable_absent(grammar->geneps, h_desugar(&system_allocator, parser)) + g_check_hashtable_absent(grammar->geneps, h_desugar(&system_allocator, NULL, parser)) #define g_check_firstset_present(k, grammar, parser, str) \ - g_check_stringmap_present(h_first(k, grammar, h_desugar(&system_allocator, parser)), str) + g_check_stringmap_present(h_first(k, grammar, h_desugar(&system_allocator, NULL, parser)), str) #define g_check_firstset_absent(k, grammar, parser, str) \ - g_check_stringmap_absent(h_first(k, grammar, h_desugar(&system_allocator, parser)), str) + g_check_stringmap_absent(h_first(k, grammar, h_desugar(&system_allocator, NULL, parser)), str) #define g_check_followset_present(k, grammar, parser, str) \ - g_check_stringmap_present(h_follow(k, grammar, h_desugar(&system_allocator, parser)), str) + g_check_stringmap_present(h_follow(k, grammar, h_desugar(&system_allocator, NULL, parser)), str) #define g_check_followset_absent(k, grammar, parser, str) \ - g_check_stringmap_absent(h_follow(k, grammar, h_desugar(&system_allocator, parser)), str) + g_check_stringmap_absent(h_follow(k, grammar, h_desugar(&system_allocator, NULL, parser)), str)