diff --git a/examples/base64.c b/examples/base64.c index ee142e3cbfee5fa7bf2032d4f07882581916d4f7..a02297397a4942d1e2e1e003233465aa1e19a6c8 100644 --- a/examples/base64.c +++ b/examples/base64.c @@ -15,21 +15,21 @@ const HParser* document = NULL; void init_parser(void) { // CORE - const HParser *digit = h_ch_range(0x30, 0x39); - const HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL); + HParser *digit = h_ch_range(0x30, 0x39); + HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL); // AUX. - const HParser *plus = h_ch('+'); - const HParser *slash = h_ch('/'); - const HParser *equals = h_ch('='); - - const HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL); - const HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16); - const HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4); - const HParser *base64_3 = h_repeat_n(bsfdig, 4); - const HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); - const HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); - const HParser *base64 = h_sequence(h_many(base64_3), + HParser *plus = h_ch('+'); + HParser *slash = h_ch('/'); + HParser *equals = h_ch('='); + + HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL); + HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16); + HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4); + HParser *base64_3 = h_repeat_n(bsfdig, 4); + HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); + HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); + HParser *base64 = h_sequence(h_many(base64_3), h_optional(h_choice(base64_2, base64_1, NULL)), NULL); diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index f2a3e82b3ef2ce60befd3ccc8d5570937ecd7166..0a08e50082fd637b9c81bc751bb4a6620f21949b 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -22,7 +22,7 @@ // They must be named act_<rulename>. /// -const HParsedToken *act_bsfdig(const HParseResult *p) +HParsedToken *act_bsfdig(const HParseResult *p) { HParsedToken *res = H_MAKE_UINT(0); @@ -53,7 +53,7 @@ H_ACT_APPLY(act_index0, h_act_index, 0); #define act_document act_index0 // General-form action to turn a block of base64 digits into bytes. -const HParsedToken *act_base64_n(int n, const HParseResult *p) +HParsedToken *act_base64_n(int n, const HParseResult *p) { HParsedToken *res = H_MAKE_SEQN(n); @@ -82,7 +82,7 @@ H_ACT_APPLY(act_base64_3, act_base64_n, 3); H_ACT_APPLY(act_base64_2, act_base64_n, 2); H_ACT_APPLY(act_base64_1, act_base64_n, 1); -const HParsedToken *act_base64(const HParseResult *p) +HParsedToken *act_base64(const HParseResult *p) { assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->seq->used == 2); @@ -96,7 +96,7 @@ const HParsedToken *act_base64(const HParseResult *p) h_seq_append(res, seq->elements[i]); // append one trailing base64_2 or _1 block - const HParsedToken *tok = h_seq_index(p->ast, 1); + HParsedToken *tok = h_seq_index(p->ast, 1); if(tok->token_type == TT_SEQUENCE) h_seq_append(res, tok); @@ -108,7 +108,7 @@ const HParsedToken *act_base64(const HParseResult *p) // Set up the parser with the grammar to be recognized. /// -const HParser *init_parser(void) +HParser *init_parser(void) { // CORE H_RULE (digit, h_ch_range(0x30, 0x39)); diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c index 32afe5bbc1ab74077f08311c1c9d47405060e3f7..c1549cf5371affd95aaabec667cc7d5018e91f1e 100644 --- a/examples/base64_sem2.c +++ b/examples/base64_sem2.c @@ -48,7 +48,7 @@ uint8_t bsfdig_value(const HParsedToken *p) // helper: append a byte value to a sequence #define seq_append_byte(res, b) h_seq_snoc(res, H_MAKE_UINT(b)) -const HParsedToken *act_base64(const HParseResult *p) +HParsedToken *act_base64(const HParseResult *p) { assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->seq->used == 2); diff --git a/examples/dns.c b/examples/dns.c index 7887ba6a7881d41c21836effa5f3ebaffc986d56..3f730b970cd6d59677b7d8a38dcec8ea7b10ef48 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -86,7 +86,7 @@ void set_rdata(struct dns_rr *rr, HCountedArray *rdata) { } } -const HParsedToken* act_header(const HParseResult *p) { +HParsedToken* act_header(const HParseResult *p) { HParsedToken **fields = h_seq_elements(p->ast); dns_header_t header_ = { .id = H_CAST_UINT(fields[0]), @@ -109,7 +109,7 @@ const HParsedToken* act_header(const HParseResult *p) { return H_MAKE(dns_header_t, header); } -const HParsedToken* act_label(const HParseResult *p) { +HParsedToken* act_label(const HParseResult *p) { dns_label_t *r = H_ALLOC(dns_label_t); r->len = h_seq_len(p->ast); @@ -121,7 +121,7 @@ const HParsedToken* act_label(const HParseResult *p) { return H_MAKE(dns_label_t, r); } -const HParsedToken* act_rr(const HParseResult *p) { +HParsedToken* act_rr(const HParseResult *p) { dns_rr_t *rr = H_ALLOC(dns_rr_t); rr->name = *H_FIELD(dns_domain_t, 0); @@ -136,7 +136,7 @@ const HParsedToken* act_rr(const HParseResult *p) { return H_MAKE(dns_rr_t, rr); } -const HParsedToken* act_question(const HParseResult *p) { +HParsedToken* act_question(const HParseResult *p) { dns_question_t *q = H_ALLOC(dns_question_t); HParsedToken **fields = h_seq_elements(p->ast); @@ -153,7 +153,7 @@ const HParsedToken* act_question(const HParseResult *p) { return H_MAKE(dns_question_t, q); } -const HParsedToken* act_message(const HParseResult *p) { +HParsedToken* act_message(const HParseResult *p) { h_pprint(stdout, p->ast, 0, 2); dns_message_t *msg = H_ALLOC(dns_message_t); diff --git a/examples/dns_common.c b/examples/dns_common.c index 76915b66e8030b26cbf23462ccaad2d944949d05..01dd8f0fb4630680174f8fff657c70f845775624 100644 --- a/examples/dns_common.c +++ b/examples/dns_common.c @@ -18,8 +18,8 @@ bool validate_label(HParseResult *p) { #define act_label h_act_flatten -const HParsedToken* act_domain(const HParseResult *p) { - const HParsedToken *ret = NULL; +HParsedToken* act_domain(const HParseResult *p) { + HParsedToken *ret = NULL; char *arr = NULL; switch(p->ast->token_type) { @@ -56,8 +56,8 @@ const HParsedToken* act_domain(const HParseResult *p) { return ret; } -const HParser* init_domain() { - static const HParser *ret = NULL; +HParser* init_domain() { + static HParser *ret = NULL; if (ret) return ret; @@ -76,8 +76,8 @@ const HParser* init_domain() { return ret; } -const HParser* init_character_string() { - static const HParser *cstr = NULL; +HParser* init_character_string() { + static HParser *cstr = NULL; if (cstr) return cstr; diff --git a/examples/dns_common.h b/examples/dns_common.h index c1d8d7e9d66f98c666f08c95ff5d34fa93f874c3..8af014b2f22c28da36e3312b56e355e1d5500e73 100644 --- a/examples/dns_common.h +++ b/examples/dns_common.h @@ -4,9 +4,9 @@ #include "../src/hammer.h" #include "../src/glue.h" -const HParser* init_domain(); -const HParser* init_character_string(); +HParser* init_domain(); +HParser* init_character_string(); -const HParsedToken* act_index0(const HParseResult *p); +HParsedToken* act_index0(const HParseResult *p); #endif diff --git a/examples/rr.c b/examples/rr.c index 8c14e0aec8e678f86dfdbc54dc0499dd3a828d8e..2ba85341d0f444924f9801656eeb8fa94728ac3e 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -17,7 +17,7 @@ bool validate_null(HParseResult *p) { return (65536 > p->ast->seq->used); } -const HParsedToken *act_null(const HParseResult *p) { +HParsedToken *act_null(const HParseResult *p) { dns_rr_null_t *null = H_ALLOC(dns_rr_null_t); size_t len = h_seq_len(p->ast); @@ -28,7 +28,7 @@ const HParsedToken *act_null(const HParseResult *p) { return H_MAKE(dns_rr_null_t, null); } -const HParsedToken *act_txt(const HParseResult *p) { +HParsedToken *act_txt(const HParseResult *p) { dns_rr_txt_t *txt = H_ALLOC(dns_rr_txt_t); const HCountedArray *arr = H_CAST_SEQ(p->ast); @@ -47,7 +47,7 @@ const HParsedToken *act_txt(const HParseResult *p) { return H_MAKE(dns_rr_txt_t, txt); } -const HParsedToken* act_cstr(const HParseResult *p) { +HParsedToken* act_cstr(const HParseResult *p) { dns_cstr_t *cs = H_ALLOC(dns_cstr_t); const HCountedArray *arr = H_CAST_SEQ(p->ast); @@ -60,7 +60,7 @@ const HParsedToken* act_cstr(const HParseResult *p) { return H_MAKE(dns_cstr_t, cs); } -const HParsedToken* act_soa(const HParseResult *p) { +HParsedToken* act_soa(const HParseResult *p) { dns_rr_soa_t *soa = H_ALLOC(dns_rr_soa_t); soa->mname = *H_FIELD(dns_domain_t, 0); @@ -74,7 +74,7 @@ const HParsedToken* act_soa(const HParseResult *p) { return H_MAKE(dns_rr_soa_t, soa); } -const HParsedToken* act_wks(const HParseResult *p) { +HParsedToken* act_wks(const HParseResult *p) { dns_rr_wks_t *wks = H_ALLOC(dns_rr_wks_t); wks->address = H_FIELD_UINT(0); @@ -87,7 +87,7 @@ const HParsedToken* act_wks(const HParseResult *p) { return H_MAKE(dns_rr_wks_t, wks); } -const HParsedToken* act_hinfo(const HParseResult *p) { +HParsedToken* act_hinfo(const HParseResult *p) { dns_rr_hinfo_t *hinfo = H_ALLOC(dns_rr_hinfo_t); hinfo->cpu = *H_FIELD(dns_cstr_t, 0); @@ -96,7 +96,7 @@ const HParsedToken* act_hinfo(const HParseResult *p) { return H_MAKE(dns_rr_hinfo_t, hinfo); } -const HParsedToken* act_minfo(const HParseResult *p) { +HParsedToken* act_minfo(const HParseResult *p) { dns_rr_minfo_t *minfo = H_ALLOC(dns_rr_minfo_t); minfo->rmailbx = *H_FIELD(dns_domain_t, 0); @@ -105,7 +105,7 @@ const HParsedToken* act_minfo(const HParseResult *p) { return H_MAKE(dns_rr_minfo_t, minfo); } -const HParsedToken* act_mx(const HParseResult *p) { +HParsedToken* act_mx(const HParseResult *p) { dns_rr_mx_t *mx = H_ALLOC(dns_rr_mx_t); mx->preference = H_FIELD_UINT(0); @@ -120,8 +120,8 @@ const HParsedToken* act_mx(const HParseResult *p) { /// #define RDATA_TYPE_MAX 16 -const HParser* init_rdata(uint16_t type) { - static const HParser *parsers[RDATA_TYPE_MAX+1]; +HParser* init_rdata(uint16_t type) { + static HParser *parsers[RDATA_TYPE_MAX+1]; static int inited = 0; if (type >= sizeof(parsers)) diff --git a/examples/rr.h b/examples/rr.h index fce457817c7802fbd0cb77b688c99f9244bda86a..bbc1d0331fdcf6f9bcba3d5f534daca9e22ab5e0 100644 --- a/examples/rr.h +++ b/examples/rr.h @@ -3,6 +3,6 @@ #include "../src/hammer.h" -const HParser* init_rdata(uint16_t type); +HParser* init_rdata(uint16_t type); #endif diff --git a/src/backends/regex.c b/src/backends/regex.c index 12e84f63a71a1491e8ce66bf0e70db8beac7b0ea..4389bc9172d99a175621fc6488326d3dfc38d07e 100644 --- a/src/backends/regex.c +++ b/src/backends/regex.c @@ -53,7 +53,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ HRVMTrace **heads_p = a_new(HRVMTrace*, prog->length), **heads_n = a_new(HRVMTrace*, prog->length); - HRVMTrace *ret_trace; + HRVMTrace *ret_trace = NULL; uint8_t *insn_seen = a_new(uint8_t, prog->length); // 0 -> not seen, 1->processed, 2->queued HRVMThread *ip_queue = a_new(HRVMThread, prog->length); @@ -61,6 +61,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ + #define THREAD ip_queue[ipq_top-1] #define PUSH_SVM(op_, arg_) do { \ @@ -102,15 +103,18 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ uint8_t hi, lo; uint16_t arg; while(ipq_top > 0) { - if (insn_seen[THREAD.ip] == 1) + if (insn_seen[THREAD.ip] == 1) { + ipq_top--; // Kill thread. continue; + } insn_seen[THREAD.ip] = 1; arg = prog->insns[THREAD.ip].arg; switch(prog->insns[THREAD.ip].op) { case RVM_ACCEPT: PUSH_SVM(SVM_ACCEPT, 0); ret_trace = THREAD.trace; - goto run_trace; + ipq_top--; + goto next_insn; case RVM_MATCH: hi = (arg >> 8) & 0xff; lo = arg & 0xff; @@ -163,10 +167,12 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ } // No accept was reached. match_fail: - h_delete_arena(arena); - return NULL; + if (ret_trace == NULL) { + // No match found; definite failure. + h_delete_arena(arena); + return NULL; + } - run_trace: // Invert the direction of the trace linked list. ret_trace = invert_trace(ret_trace); @@ -213,8 +219,9 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, case SVM_ACTION: // Action should modify stack appropriately if (!orig_prog->actions[cur->arg].action(arena, &ctx, orig_prog->actions[cur->arg].env)) { + // action failed... abort somehow - // TODO: Actually abort + goto fail; } break; case SVM_CAPTURE: @@ -243,7 +250,7 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, return res; } } - + fail: h_delete_arena(arena); return NULL; } @@ -308,20 +315,20 @@ bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env) { HParsedToken *res = ctx->stack[ctx->stack_count - 1 - n_items]; assert (res->token_type == TT_MARK); res->token_type = TT_SEQUENCE; - + HCountedArray *ret_carray = h_carray_new_sized(arena, n_items); res->seq = ret_carray; // res index and bit offset are the same as the mark. for (size_t i = 0; i < n_items; i++) { ret_carray->elements[i] = ctx->stack[ctx->stack_count - n_items + i]; } + ret_carray->used = n_items; ctx->stack_count -= n_items; return true; } bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) { - while (ctx->stack_count > 0) { - if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK) + while (ctx->stack_count > 0) { if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK) return true; } return false; // no mark found. diff --git a/src/datastructures.c b/src/datastructures.c index 078104090940303ff747c3f9dc0473826bea679d..99b4ca5736e72b2bb84689e154be79f0397e94f8 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -9,7 +9,8 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size) { HCountedArray *ret = h_arena_malloc(arena, sizeof(HCountedArray)); - assert(size > 0); + if (size == 0) + size = 1; ret->used = 0; ret->capacity = size; ret->arena = arena; diff --git a/src/glue.c b/src/glue.c index 2cbfde6c2cf5b435c4f6b6a7f3639141b349e988..48bd222e59aa51dd19174b7153627e4d4ccc31f1 100644 --- a/src/glue.c +++ b/src/glue.c @@ -5,7 +5,7 @@ #include "parsers/parser_internal.h" // Helper to build HAction's that pick one index out of a sequence. -const HParsedToken *h_act_index(int i, const HParseResult *p) +HParsedToken *h_act_index(int i, const HParseResult *p) { if(!p) return NULL; @@ -23,7 +23,7 @@ const HParsedToken *h_act_index(int i, const HParseResult *p) return tok->seq->elements[i]; } -const HParsedToken *h_act_first(const HParseResult *p) { +HParsedToken *h_act_first(const HParseResult *p) { assert(p->ast); assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->seq->used > 0); @@ -31,7 +31,7 @@ const HParsedToken *h_act_first(const HParseResult *p) { return p->ast->seq->elements[0]; } -const HParsedToken *h_act_second(const HParseResult *p) { +HParsedToken *h_act_second(const HParseResult *p) { assert(p->ast); assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->seq->used > 0); @@ -39,7 +39,7 @@ const HParsedToken *h_act_second(const HParseResult *p) { return p->ast->seq->elements[1]; } -const HParsedToken *h_act_last(const HParseResult *p) { +HParsedToken *h_act_last(const HParseResult *p) { assert(p->ast); assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->seq->used > 0); @@ -59,7 +59,7 @@ static void act_flatten_(HCountedArray *seq, const HParsedToken *tok) { } } -const HParsedToken *h_act_flatten(const HParseResult *p) { +HParsedToken *h_act_flatten(const HParseResult *p) { HCountedArray *seq = h_carray_new(p->arena); act_flatten_(seq, p->ast); @@ -72,7 +72,7 @@ const HParsedToken *h_act_flatten(const HParseResult *p) { return res; } -const HParsedToken *h_act_ignore(const HParseResult *p) { +HParsedToken *h_act_ignore(const HParseResult *p) { return NULL; } diff --git a/src/glue.h b/src/glue.h index ece7e9ea053f95a2d5255e966f97ac60fdf8c2ba..1880988910e926c1c216dcd24cbb99022ddf7866 100644 --- a/src/glue.h +++ b/src/glue.h @@ -55,13 +55,13 @@ // -#define H_RULE(rule, def) const HParser *rule = def -#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) -#define H_VRULE(rule, def) const HParser *rule = \ +#define H_RULE(rule, def) HParser *rule = def +#define H_ARULE(rule, def) HParser *rule = h_action(def, act_ ## rule) +#define H_VRULE(rule, def) HParser *rule = \ h_attr_bool(def, validate_ ## rule) -#define H_VARULE(rule, def) const HParser *rule = \ +#define H_VARULE(rule, def) HParser *rule = \ h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule) -#define H_AVRULE(rule, def) const HParser *rule = \ +#define H_AVRULE(rule, def) HParser *rule = \ h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule) @@ -88,17 +88,17 @@ // action such as h_act_index. // -const HParsedToken *h_act_index(int i, const HParseResult *p); -const HParsedToken *h_act_first(const HParseResult *p); -const HParsedToken *h_act_second(const HParseResult *p); -const HParsedToken *h_act_last(const HParseResult *p); -const HParsedToken *h_act_flatten(const HParseResult *p); -const HParsedToken *h_act_ignore(const HParseResult *p); +HParsedToken *h_act_index(int i, const HParseResult *p); +HParsedToken *h_act_first(const HParseResult *p); +HParsedToken *h_act_second(const HParseResult *p); +HParsedToken *h_act_last(const HParseResult *p); +HParsedToken *h_act_flatten(const HParseResult *p); +HParsedToken *h_act_ignore(const HParseResult *p); // Define 'myaction' as a specialization of 'paction' by supplying the leading // parameters. #define H_ACT_APPLY(myaction, paction, ...) \ - const HParsedToken *myaction(const HParseResult *p) { \ + HParsedToken *myaction(const HParseResult *p) { \ return paction(__VA_ARGS__, p); \ } diff --git a/src/hammer.h b/src/hammer.h index b0fabd115b9cfda3e3d3bd2a66f70d8b4817c45d..455684cc92edbfbf9b9352625e373ca408f61261 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -111,7 +111,7 @@ typedef struct HBitWriter_ HBitWriter; * say, structs) and stuff values for them into the void* in the * tagged union in HParsedToken. */ -typedef const HParsedToken* (*HAction)(const HParseResult *p); +typedef HParsedToken* (*HAction)(const HParseResult *p); /** * Type of a boolean attribute-checking function, used in the @@ -605,11 +605,11 @@ void h_bit_writer_free(HBitWriter* w); // General-purpose actions for use with h_action // XXX to be consolidated with glue.h when merged upstream -const HParsedToken *h_act_first(const HParseResult *p); -const HParsedToken *h_act_second(const HParseResult *p); -const HParsedToken *h_act_last(const HParseResult *p); -const HParsedToken *h_act_flatten(const HParseResult *p); -const HParsedToken *h_act_ignore(const HParseResult *p); +HParsedToken *h_act_first(const HParseResult *p); +HParsedToken *h_act_second(const HParseResult *p); +HParsedToken *h_act_last(const HParseResult *p); +HParsedToken *h_act_flatten(const HParseResult *p); +HParsedToken *h_act_ignore(const HParseResult *p); // {{{ Benchmark functions HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, HParser* parser, HParserTestcase* testcases); diff --git a/src/parsers/action.c b/src/parsers/action.c index 12ec036f144874ea944e647444a976e76a81b764..b00426a73646bf9ecb637777c62adafd49e231de 100644 --- a/src/parsers/action.c +++ b/src/parsers/action.c @@ -1,3 +1,4 @@ +#include <assert.h> #include "parser_internal.h" typedef struct { @@ -45,9 +46,35 @@ static bool action_isValidCF(void *env) { return a->p->vtable->isValidCF(a->p->env); } +static bool h_svm_action_action(HArena *arena, HSVMContext *ctx, void* arg) { + HParseResult res; + HAction action = arg; + assert(ctx->stack_count >= 1); + if (ctx->stack[ctx->stack_count-1]->token_type != TT_MARK) { + assert(ctx->stack_count >= 2 && ctx->stack[ctx->stack_count-2]->token_type == TT_MARK); + res.ast = ctx->stack[ctx->stack_count-2] = ctx->stack[ctx->stack_count-1]; + ctx->stack_count--; + // mark replaced. + } else { + res.ast = NULL; + } + res.arena = arena; + + HParsedToken *tok = action(&res); + if (tok != NULL) + ctx->stack[ctx->stack_count-1] = tok; + else + ctx->stack_count--; + return true; // action can't fail +} + static bool action_ctrvm(HRVMProg *prog, void* env) { HParseAction *a = (HParseAction*)env; - return a->p->vtable->compile_to_rvm(prog, a->p->env); + h_rvm_insert_insn(prog, RVM_PUSH, 0); + if (!h_compile_regex(prog, a->p)) + return false; + h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_action, a->action)); + return true; } static const HParserVtable action_vt = { diff --git a/src/parsers/attr_bool.c b/src/parsers/attr_bool.c index 635806624c25fcff68bf2f827c5908060f7a3a1e..a05caa1f2471aab259e895df961c8402e78b4066 100644 --- a/src/parsers/attr_bool.c +++ b/src/parsers/attr_bool.c @@ -1,3 +1,4 @@ +#include <assert.h> #include "parser_internal.h" typedef struct { @@ -48,9 +49,30 @@ static HCFChoice* desugar_ab(HAllocator *mm__, void *env) { return ret; } +static bool h_svm_action_attr_bool(HArena *arena, HSVMContext *ctx, void* arg) { + HParseResult res; + HPredicate pred = arg; + assert(ctx->stack_count >= 1); + if (ctx->stack[ctx->stack_count-1]->token_type != TT_MARK) { + assert(ctx->stack_count >= 2 && ctx->stack[ctx->stack_count-2]->token_type == TT_MARK); + ctx->stack_count--; + res.ast = ctx->stack[ctx->stack_count-1] = ctx->stack[ctx->stack_count]; + // mark replaced. + } else { + ctx->stack_count--; + res.ast = NULL; + } + res.arena = arena; + return pred(&res); +} + static bool ab_ctrvm(HRVMProg *prog, void *env) { HAttrBool *ab = (HAttrBool*)env; - return h_compile_regex(prog, ab->p); + h_rvm_insert_insn(prog, RVM_PUSH, 0); + if (!h_compile_regex(prog, ab->p)) + return false; + h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_attr_bool, ab->pred)); + return true; } static const HParserVtable attr_bool_vt = { diff --git a/src/parsers/bits.c b/src/parsers/bits.c index 0da8bc1e757e9e8c0fee91a29ac86b735c05c076..e153e3a86f2aa3716583fe37bf5cf44124ec5d9e 100644 --- a/src/parsers/bits.c +++ b/src/parsers/bits.c @@ -45,10 +45,10 @@ static HParsedToken *reshape_bits(const HParseResult *p, bool signedp) { return ret; } -static const HParsedToken *reshape_bits_unsigned(const HParseResult *p) { +static HParsedToken *reshape_bits_unsigned(const HParseResult *p) { return reshape_bits(p, false); } -static const HParsedToken *reshape_bits_signed(const HParseResult *p) { +static HParsedToken *reshape_bits_signed(const HParseResult *p) { return reshape_bits(p, true); } diff --git a/src/parsers/choice.c b/src/parsers/choice.c index 32c49910ae72360874705541fa07f0737c938c22..67b3742059e869c357d473575968e9ec610f931e 100644 --- a/src/parsers/choice.c +++ b/src/parsers/choice.c @@ -65,6 +65,7 @@ static bool choice_ctrvm(HRVMProg *prog, void* env) { gotos[i] = h_rvm_insert_insn(prog, RVM_GOTO, 65535); h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog)); } + h_rvm_insert_insn(prog, RVM_MATCH, 0x00FF); // fail. uint16_t jump = h_rvm_get_ip(prog); for (size_t i=0; i<s->len; ++i) { h_rvm_patch_arg(prog, gotos[i], jump); diff --git a/src/parsers/ignore.c b/src/parsers/ignore.c index 62f45edf4ee7e703d54136ed293a494cf5b7930a..178d97076cf2c677e88c49e62d6dc0456e94f2ae 100644 --- a/src/parsers/ignore.c +++ b/src/parsers/ignore.c @@ -47,7 +47,7 @@ static bool h_svm_action_pop(HArena *arena, HSVMContext *ctx, void* arg) { static bool ignore_ctrvm(HRVMProg *prog, void *env) { HParser *p = (HParser*)env; - h_compile_regex(prog, p->env); + h_compile_regex(prog, p); h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_pop, NULL)); return true; } diff --git a/src/parsers/many.c b/src/parsers/many.c index 6f6e8591a2903abdb1a6c2d59783add9aa0823a1..8185203c907b9350e2dc577db515373ab9603e65 100644 --- a/src/parsers/many.c +++ b/src/parsers/many.c @@ -1,3 +1,4 @@ +#include <assert.h> #include "parser_internal.h" // TODO: split this up. @@ -14,7 +15,7 @@ static HParseResult *parse_many(void* env, HParseState *state) { HInputStream bak; while (env_->min_p || env_->count > count) { bak = state->input_stream; - if (count > 0) { + if (count > 0 && env_->sep != NULL) { HParseResult *sep = h_do_parse(env_->sep, state); if (!sep) goto err0; @@ -47,13 +48,15 @@ static HParseResult *parse_many(void* env, HParseState *state) { static bool many_isValidRegular(void *env) { HRepeat *repeat = (HRepeat*)env; return (repeat->p->vtable->isValidRegular(repeat->p->env) && - repeat->sep->vtable->isValidRegular(repeat->sep->env)); + (repeat->sep == NULL || + repeat->sep->vtable->isValidRegular(repeat->sep->env))); } static bool many_isValidCF(void *env) { HRepeat *repeat = (HRepeat*)env; return (repeat->p->vtable->isValidCF(repeat->p->env) && - repeat->sep->vtable->isValidCF(repeat->sep->env)); + (repeat->sep == NULL || + repeat->sep->vtable->isValidCF(repeat->sep->env))); } static HCFChoice* desugar_many(HAllocator *mm__, void *env) { @@ -70,7 +73,9 @@ static HCFChoice* desugar_many(HAllocator *mm__, void *env) { -> \epsilon */ - HCFChoice *sep = h_desugar(mm__, repeat->sep); + HParser *epsilon = h_epsilon_p__m(mm__); + + HCFChoice *sep = h_desugar(mm__, (repeat->sep != NULL) ? repeat->sep : epsilon); HCFChoice *a = h_desugar(mm__, repeat->p); HCFChoice *ma = h_new(HCFChoice, 1); HCFChoice *mar = h_new(HCFChoice, 1); @@ -119,24 +124,56 @@ static HCFChoice* desugar_many(HAllocator *mm__, void *env) { static bool many_ctrvm(HRVMProg *prog, void *env) { HRepeat *repeat = (HRepeat*)env; - // FIXME: Implement clear_to_mark uint16_t clear_to_mark = h_rvm_create_action(prog, h_svm_action_clear_to_mark, NULL); + // TODO: implement min & max properly. Right now, it's always + // max==inf, min={0,1} + + // Structure: + // Min == 0: + // FORK end // if Min == 0 + // GOTO mid + // nxt: <SEP> + // mid: <ELEM> + // FORK nxt + // end: + + if (repeat->min_p) { h_rvm_insert_insn(prog, RVM_PUSH, 0); - // TODO: implement min and max properly. Right now, it's always min==0, max==inf - uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0); - if (!h_compile_regex(prog, repeat->p)) - return false; - if (repeat->sep != NULL) { - h_rvm_insert_insn(prog, RVM_PUSH, 0); - if (!h_compile_regex(prog, repeat->sep)) + assert(repeat->count < 2); // TODO: The other cases should be supported later. + uint16_t end_fork; + if (repeat->count == 0) + end_fork = h_rvm_insert_insn(prog, RVM_FORK, 0xFFFF); + uint16_t goto_mid = h_rvm_insert_insn(prog, RVM_GOTO, 0xFFFF); + uint16_t nxt = h_rvm_get_ip(prog); + if (repeat->sep != NULL) { + h_rvm_insert_insn(prog, RVM_PUSH, 0); + if (!h_compile_regex(prog, repeat->sep)) + return false; + h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark); + } + h_rvm_patch_arg(prog, goto_mid, h_rvm_get_ip(prog)); + if (!h_compile_regex(prog, repeat->p)) return false; - h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark); + h_rvm_insert_insn(prog, RVM_FORK, nxt); + h_rvm_patch_arg(prog, end_fork, h_rvm_get_ip(prog)); + + h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL)); + return true; + } else { + h_rvm_insert_insn(prog, RVM_PUSH, 0); + for (size_t i = 0; i < repeat->count; i++) { + if (repeat->sep != NULL && i != 0) { + h_rvm_insert_insn(prog, RVM_PUSH, 0); + if (!h_compile_regex(prog, repeat->sep)) + return false; + h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark); + } + if (!h_compile_regex(prog, repeat->p)) + return false; + } + h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL)); + return true; } - h_rvm_insert_insn(prog, RVM_GOTO, insn); - h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog)); - - h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL)); - return true; } static const HParserVtable many_vt = { @@ -153,7 +190,7 @@ HParser* h_many(const HParser* p) { HParser* h_many__m(HAllocator* mm__, const HParser* p) { HRepeat *env = h_new(HRepeat, 1); env->p = p; - env->sep = h_epsilon_p__m(mm__); + env->sep = NULL; env->count = 0; env->min_p = true; return h_new_parser(mm__, &many_vt, env); @@ -165,7 +202,7 @@ HParser* h_many1(const HParser* p) { HParser* h_many1__m(HAllocator* mm__, const HParser* p) { HRepeat *env = h_new(HRepeat, 1); env->p = p; - env->sep = h_epsilon_p__m(mm__); + env->sep = NULL; env->count = 1; env->min_p = true; return h_new_parser(mm__, &many_vt, env); @@ -177,7 +214,7 @@ HParser* h_repeat_n(const HParser* p, const size_t n) { HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) { HRepeat *env = h_new(HRepeat, 1); env->p = p; - env->sep = h_epsilon_p__m(mm__); + env->sep = NULL; env->count = n; env->min_p = false; return h_new_parser(mm__, &many_vt, env); @@ -222,7 +259,7 @@ static HParseResult* parse_length_value(void *env, HParseState *state) { // TODO: allocate this using public functions HRepeat repeat = { .p = lv->value, - .sep = h_epsilon_p(), + .sep = NULL, .count = len->ast->uint, .min_p = false }; diff --git a/src/parsers/optional.c b/src/parsers/optional.c index 9ba2f198c9c77a93ace611f383103227f1826b4c..6cb5331adb3cfd0a36390528c42cfda82ca3c74a 100644 --- a/src/parsers/optional.c +++ b/src/parsers/optional.c @@ -22,7 +22,7 @@ static bool opt_isValidCF(void *env) { return p->vtable->isValidCF(p->env); } -static const HParsedToken* reshape_optional(const HParseResult *p) { +static HParsedToken* reshape_optional(const HParseResult *p) { assert(p->ast); assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->seq->used > 0); @@ -83,7 +83,7 @@ static bool opt_ctrvm(HRVMProg *prog, void* env) { h_rvm_insert_insn(prog, RVM_PUSH, 0); uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0); HParser *p = (HParser*) env; - if (!h_compile_regex(prog, p->env)) + if (!h_compile_regex(prog, p)) return false; h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog)); h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_optional, NULL)); diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c index 27a667a6cd0229884277744aefeca848745315fd..aa600231d06ccbf461a855c51658909f0c66e866 100644 --- a/src/parsers/sequence.c +++ b/src/parsers/sequence.c @@ -43,7 +43,7 @@ static bool sequence_isValidCF(void *env) { return true; } -static const HParsedToken *reshape_sequence(const HParseResult *p) { +static HParsedToken *reshape_sequence(const HParseResult *p) { assert(p->ast); assert(p->ast->token_type == TT_SEQUENCE); @@ -86,7 +86,7 @@ static bool sequence_ctrvm(HRVMProg *prog, void *env) { HSequence *s = (HSequence*)env; h_rvm_insert_insn(prog, RVM_PUSH, 0); for (size_t i=0; i<s->len; ++i) { - if (!s->p_array[i]->vtable->compile_to_rvm(prog, s->p_array[i])) + if (!s->p_array[i]->vtable->compile_to_rvm(prog, s->p_array[i]->env)) return false; } h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL)); diff --git a/src/parsers/token.c b/src/parsers/token.c index 9b1904264f3113f09f60e50b6b2434fb55891f0e..2346a45e0f36d64ccf6a87df07c9ea89f067989d 100644 --- a/src/parsers/token.c +++ b/src/parsers/token.c @@ -20,7 +20,7 @@ static HParseResult* parse_token(void *env, HParseState *state) { } -static const HParsedToken *reshape_token(const HParseResult *p) { +static HParsedToken *reshape_token(const HParseResult *p) { // fetch sequence of uints from p assert(p->ast); assert(p->ast->token_type == TT_SEQUENCE); diff --git a/src/t_grammar.c b/src/t_grammar.c index 8003bcf64656d20a06106a2635fa47538b6db41e..0287b2fe6eda00a1d6575e619161d18ca9f20639 100644 --- a/src/t_grammar.c +++ b/src/t_grammar.c @@ -15,9 +15,9 @@ static void test_end(void) { } static void test_example_1(void) { - const HParser *c = h_many(h_ch('x')); - const HParser *q = h_sequence(c, h_ch('y'), NULL); - const HParser *p = h_choice(q, h_end_p(), NULL); + HParser *c = h_many(h_ch('x')); + HParser *q = h_sequence(c, h_ch('y'), NULL); + HParser *p = h_choice(q, h_end_p(), NULL); HCFGrammar *g = h_cfgrammar(&system_allocator, p); g_check_nonterminal(g, c); diff --git a/src/t_parser.c b/src/t_parser.c index 961aa4efa8999b6147eb401362ac6201d44860bf..8aab7bb38e4b950e60da93e1c362b4a09ef0bbb0 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -162,7 +162,7 @@ static void test_middle(gconstpointer backend) { #include <ctype.h> -const HParsedToken* upcase(const HParseResult *p) { +HParsedToken* upcase(const HParseResult *p) { switch(p->ast->token_type) { case TT_SEQUENCE: { @@ -180,17 +180,17 @@ const HParsedToken* upcase(const HParseResult *p) { } } ret->seq = seq; - return (const HParsedToken*)ret; + return ret; } case TT_UINT: { HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); ret->token_type = TT_UINT; ret->uint = toupper(p->ast->uint); - return (const HParsedToken*)ret; + return ret; } default: - return p->ast; + return (HParsedToken*)p->ast; } } @@ -526,6 +526,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/regex/choice", GINT_TO_POINTER(PB_REGULAR), test_choice); g_test_add_data_func("/core/parser/regex/many", GINT_TO_POINTER(PB_REGULAR), test_many); g_test_add_data_func("/core/parser/regex/many1", GINT_TO_POINTER(PB_REGULAR), test_many1); + g_test_add_data_func("/core/parser/regex/repeat_n", GINT_TO_POINTER(PB_REGULAR), test_repeat_n); g_test_add_data_func("/core/parser/regex/optional", GINT_TO_POINTER(PB_REGULAR), test_optional); g_test_add_data_func("/core/parser/regex/sepBy", GINT_TO_POINTER(PB_REGULAR), test_sepBy); g_test_add_data_func("/core/parser/regex/sepBy1", GINT_TO_POINTER(PB_REGULAR), test_sepBy1);