diff --git a/src/hammer.c b/src/hammer.c
index c369f64b72ab487ff30e9aff553b5310667c9d43..b1d4ec94a6353e00aa84db1e70379d58e3bb1d2b 100644
--- a/src/hammer.c
+++ b/src/hammer.c
@@ -93,3 +93,11 @@ bool h_true(void* env) {
   (void)env;
   return true;
 }
+
+/* compile_to_rvm implementation for parsers with no regular-expression VM
+ * form: emits no instructions and always reports failure. */
+bool h_not_regular(HRVMProg *prog, void *env) {
+  (void)prog;
+  (void)env;
+  return false;
+}
diff --git a/src/internal.h b/src/internal.h
index 116af899a1162d14b84d3bca3845c4c12c974f00..f7082dd0c05223fb3f6d416e861341ab9d5ccdb9 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -225,6 +225,7 @@ void h_hashtable_free(HHashTable* ht);
 
 bool h_false(void*);
 bool h_true(void*);
+bool h_not_regular(HRVMProg*, void*);
 
 #if 0
 #include <stdlib.h>
diff --git a/src/parsers/action.c b/src/parsers/action.c
index 33e33bc38844fbdb114605cae12368fa3cc96190..05a9b2240d22bbb24d210a86cd547106c22c06a4 100644
--- a/src/parsers/action.c
+++ b/src/parsers/action.c
@@ -29,10 +29,18 @@ static bool action_isValidCF(void *env) {
   return a->p->vtable->isValidCF(a->p->env);
 }
 
+/* Compiles the wrapped parser to RVM code by calling its compile_to_rvm
+ * entry directly. No instruction for the semantic action is emitted here. */
+static bool action_ctrvm(HRVMProg *prog, void* env) {
+  HParseAction *a = (HParseAction*)env;
+  return a->p->vtable->compile_to_rvm(prog, a->p->env);
+}
+
 static const HParserVtable action_vt = {
   .parse = parse_action,
   .isValidRegular = action_isValidRegular,
   .isValidCF = action_isValidCF,
+  .compile_to_rvm = action_ctrvm,
 };
 
 const HParser* h_action(const HParser* p, const HAction a) {
diff --git a/src/parsers/and.c b/src/parsers/and.c
index ac51be2902a12c8d1a327c7868dccae2beaf6ba9..6808d7f2865f1a6f23dd1232415fb4d7be83c013 100644
--- a/src/parsers/and.c
+++ b/src/parsers/and.c
@@ -16,6 +16,7 @@ static const HParserVtable and_vt = {
                                 to get right, so we're leaving it for a future
                                 revision. --mlp, 18/12/12 */
   .isValidCF = h_false, /* despite TODO above, this remains false. */
+  .compile_to_rvm = h_not_regular,
 };
 
 
diff --git a/src/parsers/attr_bool.c b/src/parsers/attr_bool.c
index bfc4976a62da7a83f17a7312874f783afc5a6cbd..2ccabb5f3c48df564017bcf378483ce05c9de5be 100644
--- a/src/parsers/attr_bool.c
+++ b/src/parsers/attr_bool.c
@@ -27,10 +27,17 @@ static bool ab_isValidCF(void *env) {
   return ab->p->vtable->isValidCF(ab->p->env);
 }
 
+/* Compiles the wrapped parser; no instruction for the predicate is emitted. */
+static bool ab_ctrvm(HRVMProg *prog, void *env) {
+  HAttrBool *ab = (HAttrBool*)env;
+  return h_compile_regex(prog, ab->p);
+}
+
 static const HParserVtable attr_bool_vt = {
   .parse = parse_attr_bool,
   .isValidRegular = ab_isValidRegular,
   .isValidCF = ab_isValidCF,
+  .compile_to_rvm = ab_ctrvm,
 };
 
 
diff --git a/src/parsers/bits.c b/src/parsers/bits.c
index c3a40da656cce7e9755601fbda5e5975fd9bed4c..2f7f8a9dcd9eaf00de9ccbdd8b805e149230de5b 100644
--- a/src/parsers/bits.c
+++ b/src/parsers/bits.c
@@ -16,11 +16,27 @@ static HParseResult* parse_bits(void* env, HParseState *state) {
   return make_result(state, result);
 }
 
+/* Emits PUSH, then one MATCH 0xFF00 + STEP pair per whole byte of env_->length
+ * (0xFF00 presumably encodes the full byte range -- confirm), then CAPTURE. */
+static bool bits_ctrvm(HRVMProg *prog, void* env) {
+  struct bits_env *env_ = (struct bits_env*)env;
+  h_rvm_insert_insn(prog, RVM_PUSH, 0);
+  // FUTURE: when we can handle non-byte-aligned, the env_->length/8 part will be different
+  for (size_t i=0; i < env_->length/8; ++i) {
+    h_rvm_insert_insn(prog, RVM_MATCH, 0xFF00);
+    h_rvm_insert_insn(prog, RVM_STEP, 0);
+  }
+  h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
+  return true;
+}
+
 static const HParserVtable bits_vt = {
   .parse = parse_bits,
   .isValidRegular = h_true,
   .isValidCF = h_true,
+  .compile_to_rvm = bits_ctrvm,
 };
+
 const HParser* h_bits(size_t len, bool sign) {
   return h_bits__m(&system_allocator, len, sign);
 }
diff --git a/src/parsers/butnot.c b/src/parsers/butnot.c
index 4d5bf5783ef04b7af1fb6bfaac01c84920d4b5bb..bf0cbd38e477a93f2a39d70b85b82bd8edf827a6 100644
--- a/src/parsers/butnot.c
+++ b/src/parsers/butnot.c
@@ -45,6 +45,7 @@ static const HParserVtable butnot_vt = {
   .parse = parse_butnot,
   .isValidRegular = h_false,
   .isValidCF = bn_isValidCF,
+  .compile_to_rvm = h_not_regular,
 };
 
 const HParser* h_butnot(const HParser* p1, const HParser* p2) {
diff --git a/src/parsers/charset.c b/src/parsers/charset.c
index 7341e0074372bab16bd219aacf5c46a3c4346959..d09c53592fa6a62439d850cd887645bc81a20a5b 100644
--- a/src/parsers/charset.c
+++ b/src/parsers/charset.c
@@ -32,10 +32,36 @@ static HParseResult* parse_charset(void *env, HParseState *state) {
   return NULL;
 }
 
+// FUTURE: this is horribly inefficient
+// Emits one FORK / MATCH / GOTO triple per member of the charset, then a STEP
+// that every GOTO is patched to jump to.
+// NOTE(review): the last FORK is patched to the STEP itself, so that branch
+// appears to reach STEP without any MATCH -- confirm against the VM semantics.
+static bool cs_ctrvm(HRVMProg *prog, void *env) {
+  HCharset cs = (HCharset)env;
+  uint16_t start = h_rvm_get_ip(prog);
+  for (size_t i=0; i<256; ++i) {
+    if (charset_isset(cs, i)) {
+      uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
+      // put the byte value in both halves of the 16-bit argument
+      h_rvm_insert_insn(prog, RVM_MATCH, (uint16_t)(i | (i << 8)));
+      h_rvm_insert_insn(prog, RVM_GOTO, 0);
+      h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
+    }
+  }
+  uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
+  for (size_t i=start; i<jump; ++i) {
+    if (RVM_GOTO == prog->insns[i].op)
+      h_rvm_patch_arg(prog, i, jump);
+  }
+  return true;
+}
+
 static const HParserVtable charset_vt = {
   .parse = parse_charset,
   .isValidRegular = h_true,
   .isValidCF = h_true,
+  .compile_to_rvm = cs_ctrvm,
 };
 
 const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
diff --git a/src/parsers/choice.c b/src/parsers/choice.c
index d48ed229db32b6d9b542f3054298514a8f717542..435437afcbbedf598fdf699dc2f82a62c033fd8c 100644
--- a/src/parsers/choice.c
+++ b/src/parsers/choice.c
@@ -39,10 +39,33 @@ static bool choice_isValidCF(void *env) {
   return true;
 }
 
+// Emits FORK / <alternative> / GOTO for each alternative; every GOTO is patched
+// to the instruction emitted after the last alternative.
+// NOTE(review): that trailing instruction is an RVM_STEP, and the last FORK
+// is patched to it as well -- confirm both are intended.
+static bool choice_ctrvm(HRVMProg *prog, void* env) {
+  HSequence *s = (HSequence*)env;
+  uint16_t gotos[s->len];
+  for (size_t i=0; i<s->len; ++i) {
+    uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
+    if (!h_compile_regex(prog, s->p_array[i]))
+      return false;
+    gotos[i] = h_rvm_insert_insn(prog, RVM_GOTO, 0);
+    h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
+  }
+  uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
+  // gotos[] is indexed by alternative, so walk it from 0, not from an ip.
+  for (size_t i=0; i<s->len; ++i) {
+    h_rvm_patch_arg(prog, gotos[i], jump);
+  }
+  return true;
+}
+
 static const HParserVtable choice_vt = {
   .parse = parse_choice,
   .isValidRegular = choice_isValidRegular,
   .isValidCF = choice_isValidCF,
+  .compile_to_rvm = choice_ctrvm,
 };
 
 const HParser* h_choice(const HParser* p, ...) {
diff --git a/src/parsers/difference.c b/src/parsers/difference.c
index e3c41ec33a7f2e36b80f681bf0de9e8f71faa2b4..9c378625275964c9819a71f9d0bd3f240b24daea 100644
--- a/src/parsers/difference.c
+++ b/src/parsers/difference.c
@@ -44,6 +44,7 @@ static HParserVtable difference_vt = {
   .parse = parse_difference,
   .isValidRegular = h_false,
   .isValidCF = diff_isValidCF,
+  .compile_to_rvm = h_not_regular,
 };
 
 const HParser* h_difference(const HParser* p1, const HParser* p2) {
diff --git a/src/parsers/end.c b/src/parsers/end.c
index 45ba37d99ae0df6a282df0540513064b3ecf030a..2927cd0198bbfcc42950fb8ae186953fd4d2026f 100644
--- a/src/parsers/end.c
+++ b/src/parsers/end.c
@@ -10,10 +10,18 @@ static HParseResult* parse_end(void *env, HParseState *state) {
   }
 }
 
+/* Emits a single RVM_EOF instruction. */
+static bool end_ctrvm(HRVMProg *prog, void *env) {
+  (void)env;
+  h_rvm_insert_insn(prog, RVM_EOF, 0);
+  return true;
+}
+
 static const HParserVtable end_vt = {
   .parse = parse_end,
   .isValidRegular = h_true,
   .isValidCF = h_true,
+  .compile_to_rvm = end_ctrvm,
 };
 
 const HParser* h_end_p() {
diff --git a/src/parsers/epsilon.c b/src/parsers/epsilon.c
index 71dcfe74a7d6ac8739296ea39477ed7acdd53fd2..974cfa3223eca02b55d9dd3fe760b55448ac8694 100644
--- a/src/parsers/epsilon.c
+++ b/src/parsers/epsilon.c
@@ -8,7 +8,7 @@ static HParseResult* parse_epsilon(void* env, HParseState* state) {
   return res;
 }
 
-static bool episilon_ctrvm(HRVMProg *prog, void* env) {
+static bool epsilon_ctrvm(HRVMProg *prog, void* env) {
   return true;
 }
 
@@ -16,7 +16,7 @@ static const HParserVtable epsilon_vt = {
   .parse = parse_epsilon,
   .isValidRegular = h_true,
   .isValidCF = h_true,
-  .compile_to_rvm = episilon_ctrvm,
+  .compile_to_rvm = epsilon_ctrvm,
 };
 
 static const HParser epsilon_p = {
diff --git a/src/parsers/ignore.c b/src/parsers/ignore.c
index 6191eff91ffe068049b597ef11662fc4311a924c..ee734d815e6558d368e0f3754629d3be9e92dc71 100644
--- a/src/parsers/ignore.c
+++ b/src/parsers/ignore.c
@@ -1,4 +1,5 @@
 #include "parser_internal.h"
+#include "backends/regex_actions.h"
 
 static HParseResult* parse_ignore(void* env, HParseState* state) {
   HParseResult *res0 = h_do_parse((HParser*)env, state);
@@ -20,10 +21,21 @@ static bool ignore_isValidCF(void *env) {
   return (p->vtable->isValidCF(p->env));
 }
 
+/* Compiles the wrapped parser, then emits an action that runs
+ * h_svm_action_pop. Fails if the wrapped parser cannot be compiled. */
+static bool ignore_ctrvm(HRVMProg *prog, void *env) {
+  HParser *p = (HParser*)env;
+  if (!h_compile_regex(prog, p))
+    return false;
+  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_pop, NULL));
+  return true;
+}
+
 static const HParserVtable ignore_vt = {
   .parse = parse_ignore,
   .isValidRegular = ignore_isValidRegular,
   .isValidCF = ignore_isValidCF,
+  .compile_to_rvm = ignore_ctrvm,
 };
 
 const HParser* h_ignore(const HParser* p) {
diff --git a/src/parsers/ignoreseq.c b/src/parsers/ignoreseq.c
index 319f1ebe3eb3058a78e03bec6b39112b1a19f6d0..97b3e841c994011a97df84e291f53f549a0c745a 100644
--- a/src/parsers/ignoreseq.c
+++ b/src/parsers/ignoreseq.c
@@ -5,7 +5,7 @@
 // general case: parse sequence, pick one result
 //
 
-typedef struct {
+typedef struct HIgnoreSeq_ {
   const HParser **parsers;
   size_t len; // how many parsers in 'ps'
   size_t which; // whose result to return
@@ -44,10 +44,50 @@ static bool is_isValidCF(void *env) {
   return true;
 }
 
+// Stack action for h_ignoreseq: walks the sub-parsers from last to first,
+// remembers the token left by sub-parser 'which', drops every sub-parser's
+// tokens together with its mark, then pushes the remembered token back.
+// Returns false if the stack runs out before a mark is found.
+static bool h_svm_action_ignoreseq(HArena *arena, HSVMContext *ctx, void* env) {
+  (void)arena;
+  HIgnoreSeq *seq = (HIgnoreSeq*)env;
+  HParsedToken* save = NULL;
+  // We can assume that each subitem generated at most one item on the
+  // stack.
+  for (size_t i = seq->len; i-- > 0; ) {
+    // the top of the stack is slot stack_count-1, not stack_count
+    if (i == seq->which && ctx->stack_count > 0 &&
+        ctx->stack[ctx->stack_count-1]->token_type != TT_MARK)
+      save = ctx->stack[ctx->stack_count-1];
+    // skip over everything up to and including the mark.
+    do {
+      if (ctx->stack_count == 0)
+        return false;
+    } while (ctx->stack[--ctx->stack_count]->token_type != TT_MARK);
+  }
+  // the selected sub-parser may have produced nothing; push only a real token
+  if (save != NULL)
+    ctx->stack[ctx->stack_count++] = save;
+  return true;
+}
+
+// Emits PUSH followed by each sub-parser's code, then the ignoreseq action.
+static bool is_ctrvm(HRVMProg *prog, void* env) {
+  HIgnoreSeq *seq = (HIgnoreSeq*)env;
+  for (size_t i=0; i<seq->len; ++i) {
+    h_rvm_insert_insn(prog, RVM_PUSH, 0);
+    if (!h_compile_regex(prog, seq->parsers[i]))
+      return false;
+  }
+  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_ignoreseq, env));
+  return true;
+}
+
 static const HParserVtable ignoreseq_vt = {
   .parse = parse_ignoreseq,
   .isValidRegular = is_isValidRegular,
   .isValidCF = is_isValidCF,
+  .compile_to_rvm = is_ctrvm,
 };
 
 
diff --git a/src/parsers/indirect.c b/src/parsers/indirect.c
index 4415f067e8f5e26ddf29f4490c33af03b90613f1..9cc568660403bc664562165538badc7a336a32a1 100644
--- a/src/parsers/indirect.c
+++ b/src/parsers/indirect.c
@@ -14,6 +14,7 @@ static const HParserVtable indirect_vt = {
   .parse = parse_indirect,
   .isValidRegular = h_false,
   .isValidCF = indirect_isValidCF,
+  .compile_to_rvm = h_not_regular,
 };
 
 void h_bind_indirect(HParser* indirect, const HParser* inner) {
diff --git a/src/parsers/int_range.c b/src/parsers/int_range.c
index d1920589a90bbda21ad1ebf8af48135e69a45774..40607d030ef6815a800944580becfe3997644992 100644
--- a/src/parsers/int_range.c
+++ b/src/parsers/int_range.c
@@ -28,10 +28,39 @@ static HParseResult* parse_int_range(void *env, HParseState *state) {
   }
 }
 
+/* Stack action: succeeds iff the integer token on top of the stack lies in
+ * [lower, upper] of the HRange passed as env. Non-integer tokens fail. */
+bool h_svm_action_validate_int_range(HArena *arena, HSVMContext *ctx, void* env) {
+  (void)arena;
+  HRange *r_env = (HRange*)env;
+  if (ctx->stack_count == 0)
+    return false;
+  HParsedToken *head = ctx->stack[ctx->stack_count-1];
+  switch (head->token_type) {
+  case TT_SINT:
+    return head->sint >= r_env->lower && head->sint <= r_env->upper;
+  case TT_UINT:
+    return head->uint >= (uint64_t)r_env->lower && head->uint <= (uint64_t)r_env->upper;
+  default:
+    return false;
+  }
+}
+
+/* Compiles the wrapped parser, then emits the range-validation action. */
+static bool ir_ctrvm(HRVMProg *prog, void *env) {
+  HRange *r_env = (HRange*)env;
+
+  if (!h_compile_regex(prog, r_env->p))
+    return false;
+  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_validate_int_range, env));
+  return true;
+}
+
 static const HParserVtable int_range_vt = {
   .parse = parse_int_range,
   .isValidRegular = h_true,
   .isValidCF = h_true,
+  .compile_to_rvm = ir_ctrvm,
 };
 
 const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {
diff --git a/src/parsers/many.c b/src/parsers/many.c
index 2c2577efc2811e97e5373a2169dc4a48fb281118..8b0786db76defac86ac5722a736edd5999dd559e 100644
--- a/src/parsers/many.c
+++ b/src/parsers/many.c
@@ -56,10 +56,33 @@ static bool many_isValidCF(void *env) {
           repeat->sep->vtable->isValidCF(repeat->sep->env));
 }
 
+// Emits PUSH, then a loop: FORK, element, separator, clear_to_mark action,
+// GOTO back to the FORK; the FORK's argument is patched to the loop exit,
+// where a make_sequence action is emitted.
+static bool many_ctrvm(HRVMProg *prog, void *env) {
+  HRepeat *repeat = (HRepeat*)env;
+  // FIXME: Implement clear_to_mark
+  uint16_t clear_to_mark = h_rvm_create_action(prog, h_svm_action_clear_to_mark, NULL);
+  h_rvm_insert_insn(prog, RVM_PUSH, 0);
+  uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
+  if (!h_compile_regex(prog, repeat->p))
+    return false;
+  if (!h_compile_regex(prog, repeat->sep))
+    return false;
+  h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark);
+  h_rvm_insert_insn(prog, RVM_GOTO, insn);
+  h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
+
+  // RVM_ACTION takes an action index, so register the function first
+  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
+  return true;
+}
+
 static const HParserVtable many_vt = {
   .parse = parse_many,
   .isValidRegular = many_isValidRegular,
   .isValidCF = many_isValidCF,
+  .compile_to_rvm = many_ctrvm,
 };
 
 const HParser* h_many(const HParser* p) {
diff --git a/src/parsers/not.c b/src/parsers/not.c
index 6642d9e838d3e1f78a2737144b273e061f825b43..b01282cd9199ea2cbeccfb209146e30ddc25695d 100644
--- a/src/parsers/not.c
+++ b/src/parsers/not.c
@@ -14,6 +14,7 @@ static const HParserVtable not_vt = {
   .parse = parse_not,
   .isValidRegular = h_false, /* see and.c for why */
   .isValidCF = h_false, /* also see and.c for why */
+  .compile_to_rvm = h_not_regular,
 };
 
 const HParser* h_not(const HParser* p) {
diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c
index 61b74ec547095302487422f564cde417a4814d4b..6bfab0d5a1df4f459786f8473f52def392ebae09 100644
--- a/src/parsers/sequence.c
+++ b/src/parsers/sequence.c
@@ -42,7 +42,7 @@ static bool sequence_isValidCF(void *env) {
   return true;
 }
 
-static bool sequence_ctrvm(struct HRVMProg_ *prog, void* env) {
+static bool sequence_ctrvm(HRVMProg *prog, void *env) {
   HSequence *s = (HSequence*)env;
   for (size_t i=0; i<s->len; ++i) {
     if (!s->p_array[i]->vtable->compile_to_rvm(prog, s->p_array[i]->env))
diff --git a/src/parsers/token.c b/src/parsers/token.c
index f5df9d4c3c3a84829a026b66fcb02c849db1d530..7050724d48ba2cdc7732a96e48f95bd3df1b84c9 100644
--- a/src/parsers/token.c
+++ b/src/parsers/token.c
@@ -20,10 +20,25 @@ static HParseResult* parse_token(void *env, HParseState *state) {
   return make_result(state, tok);
 }
 
+/* Emits PUSH, then a MATCH + STEP pair for each byte of the token, then
+ * CAPTURE. */
+static bool token_ctrvm(HRVMProg *prog, void *env) {
+  HToken *t = (HToken*)env;
+  h_rvm_insert_insn(prog, RVM_PUSH, 0);
+  for (size_t i=0; i<t->len; ++i) {
+    // put the byte value in both halves of the 16-bit argument
+    h_rvm_insert_insn(prog, RVM_MATCH, (uint16_t)(t->str[i] | (t->str[i] << 8)));
+    h_rvm_insert_insn(prog, RVM_STEP, 0);
+  }
+  h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
+  return true;
+}
+
 const HParserVtable token_vt = {
   .parse = parse_token,
   .isValidRegular = h_true,
   .isValidCF = h_true,
+  .compile_to_rvm = token_ctrvm,
 };
 
 const HParser* h_token(const uint8_t *str, const size_t len) {
diff --git a/src/parsers/unimplemented.c b/src/parsers/unimplemented.c
index 32838ce5ce499569491c4fd6599d97a4947a54b5..7c3c6671f8946fc61dd7f33ccd5971cf00e88ed8 100644
--- a/src/parsers/unimplemented.c
+++ b/src/parsers/unimplemented.c
@@ -16,6 +16,7 @@ static const HParserVtable unimplemented_vt = {
   .parse = parse_unimplemented,
   .isValidRegular = h_false,
   .isValidCF = h_false,
+  .compile_to_rvm = h_not_regular,
 };
 
 static HParser unimplemented = {
diff --git a/src/parsers/whitespace.c b/src/parsers/whitespace.c
index 7b2477bef0bfd67a41adcce99c0e5b0662b87c08..325b43181fbd7eb56336a1904cf8e3516221a775 100644
--- a/src/parsers/whitespace.c
+++ b/src/parsers/whitespace.c
@@ -24,10 +24,18 @@ static bool ws_isValidCF(void *env) {
   return p->vtable->isValidCF(p->env);
 }
 
+/* Compiles the wrapped parser only; nothing is emitted here to skip
+ * leading whitespace. */
+static bool ws_ctrvm(HRVMProg *prog, void *env) {
+  HParser *p = (HParser*)env;
+  return h_compile_regex(prog, p);
+}
+
 static const HParserVtable whitespace_vt = {
   .parse = parse_whitespace,
   .isValidRegular = ws_isValidRegular,
   .isValidCF = ws_isValidCF,
+  .compile_to_rvm = ws_ctrvm,
 };
 
 const HParser* h_whitespace(const HParser* p) {
diff --git a/src/parsers/xor.c b/src/parsers/xor.c
index 7679a645055660960c6d4f674fd30b69e81990f9..d6fff08238e8fe86df61207a62a2c4dd173f2f2d 100644
--- a/src/parsers/xor.c
+++ b/src/parsers/xor.c
@@ -41,6 +41,7 @@ static const HParserVtable xor_vt = {
   .parse = parse_xor,
   .isValidRegular = h_false,
   .isValidCF = xor_isValidCF,
+  .compile_to_rvm = h_not_regular,
 };
 
 const HParser* h_xor(const HParser* p1, const HParser* p2) {