From 07d35c72ff07afbdb59a3f51f7ecc5064ddc934d Mon Sep 17 00:00:00 2001
From: "Meredith L. Patterson" <mlp@thesmartpolitenerd.com>
Date: Mon, 22 Apr 2013 18:06:17 -0700
Subject: [PATCH] Most of RVM desugaring done in first draft, for TQ to check.

---
 src/hammer.c                |  5 +++++
 src/internal.h              |  1 +
 src/parsers/action.c        |  6 ++++++
 src/parsers/and.c           |  1 +
 src/parsers/attr_bool.c     |  6 ++++++
 src/parsers/bits.c          | 13 +++++++++++++
 src/parsers/butnot.c        |  1 +
 src/parsers/charset.c       | 21 +++++++++++++++++++++
 src/parsers/choice.c        | 19 +++++++++++++++++++
 src/parsers/difference.c    |  1 +
 src/parsers/end.c           |  6 ++++++
 src/parsers/epsilon.c       |  4 ++--
 src/parsers/ignore.c        |  9 +++++++++
 src/parsers/ignoreseq.c     | 29 ++++++++++++++++++++++++++++-
 src/parsers/indirect.c      |  1 +
 src/parsers/int_range.c     | 21 +++++++++++++++++++++
 src/parsers/many.c          | 19 +++++++++++++++++++
 src/parsers/not.c           |  1 +
 src/parsers/sequence.c      |  2 +-
 src/parsers/token.c         | 12 ++++++++++++
 src/parsers/unimplemented.c |  1 +
 src/parsers/whitespace.c    |  6 ++++++
 src/parsers/xor.c           |  1 +
 23 files changed, 182 insertions(+), 4 deletions(-)

diff --git a/src/hammer.c b/src/hammer.c
index c369f64b..b1d4ec94 100644
--- a/src/hammer.c
+++ b/src/hammer.c
@@ -93,3 +93,8 @@ bool h_true(void* env) {
   (void)env;
   return true;
 }
+
+bool h_not_regular(HRVMProg *prog, void *env) { // compile_to_rvm stub that never emits anything
+  (void)prog; (void)env; // both parameters are deliberately unused
+  return false;          // always reports that compilation failed
+}
diff --git a/src/internal.h b/src/internal.h
index 116af899..f7082dd0 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -225,6 +225,7 @@ void  h_hashtable_free(HHashTable* ht);
 
 bool h_false(void*);
 bool h_true(void*);
+bool h_not_regular(HRVMProg*, void*);
 
 #if 0
 #include <stdlib.h>
diff --git a/src/parsers/action.c b/src/parsers/action.c
index 33e33bc3..05a9b224 100644
--- a/src/parsers/action.c
+++ b/src/parsers/action.c
@@ -29,10 +29,16 @@ static bool action_isValidCF(void *env) {
   return a->p->vtable->isValidCF(a->p->env);
 }
 
+static bool action_ctrvm(HRVMProg *prog, void* env) { // compile_to_rvm entry of action_vt
+  HParseAction *a = (HParseAction*)env;
+  return a->p->vtable->compile_to_rvm(prog, a->p->env); // delegate to the wrapped parser; nothing is emitted for the action here
+}
+
 static const HParserVtable action_vt = {
   .parse = parse_action,
   .isValidRegular = action_isValidRegular,
   .isValidCF = action_isValidCF,
+  .compile_to_rvm = action_ctrvm,
 };
 
 const HParser* h_action(const HParser* p, const HAction a) {
diff --git a/src/parsers/and.c b/src/parsers/and.c
index ac51be29..6808d7f2 100644
--- a/src/parsers/and.c
+++ b/src/parsers/and.c
@@ -16,6 +16,7 @@ static const HParserVtable and_vt = {
 				to get right, so we're leaving it for a future
 				revision. --mlp, 18/12/12 */
   .isValidCF = h_false,      /* despite TODO above, this remains false. */
+  .compile_to_rvm = h_not_regular,
 };
 
 
diff --git a/src/parsers/attr_bool.c b/src/parsers/attr_bool.c
index bfc4976a..2ccabb5f 100644
--- a/src/parsers/attr_bool.c
+++ b/src/parsers/attr_bool.c
@@ -27,10 +27,16 @@ static bool ab_isValidCF(void *env) {
   return ab->p->vtable->isValidCF(ab->p->env);
 }
 
+static bool ab_ctrvm(HRVMProg *prog, void *env) { // compile_to_rvm entry of attr_bool_vt
+  HAttrBool *ab = (HAttrBool*)env;
+  return h_compile_regex(prog, ab->p); // compile only the wrapped parser; no instruction is emitted for the predicate here
+}
+
 static const HParserVtable attr_bool_vt = {
   .parse = parse_attr_bool,
   .isValidRegular = ab_isValidRegular,
   .isValidCF = ab_isValidCF,
+  .compile_to_rvm = ab_ctrvm,
 };
 
 
diff --git a/src/parsers/bits.c b/src/parsers/bits.c
index c3a40da6..2f7f8a9d 100644
--- a/src/parsers/bits.c
+++ b/src/parsers/bits.c
@@ -16,11 +16,24 @@ static HParseResult* parse_bits(void* env, HParseState *state) {
   return make_result(state, result);
 }
 
+static bool bits_ctrvm(HRVMProg *prog, void* env) { // compile_to_rvm entry of bits_vt; always returns true
+  struct bits_env *env_ = (struct bits_env*)env;
+  h_rvm_insert_insn(prog, RVM_PUSH, 0);
+  for (size_t i=0; i < env_->length/8; ++i) { // one MATCH/STEP pair per whole byte. FUTURE: when we can handle non-byte-aligned, the env_->length/8 part will be different
+    h_rvm_insert_insn(prog, RVM_MATCH, 0xFF00); // presumably "any byte" (range 0x00..0xFF) -- confirm against the MATCH encoding
+    h_rvm_insert_insn(prog, RVM_STEP, 0);
+  }
+  h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
+  return true;
+}
+
 static const HParserVtable bits_vt = {
   .parse = parse_bits,
   .isValidRegular = h_true,
   .isValidCF = h_true,
+  .compile_to_rvm = bits_ctrvm,
 };
+
 const HParser* h_bits(size_t len, bool sign) {
   return h_bits__m(&system_allocator, len, sign);
 }
diff --git a/src/parsers/butnot.c b/src/parsers/butnot.c
index 4d5bf578..bf0cbd38 100644
--- a/src/parsers/butnot.c
+++ b/src/parsers/butnot.c
@@ -45,6 +45,7 @@ static const HParserVtable butnot_vt = {
   .parse = parse_butnot,
   .isValidRegular = h_false,
   .isValidCF = bn_isValidCF,
+  .compile_to_rvm = h_not_regular,
 };
 
 const HParser* h_butnot(const HParser* p1, const HParser* p2) {
diff --git a/src/parsers/charset.c b/src/parsers/charset.c
index 7341e007..d09c5359 100644
--- a/src/parsers/charset.c
+++ b/src/parsers/charset.c
@@ -32,10 +32,31 @@ static HParseResult* parse_charset(void *env, HParseState *state) {
     return NULL;
 }
 
+// FUTURE: this is horribly inefficient (one FORK/MATCH/GOTO triple per member byte)
+static bool cs_ctrvm(HRVMProg *prog, void *env) {
+  HCharset cs = (HCharset)env;
+  uint16_t start = h_rvm_get_ip(prog);
+  for (size_t i=0; i<256; ++i) {
+    if (charset_isset(cs, i)) {
+      uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
+      h_rvm_insert_insn(prog, RVM_MATCH, i | (i << 8)); // i in both bytes (was '&', which is always 0); presumably a [lo,hi] range -- confirm
+      h_rvm_insert_insn(prog, RVM_GOTO, 0);             // target is filled in by the loop below
+      h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));  // FORK's argument = first insn after this triple
+    }
+  }
+  uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0); // NOTE(review): the last FORK's argument is also this STEP; confirm a thread that matched nothing cannot reach it
+  for (size_t i=start; i<jump; ++i) {
+    if (RVM_GOTO == prog->insns[i].op)
+      h_rvm_patch_arg(prog, i, jump);
+  }
+  return true;
+}
+
 static const HParserVtable charset_vt = {
   .parse = parse_charset,
   .isValidRegular = h_true,
   .isValidCF = h_true,
+  .compile_to_rvm = cs_ctrvm,
 };
 
 const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
diff --git a/src/parsers/choice.c b/src/parsers/choice.c
index d48ed229..435437af 100644
--- a/src/parsers/choice.c
+++ b/src/parsers/choice.c
@@ -39,10 +39,29 @@ static bool choice_isValidCF(void *env) {
   return true;
 }
 
+static bool choice_ctrvm(HRVMProg *prog, void* env) { // compile_to_rvm entry of choice_vt; false if any alternative fails to compile
+  HSequence *s = (HSequence*)env;
+  uint16_t gotos[s->len];
+  // gotos[i] is the GOTO that closes alternative i; its target is patched after the loop.
+  for (size_t i=0; i<s->len; ++i) {
+    uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
+    if (!h_compile_regex(prog, s->p_array[i])) // pass the parser itself, as the other h_compile_regex callers do
+      return false;
+    gotos[i] = h_rvm_insert_insn(prog, RVM_GOTO, 0);
+    h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog)); // FORK's argument = start of the next alternative
+  }
+  uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
+  for (size_t i=0; i<s->len; ++i) { // gotos[] is indexed by alternative number, so start at 0
+      h_rvm_patch_arg(prog, gotos[i], jump);
+  }
+  return true;
+}
+
 static const HParserVtable choice_vt = {
   .parse = parse_choice,
   .isValidRegular = choice_isValidRegular,
   .isValidCF = choice_isValidCF,
+  .compile_to_rvm = choice_ctrvm,
 };
 
 const HParser* h_choice(const HParser* p, ...) {
diff --git a/src/parsers/difference.c b/src/parsers/difference.c
index e3c41ec3..9c378625 100644
--- a/src/parsers/difference.c
+++ b/src/parsers/difference.c
@@ -44,6 +44,7 @@ static HParserVtable difference_vt = {
   .parse = parse_difference,
   .isValidRegular = h_false,
   .isValidCF = diff_isValidCF,
+  .compile_to_rvm = h_not_regular,
 };
 
 const HParser* h_difference(const HParser* p1, const HParser* p2) {
diff --git a/src/parsers/end.c b/src/parsers/end.c
index 45ba37d9..2927cd01 100644
--- a/src/parsers/end.c
+++ b/src/parsers/end.c
@@ -10,10 +10,16 @@ static HParseResult* parse_end(void *env, HParseState *state) {
   }
 }
 
+static bool end_ctrvm(HRVMProg *prog, void *env) { // compile_to_rvm entry of end_vt; env is not used
+  h_rvm_insert_insn(prog, RVM_EOF, 0); // the whole parser is a single RVM_EOF instruction
+  return true;
+}
+
 static const HParserVtable end_vt = {
   .parse = parse_end,
   .isValidRegular = h_true,
   .isValidCF = h_true,
+  .compile_to_rvm = end_ctrvm,
 };
 
 const HParser* h_end_p() {
diff --git a/src/parsers/epsilon.c b/src/parsers/epsilon.c
index 71dcfe74..974cfa32 100644
--- a/src/parsers/epsilon.c
+++ b/src/parsers/epsilon.c
@@ -8,7 +8,7 @@ static HParseResult* parse_epsilon(void* env, HParseState* state) {
   return res;
 }
 
-static bool episilon_ctrvm(HRVMProg *prog, void* env) {
+static bool epsilon_ctrvm(HRVMProg *prog, void* env) { // compile_to_rvm entry of epsilon_vt
   return true; // emits no instructions and reports success
 }
 
@@ -16,7 +16,7 @@ static const HParserVtable epsilon_vt = {
   .parse = parse_epsilon,
   .isValidRegular = h_true,
   .isValidCF = h_true,
-  .compile_to_rvm = episilon_ctrvm,
+  .compile_to_rvm = epsilon_ctrvm,
 };
 
 static const HParser epsilon_p = {
diff --git a/src/parsers/ignore.c b/src/parsers/ignore.c
index 6191eff9..ee734d81 100644
--- a/src/parsers/ignore.c
+++ b/src/parsers/ignore.c
@@ -1,4 +1,5 @@
 #include "parser_internal.h"
+#include "backends/regex_actions.h"
 
 static HParseResult* parse_ignore(void* env, HParseState* state) {
   HParseResult *res0 = h_do_parse((HParser*)env, state);
@@ -20,10 +21,18 @@ static bool ignore_isValidCF(void *env) {
   return (p->vtable->isValidCF(p->env));
 }
 
+static bool ignore_ctrvm(HRVMProg *prog, void *env) { // compile_to_rvm entry of ignore_vt
+  HParser *p = (HParser*)env; // env is the wrapped parser itself
+  if (!h_compile_regex(prog, p)) return false; // pass the parser (not p->env) and propagate failure
+  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_pop, NULL)); // then run the pop action; it takes no env
+  return true;
+}
+
 static const HParserVtable ignore_vt = {
   .parse = parse_ignore,
   .isValidRegular = ignore_isValidRegular,
   .isValidCF = ignore_isValidCF,
+  .compile_to_rvm = ignore_ctrvm,
 };
 
 const HParser* h_ignore(const HParser* p) {
diff --git a/src/parsers/ignoreseq.c b/src/parsers/ignoreseq.c
index 319f1ebe..97b3e841 100644
--- a/src/parsers/ignoreseq.c
+++ b/src/parsers/ignoreseq.c
@@ -5,7 +5,7 @@
 // general case: parse sequence, pick one result
 //
 
-typedef struct {
+typedef struct HIgnoreSeq_ {
   const HParser **parsers;
   size_t len;         // how many parsers in 'ps'
   size_t which;         // whose result to return
@@ -44,10 +44,37 @@ static bool is_isValidCF(void *env) {
   return true;
 }
 
+static bool h_svm_action_ignoreseq(HArena *arena, HSVMContext *ctx, void* env) { // SVM action: keep only element `which`
+  HIgnoreSeq *seq = (HIgnoreSeq*)env;
+  HParsedToken* save = NULL; // stays NULL if element `which` left nothing above its mark
+  // Walk the elements last-to-first. We can assume that each subitem generated
+  // at most one item on the stack, sitting above a TT_MARK (presumably from is_ctrvm's RVM_PUSH).
+  for (int i = seq->len - 1; i>=0; i--) {
+    if ((size_t)i == seq->which && ctx->stack[ctx->stack_count-1]->token_type != TT_MARK) // inspect the top slot, not one past it
+      save = ctx->stack[ctx->stack_count-1];
+    // skip over everything up to and including the mark.
+    while (ctx->stack[--ctx->stack_count]->token_type != TT_MARK) {}
+  }
+  ctx->stack[ctx->stack_count++] = save; // push the selected result back
+  return true;
+}
+
+static bool is_ctrvm(HRVMProg *prog, void* env) { // compile_to_rvm entry of ignoreseq_vt; false if any element fails to compile
+  HIgnoreSeq *seq = (HIgnoreSeq*)env;
+  for (size_t i=0; i<seq->len; ++i) {
+    h_rvm_insert_insn(prog, RVM_PUSH, 0); // one RVM_PUSH ahead of every element
+    if (!h_compile_regex(prog, seq->parsers[i])) // pass the parser itself, not its env
+      return false;
+  }
+  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_ignoreseq, env)); // the action receives this HIgnoreSeq as its env
+  return true;
+}
+
 static const HParserVtable ignoreseq_vt = {
   .parse = parse_ignoreseq,
   .isValidRegular = is_isValidRegular,
   .isValidCF = is_isValidCF,
+  .compile_to_rvm = is_ctrvm,
 };
 
 
diff --git a/src/parsers/indirect.c b/src/parsers/indirect.c
index 4415f067..9cc56866 100644
--- a/src/parsers/indirect.c
+++ b/src/parsers/indirect.c
@@ -14,6 +14,7 @@ static const HParserVtable indirect_vt = {
   .parse = parse_indirect,
   .isValidRegular = h_false,
   .isValidCF = indirect_isValidCF,
+  .compile_to_rvm = h_not_regular,
 };
 
 void h_bind_indirect(HParser* indirect, const HParser* inner) {
diff --git a/src/parsers/int_range.c b/src/parsers/int_range.c
index d1920589..40607d03 100644
--- a/src/parsers/int_range.c
+++ b/src/parsers/int_range.c
@@ -28,10 +28,31 @@ static HParseResult* parse_int_range(void *env, HParseState *state) {
   }
 }
 
+bool h_svm_action_validate_int_range(HArena *arena, HSVMContext *ctx, void* env) { // SVM action: is the top token within [lower, upper]?
+  HRange *r_env = (HRange*)env; // was "(*HRange)env", which does not parse
+  HParsedToken *head = ctx->stack[ctx->stack_count-1]; // topmost entry of the SVM stack
+  switch (head-> token_type) {
+  case TT_SINT:
+    return head->sint >= r_env->lower && head->sint <= r_env->upper;
+  case TT_UINT:
+    return head->uint >= (uint64_t)r_env->lower && head->uint <= (uint64_t)r_env->upper;
+  default: // any other token type fails validation
+    return false;
+  }
+}
+static bool ir_ctrvm(HRVMProg *prog, void *env) { // compile_to_rvm entry of int_range_vt
+  HRange *r_env = (HRange*)env; // was "(*HRange)env", which does not parse
+  if (!h_compile_regex(prog, r_env->p)) // compile the wrapped integer parser first
+    return false;
+  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_validate_int_range, env)); // then range-check its result
+  return true; // previously returned false even when everything was emitted
+}
+
 static const HParserVtable int_range_vt = {
   .parse = parse_int_range,
   .isValidRegular = h_true,
   .isValidCF = h_true,
+  .compile_to_rvm = ir_ctrvm,
 };
 
 const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) {
diff --git a/src/parsers/many.c b/src/parsers/many.c
index 2c2577ef..8b0786db 100644
--- a/src/parsers/many.c
+++ b/src/parsers/many.c
@@ -56,10 +56,29 @@ static bool many_isValidCF(void *env) {
 	  repeat->sep->vtable->isValidCF(repeat->sep->env));
 }
 
+static bool many_ctrvm(HRVMProg *prog, void *env) { // compile_to_rvm entry of many_vt; false if p or sep fails to compile
+  HRepeat *repeat = (HRepeat*)env;
+  // FIXME: Implement clear_to_mark
+  uint16_t clear_to_mark = h_rvm_create_action(prog, h_svm_action_clear_to_mark, NULL);
+  h_rvm_insert_insn(prog, RVM_PUSH, 0);
+  uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0); // loop head; its argument is patched to the loop exit
+  if (!h_compile_regex(prog, repeat->p))
+    return false;
+  if (!h_compile_regex(prog, repeat->sep))
+    return false;
+  h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark);
+  h_rvm_insert_insn(prog, RVM_GOTO, insn);          // back to the FORK for another iteration
+  h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));  // FORK's argument = first insn after the loop
+
+  h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL)); // register the action first, as every other RVM_ACTION site does
+  return true;
+}
+
 static const HParserVtable many_vt = {
   .parse = parse_many,
   .isValidRegular = many_isValidRegular,
   .isValidCF = many_isValidCF,
+  .compile_to_rvm = many_ctrvm,
 };
 
 const HParser* h_many(const HParser* p) {
diff --git a/src/parsers/not.c b/src/parsers/not.c
index 6642d9e8..b01282cd 100644
--- a/src/parsers/not.c
+++ b/src/parsers/not.c
@@ -14,6 +14,7 @@ static const HParserVtable not_vt = {
   .parse = parse_not,
   .isValidRegular = h_false,  /* see and.c for why */
   .isValidCF = h_false,       /* also see and.c for why */
+  .compile_to_rvm = h_not_regular,
 };
 
 const HParser* h_not(const HParser* p) {
diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c
index 61b74ec5..6bfab0d5 100644
--- a/src/parsers/sequence.c
+++ b/src/parsers/sequence.c
@@ -42,7 +42,7 @@ static bool sequence_isValidCF(void *env) {
   return true;
 }
 
-static bool sequence_ctrvm(struct HRVMProg_ *prog, void* env) {
+static bool sequence_ctrvm(HRVMProg *prog, void *env) {
   HSequence *s = (HSequence*)env;
   for (size_t i=0; i<s->len; ++i) {
     if (!s->p_array[i]->vtable->compile_to_rvm(prog, s->p_array[i]->env))
diff --git a/src/parsers/token.c b/src/parsers/token.c
index f5df9d4c..7050724d 100644
--- a/src/parsers/token.c
+++ b/src/parsers/token.c
@@ -20,10 +20,22 @@ static HParseResult* parse_token(void *env, HParseState *state) {
   return make_result(state, tok);
 }
 
+static bool token_ctrvm(HRVMProg *prog, void *env) { // compile_to_rvm entry of token_vt; always returns true
+  HToken *t = (HToken*)env;
+  h_rvm_insert_insn(prog, RVM_PUSH, 0);
+  for (size_t i=0; i<t->len; ++i) { // one MATCH/STEP pair per byte of the literal
+    h_rvm_insert_insn(prog, RVM_MATCH, t->str[i] | (t->str[i] << 8)); // same byte in both halves (was '&', which is always 0); presumably a [lo,hi] range -- confirm
+    h_rvm_insert_insn(prog, RVM_STEP, 0);
+  }
+  h_rvm_insert_insn(prog, RVM_CAPTURE, 0);
+  return true;
+}
+
 const HParserVtable token_vt = {
   .parse = parse_token,
   .isValidRegular = h_true,
   .isValidCF = h_true,
+  .compile_to_rvm = token_ctrvm,
 };
 
 const HParser* h_token(const uint8_t *str, const size_t len) {
diff --git a/src/parsers/unimplemented.c b/src/parsers/unimplemented.c
index 32838ce5..7c3c6671 100644
--- a/src/parsers/unimplemented.c
+++ b/src/parsers/unimplemented.c
@@ -16,6 +16,7 @@ static const HParserVtable unimplemented_vt = {
   .parse = parse_unimplemented,
   .isValidRegular = h_false,
   .isValidCF = h_false,
+  .compile_to_rvm = h_not_regular,
 };
 
 static HParser unimplemented = {
diff --git a/src/parsers/whitespace.c b/src/parsers/whitespace.c
index 7b2477be..325b4318 100644
--- a/src/parsers/whitespace.c
+++ b/src/parsers/whitespace.c
@@ -24,10 +24,16 @@ static bool ws_isValidCF(void *env) {
   return p->vtable->isValidCF(p->env);
 }
 
+static bool ws_ctrvm(HRVMProg *prog, void *env) { // compile_to_rvm entry of whitespace_vt
+  HParser *p = (HParser*)env; // env is the wrapped parser itself
+  return h_compile_regex(prog, p); // pass the parser, not p->env. NOTE(review): nothing is emitted here for skipping whitespace
+}
+
 static const HParserVtable whitespace_vt = {
   .parse = parse_whitespace,
   .isValidRegular = ws_isValidRegular,
   .isValidCF = ws_isValidCF,
+  .compile_to_rvm = ws_ctrvm,
 };
 
 const HParser* h_whitespace(const HParser* p) {
diff --git a/src/parsers/xor.c b/src/parsers/xor.c
index 7679a645..d6fff082 100644
--- a/src/parsers/xor.c
+++ b/src/parsers/xor.c
@@ -41,6 +41,7 @@ static const HParserVtable xor_vt = {
   .parse = parse_xor,
   .isValidRegular = h_false,
   .isValidCF = xor_isValidCF,
+  .compile_to_rvm = h_not_regular,
 };
 
 const HParser* h_xor(const HParser* p1, const HParser* p2) {
-- 
GitLab