From 19e36e1cfe2c0cc4aa5f696f3a9b9595704ba888 Mon Sep 17 00:00:00 2001
From: "Meredith L. Patterson" <clonearmy@gmail.com>
Date: Thu, 3 May 2012 01:40:23 +0100
Subject: [PATCH] Fixed build (completed butnot combinator). Also, fixed bug
 where sequence wasn't bailing out if one parser failed.

---
 NOTES        |  6 +++++
 src/hammer.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++---
 src/hammer.h |  3 +++
 3 files changed, 75 insertions(+), 4 deletions(-)
 create mode 100644 NOTES

diff --git a/NOTES b/NOTES
new file mode 100644
index 00000000..4d89c709
--- /dev/null
+++ b/NOTES
@@ -0,0 +1,6 @@
+NOTES
+=====
+
+Regarding parse_result_t:
+If a parse fails, the parser returns NULL instead of a parse_result_t.
+If a parse succeeds but there is nothing to return (e.g., when end_p succeeds), the parse_result_t is non-NULL but its ast is NULL.
\ No newline at end of file
diff --git a/src/hammer.c b/src/hammer.c
index ced2c3c1..3198022c 100644
--- a/src/hammer.c
+++ b/src/hammer.c
@@ -218,7 +218,12 @@ static parse_result_t* parse_sequence(void *env, parse_state_t *state) {
   GSequence *seq = g_sequence_new(NULL);
   for (size_t i=0; i<s->len; ++i) {
     parse_result_t *tmp = do_parse(s->p_array[i], state);
-    g_sequence_append(seq, tmp);
+    // if the interim parse fails, the whole thing fails
+    if (NULL == tmp) {
+      return NULL;
+    } else {
+      g_sequence_append(seq, tmp);
+    }
   }
   parsed_token_t *tok = g_new(parsed_token_t, 1);
   tok->token_type = TT_SEQUENCE; tok->seq = seq;
@@ -259,16 +264,73 @@ typedef struct {
   const parser_t *p2;
 } two_parsers_t;
 
+// GFunc-style callback: adds the size of pr's AST to the size_t that acc points to.
+void accumulate_size(gpointer pr, gpointer acc) {
+  const parse_result_t *res = (const parse_result_t*)pr;
+  // acc points at the caller's size_t; a size packed into the pointer value would only be a local copy
+  size_t *total = (size_t*)acc;
+  if (NULL != res->ast) {
+    switch(res->ast->token_type) {
+    case TT_BYTES:
+      *total += res->ast->bytes.len;
+      break;
+    case TT_SINT:
+    case TT_UINT:
+      *total += 8; // integer tokens count as a fixed 8
+      break;
+    case TT_SEQUENCE:
+      g_sequence_foreach(res->ast->seq, accumulate_size, acc);
+      break;
+    default:
+      break;
+    }
+  } // no else, if the AST is null then the total doesn't change
+}
+
+// Returns the accumulated size of pr's AST, or 0 when pr is NULL (a failed parse).
+size_t token_length(parse_result_t *pr) {
+  size_t ret = 0;
+  if (NULL != pr) {
+    accumulate_size(pr, &ret);
+  }
+  return ret;
+}
 
-static parse_result_t parse_butnot(void *env, parse_state_t *state) {
+static parse_result_t* parse_butnot(void *env, parse_state_t *state) {
   two_parsers_t *parsers = (two_parsers_t*)env;
-  input_stream_t tmp_state = state->input_stream;
-  
+  // cache the initial state of the input stream so p2 can be run from the same position as p1
+  input_stream_t start_state = state->input_stream;
+  parse_result_t *r1 = do_parse(parsers->p1, state);
+  // p1 failed (NULL result), so the whole parser fails
+  if (NULL == r1) {
+    return NULL;
+  } 
+  // cache the state after parse #1, since we might have to back up to it
+  input_stream_t after_p1_state = state->input_stream;
+  state->input_stream = start_state;
+  parse_result_t *r2 = do_parse(parsers->p2, state);
+  // TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
+  state->input_stream = after_p1_state;
+  // p2 failed, so p1's result stands; the stream was already put back to the post-p1 state above
+  if (NULL == r2) {
+    return r1;
+  }
+  size_t r1len = token_length(r1);
+  size_t r2len = token_length(r2);
+  // both matched: fail if p1's text is longer than p2's, else return r1 (NOTE(review): confirm '>' is the intended direction)
+  if (r1len > r2len) {
+    return NULL;
+  } else {
+    return r1;
+  }
 }
 
 const parser_t* butnot(const parser_t* p1, const parser_t* p2) { 
   two_parsers_t *env = g_new(two_parsers_t, 1);
   env->p1 = p1; env->p2 = p2;
+  parser_t *ret = g_new(parser_t, 1); // the parser handed back to the caller
+  ret->fn = parse_butnot; ret->env = (void*)env; // parse_butnot reads p1/p2 back out of env
+  return ret;
 }
 
 const parser_t* difference(const parser_t* p1, const parser_t* p2) { return NULL; }
diff --git a/src/hammer.h b/src/hammer.h
index 040007de..af153cc3 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -76,6 +76,9 @@ typedef struct parsed_token {
   };
 } parsed_token_t;
 
+/* A failed parse is reported as a NULL parse result.
+ * A parse that succeeds but has nothing to return (e.g., when end_p succeeds) yields a non-NULL parse result whose ast is NULL.
+ */
 typedef struct parse_result {
   const parsed_token_t *ast;
 } parse_result_t;
-- 
GitLab