From f7e7f92408f9b43eb1088f3057ed8f0cfee11125 Mon Sep 17 00:00:00 2001
From: "Meredith L. Patterson" <clonearmy@gmail.com>
Date: Tue, 1 May 2012 03:21:14 +0100
Subject: [PATCH] Parsers for sequence and choice are done. Got rid of
 join_action, we didn't need it. parse_result_t now contains a single
 parsed_token_t (and whatever we decide to jam into it later), so that
 sequence makes sense.

---
 src/hammer.c | 141 +++++++++++++++++++++------------------------------
 src/hammer.h |  28 +++++++---
 2 files changed, 80 insertions(+), 89 deletions(-)

diff --git a/src/hammer.c b/src/hammer.c
index b0671867..e5d9bc32 100644
--- a/src/hammer.c
+++ b/src/hammer.c
@@ -75,7 +75,7 @@ parse_result_t* get_cached(parse_state_t *ps, const parser_t *p) {
   }
 }
 
-int put_cached(parse_state_t *ps, const parser_t *p, parse_result_t *cached) {
+void put_cached(parse_state_t *ps, const parser_t *p, parse_result_t *cached) {
   gpointer t = g_hash_table_lookup(ps->cache, p);
   if (NULL != t) {
     g_hash_table_insert(t, GUINT_TO_POINTER(djbhash(ps->input_stream.index, ps->input_stream.length)), (gpointer)cached); 
@@ -89,9 +89,9 @@ int put_cached(parse_state_t *ps, const parser_t *p, parse_result_t *cached) {
 parse_result_t* do_parse(const parser_t* parser, parse_state_t *state);
 
 /* Helper function, since these lines appear in every parser */
-inline parse_result_t* make_result(GSequence *ast) {
+static inline parse_result_t* make_result(parsed_token_t *tok) {  // static: a plain C99 inline emits no external definition
   parse_result_t *ret = g_new(parse_result_t, 1);
-  ret->ast = ast;
+  ret->ast = tok;  // the pointer is stored as-is; the token is not copied
   return ret;
 }
 
@@ -109,10 +109,8 @@ static parse_result_t* parse_token(void *env, parse_state_t *state) {
     }
   }
   parsed_token_t *tok = g_new(parsed_token_t, 1);
-  tok->token = t->str; tok->len = t->len;
-  GSequence *ast = g_sequence_new(NULL);
-  g_sequence_append(ast, tok);
-  return make_result(ast);
+  tok->token_type = TT_BYTES; tok->bytes.token = t->str; tok->bytes.len = t->len;
+  return make_result(tok);
 }
 
 const parser_t* token(const uint8_t *str, const size_t len) { 
@@ -128,10 +126,8 @@ static parse_result_t* parse_ch(void* env, parse_state_t *state) {
   uint8_t r = (uint8_t)read_bits(&state->input_stream, 8, false);
   if (c == r) {
     parsed_token_t *tok = g_new(parsed_token_t, 1);    
-    tok->token = GUINT_TO_POINTER(c); tok->len = 1;
-    GSequence *ast = g_sequence_new(NULL);
-    g_sequence_append(ast, tok);
-    return make_result(ast);
+    tok->token_type = TT_UINT; tok->uint = r;
+    return make_result(tok);
   } else {
     return NULL;
   }
@@ -153,10 +149,8 @@ static parse_result_t* parse_range(void* env, parse_state_t *state) {
   uint8_t r = (uint8_t)read_bits(&state->input_stream, 8, false);
   if (range->lower <= r && range->upper >= r) {
     parsed_token_t *tok = g_new(parsed_token_t, 1);
-    tok->token = GUINT_TO_POINTER(r); tok->len = 1;
-    GSequence *ast = g_sequence_new(NULL);
-    g_sequence_append(ast, tok);
-    return make_result(ast);
+    tok->token_type = TT_UINT; tok->uint = r;
+    return make_result(tok);
   } else {
     return NULL;
   }
@@ -172,67 +166,6 @@ const parser_t* range(const uint8_t lower, const uint8_t upper) {
 const parser_t* whitespace(const parser_t* p) { return NULL; }
 //const parser_t* action(const parser_t* p, /* fptr to action on AST */) { return NULL; }
 
-typedef struct {
-  parser_t *parser;
-  uint8_t *sep;
-  size_t len;
-} join_t;
-
-void join_collect(gpointer tok, gpointer ret) {
-  size_t sz = GPOINTER_TO_SIZE(ret);
-  sz += ((parsed_token_t*)tok)->len;
-  ret = GSIZE_TO_POINTER(sz);
-}
-
-static parse_result_t* parse_join(void *env, parse_state_t *state) {
-  join_t *j = (join_t*)env;
-  parse_result_t *result = do_parse(j->parser, state);
-  size_t num_tokens = g_sequence_get_length((GSequence*)result->ast);
-  if (0 < num_tokens) {
-    gpointer sz = GSIZE_TO_POINTER(0);
-    // aggregate length of tokens in AST
-    g_sequence_foreach((GSequence*)result->ast, join_collect, sz);
-    // plus aggregate length of all separators
-    size_t ret_len = GPOINTER_TO_SIZE(sz) + (num_tokens - 1) * j->len;
-    gpointer ret_str = g_malloc(ret_len);
-    // first the first token ...
-    GSequenceIter *it = g_sequence_get_begin_iter((GSequence*)result->ast);
-    parsed_token_t *tok = g_sequence_get(it);
-    memcpy(ret_str, tok->token, tok->len);
-    ret_str += tok->len;
-    // if there was only one token, don't enter the while loop
-    it = g_sequence_iter_next(it);
-    while (!g_sequence_iter_is_end(it)) {
-      // add a separator
-      memcpy(ret_str, j->sep, j->len);
-      ret_str += j->len;
-      // then the next token
-      tok = g_sequence_get(it);
-      memcpy(ret_str, tok->token, tok->len);
-      // finally, advance the pointer and the iterator
-      ret_str += tok->len;
-      it = g_sequence_iter_next(it);
-    }
-    // reset the return pointer and construct the return parse_result_t
-    ret_str -= ret_len;
-    parsed_token_t *ret_tok = g_new(parsed_token_t, 1);
-    ret_tok->token = ret_str; ret_tok->len = ret_len;
-    GSequence *ast = g_sequence_new(NULL);
-    g_sequence_append(ast, tok);
-    return make_result(ast);
-  } else {
-    return NULL;
-  }
-}
-
-const parser_t* join_action(const parser_t* p, const uint8_t *sep, const size_t len) {  
-  join_t *j = g_new(join_t, 1);
-  j->parser = (parser_t*)p; j->sep = (uint8_t*)sep; j->len = len;
-  parser_t *ret = g_new(parser_t, 1);
-  ret->fn = parse_join; ret->env = (void*)j;
-  return (const parser_t*)ret;
-}
-
 const parser_t* left_factor_action(const parser_t* p) { return NULL; }
 
 static parse_result_t* parse_negate(void *env, parse_state_t *state) {
@@ -241,10 +174,8 @@ static parse_result_t* parse_negate(void *env, parse_state_t *state) {
   if (NULL == result) {
     uint8_t r = (uint8_t)read_bits(&state->input_stream, 8, false);
     parsed_token_t *tok = g_new(parsed_token_t, 1);    
-    tok->token = GUINT_TO_POINTER(r); tok->len = 1;
-    GSequence *ast = g_sequence_new(NULL);
-    g_sequence_append(ast, tok);
-    return make_result(ast);    
+    tok->token_type = TT_UINT; tok->uint = r;
+    return make_result(tok);    
   } else {
     return NULL;
   }
@@ -276,8 +207,86 @@ const parser_t* nothing_p() {
   // not a mistake, this parser always fails
   return NULL; 
 }
-const parser_t* sequence(const parser_t* p_array[]) { return NULL; }
-const parser_t* choice(const parser_t* p_array[]) { return NULL; }
+
+/* Environment shared by the sequence and choice parsers: the child parsers
+ * to apply (p_array) and how many of them there are (len). */
+typedef struct {
+  size_t len;
+  const parser_t **p_array;
+} sequence_t;
+
+/* Count the entries of p_array up to, but not including, its terminating
+ * NULL. A NULL p_array counts as empty. */
+static size_t count_parsers(const parser_t* p_array[]) {
+  size_t len = 0;
+  if (NULL != p_array) {
+    while (NULL != p_array[len])
+      ++len;
+  }
+  return len;
+}
+
+/* Apply every parser in the sequence_t passed as env, in order. Returns a
+ * TT_SEQUENCE token whose GSequence holds each child's parse_result_t*, or
+ * NULL as soon as any child parser fails. */
+static parse_result_t* parse_sequence(void *env, parse_state_t *state) {
+  sequence_t *s = (sequence_t*)env;
+  GSequence *seq = g_sequence_new(NULL);
+  for (size_t i=0; i<s->len; ++i) {
+    parse_result_t *tmp = do_parse(s->p_array[i], state);
+    if (NULL == tmp) {
+      // a sequence succeeds only if all of its parsers succeed; free just the
+      // container, since the child results may still be held by the cache
+      g_sequence_free(seq);
+      return NULL;
+    }
+    g_sequence_append(seq, tmp);
+  }
+  parsed_token_t *tok = g_new(parsed_token_t, 1);
+  tok->token_type = TT_SEQUENCE; tok->seq = seq;
+  return make_result(tok);
+}
+
+/* Returns a parser that applies each parser in p_array in order. p_array
+ * must end with a NULL entry: an array parameter decays to a pointer, so
+ * sizeof cannot recover its length. The array is not copied, so it has to
+ * outlive the returned parser. */
+const parser_t* sequence(const parser_t* p_array[]) {
+  sequence_t *s = g_new(sequence_t, 1);
+  s->p_array = (const parser_t**)p_array; s->len = count_parsers(p_array);
+  parser_t *ret = g_new(parser_t, 1);
+  ret->fn = parse_sequence; ret->env = (void*)s;
+  return ret;
+}
+
+/* Try each parser in the sequence_t passed as env, in order, and return the
+ * first non-NULL result. Returns NULL if every alternative fails. */
+static parse_result_t* parse_choice(void *env, parse_state_t *state) {
+  sequence_t *s = (sequence_t*)env;
+  // every alternative has to start from the same input position
+  input_stream_t backup = state->input_stream;
+  for (size_t i=0; i<s->len; ++i) {
+    if (0 != i)
+      state->input_stream = backup;
+    parse_result_t *tmp = do_parse(s->p_array[i], state);
+    if (NULL != tmp)
+      return tmp;
+  }
+  // nothing succeeded, so fail
+  return NULL;
+}
+
+/* Returns a parser that tries each parser in p_array in order and yields the
+ * first success. p_array must end with a NULL entry and must outlive the
+ * returned parser (see sequence()). */
+const parser_t* choice(const parser_t* p_array[]) {
+  sequence_t *s = g_new(sequence_t, 1);
+  s->p_array = (const parser_t**)p_array; s->len = count_parsers(p_array);
+  parser_t *ret = g_new(parser_t, 1);
+  ret->fn = parse_choice; ret->env = (void*)s;
+  return ret;
+}
+
 const parser_t* butnot(const parser_t* p1, const parser_t* p2) { return NULL; }
 const parser_t* difference(const parser_t* p1, const parser_t* p2) { return NULL; }
 const parser_t* xor(const parser_t* p1, const parser_t* p2) { return NULL; }
diff --git a/src/hammer.h b/src/hammer.h
index add5c743..f2e46add 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -54,13 +54,30 @@ typedef struct parse_state {
   input_stream_t input_stream;
 } parse_state_t;
 
+typedef enum token_type {  // tag stored in parsed_token.token_type
+  TT_NONE,      // NOTE(review): presumably "no payload"; not set by any parser visible in this patch
+  TT_BYTES,     // payload is .bytes (token pointer + len); set by parse_token
+  TT_SINT,      // presumably payload is .sint; not set by any parser visible in this patch
+  TT_UINT,      // payload is .uint; set by parse_ch, parse_range and parse_negate
+  TT_SEQUENCE,  // payload is .seq; set by parse_sequence
+  TT_MAX        // NOTE(review): looks like an end-of-enum sentinel — confirm
+} token_type_t;
+
 typedef struct parsed_token {
-  const uint8_t *token;
-  size_t len;
+  token_type_t token_type;     // tag; parsers set it together with the matching union member
+  union {                      // anonymous union: members are accessed directly on the token
+    struct {
+      const uint8_t *token;    // parse_token copies its t->str here
+      size_t len;              // parse_token copies its t->len here
+    } bytes;                   // used with TT_BYTES
+    int64_t sint;              // presumably used with TT_SINT — confirm
+    uint64_t uint;             // used with TT_UINT (the byte that was read)
+    GSequence *seq;            // used with TT_SEQUENCE
+  };
 } parsed_token_t;
 
 typedef struct parse_result {
-  const GSequence *ast;
+  const parsed_token_t *ast;  // the single token produced by a parser; filled in by make_result
 } parse_result_t;
 
 typedef struct parser {
@@ -85,9 +102,6 @@ const parser_t* whitespace(const parser_t* p);
 /* Given another parser, p, and a function f, returns a parser that applies p, then applies f to everything in the AST of p's result. */
 //const parser_t* action(const parser_t* p, /* fptr to action on AST */);
 
-/* Given another parser, p, and a separator, sep, returns a parser that applies p, then joins everything in the AST of p's result with sep. For example, if the AST of p's result is {"dog", "cat", "hedgehog"} and sep is "|", the AST of this parser's result will be {"dog|cat|hedgehog"}. */
-const parser_t* join_action(const parser_t* p, const uint8_t *sep, const size_t len);
-
 const parser_t* left_factor_action(const parser_t* p);
 
 /* Given a single-character parser, p, returns a single-character parser that will parse any character *other* than the character p would parse. */
@@ -99,7 +113,7 @@ const parser_t* end_p();
 /* This parser always fails. */
 const parser_t* nothing_p();
 
-
+/* Given an array of parsers, p_array, apply each parser in order. The parse succeeds only if all parsers succeed. */
 const parser_t* sequence(const parser_t* p_array[]);
 const parser_t* choice(const parser_t* p_array[]);
 const parser_t* butnot(const parser_t* p1, const parser_t* p2);
-- 
GitLab