From 32dfae0af3db9eedfae6d97b02677cbd3e9e97db Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Sun, 13 Jan 2013 18:19:59 +0100
Subject: [PATCH] add a variant of the base64 example with coarse-grained
 semantic actions

---
 examples/Makefile      |   6 +-
 examples/base64_sem2.c | 207 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 211 insertions(+), 2 deletions(-)
 create mode 100644 examples/base64_sem2.c

diff --git a/examples/Makefile b/examples/Makefile
index a3be0ce0..98797f3d 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -4,7 +4,9 @@ OUTPUTS := dns.o \
 	   base64.o \
 	   base64 \
 	   base64_sem1.o \
-	   base64_sem1
+	   base64_sem1 \
+	   base64_sem2.o \
+	   base64_sem2
 
 TOPLEVEL := ../
 
@@ -14,7 +16,7 @@ LDFLAGS += $(pkg-config --libs glib-2.0)
 
 
 
-all: dns base64 base64_sem1
+all: dns base64 base64_sem1 base64_sem2
 
 dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS)
 dns: dns.o rr.o dns_common.o
diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c
new file mode 100644
index 00000000..957ac48c
--- /dev/null
+++ b/examples/base64_sem2.c
@@ -0,0 +1,207 @@
+#include "../src/hammer.h"
+#include "../src/internal.h"    // for h_carray functions (XXX ?!)
+#include <assert.h>
+
+// H_RULE declares a parser variable; H_ARULE also attaches action act_<rule>.
+#define H_RULE(rule, def) const HParser *rule = def
+#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule)
+
+
+///
+// Semantic action helpers.
+// These might be candidates for inclusion in the library.
+///
+
+// Action counterpart of h_ignore: always returns NULL.
+const HParsedToken *act_ignore(const HParseResult *p)
+{
+    return NULL;    // the parse result p is deliberately not inspected
+}
+
+// Helper for building HActions that select one element of a sequence.
+// Returns element i of p's AST, or NULL if p or its AST is missing, the AST
+// is not a TT_SEQUENCE, or i lies outside the sequence.
+const HParsedToken *act_index(int i, const HParseResult *p)
+{
+    // Without a sequence-typed AST there is nothing to index into.
+    if(p == NULL || p->ast == NULL)
+        return NULL;
+    if(p->ast->token_type != TT_SEQUENCE)
+        return NULL;
+
+    // Negative or too-large indices yield NULL, not an out-of-bounds read.
+    const HCountedArray *items = p->ast->seq;
+    if(i >= 0 && (size_t)i < items->used)
+        return items->elements[i];
+
+    return NULL;
+}
+
+const HParsedToken *act_index0(const HParseResult *p)
+{
+    return act_index(0, p);     // HAction-shaped form of "pick element 0"
+}
+
+
+///
+// Semantic actions for the grammar below, each corresponds to an "ARULE".
+// They must be named act_<rulename>.
+///
+
+// helper: return the numeric (6-bit) value of a parsed base64 digit.
+// NULL, non-TT_UINT tokens and characters outside the alphabet all yield 0.
+uint8_t bsfdig_value(const HParsedToken *p)
+{
+    uint8_t value = 0;
+
+    if(p && p->token_type == TT_UINT) {
+        uint8_t c = p->uint;
+        // 0x40 ('@') and 0x60 ('`') are not letters and must not be decoded.
+        if(c >= 0x41 && c <= 0x5A)      // A-Z -> 0..25
+            value = c - 0x41;
+        else if(c >= 0x61 && c <= 0x7A) // a-z -> 26..51
+            value = c - 0x61 + 26;
+        else if(c >= 0x30 && c <= 0x39) // 0-9 -> 52..61
+            value = c - 0x30 + 52;
+        else if(c == '+' || c == '/')   // '+' -> 62, '/' -> 63
+            value = (c == '+') ? 62 : 63;
+    }
+
+    return value;
+}
+
+// helper: wrap byte b in a newly allocated TT_UINT token and append it to a
+void seq_append_byte(HCountedArray *a, uint8_t b)
+{
+    HParsedToken *tok = h_arena_malloc(a->arena, sizeof *tok);
+    tok->uint = b;
+    tok->token_type = TT_UINT;
+    h_carray_append(a, tok);
+}
+
+// Semantic action for "base64": decode the parsed digit groups into bytes.
+// Expects a two-element sequence AST: the list of full 4-digit blocks,
+// followed by the result of the optional padded end block.
+// Returns a new TT_SEQUENCE token holding one TT_UINT token per output byte.
+const HParsedToken *act_base64(const HParseResult *p)
+{
+    assert(p->ast->token_type == TT_SEQUENCE);
+    assert(p->ast->seq->used == 2);
+    assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE);
+
+    const HCountedArray *full = p->ast->seq->elements[0]->seq;
+    const HParsedToken *tail = p->ast->seq->elements[1];
+
+    // The decoded bytes are collected in a fresh sequence token.
+    HParsedToken *out = h_arena_malloc(p->arena, sizeof *out);
+    out->token_type = TT_SEQUENCE;
+    out->seq = h_carray_new(p->arena);
+
+    // Every full block packs four 6-bit digits into three bytes.
+    for(size_t k = 0; k < full->used; k++) {
+        assert(full->elements[k]->token_type == TT_SEQUENCE);
+        HParsedToken **d = full->elements[k]->seq->elements;
+
+        uint32_t bits = ((uint32_t)bsfdig_value(d[0]) << 18)
+                      | ((uint32_t)bsfdig_value(d[1]) << 12)
+                      | ((uint32_t)bsfdig_value(d[2]) << 6)
+                      |  (uint32_t)bsfdig_value(d[3]);
+
+        seq_append_byte(out->seq, (bits >> 16) & 0xFF);
+        seq_append_byte(out->seq, (bits >> 8) & 0xFF);
+        seq_append_byte(out->seq, bits & 0xFF);
+    }
+
+    // A second element that is not a sequence is treated as "no end block".
+    if(tail->token_type != TT_SEQUENCE)
+        return out;
+
+    // Both end block forms start with two digits.
+    HParsedToken **d = tail->seq->elements;
+    uint32_t bits = ((uint32_t)bsfdig_value(d[0]) << 6) | bsfdig_value(d[1]);
+
+    // The third element tells the forms apart: '=' there means base64_1,
+    // otherwise it is the third digit of base64_2.
+    if(d[2]->uint == '=') {
+        // 12 bits parsed, the upper 8 are the single payload byte.
+        seq_append_byte(out->seq, (bits >> 4) & 0xFF);
+    } else {
+        // 18 bits parsed, the upper 16 are the two payload bytes.
+        bits = (bits << 6) | bsfdig_value(d[2]);
+        seq_append_byte(out->seq, (bits >> 10) & 0xFF);
+        seq_append_byte(out->seq, (bits >> 2) & 0xFF);
+    }
+
+    return out;
+}
+
+#define act_ws           act_ignore     // action for rule "ws": always NULL
+#define act_document     act_index0     // action for rule "document": element 0
+
+
+///
+// Build the grammar and return the parser for its top-level rule, document.
+///
+
+const HParser *init_parser(void)
+{
+    // CORE: basic character classes
+    H_RULE (digit,   h_ch_range(0x30, 0x39));   // '0'-'9'
+    H_RULE (alpha,   h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL));
+    H_RULE (space,   h_in((uint8_t *)" \t\n\r\f\v", 6));
+
+    // AUX.: the remaining single characters of the base64 syntax
+    H_RULE (plus,    h_ch('+'));
+    H_RULE (slash,   h_ch('/'));
+    H_RULE (equals,  h_ch('='));
+
+    H_RULE (bsfdig,       h_choice(alpha, digit, plus, slash, NULL));
+    H_RULE (bsfdig_4bit,  h_in((uint8_t *)"AEIMQUYcgkosw048", 16)); // digit values with low 2 bits zero
+    H_RULE (bsfdig_2bit,  h_in((uint8_t *)"AQgw", 4));              // digit values with low 4 bits zero
+    H_RULE (base64_3,     h_repeat_n(bsfdig, 4));
+    H_RULE (base64_2,     h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL));
+    H_RULE (base64_1,     h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL));
+    H_ARULE(base64,       h_sequence(h_many(base64_3),
+                                     h_optional(h_choice(base64_2,
+                                                         base64_1, NULL)),
+                                     NULL));
+
+    H_ARULE(ws,           h_many(space));
+    H_ARULE(document,     h_sequence(ws, base64, ws, h_end_p(), NULL));
+
+    // BUG: sometimes inputs that should parse simply don't.
+    // It *seemed* to happen mostly with things like "bbbbaaaaBA==".
+    // Using fewer actions seemed to make it less likely.
+
+    return document;
+}
+
+
+///
+// Main routine: print input, parse, print result, return success/failure.
+///
+
+#include <stdio.h>
+
+int main(int argc, char **argv)
+{
+    uint8_t input[102400];
+    const HParser *parser = init_parser();
+
+    // Read at most sizeof(input) bytes from stdin; anything beyond is not read.
+    size_t inputsize = fread(input, 1, sizeof(input), stdin);
+
+    // Echo the input on stderr.  inputsize is a size_t, so it takes %zu;
+    // %lu is only correct where size_t happens to be unsigned long.
+    fprintf(stderr, "inputsize=%zu\ninput=", inputsize);
+    fwrite(input, 1, inputsize, stderr);
+
+    // Exit status 1 signals that the input did not parse.
+    const HParseResult *result = h_parse(parser, input, inputsize);
+    if(!result)
+        return 1;
+
+    fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8);
+    h_pprint(stdout, result->ast, 0, 0);
+    return 0;
+}
-- 
GitLab