From 5f920b29f8113c46be12cbea45a070b6c9e1f32f Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Wed, 7 May 2014 19:24:26 +0200
Subject: [PATCH] add h_with_endianness()

---
 src/SConscript           |  1 +
 src/hammer.h             | 10 ++++++
 src/parsers/endianness.c | 72 ++++++++++++++++++++++++++++++++++++++++
 src/t_parser.c           | 40 ++++++++++++++++++++++
 4 files changed, 123 insertions(+)
 create mode 100644 src/parsers/endianness.c

diff --git a/src/SConscript b/src/SConscript
index 03883932..155a6218 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -29,6 +29,7 @@ parsers = ['parsers/%s.c'%s for s in
             'choice',
             'difference',
             'end',
+            'endianness',
             'epsilon',
             'ignore',
             'ignoreseq',
diff --git a/src/hammer.h b/src/hammer.h
index f0ac6866..77808736 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -611,6 +611,16 @@ HAMMER_FN_DECL_NOARG(HParser*, h_indirect);
  */
 HAMMER_FN_DECL(void, h_bind_indirect, HParser* indirect, const HParser* inner);
 
+/**
+ * This parser runs its argument parser with the given endianness setting
+ * and restores the previous setting once the argument parser has returned.
+ * 'endianness' should be the bit-wise OR of a byte order (BYTE_BIG_ENDIAN or
+ * BYTE_LITTLE_ENDIAN) and a bit order (BIT_BIG_ENDIAN or BIT_LITTLE_ENDIAN).
+ *
+ * Result token type: p's result type.
+ */
+HAMMER_FN_DECL(HParser*, h_with_endianness, char endianness, const HParser* p);
+
 /**
  * Free the memory allocated to an HParseResult when it is no longer needed.
  */
diff --git a/src/parsers/endianness.c b/src/parsers/endianness.c
new file mode 100644
index 00000000..091e4c01
--- /dev/null
+++ b/src/parsers/endianness.c
@@ -0,0 +1,72 @@
+#include "parser_internal.h"
+
+
+typedef struct {
+    const HParser *p;   // wrapped parser, run by parse_endianness()
+    char endianness;    // endianness flags in effect while 'p' runs
+} HParseEndianness;
+
+// helper: adapt bit_offset to a change of bit order (only at byte boundaries)
+static void switch_bit_order(HInputStream *input)
+{
+    assert(input->bit_offset <= 8);
+
+    if((input->bit_offset % 8) != 0) {
+        // switching bit order in the middle of a byte:
+        // we leave bit_offset untouched. this means that something like
+        //     le(bits(5)),le(bits(3))
+        // is equivalent to
+        //     le(bits(5),bits(3)) .
+        // on the other hand,
+        //     le(bits(5)),be(bits(5))
+        // will read the same 5 bits twice and discard the top 3.
+    } else {
+        // at a byte boundary (offset 0 or 8): flip offset (0 <-> 8)
+        input->bit_offset = 8 - input->bit_offset;
+    }
+}
+
+static HParseResult *parse_endianness(void *env, HParseState *state)
+{   // runs e->p with e->endianness in effect, then restores the old setting
+    HParseEndianness *e = env;
+    HParseResult *res = NULL;
+    char diff = state->input_stream.endianness ^ e->endianness;
+
+    if(!diff) {     // diff holds the flags that differ from the current ones
+        // all the same, nothing to do
+        res = h_do_parse(e->p, state);
+    } else {
+        if(diff & BIT_BIG_ENDIAN)   // bit order changes: adjust bit_offset
+            switch_bit_order(&state->input_stream);
+
+        state->input_stream.endianness ^= diff;     // switch to e->endianness
+        res = h_do_parse(e->p, state);
+        state->input_stream.endianness ^= diff;     // back to the old setting
+
+        if(diff & BIT_BIG_ENDIAN)   // bit order changes back: adjust again
+            switch_bit_order(&state->input_stream);
+    }
+
+    return res;
+}
+
+static const HParserVtable endianness_vt = {
+    .parse = parse_endianness,
+    .isValidRegular = h_false,  // NOTE(review): presumably "not a regular parser" - confirm
+    .isValidCF = h_false,       // NOTE(review): presumably "not context-free" - confirm
+    .desugar = NULL,            // no desugar callback
+    .compile_to_rvm = h_not_regular,
+};
+
+HParser* h_with_endianness(char endianness, const HParser *p)
+{   // convenience wrapper: h_with_endianness__m() with system_allocator
+    return h_with_endianness__m(&system_allocator, endianness, p);
+}
+
+HParser* h_with_endianness__m(HAllocator *mm__, char endianness, const HParser *p)
+{   // builds a parser that runs 'p' via parse_endianness() under 'endianness'
+    HParseEndianness *env = h_new(HParseEndianness, 1); // NOTE(review): presumably allocated from mm__ - confirm
+    env->endianness = endianness;
+    env->p = p;     // the pointer is stored as-is; 'p' itself is not copied
+    return h_new_parser(mm__, &endianness_vt, env);
+}
diff --git a/src/t_parser.c b/src/t_parser.c
index 4260a7c9..efe2497f 100644
--- a/src/t_parser.c
+++ b/src/t_parser.c
@@ -456,6 +456,45 @@ static void test_ambiguous(gconstpointer backend) {
   g_check_parse_failed(expr_, (HParserBackend)GPOINTER_TO_INT(backend), "d+", 2);
 }
 
+static void test_endianness(gconstpointer backend) {
+  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
+
+  HParser *u32_ = h_uint32();       // expectations below vary with byte order only
+  HParser *u5_ = h_bits(5, false);  // expectations below vary with bit order only
+
+	char bb = BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN;       // naming: <byte order><bit order>
+	char bl = BYTE_BIG_ENDIAN | BIT_LITTLE_ENDIAN;
+	char lb = BYTE_LITTLE_ENDIAN | BIT_BIG_ENDIAN;
+	char ll = BYTE_LITTLE_ENDIAN | BIT_LITTLE_ENDIAN;
+
+  HParser *bb_u32_ = h_with_endianness(bb, u32_);
+  HParser *bb_u5_ = h_with_endianness(bb, u5_);
+  HParser *ll_u32_ = h_with_endianness(ll, u32_);
+  HParser *ll_u5_ = h_with_endianness(ll, u5_);
+  HParser *bl_u32_ = h_with_endianness(bl, u32_);
+  HParser *bl_u5_ = h_with_endianness(bl, u5_);
+  HParser *lb_u32_ = h_with_endianness(lb, u32_);
+  HParser *lb_u5_ = h_with_endianness(lb, u5_);
+
+  // default (no wrapper): big-endian bytes and bits
+  g_check_parse_match(u32_, be, "abcd", 4, "u0x61626364");
+  g_check_parse_match(u5_,  be, "abcd", 4, "u0xc");  // top 5 bits of 'a' (0x61): 0x6 << 1
+
+  // both big-endian: same results as the default
+  g_check_parse_match(bb_u32_, be, "abcd", 4, "u0x61626364");
+  g_check_parse_match(bb_u5_,  be, "abcd", 4, "u0xc");  // 0x6 << 1
+
+  // both little-endian: bytes reversed, bits taken from the low end
+  g_check_parse_match(ll_u32_, be, "abcd", 4, "u0x64636261");
+  g_check_parse_match(ll_u5_,  be, "abcd", 4, "u0x1");  // low 5 bits of 'a' (0x61)
+
+  // mixed cases: byte order decides the u32 result, bit order the u5 result
+  g_check_parse_match(bl_u32_, be, "abcd", 4, "u0x61626364");
+  g_check_parse_match(bl_u5_,  be, "abcd", 4, "u0x1");
+  g_check_parse_match(lb_u32_, be, "abcd", 4, "u0x64636261");
+  g_check_parse_match(lb_u5_,  be, "abcd", 4, "u0xc");
+}
+
 void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
   g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
@@ -502,6 +541,7 @@ void register_parser_tests(void) {
   //g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec);
   g_test_add_data_func("/core/parser/packrat/leftrec-ne", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_ne);
   g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec);
+  g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness);
 
   g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
   g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);
-- 
GitLab