diff --git a/src/SConscript b/src/SConscript
index 038839320cc6f65b0ea5a6a0140442923bd0db00..155a6218b26cd03704c2a7a922bef9aea61bbf13 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -29,6 +29,7 @@ parsers = ['parsers/%s.c'%s for s in
             'choice',
             'difference',
             'end',
+            'endianness',
             'epsilon',
             'ignore',
             'ignoreseq',
diff --git a/src/hammer.h b/src/hammer.h
index f0ac6866731f59e824de55422d3a6e105d357c83..778087366add8560ccdca702cacbe0876302e545 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -611,6 +611,16 @@ HAMMER_FN_DECL_NOARG(HParser*, h_indirect);
  */
 HAMMER_FN_DECL(void, h_bind_indirect, HParser* indirect, const HParser* inner);
 
+/**
+ * This parser runs its argument parser with the given endianness setting
+ * and restores the previous setting once the argument parser has returned.
+ * The value of 'endianness' should be a bitwise OR of one of the constants
+ * BYTE_BIG_ENDIAN/BYTE_LITTLE_ENDIAN and one of BIT_BIG_ENDIAN/BIT_LITTLE_ENDIAN.
+ *
+ * Result token type: p's result type.
+ */
+HAMMER_FN_DECL(HParser*, h_with_endianness, char endianness, const HParser* p);
+
 /**
  * Free the memory allocated to an HParseResult when it is no longer needed.
  */
diff --git a/src/parsers/endianness.c b/src/parsers/endianness.c
new file mode 100644
index 0000000000000000000000000000000000000000..091e4c0142da577c47992ba45084af1f7e447ae9
--- /dev/null
+++ b/src/parsers/endianness.c
@@ -0,0 +1,72 @@
+#include "parser_internal.h"
+
+// Environment for an h_with_endianness parser.
+typedef struct {
+    const HParser *p;   // parser that is run under the requested setting
+    char endianness;    // requested setting; compared with input_stream.endianness at parse time
+} HParseEndianness;
+
+// helper: adjusts input->bit_offset when the bit order of the stream is switched.
+static void switch_bit_order(HInputStream *input)
+{
+    assert(input->bit_offset <= 8);
+
+    if((input->bit_offset % 8) != 0) {
+        // Switching bit order in the middle of a byte:
+        // bit_offset is deliberately left untouched. This means that
+        //     le(bits(5)),le(bits(3))
+        // is equivalent to
+        //     le(bits(5),bits(3)) .
+        // On the other hand,
+        //     le(bits(5)),be(bits(5))
+        // will read the same 5 bits twice and discard the top 3.
+    } else {
+        // offset is a multiple of 8 here, i.e. 0 or 8 given the assert: flip it (0 <-> 8)
+        input->bit_offset = 8 - input->bit_offset;
+    }
+}
+// Parse callback: runs e->p with the stream's endianness temporarily set to e->endianness.
+static HParseResult *parse_endianness(void *env, HParseState *state)
+{
+    HParseEndianness *e = env;
+    HParseResult *res = NULL;
+    char diff = state->input_stream.endianness ^ e->endianness;  // flags that differ from the current setting
+
+    if(!diff) {
+        // requested setting equals the current one: parse without touching the stream state
+        res = h_do_parse(e->p, state);
+    } else {
+        if(diff & BIT_BIG_ENDIAN)  // the BIT_BIG_ENDIAN flag differs between the two settings
+            switch_bit_order(&state->input_stream);
+
+        state->input_stream.endianness ^= diff;  // now equal to e->endianness
+        res = h_do_parse(e->p, state);
+        state->input_stream.endianness ^= diff;  // back to the setting seen on entry
+
+        if(diff & BIT_BIG_ENDIAN)  // adjust the offset again for the restored bit order
+            switch_bit_order(&state->input_stream);
+    }
+
+    return res;
+}
+// Vtable: only .parse is implemented here. NOTE(review): h_false / h_not_regular presumably keep this parser out of the regular and context-free backends - confirm.
+static const HParserVtable endianness_vt = {
+    .parse = parse_endianness,
+    .isValidRegular = h_false,
+    .isValidCF = h_false,
+    .desugar = NULL,
+    .compile_to_rvm = h_not_regular,
+};
+// Public constructor: forwards to h_with_endianness__m with &system_allocator.
+HParser* h_with_endianness(char endianness, const HParser *p)
+{
+    return h_with_endianness__m(&system_allocator, endianness, p);
+}
+// Allocator-aware constructor: stores 'endianness' and 'p' in a new HParseEndianness and wraps it in a parser using endianness_vt.
+HParser* h_with_endianness__m(HAllocator *mm__, char endianness, const HParser *p)
+{
+    HParseEndianness *env = h_new(HParseEndianness, 1);  // NOTE(review): h_new presumably allocates from mm__ - confirm
+    env->endianness = endianness;
+    env->p = p;
+    return h_new_parser(mm__, &endianness_vt, env);
+}
diff --git a/src/t_parser.c b/src/t_parser.c
index 4260a7c99447d4dbf1cbe1e58e782f93f3c5e397..efe2497f0f759c60dcb86a9925e9f9b2918d969f 100644
--- a/src/t_parser.c
+++ b/src/t_parser.c
@@ -456,6 +456,45 @@ static void test_ambiguous(gconstpointer backend) {
   g_check_parse_failed(expr_, (HParserBackend)GPOINTER_TO_INT(backend), "d+", 2);
 }
 
+static void test_endianness(gconstpointer backend) {
+  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);  // backend under test, decoded from the test's data pointer
+
+  HParser *u32_ = h_uint32();
+  HParser *u5_ = h_bits(5, false);
+
+  char bb = BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN;        // naming: first letter = byte order, second letter = bit order
+  char bl = BYTE_BIG_ENDIAN | BIT_LITTLE_ENDIAN;
+  char lb = BYTE_LITTLE_ENDIAN | BIT_BIG_ENDIAN;
+  char ll = BYTE_LITTLE_ENDIAN | BIT_LITTLE_ENDIAN;
+
+  HParser *bb_u32_ = h_with_endianness(bb, u32_);
+  HParser *bb_u5_ = h_with_endianness(bb, u5_);
+  HParser *ll_u32_ = h_with_endianness(ll, u32_);
+  HParser *ll_u5_ = h_with_endianness(ll, u5_);
+  HParser *bl_u32_ = h_with_endianness(bl, u32_);
+  HParser *bl_u5_ = h_with_endianness(bl, u5_);
+  HParser *lb_u32_ = h_with_endianness(lb, u32_);
+  HParser *lb_u5_ = h_with_endianness(lb, u5_);
+
+  // default (unwrapped parsers): big-endian bytes and bits
+  g_check_parse_match(u32_, be, "abcd", 4, "u0x61626364");
+  g_check_parse_match(u5_, be, "abcd", 4, "u0xc"); // 0x6 << 1, i.e. the top five bits of 'a' (0x61)
+
+  // both big-endian: same results as the default
+  g_check_parse_match(bb_u32_, be, "abcd", 4, "u0x61626364");
+  g_check_parse_match(bb_u5_, be, "abcd", 4, "u0xc"); // 0x6 << 1
+
+  // both little-endian: bytes reversed, and the five bits are the low bits of 'a' (0x61)
+  g_check_parse_match(ll_u32_, be, "abcd", 4, "u0x64636261");
+  g_check_parse_match(ll_u5_, be, "abcd", 4, "u0x1");
+
+  // mixed cases: the u32 result follows the byte order, the 5-bit result follows the bit order
+  g_check_parse_match(bl_u32_, be, "abcd", 4, "u0x61626364");
+  g_check_parse_match(bl_u5_, be, "abcd", 4, "u0x1");
+  g_check_parse_match(lb_u32_, be, "abcd", 4, "u0x64636261");
+  g_check_parse_match(lb_u5_, be, "abcd", 4, "u0xc");
+}
+
 void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token);
   g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch);
@@ -502,6 +541,7 @@ void register_parser_tests(void) {
   //g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec);
   g_test_add_data_func("/core/parser/packrat/leftrec-ne", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_ne);
   g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec);
+  g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness);
 
   g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token);
   g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch);