From af73181cf4fbd9b6ff269bea40a0f0f7c3c62113 Mon Sep 17 00:00:00 2001 From: TQ Hirsch <thequux@upstandinghackers.com> Date: Sun, 4 Jan 2015 04:00:09 +0100 Subject: [PATCH] Fix #118 NEWS: * Switching endianness mid-byte no longer potentially re-reads bytes. * bit_offset now consistently refers to the number of bits already read. * HParsedTokens now have a bit_length field; this is a size_t. This may be removed for memory reasons. The bit writer has not yet been updated to match; the result of switching bit writer endianness in the middle of a byte remains undefined. --- src/SConscript | 3 ++- src/backends/packrat.c | 12 ++++++----- src/bitreader.c | 27 ++++++++++++------------- src/hammer.c | 2 +- src/hammer.h | 1 + src/internal.h | 5 +++++ src/parsers/endianness.c | 16 +++------------ src/parsers/parser_internal.h | 1 + src/t_bitreader.c | 15 +++++++------- src/t_bitwriter.c | 2 +- src/t_regression.c | 38 +++++++++++++++++++++++++++++++++++ src/test_suite.c | 2 ++ 12 files changed, 81 insertions(+), 43 deletions(-) create mode 100644 src/t_regression.c diff --git a/src/SConscript b/src/SConscript index 38ace12a..386a9a25 100644 --- a/src/SConscript +++ b/src/SConscript @@ -69,7 +69,8 @@ ctests = ['t_benchmark.c', 't_bitwriter.c', 't_parser.c', 't_grammar.c', - 't_misc.c'] + 't_misc.c', + 't_regression.c'] libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts) libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts) diff --git a/src/backends/packrat.c b/src/backends/packrat.c index c1e422ed..33082c6c 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -33,11 +33,13 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa if (tmp_res) { tmp_res->arena = state->arena; if (!state->input_stream.overrun) { - tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3); - if (state->input_stream.endianness & BIT_BIG_ENDIAN) - tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset; - else - tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset; + size_t bit_length = h_input_stream_pos(&state->input_stream) - h_input_stream_pos(&bak); + if (tmp_res->bit_length == 0) { // Don't modify if forwarding. + tmp_res->bit_length = bit_length; + } + if (tmp_res->ast && tmp_res->ast->bit_length != 0) { + ((HParsedToken*)(tmp_res->ast))->bit_length = bit_length; + } } else tmp_res->bit_length = 0; } diff --git a/src/bitreader.c b/src/bitreader.c index df8c4c36..3627df5d 100644 --- a/src/bitreader.c +++ b/src/bitreader.c @@ -39,10 +39,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { if (bits_left <= 64) { // Large enough to handle any valid count, but small enough that overflow isn't a problem. // not in danger of overflowing, so add in bits // add in number of bits... - if (state->endianness & BIT_BIG_ENDIAN) - bits_left = (bits_left << 3) - 8 + state->bit_offset; - else - bits_left = (bits_left << 3) - state->bit_offset; + bits_left = (bits_left << 3) - state->bit_offset - state->margin; if (bits_left < count) { if (state->endianness & BYTE_BIG_ENDIAN) final_shift = count - bits_left; @@ -54,7 +51,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { final_shift = 0; } - if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0) { + if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0 && (state->margin == 0)) { // fast path if (state->endianness & BYTE_BIG_ENDIAN) { while (count > 0) { @@ -73,22 +70,24 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { int segment, segment_len; // Read a segment... if (state->endianness & BIT_BIG_ENDIAN) { - if (count >= state->bit_offset) { - segment_len = state->bit_offset; - state->bit_offset = 8; - segment = state->input[state->index] & ((1 << segment_len) - 1); + if (count + state->bit_offset + state->margin >= 8) { + segment_len = 8 - state->bit_offset - state->margin; + segment = (state->input[state->index] >> state->margin) & ((1 << segment_len) - 1); state->index++; + state->bit_offset = 0; + state->margin = 0; } else { segment_len = count; - state->bit_offset -= count; - segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); + state->bit_offset += count; + segment = (state->input[state->index] >> (8 - state->bit_offset)) & ((1 << segment_len) - 1); } } else { // BIT_LITTLE_ENDIAN - if (count + state->bit_offset >= 8) { - segment_len = 8 - state->bit_offset; - segment = (state->input[state->index] >> state->bit_offset); + if (count + state->bit_offset + state->margin >= 8) { + segment_len = 8 - state->bit_offset - state->margin; + segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); state->index++; state->bit_offset = 0; + state->margin = 0; } else { segment_len = count; segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); diff --git a/src/hammer.c b/src/hammer.c index 2456bdce..6bb9ebb4 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -52,7 +52,7 @@ HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* // Set up a parse state... HInputStream input_stream = { .index = 0, - .bit_offset = 8, + .bit_offset = 0, .overrun = 0, .endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN, .length = length, diff --git a/src/hammer.h b/src/hammer.h index b0ce75d2..1c02b054 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -99,6 +99,7 @@ typedef struct HParsedToken_ { HTokenData token_data; #endif size_t index; + size_t bit_length; char bit_offset; } HParsedToken; diff --git a/src/internal.h b/src/internal.h index 6c721eb0..0c4d4dc2 100644 --- a/src/internal.h +++ b/src/internal.h @@ -70,6 +70,8 @@ typedef struct HInputStream_ { size_t index; size_t length; char bit_offset; + char margin; // The number of bits on the end that is being read + // towards that should be ignored. char endianness; char overrun; } HInputStream; @@ -295,6 +297,9 @@ extern HParserBackendVTable h__glr_backend_vtable; // TODO(thequux): Set symbol visibility for these functions so that they aren't exported. int64_t h_read_bits(HInputStream* state, int count, char signed_p); +static inline size_t h_input_stream_pos(HInputStream* state) { + return state->index * 8 + state->bit_offset + state->margin; +} // need to decide if we want to make this public. HParseResult* h_do_parse(const HParser* parser, HParseState *state); void put_cached(HParseState *ps, const HParser *p, HParseResult *cached); diff --git a/src/parsers/endianness.c b/src/parsers/endianness.c index 091e4c01..e3f53ab8 100644 --- a/src/parsers/endianness.c +++ b/src/parsers/endianness.c @@ -11,19 +11,9 @@ static void switch_bit_order(HInputStream *input) { assert(input->bit_offset <= 8); - if((input->bit_offset % 8) != 0) { - // switching bit order in the middle of a byte - // we leave bit_offset untouched. this means that something like - // le(bits(5)),le(bits(3)) - // is equivalent to - // le(bits(5),bits(3)) . - // on the other hand, - // le(bits(5)),be(bits(5)) - // will read the same 5 bits twice and discard the top 3. - } else { - // flip offset (0 <-> 8) - input->bit_offset = 8 - input->bit_offset; - } + char tmp = input->bit_offset; + input->bit_offset = input->margin; + input->margin = tmp; } static HParseResult *parse_endianness(void *env, HParseState *state) diff --git a/src/parsers/parser_internal.h b/src/parsers/parser_internal.h index ec97dd1b..9a3b6de3 100644 --- a/src/parsers/parser_internal.h +++ b/src/parsers/parser_internal.h @@ -18,6 +18,7 @@ static inline HParseResult* make_result(HArena *arena, HParsedToken *tok) { HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult)); ret->ast = tok; ret->arena = arena; + ret->bit_length = 0; // This way it gets overridden in h_do_parse return ret; } diff --git a/src/t_bitreader.c b/src/t_bitreader.c index 40a7bb98..65235c1d 100644 --- a/src/t_bitreader.c +++ b/src/t_bitreader.c @@ -4,14 +4,14 @@ #include "internal.h" #include "test_suite.h" -#define MK_INPUT_STREAM(buf,len,endianness_) \ +#define MK_INPUT_STREAM(buf,len,endianness_) \ { \ - .input = (uint8_t*)buf, \ - .length = len, \ - .index = 0, \ - .bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 8 : 0), \ - .endianness = endianness_ \ - } + .input = (uint8_t*)buf, \ + .length = len, \ + .index = 0, \ + .bit_offset = 0, \ + .endianness = endianness_ \ + } static void test_bitreader_ints(void) { @@ -56,7 +56,6 @@ static void test_offset_largebits_le(void) { g_check_cmp_int32(h_read_bits(&is, 11, false), ==, 0x2D3); } - void register_bitreader_tests(void) { g_test_add_func("/core/bitreader/be", test_bitreader_be); g_test_add_func("/core/bitreader/le", test_bitreader_le); diff --git a/src/t_bitwriter.c b/src/t_bitwriter.c index 747c86f2..6b9b7051 100644 --- a/src/t_bitwriter.c +++ b/src/t_bitwriter.c @@ -24,7 +24,7 @@ void run_bitwriter_test(bitwriter_test_elem data[], char flags) { .input = buf, .index = 0, .length = len, - .bit_offset = (flags & BIT_BIG_ENDIAN) ? 8 : 0, + .bit_offset = 0, .endianness = flags, .overrun = 0 }; diff --git a/src/t_regression.c b/src/t_regression.c new file mode 100644 index 00000000..e74f16b9 --- /dev/null +++ b/src/t_regression.c @@ -0,0 +1,38 @@ +#include <glib.h> +#include <stdint.h> +#include "glue.h" +#include "hammer.h" +#include "test_suite.h" + +static void test_bug118(void) { + // https://github.com/UpstandingHackers/hammer/issues/118 + // Adapted from https://gist.github.com/mrdomino/c6bc91a7cb3b9817edb5 + + HParseResult* p; + const uint8_t *input = (uint8_t*)"\x69\x5A\x6A\x7A\x8A\x9A"; + +#define MY_ENDIAN (BIT_BIG_ENDIAN | BYTE_LITTLE_ENDIAN) + H_RULE(nibble, h_with_endianness(MY_ENDIAN, h_bits(4, false))); + H_RULE(sample, h_with_endianness(MY_ENDIAN, h_bits(10, false))); +#undef MY_ENDIAN + + H_RULE(samples, h_sequence(h_repeat_n(sample, 3), h_ignore(h_bits(2, false)), NULL)); + + H_RULE(header_ok, h_sequence(nibble, nibble, NULL)); + H_RULE(header_weird, h_sequence(nibble, nibble, nibble, NULL)); + + H_RULE(parser_ok, h_sequence(header_ok, samples, NULL)); + H_RULE(parser_weird, h_sequence(header_weird, samples, NULL)); + + + p = h_parse(parser_weird, input, 6); + g_check_cmp_int32(p->bit_length, ==, 44); + h_parse_result_free(p); + p = h_parse(parser_ok, input, 6); + g_check_cmp_int32(p->bit_length, ==, 40); + h_parse_result_free(p); +} + +void register_regression_tests(void) { + g_test_add_func("/core/regression/bug118", test_bug118); +} diff --git a/src/test_suite.c b/src/test_suite.c index 81f86b2c..cba18e8d 100644 --- a/src/test_suite.c +++ b/src/test_suite.c @@ -25,6 +25,7 @@ extern void register_parser_tests(); extern void register_grammar_tests(); extern void register_misc_tests(); extern void register_benchmark_tests(); +extern void register_regression_tests(); int main(int argc, char** argv) { g_test_init(&argc, &argv, NULL); @@ -35,6 +36,7 @@ int main(int argc, char** argv) { register_parser_tests(); register_grammar_tests(); register_misc_tests(); + register_regression_tests(); if (g_test_slow() || g_test_perf()) register_benchmark_tests(); -- GitLab