diff --git a/SConstruct b/SConstruct index 4cf48a3a5eeaf91ae6ded46762f5cf7b8e48d595..a8f7ce8b9d39964458dea9fd1ee1fbe3d0a4b474 100644 --- a/SConstruct +++ b/SConstruct @@ -14,7 +14,11 @@ tools = ['default', 'scanreplace'] if 'dotnet' in ARGUMENTS.get('bindings', []): tools.append('csharp/mono') -env = Environment(ENV = {'PATH' : os.environ['PATH'], 'PKG_CONFIG_PATH' : os.environ['PKG_CONFIG_PATH']}, +envvars = {'PATH' : os.environ['PATH']} +if 'PKG_CONFIG_PATH' in os.environ: + envvars['PKG_CONFIG_PATH'] = os.environ['PKG_CONFIG_PATH'] + +env = Environment(ENV = envvars, variables = vars, tools=tools, toolpath=['tools']) diff --git a/src/SConscript b/src/SConscript index 38ace12a179f34bf540f7c9bf2322ce449583772..386a9a25e2b2dfb50616c595deb7cf7edf6594cc 100644 --- a/src/SConscript +++ b/src/SConscript @@ -69,7 +69,8 @@ ctests = ['t_benchmark.c', 't_bitwriter.c', 't_parser.c', 't_grammar.c', - 't_misc.c'] + 't_misc.c', + 't_regression.c'] libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts) libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts) diff --git a/src/allocator.h b/src/allocator.h index 803d89fe9bdbfd861a2ba86b5f216d5442e328fe..4a486936a058c0a619a83e7afdf0c5dfffc50d48 100644 --- a/src/allocator.h +++ b/src/allocator.h @@ -33,11 +33,22 @@ typedef struct HAllocator_ { typedef struct HArena_ HArena ; // hidden implementation HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default... 
-#ifndef SWIG -void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) )); + +#if defined __llvm__ +# if __has_attribute(malloc) +# define ATTR_MALLOC(n) __attribute__((malloc)) +# else +# define ATTR_MALLOC(n) +# endif +#elif defined SWIG +# define ATTR_MALLOC(n) +#elif defined __GNUC__ +# define ATTR_MALLOC(n) __attribute__((malloc, alloc_size(n))) #else -void* h_arena_malloc(HArena *arena, size_t count); +# define ATTR_MALLOC(n) #endif + +void* h_arena_malloc(HArena *arena, size_t count) ATTR_MALLOC(2); void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers. void h_delete_arena(HArena *arena); diff --git a/src/backends/packrat.c b/src/backends/packrat.c index c1e422ed6e9fa42fe4130c11ad8a1f7e7c22c2a2..33082c6c278beb09b2abf767e5314d18ab471db4 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -33,11 +33,13 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa if (tmp_res) { tmp_res->arena = state->arena; if (!state->input_stream.overrun) { - tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3); - if (state->input_stream.endianness & BIT_BIG_ENDIAN) - tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset; - else - tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset; + size_t bit_length = h_input_stream_pos(&state->input_stream) - h_input_stream_pos(&bak); + if (tmp_res->bit_length == 0) { // Don't modify if forwarding.
+ tmp_res->bit_length = bit_length; + } + if (tmp_res->ast && tmp_res->ast->bit_length != 0) { + ((HParsedToken*)(tmp_res->ast))->bit_length = bit_length; + } } else tmp_res->bit_length = 0; } diff --git a/src/bitreader.c b/src/bitreader.c index df8c4c3615fe9b36f02621945006adcbc981e60b..3627df5d6f9f228c8c9fe7b6e1b0c1c30b7e7de1 100644 --- a/src/bitreader.c +++ b/src/bitreader.c @@ -39,10 +39,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { if (bits_left <= 64) { // Large enough to handle any valid count, but small enough that overflow isn't a problem. // not in danger of overflowing, so add in bits // add in number of bits... - if (state->endianness & BIT_BIG_ENDIAN) - bits_left = (bits_left << 3) - 8 + state->bit_offset; - else - bits_left = (bits_left << 3) - state->bit_offset; + bits_left = (bits_left << 3) - state->bit_offset - state->margin; if (bits_left < count) { if (state->endianness & BYTE_BIG_ENDIAN) final_shift = count - bits_left; @@ -54,7 +51,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { final_shift = 0; } - if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0) { + if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0 && (state->margin == 0)) { // fast path if (state->endianness & BYTE_BIG_ENDIAN) { while (count > 0) { @@ -73,22 +70,24 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { int segment, segment_len; // Read a segment... 
if (state->endianness & BIT_BIG_ENDIAN) { - if (count >= state->bit_offset) { - segment_len = state->bit_offset; - state->bit_offset = 8; - segment = state->input[state->index] & ((1 << segment_len) - 1); + if (count + state->bit_offset + state->margin >= 8) { + segment_len = 8 - state->bit_offset - state->margin; + segment = (state->input[state->index] >> state->margin) & ((1 << segment_len) - 1); state->index++; + state->bit_offset = 0; + state->margin = 0; } else { segment_len = count; - state->bit_offset -= count; - segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); + state->bit_offset += count; + segment = (state->input[state->index] >> (8 - state->bit_offset)) & ((1 << segment_len) - 1); } } else { // BIT_LITTLE_ENDIAN - if (count + state->bit_offset >= 8) { - segment_len = 8 - state->bit_offset; - segment = (state->input[state->index] >> state->bit_offset); + if (count + state->bit_offset + state->margin >= 8) { + segment_len = 8 - state->bit_offset - state->margin; + segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); state->index++; state->bit_offset = 0; + state->margin = 0; } else { segment_len = count; segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); diff --git a/src/glue.h b/src/glue.h index 1fe6ce46f453e911339e5ea3090e2436283f106a..6c1c56ca0e368bc407d846f342dd52ba934c9dda 100644 --- a/src/glue.h +++ b/src/glue.h @@ -11,7 +11,8 @@ // // A few standard semantic actions are defined below. The H_ACT_APPLY macro // allows semantic actions to be defined by "partial application" of -// a generic action to fixed paramters. +// a generic action to fixed parameters. H_VALIDATE_APPLY is similar for +// h_attr_bool.
// // The definition of more complex semantic actions will usually consist of // extracting data from the given parse tree and constructing a token of custom @@ -66,13 +67,13 @@ h_attr_bool(h_action(def, act_ ## rule, NULL), validate_ ## rule, NULL) #define H_AVRULE(rule, def) HParser *rule = \ h_action(h_attr_bool(def, validate_ ## rule, NULL), act_ ## rule, NULL) -#define H_ADRULE(rule, def, data) HParser *rule = \ +#define H_ADRULE(rule, def, data) HParser *rule = \ h_action(def, act_ ## rule, data) -#define H_VDRULE(rule, def, data) HParser *rule = \ +#define H_VDRULE(rule, def, data) HParser *rule = \ h_attr_bool(def, validate_ ## rule, data) -#define H_VADRULE(rule, def, data) HParser *rule = \ +#define H_VADRULE(rule, def, data) HParser *rule = \ h_attr_bool(h_action(def, act_ ## rule, data), validate_ ## rule, data) -#define H_AVDRULE(rule, def, data) HParser *rule = \ +#define H_AVDRULE(rule, def, data) HParser *rule = \ h_action(h_attr_bool(def, validate_ ## rule, data), act_ ## rule, data) @@ -109,8 +110,14 @@ HParsedToken *h_act_ignore(const HParseResult *p, void* user_data); // Define 'myaction' as a specialization of 'paction' by supplying the leading // parameters. #define H_ACT_APPLY(myaction, paction, ...) \ - HParsedToken *myaction(const HParseResult *p, void* user_data) { \ - return paction(__VA_ARGS__, p, user_data); \ + HParsedToken *myaction(const HParseResult *p, void* user_data) { \ + return paction(__VA_ARGS__, p, user_data); \ + } + +// Similar, but for validations. +#define H_VALIDATE_APPLY(myvalidation, pvalidation, ...) \ + bool myvalidation(HParseResult* p, void* user_data) { \ + return pvalidation(__VA_ARGS__, p, user_data); \ } diff --git a/src/hammer.c b/src/hammer.c index 2456bdcedb7c9c7a0b4e374e8b8146bf19603179..6bb9ebb4febe53668a91ae9617ba05f2c158023d 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -52,7 +52,7 @@ HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* // Set up a parse state... 
HInputStream input_stream = { .index = 0, - .bit_offset = 8, + .bit_offset = 0, .overrun = 0, .endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN, .length = length, diff --git a/src/hammer.h b/src/hammer.h index b0ce75d20d74d65b8a64e5a25694ef8696acf4ad..1c02b0548d0964afe47d984a767ac688c6caa7d0 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -99,6 +99,7 @@ typedef struct HParsedToken_ { HTokenData token_data; #endif size_t index; + size_t bit_length; char bit_offset; } HParsedToken; diff --git a/src/internal.h b/src/internal.h index 6c721eb03e3f790308b7539ea0abd3b9ae59f805..0c4d4dc2739953c3cfffa487ea3bd73993698ebd 100644 --- a/src/internal.h +++ b/src/internal.h @@ -70,6 +70,8 @@ typedef struct HInputStream_ { size_t index; size_t length; char bit_offset; + char margin; // The number of bits on the end that is being read + // towards that should be ignored. char endianness; char overrun; } HInputStream; @@ -295,6 +297,9 @@ extern HParserBackendVTable h__glr_backend_vtable; // TODO(thequux): Set symbol visibility for these functions so that they aren't exported. int64_t h_read_bits(HInputStream* state, int count, char signed_p); +static inline size_t h_input_stream_pos(HInputStream* state) { + return state->index * 8 + state->bit_offset + state->margin; +} // need to decide if we want to make this public. HParseResult* h_do_parse(const HParser* parser, HParseState *state); void put_cached(HParseState *ps, const HParser *p, HParseResult *cached); diff --git a/src/parsers/endianness.c b/src/parsers/endianness.c index 091e4c0142da577c47992ba45084af1f7e447ae9..e3f53ab8225a75bde08ff7e3dd456822e1234b86 100644 --- a/src/parsers/endianness.c +++ b/src/parsers/endianness.c @@ -11,19 +11,9 @@ static void switch_bit_order(HInputStream *input) { assert(input->bit_offset <= 8); - if((input->bit_offset % 8) != 0) { - // switching bit order in the middle of a byte - // we leave bit_offset untouched. 
this means that something like - // le(bits(5)),le(bits(3)) - // is equivalent to - // le(bits(5),bits(3)) . - // on the other hand, - // le(bits(5)),be(bits(5)) - // will read the same 5 bits twice and discard the top 3. - } else { - // flip offset (0 <-> 8) - input->bit_offset = 8 - input->bit_offset; - } + char tmp = input->bit_offset; + input->bit_offset = input->margin; + input->margin = tmp; } static HParseResult *parse_endianness(void *env, HParseState *state) diff --git a/src/parsers/parser_internal.h b/src/parsers/parser_internal.h index ec97dd1b0696fcb69f4a17bfc7d4078138f4d355..9a3b6de3898b42336a84bfe565448c27315e29bb 100644 --- a/src/parsers/parser_internal.h +++ b/src/parsers/parser_internal.h @@ -18,6 +18,7 @@ static inline HParseResult* make_result(HArena *arena, HParsedToken *tok) { HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult)); ret->ast = tok; ret->arena = arena; + ret->bit_length = 0; // This way it gets overridden in h_do_parse return ret; } diff --git a/src/t_bitreader.c b/src/t_bitreader.c index 40a7bb98369dd32696cb536cbd08c16b1a10c2b4..65235c1d36e3ed4406acee6ec93a524efe94aef9 100644 --- a/src/t_bitreader.c +++ b/src/t_bitreader.c @@ -4,14 +4,14 @@ #include "internal.h" #include "test_suite.h" -#define MK_INPUT_STREAM(buf,len,endianness_) \ +#define MK_INPUT_STREAM(buf,len,endianness_) \ { \ - .input = (uint8_t*)buf, \ - .length = len, \ - .index = 0, \ - .bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 
8 : 0), \ - .endianness = endianness_ \ - } + .input = (uint8_t*)buf, \ + .length = len, \ + .index = 0, \ + .bit_offset = 0, \ + .endianness = endianness_ \ + } static void test_bitreader_ints(void) { @@ -56,7 +56,6 @@ static void test_offset_largebits_le(void) { g_check_cmp_int32(h_read_bits(&is, 11, false), ==, 0x2D3); } - void register_bitreader_tests(void) { g_test_add_func("/core/bitreader/be", test_bitreader_be); g_test_add_func("/core/bitreader/le", test_bitreader_le); diff --git a/src/t_bitwriter.c b/src/t_bitwriter.c index 747c86f2a328d41f1e25bad6fb4c90de3df814e6..6b9b7051fa480b47e9cf173e29d865bdbc4a8943 100644 --- a/src/t_bitwriter.c +++ b/src/t_bitwriter.c @@ -24,7 +24,7 @@ void run_bitwriter_test(bitwriter_test_elem data[], char flags) { .input = buf, .index = 0, .length = len, - .bit_offset = (flags & BIT_BIG_ENDIAN) ? 8 : 0, + .bit_offset = 0, .endianness = flags, .overrun = 0 }; diff --git a/src/t_regression.c b/src/t_regression.c new file mode 100644 index 0000000000000000000000000000000000000000..e74f16b98a7d037b19b6ece386721830720ab2c1 --- /dev/null +++ b/src/t_regression.c @@ -0,0 +1,38 @@ +#include <glib.h> +#include <stdint.h> +#include "glue.h" +#include "hammer.h" +#include "test_suite.h" + +static void test_bug118(void) { + // https://github.com/UpstandingHackers/hammer/issues/118 + // Adapted from https://gist.github.com/mrdomino/c6bc91a7cb3b9817edb5 + + HParseResult* p; + const uint8_t *input = (uint8_t*)"\x69\x5A\x6A\x7A\x8A\x9A"; + +#define MY_ENDIAN (BIT_BIG_ENDIAN | BYTE_LITTLE_ENDIAN) + H_RULE(nibble, h_with_endianness(MY_ENDIAN, h_bits(4, false))); + H_RULE(sample, h_with_endianness(MY_ENDIAN, h_bits(10, false))); +#undef MY_ENDIAN + + H_RULE(samples, h_sequence(h_repeat_n(sample, 3), h_ignore(h_bits(2, false)), NULL)); + + H_RULE(header_ok, h_sequence(nibble, nibble, NULL)); + H_RULE(header_weird, h_sequence(nibble, nibble, nibble, NULL)); + + H_RULE(parser_ok, h_sequence(header_ok, samples, NULL)); + H_RULE(parser_weird, 
h_sequence(header_weird, samples, NULL)); + + + p = h_parse(parser_weird, input, 6); + g_check_cmp_int32(p->bit_length, ==, 44); + h_parse_result_free(p); + p = h_parse(parser_ok, input, 6); + g_check_cmp_int32(p->bit_length, ==, 40); + h_parse_result_free(p); +} + +void register_regression_tests(void) { + g_test_add_func("/core/regression/bug118", test_bug118); +} diff --git a/src/test_suite.c b/src/test_suite.c index 81f86b2c5007f11375995ad50751dfcb4618b7f5..cba18e8db9ad4b1187a028c2a2326ae6c1026633 100644 --- a/src/test_suite.c +++ b/src/test_suite.c @@ -25,6 +25,7 @@ extern void register_parser_tests(); extern void register_grammar_tests(); extern void register_misc_tests(); extern void register_benchmark_tests(); +extern void register_regression_tests(); int main(int argc, char** argv) { g_test_init(&argc, &argv, NULL); @@ -35,6 +36,7 @@ int main(int argc, char** argv) { register_parser_tests(); register_grammar_tests(); register_misc_tests(); + register_regression_tests(); if (g_test_slow() || g_test_perf()) register_benchmark_tests();