diff --git a/README.md b/README.md index 14f9b0890fa4aeb36cccdfc8c0a52f75a9a52084..8880cb627764e56964782a2003b6091ab82182fa 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,8 @@ Just `#include <hammer/hammer.h>` (also `#include <hammer/glue.h>` if you plan t If you've installed Hammer system-wide, you can use `pkg-config` in the usual way. +For documentation, see the [user guide](https://github.com/UpstandingHackers/hammer/wiki/User-guide). + Examples ======== The `examples/` directory contains some simple examples, currently including: diff --git a/SConstruct b/SConstruct index f74f8d579850f57dc78d94aff23b3506568a0dbb..a8f7ce8b9d39964458dea9fd1ee1fbe3d0a4b474 100644 --- a/SConstruct +++ b/SConstruct @@ -14,7 +14,11 @@ tools = ['default', 'scanreplace'] if 'dotnet' in ARGUMENTS.get('bindings', []): tools.append('csharp/mono') -env = Environment(ENV = {'PATH' : os.environ['PATH']}, +envvars = {'PATH' : os.environ['PATH']} +if 'PKG_CONFIG_PATH' in os.environ: + envvars['PKG_CONFIG_PATH'] = os.environ['PKG_CONFIG_PATH'] + +env = Environment(ENV = envvars, variables = vars, tools=tools, toolpath=['tools']) diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c index 4da171f355c9f8b7312ae29e231d58592602b901..afbbef841cc0ef0593e68a1ca7101eacc976f474 100644 --- a/examples/base64_sem1.c +++ b/examples/base64_sem1.c @@ -29,9 +29,9 @@ HParsedToken *act_bsfdig(const HParseResult *p, void* user_data) uint8_t c = H_CAST_UINT(p->ast); - if(c >= 0x40 && c <= 0x5A) // A-Z + if(c >= 0x41 && c <= 0x5A) // A-Z res->uint = c - 0x41; - else if(c >= 0x60 && c <= 0x7A) // a-z + else if(c >= 0x61 && c <= 0x7A) // a-z res->uint = c - 0x61 + 26; else if(c >= 0x30 && c <= 0x39) // 0-9 res->uint = c - 0x30 + 52; diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c index b7a2263efc1810a0140785f3494dc29a4d204f03..b8f7b4a20312dcf39695ba52cdcf9573376d6c69 100644 --- a/examples/base64_sem2.c +++ b/examples/base64_sem2.c @@ -31,9 +31,9 @@ uint8_t bsfdig_value(const HParsedToken *p) 
if(p && p->token_type == TT_UINT) { uint8_t c = p->uint; - if(c >= 0x40 && c <= 0x5A) // A-Z + if(c >= 0x41 && c <= 0x5A) // A-Z value = c - 0x41; - else if(c >= 0x60 && c <= 0x7A) // a-z + else if(c >= 0x61 && c <= 0x7A) // a-z value = c - 0x61 + 26; else if(c >= 0x30 && c <= 0x39) // 0-9 value = c - 0x30 + 52; diff --git a/src/SConscript b/src/SConscript index 155a6218b26cd03704c2a7a922bef9aea61bbf13..e192b05e182b0020ac7f931f68244b300b93b9bc 100644 --- a/src/SConscript +++ b/src/SConscript @@ -22,6 +22,7 @@ parsers = ['parsers/%s.c'%s for s in ['action', 'and', 'attr_bool', + 'bind', 'bits', 'butnot', 'ch', @@ -39,11 +40,13 @@ parsers = ['parsers/%s.c'%s for s in 'not', 'nothing', 'optional', + 'permutation', 'sequence', 'token', 'unimplemented', 'whitespace', - 'xor']] + 'xor', + 'value']] backends = ['backends/%s.c' % s for s in ['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']] @@ -67,7 +70,8 @@ ctests = ['t_benchmark.c', 't_bitwriter.c', 't_parser.c', 't_grammar.c', - 't_misc.c'] + 't_misc.c', + 't_regression.c'] libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts) libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts) diff --git a/src/allocator.h b/src/allocator.h index 803d89fe9bdbfd861a2ba86b5f216d5442e328fe..4a486936a058c0a619a83e7afdf0c5dfffc50d48 100644 --- a/src/allocator.h +++ b/src/allocator.h @@ -33,11 +33,22 @@ typedef struct HAllocator_ { typedef struct HArena_ HArena ; // hidden implementation HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default... 
-#ifndef SWIG -void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) )); + +#if defined __llvm__ +# if __has_attribute(malloc) +# define ATTR_MALLOC(n) __attribute__((malloc)) +# else +# define ATTR_MALLOC(n) +# endif +#elif defined SWIG +# define ATTR_MALLOC(n) +#elif defined __GNUC__ +# define ATTR_MALLOC(n) __attribute__((malloc, alloc_size(2))) #else -void* h_arena_malloc(HArena *arena, size_t count); +# define ATTR_MALLOC(n) #endif + +void* h_arena_malloc(HArena *arena, size_t count) ATTR_MALLOC(2); void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers. void h_delete_arena(HArena *arena); diff --git a/src/backends/packrat.c b/src/backends/packrat.c index c1e422ed6e9fa42fe4130c11ad8a1f7e7c22c2a2..33082c6c278beb09b2abf767e5314d18ab471db4 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -33,11 +33,13 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa if (tmp_res) { tmp_res->arena = state->arena; if (!state->input_stream.overrun) { - tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3); - if (state->input_stream.endianness & BIT_BIG_ENDIAN) - tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset; - else - tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset; + size_t bit_length = h_input_stream_pos(&state->input_stream) - h_input_stream_pos(&bak); + if (tmp_res->bit_length == 0) { // Don't modify if forwarding. 
+ tmp_res->bit_length = bit_length; + } + if (tmp_res->ast && tmp_res->ast->bit_length != 0) { + ((HParsedToken*)(tmp_res->ast))->bit_length = bit_length; + } } else tmp_res->bit_length = 0; } diff --git a/src/benchmark.c b/src/benchmark.c index 408bfdb22716a31afb16a2dc75c72cf0fa34da7d..ce416dad99fcb39504f9dd2bb2d2ba21f6b0071e 100644 --- a/src/benchmark.c +++ b/src/benchmark.c @@ -1,5 +1,6 @@ #include <stdint.h> #include <stdio.h> +#include <stdlib.h> #include <time.h> #include <string.h> #include "hammer.h" @@ -14,6 +15,14 @@ #include <sys/resource.h> #endif +static const char* HParserBackendNames[] = { + "Packrat", + "Regular", + "LL(k)", + "LALR", + "GLR" +}; + void h_benchmark_clock_gettime(struct timespec *ts) { if (ts == NULL) return; @@ -112,6 +121,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest ret->results[backend].failed_testcases++; } h_parse_result_free(res); + free(res_unamb); } if (tc_failed > 0) { diff --git a/src/bitreader.c b/src/bitreader.c index df8c4c3615fe9b36f02621945006adcbc981e60b..fe21e439ec778aa39b3cbeb18c0b3ba4fbe337fd 100644 --- a/src/bitreader.c +++ b/src/bitreader.c @@ -39,10 +39,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { if (bits_left <= 64) { // Large enough to handle any valid count, but small enough that overflow isn't a problem. // not in danger of overflowing, so add in bits // add in number of bits... 
- if (state->endianness & BIT_BIG_ENDIAN) - bits_left = (bits_left << 3) - 8 + state->bit_offset; - else - bits_left = (bits_left << 3) - state->bit_offset; + bits_left = (bits_left << 3) - state->bit_offset - state->margin; if (bits_left < count) { if (state->endianness & BYTE_BIG_ENDIAN) final_shift = count - bits_left; @@ -54,7 +51,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { final_shift = 0; } - if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0) { + if ((state->bit_offset & 0x7) == 0 && (count & 0x7) == 0 && (state->margin == 0)) { // fast path if (state->endianness & BYTE_BIG_ENDIAN) { while (count > 0) { @@ -65,7 +62,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { int i; for (i = 0; count > 0; i += 8) { count -= 8; - out |= state->input[state->index++] << i; + out |= (int64_t)state->input[state->index++] << i; } } } else { @@ -73,22 +70,24 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { int segment, segment_len; // Read a segment... 
if (state->endianness & BIT_BIG_ENDIAN) { - if (count >= state->bit_offset) { - segment_len = state->bit_offset; - state->bit_offset = 8; - segment = state->input[state->index] & ((1 << segment_len) - 1); + if (count + state->bit_offset + state->margin >= 8) { + segment_len = 8 - state->bit_offset - state->margin; + segment = (state->input[state->index] >> state->margin) & ((1 << segment_len) - 1); state->index++; + state->bit_offset = 0; + state->margin = 0; } else { segment_len = count; - state->bit_offset -= count; - segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); + state->bit_offset += count; + segment = (state->input[state->index] >> (8 - state->bit_offset)) & ((1 << segment_len) - 1); } } else { // BIT_LITTLE_ENDIAN - if (count + state->bit_offset >= 8) { - segment_len = 8 - state->bit_offset; - segment = (state->input[state->index] >> state->bit_offset); + if (count + state->bit_offset + state->margin >= 8) { + segment_len = 8 - state->bit_offset - state->margin; + segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); state->index++; state->bit_offset = 0; + state->margin = 0; } else { segment_len = count; segment = (state->input[state->index] >> state->bit_offset) & ((1 << segment_len) - 1); @@ -100,7 +99,7 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) { if (state->endianness & BYTE_BIG_ENDIAN) { out = out << segment_len | segment; } else { // BYTE_LITTLE_ENDIAN - out |= segment << offset; + out |= (int64_t)segment << offset; offset += segment_len; } count -= segment_len; diff --git a/src/datastructures.c b/src/datastructures.c index 141adcd5ffa9df4d9a4a81269704a2361432266e..0feeb2176b0422471f65a053a7ef9a716af1821c 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -1,6 +1,7 @@ #include "internal.h" #include "hammer.h" #include "allocator.h" +#include "parsers/parser_internal.h" #include <assert.h> #include <stdlib.h> #include <string.h> @@ -393,6 
+394,28 @@ uint32_t h_djbhash(const uint8_t *buf, size_t len) { return hash; } +void h_symbol_put(HParseState *state, const char* key, void *value) { + if (!state->symbol_table) { + state->symbol_table = h_slist_new(state->arena); + h_slist_push(state->symbol_table, h_hashtable_new(state->arena, + h_eq_ptr, + h_hash_ptr)); + } + HHashTable *head = h_slist_top(state->symbol_table); + assert(!h_hashtable_present(head, key)); + h_hashtable_put(head, key, value); +} + +void* h_symbol_get(HParseState *state, const char* key) { + if (state->symbol_table) { + HHashTable *head = h_slist_top(state->symbol_table); + if (head) { + return h_hashtable_get(head, key); + } + } + return NULL; +} + HSArray *h_sarray_new(HAllocator *mm__, size_t size) { HSArray *ret = h_new(HSArray, 1); ret->capacity = size; diff --git a/src/glue.c b/src/glue.c index c2d915aeaab8e8d58646569f33edc5402af43023..cb3a7ce7de4dbc435da4ddefc4dfae956a3a063f 100644 --- a/src/glue.c +++ b/src/glue.c @@ -173,7 +173,7 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va) int j; while((j = va_arg(va, int)) >= 0) - ret = h_seq_index(p, j); + ret = h_seq_index(ret, j); return ret; } diff --git a/src/glue.h b/src/glue.h index 1fe6ce46f453e911339e5ea3090e2436283f106a..6c1c56ca0e368bc407d846f342dd52ba934c9dda 100644 --- a/src/glue.h +++ b/src/glue.h @@ -11,7 +11,8 @@ // // A few standard semantic actions are defined below. The H_ACT_APPLY macro // allows semantic actions to be defined by "partial application" of -// a generic action to fixed paramters. +// a generic action to fixed parameters. H_VALIDATE_APPLY is similar for +// h_attr_bool. 
// // The definition of more complex semantic actions will usually consist of // extracting data from the given parse tree and constructing a token of custom @@ -66,13 +67,13 @@ h_attr_bool(h_action(def, act_ ## rule, NULL), validate_ ## rule, NULL) #define H_AVRULE(rule, def) HParser *rule = \ h_action(h_attr_bool(def, validate_ ## rule, NULL), act_ ## rule, NULL) -#define H_ADRULE(rule, def, data) HParser *rule = \ +#define H_ADRULE(rule, def, data) HParser *rule = \ h_action(def, act_ ## rule, data) -#define H_VDRULE(rule, def, data) HParser *rule = \ +#define H_VDRULE(rule, def, data) HParser *rule = \ h_attr_bool(def, validate_ ## rule, data) -#define H_VADRULE(rule, def, data) HParser *rule = \ +#define H_VADRULE(rule, def, data) HParser *rule = \ h_attr_bool(h_action(def, act_ ## rule, data), validate_ ## rule, data) -#define H_AVDRULE(rule, def, data) HParser *rule = \ +#define H_AVDRULE(rule, def, data) HParser *rule = \ h_action(h_attr_bool(def, validate_ ## rule, data), act_ ## rule, data) @@ -109,8 +110,14 @@ HParsedToken *h_act_ignore(const HParseResult *p, void* user_data); // Define 'myaction' as a specialization of 'paction' by supplying the leading // parameters. #define H_ACT_APPLY(myaction, paction, ...) \ - HParsedToken *myaction(const HParseResult *p, void* user_data) { \ - return paction(__VA_ARGS__, p, user_data); \ + HParsedToken *myaction(const HParseResult *p, void* user_data) { \ + return paction(__VA_ARGS__, p, user_data); \ + } + +// Similar, but for validations. +#define H_VALIDATE_APPLY(myvalidation, pvalidation, ...) \ + bool myvalidation(HParseResult* p, void* user_data) { \ + return pvalidation(__VA_ARGS__, p, user_data); \ } diff --git a/src/hammer.c b/src/hammer.c index 2456bdcedb7c9c7a0b4e374e8b8146bf19603179..6bb9ebb4febe53668a91ae9617ba05f2c158023d 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -52,7 +52,7 @@ HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* // Set up a parse state... 
HInputStream input_stream = { .index = 0, - .bit_offset = 8, + .bit_offset = 0, .overrun = 0, .endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN, .length = length, diff --git a/src/hammer.h b/src/hammer.h index 778087366add8560ccdca702cacbe0876302e545..f893f10df4349d2ccc9d3a1c8c8675f60e014c9b 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -46,14 +46,6 @@ typedef enum HParserBackend_ { PB_MAX = PB_GLR } HParserBackend; -static const char* HParserBackendNames[] = { - "Packrat", - "Regular", - "LL(k)", - "LALR", - "GLR" -}; - typedef enum HTokenType_ { // Before you change the explicit values of these, think of the poor bindings ;_; TT_NONE = 1, @@ -107,6 +99,7 @@ typedef struct HParsedToken_ { HTokenData token_data; #endif size_t index; + size_t bit_length; char bit_offset; } HParsedToken; @@ -130,6 +123,19 @@ typedef struct HParseResult_ { */ typedef struct HBitWriter_ HBitWriter; +typedef struct HCFChoice_ HCFChoice; +typedef struct HRVMProg_ HRVMProg; +typedef struct HParserVtable_ HParserVtable; + +// TODO: Make this internal +typedef struct HParser_ { + const HParserVtable *vtable; + HParserBackend backend; + void* backend_data; + void *env; + HCFChoice *desugared; /* if the parser can be desugared, its desugared form */ +} HParser; + /** * Type of an action to apply to an AST, used in the action() parser. 
* It can be any (user-defined) function that takes a HParseResult* @@ -149,18 +155,17 @@ typedef HParsedToken* (*HAction)(const HParseResult *p, void* user_data); */ typedef bool (*HPredicate)(HParseResult *p, void* user_data); -typedef struct HCFChoice_ HCFChoice; -typedef struct HRVMProg_ HRVMProg; -typedef struct HParserVtable_ HParserVtable; - -// TODO: Make this internal -typedef struct HParser_ { - const HParserVtable *vtable; - HParserBackend backend; - void* backend_data; - void *env; - HCFChoice *desugared; /* if the parser can be desugared, its desugared form */ -} HParser; +/** + * Type of a parser that depends on the result of a previous parser, + * used in h_bind(). The void* argument is passed through from h_bind() and can + * be used to arbitrarily parameterize the function further. + * + * The HAllocator* argument gives access to temporary memory and is to be used + * for any allocations inside the function. Specifically, construction of any + * HParsers should use the '__m' combinator variants with the given allocator. + * Anything allocated thus will be freed by 'h_bind'. + */ +typedef HParser* (*HContinuation)(HAllocator *mm__, const HParsedToken *x, void *env); // {{{ Stuff for benchmarking typedef struct HParserTestcase_ { @@ -437,6 +442,32 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, HPa */ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, HParser* p); +/** + * Given a null-terminated list of parsers, match a permutation phrase of these + * parsers, i.e. match all parsers exactly once in any order. + * + * If multiple orders would match, the lexically smallest permutation is used; + * in other words, at any step the remaining available parsers are tried in + * the order in which they appear in the arguments. 
+ * + * As an exception, 'h_optional' parsers (actually those that return a result + * of token type TT_NONE) are detected and the algorithm will try to match them + * with a non-empty result. Specifically, a result of TT_NONE is treated as a + * non-match as long as any other argument matches. + * + * Other parsers that succeed on any input (e.g. h_many), that match the same + * input as others, or that match input which is a prefix of another match can + * lead to unexpected results and should probably not be used as arguments. + * + * The result is a sequence of the same length as the argument list. + * Each parser's result is placed at that parser's index in the arguments. + * The permutation itself (the order in which the arguments were matched) is + * not returned. + * + * Result token type: TT_SEQUENCE + */ +HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_permutation, HParser* p); + /** * Given two parsers, p1 and p2, this parser succeeds in the following * cases: @@ -621,6 +652,41 @@ HAMMER_FN_DECL(void, h_bind_indirect, HParser* indirect, const HParser* inner); */ HAMMER_FN_DECL(HParser*, h_with_endianness, char endianness, const HParser* p); +/** + * The 'h_put_value' combinator stashes the result of the parser + * it wraps in a symbol table in the parse state, so that non- + * local actions and predicates can access this value. + * + * Try not to use this combinator if you can avoid it. + * + * Result token type: p's token type if name was not already in + * the symbol table. It is an error, and thus a NULL result (and + * parse failure), to attempt to rename a symbol. + */ +HAMMER_FN_DECL(HParser*, h_put_value, const HParser *p, const char* name); + +/** + * The 'h_get_value' combinator retrieves a named HParseResult that + * was previously stashed in the parse state. + * + * Try not to use this combinator if you can avoid it. + * + * Result token type: whatever the stashed HParseResult is, if + * present. 
If absent, NULL (and thus parse failure). + */ +HAMMER_FN_DECL(HParser*, h_get_value, const char* name); + +/** + * Monadic bind for HParsers, i.e.: + * Sequencing where later parsers may depend on the result(s) of earlier ones. + * + * Run p and call the result x. Then run k(x,env). Fail if p fails or if + * k(x,env) fails or if k(x,env) is NULL. + * + * Result: the result of k(x,env). + */ +HAMMER_FN_DECL(HParser*, h_bind, const HParser *p, HContinuation k, void *env); + /** * Free the memory allocated to an HParseResult when it is no longer needed. */ diff --git a/src/internal.h b/src/internal.h index 85cd4dbc3407c74f9b8e055b56adc5fcad0a1b61..0c4d4dc2739953c3cfffa487ea3bd73993698ebd 100644 --- a/src/internal.h +++ b/src/internal.h @@ -70,6 +70,8 @@ typedef struct HInputStream_ { size_t index; size_t length; char bit_offset; + char margin; // The number of bits on the end that is being read + // towards that should be ignored. char endianness; char overrun; } HInputStream; @@ -190,6 +192,7 @@ typedef struct HHashTable_ { * arena - the arena that has been allocated for the parse this state is in. * lr_stack - a stack of HLeftRec's, used in Warth's recursion * recursion_heads - table of recursion heads. Keys are HParserCacheKey's with only an HInputStream (parser can be NULL), values are HRecursionHead's. + * symbol_table - stack of tables of values that have been stashed in the context of this parse. * */ @@ -199,6 +202,7 @@ struct HParseState_ { HArena * arena; HSlist *lr_stack; HHashTable *recursion_heads; + HSlist *symbol_table; // its contents are HHashTables }; typedef struct HParserBackendVTable_ { @@ -293,6 +297,9 @@ extern HParserBackendVTable h__glr_backend_vtable; // TODO(thequux): Set symbol visibility for these functions so that they aren't exported. 
int64_t h_read_bits(HInputStream* state, int count, char signed_p); +static inline size_t h_input_stream_pos(HInputStream* state) { + return state->index * 8 + state->bit_offset + state->margin; +} // need to decide if we want to make this public. HParseResult* h_do_parse(const HParser* parser, HParseState *state); void put_cached(HParseState *ps, const HParser *p, HParseResult *cached); @@ -316,6 +323,7 @@ HSlist* h_slist_new(HArena *arena); HSlist* h_slist_copy(HSlist *slist); void* h_slist_pop(HSlist *slist); void* h_slist_drop(HSlist *slist); +static inline void* h_slist_top(HSlist *sl) { return sl->head->elem; } void h_slist_push(HSlist *slist, void* item); bool h_slist_find(HSlist *slist, const void* item); HSlist* h_slist_remove_all(HSlist *slist, const void* item); @@ -347,8 +355,10 @@ bool h_eq_ptr(const void *p, const void *q); HHashValue h_hash_ptr(const void *p); uint32_t h_djbhash(const uint8_t *buf, size_t len); -typedef struct HCFSequence_ HCFSequence; +void h_symbol_put(HParseState *state, const char* key, void *value); +void* h_symbol_get(HParseState *state, const char* key); +typedef struct HCFSequence_ HCFSequence; struct HCFChoice_ { enum HCFChoiceType { diff --git a/src/parsers/bind.c b/src/parsers/bind.c new file mode 100644 index 0000000000000000000000000000000000000000..f024a82fe9952efa82fed5dcdb4bf28b1d9e8545 --- /dev/null +++ b/src/parsers/bind.c @@ -0,0 +1,81 @@ +#include "parser_internal.h" + +typedef struct { + const HParser *p; + HContinuation k; + void *env; + HAllocator *mm__; +} BindEnv; + +// an HAllocator backed by an HArena +typedef struct { + HAllocator allocator; // inherit XXX is this the proper way to do it? 
+ HArena *arena; +} ArenaAllocator; + +static void *aa_alloc(HAllocator *allocator, size_t size) +{ + HArena *arena = ((ArenaAllocator *)allocator)->arena; + return h_arena_malloc(arena, size); +} + +static void *aa_realloc(HAllocator *allocator, void *ptr, size_t size) +{ + HArena *arena = ((ArenaAllocator *)allocator)->arena; + assert(((void)"XXX need realloc for arena allocator", 0)); + return NULL; +} + +static void aa_free(HAllocator *allocator, void *ptr) +{ + HArena *arena = ((ArenaAllocator *)allocator)->arena; + h_arena_free(arena, ptr); +} + +static HParseResult *parse_bind(void *be_, HParseState *state) { + BindEnv *be = be_; + + HParseResult *res = h_do_parse(be->p, state); + if(!res) + return NULL; + + // create a temporary arena allocator for the continuation + HArena *arena = h_new_arena(be->mm__, 0); + ArenaAllocator aa = {{aa_alloc, aa_realloc, aa_free}, arena}; + + HParser *kx = be->k((HAllocator *)&aa, res->ast, be->env); + if(!kx) { + h_delete_arena(arena); + return NULL; + } + + res = h_do_parse(kx, state); + + h_delete_arena(arena); + return res; +} + +static const HParserVtable bind_vt = { + .parse = parse_bind, + .isValidRegular = h_false, + .isValidCF = h_false, + .compile_to_rvm = h_not_regular, +}; + +HParser *h_bind(const HParser *p, HContinuation k, void *env) +{ + return h_bind__m(&system_allocator, p, k, env); +} + +HParser *h_bind__m(HAllocator *mm__, + const HParser *p, HContinuation k, void *env) +{ + BindEnv *be = h_new(BindEnv, 1); + + be->p = p; + be->k = k; + be->env = env; + be->mm__ = mm__; + + return h_new_parser(mm__, &bind_vt, be); +} diff --git a/src/parsers/endianness.c b/src/parsers/endianness.c index 091e4c0142da577c47992ba45084af1f7e447ae9..e3f53ab8225a75bde08ff7e3dd456822e1234b86 100644 --- a/src/parsers/endianness.c +++ b/src/parsers/endianness.c @@ -11,19 +11,9 @@ static void switch_bit_order(HInputStream *input) { assert(input->bit_offset <= 8); - if((input->bit_offset % 8) != 0) { - // switching bit order in the 
middle of a byte - // we leave bit_offset untouched. this means that something like - // le(bits(5)),le(bits(3)) - // is equivalent to - // le(bits(5),bits(3)) . - // on the other hand, - // le(bits(5)),be(bits(5)) - // will read the same 5 bits twice and discard the top 3. - } else { - // flip offset (0 <-> 8) - input->bit_offset = 8 - input->bit_offset; - } + char tmp = input->bit_offset; + input->bit_offset = input->margin; + input->margin = tmp; } static HParseResult *parse_endianness(void *env, HParseState *state) diff --git a/src/parsers/parser_internal.h b/src/parsers/parser_internal.h index ec97dd1b0696fcb69f4a17bfc7d4078138f4d355..9a3b6de3898b42336a84bfe565448c27315e29bb 100644 --- a/src/parsers/parser_internal.h +++ b/src/parsers/parser_internal.h @@ -18,6 +18,7 @@ static inline HParseResult* make_result(HArena *arena, HParsedToken *tok) { HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult)); ret->ast = tok; ret->arena = arena; + ret->bit_length = 0; // This way it gets overridden in h_do_parse return ret; } diff --git a/src/parsers/permutation.c b/src/parsers/permutation.c new file mode 100644 index 0000000000000000000000000000000000000000..564565af555a0059a8a85773a86f2ae9a320df0f --- /dev/null +++ b/src/parsers/permutation.c @@ -0,0 +1,179 @@ +#include <stdarg.h> +#include "parser_internal.h" + +typedef struct { + size_t len; + HParser **p_array; +} HSequence; + +// main recursion, used by parse_permutation below +static int parse_permutation_tail(const HSequence *s, + HCountedArray *seq, + const size_t k, char *set, + HParseState *state) +{ + // shorthands + const size_t n = s->len; + HParser **ps = s->p_array; + + // trivial base case + if(k >= n) + return 1; + + HInputStream bak = state->input_stream; + + // try available parsers as first element of the permutation tail + HParseResult *match = NULL; + size_t i; + for(i=0; i<n; i++) { + if(set[i]) { + match = h_do_parse(ps[i], state); + + // save result + if(match) + seq->elements[i] = 
(void *)match->ast; + + // treat empty optionals (TT_NONE) like failure here + if(match && match->ast && match->ast->token_type == TT_NONE) + match = NULL; + + if(match) { + // remove parser from active set + set[i] = 0; + + // parse the rest of the permutation phrase + if(parse_permutation_tail(s, seq, k+1, set, state)) { + // success + return 1; + } else { + // place parser back in active set and try the next + set[i] = 1; + } + } + + state->input_stream = bak; // rewind input + } + } + + // if all available parsers were empty optionals (TT_NONE), still succeed + for(i=0; i<n; i++) { + if(set[i]) { + HParsedToken *tok = seq->elements[i]; + if(!(tok && tok->token_type == TT_NONE)) + break; + } + } + if(i==n) // all were TT_NONE + return 1; + + // permutations exhausted + return 0; +} + +static HParseResult *parse_permutation(void *env, HParseState *state) +{ + const HSequence *s = env; + const size_t n = s->len; + + // current set of available (not yet matched) parsers + char *set = h_arena_malloc(state->arena, sizeof(char) * n); + memset(set, 1, sizeof(char) * n); + + // parse result + HCountedArray *seq = h_carray_new_sized(state->arena, n); + + if(parse_permutation_tail(s, seq, 0, set, state)) { + // success + // return the sequence of results + seq->used = n; + HParsedToken *tok = a_new(HParsedToken, 1); + tok->token_type = TT_SEQUENCE; + tok->seq = seq; + return make_result(state->arena, tok); + } else { + // no parse + // XXX free seq + return NULL; + } +} + + +static const HParserVtable permutation_vt = { + .parse = parse_permutation, + .isValidRegular = h_false, + .isValidCF = h_false, + .desugar = NULL, + .compile_to_rvm = h_not_regular, +}; + +HParser* h_permutation(HParser* p, ...) { + va_list ap; + va_start(ap, p); + HParser* ret = h_permutation__mv(&system_allocator, p, ap); + va_end(ap); + return ret; +} + +HParser* h_permutation__m(HAllocator* mm__, HParser* p, ...) 
{ + va_list ap; + va_start(ap, p); + HParser* ret = h_permutation__mv(mm__, p, ap); + va_end(ap); + return ret; +} + +HParser* h_permutation__v(HParser* p, va_list ap) { + return h_permutation__mv(&system_allocator, p, ap); +} + +HParser* h_permutation__mv(HAllocator* mm__, HParser* p, va_list ap_) { + va_list ap; + size_t len = 0; + HSequence *s = h_new(HSequence, 1); + + HParser *arg; + va_copy(ap, ap_); + do { + len++; + arg = va_arg(ap, HParser *); + } while (arg); + va_end(ap); + s->p_array = h_new(HParser *, len); + + va_copy(ap, ap_); + s->p_array[0] = p; + for (size_t i = 1; i < len; i++) { + s->p_array[i] = va_arg(ap, HParser *); + } while (arg); + va_end(ap); + + s->len = len; + return h_new_parser(mm__, &permutation_vt, s); +} + +HParser* h_permutation__a(void *args[]) { + return h_permutation__ma(&system_allocator, args); +} + +HParser* h_permutation__ma(HAllocator* mm__, void *args[]) { + size_t len = -1; // because do...while + const HParser *arg; + + do { + arg=((HParser **)args)[++len]; + } while(arg); + + HSequence *s = h_new(HSequence, 1); + s->p_array = h_new(HParser *, len); + + for (size_t i = 0; i < len; i++) { + s->p_array[i] = ((HParser **)args)[i]; + } + + s->len = len; + HParser *ret = h_new(HParser, 1); + ret->vtable = &permutation_vt; + ret->env = (void*)s; + ret->backend = PB_MIN; + return ret; +} diff --git a/src/parsers/value.c b/src/parsers/value.c new file mode 100644 index 0000000000000000000000000000000000000000..531db7cb5274c30d3d482ee5bc84add58c1e9af7 --- /dev/null +++ b/src/parsers/value.c @@ -0,0 +1,69 @@ +#include "parser_internal.h" + +typedef struct { + const HParser* p; + const char* key; +} HStoredValue; + +/* Stash an HParseResult into a symbol table, so that it can be + retrieved and used later. 
*/ + +static HParseResult* parse_put(void *env, HParseState* state) { + HStoredValue *s = (HStoredValue*)env; + if (s->p && s->key && !h_symbol_get(state, s->key)) { + HParseResult *tmp = h_do_parse(s->p, state); + if (tmp) { + h_symbol_put(state, s->key, tmp); + } + return tmp; + } + // otherwise there's no parser, no key, or key's stored already + return NULL; +} + +static const HParserVtable put_vt = { + .parse = parse_put, + .isValidRegular = h_false, + .isValidCF = h_false, + .compile_to_rvm = h_not_regular, +}; + +HParser* h_put_value(const HParser* p, const char* name) { + return h_put_value__m(&system_allocator, p, name); +} + +HParser* h_put_value__m(HAllocator* mm__, const HParser* p, const char* name) { + HStoredValue *env = h_new(HStoredValue, 1); + env->p = p; + env->key = name; + return h_new_parser(mm__, &put_vt, env); +} + +/* Retrieve a stashed result from the symbol table. */ + +static HParseResult* parse_get(void *env, HParseState* state) { + HStoredValue *s = (HStoredValue*)env; + if (!s->p && s->key) { + return h_symbol_get(state, s->key); + } else { // either there's no key, or there was a parser here + return NULL; + } +} + +static const HParserVtable get_vt = { + .parse = parse_get, + .isValidRegular = h_false, + .isValidCF = h_false, + .compile_to_rvm = h_not_regular, +}; + +HParser* h_get_value(const char* name) { + return h_get_value__m(&system_allocator, name); +} + +HParser* h_get_value__m(HAllocator* mm__, const char* name) { + HStoredValue *env = h_new(HStoredValue, 1); + env->p = NULL; + env->key = name; + return h_new_parser(mm__, &get_vt, env); +} diff --git a/src/t_bitreader.c b/src/t_bitreader.c index 40a7bb98369dd32696cb536cbd08c16b1a10c2b4..65235c1d36e3ed4406acee6ec93a524efe94aef9 100644 --- a/src/t_bitreader.c +++ b/src/t_bitreader.c @@ -4,14 +4,14 @@ #include "internal.h" #include "test_suite.h" -#define MK_INPUT_STREAM(buf,len,endianness_) \ +#define MK_INPUT_STREAM(buf,len,endianness_) \ { \ - .input = (uint8_t*)buf, \ - 
.length = len, \ - .index = 0, \ - .bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 8 : 0), \ - .endianness = endianness_ \ - } + .input = (uint8_t*)buf, \ + .length = len, \ + .index = 0, \ + .bit_offset = 0, \ + .endianness = endianness_ \ + } static void test_bitreader_ints(void) { @@ -56,7 +56,6 @@ static void test_offset_largebits_le(void) { g_check_cmp_int32(h_read_bits(&is, 11, false), ==, 0x2D3); } - void register_bitreader_tests(void) { g_test_add_func("/core/bitreader/be", test_bitreader_be); g_test_add_func("/core/bitreader/le", test_bitreader_le); diff --git a/src/t_bitwriter.c b/src/t_bitwriter.c index 747c86f2a328d41f1e25bad6fb4c90de3df814e6..6b9b7051fa480b47e9cf173e29d865bdbc4a8943 100644 --- a/src/t_bitwriter.c +++ b/src/t_bitwriter.c @@ -24,7 +24,7 @@ void run_bitwriter_test(bitwriter_test_elem data[], char flags) { .input = buf, .index = 0, .length = len, - .bit_offset = (flags & BIT_BIG_ENDIAN) ? 8 : 0, + .bit_offset = 0, .endianness = flags, .overrun = 0 }; diff --git a/src/t_parser.c b/src/t_parser.c index a98eb11f491c0c5d9121df21be4c10730e415fc0..df9567ed201b1d07d1ebdf9e815fd625ba8de5c8 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -495,6 +495,114 @@ static void test_endianness(gconstpointer backend) { g_check_parse_match(lb_u5_, be, "abcd", 4, "u0xc"); } +HParsedToken* act_get(const HParseResult *p, void* user_data) { + HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); + ret->token_type = TT_UINT; + ret->uint = 3 * (1 << p->ast->uint); + return ret; +} + +static void test_put_get(gconstpointer backend) { + HParser *p = h_sequence(h_put_value(h_uint8(), "size"), + h_token((const uint8_t*)"foo", 3), + h_length_value(h_action(h_get_value("size"), + act_get, NULL), + h_uint8()), + NULL); + // Yes, the quotes in the next line look weird. Leave them alone, + // this is to deal with how C strings handle hex-formatted chars. 
+ g_check_parse_match(p, (HParserBackend)GPOINTER_TO_INT(backend), "\x01""fooabcdef", 10, "(u0x1 <66.6f.6f> (u0x61 u0x62 u0x63 u0x64 u0x65 u0x66))"); + g_check_parse_failed(p, (HParserBackend)GPOINTER_TO_INT(backend), "\x01""fooabcde", 9); +} + +static void test_permutation(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + const HParser *p = h_permutation(h_ch('a'), h_ch('b'), h_ch('c'), NULL); + + g_check_parse_match(p, be, "abc", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(p, be, "acb", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(p, be, "bac", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(p, be, "bca", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(p, be, "cab", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(p, be, "cba", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_failed(p, be, "a", 1); + g_check_parse_failed(p, be, "ab", 2); + g_check_parse_failed(p, be, "abb", 3); + + const HParser *po = h_permutation(h_ch('a'), h_ch('b'), h_optional(h_ch('c')), NULL); + + g_check_parse_match(po, be, "abc", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(po, be, "acb", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(po, be, "bac", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(po, be, "bca", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(po, be, "cab", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(po, be, "cba", 3, "(u0x61 u0x62 u0x63)"); + g_check_parse_match(po, be, "ab", 2, "(u0x61 u0x62 null)"); + g_check_parse_match(po, be, "ba", 2, "(u0x61 u0x62 null)"); + g_check_parse_failed(po, be, "a", 1); + g_check_parse_failed(po, be, "b", 1); + g_check_parse_failed(po, be, "c", 1); + g_check_parse_failed(po, be, "ca", 2); + g_check_parse_failed(po, be, "cb", 2); + g_check_parse_failed(po, be, "cc", 2); + g_check_parse_failed(po, be, "ccab", 4); + g_check_parse_failed(po, be, "ccc", 3); + + const HParser *po2 = h_permutation(h_optional(h_ch('c')), h_ch('a'), h_ch('b'), NULL); + + g_check_parse_match(po2, be, "abc", 
3, "(u0x63 u0x61 u0x62)"); + g_check_parse_match(po2, be, "acb", 3, "(u0x63 u0x61 u0x62)"); + g_check_parse_match(po2, be, "bac", 3, "(u0x63 u0x61 u0x62)"); + g_check_parse_match(po2, be, "bca", 3, "(u0x63 u0x61 u0x62)"); + g_check_parse_match(po2, be, "cab", 3, "(u0x63 u0x61 u0x62)"); + g_check_parse_match(po2, be, "cba", 3, "(u0x63 u0x61 u0x62)"); + g_check_parse_match(po2, be, "ab", 2, "(null u0x61 u0x62)"); + g_check_parse_match(po2, be, "ba", 2, "(null u0x61 u0x62)"); + g_check_parse_failed(po2, be, "a", 1); + g_check_parse_failed(po2, be, "b", 1); + g_check_parse_failed(po2, be, "c", 1); + g_check_parse_failed(po2, be, "ca", 2); + g_check_parse_failed(po2, be, "cb", 2); + g_check_parse_failed(po2, be, "cc", 2); + g_check_parse_failed(po2, be, "ccab", 4); + g_check_parse_failed(po2, be, "ccc", 3); +} + +static HParser *k_test_bind(HAllocator *mm__, const HParsedToken *p, void *env) { + uint8_t one = (uintptr_t)env; + + assert(p); + assert(p->token_type == TT_SEQUENCE); + + int v=0; + for(size_t i=0; i<p->seq->used; i++) { + assert(p->seq->elements[i]->token_type == TT_UINT); + v = v*10 + p->seq->elements[i]->uint - '0'; + } + + if(v > 26) + return h_nothing_p__m(mm__); // fail + else if(v > 127) + return NULL; // equivalent to the above + else + return h_ch__m(mm__, one - 1 + v); +} +static void test_bind(gconstpointer backend) { + HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend); + const HParser *digit = h_ch_range('0', '9'); + const HParser *nat = h_many1(digit); + const HParser *p = h_bind(nat, k_test_bind, (void *)(uintptr_t)'a'); + + g_check_parse_match(p, be, "1a", 2, "u0x61"); + g_check_parse_match(p, be, "2b", 2, "u0x62"); + g_check_parse_match(p, be, "26z", 3, "u0x7a"); + g_check_parse_failed(p, be, "1x", 2); + g_check_parse_failed(p, be, "29y", 3); + g_check_parse_failed(p, be, "@", 1); + g_check_parse_failed(p, be, "27{", 3); + g_check_parse_failed(p, be, "272{", 4); +} + void register_parser_tests(void) { 
g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token); g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch); @@ -542,6 +650,9 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/leftrec-ne", GINT_TO_POINTER(PB_PACKRAT), test_leftrec_ne); g_test_add_data_func("/core/parser/packrat/rightrec", GINT_TO_POINTER(PB_PACKRAT), test_rightrec); g_test_add_data_func("/core/parser/packrat/endianness", GINT_TO_POINTER(PB_PACKRAT), test_endianness); + g_test_add_data_func("/core/parser/packrat/putget", GINT_TO_POINTER(PB_PACKRAT), test_put_get); + g_test_add_data_func("/core/parser/packrat/permutation", GINT_TO_POINTER(PB_PACKRAT), test_permutation); + g_test_add_data_func("/core/parser/packrat/bind", GINT_TO_POINTER(PB_PACKRAT), test_bind); g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token); g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch); diff --git a/src/t_regression.c b/src/t_regression.c new file mode 100644 index 0000000000000000000000000000000000000000..d05cbde0d0419addfac081a4a9292bbc18a007c4 --- /dev/null +++ b/src/t_regression.c @@ -0,0 +1,102 @@ +#include <glib.h> +#include <stdint.h> +#include "glue.h" +#include "hammer.h" +#include "test_suite.h" +#include "internal.h" + +static void test_bug118(void) { + // https://github.com/UpstandingHackers/hammer/issues/118 + // Adapted from https://gist.github.com/mrdomino/c6bc91a7cb3b9817edb5 + + HParseResult* p; + const uint8_t *input = (uint8_t*)"\x69\x5A\x6A\x7A\x8A\x9A"; + +#define MY_ENDIAN (BIT_BIG_ENDIAN | BYTE_LITTLE_ENDIAN) + H_RULE(nibble, h_with_endianness(MY_ENDIAN, h_bits(4, false))); + H_RULE(sample, h_with_endianness(MY_ENDIAN, h_bits(10, false))); +#undef MY_ENDIAN + + H_RULE(samples, h_sequence(h_repeat_n(sample, 3), h_ignore(h_bits(2, false)), NULL)); + + H_RULE(header_ok, h_sequence(nibble, nibble, NULL)); + H_RULE(header_weird, h_sequence(nibble, 
nibble, nibble, NULL)); + + H_RULE(parser_ok, h_sequence(header_ok, samples, NULL)); + H_RULE(parser_weird, h_sequence(header_weird, samples, NULL)); + + + p = h_parse(parser_weird, input, 6); + g_check_cmp_int32(p->bit_length, ==, 44); + h_parse_result_free(p); + p = h_parse(parser_ok, input, 6); + g_check_cmp_int32(p->bit_length, ==, 40); + h_parse_result_free(p); +} + +static void test_seq_index_path(void) { + HArena *arena = h_new_arena(&system_allocator, 0); + + HParsedToken *seq = h_make_seqn(arena, 1); + HParsedToken *seq2 = h_make_seqn(arena, 2); + HParsedToken *tok1 = h_make_uint(arena, 41); + HParsedToken *tok2 = h_make_uint(arena, 42); + + seq->seq->elements[0] = seq2; + seq->seq->used = 1; + seq2->seq->elements[0] = tok1; + seq2->seq->elements[1] = tok2; + seq2->seq->used = 2; + + g_check_cmp_int(h_seq_index_path(seq, 0, -1)->token_type, ==, TT_SEQUENCE); + g_check_cmp_int(h_seq_index_path(seq, 0, 0, -1)->token_type, ==, TT_UINT); + g_check_cmp_int64(h_seq_index_path(seq, 0, 0, -1)->uint, ==, 41); + g_check_cmp_int64(h_seq_index_path(seq, 0, 1, -1)->uint, ==, 42); +} + +#define MK_INPUT_STREAM(buf,len,endianness_) \ + { \ + .input = (uint8_t*)buf, \ + .length = len, \ + .index = 0, \ + .bit_offset = 0, \ + .endianness = endianness_ \ + } + +static void test_read_bits_48(void) { + { + HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmp_int64(h_read_bits(&is, 32, false), ==, 0x78563412); + g_check_cmp_int64(h_read_bits(&is, 16, false), ==, 0xBC9A); + } + { + HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmp_int64(h_read_bits(&is, 31, false), ==, 0x78563412); + g_check_cmp_int64(h_read_bits(&is, 17, false), ==, 0x17934); + } + { + HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmp_int64(h_read_bits(&is, 33, false), ==, 0x78563412); + 
g_check_cmp_int64(h_read_bits(&is, 17, false), ==, 0x5E4D); + } + { + HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmp_int64(h_read_bits(&is, 36, false), ==, 0xA78563412); + g_check_cmp_int64(h_read_bits(&is, 12, false), ==, 0xBC9); + } + { + HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmp_int64(h_read_bits(&is, 40, false), ==, 0x9A78563412); + g_check_cmp_int64(h_read_bits(&is, 8, false), ==, 0xBC); + } + { + HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmp_int64(h_read_bits(&is, 48, false), ==, 0xBC9A78563412); + } +} + +void register_regression_tests(void) { + g_test_add_func("/core/regression/bug118", test_bug118); + g_test_add_func("/core/regression/seq_index_path", test_seq_index_path); + g_test_add_func("/core/regression/read_bits_48", test_read_bits_48); +} diff --git a/src/test_suite.c b/src/test_suite.c index 81f86b2c5007f11375995ad50751dfcb4618b7f5..cba18e8db9ad4b1187a028c2a2326ae6c1026633 100644 --- a/src/test_suite.c +++ b/src/test_suite.c @@ -25,6 +25,7 @@ extern void register_parser_tests(); extern void register_grammar_tests(); extern void register_misc_tests(); extern void register_benchmark_tests(); +extern void register_regression_tests(); int main(int argc, char** argv) { g_test_init(&argc, &argv, NULL); @@ -35,6 +36,7 @@ int main(int argc, char** argv) { register_parser_tests(); register_grammar_tests(); register_misc_tests(); + register_regression_tests(); if (g_test_slow() || g_test_perf()) register_benchmark_tests(); diff --git a/src/test_suite.h b/src/test_suite.h index 1f983c7f752aadaefefa5dd637212c963cdd41d2..9a58a20fc40fe266ae286e047dfb81bed09869c8 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -212,6 +212,7 @@ +#define g_check_cmp_int(n1, op, n2) g_check_inttype("%d", int, n1, op, n2) #define g_check_cmp_int32(n1, op, 
n2) g_check_inttype("%d", int32_t, n1, op, n2) #define g_check_cmp_int64(n1, op, n2) g_check_inttype("%" PRId64, int64_t, n1, op, n2) #define g_check_cmp_uint32(n1, op, n2) g_check_inttype("%u", uint32_t, n1, op, n2)