diff --git a/src/SConscript b/src/SConscript
index cb6469673c973c16c16030485f91e92143a9c17b..325decf824628e268798b1737767061fe133b82c 100644
--- a/src/SConscript
+++ b/src/SConscript
@@ -35,6 +35,7 @@ parsers = ['parsers/%s.c'%s for s in
             'attr_bool',
             'bind',
             'bits',
+            'bytes',
             'butnot',
             'ch',
             'charset',
diff --git a/src/hammer.h b/src/hammer.h
index d84497d9a7f1ce302efea3cedea710f23834331e..00df5bceb381d3a02e112a5eb65691c4806ae6a8 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -487,6 +487,14 @@ HAMMER_FN_DECL(HParser*, h_int_range, const HParser *p, const int64_t lower, con
  */
 HAMMER_FN_DECL(HParser*, h_bits, size_t len, bool sign);
 
+/**
+ * Returns a parser that reads `len` octets from the input.
+ * The input does not have to be aligned to a byte boundary.
+ *
+ * Result token type: TT_BYTES
+ */
+HAMMER_FN_DECL(HParser*, h_bytes, size_t len);
+
 /**
  * Returns a parser that parses a signed 8-byte integer value.
  *
diff --git a/src/parsers/bytes.c b/src/parsers/bytes.c
new file mode 100644
index 0000000000000000000000000000000000000000..3c582888653cfaf171873e0c082453a68a0d61b1
--- /dev/null
+++ b/src/parsers/bytes.c
@@ -0,0 +1,62 @@
+#include "parser_internal.h"
+
+/* Settings captured when an h_bytes parser is constructed. */
+struct bytes_env {
+    size_t length; /* how many octets parse_bytes reads */
+};
+
+/*
+ * Parse callback installed in bytes_vt.
+ *
+ * Reads env->length octets from the input stream, 8 bits per read, into a
+ * freshly allocated buffer and wraps that buffer in a TT_BYTES token.
+ *
+ * Reading stops as soon as the stream flags an overrun, yet the token still
+ * reports the full requested length, so the buffer's tail is left unwritten
+ * in that case.
+ * NOTE(review): presumably the backend rejects the result when overrun is
+ * set (test_bytes expects short inputs to fail) -- confirm in the caller.
+ */
+static HParseResult *parse_bytes(void *env_, HParseState *state)
+{
+    const struct bytes_env *cfg = env_;
+    uint8_t *octets = a_new(uint8_t, cfg->length);
+    size_t nread = 0;
+
+    while (nread < cfg->length && !state->input_stream.overrun) {
+        octets[nread] = h_read_bits(&state->input_stream, 8, false);
+        nread++;
+    }
+
+    HParsedToken *tok = a_new(HParsedToken, 1);
+    tok->token_type = TT_BYTES;
+    tok->bytes.len = cfg->length;
+    tok->bytes.token = octets;
+    tok->index = 0;
+    tok->bit_offset = 0;
+    tok->bit_length = 0;
+    return make_result(state->arena, tok);
+}
+
+/* Only .parse does real work; both validity hooks are set to h_false. */
+static const HParserVtable bytes_vt = {
+    .parse = parse_bytes,
+    .isValidRegular = h_false, // XXX need desugar_bytes, reshape_bytes
+    .isValidCF = h_false, // XXX need bytes_ctrvm
+};
+
+/* Builds an h_bytes parser through system_allocator. */
+HParser *h_bytes(size_t len)
+{
+    return h_bytes__m(&system_allocator, len);
+}
+
+/* Builds an h_bytes parser for len octets; mm__ is passed to h_new_parser. */
+HParser *h_bytes__m(HAllocator *mm__, size_t len)
+{
+    /* NOTE(review): h_new presumably allocates via mm__ -- confirm */
+    struct bytes_env *cfg = h_new(struct bytes_env, 1);
+
+    cfg->length = len;
+    return h_new_parser(mm__, &bytes_vt, cfg);
+}
diff --git a/src/t_parser.c b/src/t_parser.c
index dbeaabde7c4573018d0564383ec1a7f5d3069017..2c6092ab39cedbe59eeb9ae6b8613277b1bc9330 100644
--- a/src/t_parser.c
+++ b/src/t_parser.c
@@ -65,6 +65,32 @@ static void test_bits(gconstpointer backend) {
   g_check_parse_failed(bits_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1);
 }
 
+/* h_bytes with lengths 0, 1 and 5 against too-short, exact and longer inputs. */
+static void test_bytes(gconstpointer backend) {
+  HParserBackend be = (HParserBackend)GPOINTER_TO_INT(backend);
+
+  /* zero octets: matches an empty token whether or not input is present */
+  const HParser *none = h_bytes(0);
+  g_check_parse_match(none, be, "", 0, "<>");
+  g_check_parse_match(none, be, "abc", 3, "<>");
+
+  /* one octet: empty input fails, longer input yields only the first byte */
+  const HParser *one = h_bytes(1);
+  g_check_parse_failed(one, be, "", 0);
+  g_check_parse_match(one, be, " ", 1, "<20>");
+  g_check_parse_match(one, be, "abc", 3, "<61>");
+
+  /* five octets: every input shorter than five bytes fails */
+  const HParser *five = h_bytes(5);
+  g_check_parse_failed(five, be, "", 0);
+  g_check_parse_failed(five, be, "1", 1);
+  g_check_parse_failed(five, be, "12", 2);
+  g_check_parse_failed(five, be, "123", 3);
+  g_check_parse_failed(five, be, "1234", 4);
+  g_check_parse_match(five, be, "12345", 5, "<31.32.33.34.35>");
+  g_check_parse_match(five, be, "12345abc", 8, "<31.32.33.34.35>");
+}
+
 //@MARK_START
 static void test_int64(gconstpointer backend) {
   const HParser *int64_ = h_int64();
@@ -985,6 +1011,7 @@ void register_parser_tests(void) {
   g_test_add_data_func("/core/parser/packrat/ch_range", GINT_TO_POINTER(PB_PACKRAT), test_ch_range);
   g_test_add_data_func("/core/parser/packrat/bits0", GINT_TO_POINTER(PB_PACKRAT), test_bits0);
   g_test_add_data_func("/core/parser/packrat/bits", GINT_TO_POINTER(PB_PACKRAT), test_bits);
+  g_test_add_data_func("/core/parser/packrat/bytes", GINT_TO_POINTER(PB_PACKRAT), test_bytes);
   g_test_add_data_func("/core/parser/packrat/int64", GINT_TO_POINTER(PB_PACKRAT), test_int64);
   g_test_add_data_func("/core/parser/packrat/int32", GINT_TO_POINTER(PB_PACKRAT), test_int32);
   g_test_add_data_func("/core/parser/packrat/int16", GINT_TO_POINTER(PB_PACKRAT), test_int16);