diff --git a/common.mk b/common.mk index de00ecbf0e2f0f58ff033d547f85ce200b67883e..5a1fa5016b67a4a622ea718e2efe997508055102 100644 --- a/common.mk +++ b/common.mk @@ -1,4 +1,4 @@ -CFLAGS := $(shell pkg-config --cflags glib-2.0) -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter +CFLAGS := $(shell pkg-config --cflags glib-2.0) -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes LDFLAGS := $(shell pkg-config --libs glib-2.0) CC := gcc # Set V=1 for verbose mode... diff --git a/src/Makefile b/src/Makefile index 0e71c1897e074339d7a0735479371814bfe79e43..d362118c293cd5d66cfc2a0e1e1e69559de1dc15 100644 --- a/src/Makefile +++ b/src/Makefile @@ -2,6 +2,9 @@ OUTPUTS := bitreader.o \ hammer.o \ libhammer.a \ + pprint.o \ + allocator.o \ + datastructures.o \ test_suite TOPLEVEL := ../ @@ -14,7 +17,7 @@ all: libhammer.a test_suite test_suite: test_suite.o libhammer.a $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -libhammer.a: bitreader.o hammer.o pprint.o allocator.o +libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o bitreader.o: test_suite.h hammer.o: hammer.h diff --git a/src/allocator.c b/src/allocator.c index 2d446f1aca4b9b2222b7e60d47f8a52053c8de1f..ba35fd347a4c58058732234dacff5c846bfdd4c5 100644 --- a/src/allocator.c +++ b/src/allocator.c @@ -60,10 +60,10 @@ void* arena_malloc(arena_t arena, size_t size) { if (size <= arena->head->free) { // fast path.. void* ret = arena->head->rest + arena->head->used; - arena->used += size; + arena->used += size + 1; arena->wasted -= size; - arena->head->used += size; - arena->head->free -= size; + arena->head->used += size + 1; + arena->head->free -= size + 1; return ret; } else if (size > arena->block_size) { // We need a new, dedicated block for it, because it won't fit in a standard sized one. @@ -73,7 +73,7 @@ void* arena_malloc(arena_t arena, size_t size) { void* link = g_malloc(size + sizeof(struct arena_link*)); *(struct arena_link**)link = arena->head->next; arena->head->next = (struct arena_link*)link; - return (void*)((uint8_t*)link + sizeof(struct arena_link*)); + return (void*)(((uint8_t*)link) + sizeof(struct arena_link*)); } else { // we just need to allocate an ordinary new block. struct arena_link *link = (struct arena_link*)g_malloc0(sizeof(struct arena_link) + arena->block_size); diff --git a/src/datastructures.c b/src/datastructures.c new file mode 100644 index 0000000000000000000000000000000000000000..7954c434910e882bbf33bdef8aa11ecf639b436a --- /dev/null +++ b/src/datastructures.c @@ -0,0 +1,32 @@ +#include "internal.h" +#include "hammer.h" +#include "allocator.h" +#include <assert.h> +#include <malloc.h> +// {{{ counted arrays + + +counted_array_t *carray_new_sized(arena_t arena, size_t size) { + counted_array_t *ret = arena_malloc(arena, sizeof(counted_array_t)); + assert(size > 0); + ret->used = 0; + ret->capacity = size; + ret->arena = arena; + ret->elements = arena_malloc(arena, sizeof(void*) * size); + return ret; +} +counted_array_t *carray_new(arena_t arena) { + return carray_new_sized(arena, 4); +} + +void carray_append(counted_array_t *array, void* item) { + if (array->used >= array->capacity) { + void **elements = arena_malloc(array->arena, (array->capacity *= 2) * sizeof(counted_array_t*)); + for (size_t i = 0; i < array->used; i++) + elements[i] = array->elements[i]; + for (size_t i = array->used; i < array->capacity; i++) + elements[i] = 0; + array->elements = elements; + } + array->elements[array->used++] = item; +} diff --git a/src/hammer.c b/src/hammer.c index ddafaabe539de95cb8ce4b22406b8ff3ce4aa419..0aed6a88518dea1cacb60defa70da461259b9d99 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -37,7 +37,9 @@ guint djbhash(const uint8_t *buf, size_t len) { parse_result_t* do_parse(const parser_t* parser, parse_state_t *state) { // TODO(thequux): add caching here. - parser_cache_key_t *key = a_new(parser_cache_key_t, 1); + parser_cache_key_t *key; + key = a_new(parser_cache_key_t, 1); + memset(key, 0, sizeof(*key)); key->input_pos = state->input_stream; key->parser = parser; @@ -232,7 +234,7 @@ typedef struct { static parse_result_t* parse_sequence(void *env, parse_state_t *state) { sequence_t *s = (sequence_t*)env; - GSequence *seq = g_sequence_new(NULL); + counted_array_t *seq = carray_new_sized(state->arena, (s->len > 0) ? s->len : 4); for (size_t i=0; i<s->len; ++i) { parse_result_t *tmp = do_parse(s->p_array[i], state); // if the interim parse fails, the whole thing fails @@ -240,7 +242,7 @@ static parse_result_t* parse_sequence(void *env, parse_state_t *state) { return NULL; } else { if (tmp->ast) - g_sequence_append(seq, (void*)tmp->ast); + carray_append(seq, (void*)tmp->ast); } } parsed_token_t *tok = a_new(parsed_token_t, 1); @@ -320,36 +322,34 @@ typedef struct { const parser_t *p2; } two_parsers_t; -void accumulate_size(gpointer pr, gpointer acc) { - size_t tmp = GPOINTER_TO_SIZE(acc); +size_t accumulate_size(parse_result_t *pr) { if (NULL != ((parse_result_t*)pr)->ast) { - switch(((parse_result_t*)pr)->ast->token_type) { + switch (pr->ast->token_type) { case TT_BYTES: - tmp += ((parse_result_t*)pr)->ast->bytes.len; - acc = GSIZE_TO_POINTER(tmp); - break; + return pr->ast->bytes.len; case TT_SINT: case TT_UINT: - tmp += 8; - acc = GSIZE_TO_POINTER(tmp); - break; - case TT_SEQUENCE: - g_sequence_foreach(((parse_result_t*)pr)->ast->seq, accumulate_size, acc); - break; + return sizeof(pr->ast->uint); + case TT_SEQUENCE: { + counted_array_t *arr = pr->ast->seq; + size_t ret = 0; + for (size_t i = 0; i < arr->used; i++) + ret += accumulate_size(arr->elements[i]); + return ret; + } default: - break; + return 0; } } // no else, if the AST is null then acc doesn't change + return 0; } size_t token_length(parse_result_t *pr) { - size_t ret = 0; if (NULL == pr) { - return ret; + return 0; } else { - accumulate_size(pr, GSIZE_TO_POINTER(ret)); + return accumulate_size(pr); } - return ret; } static parse_result_t* parse_butnot(void *env, parse_state_t *state) { @@ -464,9 +464,10 @@ typedef struct { size_t count; bool min_p; } repeat_t; + static parse_result_t *parse_many(void* env, parse_state_t *state) { repeat_t *env_ = (repeat_t*) env; - GSequence *seq = g_sequence_new(NULL); + counted_array_t *seq = carray_new_sized(state->arena, (env_->count > 0 ? env_->count : 4)); size_t count = 0; input_stream_t bak; while (env_->min_p || env_->count > count) { @@ -480,7 +481,7 @@ static parse_result_t *parse_many(void* env, parse_state_t *state) { if (!elem) goto err0; if (elem->ast) - g_sequence_append(seq, (gpointer)elem->ast); + carray_append(seq, (void*)elem->ast); count++; } if (count < env_->count) @@ -497,7 +498,6 @@ static parse_result_t *parse_many(void* env, parse_state_t *state) { goto succ; } err: - g_sequence_free(seq); state->input_stream = bak; return NULL; } @@ -653,10 +653,10 @@ parse_result_t* parse(const parser_t* parser, const uint8_t* input, size_t lengt parse_state->input_stream.length = length; parse_state->arena = arena; parse_result_t *res = do_parse(parser, parse_state); - // tear down the parse state. For now, leak like a sieve. - // BUG: Leaks like a sieve. - // TODO(thequux): Pull in the arena allocator. + // tear down the parse state g_hash_table_destroy(parse_state->cache); + if (!res) + delete_arena(parse_state->arena); return res; } @@ -851,11 +851,12 @@ static void test_xor(void) { static void test_many(void) { const parser_t *many_ = many(choice(ch('a'), ch('b'), NULL)); - - g_check_parse_ok(many_, "adef", 4, "(s0x61)"); - g_check_parse_ok(many_, "bdef", 4, "(s0x62)"); - g_check_parse_ok(many_, "aabbabadef", 10, "(s0x61 s0x61 s0x62 s0x62 s0x61 s0x62 s0x61)"); - g_check_parse_ok(many_, "daabbabadef", 11, "()"); + for (int i = 0; i < 100; i++) { + g_check_parse_ok(many_, "adef", 4, "(s0x61)"); + g_check_parse_ok(many_, "bdef", 4, "(s0x62)"); + g_check_parse_ok(many_, "aabbabadef", 10, "(s0x61 s0x61 s0x62 s0x62 s0x61 s0x62 s0x61)"); + g_check_parse_ok(many_, "daabbabadef", 11, "()"); + } } static void test_many1(void) { diff --git a/src/hammer.h b/src/hammer.h index 054e3f82b001c0ba9fd1c3ccb50bff99bcd0ff27..adbfcc72e63de3145445ddcc034205f370d3079a 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -61,6 +61,13 @@ typedef enum token_type { TT_MAX } token_type_t; +typedef struct counted_array { + size_t capacity; + size_t used; + arena_t arena; + void **elements; +} counted_array_t; + typedef struct parsed_token { token_type_t token_type; union { @@ -72,10 +79,12 @@ typedef struct parsed_token { uint64_t uint; double dbl; float flt; - GSequence *seq; // a sequence of parsed_token_t's + counted_array_t *seq; // a sequence of parsed_token_t's }; } parsed_token_t; + + /* If a parse fails, the parse result will be NULL. * If a parse is successful but there's nothing there (i.e., if end_p succeeds) then there's a parse result but its ast is NULL. */ diff --git a/src/internal.h b/src/internal.h index 1a65a071fde698737545f5d4905667a649a12cb2..6f500791ffe8c7c1fee5696e6c60c1a3fd1de7d5 100644 --- a/src/internal.h +++ b/src/internal.h @@ -32,6 +32,7 @@ #define false 0 #define true 1 + typedef struct parser_cache_key { input_stream_t input_pos; const parser_t *parser; @@ -81,4 +82,13 @@ guint djbhash(const uint8_t *buf, size_t len); char* write_result_unamb(const parsed_token_t* tok); void pprint(const parsed_token_t* tok, int indent, int delta); +counted_array_t *carray_new_sized(arena_t arena, size_t size); +counted_array_t *carray_new(arena_t arena); +void carray_append(counted_array_t *array, void* item); + +#if 0 +#include <malloc.h> +#define arena_malloc(a, s) malloc(s) +#endif + #endif // #ifndef HAMMER_INTERNAL__H diff --git a/src/pprint.c b/src/pprint.c index 106e7844f2bc3ca1c010703c4d0d3888403af672..c7f58c918434bf7263311431bbc25a5f31ffd925 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -52,13 +52,9 @@ void pprint(const parsed_token_t* tok, int indent, int delta) { printf("%*ss %#lx\n", indent, "", tok->uint); break; case TT_SEQUENCE: { - GSequenceIter *it; printf("%*s[\n", indent, ""); - for (it = g_sequence_get_begin_iter(tok->seq); - !g_sequence_iter_is_end(it); - it = g_sequence_iter_next(it)) { - parsed_token_t* subtok = g_sequence_get(it); - pprint(subtok, indent + delta, delta); + for (size_t i = 0; i < tok->seq->used; i++) { + pprint(tok->seq->elements[i], indent + delta, delta); } printf("%*s]\n", indent, ""); } // TODO: implement this @@ -123,18 +119,11 @@ static void unamb_sub(const parsed_token_t* tok, struct result_buf *buf) { append_buf(buf, "ERR", 3); break; case TT_SEQUENCE: { - GSequenceIter *it; - int at_begin = 1; append_buf_c(buf, '('); - for (it = g_sequence_get_begin_iter(tok->seq); - !g_sequence_iter_is_end(it); - it = g_sequence_iter_next(it)) { - parsed_token_t* subtok = g_sequence_get(it); - if (at_begin) - at_begin = 0; - else + for (size_t i = 0; i < tok->seq->used; i++) { + if (i > 0) append_buf_c(buf, ' '); - unamb_sub(subtok, buf); + unamb_sub(tok->seq->elements[i], buf); } append_buf_c(buf, ')'); } @@ -153,6 +142,7 @@ char* write_result_unamb(const parsed_token_t* tok) { .capacity = 16 }; unamb_sub(tok, &buf); + append_buf_c(&buf, 0); return buf.output; } diff --git a/src/test_suite.h b/src/test_suite.h index f313e5d3b5c3e4078f6d64c14996364fa34d9ec9..d27d283c062d121c17cb25e87c14122d8b6bc37c 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -17,6 +17,7 @@ #ifndef HAMMER_TEST_SUITE__H #define HAMMER_TEST_SUITE__H +#include <malloc.h> // Equivalent to g_assert_*, but not using g_assert... #define g_check_inttype(fmt, typ, n1, op, n2) do { \ @@ -76,12 +77,14 @@ } else { \ char* cres = write_result_unamb(res->ast); \ g_check_string(cres, ==, result); \ + g_free(cres); \ arena_stats_t stats; \ allocator_stats(res->arena, &stats); \ g_test_message("Parse used %zd bytes, wasted %zd bytes. " \ "Inefficiency: %5f%%", \ stats.used, stats.wasted, \ stats.wasted * 100. / (stats.used+stats.wasted)); \ + delete_arena(res->arena); \ } \ } while(0)