diff --git a/.gitignore b/.gitignore index 1a1ad509c47b8ad6eabc36103df41d623e3744ef..65465f4273f6d08208f273676fc465345e01c4f8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,28 +1,36 @@ +# generated files *.o -*~ +*.os +*.so *.a +*.gem +*.pyc *.class -*.so -jni/com*.h -src/test_suite -lib/hush +libhammer.pc +build/ examples/dns examples/base64 examples/base64_sem1 examples/base64_sem2 -TAGS -*.swp -*.swo +jni/com*.h +src/test_suite + +# coverage and profiling stuff +*.gcov +*.gcda +*.gcno +gmon.out + +# editor leftovers +*~ +*.sw? \#* + +# misc +lib/hush +TAGS docs/milestone2.dot.pdf *.dot.pdf Session.vim -*.gcov cscope.out -build/ -libhammer.pc .sconsign.dblite -*.os -*.pyc -*.gem -/bin/ diff --git a/SConstruct b/SConstruct index e03d2728fcdedc93cc11828d1d8ba1832a723dc1..1aea467fe3cf8b5869039b5b2c6687fd333f939f 100644 --- a/SConstruct +++ b/SConstruct @@ -73,6 +73,12 @@ AddOption('--coverage', action='store_true', help='Build with coverage instrumentation') +AddOption('--force-debug', + dest='force_debug', + default=False, + action='store_true', + help='Build with debug symbols, even in the opt variant') + AddOption('--gprof', dest='gprof', default=False, @@ -112,7 +118,8 @@ if env['CC'] == 'cl': ] ) else: - env.MergeFlags('-std=c99 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable') + # -Wno-clobbered only really works with gcc >= 4.2.x, but ... scons + env.MergeFlags('-std=c99 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable -Wno-clobbered') # Linker options if env['PLATFORM'] == 'darwin': @@ -126,21 +133,25 @@ else: env.MergeFlags('-lrt') if GetOption('coverage'): - env.Append(CFLAGS=['--coverage'], - CXXFLAGS=['--coverage'], - LDFLAGS=['--coverage']) + env.Append(CCFLAGS=['--coverage'], + LDFLAGS=['--coverage'], + LINKFLAGS=['--coverage']) if env['CC'] == 'gcc': env.Append(LIBS=['gcov']) else: env.ParseConfig('llvm-config --ldflags') +if GetOption('force_debug'): + if env['CC'] == 'cl': + env.Append(CCFLAGS=['/Z7']) + else: + env.Append(CCFLAGS=['-g']) + if GetOption('gprof'): if env['CC'] == 'gcc' and env['CXX'] == 'g++': - env.Append(CFLAGS=['-pg', '-fprofile-arcs'], - CXXFLAGS=['-pg', '-fprofile-arcs'], - LDFLAGS=['-pg', '-fprofile-arcs'], - LINKFLAGS=['-pg', '-fprofile-arcs']) - env.Append(LIBS=['gcov']) + env.Append(CCFLAGS=['-pg'], + LDFLAGS=['-pg'], + LINKFLAGS=['-pg']) env['GPROF'] = 1 else: print("Can only use gprof with gcc") diff --git a/src/allocator.c b/src/allocator.c index cc259e605c56573b506f39194793e804ab4bf8b6..2ff5cacaa0e05da47ac851ef1ff71239ed5cde3b 100644 --- a/src/allocator.c +++ b/src/allocator.c @@ -29,24 +29,35 @@ struct arena_link { // For efficiency, we should probably allocate the arena links in // their own slice, and link to a block directly. That can be // implemented later, though, with no change in interface. - struct arena_link *next; // It is crucial that this be the first item; so that - // any arena link can be casted to struct arena_link**. - + struct arena_link *next; size_t free; size_t used; uint8_t rest[]; -} ; +}; struct HArena_ { struct arena_link *head; struct HAllocator_ *mm__; + /* does mm__ zero blocks for us? */ + bool malloc_zeros; size_t block_size; size_t used; size_t wasted; +#ifdef DETAILED_ARENA_STATS + size_t mm_malloc_count, mm_malloc_bytes; + size_t memset_count, memset_bytes; + size_t arena_malloc_count, arena_malloc_bytes; + size_t arena_su_malloc_count, arena_su_malloc_bytes; + size_t arena_si_malloc_count, arena_si_malloc_bytes; + size_t arena_lu_malloc_count, arena_lu_malloc_bytes; + size_t arena_li_malloc_count, arena_li_malloc_bytes; +#endif jmp_buf *except; }; +static void * h_arena_malloc_raw(HArena *arena, size_t size, bool need_zero); + void* h_alloc(HAllocator* mm__, size_t size) { void *p = mm__->alloc(mm__, size); if(!p) @@ -61,7 +72,6 @@ HArena *h_new_arena(HAllocator* mm__, size_t block_size) { struct arena_link *link = (struct arena_link*)h_alloc(mm__, sizeof(struct arena_link) + block_size); assert(ret != NULL); assert(link != NULL); - memset(link, 0, sizeof(struct arena_link) + block_size); link->free = block_size; link->used = 0; link->next = NULL; @@ -69,6 +79,19 @@ HArena *h_new_arena(HAllocator* mm__, size_t block_size) { ret->block_size = block_size; ret->used = 0; ret->mm__ = mm__; +#ifdef DETAILED_ARENA_STATS + ret->mm_malloc_count = 2; + ret->mm_malloc_bytes = sizeof(*ret) + sizeof(struct arena_link) + block_size; + ret->memset_count = 0; + ret->memset_bytes = 0; + ret->arena_malloc_count = ret->arena_malloc_bytes = 0; + ret->arena_su_malloc_count = ret->arena_su_malloc_bytes = 0; + ret->arena_si_malloc_count = ret->arena_si_malloc_bytes = 0; + ret->arena_lu_malloc_count = ret->arena_lu_malloc_bytes = 0; + ret->arena_li_malloc_count = ret->arena_li_malloc_bytes = 0; +#endif + /* XXX provide a mechanism to indicate mm__ returns zeroed blocks */ + ret->malloc_zeros = false; ret->wasted = sizeof(struct arena_link) + sizeof(struct HArena_) + block_size; ret->except = NULL; return ret; @@ -90,39 +113,120 @@ static void *alloc_block(HArena *arena, size_t size) return block; } -void* h_arena_malloc(HArena *arena, size_t size) { +void * h_arena_malloc_noinit(HArena *arena, size_t size) { + return h_arena_malloc_raw(arena, size, false); +} + +void * h_arena_malloc(HArena *arena, size_t size) { + return h_arena_malloc_raw(arena, size, true); +} + +static void * h_arena_malloc_raw(HArena *arena, size_t size, + bool need_zero) { + struct arena_link *link = NULL; + void *ret = NULL; + if (size <= arena->head->free) { - // fast path.. - void* ret = arena->head->rest + arena->head->used; + /* fast path.. */ + ret = arena->head->rest + arena->head->used; arena->used += size; arena->wasted -= size; arena->head->used += size; arena->head->free -= size; - return ret; + +#ifdef DETAILED_ARENA_STATS + ++(arena->arena_malloc_count); + arena->arena_malloc_bytes += size; + if (need_zero) { + ++(arena->arena_si_malloc_count); + arena->arena_si_malloc_bytes += size; + } else { + ++(arena->arena_su_malloc_count); + arena->arena_su_malloc_bytes += size; + } +#endif } else if (size > arena->block_size) { - // We need a new, dedicated block for it, because it won't fit in a standard sized one. - // This involves some annoying casting... - arena->used += size; - arena->wasted += sizeof(struct arena_link*); - void* link = alloc_block(arena, size + sizeof(struct arena_link*)); + /* + * We need a new, dedicated block for it, because it won't fit in a + * standard sized one. + * + * NOTE: + * + * We used to do a silly casting dance to treat blocks like this + * as special cases and make the used/free fields part of the allocated + * block, but the old code was not really proper portable C and depended + * on a bunch of implementation-specific behavior. We could have done it + * better with a union in struct arena_link, but the memory savings is + * only 0.39% for a 64-bit machine, a 4096-byte block size and all + * large allocations *only just one byte* over the block size, so I + * question the utility of it. We do still slip the large block in + * one position behind the list head so it doesn't cut off a partially + * filled list head. + * + * -- andrea + */ + link = alloc_block(arena, size + sizeof(struct arena_link)); assert(link != NULL); - memset(link, 0, size + sizeof(struct arena_link*)); - *(struct arena_link**)link = arena->head->next; - arena->head->next = (struct arena_link*)link; - return (void*)(((uint8_t*)link) + sizeof(struct arena_link*)); + arena->used += size; + arena->wasted += sizeof(struct arena_link); + link->used = size; + link->free = 0; + link->next = arena->head->next; + arena->head->next = link; + ret = link->rest; + +#ifdef DETAILED_ARENA_STATS + ++(arena->arena_malloc_count); + arena->arena_malloc_bytes += size; + if (need_zero) { + ++(arena->arena_li_malloc_count); + arena->arena_li_malloc_bytes += size; + } else { + ++(arena->arena_lu_malloc_count); + arena->arena_lu_malloc_bytes += size; + } +#endif } else { - // we just need to allocate an ordinary new block. - struct arena_link *link = alloc_block(arena, sizeof(struct arena_link) + arena->block_size); + /* we just need to allocate an ordinary new block. */ + link = alloc_block(arena, sizeof(struct arena_link) + arena->block_size); assert(link != NULL); - memset(link, 0, sizeof(struct arena_link) + arena->block_size); +#ifdef DETAILED_ARENA_STATS + ++(arena->mm_malloc_count); + arena->mm_malloc_bytes += sizeof(struct arena_link) + arena->block_size; +#endif link->free = arena->block_size - size; link->used = size; link->next = arena->head; arena->head = link; arena->used += size; arena->wasted += sizeof(struct arena_link) + arena->block_size - size; - return link->rest; + ret = link->rest; + +#ifdef DETAILED_ARENA_STATS + ++(arena->arena_malloc_count); + arena->arena_malloc_bytes += size; + if (need_zero) { + ++(arena->arena_si_malloc_count); + arena->arena_si_malloc_bytes += size; + } else { + ++(arena->arena_su_malloc_count); + arena->arena_su_malloc_bytes += size; + } +#endif } + + /* + * Zeroize if necessary + */ + if (need_zero && !(arena->malloc_zeros)) { + memset(ret, 0, size); +#ifdef DETAILED_ARENA_STATS + ++(arena->memset_count); + arena->memset_bytes += size; +#endif + } + + return ret; } void h_arena_free(HArena *arena, void* ptr) { @@ -146,4 +250,20 @@ void h_delete_arena(HArena *arena) { void h_allocator_stats(HArena *arena, HArenaStats *stats) { stats->used = arena->used; stats->wasted = arena->wasted; +#ifdef DETAILED_ARENA_STATS + stats->mm_malloc_count = arena->mm_malloc_count; + stats->mm_malloc_bytes = arena->mm_malloc_bytes; + stats->memset_count = arena->memset_count; + stats->memset_bytes = arena->memset_bytes; + stats->arena_malloc_count = arena->arena_malloc_count; + stats->arena_malloc_bytes = arena->arena_malloc_bytes; + stats->arena_su_malloc_count = arena->arena_su_malloc_count; + stats->arena_su_malloc_bytes = arena->arena_su_malloc_bytes; + stats->arena_si_malloc_count = arena->arena_si_malloc_count; + stats->arena_si_malloc_bytes = arena->arena_si_malloc_bytes; + stats->arena_lu_malloc_count = arena->arena_lu_malloc_count; + stats->arena_lu_malloc_bytes = arena->arena_lu_malloc_bytes; + stats->arena_li_malloc_count = arena->arena_li_malloc_count; + stats->arena_li_malloc_bytes = arena->arena_li_malloc_bytes; +#endif } diff --git a/src/allocator.h b/src/allocator.h index dc88af68f22895f584065a491463b3f8576c09e9..06d1e6f59dd32987979079c4a7b01d09b13547e6 100644 --- a/src/allocator.h +++ b/src/allocator.h @@ -38,6 +38,8 @@ extern "C" { # define ATTR_MALLOC(n) #endif +/* #define DETAILED_ARENA_STATS */ + // TODO(thequux): Turn this into an "HAllocatorVtable", and add a wrapper that also takes an environment pointer. typedef struct HAllocator_ { void* (*alloc)(struct HAllocator_* allocator, size_t size); @@ -51,6 +53,7 @@ typedef struct HArena_ HArena ; // hidden implementation HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default... +void* h_arena_malloc_noinit(HArena *arena, size_t count) ATTR_MALLOC(2); void* h_arena_malloc(HArena *arena, size_t count) ATTR_MALLOC(2); void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers. void h_delete_arena(HArena *arena); @@ -59,6 +62,26 @@ void h_arena_set_except(HArena *arena, jmp_buf *except); typedef struct { size_t used; size_t wasted; +#ifdef DETAILED_ARENA_STATS + size_t mm_malloc_count; + size_t mm_malloc_bytes; + size_t memset_count; + size_t memset_bytes; + size_t arena_malloc_count; + size_t arena_malloc_bytes; + /* small, uninited */ + size_t arena_su_malloc_count; + size_t arena_su_malloc_bytes; + /* small, inited */ + size_t arena_si_malloc_count; + size_t arena_si_malloc_bytes; + /* large, uninited */ + size_t arena_lu_malloc_count; + size_t arena_lu_malloc_bytes; + /* large, inited */ + size_t arena_li_malloc_count; + size_t arena_li_malloc_bytes; +#endif } HArenaStats; void h_allocator_stats(HArena *arena, HArenaStats *stats); diff --git a/src/backends/packrat.c b/src/backends/packrat.c index 276dfd171f4c8a13ab68953a69f5bbd733c522ab..ab0f8748d963b141375f54525b7217e978f43ff4 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -3,6 +3,17 @@ #include "../internal.h" #include "../parsers/parser_internal.h" +/* #define DETAILED_PACKRAT_STATISTICS */ + +#ifdef DETAILED_PACKRAT_STATISTICS +static size_t packrat_hash_count = 0; +static size_t packrat_hash_bytes = 0; +static size_t packrat_cmp_count = 0; +static size_t packrat_cmp_bytes = 0; +#endif + +static uint32_t cache_key_hash(const void* key); + // short-hand for creating lowlevel parse cache values (parse result case) static HParserCacheValue * cached_result(HParseState *state, HParseResult *result) { @@ -56,31 +67,38 @@ static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HPa return tmp_res; } -HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { - HParserCacheValue *cached = h_hashtable_get(state->cache, k); +HParserCacheValue* recall(HParserCacheKey *k, HParseState *state, HHashValue keyhash) { + HParserCacheValue *cached = h_hashtable_get_precomp(state->cache, k, keyhash); HRecursionHead *head = h_hashtable_get(state->recursion_heads, &k->input_pos); - if (!head) { // No heads found + + if (!head) { + /* No heads found */ return cached; - } else { // Some heads found + } else { + /* Some heads found */ if (!cached && head->head_parser != k->parser && !h_slist_find(head->involved_set, k->parser)) { - // Nothing in the cache, and the key parser is not involved + /* Nothing in the cache, and the key parser is not involved */ cached = cached_result(state, NULL); cached->input_stream = k->input_pos; } if (h_slist_find(head->eval_set, k->parser)) { - // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. + /* + * Something is in the cache, and the key parser is in the eval set. + * Remove the key parser from the eval set of the head. + */ head->eval_set = h_slist_remove_all(head->eval_set, k->parser); HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); - // update the cache + /* update the cache */ if (!cached) { - cached = cached_result(state, tmp_res); - h_hashtable_put(state->cache, k, cached); + cached = cached_result(state, tmp_res); + h_hashtable_put_precomp(state->cache, k, cached, keyhash); } else { - cached->value_type = PC_RIGHT; - cached->right = tmp_res; - cached->input_stream = state->input_stream; + cached->value_type = PC_RIGHT; + cached->right = tmp_res; + cached->input_stream = state->input_stream; } } + return cached; } } @@ -180,36 +198,50 @@ HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growab /* Warth's recursion. Hi Alessandro! */ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { HParserCacheKey *key = a_new(HParserCacheKey, 1); + HHashValue keyhash; + HLeftRec *base = NULL; + HParserCacheValue *m = NULL, *cached = NULL; + key->input_pos = state->input_stream; key->parser = parser; - HParserCacheValue *m = NULL; + keyhash = cache_key_hash(key); + if (parser->vtable->higher) { - m = recall(key, state); + m = recall(key, state, keyhash); } - // check to see if there is already a result for this object... + + /* check to see if there is already a result for this object... */ if (!m) { - // It doesn't exist, so create a dummy result to cache - HLeftRec *base = NULL; - // But only cache it now if there's some chance it could grow; primitive parsers can't + /* + * But only cache it now if there's some chance it could grow; primitive + * parsers can't + */ if (parser->vtable->higher) { base = a_new(HLeftRec, 1); base->seed = NULL; base->rule = parser; base->head = NULL; h_slist_push(state->lr_stack, base); - // cache it - h_hashtable_put(state->cache, key, cached_lr(state, base)); - // parse the input + /* cache it */ + h_hashtable_put_precomp(state->cache, key, + cached_lr(state, base), keyhash); } + + /* parse the input */ HParseResult *tmp_res = perform_lowlevel_parse(state, parser); if (parser->vtable->higher) { - // the base variable has passed equality tests with the cache + /* the base variable has passed equality tests with the cache */ h_slist_pop(state->lr_stack); - // update the cached value to our new position - HParserCacheValue *cached = h_hashtable_get(state->cache, key); + /* update the cached value to our new position */ + cached = h_hashtable_get_precomp(state->cache, key, keyhash); assert(cached != NULL); cached->input_stream = state->input_stream; } - // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one + + /* + * setupLR, used below, mutates the LR to have a head if appropriate, + * so we check to see if we have one + */ if (!base || NULL == base->head) { - h_hashtable_put(state->cache, key, cached_result(state, tmp_res)); + h_hashtable_put_precomp(state->cache, key, + cached_result(state, tmp_res), keyhash); return tmp_res; } else { base->seed = tmp_res; @@ -217,7 +249,7 @@ HParseResult* h_do_parse(const HParser* parser, HParseState *state) { return res; } } else { - // it exists! + /* it exists! */ state->input_stream = m->input_stream; if (PC_LEFT == m->value_type) { setupLR(parser, state, m->left); @@ -239,17 +271,34 @@ void h_packrat_free(HParser *parser) { } static uint32_t cache_key_hash(const void* key) { +#ifdef DETAILED_PACKRAT_STATISTICS + ++(packrat_hash_count); + packrat_hash_bytes += sizeof(HParserCacheKey); +#endif return h_djbhash(key, sizeof(HParserCacheKey)); } + static bool cache_key_equal(const void* key1, const void* key2) { +#ifdef DETAILED_PACKRAT_STATISTICS + ++(packrat_cmp_count); + packrat_cmp_bytes += sizeof(HParserCacheKey); +#endif return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0; } static uint32_t pos_hash(const void* key) { +#ifdef DETAILED_PACKRAT_STATISTICS + ++(packrat_hash_count); + packrat_hash_bytes += sizeof(HInputStream); +#endif return h_djbhash(key, sizeof(HInputStream)); } static bool pos_equal(const void* key1, const void* key2) { +#ifdef DETAILED_PACKRAT_STATISTICS + ++(packrat_cmp_count); + packrat_cmp_bytes += sizeof(HInputStream); +#endif return memcmp(key1, key2, sizeof(HInputStream)) == 0; } @@ -271,6 +320,7 @@ HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HInputStr parse_state->lr_stack = h_slist_new(arena); parse_state->recursion_heads = h_hashtable_new(arena, pos_equal, pos_hash); parse_state->arena = arena; + parse_state->symbol_table = NULL; HParseResult *res = h_do_parse(parser, parse_state); h_slist_free(parse_state->lr_stack); h_hashtable_free(parse_state->recursion_heads); diff --git a/src/backends/regex.c b/src/backends/regex.c index c10c25890fd5bfdf5e3e9b37a64e988fd3010749..f26abfda67af76900010053c6a6003fad1df55e7 100644 --- a/src/backends/regex.c +++ b/src/backends/regex.c @@ -7,6 +7,8 @@ #undef a_new #define a_new(typ, count) a_new_(arena, typ, count) +#undef a_new0 +#define a_new0(typ, count) a_new0_(arena, typ, count) // Stack VM typedef enum HSVMOp_ { SVM_PUSH, // Push a mark. There is no VM insn to push an object. @@ -67,13 +69,13 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ goto end; HSArray *heads_n = heads_a, *heads_p = heads_b; - uint8_t *insn_seen = a_new(uint8_t, prog->length); // 0 -> not seen, 1->processed, 2->queued - HRVMThread *ip_queue = a_new(HRVMThread, prog->length); + uint8_t *insn_seen = a_new0(uint8_t, prog->length); // 0 -> not seen, 1->processed, 2->queued + HRVMThread *ip_queue = a_new0(HRVMThread, prog->length); size_t ipq_top; #define THREAD ip_queue[ipq_top-1] #define PUSH_SVM(op_, arg_) do { \ - HRVMTrace *nt = a_new(HRVMTrace, 1); \ + HRVMTrace *nt = a_new0(HRVMTrace, 1); \ nt->arg = (arg_); \ nt->opcode = (op_); \ nt->next = THREAD.trace; \ @@ -81,7 +83,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ THREAD.trace = nt; \ } while(0) - ((HRVMTrace*)h_sarray_set(heads_n, 0, a_new(HRVMTrace, 1)))->opcode = SVM_NOP; // Initial thread + ((HRVMTrace*)h_sarray_set(heads_n, 0, a_new0(HRVMTrace, 1)))->opcode = SVM_NOP; // Initial thread size_t off = 0; int live_threads = 1; // May be redundant @@ -257,7 +259,7 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, if (!svm_stack_ensure_cap(mm__, ctx, 1)) { goto fail; } - tmp_res = a_new(HParsedToken, 1); + tmp_res = a_new0(HParsedToken, 1); tmp_res->token_type = TT_MARK; tmp_res->index = cur->input_pos; tmp_res->bit_offset = 0; @@ -288,7 +290,7 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, break; case SVM_ACCEPT: assert(ctx->stack_count <= 1); - HParseResult *res = a_new(HParseResult, 1); + HParseResult *res = a_new0(HParseResult, 1); if (ctx->stack_count == 1) { res->ast = ctx->stack[0]; } else { diff --git a/src/datastructures.c b/src/datastructures.c index 451afb94ec39932dfe1f8c58aa82c0777f73b011..6971e0e0bee2fc8bbc644a0c6d9f9967d4ab6a01 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -9,12 +9,14 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size) { - HCountedArray *ret = h_arena_malloc(arena, sizeof(HCountedArray)); + /* _noinit here because we init all the elements below */ + HCountedArray *ret = h_arena_malloc_noinit(arena, sizeof(HCountedArray)); if (size == 0) size = 1; ret->used = 0; ret->capacity = size; ret->arena = arena; + /* we actually want to zero these */ ret->elements = h_arena_malloc(arena, sizeof(void*) * size); return ret; } @@ -24,12 +26,21 @@ HCountedArray *h_carray_new(HArena * arena) { } void h_carray_append(HCountedArray *array, void* item) { + HParsedToken **elements; + if (array->used >= array->capacity) { - HParsedToken **elements = h_arena_malloc(array->arena, (array->capacity *= 2) * sizeof(void*)); + /* _noinit here; we init below */ + elements = h_arena_malloc_noinit(array->arena, + (array->capacity *= 2) * sizeof(void*)); for (size_t i = 0; i < array->used; i++) elements[i] = array->elements[i]; for (size_t i = array->used; i < array->capacity; i++) elements[i] = 0; + /* + * XXX I hope we don't use this much, because h_arena_free() doesn't + * quite seem to be there and doing a lot of this would get pretty + * wasteful. + */ h_arena_free(array->arena, array->elements); array->elements = elements; } @@ -38,7 +49,8 @@ void h_carray_append(HCountedArray *array, void* item) { // HSlist HSlist* h_slist_new(HArena *arena) { - HSlist *ret = h_arena_malloc(arena, sizeof(HSlist)); + /* _noinit here; we set every element of ret below */ + HSlist *ret = h_arena_malloc_noinit(arena, sizeof(HSlist)); ret->head = NULL; ret->arena = arena; return ret; @@ -53,8 +65,12 @@ HSlist* h_slist_copy(HSlist *slist) { tail = ret->head; head = head->next; while (head != NULL) { - // append head item to tail in a new node - HSlistNode *node = h_arena_malloc(slist->arena, sizeof(HSlistNode)); + /* + * append head item to tail in a new node + * + * use _noinit; we set every element of node after we allocate + */ + HSlistNode *node = h_arena_malloc_noinit(slist->arena, sizeof(HSlistNode)); node->elem = head->elem; node->next = NULL; tail = tail->next = node; @@ -85,10 +101,11 @@ void* h_slist_pop(HSlist *slist) { } void h_slist_push(HSlist *slist, void* item) { - HSlistNode *hnode = h_arena_malloc(slist->arena, sizeof(HSlistNode)); + /* use _noinit; we set every element of node */ + HSlistNode *hnode = h_arena_malloc_noinit(slist->arena, sizeof(HSlistNode)); hnode->elem = item; hnode->next = slist->head; - // write memory barrier here. + /* write memory barrier here. */ slist->head = hnode; } @@ -132,30 +149,34 @@ void h_slist_free(HSlist *slist) { } HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc) { - HHashTable *ht = h_arena_malloc(arena, sizeof(HHashTable)); + /* _noinit because all fields are set below */ + HHashTable *ht = h_arena_malloc_noinit(arena, sizeof(HHashTable)); ht->hashFunc = hashFunc; ht->equalFunc = equalFunc; ht->capacity = 64; // to start; should be tuned later... ht->used = 0; ht->arena = arena; - ht->contents = h_arena_malloc(arena, sizeof(HHashTableEntry) * ht->capacity); + /* _noinit because all fields of all entries are set in the loop */ + ht->contents = h_arena_malloc_noinit(arena, + sizeof(HHashTableEntry) * ht->capacity); for (size_t i = 0; i < ht->capacity; i++) { ht->contents[i].key = NULL; ht->contents[i].value = NULL; ht->contents[i].next = NULL; ht->contents[i].hashval = 0; } - //memset(ht->contents, 0, sizeof(HHashTableEntry) * ht->capacity); + return ht; } -void* h_hashtable_get(const HHashTable* ht, const void* key) { - HHashValue hashval = ht->hashFunc(key); +void * h_hashtable_get_precomp(const HHashTable *ht, const void *key, + HHashValue hashval) { + HHashTableEntry *hte = NULL; + #ifdef CONSISTENCY_CHECK assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2 #endif - HHashTableEntry *hte = NULL; for (hte = &ht->contents[hashval & (ht->capacity - 1)]; hte != NULL; hte = hte->next) { @@ -169,35 +190,63 @@ void* h_hashtable_get(const HHashTable* ht, const void* key) { return hte->value; } } + return NULL; } +void * h_hashtable_get(const HHashTable *ht, const void *key) { + HHashValue hashval = ht->hashFunc(key); + + return h_hashtable_get_precomp(ht, key, hashval); +} + void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry* new_entry); void h_hashtable_ensure_capacity(HHashTable* ht, size_t n) { + HHashTableEntry *old_contents, *new_contents; bool do_resize = false; size_t old_capacity = ht->capacity; while (n * 1.3 > ht->capacity) { ht->capacity *= 2; do_resize = true; } - if (!do_resize) - return; - HHashTableEntry *old_contents = ht->contents; - HHashTableEntry *new_contents = h_arena_malloc(ht->arena, sizeof(HHashTableEntry) * ht->capacity); - ht->contents = new_contents; - ht->used = 0; - memset(new_contents, 0, sizeof(HHashTableEntry) * ht->capacity); - for (size_t i = 0; i < old_capacity; ++i) - for (HHashTableEntry *entry = &old_contents[i]; - entry; - entry = entry->next) - if (entry->key) - h_hashtable_put_raw(ht, entry); - //h_arena_free(ht->arena, old_contents); + + if (do_resize) { + old_contents = ht->contents; + /* _noinit because we set the whole thing below */ + new_contents = h_arena_malloc_noinit(ht->arena, + sizeof(HHashTableEntry) * ht->capacity); + ht->contents = new_contents; + ht->used = 0; + memset(new_contents, 0, sizeof(HHashTableEntry) * ht->capacity); + for (size_t i = 0; i < old_capacity; ++i) { + for (HHashTableEntry *entry = &old_contents[i]; + entry; + entry = entry->next) { + if (entry->key) { + h_hashtable_put_raw(ht, entry); + } + } + } + /* h_arena_free(ht->arena, old_contents); */ + } +} + +void h_hashtable_put_precomp(HHashTable *ht, const void *key, void *value, + HHashValue hashval) { + HHashTableEntry entry = { + .key = key, + .value = value, + .hashval = hashval + }; + + /* Rebalance if necessary */ + h_hashtable_ensure_capacity(ht, ht->used + 1); + /* Insert it */ + h_hashtable_put_raw(ht, &entry); } -void h_hashtable_put(HHashTable* ht, const void* key, void* value) { +void h_hashtable_put(HHashTable *ht, const void *key, void *value) { // # Start with a rebalancing h_hashtable_ensure_capacity(ht, ht->used + 1); @@ -227,7 +276,7 @@ void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry *new_entry) { } // Add a new link... assert (hte->next == NULL); - hte->next = h_arena_malloc(ht->arena, sizeof(HHashTableEntry)); + hte->next = h_arena_malloc_noinit(ht->arena, sizeof(HHashTableEntry)); hte = hte->next; hte->next = NULL; ht->used++; @@ -388,11 +437,26 @@ HHashValue h_hash_ptr(const void *p) { } uint32_t h_djbhash(const uint8_t *buf, size_t len) { - uint32_t hash = 5381; + uint32_t h = 5381; + + while (len >= 16) { + h = h * 33 + buf[0]; h = h * 33 + buf[1]; + h = h * 33 + buf[2]; h = h * 33 + buf[3]; + h = h * 33 + buf[4]; h = h * 33 + buf[5]; + h = h * 33 + buf[6]; h = h * 33 + buf[7]; + h = h * 33 + buf[8]; h = h * 33 + buf[9]; + h = h * 33 + buf[10]; h = h * 33 + buf[11]; + h = h * 33 + buf[12]; h = h * 33 + buf[13]; + h = h * 33 + buf[14]; h = h * 33 + buf[15]; + len -= 16; + buf += 16; + } + while (len--) { - hash = hash * 33 + *buf++; + h = h * 33 + *buf++; } - return hash; + + return h; } void h_symbol_put(HParseState *state, const char* key, void *value) { diff --git a/src/glue.c b/src/glue.c index 37962e849283951972ed60094345bec62b57434f..da2f3af329232fc16c690441e679deefe7ed76e5 100644 --- a/src/glue.c +++ b/src/glue.c @@ -69,6 +69,7 @@ HParsedToken *h_act_flatten(const HParseResult *p, void* user_data) { res->seq = seq; res->index = p->ast->index; res->bit_offset = p->ast->bit_offset; + res->bit_length = p->bit_length; return res; } diff --git a/src/internal.h b/src/internal.h index 324fcbafc5ef7601fac70ceaea04894b8d46010d..07420681275a989925a08f6c596e3bc4a59202c1 100644 --- a/src/internal.h +++ b/src/internal.h @@ -367,16 +367,21 @@ HSlist* h_slist_remove_all(HSlist *slist, const void* item); void h_slist_free(HSlist *slist); static inline bool h_slist_empty(const HSlist *sl) { return (sl->head == NULL); } -HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc); -void* h_hashtable_get(const HHashTable* ht, const void* key); -void h_hashtable_put(HHashTable* ht, const void* key, void* value); -void h_hashtable_update(HHashTable* dst, const HHashTable *src); -void h_hashtable_merge(void *(*combine)(void *v1, const void *v2), +HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, + HHashFunc hashFunc); +void * h_hashtable_get_precomp(const HHashTable *ht, const void *key, + HHashValue hashval); +void * h_hashtable_get(const HHashTable *ht, const void *key); +void h_hashtable_put_precomp(HHashTable *ht, const void *key, + void *value, HHashValue hashval); +void h_hashtable_put(HHashTable *ht, const void *key, void *value); +void h_hashtable_update(HHashTable *dst, const HHashTable *src); +void h_hashtable_merge(void *(*combine)(void *v1, const void *v2), HHashTable *dst, const HHashTable *src); -int h_hashtable_present(const HHashTable* ht, const void* key); -void h_hashtable_del(HHashTable* ht, const void* key); -void h_hashtable_free(HHashTable* ht); -static inline bool h_hashtable_empty(const HHashTable* ht) { return (ht->used == 0); } +int h_hashtable_present(const HHashTable *ht, const void *key); +void h_hashtable_del(HHashTable *ht, const void *key); +void h_hashtable_free(HHashTable *ht); +static inline bool h_hashtable_empty(const HHashTable *ht) { return (ht->used == 0); } typedef HHashTable HHashSet; #define h_hashset_new(a,eq,hash) h_hashtable_new(a,eq,hash) diff --git a/src/parsers/bits.c b/src/parsers/bits.c index 288e3e95a45a7709e942d9987cda37c7e55819b5..2b977a27401610a071f0bac42b7404f828351eed 100644 --- a/src/parsers/bits.c +++ b/src/parsers/bits.c @@ -14,6 +14,9 @@ static HParseResult* parse_bits(void* env, HParseState *state) { result->sint = h_read_bits(&state->input_stream, env_->length, true); else result->uint = h_read_bits(&state->input_stream, env_->length, false); + result->index = 0; + result->bit_length = 0; + result->bit_offset = 0; return make_result(state->arena, result); } diff --git a/src/parsers/ch.c b/src/parsers/ch.c index 3da1091a4b71505aebdc6ed5b396084d12b1fde4..c878f9d08659a1cb6ae39f95b06c522c1a5a185f 100644 --- a/src/parsers/ch.c +++ b/src/parsers/ch.c @@ -8,6 +8,9 @@ static HParseResult* parse_ch(void* env, HParseState *state) { if (c == r) { HParsedToken *tok = a_new(HParsedToken, 1); tok->token_type = TT_UINT; tok->uint = r; + tok->index = 0; + tok->bit_length = 0; + tok->bit_offset = 0; return make_result(state->arena, tok); } else { return NULL; diff --git a/src/parsers/charset.c b/src/parsers/charset.c index a4b8c89c7daca326cf77ee9bf5c8ae4660884c56..01657386f68e9788e69635790df6efb600de4c94 100644 --- a/src/parsers/charset.c +++ b/src/parsers/charset.c @@ -10,6 +10,9 @@ static HParseResult* parse_charset(void *env, HParseState *state) { if (charset_isset(cs, in)) { HParsedToken *tok = a_new(HParsedToken, 1); tok->token_type = TT_UINT; tok->uint = in; + tok->index = 0; + tok->bit_length = 0; + tok->bit_offset = 0; return make_result(state->arena, tok); } else return NULL; diff --git a/src/parsers/end.c b/src/parsers/end.c index 85499d9348cd1df6503428a55d7a2ab878d1ef63..35e4186d430d8b48fe5cd1e41552403d6f95e562 100644 --- a/src/parsers/end.c +++ b/src/parsers/end.c @@ -4,6 +4,8 @@ static HParseResult* parse_end(void *env, HParseState *state) { if (state->input_stream.index == state->input_stream.length) { HParseResult *ret = a_new(HParseResult, 1); ret->ast = NULL; + ret->bit_length = 0; + ret->arena = state->arena; return ret; } else { return NULL; diff --git a/src/parsers/epsilon.c b/src/parsers/epsilon.c index bb6e8beb31cca3ff09a565171b4e554e07f2ffad..be614489cecfec6f30e4c2bfdd18c323be894446 100644 --- a/src/parsers/epsilon.c +++ b/src/parsers/epsilon.c @@ -5,6 +5,7 @@ static HParseResult* parse_epsilon(void* env, HParseState* state) { HParseResult* res = a_new(HParseResult, 1); res->ast = NULL; res->arena = state->arena; + res->bit_length = 0; return res; } diff --git a/src/parsers/ignore.c b/src/parsers/ignore.c index c56802ac0885fc11429925f353a516d622b88a9d..7eda13d23eecfc771eb82d40db90c81387be146f 100644 --- a/src/parsers/ignore.c +++ b/src/parsers/ignore.c @@ -8,6 +8,7 @@ static HParseResult* parse_ignore(void* env, HParseState* state) { HParseResult *res = a_new(HParseResult, 1); res->ast = NULL; res->arena = state->arena; + res->bit_length = 0; return res; } diff --git a/src/parsers/many.c b/src/parsers/many.c index 77b9dd8be220d92eac36b18ddbcd2fe263945448..655dd1497667f44b3de6694a0109c6f58804167d 100644 --- a/src/parsers/many.c +++ b/src/parsers/many.c @@ -37,6 +37,9 @@ static HParseResult *parse_many(void* env, HParseState *state) { HParsedToken *res = a_new(HParsedToken, 1); res->token_type = TT_SEQUENCE; res->seq = seq; + res->index = 0; + res->bit_length = 0; + res->bit_offset = 0; return make_result(state->arena, res); err0: if (count >= env_->count) { @@ -85,6 +88,7 @@ static HParsedToken *reshape_many(const HParseResult *p, void *user) res->seq = seq; res->index = p->ast->index; res->bit_offset = p->ast->bit_offset; + res->bit_length = p->bit_length; return res; } diff --git a/src/parsers/optional.c b/src/parsers/optional.c index 726606643056b103f9481cb882dadc19417dd607..6a2789e2d0a86a8c4e6b141825fd3abc075af5d0 100644 --- a/src/parsers/optional.c +++ b/src/parsers/optional.c @@ -9,6 +9,9 @@ static HParseResult* parse_optional(void* env, HParseState* state) { state->input_stream = bak; HParsedToken *ast = a_new(HParsedToken, 1); ast->token_type = TT_NONE; + ast->index = 0; + ast->bit_length = 0; + ast->bit_offset = 0; return make_result(state->arena, ast); } diff --git a/src/parsers/parser_internal.h b/src/parsers/parser_internal.h index 9a3b6de3898b42336a84bfe565448c27315e29bb..ebc5f4b32992d77cae4e6b512d17e3a39729e14f 100644 --- a/src/parsers/parser_internal.h +++ b/src/parsers/parser_internal.h @@ -10,12 +10,13 @@ #include "../backends/regex.h" #include "../backends/contextfree.h" -#define a_new_(arena, typ, count) ((typ*)h_arena_malloc((arena), sizeof(typ)*(count))) +#define a_new_(arena, typ, count) ((typ*)h_arena_malloc_noinit((arena), sizeof(typ)*(count))) #define a_new(typ, count) a_new_(state->arena, typ, count) -// we can create a_new0 if necessary. It would allocate some memory and immediately zero it out. +#define a_new0_(arena, typ, count) ((typ*)h_arena_malloc((arena), sizeof(typ)*(count))) +#define a_new0(typ, count) a_new0_(state->arena, typ, count) static inline HParseResult* make_result(HArena *arena, HParsedToken *tok) { - HParseResult *ret = h_arena_malloc(arena, sizeof(HParseResult)); + HParseResult *ret = h_arena_malloc_noinit(arena, sizeof(HParseResult)); ret->ast = tok; ret->arena = arena; ret->bit_length = 0; // This way it gets overridden in h_do_parse diff --git a/src/parsers/permutation.c b/src/parsers/permutation.c index ec256c4af1f76292847102d0a07eca5cb19e5bae..c40f99da2d52eea8dcbce93882d4aed7a0307360 100644 --- a/src/parsers/permutation.c +++ b/src/parsers/permutation.c @@ -89,6 +89,9 @@ static HParseResult *parse_permutation(void *env, HParseState *state) HParsedToken *tok = a_new(HParsedToken, 1); tok->token_type = TT_SEQUENCE; tok->seq = seq; + tok->index = 0; + tok->bit_length = 0; + tok->bit_offset = 0; return make_result(state->arena, tok); } else { // no parse diff --git a/src/parsers/seek.c b/src/parsers/seek.c index 027098b59424a2f78c9b54a0683e66111c02863f..d5bc02840f0145dc3089e82c11068359932b454c 100644 --- a/src/parsers/seek.c +++ b/src/parsers/seek.c @@ -49,6 +49,9 @@ static HParseResult *parse_seek(void *env, HParseState *state) HParsedToken *tok = a_new(HParsedToken, 1); tok->token_type = TT_UINT; tok->uint = pos; + tok->index = 0; + tok->bit_length = 0; + tok->bit_offset = 0; return make_result(state->arena, tok); } @@ -57,6 +60,9 @@ static HParseResult *parse_tell(void *env, HParseState *state) HParsedToken *tok = a_new(HParsedToken, 1); tok->token_type = TT_UINT; tok->uint = h_input_stream_pos(&state->input_stream); + tok->index = 0; + tok->bit_length = 0; + tok->bit_offset = 0; return make_result(state->arena, tok); } diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c index 786ba62e43683f32ca0cc244bc0695cdb04a76fd..2e7b4bc7286ec0ac32af012126e4289226297be0 100644 --- a/src/parsers/sequence.c +++ b/src/parsers/sequence.c @@ -22,6 +22,9 @@ static HParseResult* parse_sequence(void *env, HParseState *state) { } HParsedToken *tok = a_new(HParsedToken, 1); tok->token_type = TT_SEQUENCE; tok->seq = seq; + tok->index = 0; + tok->bit_offset = 0; + tok->bit_length = 0; return make_result(state->arena, tok); } @@ -60,6 +63,7 @@ static HParsedToken *reshape_sequence(const HParseResult *p, void* user_data) { res->seq = seq; res->index = p->ast->index; res->bit_offset = p->ast->bit_offset; + res->bit_length = p->bit_length; return res; } diff --git a/src/parsers/token.c b/src/parsers/token.c index 19029726ad11a52fa0eadf62b67a7b15cd2e4744..b589d58c60e39bb895395a2eedb48984a0b8669f 100644 --- a/src/parsers/token.c +++ b/src/parsers/token.c @@ -16,6 +16,9 @@ static HParseResult* parse_token(void *env, HParseState *state) { } HParsedToken *tok = a_new(HParsedToken, 1); tok->token_type = TT_BYTES; tok->bytes.token = t->str; tok->bytes.len = t->len; + tok->index = 0; + tok->bit_offset = 0; + tok->bit_length = 0; return make_result(state->arena, tok); } diff --git a/src/t_parser.c b/src/t_parser.c index cb67901ed9227787d5580079112c410df000dd94..2d933ef1d3a025fd15fa8a1e247dc8ced3ba63ea 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -205,12 +205,12 @@ HParsedToken* upcase(const HParseResult *p, void* user_data) { switch(p->ast->token_type) { case TT_SEQUENCE: { - HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); + HParsedToken *ret = a_new0_(p->arena, HParsedToken, 1); HCountedArray *seq = h_carray_new_sized(p->arena, p->ast->seq->used); ret->token_type = TT_SEQUENCE; for (size_t i=0; i<p->ast->seq->used; ++i) { if (TT_UINT == ((HParsedToken*)p->ast->seq->elements[i])->token_type) { - HParsedToken *tmp = a_new_(p->arena, HParsedToken, 1); + HParsedToken *tmp = a_new0_(p->arena, HParsedToken, 1); tmp->token_type = TT_UINT; tmp->uint = toupper(((HParsedToken*)p->ast->seq->elements[i])->uint); h_carray_append(seq, tmp); @@ -223,7 +223,7 @@ HParsedToken* upcase(const HParseResult *p, void* user_data) { } case TT_UINT: { - HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); + HParsedToken *ret = a_new0_(p->arena, HParsedToken, 1); ret->token_type = TT_UINT; ret->uint = toupper(p->ast->uint); return ret; @@ -674,7 +674,7 @@ static void test_endianness(gconstpointer backend) { } HParsedToken* act_get(const HParseResult *p, void* user_data) { - HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); + HParsedToken *ret = a_new0_(p->arena, HParsedToken, 1); ret->token_type = TT_UINT; ret->uint = 3 * (1 << p->ast->uint); return ret;