From 1ffd8d9276151cf0ad539d6769ec9a06b3be3689 Mon Sep 17 00:00:00 2001 From: "Meredith L. Patterson" <mlp@thesmartpolitenerd.com> Date: Sun, 2 Aug 2015 21:32:47 +0200 Subject: [PATCH] Fix undefined behaviour around system_allocator, issue #133. --- src/allocator.c | 12 ++++++++++++ src/backends/contextfree.h | 15 ++++++++++++++- src/backends/regex.c | 23 +++++++++++++++++++---- src/backends/regex.h | 4 ++++ src/backends/regex_debug.c | 4 ++-- src/benchmark.c | 2 +- src/bitwriter.c | 10 +++++++++- src/hammer.h | 1 + src/internal.h | 1 + src/pprint.c | 38 +++++++++++++++++++++++++++----------- src/registry.c | 15 +++++++++++---- src/test_suite.h | 4 +++- 12 files changed, 104 insertions(+), 25 deletions(-) diff --git a/src/allocator.c b/src/allocator.c index 80fa9217..258edfa5 100644 --- a/src/allocator.c +++ b/src/allocator.c @@ -49,6 +49,10 @@ HArena *h_new_arena(HAllocator* mm__, size_t block_size) { block_size = 4096; struct HArena_ *ret = h_new(struct HArena_, 1); struct arena_link *link = (struct arena_link*)mm__->alloc(mm__, sizeof(struct arena_link) + block_size); + if (!link) { + // TODO: error-reporting -- let user know that arena link couldn't be allocated + return NULL; + } memset(link, 0, sizeof(struct arena_link) + block_size); link->free = block_size; link->used = 0; @@ -76,6 +80,10 @@ void* h_arena_malloc(HArena *arena, size_t size) { arena->used += size; arena->wasted += sizeof(struct arena_link*); void* link = arena->mm__->alloc(arena->mm__, size + sizeof(struct arena_link*)); + if (!link) { + // TODO: error-reporting -- let user know that arena link couldn't be allocated + return NULL; + } memset(link, 0, size + sizeof(struct arena_link*)); *(struct arena_link**)link = arena->head->next; arena->head->next = (struct arena_link*)link; @@ -83,6 +91,10 @@ void* h_arena_malloc(HArena *arena, size_t size) { } else { // we just need to allocate an ordinary new block. struct arena_link *link = (struct arena_link*)arena->mm__->alloc(arena->mm__, sizeof(struct arena_link) + arena->block_size); + if (!link) { + // TODO: error-reporting -- let user know that arena link couldn't be allocated + return NULL; + } memset(link, 0, sizeof(struct arena_link) + arena->block_size); link->free = arena->block_size - size; link->used = size; diff --git a/src/backends/contextfree.h b/src/backends/contextfree.h index ab04ab52..9105636f 100644 --- a/src/backends/contextfree.h +++ b/src/backends/contextfree.h @@ -18,6 +18,7 @@ struct HCFStack_ { HCFChoice *last_completed; // Last completed choice. // XXX is last_completed still needed? HCFChoice *prealloc; // If not NULL, will be used for the outermost choice. + char error; }; #ifndef UNUSED @@ -33,6 +34,7 @@ static HCFStack* h_cfstack_new(HAllocator *mm__) { stack->cap = 4; stack->stack = h_new(HCFChoice*, stack->cap); stack->prealloc = NULL; + stack->error = 0; return stack; } @@ -55,8 +57,12 @@ static inline void h_cfstack_add_to_seq(HAllocator *mm__, HCFStack *stk__, HCFCh for (int j = 0;; j++) { if (cur_top->seq[i]->items[j] == NULL) { cur_top->seq[i]->items = mm__->realloc(mm__, cur_top->seq[i]->items, sizeof(HCFChoice*) * (j+2)); + if (!cur_top->seq[i]->items) { + stk__->error = 1; + } cur_top->seq[i]->items[j] = item; cur_top->seq[i]->items[j+1] = NULL; + assert(!stk__->error); return; } } @@ -111,8 +117,11 @@ static inline void h_cfstack_begin_choice(HAllocator *mm__, HCFStack *stk__) { assert(stk__->cap > 0); stk__->cap *= 2; stk__->stack = mm__->realloc(mm__, stk__->stack, stk__->cap * sizeof(HCFChoice*)); + if (!stk__->stack) { + stk__->error = 1; + } } - assert(stk__->cap >= 1); + assert(stk__->cap >= 1 && !stk__->error); stk__->stack[stk__->count++] = choice; } @@ -121,6 +130,10 @@ static inline void h_cfstack_begin_seq(HAllocator *mm__, HCFStack *stk__) { for (int i = 0;; i++) { if (top->seq[i] == NULL) { top->seq = mm__->realloc(mm__, top->seq, sizeof(HCFSequence*) * (i+2)); + if (!top->seq) { + stk__->error = 1; + return; + } HCFSequence *seq = top->seq[i] = h_new(HCFSequence, 1); top->seq[i+1] = NULL; seq->items = h_new(HCFChoice*, 1); diff --git a/src/backends/regex.c b/src/backends/regex.c index c4f6a2bf..cba50e65 100644 --- a/src/backends/regex.c +++ b/src/backends/regex.c @@ -187,7 +187,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_ void svm_stack_ensure_cap(HAllocator *mm__, HSVMContext *ctx, size_t addl) { if (ctx->stack_count + addl >= ctx->stack_capacity) { ctx->stack = mm__->realloc(mm__, ctx->stack, sizeof(*ctx->stack) * (ctx->stack_capacity *= 2)); - // TODO: check for realloc failure + if (!ctx->stack) { + ctx->error = 1; + } } } @@ -197,6 +199,7 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, HArena *arena = h_new_arena(mm__, 0); ctx.stack_count = 0; ctx.stack_capacity = 16; + ctx.error = 0; ctx.stack = h_new(HParsedToken*, ctx.stack_capacity); HParsedToken *tmp_res; @@ -205,6 +208,9 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace, switch (cur->opcode) { case SVM_PUSH: svm_stack_ensure_cap(mm__, &ctx, 1); + if (ctx.error) { + goto fail; + } tmp_res = a_new(HParsedToken, 1); tmp_res->token_type = TT_MARK; tmp_res->index = cur->input_pos; @@ -264,7 +270,9 @@ uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* e size_t array_size = (prog->action_count + 1) * 2; // action_count+1 is a // power of two prog->actions = prog->allocator->realloc(prog->allocator, prog->actions, array_size * sizeof(*prog->actions)); - // TODO: Handle the allocation failed case nicely. + if (!prog->actions) { + longjmp(prog->except, 1); + } } HSVMAction *action = &prog->actions[prog->action_count]; @@ -280,7 +288,9 @@ uint16_t h_rvm_insert_insn(HRVMProg *prog, HRVMOp op, uint16_t arg) { size_t array_size = (prog->length + 1) * 2; // action_count+1 is a // power of two prog->insns = prog->allocator->realloc(prog->allocator, prog->insns, array_size * sizeof(*prog->insns)); - // TODO: Handle the allocation failed case nicely. + if (!prog->insns) { + longjmp(prog->except, 1); + } } prog->insns[prog->length].op = op; @@ -338,7 +348,12 @@ bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) { // Glue regex backend to rest of system bool h_compile_regex(HRVMProg *prog, const HParser *parser) { - return parser->vtable->compile_to_rvm(prog, parser->env); + if (setjmp(prog->except)) { + return false; + } + bool ret = parser->vtable->compile_to_rvm(prog, parser->env); + memset(prog->except, 0, sizeof(prog->except)); + return ret; } static void h_regex_free(HParser *parser) { diff --git a/src/backends/regex.h b/src/backends/regex.h index 4ea85a88..5a19b07e 100644 --- a/src/backends/regex.h +++ b/src/backends/regex.h @@ -7,6 +7,8 @@ #ifndef HAMMER_BACKEND_REGEX__H #define HAMMER_BACKEND_REGEX__H +#include <setjmp.h> + // each insn is an 8-bit opcode and a 16-bit parameter // [a] are actions; they add an instruction to the stackvm that is being output. // [m] are match ops; they can either succeed or fail, depending on the current character @@ -41,6 +43,7 @@ typedef struct HSVMContext_ { HParsedToken **stack; size_t stack_count; // number of items on the stack. Thus stack[stack_count] is the first unused item on the stack. size_t stack_capacity; + char error; } HSVMContext; // These actions all assume that the items on the stack are not @@ -57,6 +60,7 @@ struct HRVMProg_ { size_t action_count; HRVMInsn *insns; HSVMAction *actions; + jmp_buf except; }; // Returns true IFF the provided parser could be compiled. diff --git a/src/backends/regex_debug.c b/src/backends/regex_debug.c index 5ca6ca42..8e54f5ca 100644 --- a/src/backends/regex_debug.c +++ b/src/backends/regex_debug.c @@ -59,7 +59,7 @@ void dump_rvm_prog(HRVMProg *prog) { symref = getsym(prog->actions[insn->arg].action); // TODO: somehow format the argument to action printf("%s\n", symref); - free(symref); + (&system_allocator)->free(&system_allocator, symref); break; case RVM_MATCH: { uint8_t low, high; @@ -95,7 +95,7 @@ void dump_svm_prog(HRVMProg *prog, HRVMTrace *trace) { symref = getsym(prog->actions[trace->arg].action); // TODO: somehow format the argument to action printf("%s\n", symref); - free(symref); + (&system_allocator)->free(&system_allocator, symref); break; default: printf("\n"); diff --git a/src/benchmark.c b/src/benchmark.c index ce416dad..8105df3f 100644 --- a/src/benchmark.c +++ b/src/benchmark.c @@ -121,7 +121,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest ret->results[backend].failed_testcases++; } h_parse_result_free(res); - free(res_unamb); + (&system_allocator)->free(&system_allocator, res_unamb); } if (tc_failed > 0) { diff --git a/src/bitwriter.c b/src/bitwriter.c index 451815bd..74e27344 100644 --- a/src/bitwriter.c +++ b/src/bitwriter.c @@ -13,10 +13,14 @@ HBitWriter *h_bit_writer_new(HAllocator* mm__) { HBitWriter *writer = h_new(HBitWriter, 1); memset(writer, 0, sizeof(*writer)); writer->buf = mm__->alloc(mm__, writer->capacity = 8); + if (!writer) { + return NULL; + } memset(writer->buf, 0, writer->capacity); writer->mm__ = mm__; writer->flags = BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN; - + writer->error = 0; + return writer; } @@ -37,6 +41,10 @@ static void h_bit_writer_reserve(HBitWriter* w, size_t nbits) { size_t old_capacity = w->capacity; while (w->index + nbytes >= w->capacity) { w->buf = w->mm__->realloc(w->mm__, w->buf, w->capacity *= 2); + if (!w->buf) { + w->error = 1; + return; + } } if (old_capacity != w->capacity) diff --git a/src/hammer.h b/src/hammer.h index ce4eeca5..50b3de5c 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -48,6 +48,7 @@ typedef enum HParserBackend_ { typedef enum HTokenType_ { // Before you change the explicit values of these, think of the poor bindings ;_; + TT_INVALID = 0, TT_NONE = 1, TT_BYTES = 2, TT_SINT = 4, diff --git a/src/internal.h b/src/internal.h index 0c4d4dc2..092c158c 100644 --- a/src/internal.h +++ b/src/internal.h @@ -282,6 +282,7 @@ struct HBitWriter_ { // of used bits in the current byte. i.e., 0 always // means that 8 bits are available for use. char flags; + char error; }; // }}} diff --git a/src/pprint.c b/src/pprint.c index a91d9d84..b2290dda 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -85,20 +85,33 @@ struct result_buf { size_t capacity; }; -static inline void ensure_capacity(struct result_buf *buf, int amt) { - while (buf->len + amt >= buf->capacity) - buf->output = realloc(buf->output, buf->capacity *= 2); +static inline bool ensure_capacity(struct result_buf *buf, int amt) { + while (buf->len + amt >= buf->capacity) { + buf->output = (&system_allocator)->realloc(&system_allocator, buf->output, buf->capacity *= 2); + if (!buf->output) { + return false; + } + } + return true; } -static inline void append_buf(struct result_buf *buf, const char* input, int len) { - ensure_capacity(buf, len); - memcpy(buf->output + buf->len, input, len); - buf->len += len; +static inline bool append_buf(struct result_buf *buf, const char* input, int len) { + if (ensure_capacity(buf, len)) { + memcpy(buf->output + buf->len, input, len); + buf->len += len; + return true; + } else { + return false; + } } -static inline void append_buf_c(struct result_buf *buf, char v) { - ensure_capacity(buf, 1); - buf->output[buf->len++] = v; +static inline bool append_buf_c(struct result_buf *buf, char v) { + if (ensure_capacity(buf, 1)) { + buf->output[buf->len++] = v; + return true; + } else { + return false; + } } static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) { @@ -161,10 +174,13 @@ static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) { char* h_write_result_unamb(const HParsedToken* tok) { struct result_buf buf = { - .output = malloc(16), + .output = (&system_allocator)->alloc(&system_allocator, 16), .len = 0, .capacity = 16 }; + if (!buf.output) { + return NULL; + } unamb_sub(tok, &buf); append_buf_c(&buf, 0); return buf.output; diff --git a/src/registry.c b/src/registry.c index 60aa8863..2ebac1a9 100644 --- a/src/registry.c +++ b/src/registry.c @@ -46,24 +46,31 @@ static int compare_entries(const void* v1, const void* v2) { } HTokenType h_allocate_token_type(const char* name) { - Entry* new_entry = malloc(sizeof(*new_entry)); + Entry* new_entry = (&system_allocator)->alloc(&system_allocator, sizeof(*new_entry)); + if (!new_entry) { + return TT_INVALID; + } new_entry->name = name; new_entry->value = 0; Entry* probe = *(Entry**)tsearch(new_entry, &tt_registry, compare_entries); if (probe->value != 0) { // Token type already exists... // TODO: treat this as a bug? - free(new_entry); + (&system_allocator)->free(&system_allocator, new_entry); return probe->value; } else { // new value probe->name = strdup(probe->name); // drop ownership of name probe->value = tt_next++; if ((probe->value - TT_START) >= tt_by_id_sz) { - if (tt_by_id_sz == 0) + if (tt_by_id_sz == 0) { tt_by_id = malloc(sizeof(*tt_by_id) * ((tt_by_id_sz = (tt_next - TT_START) * 16))); - else + } else { tt_by_id = realloc(tt_by_id, sizeof(*tt_by_id) * ((tt_by_id_sz *= 2))); + } + if (!tt_by_id) { + return TT_INVALID; + } } assert(probe->value - TT_START < tt_by_id_sz); tt_by_id[probe->value - TT_START] = probe; diff --git a/src/test_suite.h b/src/test_suite.h index 9a58a20f..82fe4955 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -21,6 +21,8 @@ #include <stdlib.h> #include <inttypes.h> +#include "internal.h" + // Equivalent to g_assert_*, but not using g_assert... #define g_check_inttype(fmt, typ, n1, op, n2) do { \ typ _n1 = (n1); \ @@ -132,7 +134,7 @@ } else { \ char* cres = h_write_result_unamb(res->ast); \ g_check_string(cres, ==, result); \ - free(cres); \ + (&system_allocator)->free(&system_allocator, cres); \ HArenaStats stats; \ h_allocator_stats(res->arena, &stats); \ g_test_message("Parse used %zd bytes, wasted %zd bytes. " \ -- GitLab