From 1ffd8d9276151cf0ad539d6769ec9a06b3be3689 Mon Sep 17 00:00:00 2001
From: "Meredith L. Patterson" <mlp@thesmartpolitenerd.com>
Date: Sun, 2 Aug 2015 21:32:47 +0200
Subject: [PATCH] Fix undefined behaviour around system_allocator, issue #133.

---
 src/allocator.c            | 12 ++++++++++++
 src/backends/contextfree.h | 15 ++++++++++++++-
 src/backends/regex.c       | 23 +++++++++++++++++++----
 src/backends/regex.h       |  4 ++++
 src/backends/regex_debug.c |  4 ++--
 src/benchmark.c            |  2 +-
 src/bitwriter.c            | 10 +++++++++-
 src/hammer.h               |  1 +
 src/internal.h             |  1 +
 src/pprint.c               | 38 +++++++++++++++++++++++++++-----------
 src/registry.c             | 15 +++++++++++----
 src/test_suite.h           |  4 +++-
 12 files changed, 104 insertions(+), 25 deletions(-)

diff --git a/src/allocator.c b/src/allocator.c
index 80fa9217..258edfa5 100644
--- a/src/allocator.c
+++ b/src/allocator.c
@@ -49,6 +49,10 @@ HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
     block_size = 4096;
   struct HArena_ *ret = h_new(struct HArena_, 1);
   struct arena_link *link = (struct arena_link*)mm__->alloc(mm__, sizeof(struct arena_link) + block_size);
+  if (!link) {
+    // TODO: error-reporting -- let user know that arena link couldn't be allocated
+    return NULL;
+  }
   memset(link, 0, sizeof(struct arena_link) + block_size);
   link->free = block_size;
   link->used = 0;
@@ -76,6 +80,10 @@ void* h_arena_malloc(HArena *arena, size_t size) {
     arena->used += size;
     arena->wasted += sizeof(struct arena_link*);
     void* link = arena->mm__->alloc(arena->mm__, size + sizeof(struct arena_link*));
+    if (!link) {
+      // TODO: error-reporting -- let user know that arena link couldn't be allocated
+      return NULL;
+    }
     memset(link, 0, size + sizeof(struct arena_link*));
     *(struct arena_link**)link = arena->head->next;
     arena->head->next = (struct arena_link*)link;
@@ -83,6 +91,10 @@ void* h_arena_malloc(HArena *arena, size_t size) {
   } else {
     // we just need to allocate an ordinary new block.
     struct arena_link *link = (struct arena_link*)arena->mm__->alloc(arena->mm__, sizeof(struct arena_link) + arena->block_size);
+    if (!link) {
+      // TODO: error-reporting -- let user know that arena link couldn't be allocated
+      return NULL;
+    }
     memset(link, 0, sizeof(struct arena_link) + arena->block_size);
     link->free = arena->block_size - size;
     link->used = size;
diff --git a/src/backends/contextfree.h b/src/backends/contextfree.h
index ab04ab52..9105636f 100644
--- a/src/backends/contextfree.h
+++ b/src/backends/contextfree.h
@@ -18,6 +18,7 @@ struct HCFStack_ {
   HCFChoice *last_completed; // Last completed choice.
                              // XXX is last_completed still needed?
   HCFChoice *prealloc; // If not NULL, will be used for the outermost choice.
+  char error;
 };
 
 #ifndef UNUSED
@@ -33,6 +34,7 @@ static HCFStack* h_cfstack_new(HAllocator *mm__) {
   stack->cap = 4;
   stack->stack = h_new(HCFChoice*, stack->cap);
   stack->prealloc = NULL;
+  stack->error = 0;
   return stack;
 }
 
@@ -55,8 +57,12 @@ static inline void h_cfstack_add_to_seq(HAllocator *mm__, HCFStack *stk__, HCFCh
       for (int j = 0;; j++) {
 	if (cur_top->seq[i]->items[j] == NULL) {
 	  cur_top->seq[i]->items = mm__->realloc(mm__, cur_top->seq[i]->items, sizeof(HCFChoice*) * (j+2));
+	  if (!cur_top->seq[i]->items) {
+	    stk__->error = 1;
+	  }
 	  cur_top->seq[i]->items[j] = item;
 	  cur_top->seq[i]->items[j+1] = NULL;
+	  assert(!stk__->error);
 	  return;
 	}
       }
@@ -111,8 +117,11 @@ static inline void h_cfstack_begin_choice(HAllocator *mm__, HCFStack *stk__) {
     assert(stk__->cap > 0);
     stk__->cap *= 2;
     stk__->stack = mm__->realloc(mm__, stk__->stack, stk__->cap * sizeof(HCFChoice*));
+    if (!stk__->stack) {
+      stk__->error = 1;
+    }
   }
-  assert(stk__->cap >= 1);
+  assert(stk__->cap >= 1 && !stk__->error);
   stk__->stack[stk__->count++] = choice;
 }
 
@@ -121,6 +130,10 @@ static inline void h_cfstack_begin_seq(HAllocator *mm__, HCFStack *stk__) {
   for (int i = 0;; i++) {
     if (top->seq[i] == NULL) {
       top->seq = mm__->realloc(mm__, top->seq, sizeof(HCFSequence*) * (i+2));
+      if (!top->seq) {
+	stk__->error = 1;
+	return;
+      }
       HCFSequence *seq = top->seq[i] = h_new(HCFSequence, 1);
       top->seq[i+1] = NULL;
       seq->items = h_new(HCFChoice*, 1);
diff --git a/src/backends/regex.c b/src/backends/regex.c
index c4f6a2bf..cba50e65 100644
--- a/src/backends/regex.c
+++ b/src/backends/regex.c
@@ -187,7 +187,9 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
 void svm_stack_ensure_cap(HAllocator *mm__, HSVMContext *ctx, size_t addl) {
   if (ctx->stack_count + addl >= ctx->stack_capacity) {
     ctx->stack = mm__->realloc(mm__, ctx->stack, sizeof(*ctx->stack) * (ctx->stack_capacity *= 2));
-    // TODO: check for realloc failure
+    if (!ctx->stack) {
+      ctx->error = 1;
+    }
   }
 }
 
@@ -197,6 +199,7 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
   HArena *arena = h_new_arena(mm__, 0);
   ctx.stack_count = 0;
   ctx.stack_capacity = 16;
+  ctx.error = 0;
   ctx.stack = h_new(HParsedToken*, ctx.stack_capacity);
 
   HParsedToken *tmp_res;
@@ -205,6 +208,9 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
     switch (cur->opcode) {
     case SVM_PUSH:
       svm_stack_ensure_cap(mm__, &ctx, 1);
+      if (ctx.error) {
+	goto fail;
+      }
       tmp_res = a_new(HParsedToken, 1);
       tmp_res->token_type = TT_MARK;
       tmp_res->index = cur->input_pos;
@@ -264,7 +270,9 @@ uint16_t h_rvm_create_action(HRVMProg *prog, HSVMActionFunc action_func, void* e
     size_t array_size = (prog->action_count + 1) * 2; // action_count+1 is a
 					       // power of two
     prog->actions = prog->allocator->realloc(prog->allocator, prog->actions, array_size * sizeof(*prog->actions));
-    // TODO: Handle the allocation failed case nicely.
+    if (!prog->actions) {
+      longjmp(prog->except, 1);
+    }
   }
 
   HSVMAction *action = &prog->actions[prog->action_count];
@@ -280,7 +288,9 @@ uint16_t h_rvm_insert_insn(HRVMProg *prog, HRVMOp op, uint16_t arg) {
     size_t array_size = (prog->length + 1) * 2; // action_count+1 is a
 						// power of two
     prog->insns = prog->allocator->realloc(prog->allocator, prog->insns, array_size * sizeof(*prog->insns));
-    // TODO: Handle the allocation failed case nicely.
+    if (!prog->insns) {
+      longjmp(prog->except, 1);
+    }
   }
 
   prog->insns[prog->length].op = op;
@@ -338,7 +348,12 @@ bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) {
 // Glue regex backend to rest of system
 
 bool h_compile_regex(HRVMProg *prog, const HParser *parser) {
-  return parser->vtable->compile_to_rvm(prog, parser->env);
+  if (setjmp(prog->except)) {
+    return false;
+  }
+  bool ret = parser->vtable->compile_to_rvm(prog, parser->env);
+  memset(prog->except, 0, sizeof(prog->except));
+  return ret;
 }
 
 static void h_regex_free(HParser *parser) {
diff --git a/src/backends/regex.h b/src/backends/regex.h
index 4ea85a88..5a19b07e 100644
--- a/src/backends/regex.h
+++ b/src/backends/regex.h
@@ -7,6 +7,8 @@
 #ifndef HAMMER_BACKEND_REGEX__H
 #define HAMMER_BACKEND_REGEX__H
 
+#include <setjmp.h>
+
 // each insn is an 8-bit opcode and a 16-bit parameter
 // [a] are actions; they add an instruction to the stackvm that is being output.
 // [m] are match ops; they can either succeed or fail, depending on the current character
@@ -41,6 +43,7 @@ typedef struct HSVMContext_ {
   HParsedToken **stack;
   size_t stack_count; // number of items on the stack. Thus stack[stack_count] is the first unused item on the stack.
   size_t stack_capacity;
+  char error;
 } HSVMContext;
 
 // These actions all assume that the items on the stack are not
@@ -57,6 +60,7 @@ struct HRVMProg_ {
   size_t action_count;
   HRVMInsn *insns;
   HSVMAction *actions;
+  jmp_buf except;
 };
 
 // Returns true IFF the provided parser could be compiled.
diff --git a/src/backends/regex_debug.c b/src/backends/regex_debug.c
index 5ca6ca42..8e54f5ca 100644
--- a/src/backends/regex_debug.c
+++ b/src/backends/regex_debug.c
@@ -59,7 +59,7 @@ void dump_rvm_prog(HRVMProg *prog) {
       symref = getsym(prog->actions[insn->arg].action);
       // TODO: somehow format the argument to action
       printf("%s\n", symref);
-      free(symref);
+      (&system_allocator)->free(&system_allocator, symref);
       break;
     case RVM_MATCH: {
       uint8_t low, high;
@@ -95,7 +95,7 @@ void dump_svm_prog(HRVMProg *prog, HRVMTrace *trace) {
       symref = getsym(prog->actions[trace->arg].action);
       // TODO: somehow format the argument to action
       printf("%s\n", symref);
-      free(symref);
+      (&system_allocator)->free(&system_allocator, symref);
       break;
     default:
       printf("\n");
diff --git a/src/benchmark.c b/src/benchmark.c
index ce416dad..8105df3f 100644
--- a/src/benchmark.c
+++ b/src/benchmark.c
@@ -121,7 +121,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
 	ret->results[backend].failed_testcases++;
       }
       h_parse_result_free(res);
-      free(res_unamb);
+      (&system_allocator)->free(&system_allocator, res_unamb);
     }
 
     if (tc_failed > 0) {
diff --git a/src/bitwriter.c b/src/bitwriter.c
index 451815bd..74e27344 100644
--- a/src/bitwriter.c
+++ b/src/bitwriter.c
@@ -13,10 +13,14 @@ HBitWriter *h_bit_writer_new(HAllocator* mm__) {
   HBitWriter *writer = h_new(HBitWriter, 1);
   memset(writer, 0, sizeof(*writer));
   writer->buf = mm__->alloc(mm__, writer->capacity = 8);
+  if (!writer) {
+    return NULL;
+  }
   memset(writer->buf, 0, writer->capacity);
   writer->mm__ = mm__;
   writer->flags = BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN;
-
+  writer->error = 0;
+  
   return writer;
 }
 
@@ -37,6 +41,10 @@ static void h_bit_writer_reserve(HBitWriter* w, size_t nbits) {
   size_t old_capacity = w->capacity;
   while (w->index + nbytes >= w->capacity) {
     w->buf = w->mm__->realloc(w->mm__, w->buf, w->capacity *= 2);
+    if (!w->buf) {
+      w->error = 1;
+      return;
+    }
   }
 
   if (old_capacity != w->capacity)
diff --git a/src/hammer.h b/src/hammer.h
index ce4eeca5..50b3de5c 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -48,6 +48,7 @@ typedef enum HParserBackend_ {
 
 typedef enum HTokenType_ {
   // Before you change the explicit values of these, think of the poor bindings ;_;
+  TT_INVALID = 0,
   TT_NONE = 1,
   TT_BYTES = 2,
   TT_SINT = 4,
diff --git a/src/internal.h b/src/internal.h
index 0c4d4dc2..092c158c 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -282,6 +282,7 @@ struct HBitWriter_ {
 		   // of used bits in the current byte. i.e., 0 always
 		   // means that 8 bits are available for use.
   char flags;
+  char error;
 };
 
 // }}}
diff --git a/src/pprint.c b/src/pprint.c
index a91d9d84..b2290dda 100644
--- a/src/pprint.c
+++ b/src/pprint.c
@@ -85,20 +85,33 @@ struct result_buf {
   size_t capacity;
 };
 
-static inline void ensure_capacity(struct result_buf *buf, int amt) {
-  while (buf->len + amt >= buf->capacity)
-    buf->output = realloc(buf->output, buf->capacity *= 2);
+static inline bool ensure_capacity(struct result_buf *buf, int amt) {
+  while (buf->len + amt >= buf->capacity) {
+    buf->output = (&system_allocator)->realloc(&system_allocator, buf->output, buf->capacity *= 2);
+    if (!buf->output) {
+      return false;
+    }
+  }
+  return true;
 }
 
-static inline void append_buf(struct result_buf *buf, const char* input, int len) {
-  ensure_capacity(buf, len);
-  memcpy(buf->output + buf->len, input, len);
-  buf->len += len;
+static inline bool append_buf(struct result_buf *buf, const char* input, int len) {
+  if (ensure_capacity(buf, len)) {
+    memcpy(buf->output + buf->len, input, len);
+    buf->len += len;
+    return true;
+  } else {
+    return false;
+  }
 }
 
-static inline void append_buf_c(struct result_buf *buf, char v) {
-  ensure_capacity(buf, 1);
-  buf->output[buf->len++] = v;
+static inline bool append_buf_c(struct result_buf *buf, char v) {
+  if (ensure_capacity(buf, 1)) {
+    buf->output[buf->len++] = v;
+    return true;
+  } else {
+    return false;
+  }
 }
 
 static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) {
@@ -161,10 +174,13 @@ static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) {
 
 char* h_write_result_unamb(const HParsedToken* tok) {
   struct result_buf buf = {
-    .output = malloc(16),
+    .output = (&system_allocator)->alloc(&system_allocator, 16),
     .len = 0,
     .capacity = 16
   };
+  if (!buf.output) {
+    return NULL;
+  }
   unamb_sub(tok, &buf);
   append_buf_c(&buf, 0);
   return buf.output;
diff --git a/src/registry.c b/src/registry.c
index 60aa8863..2ebac1a9 100644
--- a/src/registry.c
+++ b/src/registry.c
@@ -46,24 +46,31 @@ static int compare_entries(const void* v1, const void* v2) {
 }
 
 HTokenType h_allocate_token_type(const char* name) {
-  Entry* new_entry = malloc(sizeof(*new_entry));
+  Entry* new_entry = (&system_allocator)->alloc(&system_allocator, sizeof(*new_entry));
+  if (!new_entry) {
+    return TT_INVALID;
+  }
   new_entry->name = name;
   new_entry->value = 0;
   Entry* probe = *(Entry**)tsearch(new_entry, &tt_registry, compare_entries);
   if (probe->value != 0) {
     // Token type already exists...
     // TODO: treat this as a bug?
-    free(new_entry);
+    (&system_allocator)->free(&system_allocator, new_entry);
     return probe->value;
   } else {
     // new value
     probe->name = strdup(probe->name); // drop ownership of name
     probe->value = tt_next++;
     if ((probe->value - TT_START) >= tt_by_id_sz) {
-      if (tt_by_id_sz == 0)
+      if (tt_by_id_sz == 0) {
 	tt_by_id = malloc(sizeof(*tt_by_id) * ((tt_by_id_sz = (tt_next - TT_START) * 16)));
-      else
+      } else {
 	tt_by_id = realloc(tt_by_id, sizeof(*tt_by_id) * ((tt_by_id_sz *= 2)));
+      }
+      if (!tt_by_id) {
+	return TT_INVALID;
+      }
     }
     assert(probe->value - TT_START < tt_by_id_sz);
     tt_by_id[probe->value - TT_START] = probe;
diff --git a/src/test_suite.h b/src/test_suite.h
index 9a58a20f..82fe4955 100644
--- a/src/test_suite.h
+++ b/src/test_suite.h
@@ -21,6 +21,8 @@
 #include <stdlib.h>
 #include <inttypes.h>
 
+#include "internal.h"
+
 // Equivalent to g_assert_*, but not using g_assert...
 #define g_check_inttype(fmt, typ, n1, op, n2) do {				\
     typ _n1 = (n1);							\
@@ -132,7 +134,7 @@
     } else {								\
       char* cres = h_write_result_unamb(res->ast);			\
       g_check_string(cres, ==, result);					\
-      free(cres);							\
+      (&system_allocator)->free(&system_allocator, cres);		\
       HArenaStats stats;						\
       h_allocator_stats(res->arena, &stats);				\
       g_test_message("Parse used %zd bytes, wasted %zd bytes. "		\
-- 
GitLab