From 764d0d707152f620f353b5f43bd115e27e816ba4 Mon Sep 17 00:00:00 2001
From: Dan Hirsch <thequux@thequux.com>
Date: Thu, 17 May 2012 15:51:19 +0200
Subject: [PATCH] Cleaned up some memory leaks, got rid of gsequence, improved
 test macro to free memory that it allocated

---
 common.mk            |  2 +-
 src/Makefile         |  5 +++-
 src/allocator.c      |  8 +++---
 src/datastructures.c | 32 ++++++++++++++++++++++
 src/hammer.c         | 63 ++++++++++++++++++++++----------------------
 src/hammer.h         | 11 +++++++-
 src/internal.h       | 10 +++++++
 src/pprint.c         | 22 +++++-----------
 src/test_suite.h     |  3 +++
 9 files changed, 102 insertions(+), 54 deletions(-)
 create mode 100644 src/datastructures.c

diff --git a/common.mk b/common.mk
index de00ecbf..5a1fa501 100644
--- a/common.mk
+++ b/common.mk
@@ -1,4 +1,4 @@
-CFLAGS := $(shell pkg-config --cflags glib-2.0) -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter
+CFLAGS := $(shell pkg-config --cflags glib-2.0) -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes
 LDFLAGS := $(shell pkg-config --libs glib-2.0)
 CC := gcc
 # Set V=1 for verbose mode...
diff --git a/src/Makefile b/src/Makefile
index 0e71c189..d362118c 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -2,6 +2,9 @@
 OUTPUTS := bitreader.o \
 	   hammer.o \
 	   libhammer.a \
+	   pprint.o \
+	   allocator.o \
+	   datastructures.o \
 	   test_suite
 
 TOPLEVEL := ../
@@ -14,7 +17,7 @@ all: libhammer.a test_suite
 test_suite: test_suite.o libhammer.a
 	$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
 
-libhammer.a: bitreader.o hammer.o pprint.o allocator.o
+libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o
 
 bitreader.o: test_suite.h
 hammer.o: hammer.h
diff --git a/src/allocator.c b/src/allocator.c
index 2d446f1a..ba35fd34 100644
--- a/src/allocator.c
+++ b/src/allocator.c
@@ -60,10 +60,10 @@ void* arena_malloc(arena_t arena, size_t size) {
   if (size <= arena->head->free) {
     // fast path..
     void* ret = arena->head->rest + arena->head->used;
-    arena->used += size;
+    arena->used += size + 1;
     arena->wasted -= size;
-    arena->head->used += size;
-    arena->head->free -= size;
+    arena->head->used += size + 1;
+    arena->head->free -= size + 1;
     return ret;
   } else if (size > arena->block_size) {
     // We need a new, dedicated block for it, because it won't fit in a standard sized one.
@@ -73,7 +73,7 @@ void* arena_malloc(arena_t arena, size_t size) {
     void* link = g_malloc(size + sizeof(struct arena_link*));
     *(struct arena_link**)link = arena->head->next;
     arena->head->next = (struct arena_link*)link;
-    return (void*)((uint8_t*)link + sizeof(struct arena_link*));
+    return (void*)(((uint8_t*)link) + sizeof(struct arena_link*));
   } else {
     // we just need to allocate an ordinary new block.
     struct arena_link *link = (struct arena_link*)g_malloc0(sizeof(struct arena_link) + arena->block_size);
diff --git a/src/datastructures.c b/src/datastructures.c
new file mode 100644
index 00000000..7954c434
--- /dev/null
+++ b/src/datastructures.c
@@ -0,0 +1,32 @@
+#include "internal.h"
+#include "hammer.h"
+#include "allocator.h"
+#include <assert.h>
+#include <malloc.h>
+// {{{ counted arrays
+
+
+counted_array_t *carray_new_sized(arena_t arena, size_t size) {
+  counted_array_t *ret = arena_malloc(arena, sizeof(counted_array_t));
+  assert(size > 0);
+  ret->used = 0;
+  ret->capacity = size;
+  ret->arena = arena;
+  ret->elements = arena_malloc(arena, sizeof(void*) * size);
+  return ret;
+}
+counted_array_t *carray_new(arena_t arena) {
+  return carray_new_sized(arena, 4);
+}
+
+void carray_append(counted_array_t *array, void* item) {
+  if (array->used >= array->capacity) {
+    void **elements = arena_malloc(array->arena, (array->capacity *= 2) * sizeof(counted_array_t*));
+    for (size_t i = 0; i < array->used; i++)
+      elements[i] = array->elements[i];
+    for (size_t i = array->used; i < array->capacity; i++)
+      elements[i] = 0;
+    array->elements = elements;
+  }
+  array->elements[array->used++] = item;
+}
diff --git a/src/hammer.c b/src/hammer.c
index ddafaabe..0aed6a88 100644
--- a/src/hammer.c
+++ b/src/hammer.c
@@ -37,7 +37,9 @@ guint djbhash(const uint8_t *buf, size_t len) {
 
 parse_result_t* do_parse(const parser_t* parser, parse_state_t *state) {
   // TODO(thequux): add caching here.
-  parser_cache_key_t *key = a_new(parser_cache_key_t, 1);
+  parser_cache_key_t *key;
+  key = a_new(parser_cache_key_t, 1);
+  memset(key, 0, sizeof(*key));
   key->input_pos = state->input_stream;
   key->parser = parser;
   
@@ -232,7 +234,7 @@ typedef struct {
 
 static parse_result_t* parse_sequence(void *env, parse_state_t *state) {
   sequence_t *s = (sequence_t*)env;
-  GSequence *seq = g_sequence_new(NULL);
+  counted_array_t *seq = carray_new_sized(state->arena, (s->len > 0) ? s->len : 4);
   for (size_t i=0; i<s->len; ++i) {
     parse_result_t *tmp = do_parse(s->p_array[i], state);
     // if the interim parse fails, the whole thing fails
@@ -240,7 +242,7 @@ static parse_result_t* parse_sequence(void *env, parse_state_t *state) {
       return NULL;
     } else {
       if (tmp->ast)
-	g_sequence_append(seq, (void*)tmp->ast);
+	carray_append(seq, (void*)tmp->ast);
     }
   }
   parsed_token_t *tok = a_new(parsed_token_t, 1);
@@ -320,36 +322,34 @@ typedef struct {
   const parser_t *p2;
 } two_parsers_t;
 
-void accumulate_size(gpointer pr, gpointer acc) {
-  size_t tmp = GPOINTER_TO_SIZE(acc);
+size_t accumulate_size(parse_result_t *pr) {
   if (NULL != ((parse_result_t*)pr)->ast) {
-    switch(((parse_result_t*)pr)->ast->token_type) {
+    switch (pr->ast->token_type) {
     case TT_BYTES:
-      tmp += ((parse_result_t*)pr)->ast->bytes.len;
-      acc = GSIZE_TO_POINTER(tmp);
-      break;
+      return pr->ast->bytes.len;
     case TT_SINT:
     case TT_UINT:
-      tmp += 8;
-      acc = GSIZE_TO_POINTER(tmp);
-      break;
-    case TT_SEQUENCE:
-      g_sequence_foreach(((parse_result_t*)pr)->ast->seq, accumulate_size, acc);
-      break;
+      return sizeof(pr->ast->uint);
+    case TT_SEQUENCE: {
+      counted_array_t *arr = pr->ast->seq;
+      size_t ret = 0;
+      for (size_t i = 0; i < arr->used; i++)
+	ret += accumulate_size(arr->elements[i]);
+      return ret;
+    }
     default:
-      break;
+      return 0;
     }
   } // no else, if the AST is null then acc doesn't change
+  return 0;
 }
 
 size_t token_length(parse_result_t *pr) {
-  size_t ret = 0;
   if (NULL == pr) {
-    return ret;
+    return 0;
   } else {
-    accumulate_size(pr, GSIZE_TO_POINTER(ret));
+    return accumulate_size(pr);
   }
-  return ret;
 }
 
 static parse_result_t* parse_butnot(void *env, parse_state_t *state) {
@@ -464,9 +464,10 @@ typedef struct {
   size_t count;
   bool min_p;
 } repeat_t;
+
 static parse_result_t *parse_many(void* env, parse_state_t *state) {
   repeat_t *env_ = (repeat_t*) env;
-  GSequence *seq = g_sequence_new(NULL);
+  counted_array_t *seq = carray_new_sized(state->arena, (env_->count > 0 ? env_->count : 4));
   size_t count = 0;
   input_stream_t bak;
   while (env_->min_p || env_->count > count) {
@@ -480,7 +481,7 @@ static parse_result_t *parse_many(void* env, parse_state_t *state) {
     if (!elem)
       goto err0;
     if (elem->ast)
-      g_sequence_append(seq, (gpointer)elem->ast);
+      carray_append(seq, (void*)elem->ast);
     count++;
   }
   if (count < env_->count)
@@ -497,7 +498,6 @@ static parse_result_t *parse_many(void* env, parse_state_t *state) {
     goto succ;
   }
  err:
-  g_sequence_free(seq);
   state->input_stream = bak;
   return NULL;
 }
@@ -653,10 +653,10 @@ parse_result_t* parse(const parser_t* parser, const uint8_t* input, size_t lengt
   parse_state->input_stream.length = length;
   parse_state->arena = arena;
   parse_result_t *res = do_parse(parser, parse_state);
-  // tear down the parse state. For now, leak like a sieve.
-  // BUG: Leaks like a sieve.
-  // TODO(thequux): Pull in the arena allocator.
+  // tear down the parse state
   g_hash_table_destroy(parse_state->cache);
+  if (!res)
+    delete_arena(parse_state->arena);
 
   return res;
 }
@@ -851,11 +851,12 @@ static void test_xor(void) {
 
 static void test_many(void) {
   const parser_t *many_ = many(choice(ch('a'), ch('b'), NULL));
-
-  g_check_parse_ok(many_, "adef", 4, "(s0x61)");
-  g_check_parse_ok(many_, "bdef", 4, "(s0x62)");
-  g_check_parse_ok(many_, "aabbabadef", 10, "(s0x61 s0x61 s0x62 s0x62 s0x61 s0x62 s0x61)");
-  g_check_parse_ok(many_, "daabbabadef", 11, "()");
+  for (int i = 0; i < 100; i++) {
+    g_check_parse_ok(many_, "adef", 4, "(s0x61)");
+    g_check_parse_ok(many_, "bdef", 4, "(s0x62)");
+    g_check_parse_ok(many_, "aabbabadef", 10, "(s0x61 s0x61 s0x62 s0x62 s0x61 s0x62 s0x61)");
+    g_check_parse_ok(many_, "daabbabadef", 11, "()");
+  }
 }
 
 static void test_many1(void) {
diff --git a/src/hammer.h b/src/hammer.h
index 054e3f82..adbfcc72 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -61,6 +61,13 @@ typedef enum token_type {
   TT_MAX
 } token_type_t;
 
+typedef struct counted_array {
+  size_t capacity;
+  size_t used;
+  arena_t arena;
+  void **elements;
+} counted_array_t;
+
 typedef struct parsed_token {
   token_type_t token_type;
   union {
@@ -72,10 +79,12 @@ typedef struct parsed_token {
     uint64_t uint;
     double dbl;
     float flt;
-    GSequence *seq; // a sequence of parsed_token_t's
+    counted_array_t *seq; // a sequence of parsed_token_t's
   };
 } parsed_token_t;
 
+
+
 /* If a parse fails, the parse result will be NULL.
  * If a parse is successful but there's nothing there (i.e., if end_p succeeds) then there's a parse result but its ast is NULL.
  */
diff --git a/src/internal.h b/src/internal.h
index 1a65a071..6f500791 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -32,6 +32,7 @@
 #define false 0
 #define true 1
 
+
 typedef struct parser_cache_key {
   input_stream_t input_pos;
   const parser_t *parser;
@@ -81,4 +82,13 @@ guint djbhash(const uint8_t *buf, size_t len);
 char* write_result_unamb(const parsed_token_t* tok);
 void pprint(const parsed_token_t* tok, int indent, int delta);
 
+counted_array_t *carray_new_sized(arena_t arena, size_t size);
+counted_array_t *carray_new(arena_t arena);
+void carray_append(counted_array_t *array, void* item);
+
+#if 0
+#include <malloc.h>
+#define arena_malloc(a, s) malloc(s)
+#endif
+
 #endif // #ifndef HAMMER_INTERNAL__H
diff --git a/src/pprint.c b/src/pprint.c
index 106e7844..c7f58c91 100644
--- a/src/pprint.c
+++ b/src/pprint.c
@@ -52,13 +52,9 @@ void pprint(const parsed_token_t* tok, int indent, int delta) {
      printf("%*ss %#lx\n", indent, "", tok->uint);
     break;
   case TT_SEQUENCE: {
-    GSequenceIter *it;
     printf("%*s[\n", indent, "");
-    for (it = g_sequence_get_begin_iter(tok->seq);
-	 !g_sequence_iter_is_end(it);
-	 it = g_sequence_iter_next(it)) {
-      parsed_token_t* subtok = g_sequence_get(it);
-      pprint(subtok, indent + delta, delta);
+    for (size_t i = 0; i < tok->seq->used; i++) {
+      pprint(tok->seq->elements[i], indent + delta, delta);
     }
     printf("%*s]\n", indent, "");
   } // TODO: implement this
@@ -123,18 +119,11 @@ static void unamb_sub(const parsed_token_t* tok, struct result_buf *buf) {
     append_buf(buf, "ERR", 3);
     break;
   case TT_SEQUENCE: {
-    GSequenceIter *it;
-    int at_begin = 1;
     append_buf_c(buf, '(');
-    for (it = g_sequence_get_begin_iter(tok->seq);
-	 !g_sequence_iter_is_end(it);
-	 it = g_sequence_iter_next(it)) {
-      parsed_token_t* subtok = g_sequence_get(it);
-      if (at_begin)
-	at_begin = 0;
-      else
+    for (size_t i = 0; i < tok->seq->used; i++) {
+      if (i > 0)
 	append_buf_c(buf, ' ');
-      unamb_sub(subtok, buf);
+      unamb_sub(tok->seq->elements[i], buf);
     }
     append_buf_c(buf, ')');
   }
@@ -153,6 +142,7 @@ char* write_result_unamb(const parsed_token_t* tok) {
     .capacity = 16
   };
   unamb_sub(tok, &buf);
+  append_buf_c(&buf, 0);
   return buf.output;
 }
   
diff --git a/src/test_suite.h b/src/test_suite.h
index f313e5d3..d27d283c 100644
--- a/src/test_suite.h
+++ b/src/test_suite.h
@@ -17,6 +17,7 @@
 
 #ifndef HAMMER_TEST_SUITE__H
 #define HAMMER_TEST_SUITE__H
+#include <malloc.h>
 
 // Equivalent to g_assert_*, but not using g_assert...
 #define g_check_inttype(fmt, typ, n1, op, n2) do {				\
@@ -76,12 +77,14 @@
     } else {								\
       char* cres = write_result_unamb(res->ast);			\
       g_check_string(cres, ==, result);					\
+      g_free(cres);							\
       arena_stats_t stats;						\
       allocator_stats(res->arena, &stats);				\
       g_test_message("Parse used %zd bytes, wasted %zd bytes. "		\
                      "Inefficiency: %5f%%",				\
 		     stats.used, stats.wasted,				\
 		     stats.wasted * 100. / (stats.used+stats.wasted));	\
+      delete_arena(res->arena);						\
     }									\
   } while(0)
 
-- 
GitLab