diff --git a/src/allocator.c b/src/allocator.c
index cc259e605c56573b506f39194793e804ab4bf8b6..2ff5cacaa0e05da47ac851ef1ff71239ed5cde3b 100644
--- a/src/allocator.c
+++ b/src/allocator.c
@@ -29,24 +29,35 @@ struct arena_link {
   // For efficiency, we should probably allocate the arena links in 
   // their own slice, and link to a block directly. That can be
   // implemented later, though, with no change in interface.
-  struct arena_link *next; // It is crucial that this be the first item; so that 
-                           // any arena link can be casted to struct arena_link**.
-
+  struct arena_link *next;
   size_t free;
   size_t used;
   uint8_t rest[];
-} ;
+};
 
 struct HArena_ {
   struct arena_link *head;
   struct HAllocator_ *mm__;
+  /* does mm__ zero blocks for us? */
+  bool malloc_zeros;
   size_t block_size;
   size_t used;
   size_t wasted;
+#ifdef DETAILED_ARENA_STATS
+  size_t mm_malloc_count, mm_malloc_bytes;     /* calls into mm__ */
+  size_t memset_count, memset_bytes;           /* zeroing done by the arena */
+  size_t arena_malloc_count, arena_malloc_bytes;   /* all arena allocations */
+  size_t arena_su_malloc_count, arena_su_malloc_bytes; /* small, uninitialized */
+  size_t arena_si_malloc_count, arena_si_malloc_bytes; /* small, zeroed */
+  size_t arena_lu_malloc_count, arena_lu_malloc_bytes; /* large, uninitialized */
+  size_t arena_li_malloc_count, arena_li_malloc_bytes; /* large, zeroed */
+#endif
 
   jmp_buf *except;
 };
 
+static void * h_arena_malloc_raw(HArena *arena, size_t size, bool need_zero);
+
 void* h_alloc(HAllocator* mm__, size_t size) {
   void *p = mm__->alloc(mm__, size);
   if(!p)
@@ -61,7 +72,6 @@ HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
   struct arena_link *link = (struct arena_link*)h_alloc(mm__, sizeof(struct arena_link) + block_size);
   assert(ret != NULL);
   assert(link != NULL);
-  memset(link, 0, sizeof(struct arena_link) + block_size);
   link->free = block_size;
   link->used = 0;
   link->next = NULL;
@@ -69,6 +79,19 @@ HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
   ret->block_size = block_size;
   ret->used = 0;
   ret->mm__ = mm__;
+#ifdef DETAILED_ARENA_STATS
+  ret->mm_malloc_count = 2;
+  ret->mm_malloc_bytes = sizeof(*ret) + sizeof(struct arena_link) + block_size;
+  ret->memset_count = 0;
+  ret->memset_bytes = 0;
+  ret->arena_malloc_count = ret->arena_malloc_bytes = 0;
+  ret->arena_su_malloc_count = ret->arena_su_malloc_bytes = 0;
+  ret->arena_si_malloc_count = ret->arena_si_malloc_bytes = 0;
+  ret->arena_lu_malloc_count = ret->arena_lu_malloc_bytes = 0;
+  ret->arena_li_malloc_count = ret->arena_li_malloc_bytes = 0;
+#endif
+  /* XXX provide a mechanism to indicate mm__ returns zeroed blocks */
+  ret->malloc_zeros = false;
   ret->wasted = sizeof(struct arena_link) + sizeof(struct HArena_) + block_size;
   ret->except = NULL;
   return ret;
@@ -90,39 +113,120 @@ static void *alloc_block(HArena *arena, size_t size)
   return block;
 }
 
-void* h_arena_malloc(HArena *arena, size_t size) {
+void * h_arena_malloc_noinit(HArena *arena, size_t size) {
+  return h_arena_malloc_raw(arena, size, false);
+}
+
+void * h_arena_malloc(HArena *arena, size_t size) {
+  return h_arena_malloc_raw(arena, size, true);
+}
+
+static void * h_arena_malloc_raw(HArena *arena, size_t size,
+                                 bool need_zero) {
+  struct arena_link *link = NULL;
+  void *ret = NULL;
+
   if (size <= arena->head->free) {
-    // fast path..
-    void* ret = arena->head->rest + arena->head->used;
+    /* fast path.. */
+    ret = arena->head->rest + arena->head->used;
     arena->used += size;
     arena->wasted -= size;
     arena->head->used += size;
     arena->head->free -= size;
-    return ret;
+
+#ifdef DETAILED_ARENA_STATS
+    ++(arena->arena_malloc_count);
+    arena->arena_malloc_bytes += size;
+    if (need_zero) {
+      ++(arena->arena_si_malloc_count);
+      arena->arena_si_malloc_bytes += size;
+    } else {
+      ++(arena->arena_su_malloc_count);
+      arena->arena_su_malloc_bytes += size;
+    }
+#endif
   } else if (size > arena->block_size) {
-    // We need a new, dedicated block for it, because it won't fit in a standard sized one.
-    // This involves some annoying casting...
-    arena->used += size;
-    arena->wasted += sizeof(struct arena_link*);
-    void* link = alloc_block(arena, size + sizeof(struct arena_link*));
+    /*
+     * We need a new, dedicated block for it, because it won't fit in a
+     * standard sized one.
+     *
+     * NOTE:
+     *
+     * We used to do a silly casting dance to treat blocks like this as
+     * special cases and make the used/free fields part of the allocated
+     * block, but the old code was not really proper portable C and
+     * depended on a bunch of implementation-specific behavior.  We could
+     * have done it better with a union in struct arena_link, but the
+     * memory saving is only 0.39% on a 64-bit machine with a 4096-byte
+     * block size, even in the worst case where every large allocation
+     * exceeds the block size by just one byte, so I question the utility
+     * of it.  We do still slip the large block in one position behind the
+     * list head so it doesn't cut off a partially filled list head.
+     *
+     * -- andrea
+     */
+    link = alloc_block(arena, size + sizeof(struct arena_link));
     assert(link != NULL);
-    memset(link, 0, size + sizeof(struct arena_link*));
-    *(struct arena_link**)link = arena->head->next;
-    arena->head->next = (struct arena_link*)link;
-    return (void*)(((uint8_t*)link) + sizeof(struct arena_link*));
+    arena->used += size;
+    arena->wasted += sizeof(struct arena_link);
+    link->used = size;
+    link->free = 0;
+    link->next = arena->head->next;
+    arena->head->next = link;
+    ret = link->rest;
+
+#ifdef DETAILED_ARENA_STATS
+    ++(arena->arena_malloc_count);
+    arena->arena_malloc_bytes += size;
+    if (need_zero) {
+      ++(arena->arena_li_malloc_count);
+      arena->arena_li_malloc_bytes += size;
+    } else {
+      ++(arena->arena_lu_malloc_count);
+      arena->arena_lu_malloc_bytes += size;
+    }
+#endif
   } else {
-    // we just need to allocate an ordinary new block.
-    struct arena_link *link = alloc_block(arena, sizeof(struct arena_link) + arena->block_size);
+    /* we just need to allocate an ordinary new block. */
+    link = alloc_block(arena, sizeof(struct arena_link) + arena->block_size);
     assert(link != NULL);
-    memset(link, 0, sizeof(struct arena_link) + arena->block_size);
+#ifdef DETAILED_ARENA_STATS
+    ++(arena->mm_malloc_count);
+    arena->mm_malloc_bytes += sizeof(struct arena_link) + arena->block_size;
+#endif
     link->free = arena->block_size - size;
     link->used = size;
     link->next = arena->head;
     arena->head = link;
     arena->used += size;
     arena->wasted += sizeof(struct arena_link) + arena->block_size - size;
-    return link->rest;
+    ret = link->rest;
+
+#ifdef DETAILED_ARENA_STATS
+    ++(arena->arena_malloc_count);
+    arena->arena_malloc_bytes += size;
+    if (need_zero) {
+      ++(arena->arena_si_malloc_count);
+      arena->arena_si_malloc_bytes += size;
+    } else {
+      ++(arena->arena_su_malloc_count);
+      arena->arena_su_malloc_bytes += size;
+    }
+#endif
   }
+
+  /*
+   * Zero the block if the caller needs it and mm__ didn't already do it
+   */
+  if (need_zero && !(arena->malloc_zeros)) {
+    memset(ret, 0, size);
+#ifdef DETAILED_ARENA_STATS
+    ++(arena->memset_count);
+    arena->memset_bytes += size;
+#endif
+  }
+
+  return ret;
 }
 
 void h_arena_free(HArena *arena, void* ptr) {
@@ -146,4 +250,20 @@ void h_delete_arena(HArena *arena) {
 void h_allocator_stats(HArena *arena, HArenaStats *stats) {
   stats->used = arena->used;
   stats->wasted = arena->wasted;
+#ifdef DETAILED_ARENA_STATS
+  stats->mm_malloc_count = arena->mm_malloc_count;
+  stats->mm_malloc_bytes = arena->mm_malloc_bytes;
+  stats->memset_count = arena->memset_count;
+  stats->memset_bytes = arena->memset_bytes;
+  stats->arena_malloc_count = arena->arena_malloc_count;
+  stats->arena_malloc_bytes = arena->arena_malloc_bytes;
+  stats->arena_su_malloc_count = arena->arena_su_malloc_count;
+  stats->arena_su_malloc_bytes = arena->arena_su_malloc_bytes;
+  stats->arena_si_malloc_count = arena->arena_si_malloc_count;
+  stats->arena_si_malloc_bytes = arena->arena_si_malloc_bytes;
+  stats->arena_lu_malloc_count = arena->arena_lu_malloc_count;
+  stats->arena_lu_malloc_bytes = arena->arena_lu_malloc_bytes;
+  stats->arena_li_malloc_count = arena->arena_li_malloc_count;
+  stats->arena_li_malloc_bytes = arena->arena_li_malloc_bytes;
+#endif
 }
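
A note on the malloc_zeros flag added above: this patch hard-codes it to
false and leaves wiring it up as an XXX, so the memset() in
h_arena_malloc_raw() always runs when zeroed memory is requested. Below is
a minimal sketch of the kind of backing allocator that could eventually
set it, assuming the usual alloc/realloc/free HAllocator vtable; the names
zalloc/zrealloc/zfree and zeroing_allocator are illustrative, not part of
this patch:

#include <stdlib.h>
#include "allocator.h"

/* An HAllocator whose alloc() hands back zeroed memory; an arena with
 * malloc_zeros == true could then skip its own memset() calls. */
static void *zalloc(HAllocator *mm__, size_t size) {
  (void)mm__;
  return calloc(1, size);  /* calloc() zeroes for us */
}

static void *zrealloc(HAllocator *mm__, void *ptr, size_t size) {
  (void)mm__;
  /* NB: realloc() does not zero the grown tail; a real implementation
   * would have to track block sizes and clear the new bytes itself. */
  return realloc(ptr, size);
}

static void zfree(HAllocator *mm__, void *ptr) {
  (void)mm__;
  free(ptr);
}

static HAllocator zeroing_allocator = {
  .alloc = zalloc, .realloc = zrealloc, .free = zfree
};

The realloc() case is the subtle part, since plain realloc() gives no
zeroing guarantee for the grown region; that may be why the mechanism is
left as an XXX here rather than implemented.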
diff --git a/src/allocator.h b/src/allocator.h
index dc88af68f22895f584065a491463b3f8576c09e9..06d1e6f59dd32987979079c4a7b01d09b13547e6 100644
--- a/src/allocator.h
+++ b/src/allocator.h
@@ -38,6 +38,8 @@ extern "C" {
 # define ATTR_MALLOC(n)
 #endif
 
+/* #define DETAILED_ARENA_STATS */  /* uncomment for per-arena allocation statistics */
+
 // TODO(thequux): Turn this into an "HAllocatorVtable", and add a wrapper that also takes an environment pointer.
 typedef struct HAllocator_ {
   void* (*alloc)(struct HAllocator_* allocator, size_t size);
@@ -51,6 +53,7 @@ typedef struct HArena_ HArena ; // hidden implementation
 
 HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default...
 
+void* h_arena_malloc_noinit(HArena *arena, size_t count) ATTR_MALLOC(2);
 void* h_arena_malloc(HArena *arena, size_t count) ATTR_MALLOC(2);
 void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers.
 void h_delete_arena(HArena *arena);
@@ -59,6 +62,26 @@ void h_arena_set_except(HArena *arena, jmp_buf *except);
 typedef struct {
   size_t used;
   size_t wasted;
+#ifdef DETAILED_ARENA_STATS
+  size_t mm_malloc_count;
+  size_t mm_malloc_bytes;
+  size_t memset_count;
+  size_t memset_bytes;
+  size_t arena_malloc_count;
+  size_t arena_malloc_bytes;
+  /* small (served from a shared block), uninitialized */
+  size_t arena_su_malloc_count;
+  size_t arena_su_malloc_bytes;
+  /* small, zero-initialized */
+  size_t arena_si_malloc_count;
+  size_t arena_si_malloc_bytes;
+  /* large (given a dedicated block), uninitialized */
+  size_t arena_lu_malloc_count;
+  size_t arena_lu_malloc_bytes;
+  /* large, zero-initialized */
+  size_t arena_li_malloc_count;
+  size_t arena_li_malloc_bytes;
+#endif
 } HArenaStats;
 
 void h_allocator_stats(HArena *arena, HArenaStats *stats);
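
With DETAILED_ARENA_STATS uncommented, h_allocator_stats() copies all of
these counters out to the caller. A usage sketch follows; the report()
helper is illustrative only, but every field name comes from the struct
above:

#include <stdio.h>
#include "allocator.h"

/* illustrative only: dump the counters this patch adds */
static void report(HArena *arena) {
  HArenaStats stats;
  h_allocator_stats(arena, &stats);
  printf("used %zu, wasted %zu\n", stats.used, stats.wasted);
#ifdef DETAILED_ARENA_STATS
  printf("%zu arena mallocs, %zu bytes; %zu memsets, %zu bytes\n",
         stats.arena_malloc_count, stats.arena_malloc_bytes,
         stats.memset_count, stats.memset_bytes);
  printf("small: %zu uninit, %zu init; large: %zu uninit, %zu init\n",
         stats.arena_su_malloc_count, stats.arena_si_malloc_count,
         stats.arena_lu_malloc_count, stats.arena_li_malloc_count);
#endif
}

Comparing memset_bytes against arena_malloc_bytes shows directly how much
zeroing the _noinit conversions below avoid.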
diff --git a/src/datastructures.c b/src/datastructures.c
index 99d821f1a93e8fbfa2189a581b6fd59afb367bc7..ba012b76b6a6af23929e47af655a68cd294bf268 100644
--- a/src/datastructures.c
+++ b/src/datastructures.c
@@ -9,12 +9,14 @@
 
 
 HCountedArray *h_carray_new_sized(HArena * arena, size_t size) {
-  HCountedArray *ret = h_arena_malloc(arena, sizeof(HCountedArray));
+  /* _noinit here because we set all the fields below */
+  HCountedArray *ret = h_arena_malloc_noinit(arena, sizeof(HCountedArray));
   if (size == 0)
     size = 1;
   ret->used = 0;
   ret->capacity = size;
   ret->arena = arena;
+  /* we do want these zeroed: unused element slots should read as NULL */
   ret->elements = h_arena_malloc(arena, sizeof(void*) * size);
   return ret;
 }
@@ -24,12 +26,21 @@ HCountedArray *h_carray_new(HArena * arena) {
 }
 
 void h_carray_append(HCountedArray *array, void* item) {
+  HParsedToken **elements;
+
   if (array->used >= array->capacity) {
-    HParsedToken **elements = h_arena_malloc(array->arena, (array->capacity *= 2) * sizeof(void*));
+    /* _noinit here; every slot is initialized in the two loops below */
+    elements = h_arena_malloc_noinit(array->arena,
+        (array->capacity *= 2) * sizeof(void*));
     for (size_t i = 0; i < array->used; i++)
       elements[i] = array->elements[i];
     for (size_t i = array->used; i < array->capacity; i++)
       elements[i] = 0;
+    /*
+     * XXX I hope we don't do this much, because h_arena_free() isn't
+     * really implemented yet, so growing a lot of arrays this way would
+     * get pretty wasteful.
+     */
     h_arena_free(array->arena, array->elements);
     array->elements = elements;
   }
@@ -38,7 +49,8 @@ void h_carray_append(HCountedArray *array, void* item) {
 
 // HSlist
 HSlist* h_slist_new(HArena *arena) {
-  HSlist *ret = h_arena_malloc(arena, sizeof(HSlist));
+  /* _noinit here; we set every field of ret below */
+  HSlist *ret = h_arena_malloc_noinit(arena, sizeof(HSlist));
   ret->head = NULL;
   ret->arena = arena;
   return ret;
@@ -53,8 +65,12 @@ HSlist* h_slist_copy(HSlist *slist) {
     tail = ret->head;
     head = head->next;
     while (head != NULL) {
-      // append head item to tail in a new node
-      HSlistNode *node = h_arena_malloc(slist->arena, sizeof(HSlistNode));
+      /*
+       * append head item to tail in a new node
+       *
+       * use _noinit; we set every field of node right after we allocate
+       */
+      HSlistNode *node = h_arena_malloc_noinit(slist->arena, sizeof(HSlistNode));
       node->elem = head->elem;
       node->next = NULL;
       tail = tail->next = node;
@@ -85,10 +101,11 @@ void* h_slist_pop(HSlist *slist) {
 }
 
 void h_slist_push(HSlist *slist, void* item) {
-  HSlistNode *hnode = h_arena_malloc(slist->arena, sizeof(HSlistNode));
+  /* use _noinit; we set every field of hnode below */
+  HSlistNode *hnode = h_arena_malloc_noinit(slist->arena, sizeof(HSlistNode));
   hnode->elem = item;
   hnode->next = slist->head;
-  // write memory barrier here.
+  /* write memory barrier here. */
   slist->head = hnode;
 }
 
@@ -132,20 +149,23 @@ void h_slist_free(HSlist *slist) {
 }
 
 HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc) {
-  HHashTable *ht = h_arena_malloc(arena, sizeof(HHashTable));
+  /* _noinit because all fields are set below */
+  HHashTable *ht = h_arena_malloc_noinit(arena, sizeof(HHashTable));
   ht->hashFunc = hashFunc;
   ht->equalFunc = equalFunc;
   ht->capacity = 64; // to start; should be tuned later...
   ht->used = 0;
   ht->arena = arena;
-  ht->contents = h_arena_malloc(arena, sizeof(HHashTableEntry) * ht->capacity);
+  /* _noinit because all fields of all entries are set in the loop */
+  ht->contents = h_arena_malloc_noinit(arena,
+      sizeof(HHashTableEntry) * ht->capacity);
   for (size_t i = 0; i < ht->capacity; i++) {
     ht->contents[i].key = NULL;
     ht->contents[i].value = NULL;
     ht->contents[i].next = NULL;
     ht->contents[i].hashval = 0;
   }
-  //memset(ht->contents, 0, sizeof(HHashTableEntry) * ht->capacity);
+
   return ht;
 }
 
@@ -183,26 +203,33 @@ void * h_hashtable_get(const HHashTable *ht, const void *key) {
 void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry* new_entry);
 
 void h_hashtable_ensure_capacity(HHashTable* ht, size_t n) {
+  HHashTableEntry *old_contents, *new_contents;
   bool do_resize = false;
   size_t old_capacity = ht->capacity;
   while (n * 1.3 > ht->capacity) {
     ht->capacity *= 2;
     do_resize = true;
   }
-  if (!do_resize)
-    return;
-  HHashTableEntry *old_contents = ht->contents;
-  HHashTableEntry *new_contents = h_arena_malloc(ht->arena, sizeof(HHashTableEntry) * ht->capacity);
-  ht->contents = new_contents;
-  ht->used = 0;
-  memset(new_contents, 0, sizeof(HHashTableEntry) * ht->capacity);
-  for (size_t i = 0; i < old_capacity; ++i)
-    for (HHashTableEntry *entry = &old_contents[i];
-	 entry;
-	 entry = entry->next)
-      if (entry->key)
-	h_hashtable_put_raw(ht, entry);
-  //h_arena_free(ht->arena, old_contents);
+
+  if (do_resize) {
+    old_contents = ht->contents;
+    /* _noinit because the memset() just below zeroes the whole array */
+    new_contents = h_arena_malloc_noinit(ht->arena,
+        sizeof(HHashTableEntry) * ht->capacity);
+    ht->contents = new_contents;
+    ht->used = 0;
+    memset(new_contents, 0, sizeof(HHashTableEntry) * ht->capacity);
+    for (size_t i = 0; i < old_capacity; ++i) {
+      for (HHashTableEntry *entry = &old_contents[i];
+           entry;
+           entry = entry->next) {
+        if (entry->key) {
+          h_hashtable_put_raw(ht, entry);
+        }
+      }
+    }
+    /* h_arena_free(ht->arena, old_contents); */
+  }
 }
 
 void h_hashtable_put_precomp(HHashTable *ht, const void *key, void *value,
@@ -249,7 +276,7 @@ void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry *new_entry) {
     }
     // Add a new link...
     assert (hte->next == NULL);
-    hte->next = h_arena_malloc(ht->arena, sizeof(HHashTableEntry));
+    hte->next = h_arena_malloc_noinit(ht->arena, sizeof(HHashTableEntry)); /* fields set below */
     hte = hte->next;
     hte->next = NULL;
     ht->used++;
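
All the _noinit conversions in this file follow one contract:
h_arena_malloc_noinit() is only safe when the caller stores to every field
(or byte) it will later read. A minimal sketch of the pattern, using a
hypothetical pair structure purely for illustration:

#include "allocator.h"

/* hypothetical example type, not part of the library */
struct pair { void *fst; void *snd; };

struct pair *pair_new(HArena *arena, void *fst, void *snd) {
  /* _noinit is safe here: both fields are written before any read */
  struct pair *p = h_arena_malloc_noinit(arena, sizeof(struct pair));
  p->fst = fst;
  p->snd = snd;
  return p;
}

Where some slots may legitimately stay untouched, as with the element
array in h_carray_new_sized(), plain h_arena_malloc() remains the right
call, since it still guarantees zeroed memory.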