From ce74cf79396d3afdf726dd603d3c678373750e29 Mon Sep 17 00:00:00 2001
From: Dan Hirsch <thequux@upstandinghackers.com>
Date: Mon, 24 Jun 2013 21:23:28 +0200
Subject: [PATCH] Refactored regex backend to use a sparse thread list

---
 src/backends/regex.c | 30 ++++++++--------------
 src/datastructures.c | 16 ++++++++++++
 src/internal.h       | 60 ++++++++++++++++++++++++++++++++++++++++++++
 src/parsers/many.c   |  5 ++--
 4 files changed, 90 insertions(+), 21 deletions(-)

diff --git a/src/backends/regex.c b/src/backends/regex.c
index 6f069bec..a3c073c8 100644
--- a/src/backends/regex.c
+++ b/src/backends/regex.c
@@ -50,8 +50,8 @@ HRVMTrace *invert_trace(HRVMTrace *trace) {
 
 void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_t len) {
   HArena *arena = h_new_arena(mm__, 0);
-  HRVMTrace **heads_p = a_new(HRVMTrace*, prog->length),
-    **heads_n = a_new(HRVMTrace*, prog->length);
+  HSArray *heads_n = h_sarray_new(mm__, prog->length), // Both of these contain HRVMTrace*'s
+    *heads_p = h_sarray_new(mm__, prog->length);
 
   HRVMTrace *ret_trace = NULL;
   
@@ -59,10 +59,6 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
   HRVMThread *ip_queue = a_new(HRVMThread, prog->length);
   size_t ipq_top;
 
-  
-  
-  
-
 #define THREAD ip_queue[ipq_top-1]
 #define PUSH_SVM(op_, arg_) do { \
 	  HRVMTrace *nt = a_new(HRVMTrace, 1); \
@@ -72,34 +68,30 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
 	  nt->input_pos = off;		       \
 	  THREAD.trace = nt;		       \
   } while(0)
-    
-  heads_n[0] = a_new(HRVMTrace, 1); // zeroing
-  heads_n[0]->opcode = SVM_NOP;
 
+  ((HRVMTrace*)h_sarray_set(heads_n, 0, a_new(HRVMTrace, 1)))->opcode = SVM_NOP; // Initial thread
+  
   size_t off = 0;
-  int live_threads = 1;
+  int live_threads = 1; // May be redundant
   for (off = 0; off <= len; off++) {
     uint8_t ch = ((off == len) ? 0 : input[off]);
-    size_t ip_s; // BUG: there was an unused variable ip. Not sure if
-		 // I intended to use it somewhere.
     /* scope */ {
-      HRVMTrace **heads_t;
+      HSArray *heads_t;
       heads_t = heads_n;
       heads_n = heads_p;
       heads_p = heads_t;
-      memset(heads_n, 0, prog->length * sizeof(*heads_n));
+      h_sarray_clear(heads_n);
     }
     memset(insn_seen, 0, prog->length); // no insns seen yet
     if (!live_threads)
       goto match_fail;
     live_threads = 0;
-    for (ip_s = 0; ip_s < prog->length; ip_s++) {
+    HRVMTrace *tr_head;
+    H_SARRAY_FOREACH_KV(tr_head,ip_s,heads_p) {
       ipq_top = 1;
       // TODO: Write this as a threaded VM
-      if (!heads_p[ip_s])
-	continue;
       THREAD.ip = ip_s;
-      THREAD.trace = heads_p[ip_s];
+      THREAD.trace = tr_head;
       uint8_t hi, lo;
       uint16_t arg;
       while(ipq_top > 0) {
@@ -155,7 +147,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
 	case RVM_STEP:
 	  // save thread
 	  live_threads++;
-	  heads_n[++THREAD.ip] = THREAD.trace;
+	  h_sarray_set(heads_n, ++THREAD.ip, THREAD.trace);
 	  ipq_top--;
 	  goto next_insn;
 	}
diff --git a/src/datastructures.c b/src/datastructures.c
index 99b4ca57..1ddd6203 100644
--- a/src/datastructures.c
+++ b/src/datastructures.c
@@ -284,3 +284,19 @@ HHashValue h_hash_ptr(const void *p) {
   // XXX just djbhash it
   return (uintptr_t)p >> 4;
 }
+
+HSArray *h_sarray_new(HAllocator *mm__, size_t size) {
+  // Build an empty sparse array whose valid indices are 0 .. size-1.
+  HSArray *sa = h_new(HSArray, 1);
+  sa->nodes = h_new(HSArrayNode, size); // Does not actually need to be initialized.
+  sa->mm__ = mm__;      // kept so h_sarray_free can find the allocator again
+  sa->capacity = size;
+  sa->used = 0;         // nothing is set yet
+  return sa; // TODO: Add the valgrind hooks to mark this initialized.
+}
+
+void h_sarray_free(HSArray *arr) { // Release a sparse array created by h_sarray_new.
+  HAllocator *mm__ = arr->mm__; // NOTE(review): presumably the h_free macro picks up this local `mm__` - confirm against its definition
+  h_free(arr->nodes);
+  h_free(arr); // only the node table and the header are passed to h_free; stored content pointers are not
+}
diff --git a/src/internal.h b/src/internal.h
index d0fb53a0..b7fe6213 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -17,6 +17,7 @@
 
 #ifndef HAMMER_INTERNAL__H
 #define HAMMER_INTERNAL__H
+#include <assert.h>
 #include <err.h>
 #include <string.h>
 #include "hammer.h"
@@ -72,6 +73,65 @@ typedef struct HSlist_ {
   struct HArena_ *arena;
 } HSlist;
 
+// {{{ HSArray
+
+typedef struct HSArrayNode_ { // One slot of an HSArray; slot k is used in two independent roles (see fields).
+  size_t elem;   // dense role: the array index that was the k-th one set since the last clear
+  size_t index;  // sparse role: position in the dense `elem` list that should point back at k
+  void* content; // sparse role: value stored for array index k
+} HSArrayNode;
+
+typedef struct HSArray_ {
+  // Sparse array
+  // Element n is valid iff arr->nodes[n].index < arr->used && arr->nodes[arr->nodes[n].index].elem == n
+  HSArrayNode *nodes; // content for node at index n is stored at position n.
+  size_t capacity; // number of entries in nodes; valid indices are 0 .. capacity-1
+  size_t used; // number of indices currently set; h_sarray_clear resets it to 0
+  HAllocator *mm__; // allocator recorded by h_sarray_new and read back by h_sarray_free
+} HSArray;
+
+HSArray *h_sarray_new(HAllocator *mm__, size_t size); // allocate an empty sparse array with capacity `size`
+void h_sarray_free(HSArray *arr); // release arr and its node table
+static inline bool h_sarray_isset(HSArray *arr, size_t n) { // True iff index n has been set since the last clear.
+  assert(n < arr->capacity);
+  return (arr->nodes[n].index < arr->used && arr->nodes[arr->nodes[n].index].elem == n); // range test first: index may be stale, and && keeps it from being used as a subscript
+}
+static inline void* h_sarray_get(HSArray *arr, size_t n) {
+  // Fetch the value stored at index n; an index that is not currently set reads as NULL.
+  assert(n < arr->capacity);
+  // Only trust the content slot once the index/elem cross-check has passed.
+  return h_sarray_isset(arr, n) ? arr->nodes[n].content : NULL;
+}
+
+static inline void* h_sarray_set(HSArray *arr, size_t n, void* val) { // Store val at index n and hand val back.
+  assert(n < arr->capacity);
+  arr->nodes[n].content = val; // the value slot is overwritten whether or not n was already set
+  if (!h_sarray_isset(arr, n)) { // first store to n since the last clear: register it in the dense list
+    arr->nodes[n].index = arr->used;
+    arr->nodes[arr->used++].elem = n;
+  }
+  return val;
+}
+
+static inline void h_sarray_clear(HSArray *arr) { // Forget every element without touching the node table.
+  arr->used = 0; // with used == 0 the `index < used` test in h_sarray_isset fails for every n
+}
+
+#define H__APPEND2(a,b) a##b // raw token paste
+#define H__APPEND(a,b) H__APPEND2(a,b) // extra level so the arguments are macro-expanded before pasting
+#define H__INTVAR(pfx) H__APPEND(intvar__##pfx##__,__COUNTER__) // fresh identifier intvar__<pfx>__<N>; __COUNTER__ is a compiler extension, not ISO C
+
+#define H_SARRAY_FOREACH_KV_(var,idx,arr,intvar)			\
+  for (size_t intvar = 0, idx = (var = (arr)->nodes[(arr)->nodes[intvar].elem].content,(arr)->nodes[intvar].elem); \
+       intvar < (arr)->used;						\
+       idx = (arr)->nodes[intvar].elem, var = (arr)->nodes[(arr)->nodes[intvar].elem].content, intvar=intvar+1)
+
+#define H_SARRAY_FOREACH_KV(var,index,arr) H_SARRAY_FOREACH_KV_(var,index,arr,H__INTVAR(idx))
+#define H_SARRAY_FOREACH_V(var,arr) H_SARRAY_FOREACH_KV_(var,H__INTVAR(elem),arr,H__INTVAR(idx))
+#define H_SARRAY_FOREACH_K(index,arr) H_SARRAY_FOREACH_KV_(H__INTVAR(val),index,arr,H__INTVAR(idx))
+
+// }}}
+
 typedef unsigned int *HCharset;
 
 static inline HCharset new_charset(HAllocator* mm__) {
diff --git a/src/parsers/many.c b/src/parsers/many.c
index a095940c..1e3b0221 100644
--- a/src/parsers/many.c
+++ b/src/parsers/many.c
@@ -130,7 +130,7 @@ static bool many_ctrvm(HRVMProg *prog, void *env) {
   if (repeat->min_p) {
   h_rvm_insert_insn(prog, RVM_PUSH, 0);
     assert(repeat->count < 2); // TODO: The other cases should be supported later.
-    uint16_t end_fork;
+    uint16_t end_fork = 0xFFFF; // Shut up GCC
     if (repeat->count == 0)
       end_fork = h_rvm_insert_insn(prog, RVM_FORK, 0xFFFF);
     uint16_t goto_mid = h_rvm_insert_insn(prog, RVM_GOTO, 0xFFFF);
@@ -145,7 +145,8 @@ static bool many_ctrvm(HRVMProg *prog, void *env) {
     if (!h_compile_regex(prog, repeat->p))
       return false;
     h_rvm_insert_insn(prog, RVM_FORK, nxt);
-    h_rvm_patch_arg(prog, end_fork, h_rvm_get_ip(prog));
+    if (repeat->count == 0)
+      h_rvm_patch_arg(prog, end_fork, h_rvm_get_ip(prog));
     
     h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
     return true;
-- 
GitLab