From ce74cf79396d3afdf726dd603d3c678373750e29 Mon Sep 17 00:00:00 2001
From: Dan Hirsch <thequux@upstandinghackers.com>
Date: Mon, 24 Jun 2013 21:23:28 +0200
Subject: [PATCH] Refactored regex backend to use a sparse thread list

HSArray is a sparse array whose clear operation only resets `used`, so
the per-input-byte reset of the thread heads no longer touches every
program slot.

The H_SARRAY_FOREACH_* iterators load the key and value inside the loop
condition, after the bounds check, so each set element is visited
exactly once and an empty array is never read.
---
 src/backends/regex.c | 30 ++++++++--------------
 src/datastructures.c | 16 ++++++++++++
 src/internal.h       | 60 ++++++++++++++++++++++++++++++++++++++++++++
 src/parsers/many.c   |  5 ++--
 4 files changed, 90 insertions(+), 21 deletions(-)

diff --git a/src/backends/regex.c b/src/backends/regex.c
index 6f069bec..a3c073c8 100644
--- a/src/backends/regex.c
+++ b/src/backends/regex.c
@@ -50,8 +50,8 @@ HRVMTrace *invert_trace(HRVMTrace *trace) {
 
 void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_t len) {
   HArena *arena = h_new_arena(mm__, 0);
-  HRVMTrace **heads_p = a_new(HRVMTrace*, prog->length),
-    **heads_n = a_new(HRVMTrace*, prog->length);
+  HSArray *heads_n = h_sarray_new(mm__, prog->length), // Both of these contain HRVMTrace*'s
+    *heads_p = h_sarray_new(mm__, prog->length);
 
   HRVMTrace *ret_trace = NULL;
 
@@ -59,10 +59,6 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
   HRVMThread *ip_queue = a_new(HRVMThread, prog->length);
   size_t ipq_top;
 
-
-
-
-
 #define THREAD ip_queue[ipq_top-1]
 #define PUSH_SVM(op_, arg_) do { \
     HRVMTrace *nt = a_new(HRVMTrace, 1); \
@@ -72,34 +68,30 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
     nt->input_pos = off; \
     THREAD.trace = nt; \
   } while(0)
-
-  heads_n[0] = a_new(HRVMTrace, 1); // zeroing
-  heads_n[0]->opcode = SVM_NOP;
+  ((HRVMTrace*)h_sarray_set(heads_n, 0, a_new(HRVMTrace, 1)))->opcode = SVM_NOP; // Initial thread
+
   size_t off = 0;
-  int live_threads = 1;
+  int live_threads = 1; // May be redundant
   for (off = 0; off <= len; off++) {
     uint8_t ch = ((off == len) ? 0 : input[off]);
-    size_t ip_s; // BUG: there was an unused variable ip. Not sure if
-                 // I intended to use it somewhere.
     /* scope */ {
-      HRVMTrace **heads_t;
+      HSArray *heads_t;
       heads_t = heads_n;
       heads_n = heads_p;
       heads_p = heads_t;
-      memset(heads_n, 0, prog->length * sizeof(*heads_n));
+      h_sarray_clear(heads_n);
     }
 
     memset(insn_seen, 0, prog->length); // no insns seen yet
     if (!live_threads)
       goto match_fail;
     live_threads = 0;
-    for (ip_s = 0; ip_s < prog->length; ip_s++) {
+    HRVMTrace *tr_head;
+    H_SARRAY_FOREACH_KV(tr_head,ip_s,heads_p) {
       ipq_top = 1;
       // TODO: Write this as a threaded VM
-      if (!heads_p[ip_s])
-        continue;
       THREAD.ip = ip_s;
-      THREAD.trace = heads_p[ip_s];
+      THREAD.trace = tr_head;
       uint8_t hi, lo;
       uint16_t arg;
       while(ipq_top > 0) {
@@ -155,7 +147,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
         case RVM_STEP:
           // save thread
           live_threads++;
-          heads_n[++THREAD.ip] = THREAD.trace;
+          h_sarray_set(heads_n, ++THREAD.ip, THREAD.trace);
           ipq_top--;
           goto next_insn;
         }
diff --git a/src/datastructures.c b/src/datastructures.c
index 99b4ca57..1ddd6203 100644
--- a/src/datastructures.c
+++ b/src/datastructures.c
@@ -284,3 +284,19 @@ HHashValue h_hash_ptr(const void *p) {
   // XXX just djbhash it
   return (uintptr_t)p >> 4;
 }
+
+HSArray *h_sarray_new(HAllocator *mm__, size_t size) {
+  HSArray *ret = h_new(HSArray, 1);
+  ret->capacity = size;
+  ret->used = 0;
+  ret->nodes = h_new(HSArrayNode, size); // Does not actually need to be initialized.
+  ret->mm__ = mm__;
+  // TODO: Add the valgrind hooks to mark this initialized.
+  return ret;
+}
+
+void h_sarray_free(HSArray *arr) {
+  HAllocator *mm__ = arr->mm__;
+  h_free(arr->nodes);
+  h_free(arr);
+}
diff --git a/src/internal.h b/src/internal.h
index d0fb53a0..b7fe6213 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -17,6 +17,7 @@
 
 #ifndef HAMMER_INTERNAL__H
 #define HAMMER_INTERNAL__H
+#include <assert.h>
 #include <err.h>
 #include <string.h>
 #include "hammer.h"
@@ -72,6 +73,65 @@ typedef struct HSlist_ {
   struct HArena_ *arena;
 } HSlist;
 
+// {{{ HSArray
+
+typedef struct HSArrayNode_ {
+  size_t elem;
+  size_t index;
+  void* content;
+} HSArrayNode;
+
+typedef struct HSArray_ {
+  // Sparse array
+  // Element n is valid iff arr->nodes[n].index < arr->used && arr->nodes[arr->nodes[n].index].elem == n
+  HSArrayNode *nodes; // content for node at index n is stored at position n.
+  size_t capacity;
+  size_t used;
+  HAllocator *mm__;
+} HSArray;
+
+HSArray *h_sarray_new(HAllocator *mm__, size_t size);
+void h_sarray_free(HSArray *arr);
+static inline bool h_sarray_isset(HSArray *arr, size_t n) {
+  assert(n < arr->capacity);
+  return (arr->nodes[n].index < arr->used && arr->nodes[arr->nodes[n].index].elem == n);
+}
+static inline void* h_sarray_get(HSArray *arr, size_t n) {
+  assert(n < arr->capacity);
+  if (h_sarray_isset(arr, n))
+    return arr->nodes[n].content;
+  return NULL;
+}
+
+static inline void* h_sarray_set(HSArray *arr, size_t n, void* val) {
+  assert(n < arr->capacity);
+  arr->nodes[n].content = val;
+  if (h_sarray_isset(arr, n))
+    return val;
+  arr->nodes[arr->used].elem = n;
+  arr->nodes[n].index = arr->used++;
+  return val;
+}
+
+static inline void h_sarray_clear(HSArray *arr) {
+  arr->used = 0;
+}
+
+#define H__APPEND2(a,b) a##b
+#define H__APPEND(a,b) H__APPEND2(a,b)
+#define H__INTVAR(pfx) H__APPEND(intvar__##pfx##__,__COUNTER__)
+
+#define H_SARRAY_FOREACH_KV_(var,idx,arr,intvar) \
+  for (size_t intvar = 0, idx = 0; \
+       intvar < (arr)->used && (idx = (arr)->nodes[intvar].elem, (var) = (arr)->nodes[idx].content, 1); \
+       intvar = intvar + 1)
+
+#define H_SARRAY_FOREACH_KV(var,index,arr) H_SARRAY_FOREACH_KV_(var,index,arr,H__INTVAR(idx))
+#define H_SARRAY_FOREACH_V(var,arr) H_SARRAY_FOREACH_KV_(var,H__INTVAR(elem),arr,H__INTVAR(idx))
+#define H_SARRAY_FOREACH_K(index,arr) H_SARRAY_FOREACH_KV_(H__INTVAR(val),index,arr,H__INTVAR(idx))
+
+// }}}
+
 typedef unsigned int *HCharset;
 
 static inline HCharset new_charset(HAllocator* mm__) {
diff --git a/src/parsers/many.c b/src/parsers/many.c
index a095940c..1e3b0221 100644
--- a/src/parsers/many.c
+++ b/src/parsers/many.c
@@ -130,7 +130,7 @@ static bool many_ctrvm(HRVMProg *prog, void *env) {
   if (repeat->min_p) {
     h_rvm_insert_insn(prog, RVM_PUSH, 0);
     assert(repeat->count < 2); // TODO: The other cases should be supported later.
-    uint16_t end_fork;
+    uint16_t end_fork = 0xFFFF; // Shut up GCC
     if (repeat->count == 0)
       end_fork = h_rvm_insert_insn(prog, RVM_FORK, 0xFFFF);
     uint16_t goto_mid = h_rvm_insert_insn(prog, RVM_GOTO, 0xFFFF);
@@ -145,7 +145,8 @@ static bool many_ctrvm(HRVMProg *prog, void *env) {
     if (!h_compile_regex(prog, repeat->p))
       return false;
     h_rvm_insert_insn(prog, RVM_FORK, nxt);
-    h_rvm_patch_arg(prog, end_fork, h_rvm_get_ip(prog));
+    if (repeat->count == 0)
+      h_rvm_patch_arg(prog, end_fork, h_rvm_get_ip(prog));
 
     h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
     return true;
-- 
GitLab