diff --git a/src/allocator.h b/src/allocator.h
index e83cae7cbfecebc58dd810671385b1d63f72d9fb..2dfc14e689f825efabc0d7c46b515217ccd90abb 100644
--- a/src/allocator.h
+++ b/src/allocator.h
@@ -19,6 +19,7 @@
 #define HAMMER_ALLOCATOR__H__
 #include <sys/types.h>
 
+// TODO(thequux): Turn this into an "HAllocatorVtable", and add a wrapper that also takes an environment pointer.
 typedef struct HAllocator_ {
   void* (*alloc)(struct HAllocator_* allocator, size_t size);
   void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size);
diff --git a/src/backends/regex.c b/src/backends/regex.c
new file mode 100644
index 0000000000000000000000000000000000000000..63c799597cf8b3fd6cfae0003efb23da043491c4
--- /dev/null
+++ b/src/backends/regex.c
@@ -0,0 +1,171 @@
+#include <stdlib.h>
+#include <string.h>
+#include "../internal.h"
+#include "../parsers/parser_internal.h"
+#include "regex.h"
+
+#undef a_new
+// No trailing semicolon here: a_new() is used inside declarator lists.
+#define a_new(typ, count) a_new_(arena, typ, count)
+// Stack VM
+typedef enum HSVMOp_ {
+  SVM_PUSH, // Push a mark. There is no VM insn to push an object.
+  SVM_NOP, // Used to start the chain, and possibly elsewhere. Does nothing.
+  SVM_ACTION, // Same meaning as RVM_ACTION
+  SVM_CAPTURE, // Same meaning as RVM_CAPTURE
+  SVM_ACCEPT,
+} HSVMOp;
+
+typedef struct HRVMTrace_ {
+  struct HRVMTrace_ *next; // When parsing, these are
+                           // reverse-threaded. There is a postproc
+                           // step that inverts all the pointers.
+  uint16_t arg;
+  uint8_t opcode;
+} HRVMTrace;
+
+typedef struct HRVMThread_ {
+  HRVMTrace *trace;
+  uint16_t ip;
+} HRVMThread;
+
+// Run the regex VM program `prog` over input[0..len), one byte at a time.
+// For each input offset, an instruction already marked as processed in
+// insn_seen is not run again. Threads that reach RVM_STEP are parked in
+// heads_n and resumed at the following instruction on the next byte.
+// Returns NULL when no thread survives or the input is exhausted;
+// RVM_ACCEPT is not implemented yet and aborts.
+void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const char* input, size_t len) {
+  if (prog->length == 0)
+    return NULL; // nothing to run; also keeps heads_n[0] below in bounds
+
+  // TODO: the arena is not released on the NULL-return paths below.
+  HArena *arena = h_new_arena(mm__, 0);
+  HRVMTrace **heads_p = a_new(HRVMTrace*, prog->length),
+    **heads_n = a_new(HRVMTrace*, prog->length);
+
+  uint8_t *insn_seen = a_new(uint8_t, prog->length); // 0 -> not seen, 1->processed, 2->queued
+  HRVMThread *ip_queue = a_new(HRVMThread, prog->length);
+  size_t ipq_top;
+
+#define THREAD ip_queue[ipq_top-1]
+#define PUSH_SVM(op_, arg_) do { \
+    HRVMTrace *nt = a_new(HRVMTrace, 1); \
+    nt->arg = (arg_); \
+    nt->opcode = (op_); \
+    nt->next = THREAD.trace; \
+    THREAD.trace = nt; \
+  } while(0)
+
+  // Only insn 0 starts out live. Initialise explicitly rather than
+  // relying on the arena handing back zeroed memory.
+  memset(heads_n, 0, prog->length * sizeof(*heads_n));
+  heads_n[0] = a_new(HRVMTrace, 1);
+  heads_n[0]->next = NULL;
+  heads_n[0]->arg = 0;
+  heads_n[0]->opcode = SVM_NOP;
+
+  int live_threads = 1;
+  for (size_t off = 0; off <= len; off++) {
+    uint8_t ch = ((off == len) ? 0 : input[off]);
+    size_t ip_s;
+    /* scope */ {
+      HRVMTrace **heads_t;
+      heads_t = heads_n;
+      heads_n = heads_p;
+      heads_p = heads_t;
+      memset(heads_n, 0, prog->length * sizeof(*heads_n));
+    }
+    memset(insn_seen, 0, prog->length); // no insns seen yet
+    if (!live_threads)
+      return NULL;
+    live_threads = 0;
+    for (ip_s = 0; ip_s < prog->length; ip_s++) {
+      ipq_top = 1;
+      // TODO: Write this as a threaded VM
+      if (!heads_p[ip_s])
+        continue;
+      THREAD.ip = ip_s;
+      THREAD.trace = heads_p[ip_s]; // resume with the trace parked at the last STEP
+
+      uint8_t hi, lo;
+      uint16_t arg;
+      while(ipq_top > 0) {
+        if (THREAD.ip >= prog->length || insn_seen[THREAD.ip] == 1) {
+          // Ran off the program, or this insn was already handled at
+          // this offset: kill the thread. (A bare "continue" here
+          // would spin forever, since nothing pops the queue.)
+          ipq_top--;
+          continue;
+        }
+        insn_seen[THREAD.ip] = 1;
+        arg = prog->insns[THREAD.ip].arg;
+        switch(prog->insns[THREAD.ip].op) {
+        case RVM_ACCEPT:
+          // TODO: save current SVM pos, and jump to end
+          abort();
+        case RVM_MATCH:
+          // Doesn't actually validate the "must be followed by MATCH
+          // or STEP" rule. It should. Preproc perhaps?
+          hi = (arg >> 8) & 0xff;
+          lo = arg & 0xff;
+          THREAD.ip++;
+          if (ch < lo || ch > hi) // outside the inclusive [lo, hi] range
+            ipq_top--; // terminate thread
+          goto next_insn;
+        case RVM_GOTO:
+          THREAD.ip = arg;
+          goto next_insn;
+        case RVM_FORK:
+          THREAD.ip++;
+          if (arg < prog->length && !insn_seen[arg]) {
+            if (THREAD.ip < prog->length)
+              insn_seen[THREAD.ip] = 2;
+            HRVMTrace* tr = THREAD.trace;
+            ipq_top++;
+            THREAD.ip = arg;
+            THREAD.trace = tr;
+          }
+          goto next_insn;
+        case RVM_PUSH:
+          PUSH_SVM(SVM_PUSH, off); // NOTE: off is truncated to the 16-bit trace arg
+          THREAD.ip++;
+          goto next_insn;
+        case RVM_ACTION:
+          PUSH_SVM(SVM_ACTION, arg);
+          THREAD.ip++;
+          goto next_insn;
+        case RVM_CAPTURE:
+          PUSH_SVM(SVM_CAPTURE, 0);
+          THREAD.ip++;
+          goto next_insn;
+        case RVM_EOF:
+          THREAD.ip++;
+          if (off != len)
+            ipq_top--; // Terminate thread
+          goto next_insn;
+        case RVM_STEP:
+          // Park the thread at the insn *after* STEP; parking it at STEP
+          // itself would re-run STEP on every byte and never advance.
+          if ((size_t)THREAD.ip + 1 < prog->length) {
+            live_threads++;
+            heads_n[THREAD.ip + 1] = THREAD.trace;
+          }
+          ipq_top--;
+          goto next_insn;
+        default:
+          ipq_top--; // unknown opcode: drop the thread
+          goto next_insn;
+        }
+      next_insn:
+        ; // a label must be followed by a statement
+      }
+    }
+  }
+
+  // Input exhausted without reaching an (unimplemented) accept.
+  return NULL;
+}
+
+#undef PUSH_SVM
+#undef THREAD
diff --git a/src/backends/regex.h b/src/backends/regex.h
new file mode 100644
index 0000000000000000000000000000000000000000..c406c8420140652562ba9a07798885119472a6b9
--- /dev/null
+++ b/src/backends/regex.h
@@ -0,0 +1,38 @@
+// Internal defs
+#ifndef HAMMER_BACKEND_REGEX__H
+#define HAMMER_BACKEND_REGEX__H
+
+// each insn is an 8-bit opcode and a 16-bit parameter
+// [a] are actions; they add an instruction to the stackvm that is being output.
+// [m] are match ops; they can either succeed or fail, depending on the current character
+// [c] are control ops. They affect the pc non-linearly.
+typedef enum HRVMOp_ {
+  RVM_ACCEPT,  // [a]
+  RVM_GOTO,    // [c] parameter is an offset into the instruction table
+  RVM_FORK,    // [c] parameter is an offset into the instruction table
+  RVM_PUSH,    // [a] No arguments, just pushes a mark onto the stack
+  RVM_ACTION,  // [a] argument is an action ID
+  RVM_CAPTURE, // [a] Capture the last string, and push it on the stack. No arg.
+  RVM_EOF,     // [m] Succeeds only if at EOF.
+  RVM_MATCH,   // [m] The high byte of the parameter is an upper bound
+               //     and the low byte is a lower bound, both
+               //     inclusive. An inverted match should be handled
+               //     as two ranges.
+  RVM_STEP,    // [a] Step to the next byte of input
+  RVM_OPCOUNT
+} HRVMOp;
+
+typedef struct HRVMInsn_{
+  uint8_t op;
+  uint16_t arg;
+} HRVMInsn;
+
+
+typedef struct HRVMProg_ {
+  size_t length;
+  size_t action_count;
+  HAction *actions;
+  HRVMInsn *insns;
+} HRVMProg;
+
+#endif
diff --git a/src/hammer.h b/src/hammer.h
index ecb606545e56dd2e3bf8e17041d20bba1f4df02a..451268569fed2af6d7f6d02b51d5d909c1d079e8 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -34,6 +34,10 @@ typedef struct HParseState_ HParseState;
 typedef enum HParserBackend_ {
   PB_MIN = 0,
   PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
+  PB_LALR,	// Not Implemented
+  PB_LLk,	// Not Implemented
+  PB_GLR,	// Not Implemented
+  PB_REGULAR,	// Not Implemented
   PB_MAX
 } HParserBackend;
 
@@ -114,6 +118,7 @@ typedef struct HParserVtable_ {
   HParseResult* (*parse)(void *env, HParseState *state);
   bool (*isValidRegular)(void *env);
   bool (*isValidCF)(void *env);
+
 } HParserVtable;
 
 typedef struct HParser_ {
diff --git a/src/internal.h b/src/internal.h
index 67ecb22e4cbf227a8334479ec898cbab83e6d778..d35ebaabc90ae093bbce40e0c85442eee764e098 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -193,6 +193,7 @@ struct HBitWriter_ {
 
 // }}}
 
+
 // Backends {{{
 extern HParserBackendVTable h__packrat_backend_vtable;
 // }}}
diff --git a/src/test_suite.h b/src/test_suite.h
index 24932bb4e370672f104e27ea8439889d3eba67b5..3a76beec8888e7778550fc107364b77b4abed8bb 100644
--- a/src/test_suite.h
+++ b/src/test_suite.h
@@ -52,6 +52,28 @@
     } \
   } while(0)
 
+#define g_check_regular(lang) do { \
+    if (!(lang)->isValidRegular((lang)->env)) { \
+      g_test_message("Language is not regular"); \
+      g_test_fail(); \
+    } \
+  } while(0)
+
+#define g_check_contextfree(lang) do { \
+    if (!(lang)->isValidCF((lang)->env)) { \
+      g_test_message("Language is not context-free"); \
+      g_test_fail(); \
+    } \
+  } while(0)
+
+#define g_check_compilable(lang, backend, params) do { \
+    if (!h_compile(lang, backend, params)) { \
+      g_test_message("Language is not %s(%s)", #backend, params); \
+      g_test_fail(); \
+    } \
+  } while(0)
+
+
 // TODO: replace uses of this with g_check_parse_failed
 #define g_check_failed(res) do { \
     const HParseResult *result = (res); \
@@ -99,4 +121,5 @@
 #define g_check_cmpdouble(n1, op, n2) g_check_inttype("%g", double, n1, op, n2)
 
 
+
 #endif // #ifndef HAMMER_TEST_SUITE__H