diff --git a/HACKING b/HACKING index 869c327574673b97402e235c576bc756740fa1dd..acee9c2ce8310722e20ec0be60708972da1cced4 100644 --- a/HACKING +++ b/HACKING @@ -10,3 +10,42 @@ In particular, these names, and the macros that use them, are: Used by a_new and company. Should be an HParseState* - mm__: Used by h_new and h_free. Should be an HAllocator* + +Function suffixes +================= + +Many functions come in several variants, to handle receiving optional +parameters or parameters in multiple different forms. For example, +often, you have a global memory manager that is used for an entire +program. In this case, you can leave the memory manager arguments +off, letting them be implicit instead. Further, it is often convenient +to pass an array or va_list to a function instead of listing the +arguments inline (eg, for wrapping a function, generating the +arguments programmatically, or writing bindings for another language). + +Because we have found that most variants fall into a fairly small set +of forms, and to minimize the number of API calls that users need to +remember, there is a consistent naming scheme for these function +variants: the function name is followed by two underscores and a set +of single-character "flags" indicating what optional features that +particular variant has (in alphabetical order, of course): + + __a: takes variadic arguments as a void*[] + __m: takes a memory manager as the first argument, to override the system memory manager. + __v: takes the variadic argument list as a va_list + + +Memory managers +=============== + +If the __m function variants are used or system_allocator is +overridden, there come some difficult questions to answer, +particularly regarding the behavior when multiple memory managers are +combined. 
As a general rule of thumb (exceptions will be explicitly +documented), assume that + + If you have a function f, which is passed a memory manager m and + returns a value r, any function that uses r as a parameter must + also be told to use m as a memory manager. + +In other words, don't let the (memory manager) streams cross. \ No newline at end of file diff --git a/NOTES b/NOTES index 84b8c4637aea271c54bb505aae4680208c3610f4..77d899db433cd143136658209f1e02ecdd2fbb6e 100644 --- a/NOTES +++ b/NOTES @@ -35,4 +35,3 @@ what the comments say. TODO: implement datastructure linearization func TODO: implement free func for parsers -TODO: Remove glib dependency (i.e., GQueue and GHashtable) \ No newline at end of file diff --git a/TODO b/TODO new file mode 100644 index 0000000000000000000000000000000000000000..10ad174f868d2d283e681144d5df3d12dbb84868 --- /dev/null +++ b/TODO @@ -0,0 +1,3 @@ +- Make h_action functions be called only after parse is complete. +- Allow alternative input streams (eg, zlib, base64) + - Bonus points if layered... 
\ No newline at end of file
diff --git a/src/Makefile b/src/Makefile
index de340e7b3226864a12656d02c5f8562ad2152bdf..21b7ac5164f930f0c3de993f8fd9892e4633cf48 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -25,17 +25,29 @@ PARSERS := \
 	attr_bool \
 	indirect
 
-OUTPUTS := bitreader.o \
-	hammer.o \
-	bitwriter.o \
-	libhammer.a \
-	pprint.o \
-	allocator.o \
-	datastructures.o \
-	system_allocator.o \
+BACKENDS := \
+	packrat
+
+HAMMER_PARTS := \
+	bitreader.o \
+	hammer.o \
+	bitwriter.o \
+	pprint.o \
+	allocator.o \
+	datastructures.o \
+	system_allocator.o \
+	benchmark.o \
+	compile.o \
+	$(PARSERS:%=parsers/%.o) \
+	$(BACKENDS:%=backends/%.o)
+
+TESTS := t_benchmark.o
+
+OUTPUTS := libhammer.a \
 	test_suite.o \
 	test_suite \
-	$(PARSERS:%=parsers/%.o)
+	$(HAMMER_PARTS) \
+	$(TESTS)
 
 TOPLEVEL := ../
 
@@ -44,8 +56,7 @@ include ../common.mk
 
 all: libhammer.a
 
-libhammer.a: bitreader.o hammer.o pprint.o allocator.o datastructures.o bitwriter.o system_allocator.o \
-	$(PARSERS:%=parsers/%.o)
+libhammer.a: $(HAMMER_PARTS)
 
 bitreader.o: test_suite.h
 hammer.o: hammer.h
@@ -56,6 +67,6 @@ all: test_suite
 test: test_suite
 	./test_suite -v
 
-test_suite: test_suite.o libhammer.a
+test_suite: test_suite.o $(TESTS) libhammer.a
 	$(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS)
 endif
diff --git a/src/backends/packrat.c b/src/backends/packrat.c
new file mode 100644
index 0000000000000000000000000000000000000000..b884b73c3d0d99c0558dc16f12ab758f87186627
--- /dev/null
+++ b/src/backends/packrat.c
@@ -0,0 +1,15 @@
+#include "../internal.h"
+
+int h_packrat_compile(HAllocator* mm__, HParser* parser, const void* params) {
+  return 0; // No compilation necessary, and everything should work
+            // out of the box.
+}
+
+HParseResult *h_packrat_parse(HAllocator* mm__, HParser* parser, HParseState* parse_state) {
+  return NULL; // TODO: fill this in.
+}
+
+HParserBackendVTable h__packrat_backend_vtable = {
+  .compile = h_packrat_compile,
+  .parse = h_packrat_parse
+};
diff --git a/src/benchmark.c b/src/benchmark.c
new file mode 100644
index 0000000000000000000000000000000000000000..62d89d9eaae5f2dae253f674dcfe59f8126210a2
--- /dev/null
+++ b/src/benchmark.c
@@ -0,0 +1,120 @@
+#include <limits.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+#include "hammer.h"
+
+/*
+  Usage:
+  Create your parser (i.e., HParser*), and then call
+
+  HBenchmarkResults* results = h_benchmark(parser, testcases);
+
+  Then, you can format a report with:
+
+  h_benchmark_report(stdout, results);
+
+  or just generate code to make the parser run as fast as possible with:
+
+  h_benchmark_dump_optimized_code(stdout, results);
+
+*/
+
+// Minimum CPU time, in ns, that one timing batch must reach before its
+// per-parse average is reported (100 ms).
+#define H_BENCHMARK_MIN_BATCH_NS 100000000LL
+
+// Returns nonzero iff a testcase's actual output agrees with the expected
+// one. h_benchmark passes NULL as [actual] when the parse produced no
+// result, so two NULLs agree, NULL never agrees with a string, and strcmp
+// is only reached when both pointers are non-NULL.
+static int h_benchmark_output_matches(const char* actual, const char* expected) {
+  if (actual == NULL || expected == NULL)
+    return actual == expected;
+  return strcmp(actual, expected) == 0;
+}
+
+/*
+ * For each backend in [PB_MIN, PB_MAX): compile [parser] for that backend,
+ * check every testcase and, if all of them pass, time every testcase.
+ *
+ * [testcases] must end with an entry whose input is NULL. For now all
+ * output goes to stderr and the return value is always NULL.
+ */
+HBenchmarkResults *h_benchmark(HParser* parser, HParserTestcase* testcases) {
+  // For now, just output the results to stderr
+  HParserTestcase* tc = testcases;
+  HParserBackend backend = PB_MIN;
+
+  for (backend = PB_MIN; backend < PB_MAX; backend++) {
+    fprintf(stderr, "Compiling for backend %d ... ", (int)backend);
+    // Step 1: Compile grammar for the backend under test (not always
+    // PB_MIN, which would benchmark the default backend repeatedly).
+    if (h_compile(parser, backend, NULL) == -1) {
+      // backend inappropriate for grammar...
+      fprintf(stderr, "failed\n");
+      continue;
+    }
+    int tc_failed = 0;
+    // Step 2: verify all test cases.
+    for (tc = testcases; tc->input != NULL; tc++) {
+      HParseResult *res = h_parse(parser, tc->input, tc->length);
+      char* res_unamb = NULL;
+      if (res != NULL)
+        res_unamb = h_write_result_unamb(res->ast);
+      if (!h_benchmark_output_matches(res_unamb, tc->output_unambiguous)) {
+        // test case failed...
+        fprintf(stderr, "failed\n");
+        // We want to run all testcases, for purposes of generating a
+        // report. (eg, if users are trying to fix a grammar for a
+        // faster backend)
+        tc_failed++;
+      }
+      // NOTE(review): res_unamb is never released; confirm who owns the
+      // string returned by h_write_result_unamb and free it accordingly.
+      // Whether h_parse_result_free accepts NULL is not visible from this
+      // file, so a failed parse is not handed to it.
+      if (res != NULL)
+        h_parse_result_free(res);
+    }
+
+    if (tc_failed > 0) {
+      // Can't use this parser; skip to the next
+      fprintf(stderr, "Backend failed testcases; skipping benchmark\n");
+      continue;
+    }
+
+    // Step 3: time every testcase.
+    for (tc = testcases; tc->input != NULL; tc++) {
+      // The goal is to run each testcase for at least
+      // H_BENCHMARK_MIN_BATCH_NS of thread CPU time.
+      // TODO: replace this with a posix timer-based benchmark. (cf. timerfd_create, timer_create, setitimer)
+      int count = 1, cur;
+      struct timespec ts_start, ts_end;
+      long long time_diff;
+      do {
+        count *= 2; // Yes, this means that the first run will run the function twice. This is fine, as we want multiple runs anyway.
+        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_start);
+        for (cur = 0; cur < count; cur++) {
+          HParseResult *res = h_parse(parser, tc->input, tc->length);
+          if (res != NULL)
+            h_parse_result_free(res);
+        }
+        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_end);
+
+        // time_diff is in ns. Widen the seconds term before multiplying so
+        // it cannot overflow a narrower integer type.
+        time_diff = (long long)(ts_end.tv_sec - ts_start.tv_sec) * 1000000000LL
+          + (ts_end.tv_nsec - ts_start.tv_nsec);
+        // Also stop doubling before count itself would overflow.
+      } while (time_diff < H_BENCHMARK_MIN_BATCH_NS && count <= INT_MAX / 2);
+      fprintf(stderr, "Case %d: %lld ns/parse\n", (int)(tc - testcases), time_diff / count);
+    }
+  }
+  return NULL;
+}
+
+// Placeholder: nothing is written to [stream] yet.
+void h_benchmark_report(FILE* stream, HBenchmarkResults* result) {
+  // TODO: fill in this function
+}
diff --git a/src/compile.c b/src/compile.c
new file mode 100644
index 0000000000000000000000000000000000000000..e24839dea2ffbae091dd395ec1b4d2ca8a0c3034
--- /dev/null
+++ b/src/compile.c
@@ -0,0 +1,24 @@
+// This file contains functions related to managing multiple parse backends
+#include "hammer.h"
+#include "internal.h"
+
+// Backend vtables, indexed by HParserBackend.
+static HParserBackendVTable *backends[PB_MAX] = {
+  &h__packrat_backend_vtable,
+};
+
+// Compiles [parser] for [backend] using the system allocator.
+int h_compile(HParser* parser, HParserBackend backend, const void* params) {
+  return h_compile__m(&system_allocator, parser, backend, params);
+}
+
+// Dispatches to the compile hook of [backend], passing [mm__] through.
+// Returns -1 without dispatching when [backend] is outside
+// [PB_MIN, PB_MAX) or has no vtable registered, instead of indexing
+// past the table or calling through a NULL pointer.
+int h_compile__m(HAllocator* mm__, HParser* parser, HParserBackend backend, const void* params) {
+  if ((int)backend < (int)PB_MIN || (int)backend >= (int)PB_MAX
+      || backends[backend] == NULL)
+    return -1;
+  return backends[backend]->compile(mm__, parser, params);
+}
diff --git a/src/hammer.h b/src/hammer.h
index 1c1a6ceef17124904969f9fcd1136a26d8a51d05..15ff6ec631fab4a8b2dc5c3911ee2cb8cc977a85 100644
--- a/src/hammer.h
+++ b/src/hammer.h
@@ -30,6 +30,12 @@ typedef int bool;
 
 typedef struct HParseState_ HParseState;
 
+typedef enum HParserBackend_ {
+  PB_MIN = 0,
+  PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
+  PB_MAX
+} HParserBackend;
+
 typedef enum HTokenType_ {
   TT_NONE,
   TT_BYTES,
@@ -112,6 +118,17 @@ typedef struct HParser_ {
   void *env;
 } HParser;
 
+// {{{ Stuff for benchmarking
+typedef struct HParserTestcase_ {
+  unsigned char* input;
+  size_t length;
+  char* output_unambiguous;
+} HParserTestcase;
+
+typedef struct HBenchmarkResults_ {
+} HBenchmarkResults;
+// }}}
+
 // {{{ Preprocessor definitions
 #define HAMMER_FN_DECL_NOARG(rtype_t, name) \
   rtype_t name(void); \
@@ -519,6 +536,15 @@ HAMMER_FN_DECL(char*, h_write_result_unamb, const HParsedToken* tok);
  */
 HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent, int delta);
 
+/**
+ * Build parse tables for the given parser backend. See the
+ * documentation for the parser backend in question for information
+ * about the [params] parameter, or just pass in NULL for the defaults.
+ *
+ * Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise.
+ */
+HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params);
+
 /**
  * TODO: Document me
  */
@@ -541,4 +567,10 @@ const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
  */
 void h_bit_writer_free(HBitWriter* w);
 
+// {{{ Benchmark functions
+HBenchmarkResults *h_benchmark(HParser* parser, HParserTestcase* testcases);
+void h_benchmark_report(FILE* stream, HBenchmarkResults* results);
+void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results);
+// }}}
+
 #endif // #ifndef HAMMER_HAMMER__H
diff --git a/src/internal.h b/src/internal.h
index a24cc0e7a518b03e9051e7e1495a7e9a013d82b2..8a8f97209f202c7247fce0db4e3d77506fc986a7 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -109,6 +109,12 @@ struct HParseState_ {
   HHashTable *recursion_heads;
 };
 
+typedef struct HParserBackendVTable_ {
+  int (*compile)(HAllocator *mm__, HParser* parser, const void* params);
+  HParseResult* (*parse)(HAllocator *mm__, HParser* parser, HParseState* parse_state);
+} HParserBackendVTable;
+
+
 /* The (location, parser) tuple used to key the cache.
  */
 
@@ -173,6 +179,10 @@ typedef struct HParserCacheValue_t {
   };
 } HParserCacheValue;
 
+// Backends {{{
+extern HParserBackendVTable h__packrat_backend_vtable;
+// }}}
+
 // TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
 
 long long h_read_bits(HInputStream* state, int count, char signed_p);
diff --git a/src/t_benchmark.c b/src/t_benchmark.c
new file mode 100644
index 0000000000000000000000000000000000000000..ad682b886d9c9cbc9a183182ebc84451c2b9604c
--- /dev/null
+++ b/src/t_benchmark.c
@@ -0,0 +1,14 @@
+// At this point, this is just a compile/link test.
+#include "hammer.h"
+
+HParserTestcase testcases[] = {
+  {NULL, 0, NULL}
+};
+
+void test_benchmark_1(void) {
+  HParser *parser = NULL; // TODO: fill this in.
+
+  HBenchmarkResults *res = h_benchmark(parser, testcases);
+  h_benchmark_report(stderr, res);
+
+}