diff --git a/src/hammer.c b/src/hammer.c index 94a39980e1c75712d9307ffab0ba06e3683ebbb0..0b342e6b7d53ff7b16ede78430375ea357cb1a6b 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -35,7 +35,36 @@ guint djbhash(const uint8_t *buf, size_t len) { return hash; } -void setupLR(const parser_t *p, GQueue *stack, LR_t *recDetect) { +parser_cache_value_t* recall(parser_cache_key_t *k, parse_state_t *state) { /* Cache lookup that takes recursion heads into account: returns the state->cache entry for key k (NULL when there is none), first re-running k->parser if it is listed in the eval_set of the head found for the current input stream. */ + parser_cache_value_t *cached = g_hash_table_lookup(state->cache, k); + head_t *head = g_hash_table_lookup(state->recursion_heads, &(state->input_stream)); + if (!head) { // No heads found: nothing is registered in recursion_heads for the current input stream, so the plain cache lookup result (possibly NULL) is returned as is + return cached; + } else { // Some heads found: a head_t is registered for the current input stream + if (!cached && head->head_parser != k->parser && !g_slist_find(head->involved_set, k->parser)) { + // Nothing in the cache, and the key parser is not involved (it is neither the head parser nor a member of involved_set) + return /* TODO(mlp): figure out what to return here instead of Some(MemoEntry(Right(Failure("dummy", in)))) */ NULL; + } + if (g_slist_find(head->eval_set, k->parser)) { + // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. + head->eval_set = g_slist_remove_all(head->eval_set, k->parser); + parse_result_t *tmp_res = k->parser->fn(k->parser->env, state); + if (tmp_res) + tmp_res->arena = state->arena; + // we know that cached has an entry here, modify it; NOTE(review): cached is dereferenced without a NULL check, and the early return above does not cover the case where nothing is cached but the key parser is the head parser or in involved_set, so confirm that eval_set membership really implies a cache entry + cached->value_type = PC_RIGHT; + cached->right = tmp_res; + } + return cached; + } +} + +void setupLR(const parser_t *p, GQueue *stack, LR_t *rec_detect) { /* Lazily creates rec_detect->head: when it is NULL, allocates a head_t with head_parser = p and NULL (empty) involved_set and eval_set. The stack argument is not used by this body. */ + if (!rec_detect->head) { + head_t *some = g_new(head_t, 1); + some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL; + rec_detect->head = some; + } } @@ -47,6 +76,7 @@ parse_result_t* grow(const parser_t *p, parse_state_t *state, head_t *head) { return NULL; } +/* Warth's recursion. Hi Alessandro! */ parse_result_t* do_parse(const parser_t* parser, parse_state_t *state) { // TODO(thequux): add caching here. 
parser_cache_key_t *key = a_new(parser_cache_key_t, 1); @@ -67,7 +97,8 @@ parse_result_t* do_parse(const parser_t* parser, parse_state_t *state) { parse_result_t *tmp_res; if (parser) { tmp_res = parser->fn(parser->env, state); - tmp_res->arena = state->arena; + if (tmp_res) /* skip the arena assignment when parser->fn returned NULL */ + tmp_res->arena = state->arena; } else tmp_res = NULL; if (state->input_stream.overrun) diff --git a/src/hammer.h b/src/hammer.h index 8eb901c9c769d8d92347b75035f42d0602a6b7ab..62a6f7153dd70c0043c775a1c670cd561db0828f 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -20,37 +20,15 @@ #include <glib.h> #include <stdint.h> #include "allocator.h" -/* The state of the parser. - * - * Members: - * input - the entire string being parsed - * index - current position in input - * length - size of input - * cache - a hash table describing the state of the parse, including partial parse_results. It's a hash table from parser_cache_key_t to parse_state_t. - * - */ + #define BYTE_BIG_ENDIAN 0x1 #define BIT_BIG_ENDIAN 0x2 #define BIT_LITTLE_ENDIAN 0x0 #define BYTE_LITTLE_ENDIAN 0x0 typedef int bool; -typedef struct input_stream { - // This should be considered to be a really big value type. - const uint8_t *input; - size_t index; - size_t length; - char bit_offset; - char endianness; - char overrun; -} input_stream_t; - -typedef struct parse_state { - GHashTable *cache; - input_stream_t input_stream; - arena_t arena; - GQueue *lr_stack; -} parse_state_t; + +typedef struct parse_state parse_state_t; /* forward declaration only; this patch moves the struct definition to src/internal.h */ typedef enum token_type { TT_NONE, diff --git a/src/internal.h b/src/internal.h index 8b6818e7d4be461d5d1b0ed5537bd806c60abc60..1c88367a48ed95af2cd26e5be26c103125a92857 100644 --- a/src/internal.h +++ b/src/internal.h @@ -32,28 +32,85 @@ #define false 0 #define true 1 +typedef struct input_stream { + // This should be considered to be a really big value type: it is embedded by value (not by pointer) in parse_state_t.input_stream and parser_cache_key_t.input_pos.
+ const uint8_t *input; + size_t index; + size_t length; + char bit_offset; + char endianness; + char overrun; +} input_stream_t; + +/* The state of the parser. + * + * Members: + * cache - a hash table describing the state of the parse, including partial parse_results. It's a hash table from parser_cache_key_t to parser_cache_value_t. + * input_stream - the input stream at this state. + * arena - the arena that has been allocated for the parse this state is in. + * lr_stack - used in Warth's recursion + * recursion_heads - used in Warth's recursion; a hash table that recall() queries with the address of input_stream to obtain the head_t registered there, if any + * + */ + +typedef struct parse_state { + GHashTable *cache; + input_stream_t input_stream; + arena_t arena; + GQueue *lr_stack; + GHashTable *recursion_heads; +} parse_state_t; + +/* The (location, parser) tuple used to key the cache. + */ + typedef struct parser_cache_key { input_stream_t input_pos; const parser_t *parser; } parser_cache_key_t; +/* A value in the cache is either of value Left or Right (this is a + * holdover from Scala, which used Either here). Left corresponds to + * LR_t, which is for left recursion; Right corresponds to + * parse_result_t. + */ + typedef enum parser_cache_value_type { PC_LEFT, PC_RIGHT } parser_cache_value_type_t; + +/* A recursion head. + * + * Members: + * head_parser - the parser this head was created for; setupLR() stores its p argument here + * involved_set - GSList of parsers that recall() treats as involved with this head (presumably the rules taking part in the left recursion; TODO confirm) + * eval_set - GSList of parsers that recall() re-runs once: a parser found here is removed from the list and its fresh result is written into its cache entry + */ typedef struct head { - parser_t *head_parser; + const parser_t *head_parser; GSList *involved_set; GSList *eval_set; } head_t; + +/* A left recursion. + * + * Members: + * seed - a parse result attached to this recursion (presumably the seed result that gets grown; TODO confirm) + * rule - a parser (presumably the rule in which the left recursion was detected; TODO confirm) + * head - the recursion head, or NULL until setupLR() allocates one + */ typedef struct LR { parse_result_t *seed; const parser_t *rule; head_t *head; } LR_t; +/* Tagged union for values in the cache: either LR's (Left) or + * parse_result_t's (Right). + */ typedef struct parser_cache_value { parser_cache_value_type_t value_type; union {