Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • hammer/hammer
  • mlp/hammer
  • xentrac/hammer
  • pesco/hammer
  • letitiali/hammer
  • nobody/hammer
  • kia/hammer-sandbox
  • vyrus001/hammer
  • denleylam/hammer
9 results
Show changes
Showing with 2705 additions and 180 deletions
File moved
local ffi = require("ffi")
-- C declarations for the part of libhammer this binding uses: the backend and
-- token-type enums, the token/result/parser structs, the callback typedefs
-- (HAction, HPredicate, HContinuation), the parser constructors, h_parse,
-- h_compile and the endianness constants.
-- NOTE(review): these prototypes must match the installed hammer.h exactly;
-- in particular h_sepBy/h_sepBy1 are declared here with a single argument and
-- HTokenType values are hard-coded -- confirm against the header in use.
ffi.cdef[[
typedef enum HParserBackend_ {
PB_MIN = 0,
PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
PB_REGULAR,
PB_LLk,
PB_LALR,
PB_GLR,
PB_MAX = PB_GLR
} HParserBackend;
typedef enum HTokenType_ {
TT_NONE = 1,
TT_BYTES = 2,
TT_SINT = 4,
TT_UINT = 8,
TT_SEQUENCE = 16,
TT_RESERVED_1, // reserved for backend-specific internal use
TT_ERR = 32,
TT_USER = 64,
TT_MAX
} HTokenType;
typedef struct HBytes_ {
const uint8_t *token;
size_t len;
} HBytes;
typedef struct HArena_ HArena ; // hidden implementation
typedef struct HCountedArray_ {
size_t capacity;
size_t used;
HArena * arena;
struct HParsedToken_ **elements;
} HCountedArray;
typedef struct HParsedToken_ {
HTokenType token_type;
union {
HBytes bytes;
int64_t sint;
uint64_t uint;
double dbl;
float flt;
HCountedArray *seq; // a sequence of HParsedToken's
void *user;
};
size_t index;
size_t bit_length;
char bit_offset;
} HParsedToken;
typedef struct HParseResult_ {
const HParsedToken *ast;
int64_t bit_length;
HArena * arena;
} HParseResult;
typedef struct HParserVtable_ HParserVtable;
typedef struct HCFChoice_ HCFChoice;
typedef struct HParser_ {
const HParserVtable *vtable;
HParserBackend backend;
void* backend_data;
void *env;
HCFChoice *desugared;
} HParser;
typedef struct HAllocator_ HAllocator;
typedef HParsedToken* (*HAction)(const HParseResult *p, void* user_data);
typedef bool (*HPredicate)(HParseResult *p, void* user_data);
typedef HParser* (*HContinuation)(HAllocator *mm__, const HParsedToken *x, void *env);
HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length);
HParser* h_token(const uint8_t *str, const size_t len);
HParser* h_ch(const uint8_t c);
HParser* h_ch_range(const uint8_t lower, const uint8_t upper);
HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper);
HParser* h_bits(size_t len, bool sign);
HParser* h_int64();
HParser* h_int32();
HParser* h_int16();
HParser* h_int8();
HParser* h_uint64();
HParser* h_uint32();
HParser* h_uint16();
HParser* h_uint8();
HParser* h_whitespace(const HParser* p);
HParser* h_left(const HParser* p, const HParser* q);
HParser* h_right(const HParser* p, const HParser* q);
HParser* h_middle(const HParser* p, const HParser* x, const HParser* q);
HParser* h_action(const HParser* p, const HAction a, void* user_data);
HParser* h_in(const uint8_t *charset, size_t length);
HParser* h_not_in(const uint8_t *charset, size_t length);
HParser* h_end_p();
HParser* h_nothing_p();
HParser* h_sequence(HParser* p, ...);
HParser* h_choice(HParser* p, ...);
HParser* h_permutation(HParser* p, ...);
HParser* h_butnot(const HParser* p1, const HParser* p2);
HParser* h_difference(const HParser* p1, const HParser* p2);
HParser* h_xor(const HParser* p1, const HParser* p2);
HParser* h_many(const HParser* p);
HParser* h_many1(const HParser* p);
HParser* h_repeat_n(const HParser* p, const size_t n);
HParser* h_optional(const HParser* p);
HParser* h_ignore(const HParser* p);
HParser* h_sepBy(const HParser* p);
HParser* h_sepBy1(const HParser* p);
HParser* h_epsilon_p();
HParser* h_length_value(const HParser* length, const HParser* value);
HParser* h_attr_bool(const HParser* p, HPredicate pred, void* user_data);
HParser* h_and(const HParser* p);
HParser* h_not(const HParser* p);
HParser* h_indirect(const HParser* p);
void h_bind_indirect(HParser* indirect, const HParser* inner);
HParser* h_with_endianness(char endianness, const HParser* p);
HParser* h_put_value(const HParser* p, const char* name);
HParser* h_get_value(const char* name);
HParser* h_bind(const HParser *p, HContinuation k, void *env);
int h_compile(HParser* parser, HParserBackend backend, const void* params);
static const uint8_t BYTE_BIG_ENDIAN = 0x1;
static const uint8_t BIT_BIG_ENDIAN = 0x2;
static const uint8_t BYTE_LITTLE_ENDIAN = 0x0;
static const uint8_t BIT_LITTLE_ENDIAN = 0x0;
]]
local h = ffi.load("hammer")
--- Return every vararg, in order, followed by `a` as one extra trailing value.
-- `nil` arguments (including a `nil` value for `a`) are preserved positionally,
-- so `append(nil, x, y)` yields exactly three values: `x, y, nil`.
-- @param a value to place after the varargs
-- @param ... values to return first
-- @return the varargs followed by `a`
local function append(a, ...)
  local count = select('#', ...)
  local values = { ... }
  values[count + 1] = a
  -- explicit bounds so embedded or trailing nils are not dropped
  return (table.unpack or unpack)(values, 1, count + 1)
end
-- Metatable attached to the HParser ctype: gives every parser cdata a
-- `parse(str)` method that forwards to h_parse with the byte length of `str`.
local mt = {
__index = {
-- p: HParser cdata; str: Lua string to parse. Returns whatever h_parse
-- returns (an HParseResult* per the cdef above).
parse = function(p, str) return h.h_parse(p, str, #str) end,
},
}
-- Module table returned at the end of the file.
local hammer = {}
-- Registers `mt` for HParser and exposes the resulting ctype.
hammer.parser = ffi.metatype("HParser", mt)
local counted_array
-- Metatable for HCountedArray cdata.
--   arr[k]       raw C indexing into `elements` (0-based, no bounds check)
--   #arr         number of used slots (`used`)
--   ipairs(arr)  yields (1..used, element) pairs, i.e. Lua index i maps to
--                C slot i-1
--   arr()        copies the used elements into a new Lua array table
local arr_mt = {
  __index = function(arr, key)
    -- only reached for keys that are not struct fields
    return arr.elements[key]
  end,
  __len = function(arr) return arr.used end,
  __ipairs = function(arr)
    local i, n = 0, tonumber(arr.used)
    return function()
      if i < n then
        i = i + 1
        -- `elements` is a C array: valid slots are 0 .. used-1
        return i, arr.elements[i - 1]
      end
    end
  end,
  __call = function(self)
    -- `ret` must be local; it previously leaked into the global table
    local ret = {}
    for slot = 0, tonumber(self.used) - 1 do
      ret[slot + 1] = self.elements[slot]
    end
    return ret
  end
}
counted_array = ffi.metatype("HCountedArray", arr_mt)
-- Metatable for HBytes cdata: calling the value returns its `len` bytes
-- starting at `token` as a Lua string (binary-safe, may contain NULs).
local bytes_mt = {
  __call = function(self)
    local len = tonumber(self.len)
    if len == 0 then
      -- nothing to read; avoid touching `token` at all
      return ""
    end
    -- single copy instead of building the string one byte at a time
    return ffi.string(self.token, len)
  end
}
local byte_string = ffi.metatype("HBytes", bytes_mt)
local parsed_token
-- Metatable for HParsedToken cdata: calling a token converts it to a Lua
-- value according to `token_type`:
--   TT_BYTES    -> Lua string (via the HBytes call metamethod)
--   TT_SINT     -> Lua number
--   TT_UINT     -> Lua number
--   TT_SEQUENCE -> Lua table (via the HCountedArray call metamethod)
-- Any other token type returns nil.
local tok_mt = {
  __call = function(self)
    local token_type = self.token_type
    if token_type == ffi.C.TT_BYTES then
      return self.bytes()
    elseif token_type == ffi.C.TT_SINT then
      -- convert the 64-bit value directly; routing it through `void *`
      -- would truncate it where pointers are narrower than 64 bits
      return tonumber(self.sint)
    elseif token_type == ffi.C.TT_UINT then
      return tonumber(self.uint)
    elseif token_type == ffi.C.TT_SEQUENCE then
      return self.seq()
    end
  end
}
parsed_token = ffi.metatype("HParsedToken", tok_mt)
--- Wrapper over C `h_token`: passes `str` and its byte length.
-- @param str Lua string
-- @return the HParser* produced by h_token
function hammer.token(str)
return h.h_token(str, #str)
end
-- Normalise a byte argument: numbers pass through unchanged, non-empty
-- strings contribute their first byte. Anything else is a caller error,
-- reported at the call site of the public function.
local function to_byte(c)
  local kind = type(c)
  if kind == "number" then
    return c
  elseif kind == "string" and #c > 0 then
    return c:byte()
  end
  error("expected a byte value (number) or a non-empty string, got " .. kind, 3)
end

--- Wrapper over C `h_ch`.
-- @param c byte value as a number, or a string whose first byte is used
-- @return the HParser* produced by h_ch
function hammer.ch(c)
  return h.h_ch(to_byte(c))
end

--- Wrapper over C `h_ch_range`.
-- Each bound is normalised independently, so a number and a string may be
-- mixed (e.g. `ch_range(0x30, "9")`).
-- @param lower lower bound: number, or string whose first byte is used
-- @param upper upper bound: number, or string whose first byte is used
-- @return the HParser* produced by h_ch_range
function hammer.ch_range(lower, upper)
  return h.h_ch_range(to_byte(lower), to_byte(upper))
end
-- The functions below are one-to-one wrappers: each forwards its arguments
-- unchanged to the C function of the same name (prefixed `h_`) declared in
-- the cdef block and returns that function's HParser* result.

--- Wrapper over C `h_int_range(p, lower, upper)`.
function hammer.int_range(parser, lower, upper)
return h.h_int_range(parser, lower, upper)
end
--- Wrapper over C `h_bits(len, sign)`; `sign` is passed as a C bool.
function hammer.bits(len, sign)
return h.h_bits(len, sign)
end
--- Wrapper over C `h_int64()`.
function hammer.int64()
return h.h_int64()
end
--- Wrapper over C `h_int32()`.
function hammer.int32()
return h.h_int32()
end
--- Wrapper over C `h_int16()`.
function hammer.int16()
return h.h_int16()
end
--- Wrapper over C `h_int8()`.
function hammer.int8()
return h.h_int8()
end
--- Wrapper over C `h_uint64()`.
function hammer.uint64()
return h.h_uint64()
end
--- Wrapper over C `h_uint32()`.
function hammer.uint32()
return h.h_uint32()
end
--- Wrapper over C `h_uint16()`.
function hammer.uint16()
return h.h_uint16()
end
--- Wrapper over C `h_uint8()`.
function hammer.uint8()
return h.h_uint8()
end
--- Wrapper over C `h_whitespace(p)`.
function hammer.whitespace(parser)
return h.h_whitespace(parser)
end
--- Wrapper over C `h_left(p, q)`.
function hammer.left(parser1, parser2)
return h.h_left(parser1, parser2)
end
--- Wrapper over C `h_right(p, q)`.
function hammer.right(parser1, parser2)
return h.h_right(parser1, parser2)
end
--- Wrapper over C `h_middle(p, x, q)`.
function hammer.middle(parser1, parser2, parser3)
return h.h_middle(parser1, parser2, parser3)
end
-- There could also be an overload of this that doesn't
-- bother with the env pointer, and passes it as nil by
-- default, but I'm not going to deal with overloads now.
--- Wrapper over C `h_action`: casts the Lua function `action` to the
-- `HAction` function-pointer type and passes it with `user_data`.
-- NOTE(review): every call creates a new FFI callback via ffi.cast and never
-- frees it; LuaJIT callbacks are a limited resource -- confirm this is
-- acceptable for the expected number of action parsers.
function hammer.action(parser, action, user_data)
local cb = ffi.cast("HAction", action)
return h.h_action(parser, cb, user_data)
end
--- Wrapper over C `h_in`: copies `charset` into a temporary unsigned char
-- array of the same length and passes that array plus the length.
function hammer.in_(charset)
local cs = ffi.new("const unsigned char[" .. #charset .. "]", charset)
return h.h_in(cs, #charset)
end
--- Wrapper over C `h_not_in`: passes the Lua string `charset` directly
-- (no intermediate copy, unlike `in_`) together with its length.
function hammer.not_in(charset)
return h.h_not_in(charset, #charset)
end
--- Wrapper over C `h_end_p()`.
function hammer.end_p()
return h.h_end_p()
end
--- Wrapper over C `h_nothing_p()`.
function hammer.nothing_p()
return h.h_nothing_p()
end
-- The three combinators below wrap C variadic functions whose argument list
-- must end with a NULL pointer. `append(nil, ...)` yields the remaining
-- parsers followed by a trailing nil (converted to NULL by the FFI). It must
-- appear as the LAST expression of the call so that all of its results are
-- forwarded; assigning it to a single local first would keep only the first
-- value and drop both the other parsers and the terminator.

--- Wrapper over C `h_sequence(p, ..., NULL)`.
-- @param parser first parser
-- @param ... further parsers
-- @return the HParser* produced by h_sequence
function hammer.sequence(parser, ...)
  return h.h_sequence(parser, append(nil, ...))
end

--- Wrapper over C `h_choice(p, ..., NULL)`.
-- @param parser first alternative
-- @param ... further alternatives
-- @return the HParser* produced by h_choice
function hammer.choice(parser, ...)
  return h.h_choice(parser, append(nil, ...))
end

--- Wrapper over C `h_permutation(p, ..., NULL)`.
-- @param parser first parser
-- @param ... further parsers
-- @return the HParser* produced by h_permutation
function hammer.permutation(parser, ...)
  return h.h_permutation(parser, append(nil, ...))
end
-- One-to-one wrappers: each forwards its arguments unchanged to the C
-- function of the same name (prefixed `h_`) declared in the cdef block and
-- returns that function's result.

--- Wrapper over C `h_butnot(p1, p2)`.
function hammer.butnot(parser1, parser2)
return h.h_butnot(parser1, parser2)
end
--- Wrapper over C `h_difference(p1, p2)`.
function hammer.difference(parser1, parser2)
return h.h_difference(parser1, parser2)
end
--- Wrapper over C `h_xor(p1, p2)`.
function hammer.xor(parser1, parser2)
return h.h_xor(parser1, parser2)
end
--- Wrapper over C `h_many(p)`.
function hammer.many(parser)
return h.h_many(parser)
end
--- Wrapper over C `h_many1(p)`.
function hammer.many1(parser)
return h.h_many1(parser)
end
--- Wrapper over C `h_repeat_n(p, n)`.
function hammer.repeat_n(parser, n)
return h.h_repeat_n(parser, n)
end
--- Wrapper over C `h_optional(p)`.
function hammer.optional(parser)
return h.h_optional(parser)
end
--- Wrapper over C `h_ignore(p)`.
function hammer.ignore(parser)
return h.h_ignore(parser)
end
--- Wrapper over C `h_sepBy(p)` as declared in the cdef block.
-- NOTE(review): the name suggests a separator parser should also be passed;
-- the declaration used here takes one argument -- confirm against hammer.h.
function hammer.sepBy(parser)
return h.h_sepBy(parser)
end
--- Wrapper over C `h_sepBy1(p)` as declared in the cdef block.
-- NOTE(review): same single-argument caveat as `sepBy` -- confirm.
function hammer.sepBy1(parser)
return h.h_sepBy1(parser)
end
--- Wrapper over C `h_epsilon_p()`.
function hammer.epsilon_p()
return h.h_epsilon_p()
end
--- Wrapper over C `h_length_value(length, value)`.
function hammer.length_value(length, value)
return h.h_length_value(length, value)
end
--- Wrapper over C `h_attr_bool`: casts the Lua function `predicate` to the
-- `HPredicate` function-pointer type and passes it with `user_data`.
-- NOTE(review): a new FFI callback is created on every call and never freed
-- -- confirm this is acceptable.
function hammer.attr_bool(parser, predicate, user_data)
local cb = ffi.cast("HPredicate", predicate)
return h.h_attr_bool(parser, cb, user_data)
end
--- Wrapper over C `h_and(p)`; trailing underscore because `and` is a Lua keyword.
function hammer.and_(parser)
return h.h_and(parser)
end
--- Wrapper over C `h_not(p)`; trailing underscore because `not` is a Lua keyword.
function hammer.not_(parser)
return h.h_not(parser)
end
--- Wrapper over C `h_indirect(p)` as declared in the cdef block.
function hammer.indirect(parser)
return h.h_indirect(parser)
end
--- Wrapper over C `h_bind_indirect(indirect, inner)`; the C function is
-- declared `void`, so this returns no value.
function hammer.bind_indirect(indirect, inner)
return h.h_bind_indirect(indirect, inner)
end
--- Wrapper over C `h_with_endianness(endianness, p)`.
function hammer.with_endianness(endianness, parser)
return h.h_with_endianness(endianness, parser)
end
--- Wrapper over C `h_put_value(p, name)`.
function hammer.put_value(parser, name)
return h.h_put_value(parser, name)
end
--- Wrapper over C `h_get_value(name)`.
function hammer.get_value(name)
return h.h_get_value(name)
end
--- Wrapper over C `h_bind`: casts the Lua function `continuation` to the
-- `HContinuation` function-pointer type and passes it with `env`.
-- NOTE(review): a new FFI callback is created on every call and never freed
-- -- confirm this is acceptable.
function hammer.bind(parser, continuation, env)
local cb = ffi.cast("HContinuation", continuation)
return h.h_bind(parser, cb, env)
end
--- Wrapper over C `h_compile(parser, backend, params)`; returns its int result.
function hammer.compile(parser, backend, params)
return h.h_compile(parser, backend, params)
end
-- Endianness flag values exported as plain Lua numbers; they duplicate the
-- `static const uint8_t` constants of the same names in the cdef block.
hammer.BYTE_BIG_ENDIAN = 0x1;
hammer.BIT_BIG_ENDIAN = 0x2;
hammer.BYTE_LITTLE_ENDIAN = 0x0;
hammer.BIT_LITTLE_ENDIAN = 0x0;
return hammer
\ No newline at end of file
This diff is collapsed.
......@@ -108,3 +108,77 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) {
out <<= final_shift;
return (out ^ msb) - msb; // perform sign extension
}
/*
 * Advance `stream` by `count` bits without returning them.
 *
 * Does nothing when `count` is 0 or the stream is already flagged as overrun.
 * Sets `stream->overrun` when the position is already at `length` or when the
 * skip would run past the end of the input.
 */
void h_skip_bits(HInputStream* stream, size_t count) {
size_t left;
if (count == 0)
return;
if (stream->overrun)
return;
if (stream->index == stream->length) {
stream->overrun = true;
return;
}
// consume from a partial byte?
// `left` = bits of the current byte not yet accounted for by bit_offset/margin
left = 8 - stream->bit_offset - stream->margin;
if (count < left) {
// the skip ends inside the current byte
stream->bit_offset += count;
return;
}
if (left < 8) {
// finish the partially consumed byte and move to the next byte boundary
stream->index += 1;
stream->bit_offset = 0;
stream->margin = 0;
count -= left;
}
assert(stream->bit_offset == 0);
assert(stream->margin == 0);
// consume full bytes
// `left` is reused here as the number of whole bytes remaining
left = stream->length - stream->index;
if (count / 8 <= left) {
stream->index += count / 8;
count = count % 8;
} else {
stream->index = stream->length;
stream->overrun = true;
return;
}
assert(count < 8);
// final partial byte
// leftover bits need one more byte; if none is left this is an overrun
if (count > 0 && stream->index == stream->length)
stream->overrun = true;
else
stream->bit_offset = count;
}
/*
 * Move `stream` to absolute bit position `pos`.
 *
 * A target inside the current byte only updates `bit_offset`. Otherwise the
 * margin is cleared and the byte index / bit offset are set from `pos`; a
 * target beyond the end of the input parks the stream at `length` and sets
 * `overrun`.
 */
void h_seek_bits(HInputStream* stream, size_t pos) {
  const size_t target_byte = pos / 8;
  const size_t target_bit = pos % 8;
  bool beyond_end;

  /* same byte: only the bit offset changes */
  if (target_byte == stream->index) {
    stream->bit_offset = target_bit;
    return;
  }

  stream->margin = 0;

  /* position `length` itself is valid only at bit offset 0 */
  beyond_end = (target_byte > stream->length) ||
               (target_byte == stream->length && target_bit > 0);
  if (beyond_end) {
    stream->index = stream->length;
    stream->bit_offset = 0;
    stream->overrun = true;
  } else {
    stream->index = target_byte;
    stream->bit_offset = target_bit;
  }
}
......@@ -6,10 +6,25 @@
#include <ctype.h>
// type of pairs used as memoization keys by h_follow and h_first
struct k_nt {size_t k; const HCFChoice *nt;};
// a special map value for use when the map is used to represent a set
static void * const INSET = (void *)(uintptr_t)1;
// equality on memoization keys: two k_nt pairs match when both the lookahead
// depth and the nonterminal pointer are identical
static bool eq_k_nt(const void *p, const void *q)
{
  const struct k_nt *lhs = p;
  const struct k_nt *rhs = q;

  if (lhs->k != rhs->k)
    return false;
  return lhs->nt == rhs->nt;
}
// hash of a memoization key: the pointer hash of the nonterminal multiplied
// by the lookahead depth
static HHashValue hash_k_nt(const void *p)
{
  const struct k_nt *key = p;
  const HHashValue nt_hash = h_hash_ptr(key->nt);

  return nt_hash * key->k;
}
HCFGrammar *h_cfgrammar_new(HAllocator *mm__)
{
HCFGrammar *g = h_new(HCFGrammar, 1);
......@@ -20,14 +35,17 @@ HCFGrammar *h_cfgrammar_new(HAllocator *mm__)
g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
g->start = NULL;
g->geneps = NULL;
g->first = NULL;
g->follow = NULL;
g->kmax = 0; // will be increased as needed by ensure_k
g->first = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt);
g->follow = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt);
HStringMap *eps = h_stringmap_new(g->arena);
h_stringmap_put_epsilon(eps, INSET);
g->singleton_epsilon = eps;
HStringMap *end = h_stringmap_new(g->arena);
h_stringmap_put_end(end, INSET);
g->singleton_end = end;
return g;
}
......@@ -42,6 +60,7 @@ void h_cfgrammar_free(HCFGrammar *g)
// helpers
static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol);
static void collect_geneps(HCFGrammar *grammar);
static void eliminate_dead_rules(HCFGrammar *g);
HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser)
......@@ -83,6 +102,9 @@ HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *desugared)
g->start = desugared;
}
// simplifications
eliminate_dead_rules(g);
// determine which nonterminals generate epsilon
collect_geneps(g);
......@@ -128,42 +150,6 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
}
}
/* Increase g->kmax if needed, allocating enough first/follow slots. */
static void ensure_k(HCFGrammar *g, size_t k)
{
if (k <= g->kmax) {
return;
}
// NB: we don't actually use first/follow[0] but allocate it anyway
// so indices of the array correspond neatly to values of k
// allocate the new arrays
HHashTable **first = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *));
HHashTable **follow = h_arena_malloc(g->arena, (k+1)*sizeof(HHashTable *));
if (g->kmax > 0) {
// we are resizing, copy the old tables over
for(size_t i=0; i<=g->kmax; i++) {
first[i] = g->first[i];
follow[i] = g->follow[i];
}
} else {
// we are initializing, allocate the first (in fact, dummy) tables
first[0] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr);
follow[0] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr);
}
// allocate the new tables
for(size_t i=g->kmax+1; i<=k; i++) {
first[i] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr);
follow[i] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr);
}
g->first = first;
g->follow = follow;
g->kmax = k;
}
bool h_derives_epsilon(HCFGrammar *g, const HCFChoice *symbol)
{
// XXX this can now also be implemented in terms of h_first:
......@@ -232,6 +218,76 @@ static void collect_geneps(HCFGrammar *g)
} while(g->geneps->used != prevused);
}
// true iff the NULL-terminated symbol array `s` contains the pointer `x`
static bool mentions_symbol(HCFChoice **s, const HCFChoice *x)
{
  HCFChoice **cursor = s;

  while (*cursor != NULL) {
    if (*cursor == x)
      return true;
    cursor++;
  }
  return false;
}
// Remove, from every nonterminal in g->nts, each production whose right-hand
// side mentions the symbol x. Production arrays are NULL-terminated and are
// compacted in place; the relative order of the remaining productions is not
// preserved (the last one is moved into the freed slot).
static void remove_productions_with(HCFGrammar *g, const HCFChoice *x)
{
HHashTableEntry *hte;
const HCFChoice *symbol;
size_t i;
// walk every bucket chain of the nts hash set
for(i=0; i < g->nts->capacity; i++) {
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
if (hte->key == NULL)
continue;
symbol = hte->key;
assert(symbol->type == HCF_CHOICE);
HCFSequence **p, **q;
// p is only advanced when the current slot is kept, because a removal
// moves a not-yet-examined production into *p
for(p = symbol->seq; *p != NULL; ) {
if (mentions_symbol((*p)->items, x)) {
// remove production p
for(q=p; *(q+1) != NULL; q++); // q = last production
*p = *q; // move q over p
*q = NULL; // delete old q
} else {
p++;
}
}
}
}
}
// Repeatedly find a nonterminal with no productions, delete it from g->nts
// and drop every production that mentions it, until no such nonterminal
// remains. Afterwards g->nts is rebuilt from g->start.
static void eliminate_dead_rules(HCFGrammar *g)
{
HHashTableEntry *hte;
const HCFChoice *symbol = NULL;
size_t i;
bool found;
do {
found = false;
// scan stops at the first dead nonterminal (both loops test !found), so
// the table is never modified while it is being traversed
for(i=0; !found && i < g->nts->capacity; i++) {
for(hte = &g->nts->contents[i]; !found && hte; hte = hte->next) {
if (hte->key == NULL)
continue;
symbol = hte->key;
assert(symbol->type == HCF_CHOICE);
// this NT is dead if it has no productions
if (*symbol->seq == NULL)
found = true;
}
}
if (found) {
// `symbol` still refers to the dead nonterminal found above
h_hashtable_del(g->nts, symbol);
remove_productions_with(g, symbol);
}
} while(found); // until nothing left to remove
// rebuild g->nts. there may now be symbols that no longer appear in any
// productions. we also might have removed g->start.
g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
collect_nts(g, g->start);
}
HStringMap *h_stringmap_new(HArena *a)
{
......@@ -240,6 +296,7 @@ HStringMap *h_stringmap_new(HArena *a)
m->end_branch = NULL;
m->char_branches = h_hashtable_new(a, h_eq_ptr, h_hash_ptr);
m->arena = a;
m->taint = false;
return m;
}
......@@ -396,30 +453,65 @@ bool h_stringmap_empty(const HStringMap *m)
&& h_hashtable_empty(m->char_branches));
}
const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
// h_stringmap_equal adapted to the untyped (const void *) comparison
// signature expected by h_hashtable_equal
static bool eq_stringmap(const void *a, const void *b)
{
return h_stringmap_equal(a, b);
}
// Two string maps are equal when their epsilon and end leaf pointers are
// identical and their char_branches tables compare equal, recursing into the
// child maps via eq_stringmap.
bool h_stringmap_equal(const HStringMap *a, const HStringMap *b)
{
  const bool same_leaves = a->epsilon_branch == b->epsilon_branch
                        && a->end_branch == b->end_branch;

  if (!same_leaves)
    return false;
  return h_hashtable_equal(a->char_branches, b->char_branches, eq_stringmap);
}
// helper for h_follow and h_first
// Compare two worksets. When both exist their contents are compared with
// eq_stringmap; when at least one is NULL they are equal only if both are.
bool workset_equal(HHashTable *a, HHashTable *b)
{
  if (a != NULL && b != NULL)
    return h_hashtable_equal(a, b, eq_stringmap);
  return a == b;
}
static const HStringMap *
h_first_seq_work(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s);
static const HStringMap *
h_first_work(size_t k, HCFGrammar *g, HHashTable **pws, const HCFChoice *x)
{
HHashTable *ws = *pws;
HStringMap *ret;
HCFSequence **p;
uint8_t c;
struct k_nt kx = {k,x};
struct k_nt *pkx = NULL;
bool taint = false;
// shortcut: first_0(X) is always {""}
if (k==0) {
return g->singleton_epsilon;
}
// memoize via g->first
ensure_k(g, k);
ret = h_hashtable_get(g->first[k], x);
// shortcut: first_k($) is always {$}
if (x->type == HCF_END) {
return g->singleton_end;
}
// check memoization and workset
ret = h_hashtable_get(g->first, &kx);
if (ret == NULL && ws != NULL)
ret = h_hashtable_get(ws, &kx);
if (ret != NULL) {
return ret;
}
// not found, create result
ret = h_stringmap_new(g->arena);
assert(ret != NULL);
h_hashtable_put(g->first[k], x, ret);
switch(x->type) {
case HCF_END:
h_stringmap_put_end(ret, INSET);
break;
case HCF_CHAR:
h_stringmap_put_char(ret, x->chr, INSET);
break;
......@@ -433,30 +525,75 @@ const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
break;
case HCF_CHOICE:
// this is a nonterminal
// to avoid recursive loops, taint ret and place it in workset
ret->taint = true;
if (ws == NULL)
ws = *pws = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt);
pkx = h_arena_malloc(g->arena, sizeof kx);
*pkx = kx;
h_hashtable_put(ws, pkx, ret);
// return the union of the first sets of all productions
for(p=x->seq; *p; ++p)
h_stringmap_update(ret, h_first_seq(k, g, (*p)->items));
for(p=x->seq; *p; ++p) {
const HStringMap *first_rhs = h_first_seq_work(k, g, pws, (*p)->items);
assert(ws == *pws); // call above did not change the workset pointer
taint |= first_rhs->taint;
h_stringmap_update(ret, first_rhs);
}
break;
default: // should not be reached
assert_message(0, "unknown HCFChoice type");
assert_message(0, "unexpected HCFChoice type");
}
// immediately memoize ret and remove it from ws if untainted by recursion
if (!taint) {
if (pkx == NULL) {
pkx = h_arena_malloc(g->arena, sizeof kx);
*pkx = kx;
} else if (ws != NULL) {
// we already had a key, so ret might (will) be in ws; remove it.
h_hashtable_del(ws, pkx);
}
ret->taint = false;
h_hashtable_put(g->first, pkx, ret);
}
return ret;
}
// Public entry point for first_k(x): reruns h_first_work with a fresh (NULL)
// workset until the workset produced by a pass equals the one produced by the
// previous pass, then returns the result of that last pass.
const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
{
  HHashTable *previous = NULL;
  HHashTable *current = NULL;
  const HStringMap *result;

  for (;;) {
    previous = current;
    current = NULL;
    result = h_first_work(k, g, &current, x);
    if (workset_equal(current, previous))
      break;  // fixpoint reached
  }

  assert(result != NULL);
  return result;
}
// helpers for h_first_seq, definitions below
static bool is_singleton_epsilon(const HStringMap *m);
static bool any_string_shorter(size_t k, const HStringMap *m);
// pointer to functions like h_first_seq
typedef const HStringMap *(*StringSetFun)(size_t, HCFGrammar *, HCFChoice **);
typedef const HStringMap *
(*StringSetFun)(size_t, HCFGrammar *, HHashTable **, HCFChoice **);
// helper for h_first_seq and h_follow
static void stringset_extend(HCFGrammar *g, HStringMap *ret,
static bool stringset_extend(HCFGrammar *g, HHashTable **pws, HStringMap *ret,
size_t k, const HStringMap *as,
StringSetFun f, HCFChoice **tail);
const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
static const HStringMap *
h_first_seq_work(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s)
{
// shortcut: the first set of the empty sequence, for any k, is {""}
if (*s == NULL) {
......@@ -467,11 +604,11 @@ const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
HCFChoice *x = s[0];
HCFChoice **tail = s+1;
const HStringMap *first_x = h_first(k, g, x);
const HStringMap *first_x = h_first_work(k, g, pws, x);
// shortcut: if first_k(X) = {""}, just return first_k(tail)
if (is_singleton_epsilon(first_x)) {
return h_first_seq(k, g, tail);
return h_first_seq_work(k, g, pws, tail);
}
// shortcut: if no elements of first_k(X) have length <k, just return first_k(X)
......@@ -483,8 +620,25 @@ const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
HStringMap *ret = h_stringmap_new(g->arena);
// extend the elements of first_k(X) up to length k from tail
stringset_extend(g, ret, k, first_x, h_first_seq, tail);
ret->taint = stringset_extend(g, pws, ret, k, first_x, h_first_seq_work, tail);
return ret;
}
// Public entry point for first_k of a symbol sequence: reruns
// h_first_seq_work with a fresh (NULL) workset until the workset produced by
// a pass equals the one produced by the previous pass.
const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
{
  HHashTable *previous = NULL;
  HHashTable *current = NULL;
  const HStringMap *result;

  for (;;) {
    previous = current;
    current = NULL;
    result = h_first_seq_work(k, g, &current, s);
    if (workset_equal(current, previous))
      break;  // fixpoint reached
  }

  assert(result != NULL);
  return result;
}
......@@ -546,13 +700,25 @@ static void remove_all_shorter(size_t k, HStringMap *m)
}
// h_follow adapted to the signature of StringSetFun
static inline
const HStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice **s)
static const HStringMap *
h_follow_(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s)
{
assert(pws == NULL);
return h_follow(k, g, *s);
}
const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
static const HStringMap *
h_follow_work(size_t k, HCFGrammar *g, HHashTable **pws, const HCFChoice *x);
// h_follow_work adapted to the signature of StringSetFun
// Takes the symbol as a pointer-to-pointer (StringSetFun shape) and forwards
// the dereferenced first element, plus the workset pointer, to h_follow_work.
static const HStringMap *
h_follow_work_(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s)
{
return h_follow_work(k, g, pws, *s);
}
static const HStringMap *
h_follow_work(size_t k, HCFGrammar *g, HHashTable **pws, const HCFChoice *x)
{
// consider all occurrences of X in g
// the follow set of X is the union of:
......@@ -564,28 +730,45 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
// { a b | a <- first_k(tail), b <- follow_l(A), l=k-|a| }
HStringMap *ret;
HHashTable *ws = *pws;
struct k_nt kx = {k,x};
struct k_nt *pkx;
bool taint = false;
// shortcut: follow_0(X) is always {""}
if (k==0) {
return g->singleton_epsilon;
}
// memoize via g->follow
ensure_k(g, k);
ret = h_hashtable_get(g->follow[k], x);
// check memoization and workset
ret = h_hashtable_get(g->follow, &kx);
if (ret == NULL && ws != NULL)
ret = h_hashtable_get(ws, &kx);
if (ret != NULL) {
return ret;
}
// not found, create result
ret = h_stringmap_new(g->arena);
assert(ret != NULL);
h_hashtable_put(g->follow[k], x, ret);
// to avoid recursive loops, taint ret and place it in workset
ret->taint = true;
if (ws == NULL)
ws = *pws = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt);
pkx = h_arena_malloc(g->arena, sizeof kx);
*pkx = kx;
h_hashtable_put(ws, pkx, ret);
// if X is the start symbol, the end token is in its follow set
if (x == g->start) {
h_stringmap_put_end(ret, INSET);
}
// iterate over g->nts
// iterate over g->nts, looking for X
size_t i;
HHashTableEntry *hte;
int x_found=0;
for (i=0; i < g->nts->capacity; i++) {
for (hte = &g->nts->contents[i]; hte; hte = hte->next) {
if (hte->key == NULL) {
......@@ -600,19 +783,46 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
HCFChoice **s = (*p)->items; // production's right-hand side
for (; *s; s++) {
if (*s == x) { // occurance found
if (*s == x) { // occurrence found
x_found=1;
HCFChoice **tail = s+1;
const HStringMap *first_tail = h_first_seq(k, g, tail);
// extend the elems of first_k(tail) up to length k from follow(A)
stringset_extend(g, ret, k, first_tail, h_follow_, &a);
taint |= stringset_extend(g, pws, ret, k,
first_tail, h_follow_work_, &a);
}
}
}
}
}
assert(x_found || x == g->start); // no orphan non-terminals
// immediately memoize ret and remove it from ws if untainted by recursion
if (!taint) {
ret->taint = false;
h_hashtable_del(ws, pkx);
h_hashtable_put(g->follow, pkx, ret);
}
return ret;
}
// Public entry point for follow_k(x): reruns h_follow_work with a fresh
// (NULL) workset until the workset produced by a pass equals the one produced
// by the previous pass, then returns the result of that last pass.
const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
{
  HHashTable *previous = NULL;
  HHashTable *current = NULL;
  const HStringMap *result;

  for (;;) {
    previous = current;
    current = NULL;
    result = h_follow_work(k, g, &current, x);
    if (workset_equal(current, previous))
      break;  // fixpoint reached
  }

  assert(result != NULL);
  return result;
}
......@@ -629,7 +839,7 @@ HStringMap *h_predict(size_t k, HCFGrammar *g,
// casting the const off of A below. note: stringset_extend does
// not touch this argument, only passes it through to h_follow
// in this case, which accepts it, once again, as const.
stringset_extend(g, ret, k, first_rhs, h_follow_, (HCFChoice **)&A);
stringset_extend(g, NULL, ret, k, first_rhs, h_follow_, (HCFChoice **)&A);
// make sure there are only strings of length _exactly_ k
remove_all_shorter(k, ret);
......@@ -638,13 +848,17 @@ HStringMap *h_predict(size_t k, HCFGrammar *g,
}
// add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret
static void stringset_extend(HCFGrammar *g, HStringMap *ret,
static bool stringset_extend(HCFGrammar *g, HHashTable **pws, HStringMap *ret,
size_t k, const HStringMap *as,
StringSetFun f, HCFChoice **tail)
{
bool taint = false;
if (as->epsilon_branch) {
// for a="", add f_k(tail) to ret
h_stringmap_update(ret, f(k, g, tail));
const HStringMap *f_tail = f(k, g, pws, tail);
taint |= f_tail->taint;
h_stringmap_update(ret, f_tail);
}
if (as->end_branch) {
......@@ -671,9 +885,11 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
HStringMap *ret_ = h_stringmap_new(g->arena);
h_stringmap_put_after(ret, c, ret_);
stringset_extend(g, ret_, k-1, as_, f, tail);
taint |= stringset_extend(g, pws, ret_, k-1, as_, f, tail);
}
}
return taint;
}
......@@ -818,13 +1034,15 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
fputs(name, f);
i += strlen(name);
for(; i<column; i++) fputc(' ', f);
fputs(" ->", f);
assert(nt->type == HCF_CHOICE);
HCFSequence **p = nt->seq;
if (*p == NULL) {
return; // shouldn't happen
fputs(" -x\n", f); // empty choice, e.g. h_nothing_p()
return;
}
fputs(" ->", f);
pprint_sequence(f, g, *p++); // print first production on the same line
for(; *p; p++) { // print the rest below with "or" bars
for(i=0; i<column; i++) fputc(' ', f); // indent
......@@ -835,6 +1053,8 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
{
HAllocator *mm__ = g->mm__;
if (g->nts->used < 1) {
return;
}
......@@ -842,11 +1062,12 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
// determine maximum string length of symbol names
int len;
size_t s;
for(len=1, s=26; s < g->nts->used; len++, s*=26);
for(len=1, s=26; s < g->nts->used; len++, s*=26);
// iterate over g->nts
// iterate over g->nts and collect its entries in an ordered array
size_t i;
HHashTableEntry *hte;
const HCFChoice **arr = h_new(const HCFChoice *, g->nts->used);
for(i=0; i < g->nts->capacity; i++) {
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
if (hte->key == NULL) {
......@@ -855,9 +1076,16 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
const HCFChoice *a = hte->key; // production's left-hand symbol
assert(a->type == HCF_CHOICE);
pprint_ntrules(file, g, a, indent, len);
size_t id = (uintptr_t)hte->value; // nonterminal id
assert(id < g->nts->used);
arr[id] = a;
}
}
// print rules in alphabetical order
for(i=0; i < g->nts->used; i++)
pprint_ntrules(file, g, arr[i], indent, len);
h_free(arr);
}
void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent)
......
......@@ -8,15 +8,15 @@ typedef struct HCFGrammar_ {
HHashSet *nts; // HCFChoices, each representing the alternative
// productions for one nonterminal
HHashSet *geneps; // set of NTs that can generate the empty string
HHashTable **first; // memoized first sets of the grammar's symbols
HHashTable **follow; // memoized follow sets of the grammar's NTs
size_t kmax; // maximum lookahead depth allocated
HHashTable *first; // memoized first sets of the grammar's symbols
HHashTable *follow; // memoized follow sets of the grammar's NTs
HArena *arena;
HAllocator *mm__;
// constant set containing only the empty string.
// this is only a member of HCFGrammar because it needs a pointer to arena.
// constant sets containing only the empty string or end symbol.
// these are only members of HCFGrammar because they need a pointer to arena.
const struct HStringMap_ *singleton_epsilon;
const struct HStringMap_ *singleton_end;
} HCFGrammar;
......@@ -37,6 +37,7 @@ typedef struct HStringMap_ {
void *end_branch; // points to leaf value
HHashTable *char_branches; // maps to inner nodes (HStringMaps)
HArena *arena;
bool taint; // for use by h_follow() and h_first()
} HStringMap;
HStringMap *h_stringmap_new(HArena *a);
......@@ -52,6 +53,7 @@ void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead);
bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end);
bool h_stringmap_present_epsilon(const HStringMap *m);
bool h_stringmap_empty(const HStringMap *m);
bool h_stringmap_equal(const HStringMap *a, const HStringMap *b);
static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c)
{ return h_hashtable_get(m->char_branches, (void *)char_key(c)); }
......
......@@ -9,12 +9,14 @@
HCountedArray *h_carray_new_sized(HArena * arena, size_t size) {
HCountedArray *ret = h_arena_malloc(arena, sizeof(HCountedArray));
/* _noinit here because we init all the elements below */
HCountedArray *ret = h_arena_malloc_noinit(arena, sizeof(HCountedArray));
if (size == 0)
size = 1;
ret->used = 0;
ret->capacity = size;
ret->arena = arena;
/* we actually want to zero these */
ret->elements = h_arena_malloc(arena, sizeof(void*) * size);
return ret;
}
......@@ -24,12 +26,21 @@ HCountedArray *h_carray_new(HArena * arena) {
}
void h_carray_append(HCountedArray *array, void* item) {
HParsedToken **elements;
if (array->used >= array->capacity) {
HParsedToken **elements = h_arena_malloc(array->arena, (array->capacity *= 2) * sizeof(void*));
/* _noinit here; we init below */
elements = h_arena_malloc_noinit(array->arena,
(array->capacity *= 2) * sizeof(void*));
for (size_t i = 0; i < array->used; i++)
elements[i] = array->elements[i];
for (size_t i = array->used; i < array->capacity; i++)
elements[i] = 0;
/*
* XXX I hope we don't use this much, because h_arena_free() doesn't
* quite seem to be there and doing a lot of this would get pretty
* wasteful.
*/
h_arena_free(array->arena, array->elements);
array->elements = elements;
}
......@@ -38,7 +49,8 @@ void h_carray_append(HCountedArray *array, void* item) {
// HSlist
HSlist* h_slist_new(HArena *arena) {
HSlist *ret = h_arena_malloc(arena, sizeof(HSlist));
/* _noinit here; we set every element of ret below */
HSlist *ret = h_arena_malloc_noinit(arena, sizeof(HSlist));
ret->head = NULL;
ret->arena = arena;
return ret;
......@@ -53,8 +65,12 @@ HSlist* h_slist_copy(HSlist *slist) {
tail = ret->head;
head = head->next;
while (head != NULL) {
// append head item to tail in a new node
HSlistNode *node = h_arena_malloc(slist->arena, sizeof(HSlistNode));
/*
* append head item to tail in a new node
*
* use _noinit; we set every element of node after we allocate
*/
HSlistNode *node = h_arena_malloc_noinit(slist->arena, sizeof(HSlistNode));
node->elem = head->elem;
node->next = NULL;
tail = tail->next = node;
......@@ -85,10 +101,11 @@ void* h_slist_pop(HSlist *slist) {
}
void h_slist_push(HSlist *slist, void* item) {
HSlistNode *hnode = h_arena_malloc(slist->arena, sizeof(HSlistNode));
/* use _noinit; we set every element of node */
HSlistNode *hnode = h_arena_malloc_noinit(slist->arena, sizeof(HSlistNode));
hnode->elem = item;
hnode->next = slist->head;
// write memory barrier here.
/* write memory barrier here. */
slist->head = hnode;
}
......@@ -132,30 +149,34 @@ void h_slist_free(HSlist *slist) {
}
HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc) {
HHashTable *ht = h_arena_malloc(arena, sizeof(HHashTable));
/* _noinit because all fields are set below */
HHashTable *ht = h_arena_malloc_noinit(arena, sizeof(HHashTable));
ht->hashFunc = hashFunc;
ht->equalFunc = equalFunc;
ht->capacity = 64; // to start; should be tuned later...
ht->used = 0;
ht->arena = arena;
ht->contents = h_arena_malloc(arena, sizeof(HHashTableEntry) * ht->capacity);
/* _noinit because all fields of all entries are set in the loop */
ht->contents = h_arena_malloc_noinit(arena,
sizeof(HHashTableEntry) * ht->capacity);
for (size_t i = 0; i < ht->capacity; i++) {
ht->contents[i].key = NULL;
ht->contents[i].value = NULL;
ht->contents[i].next = NULL;
ht->contents[i].hashval = 0;
}
//memset(ht->contents, 0, sizeof(HHashTableEntry) * ht->capacity);
return ht;
}
void* h_hashtable_get(const HHashTable* ht, const void* key) {
HHashValue hashval = ht->hashFunc(key);
void * h_hashtable_get_precomp(const HHashTable *ht, const void *key,
HHashValue hashval) {
HHashTableEntry *hte = NULL;
#ifdef CONSISTENCY_CHECK
assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif
HHashTableEntry *hte = NULL;
for (hte = &ht->contents[hashval & (ht->capacity - 1)];
hte != NULL;
hte = hte->next) {
......@@ -169,35 +190,63 @@ void* h_hashtable_get(const HHashTable* ht, const void* key) {
return hte->value;
}
}
return NULL;
}
/*
 * Look up key in ht: hash it with the table's own hash function and
 * hand the result to the precomputed-hash lookup.
 */
void * h_hashtable_get(const HHashTable *ht, const void *key) {
  return h_hashtable_get_precomp(ht, key, ht->hashFunc(key));
}
void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry* new_entry);
void h_hashtable_ensure_capacity(HHashTable* ht, size_t n) {
HHashTableEntry *old_contents, *new_contents;
bool do_resize = false;
size_t old_capacity = ht->capacity;
while (n * 1.3 > ht->capacity) {
ht->capacity *= 2;
do_resize = true;
}
if (!do_resize)
return;
HHashTableEntry *old_contents = ht->contents;
HHashTableEntry *new_contents = h_arena_malloc(ht->arena, sizeof(HHashTableEntry) * ht->capacity);
ht->contents = new_contents;
ht->used = 0;
memset(new_contents, 0, sizeof(HHashTableEntry) * ht->capacity);
for (size_t i = 0; i < old_capacity; ++i)
for (HHashTableEntry *entry = &old_contents[i];
entry;
entry = entry->next)
if (entry->key)
h_hashtable_put_raw(ht, entry);
//h_arena_free(ht->arena, old_contents);
if (do_resize) {
old_contents = ht->contents;
/* _noinit because we set the whole thing below */
new_contents = h_arena_malloc_noinit(ht->arena,
sizeof(HHashTableEntry) * ht->capacity);
ht->contents = new_contents;
ht->used = 0;
memset(new_contents, 0, sizeof(HHashTableEntry) * ht->capacity);
for (size_t i = 0; i < old_capacity; ++i) {
for (HHashTableEntry *entry = &old_contents[i];
entry;
entry = entry->next) {
if (entry->key) {
h_hashtable_put_raw(ht, entry);
}
}
}
/* h_arena_free(ht->arena, old_contents); */
}
}
void h_hashtable_put(HHashTable* ht, const void* key, void* value) {
/*
 * Insert key/value into ht using a hash value the caller has already
 * computed.  The table is first asked to make room for one more entry,
 * then the entry is handed to h_hashtable_put_raw().
 */
void h_hashtable_put_precomp(HHashTable *ht, const void *key, void *value,
                             HHashValue hashval) {
  /* Grow/rebalance first so the insert lands in the final bucket array */
  h_hashtable_ensure_capacity(ht, ht->used + 1);

  /* Fields not named here are zero-initialized by the initializer */
  HHashTableEntry pending = {
    .key = key,
    .value = value,
    .hashval = hashval
  };

  h_hashtable_put_raw(ht, &pending);
}
void h_hashtable_put(HHashTable *ht, const void *key, void *value) {
// # Start with a rebalancing
h_hashtable_ensure_capacity(ht, ht->used + 1);
......@@ -227,7 +276,7 @@ void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry *new_entry) {
}
// Add a new link...
assert (hte->next == NULL);
hte->next = h_arena_malloc(ht->arena, sizeof(HHashTableEntry));
hte->next = h_arena_malloc_noinit(ht->arena, sizeof(HHashTableEntry));
hte = hte->next;
hte->next = NULL;
ht->used++;
......@@ -338,16 +387,18 @@ static bool hte_same_length(HHashTableEntry *xs, HHashTableEntry *ys) {
}
// helper for hte_equal: are all elements of xs present in ys?
static bool hte_subset(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys)
static bool hte_subset(HEqualFunc eq, HEqualFunc value_eq,
HHashTableEntry *xs, HHashTableEntry *ys)
{
for(; xs; xs=xs->next) {
if(xs->key == NULL) continue; // element not present
HHashTableEntry *hte;
for(hte=ys; hte; hte=hte->next) {
if(hte->key == xs->key) break; // assume an element is equal to itself
// assume an element is equal to itself
if(hte->key == xs->key && hte->value == xs->value) break;
if(hte->hashval != xs->hashval) continue; // shortcut
if(eq(hte->key, xs->key)) break;
if(eq(hte->key, xs->key) && value_eq(hte->value, xs->value)) break;
}
if(hte == NULL) return false; // element not found
}
......@@ -355,19 +406,20 @@ static bool hte_subset(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys)
}
// compare two lists of HHashTableEntries
static inline bool hte_equal(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys) {
return (hte_same_length(xs, ys) && hte_subset(eq, xs, ys));
static inline bool hte_equal(HEqualFunc eq, HEqualFunc value_eq,
HHashTableEntry *xs, HHashTableEntry *ys) {
return (hte_same_length(xs, ys) && hte_subset(eq, value_eq, xs, ys));
}
/* Set equality of HHashSets.
/* Equality of HHashTables.
* Obviously, 'a' and 'b' must use the same equality function.
* Not strictly necessary, but we also assume the same hash function.
*/
bool h_hashset_equal(const HHashSet *a, const HHashSet *b) {
bool h_hashtable_equal(const HHashSet *a, const HHashSet *b, HEqualFunc value_eq) {
if(a->capacity == b->capacity) {
// iterate over the buckets in parallel
for(size_t i=0; i < a->capacity; i++) {
if(!hte_equal(a->equalFunc, &a->contents[i], &b->contents[i]))
if(!hte_equal(a->equalFunc, value_eq, &a->contents[i], &b->contents[i]))
return false;
}
} else {
......@@ -377,6 +429,18 @@ bool h_hashset_equal(const HHashSet *a, const HHashSet *b) {
return true;
}
/* Equality predicate that accepts every pair; neither argument is examined. */
static bool eq_dontcare(const void *p, const void *q) {
  (void)p;
  (void)q;
  return true;
}
/* Set equality of HHashSets.
* Obviously, 'a' and 'b' must use the same equality function.
* Not strictly necessary, but we also assume the same hash function.
*/
bool h_hashset_equal(const HHashSet *a, const HHashSet *b) {
return h_hashtable_equal(a, b, eq_dontcare);
}
/* Pointer-identity equality: true exactly when p and q are the same address. */
bool h_eq_ptr(const void *p, const void *q) {
  const bool same_address = (p == q);
  return same_address;
}
......@@ -388,11 +452,26 @@ HHashValue h_hash_ptr(const void *p) {
}
uint32_t h_djbhash(const uint8_t *buf, size_t len) {
uint32_t hash = 5381;
uint32_t h = 5381;
while (len >= 16) {
h = h * 33 + buf[0]; h = h * 33 + buf[1];
h = h * 33 + buf[2]; h = h * 33 + buf[3];
h = h * 33 + buf[4]; h = h * 33 + buf[5];
h = h * 33 + buf[6]; h = h * 33 + buf[7];
h = h * 33 + buf[8]; h = h * 33 + buf[9];
h = h * 33 + buf[10]; h = h * 33 + buf[11];
h = h * 33 + buf[12]; h = h * 33 + buf[13];
h = h * 33 + buf[14]; h = h * 33 + buf[15];
len -= 16;
buf += 16;
}
while (len--) {
hash = hash * 33 + *buf++;
h = h * 33 + *buf++;
}
return hash;
return h;
}
void h_symbol_put(HParseState *state, const char* key, void *value) {
......
......@@ -60,15 +60,8 @@ static void act_flatten_(HCountedArray *seq, const HParsedToken *tok) {
}
HParsedToken *h_act_flatten(const HParseResult *p, void* user_data) {
HCountedArray *seq = h_carray_new(p->arena);
act_flatten_(seq, p->ast);
HParsedToken *res = a_new_(p->arena, HParsedToken, 1);
res->token_type = TT_SEQUENCE;
res->seq = seq;
res->index = p->ast->index;
res->bit_offset = p->ast->bit_offset;
HParsedToken *res = h_make_seq(p->arena);
act_flatten_(res->seq, p->ast);
return res;
}
......@@ -106,7 +99,7 @@ HParsedToken *h_make_seqn(HArena *arena, size_t n)
return ret;
}
HParsedToken *h_make_bytes(HArena *arena, uint8_t *array, size_t len)
HParsedToken *h_make_bytes(HArena *arena, const uint8_t *array, size_t len)
{
HParsedToken *ret = h_make_(arena, TT_BYTES);
ret->bytes.len = len;
......@@ -128,6 +121,20 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val)
return ret;
}
/* Build a TT_DOUBLE token in the given arena and store val in its dbl member. */
HParsedToken *h_make_double(HArena *arena, double val)
{
  HParsedToken *tok = h_make_(arena, TT_DOUBLE);

  tok->dbl = val;
  return tok;
}
/* Build a TT_FLOAT token in the given arena and store val in its flt member. */
HParsedToken *h_make_float(HArena *arena, float val)
{
  HParsedToken *tok = h_make_(arena, TT_FLOAT);

  tok->flt = val;
  return tok;
}
// XXX -> internal
HParsedToken *h_carray_index(const HCountedArray *a, size_t i)
{
......
......@@ -195,9 +195,11 @@ HParsedToken *h_act_ignore(const HParseResult *p, void* user_data);
HParsedToken *h_make(HArena *arena, HTokenType type, void *value);
HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence.
HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n.
HParsedToken *h_make_bytes(HArena *arena, uint8_t *array, size_t len);
HParsedToken *h_make_bytes(HArena *arena, const uint8_t *array, size_t len);
HParsedToken *h_make_sint(HArena *arena, int64_t val);
HParsedToken *h_make_uint(HArena *arena, uint64_t val);
HParsedToken *h_make_double(HArena *arena, double val);
HParsedToken *h_make_float(HArena *arena, float val);
// Standard short-hands to make tokens in an action.
#define H_MAKE(TYP, VAL) h_make(p->arena, (HTokenType)TT_ ## TYP, VAL)
......@@ -206,6 +208,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val);
#define H_MAKE_BYTES(VAL, LEN) h_make_bytes(p->arena, VAL, LEN)
#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL)
#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL)
#define H_MAKE_DOUBLE(VAL) h_make_double(p->arena, VAL)
#define H_MAKE_FLOAT(VAL) h_make_float(p->arena, VAL)
// Extract (cast) type-specific value back from HParsedTokens...
......@@ -218,6 +222,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val);
#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK)
#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK)
#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK)
#define H_ASSERT_DOUBLE(TOK) h_assert_type(TT_DOUBLE, TOK)
#define H_ASSERT_FLOAT(TOK) h_assert_type(TT_FLOAT, TOK)
// Assert expected type and return contained value.
#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user)
......@@ -225,6 +231,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val);
#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes)
#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint)
#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint)
#define H_CAST_DOUBLE(TOK) (H_ASSERT_DOUBLE(TOK)->dbl)
#define H_CAST_FLOAT(TOK) (H_ASSERT_FLOAT(TOK)->flt)
// Sequence access...
......@@ -247,7 +255,9 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va);
#define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_SINT(SEQ, ...) H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1)
#define H_INDEX_DOUBLE(SEQ, ...) H_CAST_DOUBLE(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_FLOAT(SEQ, ...) H_CAST_FLOAT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(H_ASSERT_SEQ(SEQ), __VA_ARGS__, -1)
// Standard short-hand to access and cast elements on a sequence token.
#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__)
......@@ -255,6 +265,9 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va);
#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__)
#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__)
#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__)
#define H_FIELD_DOUBLE(...) H_INDEX_DOUBLE(p->ast, __VA_ARGS__)
#define H_FIELD_FLOAT(...) H_INDEX_FLOAT(p->ast, __VA_ARGS__)
#define H_FIELD_TOKEN(...) H_INDEX_TOKEN(p->ast, __VA_ARGS__)
// Lower-level helper for h_seq_index.
HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal
......
This diff is collapsed.
......@@ -41,7 +41,12 @@ typedef struct HParseState_ HParseState;
typedef enum HParserBackend_ {
PB_MIN = 0,
PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
/*
* Have a backend that always fails to pass around "no such backend"
* indications
*/
PB_INVALID = PB_MIN,
PB_PACKRAT,
PB_REGULAR,
PB_LLk,
PB_LALR,
......@@ -49,6 +54,26 @@ typedef enum HParserBackend_ {
PB_MAX = PB_GLR
} HParserBackend;
typedef struct HParserBackendVTable_ HParserBackendVTable;
typedef struct HParserBackendWithParams_ {
/* Name of backend extracted from a string if the choice of backend was specified in a call using a string */
char *requested_name;
/* The backend (set to invalid (?) if the backend is to be loaded from an external module) */
HParserBackend backend;
/* Backend vtable (TODO: use this instead of the enum so we can get rid of that) */
HParserBackendVTable * backend_vtable;
/*
* Backend-specific parameters - if this needs to be freed, the backend
* should provide a free_params method in its vtable; currently no backends
* do this - PB_PACKRAT and PB_REGULAR take no params, and PB_LLk, PB_LALR
* and PB_GLR take an integer cast to void *
*/
void *params;
/* Allocator to use to free this (and the params if necessary) */
HAllocator *mm__;
} HParserBackendWithParams;
typedef enum HTokenType_ {
// Before you change the explicit values of these, think of the poor bindings ;_;
TT_INVALID = 0,
......@@ -56,6 +81,8 @@ typedef enum HTokenType_ {
TT_BYTES = 2,
TT_SINT = 4,
TT_UINT = 8,
TT_DOUBLE = 12,
TT_FLOAT = 13,
TT_SEQUENCE = 16,
TT_RESERVED_1, // reserved for backend-specific internal use
TT_ERR = 32,
......@@ -135,6 +162,7 @@ typedef struct HParserVtable_ HParserVtable;
typedef struct HParser_ {
const HParserVtable *vtable;
HParserBackend backend;
HParserBackendVTable * backend_vtable;
void* backend_data;
void *env;
HCFChoice *desugared; /* if the parser can be desugared, its desugared form */
......@@ -173,6 +201,53 @@ typedef bool (*HPredicate)(HParseResult *p, void* user_data);
*/
typedef HParser* (*HContinuation)(HAllocator *mm__, const HParsedToken *x, void *env);
/*
* For parser used when extracting name and params for backend by name
* TODO: possibly move to its own file?
*/
enum BackendTokenType_ {
TT_backend_with_params_t = TT_USER,
TT_backend_name_t,
TT_backend_param_t,
TT_backend_param_name_t,
TT_backend_param_with_name_t,
TT_backend_params_t
};
typedef struct backend_param {
size_t len;
uint8_t *param;
uint8_t *param_name;
} backend_param_t;
typedef struct backend_param_name {
size_t len;
uint8_t *param_name;
size_t param_id;
} backend_param_name_t;
typedef struct backend_param_with_name {
backend_param_name_t param_name;
backend_param_t param;
} backend_param_with_name_t;
typedef struct {
uint8_t *name;
size_t len;
} backend_name_t;
typedef struct backend_params {
backend_param_with_name_t *params;
size_t len;
} backend_params_t;
typedef struct backend_with_params {
backend_name_t name;
backend_params_t params;
} backend_with_params_t;
// {{{ Stuff for benchmarking
typedef struct HParserTestcase_ {
unsigned char* input;
......@@ -260,6 +335,89 @@ typedef struct HBenchmarkResults_ {
#endif // SWIG
// }}}
/**
* Ask if this backend is available
*/
int h_is_backend_available(HParserBackend backend);
/**
* Ask what the default backend is (currently always PB_PACKRAT)
*/
HParserBackend h_get_default_backend(void);
HParserBackendVTable * h_get_default_backend_vtable(void);
/**
* Copy a backend+params, using the backend-supplied copy method; the
* allocator used is the one passed in, or call the __m version with
* a NULL allocator to use the one from the source HParserBackendWithParams
*/
HAMMER_FN_DECL(HParserBackendWithParams *, h_copy_backend_with_params,
HParserBackendWithParams *be_with_params);
/**
* Free a backend+params
*/
void h_free_backend_with_params(HParserBackendWithParams *be_with_params);
/**
* Get a name string for a backend; this is constant per backend and so
* need not be freed; it will resolve to the backend under
* h_get_backend_by_name().
*/
const char * h_get_name_for_backend(HParserBackend be);
/**
* Get a name string for a backend with parameters; it is the caller's
* responsibility to free it later. This will resolve to the same
* backend and parameters under h_get_backend_with_params_by_name().
*/
HAMMER_FN_DECL(char *, h_get_name_for_backend_with_params,
HParserBackendWithParams *be_with_params);
/**
* Get a human-readable descriptive string for a backend; this is constant
* per backend and so need not be freed.
*/
const char * h_get_descriptive_text_for_backend(HParserBackend be);
/**
* Get a human-readable descriptive string for a backend with params; it is
* the caller's responsibility to free it later. Sorry, but it's allowed
* to depend on the params, and keeping the buffer elsewhere and
* replacing it on the next call wouldn't be thread-safe.
*/
HAMMER_FN_DECL(char *, h_get_descriptive_text_for_backend_with_params,
HParserBackendWithParams *be_with_params);
/**
* Look up an HParserBackend by name; this should round-trip with
* h_get_name_for_backend().
*/
HParserBackend h_query_backend_by_name(const char *name);
/**
* Get a Hammer Backend with params from a string of the form
* backend_name(params) for example "lalr(1)".
*
* If the backend is one of the existing backends in the HParserBackend enum,
* the backend field will be populated in the result.
*
* Otherwise the result will save the name for use in attempts later at
* loading the named module.
*
*/
HAMMER_FN_DECL(HParserBackendWithParams *, h_get_backend_with_params_by_name, const char *name_with_params);
/**
* Top-level function to call a parser that has been built over some
......@@ -295,7 +453,7 @@ HParseResult* h_parse_finish(HSuspendedParser* s);
*/
HAMMER_FN_DECL(HParser*, h_token, const uint8_t *str, const size_t len);
#define h_literal(s) h_token(s, sizeof(s)-1)
#define h_literal(s) h_token(((const uint8_t *)(s)), sizeof(s)-1)
/**
* Given a single character, returns a parser that parses that
......@@ -329,6 +487,14 @@ HAMMER_FN_DECL(HParser*, h_int_range, const HParser *p, const int64_t lower, con
*/
HAMMER_FN_DECL(HParser*, h_bits, size_t len, bool sign);
/**
* Returns a parser that parses the specified number of octets.
* The input does not have to be aligned to a byte boundary.
*
* Result token type: TT_BYTES
*/
HAMMER_FN_DECL(HParser*, h_bytes, size_t len);
/**
* Returns a parser that parses a signed 8-byte integer value.
*
......@@ -462,6 +628,15 @@ HAMMER_FN_DECL_NOARG(HParser*, h_nothing_p);
*/
HAMMER_FN_DECL_VARARGS_ATTR(H_GCC_ATTRIBUTE((sentinel)), HParser*, h_sequence, HParser* p);
/**
* Given an `h_sequence` and a list of indices, returns a parser that parses the sequence
* but returns it without the results at the dropped indices. If a negative integer appears
* in the middle of the list, this combinator will silently ignore the rest of the list.
*
* Result token type: TT_SEQUENCE
*/
#define h_drop_from(p, ...) h_drop_from_(p, __VA_ARGS__, -1)
HAMMER_FN_DECL_VARARGS(HParser*, h_drop_from_, HParser* p);
/**
* Given an array of parsers, p_array, apply each parser in order. The
* first parser to succeed is the result; if no parsers succeed, the
......@@ -716,6 +891,32 @@ HAMMER_FN_DECL(HParser*, h_get_value, const char* name);
*/
HAMMER_FN_DECL(HParser*, h_bind, const HParser *p, HContinuation k, void *env);
/**
* This parser skips 'n' bits of input.
*
* Result: None. The HParseResult exists but its AST is NULL.
*/
HAMMER_FN_DECL(HParser*, h_skip, size_t n);
/**
* The HParser equivalent of fseek(), 'h_seek' modifies the parser's input
* position. Note that contrary to 'fseek', offsets are in bits, not bytes.
* The 'whence' argument uses the same values and semantics: SEEK_SET,
* SEEK_CUR, SEEK_END.
*
* Fails if the new input position would be negative or past the end of input.
*
* Result: TT_UINT. The new input position.
*/
HAMMER_FN_DECL(HParser*, h_seek, ssize_t offset, int whence);
/**
* Report the current position in bits. Consumes no input.
*
* Result: TT_UINT. The current input position.
*/
HAMMER_FN_DECL_NOARG(HParser*, h_tell);
/**
* Free the memory allocated to an HParseResult when it is no longer needed.
*/
......@@ -728,18 +929,38 @@ HAMMER_FN_DECL(void, h_parse_result_free, HParseResult *result);
*/
char* h_write_result_unamb(const HParsedToken* tok);
/**
* Format token to the given output stream. Indent starting at
* [indent] spaces, with [delta] spaces between levels.
* Format token to the given output stream. Indent starting at [indent] spaces,
* with [delta] spaces between levels.
*
* Note: This function does not print a trailing newline. It also does not
* print any spaces to indent the initial line of output. This makes it
* suitable for recursive use in the condensed output of larger structures.
*/
void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta);
/**
* Format token to the given output stream. Print a trailing newline.
*
* This function assumes an initial indentation of 0 and uses 2 spaces between
* indentation levels. It is equivalent to 'h_pprint(stream, tok, 0, 2)'
* followed by 'fputc('\n', stream)' and is provided for convenience.
*/
void h_pprintln(FILE* stream, const HParsedToken* tok);
/**
* Build parse tables for the given parser backend. See the
* documentation for the parser backend in question for information
* about the [params] parameter, or just pass in NULL for the defaults.
*
* Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise.
* Returns a nonzero value on error; 0 otherwise. Common return codes include:
*
* -1: parser uses a combinator that is incompatible with the chosen backend.
* -2: parser could not be compiled with the chosen parameters.
* >0: unexpected internal errors.
*
* Consult each backend for details.
*/
HAMMER_FN_DECL(int, h_compile_for_backend_with_params, HParser* parser, HParserBackendWithParams *be_with_params);
HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params);
/**
......@@ -795,7 +1016,8 @@ HTokenType h_allocate_token_type(const char* name);
/// Allocate a new token type with an unambiguous print function.
HTokenType h_allocate_token_new(
const char* name,
void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf));
void (*unamb_sub)(const HParsedToken *tok, struct result_buf *buf),
void (*pprint)(FILE* stream, const HParsedToken* tok, int indent, int delta));
/// Get the token type associated with name. Returns -1 if name is unknown
HTokenType h_get_token_type_number(const char* name);
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.