Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • hammer/hammer
  • mlp/hammer
  • xentrac/hammer
  • pesco/hammer
  • letitiali/hammer
  • nobody/hammer
  • kia/hammer-sandbox
  • vyrus001/hammer
  • denleylam/hammer
9 results
Show changes
......@@ -9,12 +9,14 @@
/*
 * Allocate a new, empty HCountedArray in `arena` with room for `size`
 * elements.
 *
 * A requested size of 0 is bumped to 1: h_carray_append() grows the array
 * by doubling its capacity, which would never get off the ground from 0.
 *
 * Returns the new array; `used` is 0 and `capacity` is the (adjusted) size.
 */
HCountedArray *h_carray_new_sized(HArena * arena, size_t size) {
  /* _noinit here because we init all the fields below */
  HCountedArray *ret = h_arena_malloc_noinit(arena, sizeof(HCountedArray));
  if (size == 0)
    size = 1;
  ret->used = 0;
  ret->capacity = size;
  ret->arena = arena;
  /* we actually want to zero these */
  ret->elements = h_arena_malloc(arena, sizeof(void*) * size);
  return ret;
}
......@@ -24,12 +26,21 @@ HCountedArray *h_carray_new(HArena * arena) {
}
void h_carray_append(HCountedArray *array, void* item) {
HParsedToken **elements;
if (array->used >= array->capacity) {
HParsedToken **elements = h_arena_malloc(array->arena, (array->capacity *= 2) * sizeof(void*));
/* _noinit here; we init below */
elements = h_arena_malloc_noinit(array->arena,
(array->capacity *= 2) * sizeof(void*));
for (size_t i = 0; i < array->used; i++)
elements[i] = array->elements[i];
for (size_t i = array->used; i < array->capacity; i++)
elements[i] = 0;
/*
* XXX I hope we don't use this much, because h_arena_free() doesn't
* quite seem to be there and doing a lot of this would get pretty
* wasteful.
*/
h_arena_free(array->arena, array->elements);
array->elements = elements;
}
......@@ -38,7 +49,8 @@ void h_carray_append(HCountedArray *array, void* item) {
// HSlist
HSlist* h_slist_new(HArena *arena) {
HSlist *ret = h_arena_malloc(arena, sizeof(HSlist));
/* _noinit here; we set every element of ret below */
HSlist *ret = h_arena_malloc_noinit(arena, sizeof(HSlist));
ret->head = NULL;
ret->arena = arena;
return ret;
......@@ -53,8 +65,12 @@ HSlist* h_slist_copy(HSlist *slist) {
tail = ret->head;
head = head->next;
while (head != NULL) {
// append head item to tail in a new node
HSlistNode *node = h_arena_malloc(slist->arena, sizeof(HSlistNode));
/*
* append head item to tail in a new node
*
* use _noinit; we set every element of node after we allocate
*/
HSlistNode *node = h_arena_malloc_noinit(slist->arena, sizeof(HSlistNode));
node->elem = head->elem;
node->next = NULL;
tail = tail->next = node;
......@@ -85,10 +101,11 @@ void* h_slist_pop(HSlist *slist) {
}
/*
 * Push `item` onto the front of `slist`.
 *
 * A new node is allocated from the list's arena and becomes the new head;
 * the previous head becomes its successor.
 */
void h_slist_push(HSlist *slist, void* item) {
  /* use _noinit; we set every field of the node */
  HSlistNode *hnode = h_arena_malloc_noinit(slist->arena, sizeof(HSlistNode));
  hnode->elem = item;
  hnode->next = slist->head;
  /* write memory barrier here. */
  slist->head = hnode;
}
......@@ -132,30 +149,34 @@ void h_slist_free(HSlist *slist) {
}
/*
 * Create an empty hash table in `arena`.
 *
 * equalFunc - key equality predicate stored in the table
 * hashFunc  - key hash function stored in the table
 *
 * The table starts with 64 buckets, each explicitly initialised to an
 * empty entry (NULL key, value and next; hashval 0).
 */
HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc) {
  /* _noinit because all fields are set below */
  HHashTable *ht = h_arena_malloc_noinit(arena, sizeof(HHashTable));
  ht->hashFunc = hashFunc;
  ht->equalFunc = equalFunc;
  ht->capacity = 64; // to start; should be tuned later...
  ht->used = 0;
  ht->arena = arena;
  /* _noinit because all fields of all entries are set in the loop */
  ht->contents = h_arena_malloc_noinit(arena,
      sizeof(HHashTableEntry) * ht->capacity);
  for (size_t i = 0; i < ht->capacity; i++) {
    ht->contents[i].key = NULL;
    ht->contents[i].value = NULL;
    ht->contents[i].next = NULL;
    ht->contents[i].hashval = 0;
  }
  return ht;
}
void* h_hashtable_get(const HHashTable* ht, const void* key) {
HHashValue hashval = ht->hashFunc(key);
void * h_hashtable_get_precomp(const HHashTable *ht, const void *key,
HHashValue hashval) {
HHashTableEntry *hte = NULL;
#ifdef CONSISTENCY_CHECK
assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2
#endif
HHashTableEntry *hte = NULL;
for (hte = &ht->contents[hashval & (ht->capacity - 1)];
hte != NULL;
hte = hte->next) {
......@@ -169,35 +190,63 @@ void* h_hashtable_get(const HHashTable* ht, const void* key) {
return hte->value;
}
}
return NULL;
}
/*
 * Look up `key` in `ht`, hashing it with the table's own hash function and
 * delegating the bucket search to h_hashtable_get_precomp().
 */
void * h_hashtable_get(const HHashTable *ht, const void *key) {
  return h_hashtable_get_precomp(ht, key, ht->hashFunc(key));
}
void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry* new_entry);
/*
 * Make sure `ht` has enough buckets for `n` entries.
 *
 * The capacity is doubled until it is at least 1.3 * n. If it had to grow,
 * a fresh zeroed bucket array is allocated and every entry of the old array
 * (including chained entries) that has a key is re-inserted through
 * h_hashtable_put_raw(), which counts `used` back up from 0.
 */
void h_hashtable_ensure_capacity(HHashTable* ht, size_t n) {
  HHashTableEntry *old_contents, *new_contents;
  bool do_resize = false;
  size_t old_capacity = ht->capacity;
  while (n * 1.3 > ht->capacity) {
    ht->capacity *= 2;
    do_resize = true;
  }

  if (do_resize) {
    old_contents = ht->contents;
    /* _noinit because we set the whole thing below */
    new_contents = h_arena_malloc_noinit(ht->arena,
        sizeof(HHashTableEntry) * ht->capacity);
    ht->contents = new_contents;
    ht->used = 0;
    memset(new_contents, 0, sizeof(HHashTableEntry) * ht->capacity);
    for (size_t i = 0; i < old_capacity; ++i) {
      for (HHashTableEntry *entry = &old_contents[i];
           entry;
           entry = entry->next) {
        if (entry->key) {
          h_hashtable_put_raw(ht, entry);
        }
      }
    }
    /* h_arena_free(ht->arena, old_contents); */
  }
}
void h_hashtable_put(HHashTable* ht, const void* key, void* value) {
void h_hashtable_put_precomp(HHashTable *ht, const void *key, void *value,
HHashValue hashval) {
HHashTableEntry entry = {
.key = key,
.value = value,
.hashval = hashval
};
/* Rebalance if necessary */
h_hashtable_ensure_capacity(ht, ht->used + 1);
/* Insert it */
h_hashtable_put_raw(ht, &entry);
}
void h_hashtable_put(HHashTable *ht, const void *key, void *value) {
// # Start with a rebalancing
h_hashtable_ensure_capacity(ht, ht->used + 1);
......@@ -227,7 +276,7 @@ void h_hashtable_put_raw(HHashTable* ht, HHashTableEntry *new_entry) {
}
// Add a new link...
assert (hte->next == NULL);
hte->next = h_arena_malloc(ht->arena, sizeof(HHashTableEntry));
hte->next = h_arena_malloc_noinit(ht->arena, sizeof(HHashTableEntry));
hte = hte->next;
hte->next = NULL;
ht->used++;
......@@ -338,16 +387,18 @@ static bool hte_same_length(HHashTableEntry *xs, HHashTableEntry *ys) {
}
// helper for hte_equal: are all elements of xs present in ys?
static bool hte_subset(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys)
static bool hte_subset(HEqualFunc eq, HEqualFunc value_eq,
HHashTableEntry *xs, HHashTableEntry *ys)
{
for(; xs; xs=xs->next) {
if(xs->key == NULL) continue; // element not present
HHashTableEntry *hte;
for(hte=ys; hte; hte=hte->next) {
if(hte->key == xs->key) break; // assume an element is equal to itself
// assume an element is equal to itself
if(hte->key == xs->key && hte->value == xs->value) break;
if(hte->hashval != xs->hashval) continue; // shortcut
if(eq(hte->key, xs->key)) break;
if(eq(hte->key, xs->key) && value_eq(hte->value, xs->value)) break;
}
if(hte == NULL) return false; // element not found
}
......@@ -355,19 +406,20 @@ static bool hte_subset(HEqualFunc eq, HHashTableEntry *xs, HHashTableEntry *ys)
}
// compare two lists of HHashTableEntries
/*
 * Two entry chains are considered equal when they have the same length and
 * every present element of `xs` has a match in `ys`, with keys compared by
 * `eq` and values by `value_eq`.
 */
static inline bool hte_equal(HEqualFunc eq, HEqualFunc value_eq,
                             HHashTableEntry *xs, HHashTableEntry *ys) {
  return (hte_same_length(xs, ys) && hte_subset(eq, value_eq, xs, ys));
}
/* Set equality of HHashSets.
/* Equality of HHashTables.
* Obviously, 'a' and 'b' must use the same equality function.
* Not strictly necessary, but we also assume the same hash function.
*/
bool h_hashset_equal(const HHashSet *a, const HHashSet *b) {
bool h_hashtable_equal(const HHashSet *a, const HHashSet *b, HEqualFunc value_eq) {
if(a->capacity == b->capacity) {
// iterate over the buckets in parallel
for(size_t i=0; i < a->capacity; i++) {
if(!hte_equal(a->equalFunc, &a->contents[i], &b->contents[i]))
if(!hte_equal(a->equalFunc, value_eq, &a->contents[i], &b->contents[i]))
return false;
}
} else {
......@@ -377,6 +429,18 @@ bool h_hashset_equal(const HHashSet *a, const HHashSet *b) {
return true;
}
/*
 * Comparator that treats every pair of values as equal; both arguments are
 * ignored.
 */
static bool eq_dontcare(const void *p, const void *q)
{
  (void)p;
  (void)q;
  return true;
}
/*
 * Compare two HHashSets for set equality.
 *
 * Both sets must use the same equality function; the same hash function is
 * assumed as well, although that is not strictly required. Implemented as
 * h_hashtable_equal() with a value comparator that accepts anything, so
 * only the keys take part in the comparison.
 */
bool h_hashset_equal(const HHashSet *a, const HHashSet *b)
{
  return h_hashtable_equal(a, b, eq_dontcare);
}
/* Identity comparison: true exactly when `p` and `q` are the same address. */
bool h_eq_ptr(const void *p, const void *q)
{
  const bool same_address = (p == q);
  return same_address;
}
......@@ -388,11 +452,26 @@ HHashValue h_hash_ptr(const void *p) {
}
/*
 * Hash `len` bytes starting at `buf`: start from 5381 and fold in each byte
 * as h = h * 33 + byte, with 32-bit wrap-around.
 *
 * The main loop is unrolled to consume 16 bytes per iteration; the trailing
 * loop handles the remaining 0..15 bytes. The result is identical to the
 * plain one-byte-at-a-time loop.
 */
uint32_t h_djbhash(const uint8_t *buf, size_t len) {
  uint32_t h = 5381;

  while (len >= 16) {
    h = h * 33 + buf[0];  h = h * 33 + buf[1];
    h = h * 33 + buf[2];  h = h * 33 + buf[3];
    h = h * 33 + buf[4];  h = h * 33 + buf[5];
    h = h * 33 + buf[6];  h = h * 33 + buf[7];
    h = h * 33 + buf[8];  h = h * 33 + buf[9];
    h = h * 33 + buf[10]; h = h * 33 + buf[11];
    h = h * 33 + buf[12]; h = h * 33 + buf[13];
    h = h * 33 + buf[14]; h = h * 33 + buf[15];
    len -= 16;
    buf += 16;
  }

  /* Exactly one byte is consumed per remaining length unit. */
  while (len--) {
    h = h * 33 + *buf++;
  }

  return h;
}
void h_symbol_put(HParseState *state, const char* key, void *value) {
......
......@@ -60,15 +60,8 @@ static void act_flatten_(HCountedArray *seq, const HParsedToken *tok) {
}
/*
 * Semantic action: build a new sequence token in the result's arena and
 * fill it by running act_flatten_() over the result's AST.
 *
 * `user_data` is not used.
 */
HParsedToken *h_act_flatten(const HParseResult *p, void* user_data) {
  HParsedToken *res = h_make_seq(p->arena);
  act_flatten_(res->seq, p->ast);
  return res;
}
......@@ -106,7 +99,7 @@ HParsedToken *h_make_seqn(HArena *arena, size_t n)
return ret;
}
HParsedToken *h_make_bytes(HArena *arena, uint8_t *array, size_t len)
HParsedToken *h_make_bytes(HArena *arena, const uint8_t *array, size_t len)
{
HParsedToken *ret = h_make_(arena, TT_BYTES);
ret->bytes.len = len;
......@@ -128,6 +121,20 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val)
return ret;
}
/* Make a TT_DOUBLE token in `arena` whose `dbl` field holds `val`. */
HParsedToken *h_make_double(HArena *arena, double val)
{
  HParsedToken *tok = h_make_(arena, TT_DOUBLE);

  tok->dbl = val;
  return tok;
}
/* Make a TT_FLOAT token in `arena` whose `flt` field holds `val`. */
HParsedToken *h_make_float(HArena *arena, float val)
{
  HParsedToken *tok = h_make_(arena, TT_FLOAT);

  tok->flt = val;
  return tok;
}
// XXX -> internal
HParsedToken *h_carray_index(const HCountedArray *a, size_t i)
{
......
......@@ -195,9 +195,11 @@ HParsedToken *h_act_ignore(const HParseResult *p, void* user_data);
HParsedToken *h_make(HArena *arena, HTokenType type, void *value);
HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence.
HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n.
HParsedToken *h_make_bytes(HArena *arena, uint8_t *array, size_t len);
HParsedToken *h_make_bytes(HArena *arena, const uint8_t *array, size_t len);
HParsedToken *h_make_sint(HArena *arena, int64_t val);
HParsedToken *h_make_uint(HArena *arena, uint64_t val);
HParsedToken *h_make_double(HArena *arena, double val);
HParsedToken *h_make_float(HArena *arena, float val);
// Standard short-hands to make tokens in an action.
#define H_MAKE(TYP, VAL) h_make(p->arena, (HTokenType)TT_ ## TYP, VAL)
......@@ -206,6 +208,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val);
#define H_MAKE_BYTES(VAL, LEN) h_make_bytes(p->arena, VAL, LEN)
#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL)
#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL)
#define H_MAKE_DOUBLE(VAL) h_make_double(p->arena, VAL)
#define H_MAKE_FLOAT(VAL) h_make_float(p->arena, VAL)
// Extract (cast) type-specific value back from HParsedTokens...
......@@ -218,6 +222,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val);
#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK)
#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK)
#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK)
#define H_ASSERT_DOUBLE(TOK) h_assert_type(TT_DOUBLE, TOK)
#define H_ASSERT_FLOAT(TOK) h_assert_type(TT_FLOAT, TOK)
// Assert expected type and return contained value.
#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user)
......@@ -225,6 +231,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val);
#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes)
#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint)
#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint)
#define H_CAST_DOUBLE(TOK) (H_ASSERT_DOUBLE(TOK)->dbl)
#define H_CAST_FLOAT(TOK) (H_ASSERT_FLOAT(TOK)->flt)
// Sequence access...
......@@ -247,7 +255,9 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va);
#define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_SINT(SEQ, ...) H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1)
#define H_INDEX_DOUBLE(SEQ, ...) H_CAST_DOUBLE(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_FLOAT(SEQ, ...) H_CAST_FLOAT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(H_ASSERT_SEQ(SEQ), __VA_ARGS__, -1)
// Standard short-hand to access and cast elements on a sequence token.
#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__)
......@@ -255,6 +265,9 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va);
#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__)
#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__)
#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__)
#define H_FIELD_DOUBLE(...) H_INDEX_DOUBLE(p->ast, __VA_ARGS__)
#define H_FIELD_FLOAT(...) H_INDEX_FLOAT(p->ast, __VA_ARGS__)
#define H_FIELD_TOKEN(...) H_INDEX_TOKEN(p->ast, __VA_ARGS__)
// Lower-level helper for h_seq_index.
HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal
......
This diff is collapsed.
......@@ -41,7 +41,12 @@ typedef struct HParseState_ HParseState;
typedef enum HParserBackend_ {
PB_MIN = 0,
PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
/*
* Have a backend that always fails to pass around "no such backend"
* indications
*/
PB_INVALID = PB_MIN,
PB_PACKRAT,
PB_REGULAR,
PB_LLk,
PB_LALR,
......@@ -49,6 +54,26 @@ typedef enum HParserBackend_ {
PB_MAX = PB_GLR
} HParserBackend;
typedef struct HParserBackendVTable_ HParserBackendVTable;
typedef struct HParserBackendWithParams_ {
/* Name of backend extracted from a string if the choice of backend was specified in a call using a string */
char *requested_name;
/* The backend (if backend is to be loaded from an external module set to invalid (?))*/
HParserBackend backend;
/* Backend vtable (TODO: use this instead of the enum so we can get rid of that) */
HParserBackendVTable * backend_vtable;
/*
* Backend-specific parameters - if this needs to be freed, the backend
* should provide a free_params method in its vtable; currently no backends
* do this - PB_PACKRAT and PB_REGULAR take no params, and PB_LLk, PB_LALR
* and PB_GLR take an integer cast to void *
*/
void *params;
/* Allocator to use to free this (and the params if necessary) */
HAllocator *mm__;
} HParserBackendWithParams;
typedef enum HTokenType_ {
// Before you change the explicit values of these, think of the poor bindings ;_;
TT_INVALID = 0,
......@@ -56,6 +81,8 @@ typedef enum HTokenType_ {
TT_BYTES = 2,
TT_SINT = 4,
TT_UINT = 8,
TT_DOUBLE = 12,
TT_FLOAT = 13,
TT_SEQUENCE = 16,
TT_RESERVED_1, // reserved for backend-specific internal use
TT_ERR = 32,
......@@ -135,6 +162,7 @@ typedef struct HParserVtable_ HParserVtable;
typedef struct HParser_ {
const HParserVtable *vtable;
HParserBackend backend;
HParserBackendVTable * backend_vtable;
void* backend_data;
void *env;
HCFChoice *desugared; /* if the parser can be desugared, its desugared form */
......@@ -173,6 +201,53 @@ typedef bool (*HPredicate)(HParseResult *p, void* user_data);
*/
typedef HParser* (*HContinuation)(HAllocator *mm__, const HParsedToken *x, void *env);
/*
* For parser used when extracting name and params for backend by name
* TODO: possibly move to its own file?
*/
enum BackendTokenType_ {
TT_backend_with_params_t = TT_USER,
TT_backend_name_t,
TT_backend_param_t,
TT_backend_param_name_t,
TT_backend_param_with_name_t,
TT_backend_params_t
};
typedef struct backend_param {
size_t len;
uint8_t *param;
uint8_t *param_name;
} backend_param_t;
typedef struct backend_param_name {
size_t len;
uint8_t *param_name;
size_t param_id;
} backend_param_name_t;
typedef struct backend_param_with_name {
backend_param_name_t param_name;
backend_param_t param;
} backend_param_with_name_t;
typedef struct {
uint8_t *name;
size_t len;
} backend_name_t;
typedef struct backend_params {
backend_param_with_name_t *params;
size_t len;
} backend_params_t;
typedef struct backend_with_params {
backend_name_t name;
backend_params_t params;
} backend_with_params_t;
// {{{ Stuff for benchmarking
typedef struct HParserTestcase_ {
unsigned char* input;
......@@ -260,6 +335,89 @@ typedef struct HBenchmarkResults_ {
#endif // SWIG
// }}}
/**
* Ask if this backend is available
*/
int h_is_backend_available(HParserBackend backend);
/**
* Ask what the default backend is (currently always PB_PACKRAT)
*/
HParserBackend h_get_default_backend(void);
HParserBackendVTable * h_get_default_backend_vtable(void);
/**
* Copy a backend+params, using the backend-supplied copy method; the
* allocator used is the one passed in, or call the __m version with
* a NULL allocator to use the one from the source HParserBackendWithParams
*/
HAMMER_FN_DECL(HParserBackendWithParams *, h_copy_backend_with_params,
HParserBackendWithParams *be_with_params);
/**
* Free a backend+params
*/
void h_free_backend_with_params(HParserBackendWithParams *be_with_params);
/**
* Get a name string for a backend; this is constant per backend and so
* need not be freed; it will resolve to the backend under
* h_get_backend_by_name().
*/
const char * h_get_name_for_backend(HParserBackend be);
/**
* Get a name string for a backend with parameters; it is the caller's
* responsibility to free it later. This will resolve to the same
* backend and parameters under h_get_backend_with_params_by_name().
*/
HAMMER_FN_DECL(char *, h_get_name_for_backend_with_params,
HParserBackendWithParams *be_with_params);
/**
* Get a human-readable descriptive string for a backend; this is constant
* per backend and so need not be freed.
*/
const char * h_get_descriptive_text_for_backend(HParserBackend be);
/**
* Get a human-readable descriptive string for a backend with params; it is
* the caller's responsibility to free it later. Sorry, but it's allowed
* to depend on the params, and keeping the buffer elsewhere and
* replacing it on the next call wouldn't be thread-safe.
*/
HAMMER_FN_DECL(char *, h_get_descriptive_text_for_backend_with_params,
HParserBackendWithParams *be_with_params);
/**
* Look up an HParserBackend by name; this should round-trip with
* h_get_name_for_backend().
*/
HParserBackend h_query_backend_by_name(const char *name);
/**
* Get a Hammer Backend with params from a string of the form
* backend_name(params) for example "lalr(1)".
*
* If the backend is one of the existing backends in the HBackend enum,
* HBackend will be populated in the result.
*
* Otherwise the result will save the name for use in attempts later at
* loading the named module.
*
*/
HAMMER_FN_DECL(HParserBackendWithParams *, h_get_backend_with_params_by_name, const char *name_with_params);
/**
* Top-level function to call a parser that has been built over some
......@@ -295,7 +453,7 @@ HParseResult* h_parse_finish(HSuspendedParser* s);
*/
HAMMER_FN_DECL(HParser*, h_token, const uint8_t *str, const size_t len);
#define h_literal(s) h_token(s, sizeof(s)-1)
#define h_literal(s) h_token(((const uint8_t *)(s)), sizeof(s)-1)
/**
* Given a single character, returns a parser that parses that
......@@ -329,6 +487,14 @@ HAMMER_FN_DECL(HParser*, h_int_range, const HParser *p, const int64_t lower, con
*/
HAMMER_FN_DECL(HParser*, h_bits, size_t len, bool sign);
/**
* Returns a parser that parses the specified number of octets.
* The input does not have to be aligned to a byte boundary.
*
* Result token type: TT_BYTES
*/
HAMMER_FN_DECL(HParser*, h_bytes, size_t len);
/**
* Returns a parser that parses a signed 8-byte integer value.
*
......@@ -462,6 +628,15 @@ HAMMER_FN_DECL_NOARG(HParser*, h_nothing_p);
*/
HAMMER_FN_DECL_VARARGS_ATTR(H_GCC_ATTRIBUTE((sentinel)), HParser*, h_sequence, HParser* p);
/**
* Given an `h_sequence` and a list of indices, returns a parser that parses the sequence
* but returns it without the results at the dropped indices. If a negative integer appears
* in the middle of the list, this combinator will silently ignore the rest of the list.
*
* Result token type: TT_SEQUENCE
*/
#define h_drop_from(p, ...) h_drop_from_(p, __VA_ARGS__, -1)
HAMMER_FN_DECL_VARARGS(HParser*, h_drop_from_, HParser* p);
/**
* Given an array of parsers, p_array, apply each parser in order. The
* first parser to succeed is the result; if no parsers succeed, the
......@@ -716,6 +891,32 @@ HAMMER_FN_DECL(HParser*, h_get_value, const char* name);
*/
HAMMER_FN_DECL(HParser*, h_bind, const HParser *p, HContinuation k, void *env);
/**
* This parser skips 'n' bits of input.
*
* Result: None. The HParseResult exists but its AST is NULL.
*/
HAMMER_FN_DECL(HParser*, h_skip, size_t n);
/**
* The HParser equivalent of fseek(), 'h_seek' modifies the parser's input
* position. Note that contrary to 'fseek', offsets are in bits, not bytes.
* The 'whence' argument uses the same values and semantics: SEEK_SET,
* SEEK_CUR, SEEK_END.
*
* Fails if the new input position would be negative or past the end of input.
*
* Result: TT_UINT. The new input position.
*/
HAMMER_FN_DECL(HParser*, h_seek, ssize_t offset, int whence);
/**
* Report the current position in bits. Consumes no input.
*
* Result: TT_UINT. The current input position.
*/
HAMMER_FN_DECL_NOARG(HParser*, h_tell);
/**
* Free the memory allocated to an HParseResult when it is no longer needed.
*/
......@@ -728,18 +929,38 @@ HAMMER_FN_DECL(void, h_parse_result_free, HParseResult *result);
*/
char* h_write_result_unamb(const HParsedToken* tok);
/**
* Format token to the given output stream. Indent starting at
* [indent] spaces, with [delta] spaces between levels.
* Format token to the given output stream. Indent starting at [indent] spaces,
* with [delta] spaces between levels.
*
* Note: This function does not print a trailing newline. It also does not
* print any spaces to indent the initial line of output. This makes it
* suitable for recursive use in the condensed output of larger structures.
*/
void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta);
/**
* Format token to the given output. Print a trailing newline.
*
* This function assumes an initial indentation of 0 and uses 2 spaces between
* indentation levels. It is equivalent to 'h_pprint(stream, tok, 0, 2)'
* followed by 'fputc('\n', stream)' and is provided for convenience.
*/
void h_pprintln(FILE* stream, const HParsedToken* tok);
/**
* Build parse tables for the given parser backend. See the
* documentation for the parser backend in question for information
* about the [params] parameter, or just pass in NULL for the defaults.
*
* Returns -1 if grammar cannot be compiled with the specified options; 0 otherwise.
* Returns a nonzero value on error; 0 otherwise. Common return codes include:
*
* -1: parser uses a combinator that is incompatible with the chosen backend.
* -2: parser could not be compiled with the chosen parameters.
* >0: unexpected internal errors.
*
* Consult each backend for details.
*/
HAMMER_FN_DECL(int, h_compile_for_backend_with_params, HParser* parser, HParserBackendWithParams *be_with_params);
HAMMER_FN_DECL(int, h_compile, HParser* parser, HParserBackend backend, const void* params);
/**
......
......@@ -69,6 +69,8 @@ extern HAllocator system_allocator;
typedef struct HCFStack_ HCFStack;
#define DEFAULT_ENDIANNESS (BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN)
typedef struct HInputStream_ {
// This should be considered to be a really big value type.
const uint8_t *input;
......@@ -238,6 +240,33 @@ typedef struct HParserBackendVTable_ {
HParseResult *(*parse_finish)(HSuspendedParser *s);
// parse_finish must free s->backend_state.
// parse_finish will not be called before parse_chunk reports done.
/* The backend knows how to free its params */
void (*free_params)(HAllocator *mm__, void *p);
/*
* ..and how to copy them
*
* Since the backend params need not actually be an allocated object,
* (and in fact no current backends use this, although it is permissible),
* but might (as in PB_GLR) be some numeric constant cast to void * which
* copy_params() should just pass through, we can't use returning NULL
* to signal allocation failure. Hence, passing the result out in a
* void ** and returning a status code (0 indicates success).
*/
int (*copy_params)(HAllocator *mm__, void **out, void *in);
/* Description/name handling */
const char *backend_short_name;
const char *backend_description;
char * (*get_description_with_params)(HAllocator *mm__,
HParserBackend be,
void *params);
char * (*get_short_name_with_params)(HAllocator *mm__,
HParserBackend be,
void *params);
/* extract params from the input string */
int (*extract_params)(HParserBackendWithParams * be_with_params, backend_with_params_t *be_with_params_t);
} HParserBackendVTable;
......@@ -318,6 +347,7 @@ struct HBitWriter_ {
// Backends {{{
extern HParserBackendVTable h__missing_backend_vtable;
extern HParserBackendVTable h__packrat_backend_vtable;
extern HParserBackendVTable h__llk_backend_vtable;
extern HParserBackendVTable h__lalr_backend_vtable;
......@@ -326,20 +356,65 @@ extern HParserBackendVTable h__glr_backend_vtable;
// TODO(thequux): Set symbol visibility for these functions so that they aren't exported.
/*
* Helper functions for backend with params names and descriptions for
* backends which take no params.
*/
char * h_get_description_with_no_params(HAllocator *mm__,
HParserBackend be, void *params);
char * h_get_short_name_with_no_params(HAllocator *mm__,
HParserBackend be, void *params);
int64_t h_read_bits(HInputStream* state, int count, char signed_p);
void h_skip_bits(HInputStream* state, size_t count);
void h_seek_bits(HInputStream* state, size_t pos);
/*
 * Current position of the input stream, in bits:
 * (pos + index) * 8 + bit_offset + margin.
 *
 * The asserts check that adding pos and index, and then multiplying the sum
 * by 8, stays within the range of size_t.
 */
static inline size_t h_input_stream_pos(HInputStream* state) {
  assert(state->pos <= SIZE_MAX - state->index);
  assert(state->pos + state->index < SIZE_MAX / 8);
  return (state->pos + state->index) * 8 + state->bit_offset + state->margin;
}
/*
 * Total length of the input stream, in bits: (pos + length) * 8.
 *
 * The asserts check that adding pos and length, and then multiplying the
 * sum by 8, stays within the range of size_t.
 * NOTE(review): pos and length are presumably byte counts - confirm against
 * the HInputStream definition.
 */
static inline size_t h_input_stream_length(HInputStream *state) {
assert(state->pos <= SIZE_MAX - state->length);
assert(state->pos + state->length <= SIZE_MAX / 8);
return (state->pos + state->length) * 8;
}
// need to decide if we want to make this public.
HParseResult* h_do_parse(const HParser* parser, HParseState *state);
void put_cached(HParseState *ps, const HParser *p, HParseResult *cached);
/*
* Inline this for benefit of h_new_parser() below, then make
* the API h_get_default_backend() call it.
*/
/* Inlinable form of the default-backend query; always PB_PACKRAT. */
static inline HParserBackend h_get_default_backend__int(void)
{
  return PB_PACKRAT;
}
/* Inlinable accessor for the default backend's vtable (the packrat one). */
static inline HParserBackendVTable * h_get_default_backend_vtable__int(void)
{
  return &h__packrat_backend_vtable;
}
/* Inlinable accessor for the vtable that stands in for a missing backend. */
static inline HParserBackendVTable * h_get_missing_backend_vtable__int(void)
{
  return &h__missing_backend_vtable;
}
/* copy_params for backends where the parameter is not actually a pointer */
int h_copy_numeric_param(HAllocator *mm__, void **out, void *in);
/*
 * Allocate a zeroed HParser through `mm__` and fill in its vtable, its
 * environment pointer and the default backend (enum value and vtable).
 */
static inline
HParser *h_new_parser(HAllocator *mm__, const HParserVtable *vt, void *env) {
  HParser *parser = h_new(HParser, 1);

  memset(parser, 0, sizeof(*parser));
  /*
   * Current limitation: if we specify backends solely by HParserBackend, we
   * can't set a default backend that requires any parameters to h_compile()
   */
  parser->backend = h_get_default_backend__int();
  parser->backend_vtable = h_get_default_backend_vtable__int();
  parser->env = env;
  parser->vtable = vt;
  return parser;
}
......@@ -360,16 +435,22 @@ HSlist* h_slist_remove_all(HSlist *slist, const void* item);
void h_slist_free(HSlist *slist);
static inline bool h_slist_empty(const HSlist *sl) { return (sl->head == NULL); }
HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc);
void* h_hashtable_get(const HHashTable* ht, const void* key);
void h_hashtable_put(HHashTable* ht, const void* key, void* value);
void h_hashtable_update(HHashTable* dst, const HHashTable *src);
void h_hashtable_merge(void *(*combine)(void *v1, const void *v2),
HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc,
HHashFunc hashFunc);
void * h_hashtable_get_precomp(const HHashTable *ht, const void *key,
HHashValue hashval);
void * h_hashtable_get(const HHashTable *ht, const void *key);
void h_hashtable_put_precomp(HHashTable *ht, const void *key,
void *value, HHashValue hashval);
void h_hashtable_put(HHashTable *ht, const void *key, void *value);
void h_hashtable_update(HHashTable *dst, const HHashTable *src);
void h_hashtable_merge(void *(*combine)(void *v1, const void *v2),
HHashTable *dst, const HHashTable *src);
int h_hashtable_present(const HHashTable* ht, const void* key);
void h_hashtable_del(HHashTable* ht, const void* key);
void h_hashtable_free(HHashTable* ht);
static inline bool h_hashtable_empty(const HHashTable* ht) { return (ht->used == 0); }
int h_hashtable_present(const HHashTable *ht, const void *key);
void h_hashtable_del(HHashTable *ht, const void *key);
void h_hashtable_free(HHashTable *ht);
static inline bool h_hashtable_empty(const HHashTable *ht) { return (ht->used == 0); }
bool h_hashtable_equal(const HHashTable *a, const HHashTable *b, HEqualFunc value_eq);
typedef HHashTable HHashSet;
#define h_hashset_new(a,eq,hash) h_hashtable_new(a,eq,hash)
......
......@@ -3,18 +3,18 @@
/*
 * Parse function for the "and" lookahead combinator: run the wrapped parser
 * (passed as `env`) and, if it succeeds, rewind the input stream to where it
 * was and return a result with a NULL AST.
 *
 * On failure the input stream is deliberately NOT rewound, so that the
 * failed input state (notably an overrun) propagates to the caller.
 */
static HParseResult *parse_and(void* env, HParseState* state) {
  HInputStream bak = state->input_stream;
  HParseResult *res = h_do_parse((HParser*)env, state);
  if (!res)
    return NULL; // propagate failed input state, esp. overrun
  state->input_stream = bak;
  return make_result(state->arena, NULL);
}
static const HParserVtable and_vt = {
.parse = parse_and,
.isValidRegular = h_false, /* TODO: strictly speaking this should be regular,
but it will be a huge amount of work and difficult
to get right, so we're leaving it for a future
revision. --mlp, 18/12/12 */
but it will be a huge amount of work and
difficult to get right, so we're leaving it for
a future revision. --mlp, 18/12/12 */
.isValidCF = h_false, /* despite TODO above, this remains false. */
.compile_to_rvm = h_not_regular,
.higher = true,
......
......@@ -21,8 +21,7 @@ static void *aa_alloc(HAllocator *allocator, size_t size)
/*
 * realloc hook of the arena-backed allocator: forwards the request to
 * h_arena_realloc() on the arena wrapped by `allocator`.
 */
static void *aa_realloc(HAllocator *allocator, void *ptr, size_t size)
{
  HArena *arena = ((ArenaAllocator *)allocator)->arena;
  return h_arena_realloc(arena, ptr, size);
}
static void aa_free(HAllocator *allocator, void *ptr)
......
......@@ -14,6 +14,9 @@ static HParseResult* parse_bits(void* env, HParseState *state) {
result->sint = h_read_bits(&state->input_stream, env_->length, true);
else
result->uint = h_read_bits(&state->input_stream, env_->length, false);
result->index = 0;
result->bit_length = 0;
result->bit_offset = 0;
return make_result(state->arena, result);
}
......@@ -29,7 +32,7 @@ static HParsedToken *reshape_bits(const HParseResult *p, void* signedp_p) {
HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken));
ret->token_type = TT_UINT;
if(signedp && (seq->elements[0]->uint & 128))
if(signedp && seq->used > 0 && (seq->elements[0]->uint & 128))
ret->uint = -1; // all ones
for(size_t i=0; i<seq->used; i++) {
......
......@@ -5,7 +5,6 @@ typedef struct {
const HParser *p2;
} HTwoParsers;
static HParseResult* parse_butnot(void *env, HParseState *state) {
HTwoParsers *parsers = (HTwoParsers*)env;
// cache the initial state of the input stream
......@@ -19,15 +18,18 @@ static HParseResult* parse_butnot(void *env, HParseState *state) {
HInputStream after_p1_state = state->input_stream;
state->input_stream = start_state;
HParseResult *r2 = h_do_parse(parsers->p2, state);
// TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
// don't touch the input state (overrun flag) if we must suspend
if (want_suspend(state)) {
return NULL;
}
// in all other cases, the input stream should be in the post-p1 state
state->input_stream = after_p1_state;
// if p2 failed, restore post-p1 state and bail out early
if (NULL == r2) {
return r1;
}
size_t r1len = token_length(r1);
size_t r2len = token_length(r2);
// if both match but p1's text is no longer than p2's (shorter or the same length), fail
if (r1len <= r2len) {
return NULL;
} else {
......
#include "parser_internal.h"
/* Per-parser environment for the bytes parser. */
struct bytes_env {
  size_t length;  /* byte count: stored by h_bytes__m, read by parse_bytes */
};
/*
 * Parse callback for the bytes parser.
 *
 * Reads up to env->length 8-bit values from the input stream into a buffer
 * obtained from a_new() and wraps that buffer in a TT_BYTES token. Reading
 * stops as soon as the input stream's overrun flag is set; the token's len
 * is still set to env->length in that case.
 * NOTE(review): presumably the caller discards the result on overrun --
 * confirm against h_do_parse.
 */
static HParseResult *parse_bytes(void *env_, HParseState *state)
{
  const struct bytes_env *cfg = env_;
  const size_t want = cfg->length;
  uint8_t *buf = a_new(uint8_t, want);
  size_t n = 0;

  while (n < want && !state->input_stream.overrun) {
    buf[n] = h_read_bits(&state->input_stream, 8, false);
    n++;
  }

  HParsedToken *tok = a_new(HParsedToken, 1);
  tok->token_type = TT_BYTES;
  tok->bytes.token = buf;
  tok->bytes.len = want;
  /* position/size bookkeeping starts out zeroed */
  tok->index = 0;
  tok->bit_length = 0;
  tok->bit_offset = 0;

  return make_result(state->arena, tok);
}
/*
 * Vtable for the bytes parser. Only the parse callback is provided; both
 * validity hooks report false until the pieces named below exist.
 */
static const HParserVtable bytes_vt = {
  .parse          = parse_bytes,
  .isValidRegular = h_false,  /* XXX need desugar_bytes, reshape_bytes */
  .isValidCF      = h_false,  /* XXX need bytes_ctrvm */
};
/* Convenience form of h_bytes__m() that uses system_allocator. */
HParser *h_bytes(size_t len)
{
  HParser *parser = h_bytes__m(&system_allocator, len);

  return parser;
}
/*
 * Creates a bytes parser for `len` bytes: allocates a bytes_env via h_new(),
 * records the length in it, and hands it to h_new_parser() together with
 * bytes_vt and the allocator `mm__`.
 */
HParser *h_bytes__m(HAllocator *mm__, size_t len)
{
  struct bytes_env *cfg = h_new(struct bytes_env, 1);

  cfg->length = len;

  return h_new_parser(mm__, &bytes_vt, cfg);
}
......@@ -8,6 +8,9 @@ static HParseResult* parse_ch(void* env, HParseState *state) {
if (c == r) {
HParsedToken *tok = a_new(HParsedToken, 1);
tok->token_type = TT_UINT; tok->uint = r;
tok->index = 0;
tok->bit_length = 0;
tok->bit_offset = 0;
return make_result(state->arena, tok);
} else {
return NULL;
......
......@@ -10,6 +10,9 @@ static HParseResult* parse_charset(void *env, HParseState *state) {
if (charset_isset(cs, in)) {
HParsedToken *tok = a_new(HParsedToken, 1);
tok->token_type = TT_UINT; tok->uint = in;
tok->index = 0;
tok->bit_length = 0;
tok->bit_offset = 0;
return make_result(state->arena, tok);
} else
return NULL;
......
......@@ -30,6 +30,8 @@ static HParseResult* parse_choice(void *env, HParseState *state) {
HParseResult *tmp = h_do_parse(s->p_array[i], state);
if (NULL != tmp)
return tmp;
if (want_suspend(state))
return NULL; // bail out early, leaving overrun flag
}
// nothing succeeded, so fail
return NULL;
......@@ -163,7 +165,8 @@ HParser* h_choice__ma(HAllocator* mm__, void *args[]) {
HParser *ret = h_new(HParser, 1);
ret->vtable = &choice_vt;
ret->env = (void*)s;
ret->backend = PB_MIN;
ret->backend = h_get_default_backend();
ret->backend_vtable = h_get_default_backend_vtable();
ret->desugared = NULL;
return ret;
}
......@@ -18,9 +18,12 @@ static HParseResult* parse_difference(void *env, HParseState *state) {
HInputStream after_p1_state = state->input_stream;
state->input_stream = start_state;
HParseResult *r2 = h_do_parse(parsers->p2, state);
// TODO(mlp): I'm pretty sure the input stream state should be the post-p1 state in all cases
// don't touch the input state (overrun flag) if we must suspend
if (want_suspend(state)) {
return NULL;
}
// in all other cases, the input stream should be in the post-p1 state
state->input_stream = after_p1_state;
// if p2 failed, restore post-p1 state and bail out early
if (NULL == r2) {
return r1;
}
......@@ -34,7 +37,7 @@ static HParseResult* parse_difference(void *env, HParseState *state) {
}
}
static HParserVtable difference_vt = {
static const HParserVtable difference_vt = {
.parse = parse_difference,
.isValidRegular = h_false,
.isValidCF = h_false, // XXX should this be true if both p1 and p2 are CF?
......
#include "parser_internal.h"
/*
 * Parse callback that matches only at the end of the input.
 *
 * - If the stream index is still below the stream length, fails (NULL).
 * - If the index has reached the length and this is the last chunk, returns
 *   a result with a NULL ast and zero bit_length.
 * - If the index has reached the length but more chunks may follow, sets the
 *   input stream's overrun flag to request more input and fails (NULL).
 */
static HParseResult* parse_end(void *env, HParseState *state)
{
  if (state->input_stream.index < state->input_stream.length)
    return NULL;

  // the index must never run past the end of the stream
  assert(state->input_stream.index == state->input_stream.length);

  if (state->input_stream.last_chunk) {
    HParseResult *ret = a_new(HParseResult, 1);
    ret->ast = NULL;
    ret->bit_length = 0;
    ret->arena = state->arena;
    return ret;
  } else {
    state->input_stream.overrun = true;  // need more input
    return NULL;
  }
}
......
......@@ -5,6 +5,7 @@ static HParseResult* parse_epsilon(void* env, HParseState* state) {
HParseResult* res = a_new(HParseResult, 1);
res->ast = NULL;
res->arena = state->arena;
res->bit_length = 0;
return res;
}
......@@ -28,7 +29,8 @@ HParser* h_epsilon_p__m(HAllocator* mm__) {
HParser *epsilon_p = h_new(HParser, 1);
epsilon_p->desugared = NULL;
epsilon_p->backend_data = NULL;
epsilon_p->backend = 0;
epsilon_p->backend = h_get_default_backend();
epsilon_p->backend_vtable = h_get_default_backend_vtable();
epsilon_p->vtable = &epsilon_vt;
return epsilon_p;
}
......@@ -8,6 +8,7 @@ static HParseResult* parse_ignore(void* env, HParseState* state) {
HParseResult *res = a_new(HParseResult, 1);
res->ast = NULL;
res->arena = state->arena;
res->bit_length = 0;
return res;
}
......
......@@ -21,30 +21,32 @@ static HParseResult *parse_many(void* env, HParseState *state) {
if (count > 0 && env_->sep != NULL) {
HParseResult *sep = h_do_parse(env_->sep, state);
if (!sep)
goto err0;
goto stop;
}
HParseResult *elem = h_do_parse(env_->p, state);
if (!elem)
goto err0;
goto stop;
if (elem->ast)
h_carray_append(seq, (void*)elem->ast);
count++;
}
if (count < env_->count)
goto err;
assert(count == env_->count);
succ:
; // necessary for the label to be here...
HParsedToken *res = a_new(HParsedToken, 1);
res->token_type = TT_SEQUENCE;
res->seq = seq;
res->index = 0;
res->bit_length = 0;
res->bit_offset = 0;
return make_result(state->arena, res);
err0:
stop:
if (want_suspend(state))
return NULL; // bail out early, leaving overrun flag
if (count >= env_->count) {
state->input_stream = bak;
goto succ;
}
err:
state->input_stream = bak;
return NULL;
}
......@@ -85,6 +87,7 @@ static HParsedToken *reshape_many(const HParseResult *p, void *user)
res->seq = seq;
res->index = p->ast->index;
res->bit_offset = p->ast->bit_offset;
res->bit_length = p->bit_length;
return res;
}
......@@ -92,22 +95,17 @@ static void desugar_many(HAllocator *mm__, HCFStack *stk__, void *env) {
// TODO: refactor this.
HRepeat *repeat = (HRepeat*)env;
if (!repeat->min_p) {
assert(!"Unreachable");
// count is an exact count.
assert(repeat->sep == NULL);
HCFS_BEGIN_CHOICE() {
HCFS_BEGIN_SEQ() {
for (size_t i = 0; i < repeat->count; i++) {
if (i != 0 && repeat->sep != NULL)
HCFS_DESUGAR(repeat->sep); // Should be ignored.
for (size_t i = 0; i < repeat->count; i++)
HCFS_DESUGAR(repeat->p);
}
} HCFS_END_SEQ();
} HCFS_END_CHOICE();
return;
}
if(repeat->count > 1) {
assert_message(0, "'h_repeat_n' is not context-free, can't be desugared");
return;
}
assert(repeat->count <= 1);
/* many(A) =>
Ma -> A Mar
......
......@@ -4,10 +4,11 @@ static HParseResult* parse_not(void* env, HParseState* state) {
HInputStream bak = state->input_stream;
if (h_do_parse((HParser*)env, state))
return NULL;
else {
state->input_stream = bak;
return make_result(state->arena, NULL);
}
if (want_suspend(state))
return NULL; // bail out early, leaving overrun flag
// regular parse failure -> success
state->input_stream = bak;
return make_result(state->arena, NULL);
}
static const HParserVtable not_vt = {
......