Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • hammer/hammer
  • mlp/hammer
  • xentrac/hammer
  • pesco/hammer
  • letitiali/hammer
  • nobody/hammer
  • kia/hammer-sandbox
  • vyrus001/hammer
  • denleylam/hammer
9 results
Show changes
Showing
with 1667 additions and 222 deletions
......@@ -2,11 +2,6 @@ require 'hammer/internal'
require 'hammer/parser'
require 'hammer/parser_builder'
# TODO:
# Probably need to rename this file to 'hammer-parser.rb', so
# people can use "require 'hammer-parser'" in their code.
# Leave this in for now to be able to play around with HParseResult in irb.
x = nil
parser = Hammer::Parser.build {
......
......@@ -19,8 +19,10 @@ module Hammer
@dont_gc = dont_gc.dup
end
# dont_gc is required to build a fuzzer from the declaration of Hammer::Parser object.
attr_reader :name
attr_reader :h_parser
attr_reader :dont_gc
# Parse the given data. Returns the parse result if successful, nil otherwise.
#
......
require 'bundler/setup'
require 'minitest/autorun'
require 'hammer'
require 'hammer-parser'
class TestToken < Minitest::Test
def setup
super
......
# -*- coding: utf-8 -*-
require 'bundler/setup'
require 'hammer'
require 'hammer-parser'
require 'minitest/autorun'
class ParserTest < Minitest::Test
......
%module hammer
%begin %{
#define SWIG_PYTHON_STRICT_BYTE_CHAR
%}
%nodefaultctor;
......@@ -25,6 +28,20 @@
}
%pythoncode %{
# Python 2/3 compatibility shim.
# On Python 2 the names `long` and `unicode` exist; on Python 3 looking them
# up raises NameError and the fallback definitions are used instead.
try:
    INTEGER_TYPES = (int, long)
except NameError:
    INTEGER_TYPES = (int,)

try:
    TEXT_TYPE = unicode

    def bchr(i):
        """Return the one-byte string for integer i (Python 2: str is bytes)."""
        return chr(i)
except NameError:
    TEXT_TYPE = str

    def bchr(i):
        """Return the one-byte bytes object for integer i (Python 3)."""
        return bytes([i])
class Placeholder(object):
"""The python equivalent of TT_NONE"""
def __str__(self):
......@@ -69,11 +86,11 @@
PyErr_SetString(PyExc_ValueError, "Expecting a string");
return NULL;
} else {
$1 = *(uint8_t*)PyString_AsString($input);
$1 = *(uint8_t*)PyBytes_AsString($input);
}
}
%typemap(out) HBytes* {
$result = PyString_FromStringAndSize((char*)$1->token, $1->len);
$result = PyBytes_FromStringAndSize((char*)$1->token, $1->len);
}
%typemap(out) struct HCountedArray_* {
int i;
......@@ -173,7 +190,7 @@
return PyObject_CallFunctionObjArgs(_helper_Placeholder, NULL);
break;
case TT_BYTES:
return PyString_FromStringAndSize((char*)token->token_data.bytes.token, token->token_data.bytes.len);
return PyBytes_FromStringAndSize((char*)token->token_data.bytes.token, token->token_data.bytes.len);
case TT_SINT:
// TODO: return PyINT if appropriate
return PyLong_FromLong(token->token_data.sint);
......@@ -250,36 +267,35 @@
}
%pythoncode %{
def action(p, act):
    """Thin wrapper around _h_action(p, act)."""
    return _h_action(p, act)


def attr_bool(p, pred):
    """Thin wrapper around _h_attr_bool(p, pred)."""
    return _h_attr_bool(p, pred)


def ch(ch):
    """Build a single-character parser.

    A bytes or text argument is handed to token(); anything else is passed
    to _h_ch() unchanged.
    """
    # TEXT_TYPE is `unicode` on Python 2 and `str` on Python 3, so this test
    # does not reference a name that is missing on Python 3.
    if isinstance(ch, (bytes, TEXT_TYPE)):
        return token(ch)
    else:
        return _h_ch(ch)


def ch_range(c1, c2):
    """Build a parser for the range c1..c2.

    Both bounds must be of the same kind: either both bytes or both
    non-bytes. Text (unicode) bounds are rejected with TypeError. When the
    bounds are bytes, the parse result is mapped through bchr so the caller
    gets bytes back as well.
    """
    dostr = isinstance(c1, bytes)
    dostr2 = isinstance(c2, bytes)
    if isinstance(c1, TEXT_TYPE) or isinstance(c2, TEXT_TYPE):
        raise TypeError("ch_range only works on bytes")
    if dostr != dostr2:
        raise TypeError("Both arguments to ch_range must be the same type")
    if dostr:
        return action(_h_ch_range(c1, c2), bchr)
    else:
        return _h_ch_range(c1, c2)


def epsilon_p():
    """Thin wrapper around _h_epsilon_p()."""
    return _h_epsilon_p()


def end_p():
    """Thin wrapper around _h_end_p()."""
    return _h_end_p()


def in_(charset):
    """Wrap _h_in(charset), mapping the parse result through bchr."""
    return action(_h_in(charset), bchr)


def not_in(charset):
    """Wrap _h_not_in(charset), mapping the parse result through bchr."""
    return action(_h_not_in(charset), bchr)


def not_(p):
    """Thin wrapper around _h_not(p)."""
    return _h_not(p)


def int_range(p, i1, i2):
    """Thin wrapper around _h_int_range(p, i1, i2)."""
    return _h_int_range(p, i1, i2)
......
......@@ -108,3 +108,77 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) {
out <<= final_shift;
return (out ^ msb) - msb; // perform sign extension
}
/* Advance `stream` by `count` bits without producing a value.
 *
 * Does nothing when `count` is zero or the stream is already marked as
 * overrun. Sets stream->overrun when the skip cannot be satisfied from the
 * remaining input.
 */
void h_skip_bits(HInputStream* stream, size_t count) {
  // nothing requested, or the stream has already failed
  if (count == 0 || stream->overrun)
    return;

  // no input left at all
  if (stream->index == stream->length) {
    stream->overrun = true;
    return;
  }

  // bits still unread in the byte at stream->index
  size_t bits_in_byte = 8 - stream->bit_offset - stream->margin;
  if (count < bits_in_byte) {
    // the skip ends inside the current byte
    stream->bit_offset += count;
    return;
  }

  // finish off a partially consumed byte so we are byte-aligned again
  if (bits_in_byte < 8) {
    count -= bits_in_byte;
    stream->index += 1;
    stream->bit_offset = 0;
    stream->margin = 0;
  }
  assert(stream->bit_offset == 0);
  assert(stream->margin == 0);

  // whole bytes
  size_t whole_bytes = count / 8;
  size_t bytes_avail = stream->length - stream->index;
  if (whole_bytes > bytes_avail) {
    stream->index = stream->length;
    stream->overrun = true;
    return;
  }
  stream->index += whole_bytes;
  count %= 8;
  assert(count < 8);

  // leftover bits need one more byte to land in
  if (count > 0 && stream->index == stream->length)
    stream->overrun = true;
  else
    stream->bit_offset = count;
}
/* Reposition `stream` to the absolute bit position `pos`.
 *
 * `pos` is split into a byte index (pos / 8) and a bit offset (pos % 8).
 * A target beyond the end of the input clamps the stream to its end and
 * sets stream->overrun. Positioning exactly at the end (bit offset 0) is
 * allowed.
 *
 * Note: this function only ever sets stream->overrun; it never clears it.
 */
void h_seek_bits(HInputStream* stream, size_t pos) {
  size_t pos_index = pos / 8;
  size_t pos_offset = pos % 8;

  /* Evaluate the end-of-input condition first. Otherwise a stream that is
   * already at its end (index == length) would take the "same byte"
   * shortcut for a non-zero bit offset and silently end up pointing into a
   * byte that does not exist, with no overrun recorded. */
  bool past_end = (pos_index > stream->length) ||
                  (pos_index == stream->length && pos_offset > 0);

  /* seek within the current byte? (margin is deliberately left alone) */
  if (pos_index == stream->index && !past_end) {
    stream->bit_offset = pos_offset;
    return;
  }

  /* moving to a different byte: any margin on the old byte no longer applies */
  stream->margin = 0;

  /* seek past the end? */
  if (past_end) {
    stream->index = stream->length;
    stream->bit_offset = 0;
    stream->overrun = true;
    return;
  }

  stream->index = pos_index;
  stream->bit_offset = pos_offset;
}
......@@ -12,10 +12,8 @@
HBitWriter *h_bit_writer_new(HAllocator* mm__) {
HBitWriter *writer = h_new(HBitWriter, 1);
memset(writer, 0, sizeof(*writer));
writer->buf = mm__->alloc(mm__, writer->capacity = 8);
if (!writer) {
return NULL;
}
writer->buf = h_alloc(mm__, writer->capacity = 8);
assert(writer != NULL);
memset(writer->buf, 0, writer->capacity);
writer->mm__ = mm__;
writer->flags = BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN;
......
This diff is collapsed.
......@@ -8,15 +8,15 @@ typedef struct HCFGrammar_ {
HHashSet *nts; // HCFChoices, each representing the alternative
// productions for one nonterminal
HHashSet *geneps; // set of NTs that can generate the empty string
HHashTable **first; // memoized first sets of the grammar's symbols
HHashTable **follow; // memoized follow sets of the grammar's NTs
size_t kmax; // maximum lookahead depth allocated
HHashTable *first; // memoized first sets of the grammar's symbols
HHashTable *follow; // memoized follow sets of the grammar's NTs
HArena *arena;
HAllocator *mm__;
// constant set containing only the empty string.
// this is only a member of HCFGrammar because it needs a pointer to arena.
// constant sets containing only the empty string or end symbol.
// these are only members of HCFGrammar because they need a pointer to arena.
const struct HStringMap_ *singleton_epsilon;
const struct HStringMap_ *singleton_end;
} HCFGrammar;
......@@ -37,6 +37,7 @@ typedef struct HStringMap_ {
void *end_branch; // points to leaf value
HHashTable *char_branches; // maps to inner nodes (HStringMaps)
HArena *arena;
bool taint; // for use by h_follow() and h_first()
} HStringMap;
HStringMap *h_stringmap_new(HArena *a);
......@@ -52,10 +53,14 @@ void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead);
bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end);
bool h_stringmap_present_epsilon(const HStringMap *m);
bool h_stringmap_empty(const HStringMap *m);
bool h_stringmap_equal(const HStringMap *a, const HStringMap *b);
// Look up the entry stored in m->char_branches under the key derived from
// the byte c (via char_key) and return whatever h_hashtable_get yields.
static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c)
{
  void *key = (void *)char_key(c);
  return h_hashtable_get(m->char_branches, key);
}
// dummy return value used by h_stringmap_get_lookahead when out of input
#define NEED_INPUT ((void *)-1)
/* Convert 'parser' into CFG representation by desugaring and compiling the set
* of nonterminals.
......@@ -102,4 +107,4 @@ void h_pprint_stringset(FILE *file, const HStringMap *set, int indent);
// Print `map` to `file`; `valprint` is called with `env` to render values.
void h_pprint_stringmap(FILE *file, char sep,
                        void (*valprint)(FILE *f, void *env, void *val), void *env,
                        const HStringMap *map);
// Print a single byte to `file`. Only one declaration is kept: the stale
// `char c` prototype conflicted with this one.
void h_pprint_char(FILE *file, uint8_t c);
This diff is collapsed.
......@@ -60,15 +60,8 @@ static void act_flatten_(HCountedArray *seq, const HParsedToken *tok) {
}
// Action: collect the tokens of p->ast into one flat sequence token.
//
// Starts from the empty sequence returned by h_make_seq() and lets
// act_flatten_() append into it. `user_data` is unused.
HParsedToken *h_act_flatten(const HParseResult *p, void* user_data) {
  HParsedToken *res = h_make_seq(p->arena);

  act_flatten_(res->seq, p->ast);

  return res;
}
......@@ -106,7 +99,7 @@ HParsedToken *h_make_seqn(HArena *arena, size_t n)
return ret;
}
HParsedToken *h_make_bytes(HArena *arena, uint8_t *array, size_t len)
HParsedToken *h_make_bytes(HArena *arena, const uint8_t *array, size_t len)
{
HParsedToken *ret = h_make_(arena, TT_BYTES);
ret->bytes.len = len;
......@@ -128,6 +121,20 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val)
return ret;
}
// Create a TT_DOUBLE token through h_make_() and store `val` in its dbl field.
HParsedToken *h_make_double(HArena *arena, double val)
{
  HParsedToken *tok = h_make_(arena, TT_DOUBLE);

  tok->dbl = val;
  return tok;
}
// Create a TT_FLOAT token through h_make_() and store `val` in its flt field.
HParsedToken *h_make_float(HArena *arena, float val)
{
  HParsedToken *tok = h_make_(arena, TT_FLOAT);

  tok->flt = val;
  return tok;
}
// XXX -> internal
HParsedToken *h_carray_index(const HCountedArray *a, size_t i)
{
......
......@@ -195,9 +195,11 @@ HParsedToken *h_act_ignore(const HParseResult *p, void* user_data);
// Token constructors. Each takes the arena as its first argument.
HParsedToken *h_make(HArena *arena, HTokenType type, void *value);
HParsedToken *h_make_seq(HArena *arena);  // Makes empty sequence.
HParsedToken *h_make_seqn(HArena *arena, size_t n);  // Makes empty sequence of expected size n.
// Only the const-qualified prototype is kept; it matches the definition and
// the non-const duplicate declared an incompatible type.
HParsedToken *h_make_bytes(HArena *arena, const uint8_t *array, size_t len);
HParsedToken *h_make_sint(HArena *arena, int64_t val);
HParsedToken *h_make_uint(HArena *arena, uint64_t val);
HParsedToken *h_make_double(HArena *arena, double val);
HParsedToken *h_make_float(HArena *arena, float val);
// Standard short-hands to make tokens in an action.
#define H_MAKE(TYP, VAL) h_make(p->arena, (HTokenType)TT_ ## TYP, VAL)
......@@ -206,6 +208,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val);
#define H_MAKE_BYTES(VAL, LEN) h_make_bytes(p->arena, VAL, LEN)
#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL)
#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL)
#define H_MAKE_DOUBLE(VAL) h_make_double(p->arena, VAL)
#define H_MAKE_FLOAT(VAL) h_make_float(p->arena, VAL)
// Extract (cast) type-specific value back from HParsedTokens...
......@@ -218,6 +222,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val);
#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK)
#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK)
#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK)
#define H_ASSERT_DOUBLE(TOK) h_assert_type(TT_DOUBLE, TOK)
#define H_ASSERT_FLOAT(TOK) h_assert_type(TT_FLOAT, TOK)
// Assert expected type and return contained value.
#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user)
......@@ -225,6 +231,8 @@ HParsedToken *h_make_uint(HArena *arena, uint64_t val);
#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes)
#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint)
#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint)
#define H_CAST_DOUBLE(TOK) (H_ASSERT_DOUBLE(TOK)->dbl)
#define H_CAST_FLOAT(TOK) (H_ASSERT_FLOAT(TOK)->flt)
// Sequence access...
......@@ -247,7 +255,9 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va);
// Index into a sequence token by path and cast the element found there.
#define H_INDEX_BYTES(SEQ, ...)  H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_SINT(SEQ, ...)   H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_UINT(SEQ, ...)   H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_DOUBLE(SEQ, ...) H_CAST_DOUBLE(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
#define H_INDEX_FLOAT(SEQ, ...)  H_CAST_FLOAT(H_INDEX_TOKEN(SEQ, __VA_ARGS__))
// Single definition of H_INDEX_TOKEN: SEQ is type-checked with H_ASSERT_SEQ
// before indexing; the trailing -1 terminates the variadic index path.
#define H_INDEX_TOKEN(SEQ, ...)  h_seq_index_path(H_ASSERT_SEQ(SEQ), __VA_ARGS__, -1)
// Standard short-hand to access and cast elements on a sequence token.
#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__)
......@@ -255,6 +265,9 @@ HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va);
#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__)
#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__)
#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__)
#define H_FIELD_DOUBLE(...) H_INDEX_DOUBLE(p->ast, __VA_ARGS__)
#define H_FIELD_FLOAT(...) H_INDEX_FLOAT(p->ast, __VA_ARGS__)
#define H_FIELD_TOKEN(...) H_INDEX_TOKEN(p->ast, __VA_ARGS__)
// Lower-level helper for h_seq_index.
HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -81,6 +81,7 @@ static const HParserVtable action_vt = {
.isValidCF = action_isValidCF,
.desugar = desugar_action,
.compile_to_rvm = action_ctrvm,
.higher = true,
};
HParser* h_action(const HParser* p, const HAction a, void* user_data) {
......
......@@ -3,20 +3,21 @@
// Parse function for the "and" (positive lookahead) combinator.
//
// Runs the wrapped parser (env). On success the input stream is rewound to
// where it was before the attempt and an empty result is returned, so no
// input is consumed. On failure NULL is returned and the input stream is
// left as the inner parser left it.
static HParseResult *parse_and(void* env, HParseState* state) {
  HInputStream bak = state->input_stream;
  HParseResult *res = h_do_parse((HParser*)env, state);
  if (!res)
    return NULL;  // propagate failed input state, esp. overrun
  state->input_stream = bak;
  return make_result(state->arena, NULL);
}
static const HParserVtable and_vt = {
.parse = parse_and,
.isValidRegular = h_false, /* TODO: strictly speaking this should be regular,
but it will be a huge amount of work and difficult
to get right, so we're leaving it for a future
revision. --mlp, 18/12/12 */
but it will be a huge amount of work and
difficult to get right, so we're leaving it for
a future revision. --mlp, 18/12/12 */
.isValidCF = h_false, /* despite TODO above, this remains false. */
.compile_to_rvm = h_not_regular,
.higher = true,
};
......
......@@ -79,6 +79,7 @@ static const HParserVtable attr_bool_vt = {
.isValidCF = ab_isValidCF,
.desugar = desugar_ab,
.compile_to_rvm = ab_ctrvm,
.higher = true,
};
......
This diff is collapsed.
......@@ -14,6 +14,9 @@ static HParseResult* parse_bits(void* env, HParseState *state) {
result->sint = h_read_bits(&state->input_stream, env_->length, true);
else
result->uint = h_read_bits(&state->input_stream, env_->length, false);
result->index = 0;
result->bit_length = 0;
result->bit_offset = 0;
return make_result(state->arena, result);
}
......@@ -29,7 +32,7 @@ static HParsedToken *reshape_bits(const HParseResult *p, void* signedp_p) {
HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken));
ret->token_type = TT_UINT;
if(signedp && (seq->elements[0]->uint & 128))
if(signedp && seq->used > 0 && (seq->elements[0]->uint & 128))
ret->uint = -1; // all ones
for(size_t i=0; i<seq->used; i++) {
......@@ -102,6 +105,7 @@ static const HParserVtable bits_vt = {
.isValidCF = h_true,
.desugar = desugar_bits,
.compile_to_rvm = bits_ctrvm,
.higher = false,
};
HParser* h_bits(size_t len, bool sign) {
......