diff --git a/src/bindings/lua/hammer.lua b/src/bindings/lua/hammer.lua
new file mode 100644
index 0000000000000000000000000000000000000000..fe0c47fae66933df813221ee867521315e2b9277
--- /dev/null
+++ b/src/bindings/lua/hammer.lua
@@ -0,0 +1,393 @@
+local ffi = require("ffi")
+
+-- C declarations for the parts of libhammer used by this binding.
+ffi.cdef[[
+static const int BYTE_BIG_ENDIAN = 0x1;
+static const int BIT_BIG_ENDIAN = 0x2;
+static const int BYTE_LITTLE_ENDIAN = 0x0;
+static const int BIT_LITTLE_ENDIAN = 0x0;
+
+typedef enum HParserBackend_ {
+  PB_MIN = 0,
+  PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
+  PB_REGULAR,
+  PB_LLk,
+  PB_LALR,
+  PB_GLR,
+  PB_MAX = PB_GLR
+} HParserBackend;
+
+typedef enum HTokenType_ {
+  TT_NONE = 1,
+  TT_BYTES = 2,
+  TT_SINT = 4,
+  TT_UINT = 8,
+  TT_SEQUENCE = 16,
+  TT_RESERVED_1, // reserved for backend-specific internal use
+  TT_ERR = 32,
+  TT_USER = 64,
+  TT_MAX
+} HTokenType;
+
+typedef struct HBytes_ {
+  const uint8_t *token;
+  size_t len;
+} HBytes;
+
+typedef struct HArena_ HArena ; // hidden implementation
+typedef struct HAllocator_ HAllocator; // opaque here; only used through pointers
+
+typedef struct HCountedArray_ {
+  size_t capacity;
+  size_t used;
+  HArena * arena;
+  struct HParsedToken_ **elements;
+} HCountedArray;
+
+typedef struct HParsedToken_ {
+  HTokenType token_type;
+  union {
+    HBytes bytes;
+    int64_t sint;
+    uint64_t uint;
+    double dbl;
+    float flt;
+    HCountedArray *seq; // a sequence of HParsedToken's
+    void *user;
+  };
+  size_t index;
+  size_t bit_length;
+  char bit_offset;
+} HParsedToken;
+
+typedef struct HParseResult_ {
+  const HParsedToken *ast;
+  int64_t bit_length;
+  HArena * arena;
+} HParseResult;
+
+typedef struct HParserVtable_ HParserVtable;
+typedef struct HCFChoice_ HCFChoice;
+
+typedef struct HParser_ {
+  const HParserVtable *vtable;
+  HParserBackend backend;
+  void* backend_data;
+  void *env;
+  HCFChoice *desugared;
+} HParser;
+
+typedef HParsedToken* (*HAction)(const HParseResult *p, void* user_data);
+typedef bool (*HPredicate)(HParseResult *p, void* user_data);
+typedef HParser* (*HContinuation)(HAllocator *mm__, const HParsedToken *x, void *env);
+
+HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length);
+HParser* h_token(const uint8_t *str, const size_t len);
+HParser* h_ch(const uint8_t c);
+HParser* h_ch_range(const uint8_t lower, const uint8_t upper);
+HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper);
+HParser* h_bits(size_t len, bool sign);
+HParser* h_int64();
+HParser* h_int32();
+HParser* h_int16();
+HParser* h_int8();
+HParser* h_uint64();
+HParser* h_uint32();
+HParser* h_uint16();
+HParser* h_uint8();
+HParser* h_whitespace(const HParser* p);
+HParser* h_left(const HParser* p, const HParser* q);
+HParser* h_right(const HParser* p, const HParser* q);
+HParser* h_middle(const HParser* p, const HParser* x, const HParser* q);
+HParser* h_action(const HParser* p, const HAction a, void* user_data);
+HParser* h_in(const uint8_t *charset, size_t length);
+HParser* h_not_in(const uint8_t *charset, size_t length);
+HParser* h_end_p();
+HParser* h_nothing_p();
+HParser* h_sequence__a(void *args[]);
+HParser* h_choice__a(void *args[]);
+HParser* h_permutation__a(void *args[]);
+HParser* h_butnot(const HParser* p1, const HParser* p2);
+HParser* h_difference(const HParser* p1, const HParser* p2);
+HParser* h_xor(const HParser* p1, const HParser* p2);
+HParser* h_many(const HParser* p);
+HParser* h_many1(const HParser* p);
+HParser* h_repeat_n(const HParser* p, const size_t n);
+HParser* h_optional(const HParser* p);
+HParser* h_ignore(const HParser* p);
+HParser* h_sepBy(const HParser* p, const HParser* sep);
+HParser* h_sepBy1(const HParser* p, const HParser* sep);
+HParser* h_epsilon_p();
+HParser* h_length_value(const HParser* length, const HParser* value);
+HParser* h_attr_bool(const HParser* p, HPredicate pred, void* user_data);
+HParser* h_and(const HParser* p);
+HParser* h_not(const HParser* p);
+HParser* h_indirect(void);
+void h_bind_indirect(HParser* indirect, const HParser* inner);
+HParser* h_with_endianness(char endianness, const HParser* p);
+HParser* h_put_value(const HParser* p, const char* name);
+HParser* h_get_value(const char* name);
+HParser* h_bind(const HParser *p, HContinuation k, void *env);
+
+int h_compile(HParser* parser, HParserBackend backend, const void* params);
+]]
+local h = ffi.load("hammer")
+
+-- Module table handed back to require("hammer").
+local M = {}
+
+-- Endianness flags for with_endianness, read back from the cdef above.
+M.BYTE_BIG_ENDIAN = ffi.C.BYTE_BIG_ENDIAN
+M.BIT_BIG_ENDIAN = ffi.C.BIT_BIG_ENDIAN
+M.BYTE_LITTLE_ENDIAN = ffi.C.BYTE_LITTLE_ENDIAN
+M.BIT_LITTLE_ENDIAN = ffi.C.BIT_LITTLE_ENDIAN
+
+-- Copies the varargs into a void*[] that has one extra slot. ffi.new
+-- zero-fills the array, so that extra slot is a NULL terminator for the
+-- h_*__a functions.
+local function parser_array(...)
+  local n = select("#", ...)
+  local arr = ffi.new("void *[?]", n + 1)
+  for i = 1, n do
+    arr[i - 1] = select(i, ...)
+  end
+  return arr
+end
+
+-- Accepts a charset as a string, or as a table of strings to concatenate.
+local function charset_string(charset)
+  if type(charset) == "table" then
+    return table.concat(charset)
+  end
+  return charset
+end
+
+-- Gives HParser cdata a parse method, so `p:parse(str)` works.
+ffi.metatype("HParser", {
+  __index = {
+    parse = function(p, str)
+      local result = h.h_parse(p, str, #str)
+      -- A NULL pointer cdata is truthy in Lua; hand back nil instead.
+      if result == nil then
+        return nil
+      end
+      return result
+    end,
+  },
+})
+
+function M.token(str)
+  return h.h_token(str, #str)
+end
+
+-- `c` is a byte value, or a string whose first byte is used.
+function M.ch(c)
+  if type(c) == "number" then
+    return h.h_ch(c)
+  end
+  return h.h_ch(c:byte())
+end
+
+-- Bounds are two byte values, or two strings whose first bytes are used.
+function M.ch_range(lower, upper)
+  if type(lower) == "number" and type(upper) == "number" then
+    return h.h_ch_range(lower, upper)
+  end
+  -- FIXME this is really not thorough type checking
+  return h.h_ch_range(lower:byte(), upper:byte())
+end
+
+function M.int_range(parser, lower, upper)
+  return h.h_int_range(parser, lower, upper)
+end
+
+function M.bits(len, sign)
+  return h.h_bits(len, sign)
+end
+
+function M.int64()
+  return h.h_int64()
+end
+
+function M.int32()
+  return h.h_int32()
+end
+
+function M.int16()
+  return h.h_int16()
+end
+
+function M.int8()
+  return h.h_int8()
+end
+
+function M.uint64()
+  return h.h_uint64()
+end
+
+function M.uint32()
+  return h.h_uint32()
+end
+
+function M.uint16()
+  return h.h_uint16()
+end
+
+function M.uint8()
+  return h.h_uint8()
+end
+
+function M.whitespace(parser)
+  return h.h_whitespace(parser)
+end
+
+function M.left(parser1, parser2)
+  return h.h_left(parser1, parser2)
+end
+
+function M.right(parser1, parser2)
+  return h.h_right(parser1, parser2)
+end
+
+function M.middle(parser1, parser2, parser3)
+  return h.h_middle(parser1, parser2, parser3)
+end
+
+-- There could also be an overload of this that doesn't
+-- bother with the env pointer, and passes it as NIL by
+-- default, but I'm not going to deal with overloads now.
+-- NOTE: ffi.cast allocates a callback that LuaJIT keeps until it is freed
+-- explicitly; this binding never frees it.
+function M.action(parser, action, user_data)
+  local cb = ffi.cast("HAction", action)
+  return h.h_action(parser, cb, user_data)
+end
+
+-- `in` is a reserved word, so this is exported as M["in"] and as M.in_.
+M["in"] = function(charset)
+  local set = charset_string(charset)
+  return h.h_in(set, #set)
+end
+M.in_ = M["in"]
+
+function M.not_in(charset)
+  local set = charset_string(charset)
+  return h.h_not_in(set, #set)
+end
+
+function M.end_p()
+  return h.h_end_p()
+end
+
+function M.nothing_p()
+  return h.h_nothing_p()
+end
+
+function M.sequence(...)
+  return h.h_sequence__a(parser_array(...))
+end
+
+function M.choice(...)
+  return h.h_choice__a(parser_array(...))
+end
+
+function M.permutation(...)
+  return h.h_permutation__a(parser_array(...))
+end
+
+function M.butnot(parser1, parser2)
+  return h.h_butnot(parser1, parser2)
+end
+
+function M.difference(parser1, parser2)
+  return h.h_difference(parser1, parser2)
+end
+
+function M.xor(parser1, parser2)
+  return h.h_xor(parser1, parser2)
+end
+
+function M.many(parser)
+  return h.h_many(parser)
+end
+
+function M.many1(parser)
+  return h.h_many1(parser)
+end
+
+function M.repeat_n(parser, n)
+  return h.h_repeat_n(parser, n)
+end
+
+function M.optional(parser)
+  return h.h_optional(parser)
+end
+
+function M.ignore(parser)
+  return h.h_ignore(parser)
+end
+
+function M.sepBy(parser, sep)
+  return h.h_sepBy(parser, sep)
+end
+
+function M.sepBy1(parser, sep)
+  return h.h_sepBy1(parser, sep)
+end
+
+function M.epsilon_p()
+  return h.h_epsilon_p()
+end
+
+function M.length_value(length, value)
+  return h.h_length_value(length, value)
+end
+
+-- The predicate callback is never freed, as with action.
+function M.attr_bool(parser, predicate, user_data)
+  local cb = ffi.cast("HPredicate", predicate)
+  return h.h_attr_bool(parser, cb, user_data)
+end
+
+-- `and` and `not` are reserved words; see M["in"] for the naming scheme.
+M["and"] = function(parser)
+  return h.h_and(parser)
+end
+M.and_ = M["and"]
+
+M["not"] = function(parser)
+  return h.h_not(parser)
+end
+M.not_ = M["not"]
+
+function M.indirect()
+  return h.h_indirect()
+end
+
+function M.bind_indirect(indirect, inner)
+  return h.h_bind_indirect(indirect, inner)
+end
+
+function M.with_endianness(endianness, parser)
+  return h.h_with_endianness(endianness, parser)
+end
+
+function M.put_value(parser, name)
+  return h.h_put_value(parser, name)
+end
+
+function M.get_value(name)
+  return h.h_get_value(name)
+end
+
+-- The continuation callback is never freed, as with action.
+function M.bind(parser, continuation, env)
+  local cb = ffi.cast("HContinuation", continuation)
+  return h.h_bind(parser, cb, env)
+end
+
+function M.compile(parser, backend, params)
+  return h.h_compile(parser, backend, params)
+end
+
+return M
diff --git a/src/bindings/lua/test.lua b/src/bindings/lua/test.lua
new file mode 100644
index 0000000000000000000000000000000000000000..a5ee79773ecb590bc351ce15b817d8810db6ce5d
--- /dev/null
+++ b/src/bindings/lua/test.lua
@@ -0,0 +1,840 @@
+describe("Combinator tests", function()
+  local hammer
+
+  setup(function()
+    hammer = require("hammer")
+  end)
+
+  teardown(function()
+    hammer = nil
+  end)
+
+  describe("Token tests", function()
+    local parser = hammer.token("95" .. string.char(0xa2))
+    it("parses a token", function()
+      local ret = parser:parse("95" .. string.char(0xa2))
+      assert.are.same(ret.ast.bytes, "95" .. 
string.char(0xa2))
+    end)
+    it("does not parse an incomplete token", function()
+      local ret = parser:parse("95")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Char tests", function()
+    local parser = hammer.ch(0xa2)
+    it("parses a matching char", function()
+      local ret = parser:parse(string.char(0xa2))
+      assert.are.same(ret.ast.uint, string.char(0xa2))
+    end)
+    it("rejects a non-matching char", function()
+      local ret = parser:parse(string.char(0xa3))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Char range tests", function()
+    local parser = hammer.ch_range("a", "c")
+    it("parses a char in the range", function()
+      local ret = parser:parse("b")
+      assert.are.same(ret.ast.uint, "b")
+    end)
+    it("rejects a char outside the range", function()
+      local ret = parser:parse("d")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Signed 64-bit int tests", function()
+    local parser = hammer.int64()
+    it("parses a valid 64-bit int", function()
+      local ret = parser:parse(string.char(0xff, 0xff, 0xff, 0xfe, 0x00, 0x00, 0x00, 0x00))
+      assert.are.same(ret.ast.sint, -0x200000000)
+    end)
+    it("does not parse an invalid 64-bit int", function()
+      local ret = parser:parse(string.char(0xff, 0xff, 0xff, 0xfe, 0x00, 0x00, 0x00))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Signed 32-bit int tests", function()
+    local parser = hammer.int32()
+    it("parses a valid 32-bit int", function()
+      local ret = parser:parse(string.char(0xff, 0xfe, 0x00, 0x00))
+      assert.are.same(ret.ast.sint, -0x20000)
+    end)
+    it("does not parse an invalid 32-bit int", function()
+      local ret = parser:parse(string.char(0xff, 0xfe, 0x00))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Signed 16-bit int tests", function()
+    local parser = hammer.int16()
+    it("parses a valid 16-bit int", function()
+      local ret = parser:parse(string.char(0xfe, 0x00))
+      assert.are.same(ret.ast.sint, -0x200)
+    end)
+    it("does not parse an invalid 16-bit int", function()
+      local ret = parser:parse(string.char(0xfe))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Signed 8-bit int tests", function()
+    local parser = hammer.int8()
+    it("parses a valid 8-bit int", function()
+      local ret = parser:parse(string.char(0x88))
+      assert.are.same(ret.ast.sint, -0x78)
+    end)
+    it("does not parse an invalid 8-bit int", function()
+      local ret = parser:parse("")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Unsigned 64-bit int tests", function()
+    local parser = hammer.uint64()
+    it("parses a valid 64-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00))
+      assert.are.same(ret.ast.uint, 0x200000000)
+    end)
+    it("does not parse an invalid 64-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Unsigned 32-bit int tests", function()
+    local parser = hammer.uint32()
+    it("parses a valid 32-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x00, 0x02, 0x00, 0x00))
+      assert.are.same(ret.ast.uint, 0x20000) -- unsigned: bytes 00 02 00 00 are +0x20000
+    end)
+    it("does not parse an invalid 32-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x00, 0x02, 0x00))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Unsigned 16-bit int tests", function()
+    local parser = hammer.uint16()
+    it("parses a valid 16-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x02, 0x00))
+      assert.are.same(ret.ast.uint, 0x200)
+    end)
+    it("does not parse an invalid 16-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x02))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Unsigned 8-bit int tests", function()
+    local parser = hammer.uint8()
+    it("parses a valid 8-bit unsigned int", function()
+      local ret = parser:parse(string.char(0x78))
+      assert.are.same(ret.ast.uint, 0x78)
+    end)
+    it("does not parse an invalid 8=bit unsigned int", function()
+      local ret = parser:parse("")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Integer range tests", function()
+    local parser = hammer.int_range(hammer.uint8(), 3, 10)
+    it("parses a value in the range", function()
+      local ret = parser:parse(string.char(0x05))
+      assert.are.same(ret.ast.uint, 5)
+    end)
+    it("does not parse a value outside the range", function()
+      local ret = parser:parse(string.char(0xb))
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Whitespace tests", function()
+    local parser = hammer.whitespace(hammer.ch("a"))
+    local parser2 = hammer.whitespace(hammer.end_p())
+    it("parses a string with no whitespace", function()
+      local ret = parser:parse("a")
+      assert.are.same(ret.ast.uint, "a")
+    end)
+    it("parses a string with a leading space", function()
+      local ret = parser:parse(" a")
+      assert.are.same(ret.ast.uint, "a")
+    end)
+    it("parses a string with leading spaces", function()
+      local ret = parser:parse("  a") -- two spaces, so this differs from the single-space case
+      assert.are.same(ret.ast.uint, "a")
+    end)
+    it("parses a string with a leading tab", function()
+      local ret = parser:parse("\ta")
+      assert.are.same(ret.ast.uint, "a")
+    end)
+    it("does not parse a string with a leading underscore", function()
+      local ret = parser:parse("_a")
+      assert.is_falsy(ret)
+    end)
+    it("parses an empty string", function()
+      local ret = parser2:parse("")
+      assert.are.same(ret.ast, nil)
+    end)
+    it("parses a whitespace-only string", function()
+      local ret = parser2:parse(" ")
+      assert.are.same(ret.ast, nil)
+    end)
+    it("does not parse a string with leading whitespace and a trailing character", function()
+      local ret = parser2:parse(" x")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Leftmost-parser tests", function()
+    local parser = hammer.left(hammer.ch("a"), hammer.ch(" "))
+    it("parses the leftmost character", function()
+      local ret = parser:parse("a ")
+      assert.are.same(ret.ast.uint, "a")
+    end)
+    it("does not parse a string that is too short", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string that starts with the wrong character", function()
+      local ret = parser:parse(" ")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string with the wrong character in the second place", function()
+      local ret = parser:parse("ab")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Rightmost-parser tests", function()
+    local parser = hammer.right(hammer.ch(" "), hammer.ch("a"))
+    it("parses the rightmost character", function()
+      local ret = parser:parse(" a")
+      assert.are.same(ret.ast.uint, "a")
+    end)
+    it("does not parse a string that starts with the wrong character", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string that is too short", function()
+      local ret = parser:parse(" ")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string with the characters in the wrong order", function()
+      local ret = parser:parse("ba")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Middle-parser tests", function()
+    local parser = hammer.middle(hammer.ch(" "), hammer.ch("a"), hammer.ch(" "))
+    it("parses the middle character", function()
+      local ret = parser:parse(" a ")
+      assert.are.same(ret.ast.uint, "a")
+    end)
+    it("does not parse a string that is too short", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+      ret = parser:parse(" ")
+      assert.is_falsy(ret)
+      ret = parser:parse(" a")
+      assert.is_falsy(ret)
+      ret = parser:parse("a ")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string with the wrong character in the middle", function()
+      local ret = parser:parse(" b ")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string that starts with the wrong character", function()
+      local ret = parser:parse("ba ")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string that ends with the wrong character", function()
+      local ret = parser:parse(" ab")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Semantic action tests", function()
+    local function upcase(result, user_data)
+      local chars = result.ast.seq.elements
+      local ret = ""
+      for i, v in ipairs(chars) do -- NOTE(review): ipairs over FFI cdata may not iterate; confirm
+        ret = ret .. string.char(v.uint):upper()
+      end
+      return ret
+    end
+    local parser = hammer.action(hammer.sequence(hammer.choice(hammer.ch("a"), hammer.ch("A")), hammer.choice(hammer.ch("b"), hammer.ch("B"))), upcase, nil)
+    it("converts a lowercase 'ab' to uppercase", function()
+      local ret = parser:parse("ab")
+      assert.are.same(ret.ast.seq, {"A", "B"})
+    end)
+    it("accepts an uppercase 'AB' unchanged", function()
+      local ret = parser:parse("AB")
+      assert.are.same(ret.ast.seq, {"A", "B"})
+    end)
+    it("rejects strings that don't match the underlying parser", function()
+      local ret = parser:parse("XX")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Character set membership tests", function()
+    local parser = hammer["in"]("abc") -- `in` is reserved, so dot syntax cannot be used
+    it("parses a character that is in the included set", function()
+      local ret = parser:parse("b")
+      assert.are.same(ret.ast.uint, "b")
+    end)
+    it("does not parse a character that is not in the included set", function()
+      local ret = parser:parse("d")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Character set non-membership tests", function()
+    local parser = hammer.not_in("abc")
+    it("parses a character that is not in the excluded set", function()
+      local ret = parser:parse("d")
+      assert.are.same(ret.ast.uint, "d")
+    end)
+    it("does not parse a character that is in the excluded set", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("End-of-input tests", function()
+    local parser = hammer.sequence(hammer.ch("a"), hammer.end_p())
+    it("parses a string that ends where it is expected to", function()
+      local ret = parser:parse("a")
+      assert.are.same(ret.ast.seq, {"a"})
+    end)
+    it("does not parse a string that is too long", function()
+      local ret = parser:parse("aa")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Bottom parser tests", function()
+    local parser = hammer.nothing_p()
+    it("always fails", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Parser sequence tests", function()
+    local parser = hammer.sequence(hammer.ch("a"), hammer.ch("b"))
+    local parser2 = hammer.sequence(hammer.ch("a"), hammer.whitespace(hammer.ch("b")))
+    it("parses a string matching the sequence", function()
+      local ret = parser:parse("ab")
+      assert.are.same(ret.ast.seq, {"a", "b"})
+    end)
+    it("does not parse a string that is too short", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a string with the sequence out of order", function()
+      local ret = parser:parse("ba")
+      assert.is_falsy(ret)
+    end)
+    it("parses a whitespace-optional string with no whitespace", function()
+      local ret = parser2:parse("ab")
+      assert.are.same(ret.ast.seq, {"a", "b"})
+    end)
+    it("parses a whitespace-optional string containing whitespace", function()
+      local ret = parser2:parse("a b") -- parser2 is the whitespace-tolerant sequence
+      assert.are.same(ret.ast.seq, {"a", "b"})
+      ret = parser2:parse("a  b")
+      assert.are.same(ret.ast.seq, {"a", "b"})
+    end)
+  end)
+
+  describe("Choice-of-parsers tests", function()
+    local parser = hammer.choice(hammer.ch("a"), hammer.ch("b"))
+    it("parses a character in the choice set", function()
+      local ret = parser:parse("a")
+      assert.are.same(ret.ast.uint, "a")
+      ret = parser:parse("b")
+      assert.are.same(ret.ast.uint, "b")
+    end)
+    it("does not parse a character not in the choice set", function()
+      local ret = parser:parse("c")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("X-but-not-Y tests", function()
+    local parser = hammer.butnot(hammer.ch("a"), hammer.token("ab"))
+    local parser2 = hammer.butnot(hammer.ch_range("0", "9"), hammer.ch("6"))
+    it("succeeds when 'a' matches but 'ab' doesn't", function()
+      local ret = parser:parse("a")
+      assert.are.same(ret.ast.uint, "a")
+      ret = parser:parse("aa")
+      assert.are.same(ret.ast.uint, "a")
+    end)
+    it("fails when p2's result is longer than p1's", function()
+      local ret = parser:parse("ab")
+      assert.is_falsy(ret)
+    end)
+    it("fails when p2's result is the same length as p1's", function()
+      local ret = parser2:parse("6")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Difference-of-parsers tests", function()
+    local parser = hammer.difference(hammer.token("ab"), hammer.ch("a"))
+    it("succeeds when 'ab' matches and its result is longer than the result for 'a'", function()
+      local ret = parser:parse("ab")
+      assert.are.same(ret.ast.bytes, "ab") -- the surviving result is the token parser's
+    end)
+    it("fails if 'ab' doesn't match", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("XOR-of-parsers tests", function()
+    local parser = hammer.xor(hammer.ch_range("0", "6"), hammer.ch_range("5", "9"))
+    it("parses a value only in the first range", function()
+      local ret = parser:parse("0")
+      assert.are.same(ret.ast.uint, "0")
+    end)
+    it("parses a value only in the second range", function()
+      local ret = parser:parse("9")
+      assert.are.same(ret.ast.uint, "9")
+    end)
+    it("does not parse a value inside both ranges", function()
+      local ret = parser:parse("5")
+      assert.is_falsy(ret)
+    end)
+    it("does not parse a value outside the range", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Kleene * tests", function()
+    local parser = hammer.many(hammer.choice(hammer.ch("a"), hammer.ch("b")))
+    it("parses an empty string", function()
+      local ret = parser:parse("")
+      assert.are.same(ret.ast.seq, {})
+    end)
+    it("parses a single repetition of the pattern", function()
+      local ret = parser:parse("a")
+      assert.are.same(ret.ast.seq, {"a"})
+      ret = parser:parse("b")
+      assert.are.same(ret.ast.seq, {"b"})
+    end)
+    it("parses multiple repetitions of the pattern", function()
+      local ret = parser:parse("aabbaba")
+      assert.are.same(ret.ast.seq, {"a", "a", "b", "b", "a", "b", "a"})
+    end)
+  end)
+
+  describe("Kleene + tests", function()
+    local parser = hammer.many1(hammer.choice(hammer.ch("a"), hammer.ch("b")))
+    it("does not parse an empty string", function()
+      local ret = parser:parse("")
+      assert.is_falsy(ret)
+    end)
+    it("parses a single repetition of the pattern", function()
+      local ret = parser:parse("a")
+      assert.are.same(ret.ast.seq, {"a"})
+      ret = parser:parse("b")
+      assert.are.same(ret.ast.seq, {"b"})
+    end)
+    it("parses multiple repetitions of the pattern", function()
+      local ret = parser:parse("aabbaba")
+      assert.are.same(ret.ast.seq, {"a", "a", "b", "b", "a", "b", "a"})
+    end)
+    it("does not parse a string that does not start with one of the patterns to repeat", function()
+      local ret = parser:parse("daabbabadef")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Fixed-number-of-repetitions tests", function()
+    local parser = hammer.repeat_n(hammer.choice(hammer.ch("a"), hammer.ch("b")), 2)
+    it("does not parse a string without enough repetitions", function()
+      local ret = parser:parse("adef")
+      assert.is_falsy(ret)
+    end)
+    it("parses a string containing the correct number of repetitions", function()
+      local ret = parser:parse("abdef")
+      assert.are.same(ret.ast.seq, {"a", "b"})
+    end)
+    it("does not parse a string that does not start with a character in the repetition set", function()
+      local ret = parser:parse("dabdef")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Kleene ? tests", function()
+    local parser = hammer.sequence(hammer.ch("a"), hammer.optional(hammer.choice(hammer.ch("b"), hammer.ch("c"))), hammer.ch("d"))
+    it("parses a string containing either optional character", function()
+      local ret = parser:parse("abd")
+      assert.are.same(ret.ast.seq, {"a", "b", "d"})
+      ret = parser:parse("acd")
+      assert.are.same(ret.ast.seq, {"a", "c", "d"})
+    end)
+    it("parses a string missing one of the optional characters", function()
+      local ret = parser:parse("ad")
+      assert.are.same(ret.ast.seq, {"a", {}, "d"})
+    end)
+    it("does not parse a string containing a character not among the optional ones", function()
+      local ret = parser:parse("aed")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("'ignore' decorator tests", function()
+    local parser = hammer.sequence(hammer.ch("a"), hammer.ignore(hammer.ch("b")), hammer.ch("c"))
+    it("parses a string containing the pattern to ignore, and leaves that pattern out of the result", function()
+      local ret = parser:parse("abc")
+      assert.are.same(ret.ast.seq, {"a", "c"})
+    end)
+    it("does not parse a string not containing the pattern to ignore", function()
+      local ret = parser:parse("ac")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Possibly-empty separated lists", function()
+    local parser = hammer.sepBy(hammer.choice(hammer.ch("1"), hammer.ch("2"), hammer.ch("3")), hammer.ch(","))
+    it("parses an ordered list", function()
+      local ret = parser:parse("1,2,3")
+      assert.are.same(ret.ast.seq, {"1", "2", "3"})
+    end)
+    it("parses an unordered list", function()
+      local ret = parser:parse("1,3,2")
+      assert.are.same(ret.ast.seq, {"1", "3", "2"})
+    end)
+    it("parses a list not containing all options", function()
+      local ret = parser:parse("1,3")
+      assert.are.same(ret.ast.seq, {"1", "3"})
+    end)
+    it("parses a unary list", function()
+      local ret = parser:parse("3")
+      assert.are.same(ret.ast.seq, {"3"})
+    end)
+    it("parses an empty list", function()
+      local ret = parser:parse("")
+      assert.are.same(ret.ast.seq, {})
+    end)
+  end)
+
+  describe("Non-empty separated lists", function()
+    local parser = hammer.sepBy1(hammer.choice(hammer.ch("1"), hammer.ch("2"), hammer.ch("3")), hammer.ch(","))
+    it("parses an ordered list", function()
+      local ret = parser:parse("1,2,3")
+      assert.are.same(ret.ast.seq, {"1", "2", "3"})
+    end)
+    it("parses an unordered list", function()
+      local ret = parser:parse("1,3,2")
+      assert.are.same(ret.ast.seq, {"1", "3", "2"})
+    end)
+    it("parses a list not containing all options", function()
+      local ret = parser:parse("1,3")
+      assert.are.same(ret.ast.seq, {"1", "3"})
+    end)
+    it("parses a unary list", function()
+      local ret = parser:parse("3")
+      assert.are.same(ret.ast.seq, {"3"})
+    end)
+    it("does not parse an empty list", function()
+      local ret = parser:parse("")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Empty string tests", function()
+    local parser = hammer.sequence(hammer.ch("a"), hammer.epsilon_p(), hammer.ch("b"))
+    local parser2 = hammer.sequence(hammer.epsilon_p(), hammer.ch("a"))
+    local parser3 = hammer.sequence(hammer.ch("a"), hammer.epsilon_p())
+    it("parses an empty string between two characters", function()
+      local ret = parser:parse("ab")
+      assert.are.same(ret.ast.seq, {"a", "b"})
+    end)
+    it("parses an empty string before a character", function()
+      local ret = parser2:parse("a")
+      assert.are.same(ret.ast.seq, {"a"})
+    end)
+    it("parses an empty string after a character", function()
+      local ret = parser3:parse("a")
+      assert.are.same(ret.ast.seq, {"a"})
+    end)
+  end)
+
+  describe("Attribute validation tests", function()
+    local function equals(result, user_data)
+      return result.ast.seq.elements[0].uint == result.ast.seq.elements[1].uint
+    end
+    local parser = hammer.attr_bool(hammer.many1(hammer.choice(hammer.ch("a"), hammer.ch("b"))), equals)
+    it("parses successfully when both characters are the same (i.e., the validation function succeeds)", function()
+      local ret = parser:parse("aa")
+      assert.are.same(ret.ast.seq, {"a", "a"})
+      ret = parser:parse("bb")
+      assert.are.same(ret.ast.seq, {"b", "b"})
+    end)
+    it("does not parse successfully when the characters are different (i.e., the validation function fails)", function()
+      local ret = parser:parse("ab")
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Matching lookahead tests", function()
+    local parser = hammer.sequence(hammer["and"](hammer.ch("0")), hammer.ch("0"))
+    local parser2 = hammer.sequence(hammer["and"](hammer.ch("0")), hammer.ch("1"))
+    local parser3 = hammer.sequence(hammer.ch("1"), hammer["and"](hammer.ch("2")))
+    it("parses successfully when the lookahead matches the next character to parse", function()
+      local ret = parser:parse("0")
+      assert.are.same(ret.ast.seq, {"0"})
+    end)
+    it("does not parse successfully when the lookahead does not match the next character to parse", function()
+      local ret = parser2:parse("0")
+      assert.is_falsy(ret)
+    end)
+    it("parses successfully when the lookahead is there", function()
+      local ret = parser3:parse("12")
+      assert.are.same(ret.ast.seq, {"1"})
+    end)
+  end)
+
+  describe("Non-matching lookahead tests", function()
+    local parser = hammer.sequence(hammer.ch("a"), hammer.choice(hammer.ch("+"), hammer.token("++")), hammer.ch("b"))
+    local parser2 = hammer.sequence(hammer.ch("a"), hammer.choice(hammer.sequence(hammer.ch("+"), hammer["not"](hammer.ch("+"))), hammer.token("++")), hammer.ch("b"))
+    it("parses a single plus correctly in the 'choice' example", function()
+      local ret = parser:parse("a+b")
+      assert.are.same(ret.ast.seq, {"a", "+", "b"})
+    end)
+    it("does not parse a double plus correctly in the 'choice' example", function()
+      local ret = parser:parse("a++b")
+      assert.is_falsy(ret)
+    end)
+    it("parses a single plus correctly in the 'not' example", function()
+      local ret = parser2:parse("a+b")
+      assert.are.same(ret.ast.seq, {"a", {"+"}, "b"})
+    end)
+    it("parses a double plus correctly in the 'not' example", function()
+      local ret = parser2:parse("a++b")
+      assert.are.same(ret.ast.seq, {"a", "++", "b"})
+    end)
+  end)
+
+  describe("Left recursion tests", function()
+    local parser = hammer.indirect()
+    hammer.bind_indirect(parser, hammer.choice(hammer.sequence(parser, hammer.ch("a")), hammer.ch("a")))
+    it("parses the base case", function()
+      local ret = parser:parse("a")
+      assert.are.same(ret.ast.seq, {"a"})
+    end)
+    it("parses one level of recursion", function()
+      local ret = parser:parse("aa")
+      assert.are.same(ret.ast.seq, {"a", "a"})
+    end)
+    it("parses two levels of recursion", function()
+      local ret = parser:parse("aaa")
+      assert.are.same(ret.ast.seq, {{"a", "a"}, "a"})
+    end)
+  end)
+
+  describe("Right recursion tests", function()
+    local parser = hammer.indirect()
+    hammer.bind_indirect(parser, hammer.choice(hammer.sequence(hammer.ch("a"), parser), hammer.epsilon_p()))
+    it("parses the base case", function()
+      local ret = parser:parse("a")
+      assert.are.same(ret.ast.seq, {"a"})
+    end)
+    it("parses one level of recursion", function()
+      local ret = parser:parse("aa")
+      assert.are.same(ret.ast.seq, {"a", {"a"}})
+    end)
+    it("parses two levels of recursion", function()
+      local ret = parser:parse("aaa")
+      assert.are.same(ret.ast.seq, {"a", {"a", {"a"}}})
+    end)
+  end)
+
+  describe("Endianness tests", function()
+    local bit = require("bit")
+    local u32 = hammer.uint32()
+    local u5 = hammer.bits(5, false)
+    local bb = bit.bor(hammer.BYTE_BIG_ENDIAN, hammer.BIT_BIG_ENDIAN)
+    local bl = bit.bor(hammer.BYTE_BIG_ENDIAN, hammer.BIT_LITTLE_ENDIAN)
+    local lb = bit.bor(hammer.BYTE_LITTLE_ENDIAN, hammer.BIT_BIG_ENDIAN)
+    local ll = bit.bor(hammer.BYTE_LITTLE_ENDIAN, hammer.BIT_LITTLE_ENDIAN)
+    local parser1 = hammer.with_endianness(bb, u32)
+    local parser2 = hammer.with_endianness(bb, u5)
+    local parser3 = hammer.with_endianness(ll, u32)
+    local parser4 = hammer.with_endianness(ll, u5)
+    local parser5 = hammer.with_endianness(bl, u32)
+    local parser6 = hammer.with_endianness(bl, u5)
+    local parser7 = hammer.with_endianness(lb, u32)
+    local parser8 = hammer.with_endianness(lb, u5)
+    it("parses big-endian cases", function()
+      local ret = parser1:parse("abcd")
+      assert.are.same(ret.ast.uint, 0x61626364)
+      ret = parser2:parse("abcd")
+      assert.are.same(ret.ast.uint, 0xc)
+    end)
+    it("parses little-endian cases", function()
+      local ret = parser3:parse("abcd")
+      assert.are.same(ret.ast.uint, 0x64636261) -- little-endian bytes: "abcd" read in reverse
+      ret = parser4:parse("abcd")
+      assert.are.same(ret.ast.uint, 0x1) -- low five bits of 0x61
+    end)
+    it("parses mixed-endian cases", function()
+      local ret = parser5:parse("abcd")
+      assert.are.same(ret.ast.uint, 0x61626364)
+      ret = parser6:parse("abcd")
+      assert.are.same(ret.ast.uint, 0x1)
+      ret = parser7:parse("abcd")
+      assert.are.same(ret.ast.uint, 0x64636261)
+      ret = parser8:parse("abcd")
+      assert.are.same(ret.ast.uint, 0xc)
+    end)
+  end)
+
+  describe("Symbol table tests", function()
+    local parser = hammer.sequence(hammer.put_value(hammer.uint8(),"size"), hammer.token("foo"), hammer.length_value(hammer.get_value("size"), hammer.uint8()))
+    it("parses a string that has enough bytes for the specified length", function()
+      local ret = parser:parse(string.char(0x06) .. "fooabcdef")
+      assert.are.same(ret.ast.seq.elements[1].bytes, "foo")
+      assert.are.same(ret.ast.seq.elements[2].seq, {0x61, 0x62, 0x63, 0x64, 0x65, 0x66})
+    end)
+    it("does not parse a string that does not have enough bytes for the specified length", function()
+      local ret = parser:parse(string.char(0x06) .. "fooabcde") -- five payload bytes for a declared length of six
+      assert.is_falsy(ret)
+    end)
+  end)
+
+  describe("Permutation tests", function()
+    local parser = hammer.permutation(hammer.ch("a"), hammer.ch("b"), hammer.ch("c"))
+    it("parses a permutation of 'abc'", function()
+      local ret = parser:parse("abc")
+      assert.are.same(ret.ast.seq, {"a", "b", "c"})
+      ret = parser:parse("acb")
+      assert.are.same(ret.ast.seq, {"a", "c", "b"})
+      ret = parser:parse("bac")
+      assert.are.same(ret.ast.seq, {"b", "a", "c"})
+      ret = parser:parse("bca")
+      assert.are.same(ret.ast.seq, {"b", "c", "a"})
+      ret = parser:parse("cab")
+      assert.are.same(ret.ast.seq, {"c", "a", "b"})
+      ret = parser:parse("cba")
+      assert.are.same(ret.ast.seq, {"c", "b", "a"})
+    end)
+    it("does not parse a string that is not a permutation of 'abc'", function()
+      local ret = parser:parse("a")
+      assert.is_falsy(ret)
+      ret = parser:parse("ab")
+      assert.is_falsy(ret)
+      ret = parser:parse("abb")
+      assert.is_falsy(ret)
+    end)
+    parser = hammer.permutation(hammer.ch("a"), hammer.ch("b"), hammer.optional(hammer.ch("c")))
+    it("parses a string that is a permutation of 'ab[c]'", function()
+      local ret = parser:parse("abc")
+      assert.are.same(ret.ast.seq, {"a", "b", "c"})
+      ret = parser:parse("acb")
+      assert.are.same(ret.ast.seq, {"a", "c", "b"})
+      ret = parser:parse("bac")
+      assert.are.same(ret.ast.seq, {"b", "a", "c"})
+      ret = parser:parse("bca")
+      assert.are.same(ret.ast.seq, {"b", "c", "a"})
+      ret = parser:parse("cab")
+      assert.are.same(ret.ast.seq, {"c", "a", "b"})
+      ret = parser:parse("cba")
+      assert.are.same(ret.ast.seq, {"c", "b", "a"})
+      ret = parser:parse("ab")
+      assert.are.same(ret.ast.seq, {"a", "b"})
+      ret = parser:parse("ba")
+      assert.are.same(ret.ast.seq, {"b", "a"})
+    end)
+    it("does 
not parse a string that is not a permutation of 'ab[c]'", function() + local ret = parser:parse("a") + assert.is_falsy(ret) + ret = parser:parse("b") + assert.is_falsy(ret) + ret = parser:parse("c") + assert.is_falsy(ret) + ret = parser:parse("ca") + assert.is_falsy(ret) + ret = parser:parse("cb") + assert.is_falsy(ret) + ret = parser:parse("cc") + assert.is_falsy(ret) + ret = parser:parse("ccab") + assert.is_falsy(ret) + ret = parser:parse("ccc") + assert.is_falsy(ret) + end) + parser = hammer.permutation(hammer.optional(hammer.ch("c")), hammer.ch("a"), hammer.ch("b")) + it("parses a string that is a permutation of '[c]ab'", function() + local ret = parser:parse("abc") + assert.are.same(ret.ast.seq, {"a", "b", "c"}) + ret = parser:parse("acb") + assert.are.same(ret.ast.seq, {"a", "c", "b"}) + ret = parser:parse("bac") + assert.are.same(ret.ast.seq, {"b", "a", "c"}) + ret = parser:parse("bca") + assert.are.same(ret.ast.seq, {"b", "c", "a"}) + ret = parser:parse("cab") + assert.are.same(ret.ast.seq, {"c", "a", "b"}) + ret = parser:parse("cba") + assert.are.same(ret.ast.seq, {"c", "b", "a"}) + ret = parser:parse("ab") + assert.are.same(ret.ast.seq, {"a", "b"}) + ret = parser:parse("ba") + assert.are.same(ret.ast.seq, {"b", "a"}) + end) + it("does not parse a string that is not a permutation of '[c]ab'", function() + local ret = parser:parse("a") + assert.is_falsy(ret) + ret = parser:parse("b") + assert.is_falsy(ret) + ret = parser:parse("c") + assert.is_falsy(ret) + ret = parser:parse("ca") + assert.is_falsy(ret) + ret = parser:parse("cb") + assert.is_falsy(ret) + ret = parser:parse("cc") + assert.is_falsy(ret) + ret = parser:parse("ccab") + assert.is_falsy(ret) + ret = parser:parse("ccc") + assert.is_falsy(ret) + end) + end) + + -- describe("Monadic binding tests", function() + -- local function continuation(allocator, result, env) + -- local val = 0 + -- for k, v in result.seq.elements + -- do val = val*10 + v->uint - 48 + -- end + -- if val > 26 then + -- return 
nil + -- else + -- return hammer.ch + -- end + -- end + -- local parser = hammer.bind(hammer.many1(hammer.ch_range("0", "9")), continuation, "a") + -- it("parses a ", function() + -- local ret = parser:parse() + -- assert.are.same(ret.ast., ) + -- end) + -- it("does not parse a ", function() + -- local ret = parser:parse() + -- assert.is_falsy(ret) + -- end) + -- end) +end) \ No newline at end of file