diff --git a/src/hammer.c b/src/hammer.c index 6f60c959dbb965c3d62e0fbef446e2062a9cd2e7..178eeb7267742448175d5450f04e7d138d70f40a 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -759,15 +759,31 @@ parser_t* indirect() { } typedef struct { + const parser_t *p; predicate_t pred; } attr_bool_t; static parse_result_t* parse_attr_bool(void *env, parse_state_t *state) { - - + attr_bool_t *a = (attr_bool_t*)env; + parse_result_t *res = do_parse(a->p, state); + if (res) { + if (a->pred(res)) + return res; + else + return NULL; + } else + return NULL; } -const parser_t* attr_bool(const parser_t* p, attr_bool_t a) { return &unimplemented; } +const parser_t* attr_bool(const parser_t* p, predicate_t pred) { + parser_t *res = g_new(parser_t, 1); + res->fn = parse_attr_bool; + attr_bool_t *env = g_new(attr_bool_t, 1); + env->p = p; + env->pred = pred; + res->env = (void*)env; + return res; +} const parser_t* and(const parser_t* p) { return &unimplemented; } diff --git a/src/hammer.h b/src/hammer.h index 915f7ac35760324404316a3e06bdd83481aa3000..c16dd49eb33bb1bcb437e91eb9579e8a6eaeac0d 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -74,164 +74,271 @@ typedef struct parse_result { arena_t arena; } parse_result_t; -/* Type of an action to apply to an AST, used in the action() parser. */ +/** + * Type of an action to apply to an AST, used in the action() parser. + */ typedef parse_result_t* (*action_t)(parse_result_t *p); -/* Type of a boolean attribute-checking function, used in the attr_bool() parser. */ -typedef int (*predicate_t)(parse_result_t *p); +/** + * Type of a boolean attribute-checking function, used in the + * attr_bool() parser. It can be any (user-defined) function that takes + * a parse_result_t and returns true or false. 
+ */ +typedef bool (*predicate_t)(parse_result_t *p); typedef struct parser { parse_result_t* (*fn)(void *env, parse_state_t *state); void *env; } parser_t; +/** + * Top-level function to call a parser that has been built over some + * piece of input (of known size). + */ parse_result_t* parse(const parser_t* parser, const uint8_t* input, size_t length); -/* Given a string, returns a parser that parses that string value. */ +/** + * Given a string, returns a parser that parses that string value. + */ const parser_t* token(const uint8_t *str, const size_t len); -/* Given a single character, returns a parser that parses that character. */ +/** + * Given a single character, returns a parser that parses that + * character. + */ const parser_t* ch(const uint8_t c); -/* Given two single-character bounds, lower and upper, returns a parser that parses a single character within the range [lower, upper] (inclusive). */ +/** + * Given two single-character bounds, lower and upper, returns a parser + * that parses a single character within the range [lower, upper] + * (inclusive). + */ const parser_t* range(const uint8_t lower, const uint8_t upper); -/* Returns a parser that parses the specified number of bits. sign == true if signed, false if unsigned. */ +/** + * Returns a parser that parses the specified number of bits. sign == + * true if signed, false if unsigned. + */ const parser_t* bits(size_t len, bool sign); -/* Returns a parser that parses a signed 8-byte integer value. */ +/** + * Returns a parser that parses a signed 8-byte integer value. + */ const parser_t* int64(); -/* Returns a parser that parses a signed 4-byte integer value. */ +/** + * Returns a parser that parses a signed 4-byte integer value. + */ const parser_t* int32(); -/* Returns a parser that parses a signed 2-byte integer value. */ +/** + * Returns a parser that parses a signed 2-byte integer value. + */ const parser_t* int16(); -/* Returns a parser that parses a signed 1-byte integer value. 
*/ +/** + * Returns a parser that parses a signed 1-byte integer value. + */ const parser_t* int8(); -/* Returns a parser that parses an unsigned 8-byte integer value. */ +/** + * Returns a parser that parses an unsigned 8-byte integer value. + */ const parser_t* uint64(); -/* Returns a parser that parses an unsigned 4-byte integer value. */ +/** + * Returns a parser that parses an unsigned 4-byte integer value. + */ const parser_t* uint32(); -/* Returns a parser that parses an unsigned 2-byte integer value. */ +/** + * Returns a parser that parses an unsigned 2-byte integer value. + */ const parser_t* uint16(); -/* Returns a parser that parses an unsigned 1-byte integer value. */ +/** + * Returns a parser that parses an unsigned 1-byte integer value. + */ const parser_t* uint8(); -/* Returns a parser that parses a double-precision floating-point value. */ +/** + * Returns a parser that parses a double-precision floating-point + * value. + */ const parser_t* float64(); -/* Returns a parser that parses a single-precision floating-point value. */ +/** + * Returns a parser that parses a single-precision floating-point + * value. + */ const parser_t* float32(); -/* Given another parser, p, returns a parser that skips any whitespace and then applies p. */ +/** + * Given another parser, p, returns a parser that skips any whitespace + * and then applies p. + */ const parser_t* whitespace(const parser_t* p); -/* Given another parser, p, and a function f, returns a parser that applies p, then applies f to everything in the AST of p's result. */ +/** + * Given another parser, p, and a function f, returns a parser that + * applies p, then applies f to everything in the AST of p's result. + */ const parser_t* action(const parser_t* p, const action_t a); -/* Parse a single character *NOT* in charset */ +/** + * Parse a single character *NOT* in the given charset. 
+ */ const parser_t* not_in(const uint8_t *charset, int length); -/* A no-argument parser that succeeds if there is no more input to parse. */ +/** + * A no-argument parser that succeeds if there is no more input to + * parse. + */ const parser_t* end_p(); -/* This parser always fails. */ +/** + * This parser always fails. + */ const parser_t* nothing_p(); -/* Given an null-terminated list of parsers, apply each parser in order. The parse succeeds only if all parsers succeed. */ +/** + * Given a null-terminated list of parsers, apply each parser in order. + * The parse succeeds only if all parsers succeed. + */ const parser_t* sequence(const parser_t* p, ...) __attribute__((sentinel)); -/* Given an array of parsers, p_array, apply each parser in order. The first parser to succeed is the result; if no parsers succeed, the parse fails. */ +/** + * Given a null-terminated list of parsers, apply each parser in order. + * The first parser to succeed is the result; if no parsers succeed, + * the parse fails. + */ const parser_t* choice(const parser_t* p, ...) __attribute__((sentinel)); -/* Given two parsers, p1 and p2, this parser succeeds in the following cases: +/** + * Given two parsers, p1 and p2, this parser succeeds in the following + * cases: * - if p1 succeeds and p2 fails * - if both succeed but p1's result is as long as or shorter than p2's */ const parser_t* butnot(const parser_t* p1, const parser_t* p2); -/* Given two parsers, p1 and p2, this parser succeeds in the following cases: +/** + * Given two parsers, p1 and p2, this parser succeeds in the following + * cases: * - if p1 succeeds and p2 fails * - if both succeed but p2's result is shorter than p1's */ const parser_t* difference(const parser_t* p1, const parser_t* p2); -/* Given two parsers, p1 and p2, this parser succeeds if *either* p1 or p2 succeed, but not if they both do. +/** + * Given two parsers, p1 and p2, this parser succeeds if *either* p1 or + * p2 succeeds, but not if they both do. 
*/ const parser_t* xor(const parser_t* p1, const parser_t* p2); -/* Given a parser, p, this parser succeeds for zero or more repetitions of p. */ +/** + * Given a parser, p, this parser succeeds for zero or more repetitions + * of p. + */ const parser_t* many(const parser_t* p); -/* Given a parser, p, this parser succeeds for one or more repetitions of p. */ +/** + * Given a parser, p, this parser succeeds for one or more repetitions + * of p. + */ const parser_t* many1(const parser_t* p); -/* Given a parser, p, this parser succeeds for exactly N repetitions of p. */ +/** + * Given a parser, p, this parser succeeds for exactly N repetitions + * of p. + */ const parser_t* repeat_n(const parser_t* p, const size_t n); -/* Given a parser, p, this parser succeeds with the value p parsed or with an empty result. */ +/** + * Given a parser, p, this parser succeeds with the value p parsed or + * with an empty result. + */ const parser_t* optional(const parser_t* p); -/* Given a parser, p, this parser succeeds if p succeeds, but doesn't include p's result in the result. */ +/** + * Given a parser, p, this parser succeeds if p succeeds, but doesn't + * include p's result in the result. + */ const parser_t* ignore(const parser_t* p); -/* Given a parser, p, and a parser for a separator, sep, this parser matches a (possibly empty) list of things that p can parse, separated by sep. - * For example, if p is repeat1(range('0','9')) and sep is ch(','), sepBy(p, sep) will match a comma-separated list of integers. +/** + * Given a parser, p, and a parser for a separator, sep, this parser + * matches a (possibly empty) list of things that p can parse, + * separated by sep. + * For example, if p is many1(range('0','9')) and sep is ch(','), + * sepBy(p, sep) will match a comma-separated list of integers. 
*/ const parser_t* sepBy(const parser_t* p, const parser_t* sep); -/* Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element. +/** + * Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element. * For example, if p is repeat1(range('0','9')) and sep is ch(','), sepBy1(p, sep) will match a comma-separated list of integers. */ const parser_t* sepBy1(const parser_t* p, const parser_t* sep); -/* This parser always returns a zero length match, i.e., empty string. */ +/** + * This parser always returns a zero length match, i.e., empty string. + */ const parser_t* epsilon_p(); -/* This parser attaches an attribute function, which returns true or false, to a parser. The function is evaluated over the parser's result AST. +/** + * This parser attaches a predicate function, which returns true or + * false, to a parser. The function is evaluated over the parser's + * result. * The parse only succeeds if the attribute function returns true. */ -const parser_t* attr_bool(const parser_t* p, const attr_bool_t a); +const parser_t* attr_bool(const parser_t* p, predicate_t pred); -/* The 'and' parser is a predicate. It asserts that a conditional syntax is satisfied, but consumes no input. +/** + * The 'and' parser is a predicate. It asserts that a conditional + * syntax is satisfied, but consumes no input. * This is useful for lookahead. As an example: * - * Suppose you already have a parser, hex_p, that parses numbers in hexadecimal format (including the leading '0x'). Then + * Suppose you already have a parser, hex_p, that parses numbers in + * hexadecimal format (including the leading '0x'). 
Then * sequence(and(token((const uint8_t*)"0x", 2)), hex_p) - * checks to see whether there is a leading "0x", *does not* consume the "0x", and then applies hex_p to parse the hex-formatted number. + * checks to see whether there is a leading "0x", *does not* consume + * the "0x", and then applies hex_p to parse the hex-formatted number. * - * 'and' succeeds if p succeeds, and fails if p fails. Like 'ignore', 'and' does not attach a result to the AST. + * 'and' succeeds if p succeeds, and fails if p fails. Like 'ignore', + * 'and' does not attach a result to the AST. */ const parser_t* and(const parser_t* p); -/* The 'not' parser is a predicate. It asserts that a conditional syntax is *not* satisfied, and consumes no input. +/** + * The 'not' parser is a predicate. It asserts that a conditional + * syntax is *not* satisfied, and consumes no input. * As a somewhat contrived example: * * Since 'choice' applies its arguments in order, the following parser: * sequence(ch('a'), choice(ch('+'), token((const uint8_t*)"++"), NULL), ch('b'), NULL) - * will not parse "a++b", because once choice() has succeeded, it will not backtrack and try other alternatives if a later parser in the sequence - * fails. - * Instead, you can force the use of the second alternative by turning the ch('+') alternative into a sequence with not: + * will not parse "a++b", because once choice() has succeeded, it will + * not backtrack and try other alternatives if a later parser in the + * sequence fails. + * Instead, you can force the use of the second alternative by turning + * the ch('+') alternative into a sequence with not: * sequence(ch('a'), choice(sequence(ch('+'), not(ch('+')), NULL), token((const uint8_t*)"++")), ch('b'), NULL) - * If the input string is "a+b", the first alternative is applied; if the input string is "a++b", the second alternative is applied. 
+ * If the input string is "a+b", the first alternative is applied; if + * the input string is "a++b", the second alternative is applied. */ const parser_t* not(const parser_t* p); /** - * Create a parser that just calls out to another, as yet unknown, parser. + * Create a parser that just calls out to another, as yet unknown, + * parser. * Note that the inner parser gets bound later, with bind_indirect. * This can be used to create recursive parsers. */ parser_t *indirect(); /** - * Set the inner parser of an indirect. See comments on indirect for details. + * Set the inner parser of an indirect. See comments on indirect for + * details. */ void bind_indirect(parser_t* indirect, parser_t* inner);