diff --git a/.gitignore b/.gitignore index bc16b561ac2314599ede9a86b12d2f6d2129dada..99d0928add6ce226c8f45b4846b0b29d1deef8a3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,15 @@ *.o *~ *.a +*.class +*.so +jni/com*.h src/test_suite lib/hush examples/dns examples/base64 +examples/base64_sem1 +examples/base64_sem2 TAGS *.swp *.swo diff --git a/Makefile b/Makefile index fbd96c4fdf9866b28d806cb2bbcdfbb00b026e93..ef5be76a59520fdc44348e426bca421708a02262 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ # and kick off a recursive make # Also, "make src/all" turns into "make -C src all" -SUBDIRS = src examples +SUBDIRS = src examples jni include config.mk diff --git a/README.md b/README.md index d88a153b3f5183b56a13c30e2b98917dab29836f..e008b12d880576e6a210b277362170b916022773 100644 --- a/README.md +++ b/README.md @@ -48,3 +48,11 @@ Examples The `examples/` directory contains some simple examples, currently including: * base64 * DNS + +Community +========= +Please join us at `#hammer` on `irc.upstandinghackers.com` if you have any questions or just want to talk about parsing. + +Contact +======= +You can also email us at <hammer@upstandinghackers.com>. 
diff --git a/common.mk b/common.mk index 143a0f40959a7474b578214f2815d40bd59dff33..66e48f05a1a5e3068962715aef0d1b5099e81f47 100644 --- a/common.mk +++ b/common.mk @@ -8,7 +8,7 @@ include $(TOPLEVEL)/config.mk TEST_CFLAGS = $(shell pkg-config --cflags glib-2.0) -DINCLUDE_TESTS TEST_LDFLAGS = $(shell pkg-config --libs glib-2.0) -lrt -CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes +CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -g LDFLAGS := CC ?= gcc diff --git a/examples/Makefile b/examples/Makefile index 6a054cad515f9e697fb5d1a954824e8a00cfe259..663a2144030018138de364b9392377720a84c359 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -2,7 +2,11 @@ OUTPUTS := dns.o \ dns \ base64.o \ - base64 + base64 \ + base64_sem1.o \ + base64_sem1 \ + base64_sem2.o \ + base64_sem2 TOPLEVEL := ../ @@ -12,20 +16,26 @@ LDFLAGS += $(pkg-config --libs glib-2.0) -all: dns base64 +all: dns base64 base64_sem1 base64_sem2 dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) dns: dns.o rr.o dns_common.o $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -dns.o: ../src/hammer.h dns_common.h - -rr.o: ../src/hammer.h rr.h dns_common.h - -dns_common.o: ../src/hammer.h dns_common.h +dns.o: ../src/hammer.h dns_common.h ../src/glue.h +rr.o: ../src/hammer.h rr.h dns_common.h ../src/glue.h +dns_common.o: ../src/hammer.h dns_common.h ../src/glue.h base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS) base64: base64.o $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -base64.o: ../src/hammer.h +base64_sem1: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64_sem1: base64_sem1.o + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) + +base64_sem2: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64_sem2: base64_sem2.o + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) + +base64%.o: ../src/hammer.h ../src/glue.h diff --git a/examples/base64.c b/examples/base64.c index 
beb2484f9dcbbf2add81c2295572731ca06fa487..ee142e3cbfee5fa7bf2032d4f07882581916d4f7 100644 --- a/examples/base64.c +++ b/examples/base64.c @@ -1,3 +1,13 @@ +// Example parser: Base64, syntax only. +// +// Demonstrates how to construct a Hammer parser that recognizes valid Base64 +// sequences. +// +// Note that no semantic evaluation of the sequence is performed, i.e. the +// byte sequence being represented is not returned, or determined. See +// base64_sem1.c and base64_sem2.c for examples how to attach appropriate +// semantic actions to the grammar. + #include "../src/hammer.h" const HParser* document = NULL; @@ -14,18 +24,17 @@ void init_parser(void) const HParser *equals = h_ch('='); const HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL); - const HParser *bsfdig_4bit = h_choice( - h_ch('A'), h_ch('E'), h_ch('I'), h_ch('M'), h_ch('Q'), h_ch('U'), - h_ch('Y'), h_ch('c'), h_ch('g'), h_ch('k'), h_ch('o'), h_ch('s'), - h_ch('w'), h_ch('0'), h_ch('4'), h_ch('8'), NULL); - const HParser *bsfdig_2bit = h_choice(h_ch('A'), h_ch('Q'), h_ch('g'), h_ch('w'), NULL); + const HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16); + const HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4); + const HParser *base64_3 = h_repeat_n(bsfdig, 4); const HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); const HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); - const HParser *base64 = h_choice(base64_2, base64_1, NULL); - // why does this parse "A=="?! - // why does this parse "aaA=" but not "aA=="?! 
+ const HParser *base64 = h_sequence(h_many(base64_3), + h_optional(h_choice(base64_2, + base64_1, NULL)), + NULL); - document = base64; + document = h_sequence(h_whitespace(base64), h_whitespace(h_end_p()), NULL); } diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c new file mode 100644 index 0000000000000000000000000000000000000000..f2a3e82b3ef2ce60befd3ccc8d5570937ecd7166 --- /dev/null +++ b/examples/base64_sem1.c @@ -0,0 +1,172 @@ +// Example parser: Base64, with fine-grained semantic actions +// +// Demonstrates how to attach semantic actions to grammar rules and piece by +// piece transform the parse tree into the desired semantic representation, +// in this case a sequence of 8-bit values. +// +// Note how the grammar is defined by using the macros H_RULE and H_ARULE. +// Those rules using ARULE get an attached action which must be declared (as +// a function of type HAction) with a standard name based on the rule name. +// +// This variant of the example uses fine-grained semantic actions that +// transform the parse tree in small steps in a bottom-up fashion. Compare +// base64_sem2.c for an alternative approach using a single top-level action. + +#include "../src/hammer.h" +#include "../src/glue.h" +#include <assert.h> + + +/// +// Semantic actions for the grammar below, each corresponds to an "ARULE". +// They must be named act_<rulename>. 
+///
+
+const HParsedToken *act_bsfdig(const HParseResult *p)  // digit char -> 6-bit value
+{
+  HParsedToken *res = H_MAKE_UINT(0);   // left as created if no branch matches
+
+  uint8_t c = H_CAST_UINT(p->ast);      // uint carried by the matched token
+
+  if(c >= 0x41 && c <= 0x5A)            // A-Z
+    res->uint = c - 0x41;
+  else if(c >= 0x61 && c <= 0x7A)       // a-z
+    res->uint = c - 0x61 + 26;
+  else if(c >= 0x30 && c <= 0x39)       // 0-9
+    res->uint = c - 0x30 + 52;
+  else if(c == '+')
+    res->uint = 62;
+  else if(c == '/')
+    res->uint = 63;
+
+  return res;
+}
+
+H_ACT_APPLY(act_index0, h_act_index, 0);
+
+#define act_bsfdig_4bit  act_bsfdig
+#define act_bsfdig_2bit  act_bsfdig
+
+#define act_equals       h_act_ignore
+#define act_ws           h_act_ignore
+
+#define act_document     act_index0
+
+// General-form action: turn a block of n+1 base64 digits into n bytes.
+const HParsedToken *act_base64_n(int n, const HParseResult *p)
+{
+  HParsedToken *res = H_MAKE_SEQN(n);
+
+  HParsedToken **digits = h_seq_elements(p->ast);
+
+  uint32_t x = 0;
+  int bits = 0;
+  for(int i=0; i<n+1; i++) {            // accumulate 6 bits per digit
+    x <<= 6; x |= digits[i]->uint;
+    bits += 6;
+  }
+  x >>= bits%8;                         // align, i.e. cut off extra bits
+
+  for(int i=0; i<n; i++) {              // fill the result back to front
+    HParsedToken *item = H_MAKE_UINT(x & 0xFF);
+
+    res->seq->elements[n-1-i] = item;   // output the last byte and
+    x >>= 8;                            // discard it
+  }
+  res->seq->used = n;
+
+  return res;
+}
+
+H_ACT_APPLY(act_base64_3, act_base64_n, 3);
+H_ACT_APPLY(act_base64_2, act_base64_n, 2);
+H_ACT_APPLY(act_base64_1, act_base64_n, 1);
+
+const HParsedToken *act_base64(const HParseResult *p)  // merge blocks into one sequence
+{
+  assert(p->ast->token_type == TT_SEQUENCE);
+  assert(p->ast->seq->used == 2);
+  assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE);
+
+  HParsedToken *res = H_MAKE_SEQ();
+
+  // concatenate base64_3 blocks
+  HCountedArray *seq = H_FIELD_SEQ(0);
+  for(size_t i=0; i<seq->used; i++)
+    h_seq_append(res, seq->elements[i]);
+
+  // append one trailing base64_2 or _1 block (skipped if it is not a sequence)
+  const HParsedToken *tok = h_seq_index(p->ast, 1);
+  if(tok->token_type == TT_SEQUENCE)
+    h_seq_append(res, tok);
+
+  return res;
+}
+
+
+///
+// Set up the parser with the grammar to be recognized.
+///
+
+const HParser *init_parser(void)
+{
+  // CORE
+  H_RULE (digit,   h_ch_range(0x30, 0x39));
+  H_RULE (alpha,   h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL));
+  H_RULE (space,   h_in((uint8_t *)" \t\n\r\f\v", 6));
+
+  // AUX.
+  H_RULE (plus,    h_ch('+'));
+  H_RULE (slash,   h_ch('/'));
+  H_ARULE(equals,  h_ch('='));
+
+  H_ARULE(bsfdig,       h_choice(alpha, digit, plus, slash, NULL));
+  H_ARULE(bsfdig_4bit,  h_in((uint8_t *)"AEIMQUYcgkosw048", 16));
+  H_ARULE(bsfdig_2bit,  h_in((uint8_t *)"AQgw", 4));
+  H_ARULE(base64_3,     h_repeat_n(bsfdig, 4));
+  H_ARULE(base64_2,     h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL));
+  H_ARULE(base64_1,     h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL));
+  H_ARULE(base64,       h_sequence(h_many(base64_3),
+                                   h_optional(h_choice(base64_2,
+                                                       base64_1, NULL)),
+                                   NULL));
+
+  H_ARULE(ws,           h_many(space));
+  H_ARULE(document,     h_sequence(ws, base64, ws, h_end_p(), NULL));
+
+  // BUG: sometimes inputs that should parse just don't.
+  // It *seemed* to happen mostly with things like "bbbbaaaaBA==".
+  // Using fewer actions seemed to make it less likely.
+
+  return document;
+}
+
+
+///
+// Main routine: print input, parse, print result, return success/failure.
+///
+
+#include <stdio.h>
+
+int main(int argc, char **argv)
+{
+  uint8_t input[102400];                // at most this many bytes of stdin are read
+  size_t inputsize;
+  const HParser *parser;
+  const HParseResult *result;
+
+  parser = init_parser();
+
+  inputsize = fread(input, 1, sizeof(input), stdin);
+  fprintf(stderr, "inputsize=%zu\ninput=", inputsize);  // %zu: inputsize is a size_t
+  fwrite(input, 1, inputsize, stderr);
+  result = h_parse(parser, input, inputsize);
+
+  if(result) {
+    fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8);
+    h_pprint(stdout, result->ast, 0, 0);
+    return 0;
+  } else {
+    return 1;
+  }
+}
diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c
new file mode 100644
index 0000000000000000000000000000000000000000..32afe5bbc1ab74077f08311c1c9d47405060e3f7
--- /dev/null
+++ b/examples/base64_sem2.c
@@ -0,0 +1,176 @@
+// Example parser: Base64, with coarse-grained semantic actions
+//
+// Demonstrates how to attach semantic actions to a grammar and transform the
+// parse tree into the desired semantic representation, in this case a sequence
+// of 8-bit values.
+//
+// Note how the grammar is defined by using the macros H_RULE and H_ARULE.
+// Those rules using ARULE get an attached action which must be declared (as
+// a function of type HAction) with a standard name based on the rule name.
+//
+// This variant of the example uses coarse-grained semantic actions,
+// transforming the entire parse tree in one big step. Compare base64_sem1.c
+// for an alternative approach using a fine-grained piece-by-piece
+// transformation.
+
+#include "../src/hammer.h"
+#include "../src/glue.h"
+#include <assert.h>
+
+
+///
+// Semantic actions for the grammar below, each corresponds to an "ARULE".
+// They must be named act_<rulename>.
+///
+
+// helper: return the numeric value (0-63) of a parsed base64 digit; 0 for anything else
+uint8_t bsfdig_value(const HParsedToken *p)
+{
+  uint8_t value = 0;
+
+  if(p && p->token_type == TT_UINT) {
+    uint8_t c = p->uint;
+    if(c >= 0x41 && c <= 0x5A)          // A-Z
+      value = c - 0x41;
+    else if(c >= 0x61 && c <= 0x7A)     // a-z
+      value = c - 0x61 + 26;
+    else if(c >= 0x30 && c <= 0x39)     // 0-9
+      value = c - 0x30 + 52;
+    else if(c == '+')
+      value = 62;
+    else if(c == '/')
+      value = 63;
+  }
+
+  return value;
+}
+
+// helper: append a byte value to a sequence
+#define seq_append_byte(res, b) h_seq_snoc(res, H_MAKE_UINT(b))
+
+const HParsedToken *act_base64(const HParseResult *p)  // whole parse tree -> byte sequence
+{
+  assert(p->ast->token_type == TT_SEQUENCE);
+  assert(p->ast->seq->used == 2);
+  assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE);
+
+  // grab b64_3 block sequence
+  // grab and analyze b64 end block (_2 or _1)
+  const HParsedToken *b64_3 = p->ast->seq->elements[0];
+  const HParsedToken *b64_2 = p->ast->seq->elements[1];
+  const HParsedToken *b64_1 = p->ast->seq->elements[1];
+
+  if(b64_2->token_type != TT_SEQUENCE)              // no trailing block at all
+    b64_1 = b64_2 = NULL;
+  else if(b64_2->seq->elements[2]->uint == '=')     // third element '=': it is a _1 block
+    b64_2 = NULL;
+  else
+    b64_1 = NULL;
+
+  // allocate result sequence
+  HParsedToken *res = H_MAKE_SEQ();
+
+  // concatenate base64_3 blocks
+  for(size_t i=0; i<b64_3->seq->used; i++) {
+    assert(b64_3->seq->elements[i]->token_type == TT_SEQUENCE);
+    HParsedToken **digits = b64_3->seq->elements[i]->seq->elements;
+
+    uint32_t x = bsfdig_value(digits[0]);           // 4 digits = 24 bits = 3 bytes
+    x <<= 6; x |= bsfdig_value(digits[1]);
+    x <<= 6; x |= bsfdig_value(digits[2]);
+    x <<= 6; x |= bsfdig_value(digits[3]);
+    seq_append_byte(res, (x >> 16) & 0xFF);
+    seq_append_byte(res, (x >> 8) & 0xFF);
+    seq_append_byte(res, x & 0xFF);
+  }
+
+  // append one trailing base64_2 or _1 block
+  if(b64_2) {
+    HParsedToken **digits = b64_2->seq->elements;
+    uint32_t x = bsfdig_value(digits[0]);           // 3 digits = 18 bits, low 2 dropped
+    x <<= 6; x |= bsfdig_value(digits[1]);
+    x <<= 6; x |= bsfdig_value(digits[2]);
+    seq_append_byte(res, (x >> 10) & 0xFF);
+    seq_append_byte(res, (x >> 2) & 0xFF);
+  } else if(b64_1) {
+    HParsedToken **digits = b64_1->seq->elements;
+    uint32_t x = bsfdig_value(digits[0]);           // 2 digits = 12 bits, low 4 dropped
+    x <<= 6; x |= bsfdig_value(digits[1]);
+    seq_append_byte(res, (x >> 4) & 0xFF);
+  }
+
+  return res;
+}
+
+H_ACT_APPLY(act_index0, h_act_index, 0);
+
+#define act_ws           h_act_ignore
+#define act_document     act_index0
+
+
+///
+// Set up the parser with the grammar to be recognized.
+///
+
+const HParser *init_parser(void)
+{
+  // CORE
+  H_RULE (digit,   h_ch_range(0x30, 0x39));
+  H_RULE (alpha,   h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL));
+  H_RULE (space,   h_in((uint8_t *)" \t\n\r\f\v", 6));
+
+  // AUX.
+  H_RULE (plus,    h_ch('+'));
+  H_RULE (slash,   h_ch('/'));
+  H_RULE (equals,  h_ch('='));
+
+  H_RULE (bsfdig,       h_choice(alpha, digit, plus, slash, NULL));
+  H_RULE (bsfdig_4bit,  h_in((uint8_t *)"AEIMQUYcgkosw048", 16));
+  H_RULE (bsfdig_2bit,  h_in((uint8_t *)"AQgw", 4));
+  H_RULE (base64_3,     h_repeat_n(bsfdig, 4));
+  H_RULE (base64_2,     h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL));
+  H_RULE (base64_1,     h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL));
+  H_ARULE(base64,       h_sequence(h_many(base64_3),
+                                   h_optional(h_choice(base64_2,
+                                                       base64_1, NULL)),
+                                   NULL));
+
+  H_ARULE(ws,           h_many(space));
+  H_ARULE(document,     h_sequence(ws, base64, ws, h_end_p(), NULL));
+
+  // BUG: sometimes inputs that should parse just don't.
+  // It *seemed* to happen mostly with things like "bbbbaaaaBA==".
+  // Using fewer actions seemed to make it less likely.
+
+  return document;
+}
+
+
+///
+// Main routine: print input, parse, print result, return success/failure.
+///
+
+#include <stdio.h>
+
+int main(int argc, char **argv)
+{
+  uint8_t input[102400];                // at most this many bytes of stdin are read
+  size_t inputsize;
+  const HParser *parser;
+  const HParseResult *result;
+
+  parser = init_parser();
+
+  inputsize = fread(input, 1, sizeof(input), stdin);
+  fprintf(stderr, "inputsize=%zu\ninput=", inputsize);  // %zu: inputsize is a size_t
+  fwrite(input, 1, inputsize, stderr);
+  result = h_parse(parser, input, inputsize);
+
+  if(result) {
+    fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8);
+    h_pprint(stdout, result->ast, 0, 0);
+    return 0;
+  } else {
+    return 1;
+  }
+}
diff --git a/examples/dns.c b/examples/dns.c index 54d9c7e33bf09dad413ca798703c97b5322e40f2..7887ba6a7881d41c21836effa5f3ebaffc986d56 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -10,7 +10,12 @@ #define false 0 #define true 1 -bool is_zero(HParseResult *p) { + +/// +// Validations +/// + +bool validate_hdzero(HParseResult *p) { if (TT_UINT != p->ast->token_type) return false; return (0 == p->ast->uint); @@ -20,408 +25,244 @@ bool is_zero(HParseResult *p) { * Every DNS message should have QDCOUNT entries in the question * section, and ANCOUNT+NSCOUNT+ARCOUNT resource records. */ -bool validate_dns(HParseResult *p) { +bool validate_message(HParseResult *p) { if (TT_SEQUENCE != p->ast->token_type) return false; - // The header holds the counts as its last 4 elements. 
- HParsedToken **elems = p->ast->seq->elements[0]->seq->elements; - size_t qd = elems[8]->uint; - size_t an = elems[9]->uint; - size_t ns = elems[10]->uint; - size_t ar = elems[11]->uint; - HParsedToken *questions = p->ast->seq->elements[1]; - if (questions->seq->used != qd) + + dns_header_t *header = H_FIELD(dns_header_t, 0); + size_t qd = header->question_count; + size_t an = header->answer_count; + size_t ns = header->authority_count; + size_t ar = header->additional_count; + + if (H_FIELD_SEQ(1)->used != qd) return false; - HParsedToken *rrs = p->ast->seq->elements[2]; - if (an+ns+ar != rrs->seq->used) + if (an+ns+ar != H_FIELD_SEQ(2)->used) return false; + return true; } -struct dns_qname get_qname(const HParsedToken *t) { - // The qname parser parses at least 1 length-value pair, then a NULL. - // So, t->seq->elements[0] is a sequence of at least 1 such pair, - // and t->seq->elements[1] is the null. - const HParsedToken *labels = t->seq->elements[0]; - struct dns_qname ret = { - .qlen = labels->seq->used, - .labels = h_arena_malloc(t->seq->arena, sizeof(*ret.labels)*labels->seq->used) - }; - // i is which label we're on - for (size_t i=0; i<labels->seq->used; ++i) { - ret.labels[i].len = labels->seq->elements[i]->seq->used; - ret.labels[i].label = h_arena_malloc(t->seq->arena, ret.labels[i].len + 1); - // j is which char of the label we're on - for (size_t j=0; j<ret.labels[i].len; ++j) - ret.labels[i].label[j] = labels->seq->elements[i]->seq->elements[j]->uint; - ret.labels[i].label[ret.labels[i].len] = 0; + +/// +// Semantic Actions +/// + +// Helper: Parse and pack the RDATA field of a Resource Record. +void set_rdata(struct dns_rr *rr, HCountedArray *rdata) { + uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used); + for (size_t i=0; i<rdata->used; ++i) + data[i] = H_CAST_UINT(rdata->elements[i]); + + // Parse RDATA if possible. 
+ const HParseResult *p = NULL; + const HParser *parser = init_rdata(rr->type); + if (parser) + p = h_parse(parser, (const uint8_t*)data, rdata->used); + + // If the RR doesn't parse, set its type to 0. + if (!p) + rr->type = 0; + + // Pack the parsed rdata into rr. + switch(rr->type) { + case 1: rr->a = H_CAST_UINT(p->ast); break; + case 2: rr->ns = *H_CAST(dns_domain_t, p->ast); break; + case 3: rr->md = *H_CAST(dns_domain_t, p->ast); break; + case 4: rr->md = *H_CAST(dns_domain_t, p->ast); break; + case 5: rr->cname = *H_CAST(dns_domain_t, p->ast); break; + case 6: rr->soa = *H_CAST(dns_rr_soa_t, p->ast); break; + case 7: rr->mb = *H_CAST(dns_domain_t, p->ast); break; + case 8: rr->mg = *H_CAST(dns_domain_t, p->ast); break; + case 9: rr->mr = *H_CAST(dns_domain_t, p->ast); break; + case 10: rr->null = *H_CAST(dns_rr_null_t, p->ast); break; + case 11: rr->wks = *H_CAST(dns_rr_wks_t, p->ast); break; + case 12: rr->ptr = *H_CAST(dns_domain_t, p->ast); break; + case 13: rr->hinfo = *H_CAST(dns_rr_hinfo_t, p->ast); break; + case 14: rr->minfo = *H_CAST(dns_rr_minfo_t, p->ast); break; + case 15: rr->mx = *H_CAST(dns_rr_mx_t, p->ast); break; + case 16: rr->txt = *H_CAST(dns_rr_txt_t, p->ast); break; + default: break; } - return ret; } -char* get_domain(const HParsedToken *t) { - switch(t->token_type) { - case TT_UINT: - return " "; - case TT_SEQUENCE: - { - // Sequence of subdomains separated by "." - // Each subdomain is a label, which can be no more than 63 chars. 
- char *ret = h_arena_malloc(t->seq->arena, 64*t->seq->used); - size_t count = 0; - for (size_t i=0; i<t->seq->used; ++i) { - HParsedToken *tmp = t->seq->elements[i]; - for (size_t j=0; j<tmp->seq->used; ++j) { - ret[count] = tmp->seq->elements[i]->uint; - ++count; - } - ret[count] = '.'; - ++count; - } - ret[count-1] = '\x00'; - return ret; - } - default: - return NULL; - } +const HParsedToken* act_header(const HParseResult *p) { + HParsedToken **fields = h_seq_elements(p->ast); + dns_header_t header_ = { + .id = H_CAST_UINT(fields[0]), + .qr = H_CAST_UINT(fields[1]), + .opcode = H_CAST_UINT(fields[2]), + .aa = H_CAST_UINT(fields[3]), + .tc = H_CAST_UINT(fields[4]), + .rd = H_CAST_UINT(fields[5]), + .ra = H_CAST_UINT(fields[6]), + .rcode = H_CAST_UINT(fields[7]), + .question_count = H_CAST_UINT(fields[8]), + .answer_count = H_CAST_UINT(fields[9]), + .authority_count = H_CAST_UINT(fields[10]), + .additional_count = H_CAST_UINT(fields[11]) + }; + + dns_header_t *header = H_ALLOC(dns_header_t); + *header = header_; + + return H_MAKE(dns_header_t, header); } -uint8_t* get_cs(const HCountedArray *arr) { - uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used); - for (size_t i=0; i<arr->used; ++i) - ret[i] = arr->elements[i]->uint; - return ret; +const HParsedToken* act_label(const HParseResult *p) { + dns_label_t *r = H_ALLOC(dns_label_t); + + r->len = h_seq_len(p->ast); + r->label = h_arena_malloc(p->arena, r->len + 1); + for (size_t i=0; i<r->len; ++i) + r->label[i] = H_FIELD_UINT(i); + r->label[r->len] = 0; + + return H_MAKE(dns_label_t, r); } -uint8_t** get_txt(const HCountedArray *arr) { - uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used); - for (size_t i=0; i<arr->used; ++i) { - uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->elements[i]->seq->used); - for (size_t j=0; j<arr->elements[i]->seq->used; ++j) - tmp[j] = arr->elements[i]->seq->elements[j]->uint; - } - return ret; +const HParsedToken* act_rr(const 
HParseResult *p) { + dns_rr_t *rr = H_ALLOC(dns_rr_t); + + rr->name = *H_FIELD(dns_domain_t, 0); + rr->type = H_FIELD_UINT(1); + rr->class = H_FIELD_UINT(2); + rr->ttl = H_FIELD_UINT(3); + rr->rdlength = H_FIELD_SEQ(4)->used; + + // Parse and pack RDATA. + set_rdata(rr, H_FIELD_SEQ(4)); + + return H_MAKE(dns_rr_t, rr); } -void set_rr(struct dns_rr rr, HCountedArray *rdata) { - uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used); - for (size_t i=0; i<rdata->used; ++i) - data[i] = rdata->elements[i]->uint; +const HParsedToken* act_question(const HParseResult *p) { + dns_question_t *q = H_ALLOC(dns_question_t); + HParsedToken **fields = h_seq_elements(p->ast); - // If the RR doesn't parse, set its type to 0. - switch(rr.type) { - case 1: // A - { - const HParseResult *r = h_parse(init_a(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.a = r->ast->seq->elements[0]->uint; - break; - } - case 2: // NS - { - const HParseResult *r = h_parse(init_ns(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.ns = get_domain(r->ast->seq->elements[0]); - break; - } - case 3: // MD - { - const HParseResult *r = h_parse(init_md(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.md = get_domain(r->ast->seq->elements[0]); - break; - } - case 4: // MF - { - const HParseResult *r = h_parse(init_mf(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.md = get_domain(r->ast->seq->elements[0]); - break; - } - case 5: // CNAME - { - const HParseResult *r = h_parse(init_cname(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.cname = get_domain(r->ast->seq->elements[0]); - break; - } - case 6: // SOA - { - const HParseResult *r = h_parse(init_soa(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.soa.mname = get_domain(r->ast->seq->elements[0]); - rr.soa.rname = get_domain(r->ast->seq->elements[1]); - rr.soa.serial = 
r->ast->seq->elements[2]->uint; - rr.soa.refresh = r->ast->seq->elements[3]->uint; - rr.soa.retry = r->ast->seq->elements[4]->uint; - rr.soa.expire = r->ast->seq->elements[5]->uint; - rr.soa.minimum = r->ast->seq->elements[6]->uint; - } - break; - } - case 7: // MB - { - const HParseResult *r = h_parse(init_mb(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.mb = get_domain(r->ast->seq->elements[0]); - break; - } - case 8: // MG - { - const HParseResult *r = h_parse(init_mg(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.mg = get_domain(r->ast->seq->elements[0]); - break; - } - case 9: // MR - { - const HParseResult *r = h_parse(init_mr(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.mr = get_domain(r->ast->seq->elements[0]); - break; - } - case 10: // NULL - { - const HParseResult *r = h_parse(init_null(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.null = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->used); - for (size_t i=0; i<r->ast->seq->used; ++i) - rr.null[i] = r->ast->seq->elements[i]->uint; - } - break; - } - case 11: // WKS - { - const HParseResult *r = h_parse(init_wks(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.wks.address = r->ast->seq->elements[0]->uint; - rr.wks.protocol = r->ast->seq->elements[1]->uint; - rr.wks.len = r->ast->seq->elements[2]->seq->used; - rr.wks.bit_map = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->elements[2]->seq->used); - for (size_t i=0; i<rr.wks.len; ++i) - rr.wks.bit_map[i] = r->ast->seq->elements[2]->seq->elements[i]->uint; - } - break; - } - case 12: // PTR - { - const HParseResult *r = h_parse(init_ptr(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.ptr = get_domain(r->ast->seq->elements[0]); - break; - } - case 13: // HINFO - { - const HParseResult *r = h_parse(init_hinfo(), (const uint8_t*)data, rdata->used); - if (!r) - 
rr.type = 0; - else { - rr.hinfo.cpu = get_cs(r->ast->seq->elements[0]->seq); - rr.hinfo.os = get_cs(r->ast->seq->elements[1]->seq); - } - break; - } - case 14: // MINFO - { - const HParseResult *r = h_parse(init_minfo(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.minfo.rmailbx = get_domain(r->ast->seq->elements[0]); - rr.minfo.emailbx = get_domain(r->ast->seq->elements[1]); - } - break; - } - case 15: // MX - { - const HParseResult *r = h_parse(init_mx(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.mx.preference = r->ast->seq->elements[0]->uint; - rr.mx.exchange = get_domain(r->ast->seq->elements[1]); - } - break; - } - case 16: // TXT - { - const HParseResult *r = h_parse(init_txt(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.txt.count = r->ast->seq->elements[0]->seq->used; - rr.txt.txt_data = get_txt(r->ast->seq->elements[0]->seq); - } - break; - } - default: - break; + // QNAME is a sequence of labels. Pack them into an array. 
+ q->qname.qlen = h_seq_len(fields[0]); + q->qname.labels = h_arena_malloc(p->arena, sizeof(dns_label_t)*q->qname.qlen); + for(size_t i=0; i<q->qname.qlen; i++) { + q->qname.labels[i] = *H_INDEX(dns_label_t, fields[0], i); } + + q->qtype = H_CAST_UINT(fields[1]); + q->qclass = H_CAST_UINT(fields[2]); + + return H_MAKE(dns_question_t, q); } -const HParsedToken* pack_dns_struct(const HParseResult *p) { +const HParsedToken* act_message(const HParseResult *p) { h_pprint(stdout, p->ast, 0, 2); - HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); - ret->token_type = TT_USER; - - dns_message_t *msg = h_arena_malloc(p->arena, sizeof(dns_message_t)); - - HParsedToken *hdr = p->ast->seq->elements[0]; - struct dns_header header = { - .id = hdr->seq->elements[0]->uint, - .qr = hdr->seq->elements[1]->uint, - .opcode = hdr->seq->elements[2]->uint, - .aa = hdr->seq->elements[3]->uint, - .tc = hdr->seq->elements[4]->uint, - .rd = hdr->seq->elements[5]->uint, - .ra = hdr->seq->elements[6]->uint, - .rcode = hdr->seq->elements[7]->uint, - .question_count = hdr->seq->elements[8]->uint, - .answer_count = hdr->seq->elements[9]->uint, - .authority_count = hdr->seq->elements[10]->uint, - .additional_count = hdr->seq->elements[11]->uint - }; - msg->header = header; + dns_message_t *msg = H_ALLOC(dns_message_t); - HParsedToken *qs = p->ast->seq->elements[1]; + // Copy header into message struct. + dns_header_t *header = H_FIELD(dns_header_t, 0); + msg->header = *header; + + // Copy questions into message struct. + HParsedToken *qs = h_seq_index(p->ast, 1); struct dns_question *questions = h_arena_malloc(p->arena, - sizeof(struct dns_question)*(header.question_count)); - for (size_t i=0; i<header.question_count; ++i) { - // QNAME is a sequence of labels. In the parser, it's defined as - // sequence(many1(length_value(...)), ch('\x00'), NULL). 
- questions[i].qname = get_qname(qs->seq->elements[i]->seq->elements[0]); - questions[i].qtype = qs->seq->elements[i]->seq->elements[1]->uint; - questions[i].qclass = qs->seq->elements[i]->seq->elements[2]->uint; + sizeof(struct dns_question)*(header->question_count)); + for (size_t i=0; i<header->question_count; ++i) { + questions[i] = *H_INDEX(dns_question_t, qs, i); } msg->questions = questions; - HParsedToken *rrs = p->ast->seq->elements[2]; + // Copy answer RRs into message struct. + HParsedToken *rrs = h_seq_index(p->ast, 2); struct dns_rr *answers = h_arena_malloc(p->arena, - sizeof(struct dns_rr)*(header.answer_count)); - for (size_t i=0; i<header.answer_count; ++i) { - answers[i].name = get_domain(rrs[i].seq->elements[0]); - answers[i].type = rrs[i].seq->elements[1]->uint; - answers[i].class = rrs[i].seq->elements[2]->uint; - answers[i].ttl = rrs[i].seq->elements[3]->uint; - answers[i].rdlength = rrs[i].seq->elements[4]->seq->used; - set_rr(answers[i], rrs[i].seq->elements[4]->seq); + sizeof(struct dns_rr)*(header->answer_count)); + for (size_t i=0; i<header->answer_count; ++i) { + answers[i] = *H_INDEX(dns_rr_t, rrs, i); } msg->answers = answers; + // Copy authority RRs into message struct. struct dns_rr *authority = h_arena_malloc(p->arena, - sizeof(struct dns_rr)*(header.authority_count)); - for (size_t i=0, j=header.answer_count; i<header.authority_count; ++i, ++j) { - authority[i].name = get_domain(rrs[j].seq->elements[0]); - authority[i].type = rrs[j].seq->elements[1]->uint; - authority[i].class = rrs[j].seq->elements[2]->uint; - authority[i].ttl = rrs[j].seq->elements[3]->uint; - authority[i].rdlength = rrs[j].seq->elements[4]->seq->used; - set_rr(authority[i], rrs[j].seq->elements[4]->seq); + sizeof(struct dns_rr)*(header->authority_count)); + for (size_t i=0, j=header->answer_count; i<header->authority_count; ++i, ++j) { + authority[i] = *H_INDEX(dns_rr_t, rrs, j); } msg->authority = authority; + // Copy additional RRs into message struct. 
struct dns_rr *additional = h_arena_malloc(p->arena, - sizeof(struct dns_rr)*(header.additional_count)); - for (size_t i=0, j=header.answer_count+header.authority_count; i<header.additional_count; ++i, ++j) { - additional[i].name = get_domain(rrs[j].seq->elements[0]); - additional[i].type = rrs[j].seq->elements[1]->uint; - additional[i].class = rrs[j].seq->elements[2]->uint; - additional[i].ttl = rrs[j].seq->elements[3]->uint; - additional[i].rdlength = rrs[j].seq->elements[4]->seq->used; - set_rr(additional[i], rrs[j].seq->elements[4]->seq); + sizeof(struct dns_rr)*(header->additional_count)); + for (size_t i=0, j=header->answer_count+header->authority_count; i<header->additional_count; ++i, ++j) { + additional[i] = *H_INDEX(dns_rr_t, rrs, j); } msg->additional = additional; - ret->user = (void*)msg; - return ret; + return H_MAKE(dns_message_t, msg); } +#define act_hdzero h_act_ignore +#define act_qname act_index0 + + +/// +// Grammar +/// + const HParser* init_parser() { - static HParser *dns_message = NULL; - if (dns_message) - return dns_message; - - const HParser *domain = init_domain(); - - const HParser *dns_header = h_sequence(h_bits(16, false), // ID - h_bits(1, false), // QR - h_bits(4, false), // opcode - h_bits(1, false), // AA - h_bits(1, false), // TC - h_bits(1, false), // RD - h_bits(1, false), // RA - h_ignore(h_attr_bool(h_bits(3, false), is_zero)), // Z - h_bits(4, false), // RCODE - h_uint16(), // QDCOUNT - h_uint16(), // ANCOUNT - h_uint16(), // NSCOUNT - h_uint16(), // ARCOUNT - NULL); - - const HParser *type = h_int_range(h_uint16(), 1, 16); - - const HParser *qtype = h_choice(type, - h_int_range(h_uint16(), 252, 255), - NULL); - - const HParser *class = h_int_range(h_uint16(), 1, 4); - - const HParser *qclass = h_choice(class, - h_int_range(h_uint16(), 255, 255), - NULL); - - const HParser *dns_question = h_sequence(h_sequence(h_many1(h_length_value(h_int_range(h_uint8(), 1, 255), - h_uint8())), - h_ch('\x00'), - NULL), // QNAME - qtype, // 
QTYPE - qclass, // QCLASS - NULL); - - - const HParser *dns_rr = h_sequence(domain, // NAME - type, // TYPE - class, // CLASS - h_uint32(), // TTL - h_length_value(h_uint16(), h_uint8()), // RDLENGTH+RDATA - NULL); - - - dns_message = (HParser*)h_action(h_attr_bool(h_sequence(dns_header, - h_many(dns_question), - h_many(dns_rr), - h_end_p(), - NULL), - validate_dns), - pack_dns_struct); - - return dns_message; + static const HParser *ret = NULL; + if (ret) + return ret; + + H_RULE (domain, init_domain()); + H_AVRULE(hdzero, h_bits(3, false)); + H_ARULE (header, h_sequence(h_bits(16, false), // ID + h_bits(1, false), // QR + h_bits(4, false), // opcode + h_bits(1, false), // AA + h_bits(1, false), // TC + h_bits(1, false), // RD + h_bits(1, false), // RA + hdzero, // Z + h_bits(4, false), // RCODE + h_uint16(), // QDCOUNT + h_uint16(), // ANCOUNT + h_uint16(), // NSCOUNT + h_uint16(), // ARCOUNT + NULL)); + H_RULE (type, h_int_range(h_uint16(), 1, 16)); + H_RULE (qtype, h_choice(type, + h_int_range(h_uint16(), 252, 255), + NULL)); + H_RULE (class, h_int_range(h_uint16(), 1, 4)); + H_RULE (qclass, h_choice(class, + h_int_range(h_uint16(), 255, 255), + NULL)); + H_RULE (len, h_int_range(h_uint8(), 1, 255)); + H_ARULE (label, h_length_value(len, h_uint8())); + H_ARULE (qname, h_sequence(h_many1(label), + h_ch('\x00'), + NULL)); + H_ARULE (question, h_sequence(qname, qtype, qclass, NULL)); + H_RULE (rdata, h_length_value(h_uint16(), h_uint8())); + H_ARULE (rr, h_sequence(domain, // NAME + type, // TYPE + class, // CLASS + h_uint32(), // TTL + rdata, // RDLENGTH+RDATA + NULL)); + H_AVRULE(message, h_sequence(header, + h_many(question), + h_many(rr), + h_end_p(), + NULL)); + + ret = message; + return ret; } + +/// +// Main Program for a Dummy DNS Server +/// + int start_listening() { // return: fd int sock; @@ -442,7 +283,7 @@ int start_listening() { const int TYPE_MAX = 16; typedef const char* cstr; -const char* TYPE_STR[17] = { +static const char* TYPE_STR[17] = { 
"nil", "A", "NS", "MD", "MF", "CNAME", "SOA", "MB", "MG", "MR", "NULL", "WKS", diff --git a/examples/dns.h b/examples/dns.h index 151c46e57d6718cb1c97f9336545fb124aea00a0..ed2c26f7d5c233b5e5f6764fd635da31af90002a 100644 --- a/examples/dns.h +++ b/examples/dns.h @@ -1,6 +1,27 @@ #include "../src/hammer.h" -struct dns_header { +enum DNSTokenType_ { + TT_dns_message_t = TT_USER, + TT_dns_header_t, + TT_dns_label_t, + TT_dns_qname_t, + TT_dns_question_t, + TT_dns_rr_t, + TT_dns_rr_txt_t, + TT_dns_rr_hinfo_t, + TT_dns_rr_minfo_t, + TT_dns_rr_mx_t, + TT_dns_rr_soa_t, + TT_dns_rr_wks_t, + TT_dns_rr_null_t, + TT_dns_domain_t, + TT_dns_cstr_t +}; + +typedef char *dns_domain_t; +typedef uint8_t *dns_cstr_t; + +typedef struct dns_header { uint16_t id; bool qr, aa, tc, rd, ra; char opcode, rcode; @@ -8,74 +29,93 @@ struct dns_header { size_t answer_count; size_t authority_count; size_t additional_count; -}; -struct dns_qname { +} dns_header_t; + +typedef struct dns_label { + size_t len; + uint8_t *label; +} dns_label_t; + +typedef struct dns_qname { size_t qlen; - struct { - size_t len; - uint8_t *label; - } *labels; -}; -struct dns_question { - struct dns_qname qname; + dns_label_t *labels; +} dns_qname_t; + +typedef struct dns_question { + dns_qname_t qname; uint16_t qtype; uint16_t qclass; -}; -struct dns_rr { +} dns_question_t; + +typedef struct { + dns_cstr_t cpu; + dns_cstr_t os; +} dns_rr_hinfo_t; + +typedef struct { + char* rmailbx; + char* emailbx; +} dns_rr_minfo_t; + +typedef struct { + uint16_t preference; + char* exchange; +} dns_rr_mx_t; + +typedef struct { + char* mname; + char* rname; + uint32_t serial; + uint32_t refresh; + uint32_t retry; + uint32_t expire; + uint32_t minimum; +} dns_rr_soa_t; + +typedef struct { + size_t count; + uint8_t** txt_data; +} dns_rr_txt_t; + +typedef struct { + uint32_t address; + uint8_t protocol; + size_t len; + uint8_t* bit_map; +} dns_rr_wks_t; + +typedef uint8_t *dns_rr_null_t; + +typedef struct dns_rr { char* name; uint16_t 
type; uint16_t class; uint32_t ttl; // cmos is also acceptable. uint16_t rdlength; union { - char* cname; - struct { - uint8_t* cpu; - uint8_t* os; - } hinfo; - char* mb; - char* md; - char* mf; - char* mg; - struct { - char* rmailbx; - char* emailbx; - } minfo; - char* mr; - struct { - uint16_t preference; - char* exchange; - } mx; - uint8_t* null; - char* ns; - char* ptr; - struct { - char* mname; - char* rname; - uint32_t serial; - uint32_t refresh; - uint32_t retry; - uint32_t expire; - uint32_t minimum; - } soa; - struct { - size_t count; - uint8_t** txt_data; - } txt; - uint32_t a; - struct { - uint32_t address; - uint8_t protocol; - size_t len; - uint8_t* bit_map; - } wks; + uint32_t a; + char* ns; + char* md; + char* mf; + char* cname; + dns_rr_soa_t soa; + char* mb; + char* mg; + char* mr; + dns_rr_null_t null; + dns_rr_wks_t wks; + char* ptr; + dns_rr_hinfo_t hinfo; + dns_rr_minfo_t minfo; + dns_rr_mx_t mx; + dns_rr_txt_t txt; }; -}; +} dns_rr_t; typedef struct dns_message { - struct dns_header header; - struct dns_question *questions; - struct dns_rr *answers; - struct dns_rr *authority; - struct dns_rr *additional; + dns_header_t header; + dns_question_t *questions; + dns_rr_t *answers; + dns_rr_t *authority; + dns_rr_t *additional; } dns_message_t; diff --git a/examples/dns_common.c b/examples/dns_common.c index 3d349f1e3d14c8c61964dba2624e87ac0e5cc397..76915b66e8030b26cbf23462ccaad2d944949d05 100644 --- a/examples/dns_common.c +++ b/examples/dns_common.c @@ -1,9 +1,12 @@ #include "../src/hammer.h" #include "dns_common.h" +#include "dns.h" #define false 0 #define true 1 +H_ACT_APPLY(act_index0, h_act_index, 0) + /** * A label can't be more than 63 characters. 
*/ @@ -13,51 +16,64 @@ bool validate_label(HParseResult *p) { return (64 > p->ast->seq->used); } -const HParser* init_domain() { - static const HParser *domain = NULL; - if (domain) - return domain; - - const HParser *letter = h_choice(h_ch_range('a', 'z'), - h_ch_range('A', 'Z'), - NULL); +#define act_label h_act_flatten - const HParser *let_dig = h_choice(letter, - h_ch_range('0', '9'), - NULL); +const HParsedToken* act_domain(const HParseResult *p) { + const HParsedToken *ret = NULL; + char *arr = NULL; - const HParser *ldh_str = h_many1(h_choice(let_dig, - h_ch('-'), - NULL)); + switch(p->ast->token_type) { + case TT_UINT: + arr = " "; + break; + case TT_SEQUENCE: + // Sequence of subdomains separated by "." + // Each subdomain is a label, which can be no more than 63 chars. + arr = h_arena_malloc(p->arena, 64*p->ast->seq->used); + size_t count = 0; + for (size_t i=0; i<p->ast->seq->used; ++i) { + HParsedToken *tmp = p->ast->seq->elements[i]; + for (size_t j=0; j<tmp->seq->used; ++j) { + arr[count] = tmp->seq->elements[j]->uint; // j-th character of the i-th label (was indexed with i) + ++count; + } + arr[count] = '.'; + ++count; + } + arr[count-1] = '\x00'; + break; + default: + arr = NULL; + ret = NULL; + } - const HParser *label = h_attr_bool(h_sequence(letter, - h_optional(h_sequence(h_optional(ldh_str), - let_dig, - NULL)), - NULL), - validate_label); + if(arr) { + dns_domain_t *val = H_ALLOC(dns_domain_t); // dns_domain_t is char* + *val = arr; + ret = H_MAKE(dns_domain_t, val); + } - /** - * You could write it like this ... - * HParser *indirect_subdomain = h_indirect(); - * const HParser *subdomain = h_choice(label, - * h_sequence(indirect_subdomain, - * h_ch('.'), - * label, - * NULL), - * NULL); - * h_bind_indirect(indirect_subdomain, subdomain); - * - * ... 
but this is easier and equivalent - */ + return ret; +} - const HParser *subdomain = h_sepBy1(label, h_ch('.')); +const HParser* init_domain() { + static const HParser *ret = NULL; + if (ret) + return ret; - domain = h_choice(subdomain, - h_ch(' '), - NULL); + H_RULE (letter, h_choice(h_ch_range('a','z'), h_ch_range('A','Z'), NULL)); + H_RULE (let_dig, h_choice(letter, h_ch_range('0','9'), NULL)); + H_RULE (ldh_str, h_many1(h_choice(let_dig, h_ch('-'), NULL))); + H_VARULE(label, h_sequence(letter, + h_optional(h_sequence(h_optional(ldh_str), + let_dig, + NULL)), + NULL)); + H_RULE (subdomain, h_sepBy1(label, h_ch('.'))); + H_ARULE (domain, h_choice(subdomain, h_ch(' '), NULL)); - return domain; + ret = domain; + return ret; } const HParser* init_character_string() { diff --git a/examples/dns_common.h b/examples/dns_common.h index 41d73f0d626b7760387fc6d739bc14f47b329a79..c1d8d7e9d66f98c666f08c95ff5d34fa93f874c3 100644 --- a/examples/dns_common.h +++ b/examples/dns_common.h @@ -2,8 +2,11 @@ #define HAMMER_DNS_COMMON__H #include "../src/hammer.h" +#include "../src/glue.h" const HParser* init_domain(); const HParser* init_character_string(); +const HParsedToken* act_index0(const HParseResult *p); + #endif diff --git a/examples/rr.c b/examples/rr.c index 8dae8859a208d1940ff37bcc75dbf5fb91254ff1..8c14e0aec8e678f86dfdbc54dc0499dd3a828d8e 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -1,219 +1,193 @@ #include "../src/hammer.h" #include "dns_common.h" +#include "dns.h" #include "rr.h" #define false 0 #define true 1 -const HParser* init_cname() { - static const HParser *cname = NULL; - if (cname) - return cname; - - cname = h_sequence(init_domain(), - h_end_p(), - NULL); - - return cname; -} -const HParser* init_hinfo() { - static const HParser *hinfo = NULL; - if (hinfo) - return hinfo; - - const HParser* cstr = init_character_string(); - - hinfo = h_sequence(cstr, - cstr, - h_end_p(), - NULL); +/// +// Validations and Semantic Actions +/// - return hinfo; +bool 
validate_null(HParseResult *p) { + if (TT_SEQUENCE != p->ast->token_type) + return false; + return (65536 > p->ast->seq->used); } -const HParser* init_mb() { - static const HParser *mb = NULL; - if (mb) - return mb; - - mb = h_sequence(init_domain(), - h_end_p(), - NULL); +const HParsedToken *act_null(const HParseResult *p) { + dns_rr_null_t *null = H_ALLOC(dns_rr_null_t); - return mb; -} + size_t len = h_seq_len(p->ast); + uint8_t *buf = h_arena_malloc(p->arena, sizeof(uint8_t)*len); + for (size_t i=0; i<len; ++i) + buf[i] = H_FIELD_UINT(i); -const HParser* init_md() { - static const HParser *md = NULL; - if (md) - return md; - - md = h_sequence(init_domain(), - h_end_p, - NULL); - - return md; + *null = buf; // store the copied bytes in the payload; previously *null was left uninitialized + return H_MAKE(dns_rr_null_t, null); } -const HParser* init_mf() { - static const HParser *mf = NULL; - if (mf) - return mf; - - mf = h_sequence(init_domain(), - h_end_p(), - NULL); +const HParsedToken *act_txt(const HParseResult *p) { + dns_rr_txt_t *txt = H_ALLOC(dns_rr_txt_t); - return mf; -} + const HCountedArray *arr = H_CAST_SEQ(p->ast); + uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used); + for (size_t i=0; i<arr->used; ++i) { + size_t len = h_seq_len(arr->elements[i]); + uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*len); + for (size_t j=0; j<len; ++j) + tmp[j] = H_INDEX_UINT(arr->elements[i], j); + ret[i] = tmp; + } -const HParser* init_mg() { - static const HParser *mg = NULL; - if (mg) - return mg; - - mg = h_sequence(init_domain(), - h_end_p(), - NULL); + txt->count = arr->used; + txt->txt_data = ret; - return mg; + return H_MAKE(dns_rr_txt_t, txt); } -const HParser* init_minfo() { - static const HParser *minfo = NULL; - if (minfo) - return minfo; - - const HParser* domain = init_domain(); - - minfo = h_sequence(domain, - domain, - h_end_p(), - NULL); +const HParsedToken* act_cstr(const HParseResult *p) { + dns_cstr_t *cs = H_ALLOC(dns_cstr_t); - return minfo; -} - -const HParser* init_mr() { - static const HParser *mr = NULL; - 
if (mr) - return mr; - - mr = h_sequence(init_domain(), - h_end_p(), - NULL); + const HCountedArray *arr = H_CAST_SEQ(p->ast); + uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used); + for (size_t i=0; i<arr->used; ++i) + ret[i] = H_CAST_UINT(arr->elements[i]); + assert(ret[arr->used-1] == '\0'); // XXX Is this right?! If so, shouldn't it be a validation? + *cs = ret; - return mr; + return H_MAKE(dns_cstr_t, cs); } -const HParser* init_mx() { - static const HParser *mx = NULL; - if (mx) - return mx; - - mx = h_sequence(h_uint16(), - init_domain(), - h_end_p(), - NULL); +const HParsedToken* act_soa(const HParseResult *p) { + dns_rr_soa_t *soa = H_ALLOC(dns_rr_soa_t); - return mx; -} + soa->mname = *H_FIELD(dns_domain_t, 0); + soa->rname = *H_FIELD(dns_domain_t, 1); + soa->serial = H_FIELD_UINT(2); + soa->refresh = H_FIELD_UINT(3); + soa->retry = H_FIELD_UINT(4); + soa->expire = H_FIELD_UINT(5); + soa->minimum = H_FIELD_UINT(6); -bool validate_null(HParseResult *p) { - if (TT_SEQUENCE != p->ast->token_type) - return false; - return (65536 > p->ast->seq->used); + return H_MAKE(dns_rr_soa_t, soa); } -const HParser* init_null() { - static const HParser *null_ = NULL; - if (null_) - return null_; +const HParsedToken* act_wks(const HParseResult *p) { + dns_rr_wks_t *wks = H_ALLOC(dns_rr_wks_t); - null_ = h_attr_bool(h_many(h_uint8()), validate_null); + wks->address = H_FIELD_UINT(0); + wks->protocol = H_FIELD_UINT(1); + wks->len = H_FIELD_SEQ(2)->used; + wks->bit_map = h_arena_malloc(p->arena, sizeof(uint8_t)*wks->len); + for (size_t i=0; i<wks->len; ++i) + wks->bit_map[i] = H_INDEX_UINT(p->ast, 2, i); - return null_; + return H_MAKE(dns_rr_wks_t, wks); } -const HParser* init_ns() { - static const HParser *ns = NULL; - if (ns) - return ns; +const HParsedToken* act_hinfo(const HParseResult *p) { + dns_rr_hinfo_t *hinfo = H_ALLOC(dns_rr_hinfo_t); - ns = h_sequence(init_domain(), - h_end_p(), - NULL); + hinfo->cpu = *H_FIELD(dns_cstr_t, 0); + hinfo->os = 
*H_FIELD(dns_cstr_t, 1); - return ns; + return H_MAKE(dns_rr_hinfo_t, hinfo); } -const HParser* init_ptr() { - static const HParser *ptr = NULL; - if (ptr) - return ptr; - - ptr = h_sequence(init_domain(), - h_end_p(), - NULL); - - return ptr; -} - -const HParser* init_soa() { - static const HParser *soa = NULL; - if (soa) - return soa; - - const HParser *domain = init_domain(); +const HParsedToken* act_minfo(const HParseResult *p) { + dns_rr_minfo_t *minfo = H_ALLOC(dns_rr_minfo_t); - soa = h_sequence(domain, // MNAME - domain, // RNAME - h_uint32(), // SERIAL - h_uint32(), // REFRESH - h_uint32(), // RETRY - h_uint32(), // EXPIRE - h_uint32(), // MINIMUM - h_end_p(), - NULL); + minfo->rmailbx = *H_FIELD(dns_domain_t, 0); + minfo->emailbx = *H_FIELD(dns_domain_t, 1); - return soa; + return H_MAKE(dns_rr_minfo_t, minfo); } -const HParser* init_txt() { - static const HParser *txt = NULL; - if (txt) - return txt; +const HParsedToken* act_mx(const HParseResult *p) { + dns_rr_mx_t *mx = H_ALLOC(dns_rr_mx_t); - txt = h_sequence(h_many1(init_character_string()), - h_end_p(), - NULL); + mx->preference = H_FIELD_UINT(0); + mx->exchange = *H_FIELD(dns_domain_t, 1); - return txt; + return H_MAKE(dns_rr_mx_t, mx); } -const HParser* init_a() { - static const HParser *a = NULL; - if (a) - return a; - a = h_sequence(h_uint32(), - h_end_p(), - NULL); +/// +// Parsers for all types of RDATA +/// - return a; -} - -const HParser* init_wks() { - static const HParser *wks = NULL; - if (wks) - return wks; +#define RDATA_TYPE_MAX 16 +const HParser* init_rdata(uint16_t type) { + static const HParser *parsers[RDATA_TYPE_MAX+1]; + static int inited = 0; - wks = h_sequence(h_uint32(), - h_uint8(), - h_many(h_uint8()), - h_end_p(), - NULL); - - return wks; + if (type > RDATA_TYPE_MAX) // compare against the last valid index, not the array's size in bytes + return NULL; + + if (inited) + return parsers[type]; + + + H_RULE (domain, init_domain()); + H_ARULE(cstr, init_character_string()); + + H_RULE (a, h_uint32()); + H_RULE (ns, domain); + H_RULE (md, domain); 
+ H_RULE (mf, domain); + H_RULE (cname, domain); + H_ARULE(soa, h_sequence(domain, // MNAME + domain, // RNAME + h_uint32(), // SERIAL + h_uint32(), // REFRESH + h_uint32(), // RETRY + h_uint32(), // EXPIRE + h_uint32(), // MINIMUM + NULL)); + H_RULE (mb, domain); + H_RULE (mg, domain); + H_RULE (mr, domain); + H_VRULE(null, h_many(h_uint8())); + H_RULE (wks, h_sequence(h_uint32(), + h_uint8(), + h_many(h_uint8()), + NULL)); + H_RULE (ptr, domain); + H_RULE (hinfo, h_sequence(cstr, cstr, NULL)); + H_RULE (minfo, h_sequence(domain, domain, NULL)); + H_RULE (mx, h_sequence(h_uint16(), domain, NULL)); + H_ARULE(txt, h_many1(cstr)); + + + parsers[ 0] = NULL; // there is no type 0 + parsers[ 1] = a; + parsers[ 2] = ns; + parsers[ 3] = md; + parsers[ 4] = mf; + parsers[ 5] = cname; + parsers[ 6] = soa; + parsers[ 7] = mb; + parsers[ 8] = mg; + parsers[ 9] = mr; + parsers[10] = null; + parsers[11] = wks; + parsers[12] = ptr; + parsers[13] = hinfo; + parsers[14] = minfo; + parsers[15] = mx; + parsers[16] = txt; + + // All parsers must consume their input exactly. 
+ for(uint16_t i=0; i<=RDATA_TYPE_MAX; i++) { // start at slot 0 and stop at the last array element (was uninitialized, bounded by byte size) + if(parsers[i]) { + parsers[i] = h_action(h_sequence(parsers[i], h_end_p(), NULL), + act_index0); + } + } + + inited = 1; + return parsers[type]; } diff --git a/examples/rr.h b/examples/rr.h index 54172f82d142f092252a5e542919ffdbcef93ea4..fce457817c7802fbd0cb77b688c99f9244bda86a 100644 --- a/examples/rr.h +++ b/examples/rr.h @@ -3,21 +3,6 @@ #include "../src/hammer.h" -const HParser* init_cname(); -const HParser* init_hinfo(); -const HParser* init_mb(); -const HParser* init_md(); -const HParser* init_mf(); -const HParser* init_mg(); -const HParser* init_minfo(); -const HParser* init_mr(); -const HParser* init_mx(); -const HParser* init_null(); -const HParser* init_ns(); -const HParser* init_ptr(); -const HParser* init_soa(); -const HParser* init_txt(); -const HParser* init_a(); -const HParser* init_wks(); +const HParser* init_rdata(uint16_t type); #endif diff --git a/jni/Example.java b/jni/Example.java new file mode 100644 index 0000000000000000000000000000000000000000..4e6d76825aa64ddeb9e916ea0596e457ff2751f0 --- /dev/null +++ b/jni/Example.java @@ -0,0 +1,92 @@ +import com.upstandinghackers.hammer.*; +import java.util.Arrays; +/** +* Example JHammer usage +*/ + +public class Example +{ + +static { + System.loadLibrary("jhammer"); +} + +private static void handle(ParseResult result) +{ + if(result == null) + { + System.out.println("FAIL"); + } + else + { + System.out.println("PASS"); + handleToken(result.getAst()); + } +} + +private static void handleToken(ParsedToken p) +{ + if(p==null) + { + System.out.println("Empty AST"); + return; + } + switch(p.getTokenType()) + { + case NONE: out("NONE token type"); break; + case BYTES: out("BYTES token type, value: " + Arrays.toString(p.getBytesValue())); break; + case SINT: out("SINT token type, value: " + p.getSIntValue()); break; + case UINT: out("UINT token type, value: " + p.getUIntValue()); break; + case SEQUENCE: out("SEQUENCE token type"); for(ParsedToken tok : 
p.getSeqValue()) {handleToken(tok);} break; + case ERR: out("ERR token type"); break; + case USER: out("USER token type"); break; + } +} + +private static void out(String msg) +{ + System.out.println(">> " + msg); +} + +public static void main(String args[]) +{ + out("chRange"); + handle(Hammer.parse(Hammer.chRange((byte)0x30, (byte)0x39), "1".getBytes(), 1)); + handle(Hammer.parse(Hammer.chRange((byte)0x30, (byte)0x39), "a".getBytes(), 1)); + + out("ch"); + handle(Hammer.parse(Hammer.ch((byte)0x31), "1".getBytes(), 1)); + handle(Hammer.parse(Hammer.ch((byte)0x31), "0".getBytes(), 1)); + + out("token"); + handle(Hammer.parse(Hammer.token("herp".getBytes(), 4), "herp".getBytes(), 4)); + handle(Hammer.parse(Hammer.token("herp".getBytes(), 4), "derp".getBytes(), 4)); + + out("intRange"); + byte inbytes[] = {0x31, 0x31, 0x31, 0x31}; + handle(Hammer.parse(Hammer.intRange(Hammer.uInt8(), 0L, 0x32), inbytes, inbytes.length)); + handle(Hammer.parse(Hammer.intRange(Hammer.uInt8(), 0L, 0x30), inbytes, inbytes.length)); + + out("bits"); + handle(Hammer.parse(Hammer.bits(7, false), inbytes, inbytes.length)); + + out("int64"); + byte ints[] = {(byte)0x8F, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF}; + handle(Hammer.parse(Hammer.int64(), ints, ints.length)); + handle(Hammer.parse(Hammer.int64(), inbytes, inbytes.length)); + + out("choice"); + Parser two32s[] = {Hammer.intRange(Hammer.uInt32(), 0x00, 0x01), Hammer.int32()}; + handle(Hammer.parse(Hammer.choice(Hammer.intRange(Hammer.uInt32(), 0x00, 0x01), Hammer.int32()), ints, ints.length)); + + out("sequence"); + byte i3[] = {(byte)'i', (byte)3, (byte)0xFF}; + Parser i3parsers[] = {Hammer.ch((byte)'i'), Hammer.uInt8(), Hammer.int8()}; + handle(Hammer.parse(Hammer.sequence(Hammer.ch((byte)'i'), Hammer.uInt8(), Hammer.int8()), i3, i3.length)); + + +} + + + +} diff --git a/jni/Makefile b/jni/Makefile new file mode 100644 index 
0000000000000000000000000000000000000000..85be973388223c6b9332c0d72a54f8a283e2c899 --- /dev/null +++ b/jni/Makefile @@ -0,0 +1,42 @@ +JSOURCES := Action.java Hammer.java ParsedToken.java ParseResult.java Parser.java Predicate.java +JSOURCES_NATIVE := Hammer ParsedToken Parser ParseResult +CSOURCES := com_upstandinghackers_hammer_Hammer.c com_upstandinghackers_hammer_ParsedToken.c com_upstandinghackers_hammer_Parser.c com_upstandinghackers_hammer_ParseResult.c + +# ls *.h *.o *.so com/upstandinghackers/hammer/*.class | grep -v jhammer.h | tr '\n' ' '; replace single $ with $$ +OUTPUTS := com/upstandinghackers/hammer/Action.class com/upstandinghackers/hammer/Hammer.class com_upstandinghackers_hammer_Hammer.h com_upstandinghackers_hammer_Hammer.o com/upstandinghackers/hammer/Hammer\$TokenType.class com_upstandinghackers_hammer_Hammer_TokenType.h com/upstandinghackers/hammer/ParsedToken.class com_upstandinghackers_hammer_ParsedToken.h com_upstandinghackers_hammer_ParsedToken.o com/upstandinghackers/hammer/Parser.class com/upstandinghackers/hammer/ParseResult.class com_upstandinghackers_hammer_ParseResult.h com_upstandinghackers_hammer_ParseResult.o com_upstandinghackers_hammer_Parser.h com_upstandinghackers_hammer_Parser.o com/upstandinghackers/hammer/Predicate.class libjhammer.so + +TOPLEVEL := ../ + +JC=javac +JH=javah +CP=com/upstandinghackers/hammer +PACKAGE=com.upstandinghackers.hammer + +include ../common.mk + +JNI_INCLUDE := /usr/lib/jvm/java-6-openjdk/include/ +CFLAGS += -fPIC -I. 
-I $(TOPLEVEL)/src/ -I jni -I $(JNI_INCLUDE) + +%.java: $(call ifsilent,| $(HUSH)) + $(call hush, "Compiling Java source $@") $(JC) $(CP)/$@ + +all: javacc prepare compile link + +link: compile + $(call hush, "Generating libjhammer.so") $(CC) -shared $(CFLAGS) -o libjhammer.so *.o ../src/*.o ../src/backends/*.o ../src/parsers/*.o + +$(CSOURCES): prepare + $(call hush, "Compiling $@") $(CC) -c $(CFLAGS) $@ + +compile: prepare $(CSOURCES) + +prepare: javacc $(JSOURCES_NATIVE) + +$(JSOURCES_NATIVE): javacc + $(call hush, "Generating JNI headers for $@") $(JH) $(PACKAGE).$@ + +javacc: $(JSOURCES) + +#TODO make this not-as-hardcoded +#clean: +# rm $(CP)/*.class && rm com_upstandinghackers_*.h && rm com_upstandinghackers_*.o && rm libjhammer.so diff --git a/jni/NOTES b/jni/NOTES new file mode 100644 index 0000000000000000000000000000000000000000..564b3f5af43db8859e42a52878e8637a206a6f64 --- /dev/null +++ b/jni/NOTES @@ -0,0 +1,26 @@ +USING THE JNI BINDINGS: +1. import com.upstandinghackers.hammer.*; +2. Add a static initializer block that loads the correct library, like this: static { System.loadLibrary("jhammer"); } +3. Code stuff. Just look at Example.java for a few handy snippets (for walking the resulting syntax tree [AST] etc) +4. Compile your java sources like always +5. Add the folder containing libjhammer.so/dll/whatever to Java's library path to run it, for example: java -Djava.library.path=. <CLASS> + +UNIMPLEMENTED: +User-defined types, predicates and actions are unimplemented. +Memory leaks because there is no reliable garbage collection. + +TODO: +Testing + + +TYPE MAPPING: +Hammer Java JNI +uint8_t byte jbyte jbyte/byte is signed +char byte jbyte jchar would be 16 bit wide +size_t int jint signed as well; jsize == jint, actually +int64_t long jlong +uint64_t long jlong signed! 
+bool boolean jboolean JNI_TRUE / JNI_FALSE +float float jfloat +double double jdouble +void void void diff --git a/jni/com/upstandinghackers/hammer/Action.java b/jni/com/upstandinghackers/hammer/Action.java new file mode 100644 index 0000000000000000000000000000000000000000..aecb713e44148f02aa76a1555b9d0a77bc608e21 --- /dev/null +++ b/jni/com/upstandinghackers/hammer/Action.java @@ -0,0 +1,8 @@ +package com.upstandinghackers.hammer; + +import java.util.List; + +public interface Action +{ + public List<ParsedToken> execute(ParseResult p); +} diff --git a/jni/com/upstandinghackers/hammer/Hammer.java b/jni/com/upstandinghackers/hammer/Hammer.java new file mode 100644 index 0000000000000000000000000000000000000000..3e06a91eca4b15eebbfe28c99547829a3c2d6068 --- /dev/null +++ b/jni/com/upstandinghackers/hammer/Hammer.java @@ -0,0 +1,76 @@ +package com.upstandinghackers.hammer; +import java.util.HashMap; + +public class Hammer +{ + public final static byte BYTE_BIG_ENDIAN = 0x1; + public final static byte BIT_BIG_ENDIAN = 0x2; + public final static byte BYTE_LITTLE_ENDIAN = 0x0; + public final static byte BIT_LITTLE_ENDIAN = 0x0; + + static final HashMap<Integer, TokenType> tokenTypeMap = new HashMap<Integer, TokenType>(); + + public enum TokenType + { + NONE(1), + BYTES(2), + SINT(4), + UINT(8), + SEQUENCE(16), + ERR(32), + USER(64); + + private int value; + public int getValue() { return this.value; } + private TokenType(int value) { this.value = value; } + } + + static + { + for(TokenType tt : TokenType.values()) + { + Hammer.tokenTypeMap.put(new Integer(tt.getValue()), tt); + } + } + + public static native ParseResult parse(Parser parser, byte[] input, int length); + public static native Parser token(byte[] str, int length); + public static native Parser ch(byte c); + public static native Parser chRange(byte from, byte to); + public static native Parser intRange(Parser p, long lower, long upper); + public static native Parser bits(int len, boolean sign); + public 
static native Parser int64(); + public static native Parser int32(); + public static native Parser int16(); + public static native Parser int8(); + public static native Parser uInt64(); + public static native Parser uInt32(); + public static native Parser uInt16(); + public static native Parser uInt8(); + public static native Parser whitespace(Parser p); + public static native Parser left(Parser p, Parser q); + public static native Parser right(Parser p, Parser q); + public static native Parser middle(Parser p, Parser x, Parser q); +// public static native Parser action(Parser p, Action a); + public static native Parser in(byte[] charset, int length); + public static native Parser endP(); + public static native Parser nothingP(); + public static native Parser sequence(Parser... parsers); + public static native Parser choice(Parser... parsers); + public static native Parser butNot(Parser p1, Parser p2); + public static native Parser difference(Parser p1, Parser p2); + public static native Parser xor(Parser p1, Parser p2); + public static native Parser many(Parser p); + public static native Parser many1(Parser p); + public static native Parser repeatN(Parser p, int n); + public static native Parser optional(Parser p); + public static native Parser ignore(Parser p); + public static native Parser sepBy(Parser p, Parser sep); + public static native Parser sepBy1(Parser p, Parser sep); + public static native Parser epsilonP(); + public static native Parser lengthValue(Parser length, Parser value); +// public static native Parser attrBool(Parser p, Predicate pred); + public static native Parser and(Parser p); + public static native Parser not(Parser p); + public static native Parser indirect(); +} diff --git a/jni/com/upstandinghackers/hammer/ParseResult.java b/jni/com/upstandinghackers/hammer/ParseResult.java new file mode 100644 index 0000000000000000000000000000000000000000..e5ad6c94182e9c70aedcbb8fe81445cae12614c5 --- /dev/null +++ 
b/jni/com/upstandinghackers/hammer/ParseResult.java @@ -0,0 +1,15 @@ +package com.upstandinghackers.hammer; + +import java.util.List; + +public class ParseResult +{ + public native ParsedToken getAst(); + public native long getBitLength(); + + public native void free(); + public long getInner() {return this.inner;} + + private long inner; + ParseResult(long inner) {this.inner=inner;} +} diff --git a/jni/com/upstandinghackers/hammer/ParsedToken.java b/jni/com/upstandinghackers/hammer/ParsedToken.java new file mode 100644 index 0000000000000000000000000000000000000000..efbc8ed7a388b0940e45fb00574f14ea9bc621c0 --- /dev/null +++ b/jni/com/upstandinghackers/hammer/ParsedToken.java @@ -0,0 +1,40 @@ +package com.upstandinghackers.hammer; + +public class ParsedToken +{ + public Hammer.TokenType getTokenType() + { + int tt = this.getTokenTypeInternal(); + if(0==tt) + return null; + return Hammer.tokenTypeMap.get(new Integer(tt)); + } + + private native int getTokenTypeInternal(); + public native int getIndex(); + public native byte getBitOffset(); + public native byte[] getBytesValue(); + public native long getSIntValue(); + public native long getUIntValue(); + public native double getDoubleValue(); + public native float getFloatValue(); + public native ParsedToken[] getSeqValue(); +// public native Object getUserValue(); + + native void setTokenType(Hammer.TokenType type); + native void setIndex(int index); + native void setBitOffset(byte offset); + native void setBytesValue(byte[] value); + native void setSIntValue(long value); + native void setUIntValue(long value); + native void setDoubleValue(double value); + native void setFloatValue(float value); + native void setSeqValue(ParsedToken value[]); +// native void setUserValue(Object value); + +// public native void free(); + public long getInner() {return this.inner;} + + private long inner; + ParsedToken(long inner) {this.inner=inner;} +} diff --git a/jni/com/upstandinghackers/hammer/Parser.java 
b/jni/com/upstandinghackers/hammer/Parser.java new file mode 100644 index 0000000000000000000000000000000000000000..2e924cb2ba26a6b21d777deb1bc076f9e7ec1eac --- /dev/null +++ b/jni/com/upstandinghackers/hammer/Parser.java @@ -0,0 +1,11 @@ +package com.upstandinghackers.hammer; + +public class Parser +{ + public native void bindIndirect(Parser inner); + public native void free(); + public long getInner() {return this.inner;} + + private long inner; + Parser(long inner) {this.inner=inner;} +} diff --git a/jni/com/upstandinghackers/hammer/Predicate.java b/jni/com/upstandinghackers/hammer/Predicate.java new file mode 100644 index 0000000000000000000000000000000000000000..52ca4bf7b94569906a072a73587a75b95550a1f4 --- /dev/null +++ b/jni/com/upstandinghackers/hammer/Predicate.java @@ -0,0 +1,6 @@ +package com.upstandinghackers.hammer; + +public interface Predicate +{ + public boolean apply(ParseResult p); +} diff --git a/jni/com_upstandinghackers_hammer_Hammer.c b/jni/com_upstandinghackers_hammer_Hammer.c new file mode 100644 index 0000000000000000000000000000000000000000..f83414bc8bd9c443a338e16300f4eb3bd3637d37 --- /dev/null +++ b/jni/com_upstandinghackers_hammer_Hammer.c @@ -0,0 +1,335 @@ +#include "jhammer.h" +#include "com_upstandinghackers_hammer_Hammer.h" +#include <stdlib.h> + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_parse + (JNIEnv *env, jclass class, jobject obj, jbyteArray input_, jint length_) +{ + HParser *parser; + uint8_t* input; + size_t length; + HParseResult *result; + jclass resultClass; + jobject retVal; + + parser = UNWRAP(env, obj); + + input = (uint8_t *) ((*env)->GetByteArrayElements(env, input_, NULL)); + length = (size_t) length_; + + result = h_parse(parser, input, length); + + if(result==NULL) + return NULL; + + FIND_CLASS(resultClass, env, "com/upstandinghackers/hammer/ParseResult"); + + NEW_INSTANCE(retVal, env, resultClass, result); + + return retVal; +} + +JNIEXPORT jobject JNICALL 
Java_com_upstandinghackers_hammer_Hammer_token + (JNIEnv *env, jclass class, jbyteArray str, jint len) +{ + RETURNWRAP(env, h_token((uint8_t *) ((*env)->GetByteArrayElements(env, str, NULL)), (size_t) len)); +} + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_ch + (JNIEnv *env, jclass class, jbyte c) +{ + RETURNWRAP(env, h_ch((uint8_t) c)); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_chRange + (JNIEnv *env, jclass class, jbyte lower, jbyte upper) +{ + + RETURNWRAP(env, h_ch_range((uint8_t) lower, (uint8_t) upper)); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_intRange + (JNIEnv *env, jclass class, jobject obj, jlong lower, jlong upper) +{ + HParser *parser; + parser = UNWRAP(env, obj); + RETURNWRAP(env, h_int_range(parser, (int64_t) lower, (int64_t) upper)); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_bits + (JNIEnv *env, jclass class, jint len, jboolean sign) +{ + RETURNWRAP(env, h_bits((size_t) len, (bool)(sign & JNI_TRUE))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_int64 + (JNIEnv *env, jclass class) +{ + RETURNWRAP(env, h_int64()); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_int32 + (JNIEnv *env, jclass class) +{ + RETURNWRAP(env, h_int32()); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_int16 + (JNIEnv *env, jclass class) +{ + RETURNWRAP(env, h_int16()); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_int8 + (JNIEnv *env, jclass class) +{ + RETURNWRAP(env, h_int8()); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_uInt64 + (JNIEnv *env, jclass class) +{ + RETURNWRAP(env, h_uint64()); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_uInt32 + (JNIEnv *env, jclass class) +{ + RETURNWRAP(env, h_uint32()); +} + + +JNIEXPORT jobject JNICALL 
Java_com_upstandinghackers_hammer_Hammer_uInt16 + (JNIEnv *env, jclass class) +{ + RETURNWRAP(env, h_uint16()); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_uInt8 + (JNIEnv *env, jclass class) +{ + RETURNWRAP(env, h_uint8()); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_whitespace + (JNIEnv *env, jclass class, jobject parser) +{ + RETURNWRAP(env, h_whitespace(UNWRAP(env, parser))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_left + (JNIEnv *env, jclass class, jobject p, jobject q) +{ + RETURNWRAP(env, h_left(UNWRAP(env, p), UNWRAP(env, q))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_right + (JNIEnv *env, jclass class, jobject p, jobject q) +{ + RETURNWRAP(env, h_right(UNWRAP(env, p), UNWRAP(env, q))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_middle + (JNIEnv *env, jclass class, jobject p, jobject x, jobject q) +{ + RETURNWRAP(env, h_middle(UNWRAP(env, p), UNWRAP(env, x), UNWRAP(env, q))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_in + (JNIEnv *env, jclass class, jbyteArray charset, jint length) +{ + RETURNWRAP(env, h_in((uint8_t *) ((*env)->GetByteArrayElements(env, charset, NULL)), (size_t)length)); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_endP + (JNIEnv *env, jclass class) +{ + RETURNWRAP(env, h_end_p()); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_nothingP + (JNIEnv *env, jclass class) +{ + RETURNWRAP(env, h_nothing_p()); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_sequence + (JNIEnv *env, jclass class, jobjectArray sequence) +{ + jsize length; + void **parsers; + int i; + jobject current; + const HParser *result; + + length = (*env)->GetArrayLength(env, sequence); + parsers = malloc(sizeof(void *)*(length+1)); + if(NULL==parsers) + { + return NULL; + } + + for(i=0; i<length; i++) 
+ { + current = (*env)->GetObjectArrayElement(env, sequence, (jsize)i); + parsers[i] = UNWRAP(env, current); + } + parsers[length] = NULL; + + result = h_sequence__a(parsers); + RETURNWRAP(env, result); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_choice + (JNIEnv *env, jclass class, jobjectArray choices) +{ + jsize length; + void **parsers; + int i; + jobject current; + const HParser *result; + + length = (*env)->GetArrayLength(env, choices); + parsers = malloc(sizeof(HParser *)*(length+1)); + if(NULL==parsers) + { + return NULL; + } + + for(i=0; i<length; i++) + { + current = (*env)->GetObjectArrayElement(env, choices, (jsize)i); + parsers[i] = UNWRAP(env, current); + } + parsers[length] = NULL; + + result = h_choice__a(parsers); + RETURNWRAP(env, result); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_butNot + (JNIEnv *env, jclass class, jobject p, jobject q) +{ + RETURNWRAP(env, h_butnot(UNWRAP(env, p), UNWRAP(env, q))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_difference + (JNIEnv *env, jclass class, jobject p, jobject q) +{ + RETURNWRAP(env, h_difference(UNWRAP(env, p), UNWRAP(env, q))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_xor + (JNIEnv *env, jclass class, jobject p, jobject q) +{ + RETURNWRAP(env, h_xor(UNWRAP(env, p), UNWRAP(env, q))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_many + (JNIEnv *env, jclass class, jobject p) +{ + RETURNWRAP(env, h_many(UNWRAP(env, p))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_many1 + (JNIEnv *env, jclass class, jobject p) +{ + RETURNWRAP(env, h_many1(UNWRAP(env, p))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_repeatN + (JNIEnv *env, jclass class, jobject p, jint n) +{ + RETURNWRAP(env, h_repeat_n(UNWRAP(env, p), (size_t)n)); +} + + +JNIEXPORT jobject JNICALL 
Java_com_upstandinghackers_hammer_Hammer_optional + (JNIEnv *env, jclass class, jobject p) +{ + RETURNWRAP(env, h_optional(UNWRAP(env, p))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_ignore + (JNIEnv *env, jclass class, jobject p) +{ + RETURNWRAP(env, h_ignore(UNWRAP(env, p))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_sepBy + (JNIEnv *env, jclass class, jobject p, jobject sep) +{ + RETURNWRAP(env, h_sepBy(UNWRAP(env, p), UNWRAP(env, sep))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_sepBy1 + (JNIEnv *env, jclass class, jobject p, jobject sep) +{ + RETURNWRAP(env, h_sepBy1(UNWRAP(env, p), UNWRAP(env, sep))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_epsilonP + (JNIEnv *env, jclass class) +{ + RETURNWRAP(env, h_epsilon_p()); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_lengthValue + (JNIEnv *env, jclass class, jobject length, jobject value) +{ + RETURNWRAP(env, h_length_value(UNWRAP(env, length), UNWRAP(env, value))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_and + (JNIEnv *env, jclass class, jobject p) +{ + RETURNWRAP(env, h_and(UNWRAP(env, p))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_not + (JNIEnv *env, jclass class, jobject p) +{ + RETURNWRAP(env, h_not(UNWRAP(env, p))); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_indirect + (JNIEnv *env, jclass class) +{ + RETURNWRAP(env, h_indirect()); +} + + + diff --git a/jni/com_upstandinghackers_hammer_ParseResult.c b/jni/com_upstandinghackers_hammer_ParseResult.c new file mode 100644 index 0000000000000000000000000000000000000000..ad45fe3a6d9ce238fae7c05d2c999ad7cc1ae19e --- /dev/null +++ b/jni/com_upstandinghackers_hammer_ParseResult.c @@ -0,0 +1,45 @@ +#include "jhammer.h" +#include "com_upstandinghackers_hammer_ParseResult.h" + +HParseResult 
*unwrap_parse_result(JNIEnv *env, jobject obj) +{ + jclass parseResultClass; + jfieldID parseResultInner; + FIND_CLASS(parseResultClass, env, "com/upstandinghackers/hammer/ParseResult"); + parseResultInner = (*env)->GetFieldID(env, parseResultClass, "inner", "J"); + return (HParseResult *)((*env)->GetLongField(env, obj, parseResultInner)); +} + + +JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_ParseResult_getAst + (JNIEnv *env, jobject this) +{ + HParseResult *inner; + jclass parsedTokenClass; + jobject retVal; + + if(this == NULL) + return NULL; // parse unsuccessful + inner = unwrap_parse_result(env, this); + if(inner->ast == NULL) + return NULL; // parse successful, but empty + + FIND_CLASS(parsedTokenClass, env, "com/upstandinghackers/hammer/ParsedToken"); + NEW_INSTANCE(retVal, env, parsedTokenClass, inner->ast); + return retVal; + +} + +JNIEXPORT jlong JNICALL Java_com_upstandinghackers_hammer_ParseResult_getBitLength + (JNIEnv *env, jobject this) +{ + HParseResult *inner = unwrap_parse_result(env, this); + return (jlong) (inner->bit_length); +} + +JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParseResult_free + (JNIEnv *env, jobject this) +{ + //XXX: NOT IMPLEMENTED +} + diff --git a/jni/com_upstandinghackers_hammer_ParsedToken.c b/jni/com_upstandinghackers_hammer_ParsedToken.c new file mode 100644 index 0000000000000000000000000000000000000000..e863738a4b80f9c281ec907b86153a25fee99980 --- /dev/null +++ b/jni/com_upstandinghackers_hammer_ParsedToken.c @@ -0,0 +1,195 @@ +#include "jhammer.h" +#include "com_upstandinghackers_hammer_ParsedToken.h" + +#define HPT_UNWRAP(env, this) HParsedToken *inner = unwrap_parsed_token(env, this); assert(inner!=NULL) + +HParsedToken *unwrap_parsed_token(JNIEnv *env, jobject obj) +{ + jclass parsedTokenClass; + jfieldID parsedTokenInner; + FIND_CLASS(parsedTokenClass, env, "com/upstandinghackers/hammer/ParsedToken"); + parsedTokenInner = (*env)->GetFieldID(env, parsedTokenClass, "inner", "J"); + return 
(HParsedToken *)((*env)->GetLongField(env, obj, parsedTokenInner)); +} + + +JNIEXPORT jint JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getTokenTypeInternal + (JNIEnv *env, jobject this) +{ + HPT_UNWRAP(env, this); + if(inner==NULL) + return (jint)0; + return (jint)(inner->token_type); +} + +JNIEXPORT jint JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getIndex + (JNIEnv *env, jobject this) +{ + HPT_UNWRAP(env, this); + return (jint) (inner->index); +} + +JNIEXPORT jbyte JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getBitOffset + (JNIEnv *env, jobject this) +{ + HPT_UNWRAP(env, this); + return (jbyte) (inner->bit_offset); +} + +JNIEXPORT jbyteArray JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getBytesValue + (JNIEnv *env, jobject this) +{ + jbyteArray outArray; + HPT_UNWRAP(env, this); + outArray = (*env)->NewByteArray(env, (jsize)inner->bytes.len); + (*env)->SetByteArrayRegion(env, outArray, (jsize) 0, (jsize)(inner->bytes.len), (jbyte *)(inner->bytes.token)); + return outArray; +} + +JNIEXPORT jlong JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getSIntValue + (JNIEnv *env, jobject this) +{ + HPT_UNWRAP(env, this); + return (jlong) (inner->sint); +} + +JNIEXPORT jlong JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getUIntValue + (JNIEnv *env, jobject this) +{ + HPT_UNWRAP(env, this); + return (jlong) (inner->uint); +} + +JNIEXPORT jdouble JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getDoubleValue + (JNIEnv *env, jobject this) +{ + HPT_UNWRAP(env, this); + return (jdouble) (inner->dbl); +} + +JNIEXPORT jfloat JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getFloatValue + (JNIEnv *env, jobject this) +{ + HPT_UNWRAP(env, this); + return (jfloat) (inner->flt); +} + +JNIEXPORT jobjectArray JNICALL Java_com_upstandinghackers_hammer_ParsedToken_getSeqValue + (JNIEnv *env, jobject this) +{ + jsize i; + HPT_UNWRAP(env, this); + jsize returnSize = inner->seq->used; + jobject currentObject; + jclass 
returnClass; + FIND_CLASS(returnClass, env, "com/upstandinghackers/hammer/ParsedToken"); + jobjectArray retVal = (*env)->NewObjectArray(env, returnSize, returnClass, NULL); + for(i = 0; i<returnSize; i++) + { + NEW_INSTANCE(currentObject, env, returnClass, inner->seq->elements[i]); + (*env)->SetObjectArrayElement(env, retVal, i, currentObject); + } + return retVal; +} + +JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setTokenType + (JNIEnv *env, jobject this, jobject tokenType) +{ + jclass tokenTypeClass; + jmethodID getValue; + jint typeVal; + HPT_UNWRAP(env, this); + + FIND_CLASS(tokenTypeClass, env, "com/upstandinghackers/hammer/Hammer$TokenType"); + getValue = (*env)->GetMethodID(env, tokenTypeClass, "getValue", "()I"); + typeVal = (*env)->CallIntMethod(env, tokenType, getValue); + + inner->token_type = (int32_t) typeVal; // unsafe cast, but enums should be of type int +} + +JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setIndex + (JNIEnv *env, jobject this, jint index) +{ + HPT_UNWRAP(env, this); + inner->index = (size_t)index; +} + +JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setBitOffset + (JNIEnv *env, jobject this, jbyte bit_offset) +{ + HPT_UNWRAP(env, this); + inner->bit_offset = (char)bit_offset; +} + +JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setBytesValue + (JNIEnv *env, jobject this, jbyteArray bytes_) +{ + HBytes bytes; + HPT_UNWRAP(env, this); + + bytes.token = (uint8_t *) ((*env)->GetByteArrayElements(env, bytes_, NULL)); + bytes.len = (size_t) (*env)->GetArrayLength(env, bytes_); + + inner->bytes = bytes; + inner->token_type = TT_BYTES; +} + +JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setSIntValue + (JNIEnv *env, jobject this, jlong sint) +{ + HPT_UNWRAP(env, this); + inner->token_type = TT_SINT; + inner->sint = (int64_t)sint; +} + +JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setUIntValue + (JNIEnv 
*env, jobject this, jlong uint) +{ + HPT_UNWRAP(env, this); + inner->token_type = TT_UINT; + inner->uint = (uint64_t)uint; +} + +JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setDoubleValue + (JNIEnv *env, jobject this, jdouble dbl) +{ + HPT_UNWRAP(env, this); + //token_type? + inner->dbl = (double)dbl; +} + +JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setFloatValue + (JNIEnv *env, jobject this, jfloat flt) +{ + HPT_UNWRAP(env, this); + //token_type? + inner->flt = (float)flt; +} + +JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_ParsedToken_setSeqValue + (JNIEnv *env, jobject this, jobjectArray values) +{ + HArena *arena; + size_t len, i; + jobject currentValue; + HParsedToken *currentValueInner; + HCountedArray *seq; + HPT_UNWRAP(env, this); + len = (size_t) (*env)->GetArrayLength(env, values); + arena = h_new_arena(&system_allocator, 0); + seq = h_carray_new_sized(arena, len); + + // unwrap each value and append it to the new HCountedArray + for(i = 0; i<len; i++) + { + currentValue = (*env)->GetObjectArrayElement(env, values, (jsize)i); + if(NULL == currentValue) + continue; + currentValueInner = unwrap_parsed_token(env, currentValue); + if(currentValueInner) + h_carray_append(seq, (void *)currentValueInner); + } + + inner->token_type = TT_SEQUENCE; + inner->seq = seq; +} diff --git a/jni/com_upstandinghackers_hammer_Parser.c b/jni/com_upstandinghackers_hammer_Parser.c new file mode 100644 index 0000000000000000000000000000000000000000..4e95e23455b6733ef74d0fdacb9731def2264b02 --- /dev/null +++ b/jni/com_upstandinghackers_hammer_Parser.c @@ -0,0 +1,15 @@ +#include "jhammer.h" +#include "com_upstandinghackers_hammer_Parser.h" + +JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_Parser_bindIndirect + (JNIEnv *env, jobject this, jobject parser) +{ + h_bind_indirect(UNWRAP(env, this), UNWRAP(env, parser)); +} + +JNIEXPORT void JNICALL Java_com_upstandinghackers_hammer_Parser_free + (JNIEnv *env, 
jobject this) +{ + //XXX NOT IMPLEMENTED + //h_free(UNWRAP(env, this)); +} diff --git a/jni/jhammer.h b/jni/jhammer.h new file mode 100644 index 0000000000000000000000000000000000000000..077777b723ec09867d30472d5da586a5c6aa9425 --- /dev/null +++ b/jni/jhammer.h @@ -0,0 +1,31 @@ +#ifndef JHAMMER_H +#define JHAMMER_H +#include <jni.h> +#include "internal.h" +#include <assert.h> + +// Unsafe (non-asserting) helpers +#define FIND_CLASS_(env, class) (*env)->FindClass(env, class) +#define REFCONSTRUCTOR_(env, class) (*env)->GetMethodID(env, class, "<init>", "(J)V") +#define NEW_INSTANCE_(env, class, inner) (*env)->NewObject(env, class, REFCONSTRUCTOR_(env, class), (jlong)inner) + +// Safer versions, assert that the result is not NULL +// If one of those asserts fails, it most likely means that there's a typo (wrong class name or method signature) or big trouble (OOM) +#define FIND_CLASS(target, env, class) target = FIND_CLASS_(env, class); assert(target != NULL) +#define REFCONSTRUCTOR(target, env, class) target = REFCONSTRUCTOR_(env, class); assert(target != NULL) +#define NEW_INSTANCE(target, env, class, inner) target = NEW_INSTANCE_(env, class, inner); assert(target != NULL) + + +// Since there's a LOT of wrapping/unwrapping HParsers, these macros make it a bit more readable +#define PARSER_CLASS "com/upstandinghackers/hammer/Parser" +#define PARSER_REF(env) (*env)->GetFieldID(env, FIND_CLASS_(env, PARSER_CLASS), "inner", "J") + +#define RETURNWRAP(env, inner) jclass __cls=FIND_CLASS_(env, PARSER_CLASS); \ + assert(__cls != NULL); \ + jmethodID __constructor = REFCONSTRUCTOR_(env, __cls); \ + assert(__constructor != NULL); \ + return (*env)->NewObject(env, __cls, __constructor, (jlong)inner) + +#define UNWRAP(env, object) (HParser *)((*env)->GetLongField(env, object, PARSER_REF(env))) + +#endif diff --git a/src/Makefile b/src/Makefile index 13cbf84b6180880ff467cd9077ba11a706bf0cd1..94690d36dd510997b31f5f0e7621886193a4992c 100644 --- a/src/Makefile +++ b/src/Makefile 
@@ -42,6 +42,8 @@ HAMMER_PARTS := \ benchmark.o \ cfgrammar.o \ actions.o \ + compile.o \ + glue.o \ $(PARSERS:%=parsers/%.o) \ $(BACKENDS:%=backends/%.o) @@ -50,6 +52,7 @@ TESTS := t_benchmark.o \ t_bitwriter.o \ t_parser.o \ t_grammar.o \ + t_misc.o \ test_suite.o OUTPUTS := libhammer.a \ @@ -65,12 +68,15 @@ include ../common.mk $(TESTS): CFLAGS += $(TEST_CFLAGS) $(TESTS): LDFLAGS += $(TEST_LDFLAGS) +CFLAGS += -fPIC + all: libhammer.a libhammer.a: $(HAMMER_PARTS) bitreader.o: test_suite.h hammer.o: hammer.h +glue.o: hammer.h glue.h all: libhammer.a diff --git a/src/backends/packrat.c b/src/backends/packrat.c index 1b7acf291a6668296bbab673b5174d36b9e6cbeb..c5c9565f272caab47aeab2f59592bf93dd40d524 100644 --- a/src/backends/packrat.c +++ b/src/backends/packrat.c @@ -86,14 +86,18 @@ HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { if (!rec_detect->head) { HRecursionHead *some = a_new(HRecursionHead, 1); - some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL; + some->head_parser = p; + some->involved_set = h_slist_new(state->arena); + some->eval_set = NULL; rec_detect->head = some; } assert(state->lr_stack->head != NULL); - HLeftRec *lr = state->lr_stack->head->elem; - while (lr && lr->rule != p) { + HSlistNode *head = state->lr_stack->head; + HLeftRec *lr; + while (head && (lr = head->elem)->rule != p) { lr->head = rec_detect->head; h_slist_push(lr->head->involved_set, (void*)lr->rule); + head = head->next; } } @@ -110,7 +114,7 @@ HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) HParseResult *old_res = old_cached->right->result; // reset the eval_set of the head of the recursion at each beginning of growth - head->eval_set = head->involved_set; + head->eval_set = h_slist_copy(head->involved_set); HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); if (tmp_res) { diff --git a/src/datastructures.c 
b/src/datastructures.c index 9d91540b4919bc81f3b3fd4eacb0059ee30c1d4e..078104090940303ff747c3f9dc0473826bea679d 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -41,6 +41,26 @@ HSlist* h_slist_new(HArena *arena) { return ret; } +HSlist* h_slist_copy(HSlist *slist) { + HSlist *ret = h_slist_new(slist->arena); + HSlistNode *head = slist->head; + HSlistNode *tail; + if (head != NULL) { + h_slist_push(ret, head->elem); + tail = ret->head; + head = head->next; + } + while (head != NULL) { + // append head item to tail in a new node + HSlistNode *node = h_arena_malloc(slist->arena, sizeof(HSlistNode)); + node->elem = head->elem; + node->next = NULL; + tail = tail->next = node; + head = head->next; + } + return ret; +} + void* h_slist_pop(HSlist *slist) { HSlistNode *head = slist->head; if (!head) diff --git a/src/glue.c b/src/glue.c new file mode 100644 index 0000000000000000000000000000000000000000..f1e086a5fa4d3153b9382ea6c09449f933e90e48 --- /dev/null +++ b/src/glue.c @@ -0,0 +1,177 @@ +#include "glue.h" +#include "../src/internal.h" // for h_carray_* + + +// The action equivalent of h_ignore. +const HParsedToken *h_act_ignore(const HParseResult *p) +{ + return NULL; +} + +// Helper to build HAction's that pick one index out of a sequence. +const HParsedToken *h_act_index(int i, const HParseResult *p) +{ + if(!p) return NULL; + + const HParsedToken *tok = p->ast; + + if(!tok || tok->token_type != TT_SEQUENCE) + return NULL; + + const HCountedArray *seq = tok->seq; + size_t n = seq->used; + + if(i<0 || (size_t)i>=n) + return NULL; + else + return tok->seq->elements[i]; +} + +// Action version of h_seq_flatten. +const HParsedToken *h_act_flatten(const HParseResult *p) { + return h_seq_flatten(p->arena, p->ast); +} + +// Low-level helper for the h_make family. 
+HParsedToken *h_make_(HArena *arena, HTokenType type) +{ + HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken)); + ret->token_type = type; + return ret; +} + +HParsedToken *h_make(HArena *arena, HTokenType type, void *value) +{ + assert(type >= TT_USER); + HParsedToken *ret = h_make_(arena, type); + ret->user = value; + return ret; +} + +HParsedToken *h_make_seq(HArena *arena) +{ + HParsedToken *ret = h_make_(arena, TT_SEQUENCE); + ret->seq = h_carray_new(arena); + return ret; +} + +HParsedToken *h_make_seqn(HArena *arena, size_t n) +{ + HParsedToken *ret = h_make_(arena, TT_SEQUENCE); + ret->seq = h_carray_new_sized(arena, n); + return ret; +} + +HParsedToken *h_make_bytes(HArena *arena, size_t len) +{ + HParsedToken *ret = h_make_(arena, TT_BYTES); + ret->bytes.len = len; + ret->bytes.token = h_arena_malloc(arena, len); + return ret; +} + +HParsedToken *h_make_sint(HArena *arena, int64_t val) +{ + HParsedToken *ret = h_make_(arena, TT_SINT); + ret->sint = val; + return ret; +} + +HParsedToken *h_make_uint(HArena *arena, uint64_t val) +{ + HParsedToken *ret = h_make_(arena, TT_UINT); + ret->uint = val; + return ret; +} + +// XXX -> internal +HParsedToken *h_carray_index(const HCountedArray *a, size_t i) +{ + assert(i < a->used); + return a->elements[i]; +} + +size_t h_seq_len(const HParsedToken *p) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return p->seq->used; +} + +HParsedToken **h_seq_elements(const HParsedToken *p) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return p->seq->elements; +} + +HParsedToken *h_seq_index(const HParsedToken *p, size_t i) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return h_carray_index(p->seq, i); +} + +HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...) 
+{ + va_list va; + + va_start(va, i); + HParsedToken *ret = h_seq_index_vpath(p, i, va); + va_end(va); + + return ret; +} + +HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va) +{ + HParsedToken *ret = h_seq_index(p, i); + int j; + + while((j = va_arg(va, int)) >= 0) // a negative index terminates the path + ret = h_seq_index(ret, j); // descend into the element selected so far, not the root + + return ret; +} + +void h_seq_snoc(HParsedToken *xs, const HParsedToken *x) +{ + assert(xs != NULL); + assert(xs->token_type == TT_SEQUENCE); + + h_carray_append(xs->seq, (HParsedToken *)x); +} + +void h_seq_append(HParsedToken *xs, const HParsedToken *ys) +{ + assert(xs != NULL); + assert(xs->token_type == TT_SEQUENCE); + assert(ys != NULL); + assert(ys->token_type == TT_SEQUENCE); + + for(size_t i=0; i<ys->seq->used; i++) + h_carray_append(xs->seq, ys->seq->elements[i]); +} + +// Flatten nested sequences. Always returns a sequence. +// If input element is not a sequence, returns it as a singleton sequence. +const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) +{ + assert(p != NULL); + + HParsedToken *ret = h_make_seq(arena); + switch(p->token_type) { + case TT_SEQUENCE: + // Flatten and append all. + for(size_t i=0; i<p->seq->used; i++) { + h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); + } + break; + default: + // Make singleton sequence. + h_seq_snoc(ret, p); + break; + } + + return ret; +} diff --git a/src/glue.h b/src/glue.h new file mode 100644 index 0000000000000000000000000000000000000000..3125ae07f308b363e25716777cc689c5e6c607c7 --- /dev/null +++ b/src/glue.h @@ -0,0 +1,253 @@ +// +// API additions for writing grammar and semantic actions more concisely +// +// +// Quick Overview: +// +// Grammars can be succinctly specified with the family of H_RULE macros. +// H_RULE defines a plain parser variable. H_ARULE additionally attaches a +// semantic action; H_VRULE attaches a validation. H_AVRULE and H_VARULE +// combine both. +// +// A few standard semantic actions are defined below. 
The H_ACT_APPLY macro +// allows semantic actions to be defined by "partial application" of +// a generic action to fixed parameters. +// +// The definition of more complex semantic actions will usually consist of +// extracting data from the given parse tree and constructing a token of custom +// type to represent the result. A number of functions and convenience macros +// are provided to capture the most common cases and idioms. +// +// See the leading comment blocks on the sections below for more details. +// + +#ifndef HAMMER_GLUE__H +#define HAMMER_GLUE__H + +#include <assert.h> +#include "hammer.h" + + +// +// Grammar specification +// +// H_RULE is simply a short-hand for the typical declaration and definition of +// a parser variable. See its plain definition below. The goal is to save +// horizontal space as well as to provide a clear and unified look together with +// the other macro variants that stays close to an abstract PEG or BNF grammar. +// The latter goal is more specifically enabled by H_ARULE, H_VRULE, and their +// combinations as they allow the definition of syntax to be given without +// intermingling it with the semantic specifications. +// +// H_ARULE defines a variable just like H_RULE but attaches a semantic action +// to the result of the parser via h_action. The action is expected to be +// named act_<rulename>. +// +// H_VRULE is analogous to H_ARULE but attaches a validation via h_attr_bool. +// The validation is expected to be named validate_<rulename>. +// +// H_VARULE combines H_RULE with both an action and a validation. The action is +// attached before the validation, i.e. the validation receives as input the +// result of the action. +// +// H_AVRULE is like H_VARULE but the action is attached outside the validation, +// i.e. the validation receives the uninterpreted AST as input. 
+// + + +#define H_RULE(rule, def) const HParser *rule = def +#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) +#define H_VRULE(rule, def) const HParser *rule = \ + h_attr_bool(def, validate_ ## rule) +#define H_VARULE(rule, def) const HParser *rule = \ + h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule) +#define H_AVRULE(rule, def) const HParser *rule = \ + h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule) + + +// +// Pre-fab semantic actions +// +// A collection of generally useful semantic actions is provided. +// +// h_act_ignore is the action equivalent of the parser combinator h_ignore. It +// simply causes the AST it is applied to to be replaced with NULL. This most +// importantly causes it to be elided from the result of a surrounding +// h_sequence. +// +// h_act_index is of note as it is not itself suitable to be passed to +// h_action. It is parameterized by an index to be picked from a sequence +// token. It must be wrapped in a proper HAction to be used. The H_ACT_APPLY +// macro provides a concise way to define such a parameter-application wrapper. +// +// h_act_flatten acts on a token of possibly nested sequences by recursively +// flattening it into a single sequence. Cf. h_seq_flatten below. +// +// H_ACT_APPLY implements "partial application" for semantic actions. It +// defines a new action that supplies given parameters to a parameterized +// action such as h_act_index. +// + +const HParsedToken *h_act_ignore(const HParseResult *p); +const HParsedToken *h_act_index(int i, const HParseResult *p); +const HParsedToken *h_act_flatten(const HParseResult *p); + +// Define 'myaction' as a specialization of 'paction' by supplying the leading +// parameters. +#define H_ACT_APPLY(myaction, paction, ...) 
\ + const HParsedToken *myaction(const HParseResult *p) { \ + return paction(__VA_ARGS__, p); \ + } + + +// +// Working with HParsedTokens +// +// The type HParsedToken represents a dynamically-typed universe of values. +// Declared below are constructors to turn ordinary values into their +// HParsedToken equivalents, extractors to retrieve the original values from +// inside an HParsedToken, and functions that inspect and modify tokens of +// sequence type directly. +// +// In addition, there are a number of short-hand macros that work with some +// conventions to eliminate common boilerplate. These conventions are listed +// below. Be sure to follow them if you want to use the respective macros. +// +// * The single argument to semantic actions should be called 'p'. +// +// The H_MAKE macros supply 'p->arena' to their underlying h_make +// counterparts. The H_FIELD macros supply 'p->ast' to their underlying +// H_INDEX counterparts. +// +// * For each custom token type, there should be a typedef for the +// corresponding value type. +// +// H_CAST, H_INDEX and H_FIELD cast the void * user field of such a token to +// a pointer to the given type. +// +// * For each custom token type, say 'foo_t', there must be an integer +// constant 'TT_foo_t' to identify the token type. This constant must have a +// value greater than or equal to TT_USER. +// +// One idiom is to define an enum for all custom token types and to assign a +// value of TT_USER to the first element. This can be viewed as extending +// the HTokenType enum. +// +// The H_MAKE and H_ASSERT macros derive the name of the token type constant +// from the given type name. +// +// +// The H_ALLOC macro is useful for allocating values of custom token types. +// +// The H_MAKE family of macros construct tokens of a given type. The native +// token types are indicated by a corresponding suffix such as in H_MAKE_SEQ. +// The form with no suffix is used for custom token types.
This convention is +// also used for other macro and function families. +// +// The H_ASSERT family simply asserts that a given token has the expected type. +// It mainly serves as an implementation aid for H_CAST. Of note in that regard +// is that, unlike the standard 'assert' macro, these form _expressions_ that +// return the value of their token argument; thus they can be used in a +// "pass-through" fashion inside other expressions. +// +// The H_CAST family combines a type assertion with access to the +// statically-typed value inside a token. +// +// A number of functions h_seq_* operate on and inspect sequence tokens. +// Note that H_MAKE_SEQ takes no arguments and constructs an empty sequence. +// Therefore there are h_seq_snoc and h_seq_append to build up sequences. +// +// The macro families H_FIELD and H_INDEX combine index access on a sequence +// with a cast to the appropriate result type. H_FIELD is used to access the +// elements of the argument token 'p' in an action. H_INDEX allows any sequence +// token to be specified. Both macro families take an arbitrary number of index +// arguments, giving access to elements in nested sequences by path. +// These macros are very useful to avoid spaghetti chains of unchecked pointer +// dereferences. +// + +// Standard short-hand for arena-allocating a variable in a semantic action. +#define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) + +// Token constructors... + +HParsedToken *h_make(HArena *arena, HTokenType type, void *value); +HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. +HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n. +HParsedToken *h_make_bytes(HArena *arena, size_t len); +HParsedToken *h_make_sint(HArena *arena, int64_t val); +HParsedToken *h_make_uint(HArena *arena, uint64_t val); + +// Standard short-hands to make tokens in an action. 
+#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) +#define H_MAKE_SEQ() h_make_seq(p->arena) +#define H_MAKE_SEQN(N) h_make_seqn(p->arena, N) +#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN) +#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) +#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) + +// Extract (cast) type-specific value back from HParsedTokens... + +// Pass-through assertion that a given token has the expected type. +#define h_assert_type(T,P) (assert(P->token_type == (HTokenType)T), P) + +// Convenience short-hand forms of h_assert_type. +#define H_ASSERT(TYP, TOK) h_assert_type(TT_ ## TYP, TOK) +#define H_ASSERT_SEQ(TOK) h_assert_type(TT_SEQUENCE, TOK) +#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK) +#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK) +#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK) + +// Assert expected type and return contained value. +#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user) +#define H_CAST_SEQ(TOK) (H_ASSERT_SEQ(TOK)->seq) +#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes) +#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint) +#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint) + +// Sequence access... + +// Return the length of a sequence. +size_t h_seq_len(const HParsedToken *p); + +// Access a sequence's element array. +HParsedToken **h_seq_elements(const HParsedToken *p); + +// Access a sequence element by index. +HParsedToken *h_seq_index(const HParsedToken *p, size_t i); + +// Access an element in a nested sequence by a path of indices. +HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...); +HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va); + +// Convenience macros combining (nested) index access and h_cast. +#define H_INDEX(TYP, SEQ, ...) H_CAST(TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_SEQ(SEQ, ...) H_CAST_SEQ(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_BYTES(SEQ, ...) 
H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_SINT(SEQ, ...) H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1) + +// Standard short-hand to access and cast elements on a sequence token. +#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__) +#define H_FIELD_SEQ(...) H_INDEX_SEQ(p->ast, __VA_ARGS__) +#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__) +#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__) +#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__) + +// Lower-level helper for h_seq_index. +HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal + +// Sequence modification... + +// Add elements to a sequence. +void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one +void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many + +// XXX TODO: Remove elements from a sequence. + +// Flatten nested sequences into one. 
+const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p); + + +#endif diff --git a/src/hammer.h b/src/hammer.h index b872ee6a75379334b0f72c4f1212389a9b3edd85..ebbec52a8fcc4b439d435fb7b79599ccddfc0025 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -42,14 +42,15 @@ typedef enum HParserBackend_ { } HParserBackend; typedef enum HTokenType_ { - TT_NONE, - TT_BYTES, - TT_SINT, - TT_UINT, - TT_SEQUENCE, + // Before you change the explicit values of these, think of the poor bindings ;_; + TT_NONE = 1, + TT_BYTES = 2, + TT_SINT = 4, + TT_UINT = 8, + TT_SEQUENCE = 16, TT_RESERVED_1, // reserved for backend-specific internal use + TT_ERR = 32, TT_USER = 64, - TT_ERR, TT_MAX } HTokenType; @@ -60,13 +61,15 @@ typedef struct HCountedArray_ { struct HParsedToken_ **elements; } HCountedArray; +typedef struct HBytes_ { + const uint8_t *token; + size_t len; +} HBytes; + typedef struct HParsedToken_ { HTokenType token_type; union { - struct { - const uint8_t *token; - size_t len; - } bytes; + HBytes bytes; int64_t sint; uint64_t uint; double dbl; @@ -175,14 +178,18 @@ typedef struct HBenchmarkResults_ { rtype_t name(__VA_ARGS__, ...); \ rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...); \ rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \ - rtype_t name##__v(__VA_ARGS__, va_list ap) + rtype_t name##__v(__VA_ARGS__, va_list ap); \ + rtype_t name##__a(void *args[]); \ + rtype_t name##__ma(HAllocator *mm__, void *args[]) // Note: this drops the attributes on the floor for the __v versions #define HAMMER_FN_DECL_VARARGS_ATTR(attr, rtype_t, name, ...) \ rtype_t name(__VA_ARGS__, ...) attr; \ rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...) 
attr; \ rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \ - rtype_t name##__v(__VA_ARGS__, va_list ap) + rtype_t name##__v(__VA_ARGS__, va_list ap); \ + rtype_t name##__a(void *args[]); \ + rtype_t name##__ma(HAllocator *mm__, void *args[]) // }}} diff --git a/src/internal.h b/src/internal.h index 251b2c6dcb317384c4686e95dfb02a90e3256ead..889e5c3b1c377d567a0d79fcc632d3dc8e8b36f7 100644 --- a/src/internal.h +++ b/src/internal.h @@ -243,6 +243,7 @@ HCountedArray *h_carray_new(HArena * arena); void h_carray_append(HCountedArray *array, void* item); HSlist* h_slist_new(HArena *arena); +HSlist* h_slist_copy(HSlist *slist); void* h_slist_pop(HSlist *slist); void h_slist_push(HSlist *slist, void* item); bool h_slist_find(HSlist *slist, const void* item); diff --git a/src/parsers/choice.c b/src/parsers/choice.c index 30cc725b051e836ba1d48508a506ea89e22cbb60..5485f2f35cdbee4a0513dab2200bf306255d6550 100644 --- a/src/parsers/choice.c +++ b/src/parsers/choice.c @@ -126,3 +126,27 @@ HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) { return h_new_parser(mm__, &choice_vt, s); } +HParser* h_choice__a(void *args[]) { + return h_choice__ma(&system_allocator, args); +} + +HParser* h_choice__ma(HAllocator* mm__, void *args[]) { + size_t len = -1; // because do...while + const HParser *arg; + + do { + arg=((HParser **)args)[++len]; + } while(arg); + + HSequence *s = h_new(HSequence, 1); + s->p_array = h_new(const HParser *, len); + + for (size_t i = 0; i < len; i++) { + s->p_array[i] = ((HParser **)args)[i]; + } + + s->len = len; + // Construct the parser via h_new_parser, exactly as h_choice__mv does, + // instead of hand-filling only the vtable and env fields of a raw HParser. + return h_new_parser(mm__, &choice_vt, s); +} diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c index cf27ded7132ddcdb22f2fd6dd7d1b83b67432033..4de218339bde91d72918bc1d7b1dedfee681197c 100644 --- a/src/parsers/sequence.c +++ b/src/parsers/sequence.c @@ -144,3 +144,28 @@ HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) { s->len = len;
return h_new_parser(mm__, &sequence_vt, s); } + +HParser* h_sequence__a(void *args[]) { + return h_sequence__ma(&system_allocator, args); +} + +HParser* h_sequence__ma(HAllocator* mm__, void *args[]) { + size_t len = -1; // because do...while + const HParser *arg; + + do { + arg=((HParser **)args)[++len]; + } while(arg); + + HSequence *s = h_new(HSequence, 1); + s->p_array = h_new(const HParser *, len); + + for (size_t i = 0; i < len; i++) { + s->p_array[i] = ((HParser **)args)[i]; + } + + s->len = len; + // Construct the parser via h_new_parser, exactly as h_sequence__mv does, + // instead of hand-filling only the vtable and env fields of a raw HParser. + return h_new_parser(mm__, &sequence_vt, s); +} diff --git a/src/parsers/whitespace.c b/src/parsers/whitespace.c index 73c558ef7d0a010953420bb72628008b324cd4d6..89ce23b0d992692fccec210484198d276935f7cd 100644 --- a/src/parsers/whitespace.c +++ b/src/parsers/whitespace.c @@ -9,7 +9,7 @@ static HParseResult* parse_whitespace(void* env, HParseState *state) { bak = state->input_stream; c = h_read_bits(&state->input_stream, 8, false); if (state->input_stream.overrun) - return NULL; + break; } while (isspace(c)); state->input_stream = bak; return h_do_parse((HParser*)env, state); diff --git a/src/pprint.c b/src/pprint.c index 1ffe764349f265f9d381ed10ae3f7567649bd3ec..d8b22e2d2fa1d317db15334b70021696c054dd9d 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -69,7 +69,11 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { fprintf(stream, "%*sUSER\n", indent, ""); break; default: - assert_message(0, "Should not reach here."); + if(tok->token_type > TT_USER) { + fprintf(stream, "%*sUSER %d\n", indent, "", tok->token_type-TT_USER); + } else { + assert_message(0, "Should not reach here."); + } } } diff --git a/src/t_misc.c b/src/t_misc.c new file mode 100644 index 0000000000000000000000000000000000000000..5c08a2eb64a4db685bb841996d9471969233e70c --- /dev/null +++ b/src/t_misc.c @@ -0,0 +1,16 @@ +#include <glib.h> +#include "test_suite.h" +#include "hammer.h" + +static void test_tt_user(void) { +
g_check_cmpint(TT_USER, >, TT_NONE); + g_check_cmpint(TT_USER, >, TT_BYTES); + g_check_cmpint(TT_USER, >, TT_SINT); + g_check_cmpint(TT_USER, >, TT_UINT); + g_check_cmpint(TT_USER, >, TT_SEQUENCE); + g_check_cmpint(TT_USER, >, TT_ERR); +} + +void register_misc_tests(void) { + g_test_add_func("/core/misc/tt_user", test_tt_user); +} diff --git a/src/t_parser.c b/src/t_parser.c index b525a1f46d88aadc18fefc920ae9e8cb5318a8c2..3071043f25720b7843fd8e354e1d2481e7d5826b 100644 --- a/src/t_parser.c +++ b/src/t_parser.c @@ -116,12 +116,17 @@ static void test_float32(gconstpointer backend) { static void test_whitespace(gconstpointer backend) { const HParser *whitespace_ = h_whitespace(h_ch('a')); + const HParser *whitespace_end = h_whitespace(h_end_p()); g_check_parse_ok(whitespace_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61"); g_check_parse_ok(whitespace_, (HParserBackend)GPOINTER_TO_INT(backend), " a", 2, "u0x61"); g_check_parse_ok(whitespace_, (HParserBackend)GPOINTER_TO_INT(backend), " a", 3, "u0x61"); g_check_parse_ok(whitespace_, (HParserBackend)GPOINTER_TO_INT(backend), "\ta", 2, "u0x61"); g_check_parse_failed(whitespace_, (HParserBackend)GPOINTER_TO_INT(backend), "_a", 2); + + g_check_parse_ok(whitespace_end, (HParserBackend)GPOINTER_TO_INT(backend), "", 0, "NULL"); + g_check_parse_ok(whitespace_end, (HParserBackend)GPOINTER_TO_INT(backend)," ", 2, "NULL"); + g_check_parse_failed(whitespace_end, (HParserBackend)GPOINTER_TO_INT(backend)," x", 3); } static void test_left(gconstpointer backend) { @@ -395,6 +400,17 @@ static void test_not(gconstpointer backend) { g_check_parse_ok(not_2, (HParserBackend)GPOINTER_TO_INT(backend), "a++b", 4, "(u0x61 <2b.2b> u0x62)"); } +static void test_leftrec(gconstpointer backend) { + const HParser *a_ = h_ch('a'); + + HParser *lr_ = h_indirect(); + h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL)); + + g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "a", 1, "u0x61"); + 
g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aa", 2, "(u0x61 u0x61)"); + g_check_parse_ok(lr_, (HParserBackend)GPOINTER_TO_INT(backend), "aaa", 3, "((u0x61 u0x61) u0x61)"); +} + void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/token", GINT_TO_POINTER(PB_PACKRAT), test_token); g_test_add_data_func("/core/parser/packrat/ch", GINT_TO_POINTER(PB_PACKRAT), test_ch); @@ -437,6 +453,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/packrat/and", GINT_TO_POINTER(PB_PACKRAT), test_and); g_test_add_data_func("/core/parser/packrat/not", GINT_TO_POINTER(PB_PACKRAT), test_not); g_test_add_data_func("/core/parser/packrat/ignore", GINT_TO_POINTER(PB_PACKRAT), test_ignore); + g_test_add_data_func("/core/parser/packrat/leftrec", GINT_TO_POINTER(PB_PACKRAT), test_leftrec); // per-backend path, like its neighbours g_test_add_data_func("/core/parser/llk/token", GINT_TO_POINTER(PB_LLk), test_token); g_test_add_data_func("/core/parser/llk/ch", GINT_TO_POINTER(PB_LLk), test_ch); @@ -473,6 +490,7 @@ void register_parser_tests(void) { g_test_add_data_func("/core/parser/llk/epsilon_p", GINT_TO_POINTER(PB_LLk), test_epsilon_p); g_test_add_data_func("/core/parser/llk/attr_bool", GINT_TO_POINTER(PB_LLk), test_attr_bool); g_test_add_data_func("/core/parser/llk/ignore", GINT_TO_POINTER(PB_LLk), test_ignore); + g_test_add_data_func("/core/parser/llk/leftrec", GINT_TO_POINTER(PB_LLk), test_leftrec); // distinct from the packrat registration g_test_add_data_func("/core/parser/regex/token", GINT_TO_POINTER(PB_REGULAR), test_token); g_test_add_data_func("/core/parser/regex/ch", GINT_TO_POINTER(PB_REGULAR), test_ch); diff --git a/src/test_suite.c b/src/test_suite.c index 109c2e2f4ee16a498a926f377b15628c97fbff4b..e065f138ece71d5b09c4aa0e1060a2e944f1d283 100644 --- a/src/test_suite.c +++ b/src/test_suite.c @@ -23,6 +23,7 @@ extern void register_bitreader_tests(); extern void register_bitwriter_tests(); extern void register_parser_tests(); extern void register_grammar_tests(); +extern void register_misc_tests(); extern void
register_benchmark_tests(); int main(int argc, char** argv) { @@ -33,6 +34,7 @@ int main(int argc, char** argv) { register_bitwriter_tests(); register_parser_tests(); register_grammar_tests(); + register_misc_tests(); register_benchmark_tests(); g_test_run();