From b4ee6122909f038f4b38293912e9d54b378d82eb Mon Sep 17 00:00:00 2001
From: Pompolic <pompolic@special-circumstanc.es>
Date: Wed, 19 Feb 2020 16:41:12 +0100
Subject: [PATCH] Ascii85Decode semantic actions and grammar

---
Reviewer notes (text between the "---" line and the diff is not part of
the commit message):

 * a85partialgroup listed a85partial4group twice and never tried
   a85partial2group. It now tries the 4-, 3- and 2-digit groups in turn.
 * act_a85partialgroup read an uninitialised loop index, shifted by
   (3 - i*8) instead of (3 - i)*8, counted five bytes for a four-byte
   buffer and returned arena memory it had never written to. It could not
   be repaired in place: the number it was handed did not say how many
   digits had been parsed. The act_a85partialNgroup actions now build the
   n - 1 bytes themselves and act_a85partialgroup is gone.
 * Partial groups were padded with zero digits, which can leave the last
   decoded byte one too low. They are now padded with 'u' (84).
 * The range check on partial groups ran after an assert() on the same
   condition, so input such as a trailing "uu" aborted the process instead
   of failing to parse. The rules now use H_AVRULE, which validates the
   digits before the action runs.
 * This patch was recovered from a copy that had lost its line breaks and
   indentation. Tabs and the position of blank context lines are
   reconstructed from the hunk line counts. The blob id on the "index" line
   is the one from the original submission.

 pdf.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 88 insertions(+), 17 deletions(-)

diff --git a/pdf.c b/pdf.c
index cf3c308..1db9e37 100644
--- a/pdf.c
+++ b/pdf.c
@@ -240,7 +240,7 @@ act_ahextruncated(const HParseResult *p, void *u)
 HParsedToken *
 act_a85zero(const HParseResult *p, void *u)
 {
-	uint8_t b = 0;
+	uint32_t b = 0;
 	return H_MAKE_UINT(b);
 }
 
@@ -278,40 +278,108 @@ act_a85fivedigits(const HParseResult *p, void *u)
 	return H_MAKE_UINT(fourbytes);
 }
 
+/* Checking the following condition in the spec:
+ * The value represented by a group of 5 characters is greater than 2^32 - 1.
+ */
+bool
+validate_a85fivedigits(HParseResult *p, void *u)
+{
+	/* "s8W-!" should be the highest accepted value */
+	return H_CAST_UINT(p->ast) <= A85GRPMAX;
+}
+
+/* Split the 32-bit value of a full group into bytes, most significant first. */
 HParsedToken *
 act_a85group(const HParseResult *p, void *u)
 {
+	uint8_t *bytes = h_arena_malloc(p->arena, 4);
+	uint32_t fourbytes = H_CAST_UINT(p->ast);
+
+	bytes[0] = (fourbytes & 0xFF000000) >> 24;
+	bytes[1] = (fourbytes & 0x00FF0000) >> 16;
+	bytes[2] = (fourbytes & 0x0000FF00) >> 8;
+	bytes[3] = (fourbytes & 0x000000FF);
 
+	HParsedToken *b = H_MAKE_BYTES(bytes, 4);
+	return b;
 }
 
+/*
+ * Numeric value of a final, partial group of ndigits (2 to 4) digits.
+ *
+ * The missing digits are taken to be 'u' (84), the highest digit, not zero:
+ * the encoder dropped the tail of a full group, so the value it encoded is
+ * at least the zero-padded one, and only rounding up is sure to leave the
+ * leading bytes intact.
+ */
+static uint64_t
+a85partial_value(const HParsedToken *group, size_t ndigits)
+{
+	HParsedToken **digits = h_seq_elements(group);
+	uint64_t value = 0;
+
+	assert(H_CAST_SEQ(group)->used == ndigits);
+	for (size_t i = 0; i < 5; i++)
+		value = value * 85 + (i < ndigits ? H_CAST_UINT(digits[i]) : 84);
+
+	return value;
+}
+
+/*
+ * Bytes of a partial group: n digits stand for the first n - 1 bytes of a
+ * four-byte group, most significant byte first.
+ */
+static HParsedToken *
+a85partial_bytes(const HParseResult *p, size_t ndigits)
+{
+	uint64_t value = a85partial_value(p->ast, ndigits);
+	size_t nbytes = ndigits - 1;
+	uint8_t *bytes = h_arena_malloc(p->arena, nbytes);
+
+	for (size_t i = 0; i < nbytes; i++)
+		bytes[i] = (value >> (8 * (3 - i))) & 0xFF;
+
+	return H_MAKE_BYTES(bytes, nbytes);
+}
+
 HParsedToken *
 act_a85partial2group(const HParseResult *p, void *u)
 {
-
+	return a85partial_bytes(p, 2);
 }
+
+bool
+validate_a85partial2group(HParseResult *p, void *u)
+{
+	return a85partial_value(p->ast, 2) <= A85GRPMAX;
+}
 
 HParsedToken *
 act_a85partial3group(const HParseResult *p, void *u)
 {
-
+	return a85partial_bytes(p, 3);
 }
+
+bool
+validate_a85partial3group(HParseResult *p, void *u)
+{
+	return a85partial_value(p->ast, 3) <= A85GRPMAX;
+}
 
 HParsedToken *
 act_a85partial4group(const HParseResult *p, void *u)
 {
-
+	return a85partial_bytes(p, 4);
 }
 
-/* Checking the following condition in the spec:
- * The value represented by a group of 5 characters is greater than 2^32 - 1.
-*/
 bool
-validate_a85fivedigits(HParseResult *p, void *u)
+validate_a85partial4group(HParseResult *p, void *u)
 {
-	// XXX test with "s8W-!"
-	return H_CAST_UINT(p->ast) <= A85GRPMAX;
+	return a85partial_value(p->ast, 4) <= A85GRPMAX;
 }
 
+// TODO: flatten sequence in a85string semantic action
+
 HParsedToken *
 act_nat(const HParseResult *p, void *u)
 {
@@ -1186,15 +1254,18 @@ ASCII85Decode(const Dict *parms, HBytes b, HParser *p)
 	/* This encoding of zero is not allowed */
 	H_RULE(a85fiveexcl, h_repeat_n(MANY_LWS(h_ch('!')), 5));
 	H_VARULE(a85fivedigits, SEQ(h_and(h_not(a85fiveexcl)), h_repeat_n(MANY_LWS(a85digit), 5)));
-	//H_RULE(a85digitws, SEQ(a85digit, OPT(
-	H_ARULE(a85group, CHX(a85zero, h_repeat_n(MANY_LWS(a85digit), 5)));
-	// XXX semantic actions need cleaning
-	H_ARULE(a85partial2group, h_repeat_n(MANY_LWS(a85digit), 2));
-	H_ARULE(a85partial3group, h_repeat_n(MANY_LWS(a85digit), 3));
-	H_ARULE(a85partial4group, h_repeat_n(MANY_LWS(a85digit), 4));
+	H_ARULE(a85group, CHX(a85zero, a85fivedigits));
+
+	/* validate first: the actions rely on the value fitting in 32 bits */
+	H_AVRULE(a85partial2group, h_repeat_n(MANY_LWS(a85digit), 2));
+	H_AVRULE(a85partial3group, h_repeat_n(MANY_LWS(a85digit), 3));
+	H_AVRULE(a85partial4group, h_repeat_n(MANY_LWS(a85digit), 4));
+
+	/* the longest group is tried first */
+	H_RULE(a85partialgroup, CHX(a85partial4group, a85partial3group, a85partial2group));
 
 
-	H_RULE(a85string, SEQ(h_many(a85group), OPT(CHX(a85partial2group, a85partial3group, a85partial4group)), a85eod));
+	H_RULE(a85string, SEQ(h_many(a85group), OPT(a85partialgroup), IGN(a85eod)));
 
 	res = h_parse(a85string, b.token, b.len);
 	if(!res)
-- 
GitLab