From 670551a4279fdc45c43b276d01633eeff43ccc9d Mon Sep 17 00:00:00 2001
From: Pompolic <pompolic@special-circumstanc.es>
Date: Thu, 13 Feb 2020 20:24:24 +0100
Subject: [PATCH] WIP: grammar definition for ASCIIHexDecode

---
 pdf.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 83 insertions(+), 1 deletion(-)

diff --git a/pdf.c b/pdf.c
index 23cf121..3e4cb37 100644
--- a/pdf.c
+++ b/pdf.c
@@ -222,6 +222,27 @@ act_hupper(const HParseResult *p, void *u)
 	return H_MAKE_UINT(H_CAST_UINT(p->ast) - 'A');
 }
 
+/*
+ * Semantic action for the final, unpaired hex digit of an ASCIIHexDecode
+ * stream.  p->ast is the two-element sequence (hdigit, '>').
+ *
+ * The missing low nibble is taken to be zero: the digit's value is
+ * shifted into the high nibble and returned as a UINT token.
+ */
+HParsedToken *
+act_ahextruncated(const HParseResult *p, void *u)
+{
+	HCountedArray *seq = H_CAST_SEQ(p->ast);
+	uint8_t b;
+
+	/* the grammar guarantees one hex digit followed by '>' (EOD) */
+	assert(seq->used == 2);
+	assert(H_CAST_UINT(seq->elements[1]) == '>');
+
+	b = H_CAST_UINT(seq->elements[0]) << 4;
+	return H_MAKE_UINT(b);
+}
+
 HParsedToken *
 act_nat(const HParseResult *p, void *u)
 {
@@ -1043,6 +1064,63 @@ FlateDecode(const Dict *parms, HBytes b, HParser *p)
 	return res;
 }
 
+/*
+ * Decodes ASCII hexadecimal data into binary data.
+ * parms should be empty, because the filter has no parameters.
+ *
+ * WIP: the result of parsing 'b' with the hex-stream grammar is returned
+ * as is; 'p' is not applied to the decoded data yet.
+ *
+ * Returns NULL (after printing a message to stderr) on a parse error.
+ */
+HParseResult *
+ASCIIHexDecode(const Dict *parms, HBytes b, HParser *p)
+{
+	HParseResult *res;
+
+	H_RULE(lwchar, IN(LWCHARS));
+	H_RULE(hexws, h_many(lwchar));
+	H_ARULE(digit, h_ch_range('0', '9'));
+	H_ARULE(hlower, h_ch_range('a', 'f'));
+	H_ARULE(hupper, h_ch_range('A', 'F'));
+	H_RULE(ahexeod, h_ch('>'));
+
+	/* white space may appear anywhere; swallow it after every digit */
+	H_RULE(hdigit, h_left(CHX(digit, hlower, hupper), hexws));
+	H_RULE(hdigitpair, SEQ(hdigit, hdigit));
+	H_ARULE(ahextruncated, SEQ(hdigit, ahexeod));
+
+	/*
+	 * h_many() is greedy and does not give a pair back, so the stream
+	 * end must not begin with another full pair.  Accept an odd final
+	 * digit plus '>', a bare '>' (dropped from the result), or the end
+	 * of the input.
+	 */
+	H_RULE(hs_end, CHX(ahextruncated, h_ignore(ahexeod), h_end_p()));
+	H_RULE(hexstream, h_right(hexws, SEQ(h_many(hdigitpair), hs_end)));
+
+	res = h_parse(hexstream, b.token, b.len);
+	if (!res) {
+		fprintf(stderr, "parse error in ASCIIHexDecode filter\n");
+		return NULL;
+	}
+
+	return res;
+}
+
+/*
+ * Decodes ASCII base-85 encoded data and produces binary data.
+ * parms should be empty, because the filter has no parameters.
+ *
+ * Not implemented yet: prints a message to stderr and returns NULL.
+ */
+HParseResult *
+ASCII85Decode(const Dict *parms, HBytes b, HParser *p)
+{
+	fprintf(stderr, "ASCII85Decode: not implemented\n");
+	return NULL;
+}
+
 /*
  * decode the bytes in 'b' according to metadata in the stream dictionary 'd'
  * and parse the result with 'p'.
@@ -1066,11 +1144,15 @@ decode_stream(const Dict *d, HBytes b, HParser *p)
 		return NULL;	// XXX filter chains not supported, yet
 	assert(v->token_type == TT_BYTES);
 	if (bytes_eq(v->bytes, "FlateDecode"))
 		filter = FlateDecode;
+	else if (bytes_eq(v->bytes, "ASCIIHexDecode"))
+		filter = ASCIIHexDecode;
+	else if (bytes_eq(v->bytes, "ASCII85Decode"))
+		filter = ASCII85Decode;
 	else
 		return NULL;		/* filter not supported */
 
-	v = dictentry(d, "DecodeParms");
+	v = dictentry(d, "DecodeParms"); // XXX ASCII filters don't use DecodeParms
 	if (v && v->token_type == TT_Dict)
 		parms = v->user;
 
-- 
GitLab