From 670551a4279fdc45c43b276d01633eeff43ccc9d Mon Sep 17 00:00:00 2001
From: Pompolic <pompolic@special-circumstanc.es>
Date: Thu, 13 Feb 2020 20:24:24 +0100
Subject: [PATCH] WIP: grammar definition for ASCIIHexDecode

---
 pdf.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 62 insertions(+), 2 deletions(-)

diff --git a/pdf.c b/pdf.c
index 23cf121..3e4cb37 100644
--- a/pdf.c
+++ b/pdf.c
@@ -222,6 +222,21 @@ act_hupper(const HParseResult *p, void *u)
 	return H_MAKE_UINT(H_CAST_UINT(p->ast) - 'A');
 }
 
+HParsedToken *
+act_ahextruncated(const HParseResult *p, void *u)
+{
+	HCountedArray *seq = H_CAST_SEQ(p->ast);
+	uint8_t b;
+
+	/* Odd-length data ends in a lone digit before '>' (EOD); the digit
+	 * becomes the high nibble and the missing low nibble counts as 0.
+	 */
+	assert(seq->used == 2);
+	assert(H_CAST_UINT(seq->elements[1]) == '>');
+	b = H_CAST_UINT(seq->elements[0]) << 4;
+	return H_MAKE_UINT(b);
+}
+
 HParsedToken *
 act_nat(const HParseResult *p, void *u)
 {
@@ -1043,6 +1058,47 @@ FlateDecode(const Dict *parms, HBytes b, HParser *p)
 	return res;
 }
 
+/*
+ * Decodes ASCII hexadecimal data: pairs of hex digits ended by '>' (EOD),
+ * with whitespace (as defined by LWCHARS) allowed around any digit or EOD.
+ * parms is ignored, because the filter has no parameters.
+ * NOTE: p is not used yet; the result is the hex grammar's own parse tree.
+ */
+HParseResult *
+ASCIIHexDecode(const Dict *parms, HBytes b, HParser *p)
+{
+	HParseResult *res;
+	H_RULE(lwchar,	IN(LWCHARS));
+	H_RULE(lws,	h_many(lwchar));
+	H_ARULE(digit,	h_ch_range('0', '9'));
+	H_ARULE(hlower,	h_ch_range('a', 'f'));
+	H_ARULE(hupper,	h_ch_range('A', 'F'));
+	H_RULE(ahexeod,	h_middle(lws, h_ch('>'), lws));
+	H_RULE(hdigit,	h_middle(lws, CHX(digit, hlower, hupper), lws));
+	H_RULE(hdigitpair, SEQ(hdigit, hdigit));
+	H_ARULE(ahextruncated, SEQ(hdigit, ahexeod));
+
+	/* h_many() is greedy: after the pairs comes a lone digit + EOD, or just EOD */
+	H_RULE(hs_end, CHX(ahextruncated, ahexeod));
+	H_RULE(hexstream, SEQ(h_many(hdigitpair), hs_end));
+
+	res = h_parse(hexstream, b.token, b.len);
+	if (!res)
+		fprintf(stderr, "parse error in ASCIIHexDecode filter\n");
+	return res;
+}
+
+/*
+ * Placeholder for the ASCII base-85 filter: reports on stderr that decoding
+ * is not implemented and returns NULL. All three arguments are ignored.
+ */
+HParseResult *
+ASCII85Decode(const Dict *parms, HBytes b, HParser *p)
+{
+	fputs("ASCII85Decode: not implemented\n", stderr);
+	return NULL;
+}
+
 /*
  * decode the bytes in 'b' according to metadata in the stream dictionary 'd'
  * and parse the result with 'p'.
@@ -1066,11 +1122,15 @@ decode_stream(const Dict *d, HBytes b, HParser *p)
 		return NULL;	// XXX filter chains not supported, yet
 	assert(v->token_type == TT_BYTES);
 	if (bytes_eq(v->bytes, "FlateDecode"))
-		filter = FlateDecode;
+		filter = FlateDecode; // XXX add ASCIIHexDecode and ASCII85Decode here
+	else if (bytes_eq(v->bytes, "ASCIIHexDecode"))
+		filter = ASCIIHexDecode;
+	else if (bytes_eq(v->bytes, "ASCII85Decode"))
+		filter = ASCII85Decode;
 	else
 		return NULL;		/* filter not supported */
 
-	v = dictentry(d, "DecodeParms");
+	v = dictentry(d, "DecodeParms"); // XXX ASCII filters don't use DecodeParms
 	if (v && v->token_type == TT_Dict)
 		parms = v->user;
 
-- 
GitLab