From b4ee6122909f038f4b38293912e9d54b378d82eb Mon Sep 17 00:00:00 2001
From: Pompolic <pompolic@special-circumstanc.es>
Date: Wed, 19 Feb 2020 16:41:12 +0100
Subject: [PATCH] Ascii85Decode semantic actions and grammar

---
 pdf.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 92 insertions(+), 13 deletions(-)

diff --git a/pdf.c b/pdf.c
index cf3c308..1db9e37 100644
--- a/pdf.c
+++ b/pdf.c
@@ -240,7 +240,7 @@ act_ahextruncated(const HParseResult *p, void *u)
 HParsedToken *
 act_a85zero(const HParseResult *p, void *u)
 {
-	uint8_t b = 0;
+	uint32_t b = 0;
 	return H_MAKE_UINT(b);
 }
 
@@ -278,40 +278,118 @@ act_a85fivedigits(const HParseResult *p, void *u)
 	return H_MAKE_UINT(fourbytes);
 }
 
+/* Guards against the error condition from the spec: the value represented
+ * by a group of 5 characters is greater than 2^32 - 1.
+ * "s8W-!" should be the highest accepted group. */
+bool
+validate_a85fivedigits(HParseResult *p, void *u)
+{
+	const uint64_t group = H_CAST_UINT(p->ast);
+	return group <= A85GRPMAX;
+}
+
 HParsedToken *
 act_a85group(const HParseResult *p, void *u)
 {
+	/* Turn the 32-bit group value in p->ast into 4 bytes, most significant first. */
+	uint8_t *out = h_arena_malloc(p->arena, 4);
+	uint32_t word = H_CAST_UINT(p->ast);
+
+	/* shift amounts are 24, 16, 8, 0 */
+	for (int k = 0; k < 4; k++)
+		out[k] = (uint8_t)(word >> (8 * (3 - k)));
 
+	/* the token refers to the arena buffer filled above */
+	return H_MAKE_BYTES(out, 4);
 }
 
 HParsedToken *
 act_a85partial2group(const HParseResult *p, void *u)
 {
+	/* Value of a 2-digit trailing group: the digits are weighted by 85^4 and 85^3. */
+	HCountedArray *seq = H_CAST_SEQ(p->ast);
+	HParsedToken **digits = h_seq_elements(p->ast);
 
+	assert(seq->used == 2);
+	uint64_t fourbytes = H_CAST_UINT(digits[0]) * (85ULL * 85 * 85 * 85);
+	fourbytes += H_CAST_UINT(digits[1]) * (85ULL * 85 * 85);
+
+	/* No range assert: bad input must fail the parse, not abort; validate_a85partial2group checks this result. */
+	return H_MAKE_UINT(fourbytes);
+}
+
+bool
+validate_a85partial2group(HParseResult *p, void *u)
+{
+	return H_CAST_UINT(p->ast) <= A85GRPMAX;	/* true only when the group value in p->ast does not exceed A85GRPMAX */
 }
 
 HParsedToken *
 act_a85partial3group(const HParseResult *p, void *u)
 {
+	/* Value of a 3-digit trailing group: the digits are weighted by 85^4, 85^3 and 85^2. */
+	HCountedArray *seq = H_CAST_SEQ(p->ast);
+	HParsedToken **digits = h_seq_elements(p->ast);
+
+	assert(seq->used == 3);
+	uint64_t fourbytes = H_CAST_UINT(digits[0]) * (85ULL * 85 * 85 * 85);
+	fourbytes += H_CAST_UINT(digits[1]) * (85ULL * 85 * 85);
+	fourbytes += H_CAST_UINT(digits[2]) * (85ULL * 85);
 
+	/* No range assert: bad input must fail the parse, not abort; validate_a85partial3group checks this result. */
+	return H_MAKE_UINT(fourbytes);
+}
+
+bool
+validate_a85partial3group(HParseResult *p, void *u)
+{
+	return H_CAST_UINT(p->ast) <= A85GRPMAX;	/* true only when the group value in p->ast does not exceed A85GRPMAX */
 }
 
 HParsedToken *
 act_a85partial4group(const HParseResult *p, void *u)
 {
+	/* Value of a 4-digit trailing group: the digits are weighted by 85^4 down to 85^1. */
+	HCountedArray *seq = H_CAST_SEQ(p->ast);
+	HParsedToken **digits = h_seq_elements(p->ast);
+
+	assert(seq->used == 4);
+	uint64_t fourbytes = H_CAST_UINT(digits[0]) * (85ULL * 85 * 85 * 85);
+	fourbytes += H_CAST_UINT(digits[1]) * (85ULL * 85 * 85);
+	fourbytes += H_CAST_UINT(digits[2]) * (85ULL * 85);
+	fourbytes += H_CAST_UINT(digits[3]) * 85ULL;
 
+	/* No range assert: bad input must fail the parse, not abort; validate_a85partial4group checks this result. */
+	return H_MAKE_UINT(fourbytes);
 }
 
-/* Checking the following condition in the spec:
- * The value represented by a group of 5 characters is greater than 2^32 - 1.
-*/
 bool
-validate_a85fivedigits(HParseResult *p, void *u)
+validate_a85partial4group(HParseResult *p, void *u)
 {
-	// XXX test with "s8W-!"
 	return H_CAST_UINT(p->ast) <= A85GRPMAX;
 }
 
+HParsedToken *
+act_a85partialgroup(const HParseResult *p, void *u)
+{
+	/* Emits the group value in p->ast as big-endian bytes in an arena buffer. */
+	/* NOTE(review): a trailing group of n digits should probably yield n-1 bytes, but the digit count is not visible in a UINT token; all 4 bytes are emitted for now - confirm. */
+	const size_t nbytes = 4;
+	uint32_t fourbytes = H_CAST_UINT(p->ast);
+	uint8_t *bytes = h_arena_malloc(p->arena, nbytes);
+
+	for (size_t i = 0; i < nbytes; ++i)
+	{
+		/* parenthesised so the shift amounts are 24, 16, 8, 0 */
+		bytes[i] = (fourbytes >> ((3 - i) * 8)) & 0xFF;
+	}
+
+	/* every byte covered by the returned length has been written above */
+	return H_MAKE_BYTES(bytes, nbytes);
+}
+
+// TODO: flatten sequence in a85string semantic action
+
 HParsedToken *
 act_nat(const HParseResult *p, void *u)
 {
@@ -1186,15 +1264,16 @@ ASCII85Decode(const Dict *parms, HBytes b, HParser *p)
 	/* This encoding of zero is not allowed */
 	H_RULE(a85fiveexcl, h_repeat_n(MANY_LWS(h_ch('!')), 5));
 	H_VARULE(a85fivedigits,	SEQ(h_and(h_not(a85fiveexcl)), h_repeat_n(MANY_LWS(a85digit), 5)));
-	//H_RULE(a85digitws, SEQ(a85digit, OPT(
-	H_ARULE(a85group,	CHX(a85zero, h_repeat_n(MANY_LWS(a85digit), 5)));
-	// XXX semantic actions need cleaning
 
-	H_ARULE(a85partial2group,	h_repeat_n(MANY_LWS(a85digit), 2));
-	H_ARULE(a85partial3group,	h_repeat_n(MANY_LWS(a85digit), 3));
-	H_ARULE(a85partial4group,	h_repeat_n(MANY_LWS(a85digit), 4));
+	H_ARULE(a85group,	CHX(a85zero, a85fivedigits));
+
+	H_VARULE(a85partial2group,	h_repeat_n(MANY_LWS(a85digit), 2));
+	H_VARULE(a85partial3group,	h_repeat_n(MANY_LWS(a85digit), 3));
+	H_VARULE(a85partial4group,	h_repeat_n(MANY_LWS(a85digit), 4));
+
+	H_ARULE(a85partialgroup, CHX(a85partial4group, a85partial3group, a85partial2group));	/* alternatives are tried in order, longest first */
 
-	H_RULE(a85string,	SEQ(h_many(a85group), OPT(CHX(a85partial2group, a85partial3group, a85partial4group)), a85eod));
+	H_RULE(a85string,	SEQ(h_many(a85group), OPT(a85partialgroup), IGN(a85eod)));
 
 	res = h_parse(a85string, b.token, b.len);
 	if(!res)
-- 
GitLab