From 61f42f8863330652b6d9f0a822e1c90ffb8e5439 Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Thu, 20 Feb 2020 21:01:04 +0100
Subject: [PATCH] add (disabled) object stream parser

---
 pdf.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 48 insertions(+), 8 deletions(-)

diff --git a/pdf.c b/pdf.c
index 3197c20..3f276a0 100644
--- a/pdf.c
+++ b/pdf.c
@@ -73,6 +73,20 @@ p_return_uint__m(HAllocator *mm__, uint64_t x)
 	return h_action__m(mm__, p_epsilon, act_return_uint, (void *)x);
 }
 
+/* Fixed-count variant of h_sepBy: exactly n p's with sep between them. */
+HParser *
+p_sepBy_n__m(HAllocator *mm__, HParser *p, HParser *sep, size_t n)
+{
+	HParser *rest, *all;
+
+	if (n == 0)		/* zero elements requested */
+		return p_epsilon;
+	/* p (sep p){n-1}; the result is passed through h_act_flatten */
+	rest = h_repeat_n__m(mm__, h_sequence__m(mm__, sep, p, NULL), n - 1);
+	all = h_sequence__m(mm__, p, rest, NULL);
+	return h_action__m(mm__, all, h_act_flatten, NULL);
+}
+
 /* a helper to compare an HBytes to a string */
 bool
 bytes_eq(HBytes b, const char *s)
@@ -705,8 +719,11 @@ HParser *p_xref;
 HParser *p_objdef;
 HParser *p_a85string;
 HParser *p_ahexstream;
-
 HParser *p_rldstring;
+HParser *p_ws;		/* init_parser's ws rule */
+HParser *p_wel;		/* init_parser's wel rule */
+HParser *p_elemr;	/* init_parser's elemr rule, wrapped in h_act_flatten */
+HParser *p_npair;	/* init_parser's npair rule */
 
 /* continuations for h_bind() */
 HParser *kstream(HAllocator *, const HParsedToken *, void *);
@@ -957,11 +974,6 @@ init_parser(struct Env *aux)
 				VIOL(h_optional(KW("endobj")), "Missing endobj token (severity=1)"))));
 	H_RULE(body,	h_many(objdef));
 
-	/* for object streams XXX -> p_objstm__m(...) */
-	// sepBy_n(p, sep,n) = SEQ(p, REP(SEQ(sep, p), n-1))
-	//H_RULE(osidx,	sepBy_n(npair, SEQ(wel,ws), N))
-	//H_RULE(objstm,	SEQ(ws, osidx, elemr, ws)); -- elemr from array
-
 	/* cross-reference section */
 	H_RULE(xreol,	CHX(SEQ(sp, cr), SEQ(sp, lf), crlf));
 		// ^ XXX does the real world follow this rule?! cf. loop.pdf
@@ -1063,6 +1075,10 @@ init_parser(struct Env *aux)
 	p_objdef = objdef;
 	p_a85string = a85string;
 	p_ahexstream = hexstream;
+	p_ws = ws;
+	p_wel = wel;
+	p_elemr = h_action(elemr, h_act_flatten, NULL);
+	p_npair = npair;
 
 	p_fail = h_nothing_p();
 	p_epsilon = epsilon;
@@ -1075,6 +1091,7 @@ init_parser(struct Env *aux)
 			     act_nat, NULL));
 	H_RULE(violsev, SEQ(IGN(viol_preamble), severity_num));
 	p_violsev = violsev;
+
 #if 0
 	// XXX testing
 	int r;
@@ -1617,8 +1634,8 @@ p_take__m(HAllocator *mm__, size_t n, struct Env *aux)
 	return h_left__m(mm__, bytes, skip);
 }
 
-HParser *
-p_xrefdata__m(HAllocator *mm__, const Dict *dict);
+HParser *p_xrefdata__m(HAllocator *, const Dict *);	/* defined below */
+HParser *p_objstm__m(HAllocator *, const Dict *);	/* defined below */
 
 HParser *
 p_stream_data__m(HAllocator *mm__, const Dict *dict)
@@ -1886,6 +1903,29 @@ p_xrefdata__m(HAllocator *mm__, const Dict *dict)
 	return h_sequence__ma(mm__, (void **)p_subs);
 }
 
+/*
+ * Object stream payload parser: ws, /N index pairs (p_npair), p_elemr, ws.
+ * Yields p_fail (after a message on stderr) unless /N fits in a size_t.
+ */
+HParser *
+p_objstm__m(HAllocator *mm__, const Dict *dict)
+{
+	const HParsedToken *v = dictentry(dict, "N");
+
+	if (v == NULL || v->token_type != TT_SINT || v->sint < 0 ||
+	    (uint64_t)v->sint > SIZE_MAX) {
+		fprintf(stderr, "missing or invalid /N on object stream\n");
+		return p_fail;
+	}
+
+	/* XXX consistency-check against /First, idx, /N */
+	HParser *wel_ws = h_sequence__m(mm__, p_wel, p_ws, NULL);
+	HParser *idx = p_sepBy_n__m(mm__, p_npair, wel_ws, (size_t)v->sint);
+
+	/* XXX leading and trailing ws OK? */
+	return h_sequence__m(mm__, p_ws, idx, p_elemr, p_ws, NULL);
+}
+
 /*
  * This continuation is very similar to kstream, except that it does not
  * rely on /Length to consume the right amount of input. If /Length is
-- 
GitLab