From f7dbb2ac23f8a4478ff097f281e2a5d8192cc82e Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Sun, 9 Feb 2020 02:35:46 +0100
Subject: [PATCH] make whitespace explicit in arrays and dictionaries

---
 pdf.c | 49 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 14 deletions(-)

diff --git a/pdf.c b/pdf.c
index 1f869c1..686cd7b 100644
--- a/pdf.c
+++ b/pdf.c
@@ -441,13 +441,16 @@ validate_xrstm(HParseResult *p, void *u)
 }
 
 HParsedToken *
-act_dict(const HParseResult *p, void *env)
+act_dict_(const HParseResult *p, void *env)
 {
 	Dict *dict = H_CAST_SEQ(p->ast);
 
 	return H_MAKE(Dict, dict);
 }
 
+#define act_array_ h_act_flatten
+
+
 /*
  * input grammar
  */
@@ -511,8 +514,8 @@ init_parser(struct Env *aux)
 
 	/* whitespace */
 	H_RULE(comment,	SEQ(percent, line));
-	H_RULE(wel,	IGN(CHX(wchar, comment));
-	H_RULE(ws,	IGN(h_many(wel))));
+	H_RULE(wel,	IGN(CHX(wchar, comment)));
+	H_RULE(ws,	IGN(h_many(wel)));
 	H_RULE(lws,	IGN(h_many(IGN(lwchar))));
 
 	/* tokenization */
@@ -571,17 +574,39 @@ init_parser(struct Env *aux)
 	H_ARULE(string,	CHX(litstr, hexstr));
 	h_bind_indirect(snest, snest_);
 
-	/* arrays and dictionaries */
+	H_RULE(array,	h_indirect());
+	H_RULE(dict,	h_indirect());
+
+	/* classify objects by delimiter at start/end (names: start only): */
+	H_RULE(robj,	CHX(ref, null, boole, real, intg));	/* rchars */
+	H_RULE(dobj,	CHX(string, array, dict));		/* dchars */
+	H_RULE(obj,	CHX(robj, name, dobj));
+
+	/* dictionaries */
 	H_RULE(dopen,	LIT("<<"));
 	H_RULE(dclose,	LIT(">>"));
-	H_RULE(obj,	h_indirect());
-	H_RULE(k_v,	SEQ(name, ws, obj));
-	//H_RULE(k_v,	CHX(SEQ(name, wel,ws, obj),
-	//		    SEQ(name, dobj)));
-	H_ARULE(dict,	h_middle(dopen, MANY_WS(k_v), dclose));
+	H_RULE(k_v,	CHX(SEQ(name, wel,ws, obj),
+			    SEQ(name, CHX(name,dobj))));
+	H_ARULE(dict_,	h_middle(dopen, MANY_WS(k_v), dclose));
 		// XXX this allows, for instance, "<<<<" to be parsed as "<< <<". ok?
-	H_RULE(array,	h_middle(lbrack, MANY_WS(obj), rbrack));
 		// XXX validate: dict keys must be unique
+	h_bind_indirect(dict, dict_);
+
+	/* arrays */
+	H_RULE(elemd,	h_indirect());	/* elements following a delimiter */
+	H_RULE(elemr,	h_indirect());	/* elements following a regular char */
+	H_ARULE(array_,	h_middle(lbrack, elemd, rbrack));
+	H_RULE(elemd_,	CHX(SEQ(ws, dobj, elemd),
+			    SEQ(ws, name, elemr),
+			    SEQ(ws, robj, elemr),
+			    SEQ(ws)));	// XXX ws without SEQ() should suffice
+	H_RULE(elemr_,	CHX(SEQ(ws, dobj, elemd),
+			    SEQ(ws, name, elemr),
+			    SEQ(wel,ws, robj, elemr),
+			    SEQ(ws)));	// XXX ws without SEQ() should suffice
+	h_bind_indirect(elemd, elemd_);
+	h_bind_indirect(elemr, elemr_);
+	h_bind_indirect(array, array_);
 
 	/* streams */
 	H_RULE(stmbeg,	SEQ(dict, ws, LIT("stream"), OPT(cr), lf));
@@ -589,10 +614,6 @@ init_parser(struct Env *aux)
 	H_RULE(stream,	h_left(h_bind(stmbeg, kstream, aux), stmend));
 		// XXX is whitespace allowed between the eol and "endstream"?
 
-	H_RULE(obj_,	CHX(ref, null, boole, real, intg, name, string,
-			    array, dict));
-	h_bind_indirect(obj, obj_);
-
 	/*
 	 * file structure
 	 */
-- 
GitLab