From 816e1726638c4cc40720bc0cd4d20d71d919e0c5 Mon Sep 17 00:00:00 2001
From: Pompolic <pompolic@special-circumstanc.es>
Date: Wed, 12 Jan 2022 14:32:53 +0100
Subject: [PATCH] Enable validations for closing brackets

---
 pdf.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/pdf.c b/pdf.c
index f8aa810..c824eba 100644
--- a/pdf.c
+++ b/pdf.c
@@ -1170,15 +1170,17 @@ act_rldstring(const HParseResult *p, void *u)
 /*
  * The following functions implement a fix for being able to exhaust memory by nesting strings, objects and dicts too deeply.
  * A counter in aux (separately for each type of nesting) is incremented on each opening token, and decremented on each closing token. 
- * The counters are not decremented when it is 0 to avoid underflow.
+ * The counters are not decremented when they are 0 to avoid underflow.
  *
  * The counters are global across the document (but respect document structure,
  * e.g. parentheses in streams don't count, unless for some reason they're parsed with the lparen rule.)
- * The validations make the parse fail if this nesting depth is exceeded.
+ * The lparen, dopen, and array_begin validations make the parse fail if the maximum nesting depth for the respective kind of nesting is exceeded.
  * Because currently there are no diagnostic messages, this can probably result in unexpected parses.
  *
  * validate_rparen, validate_array_end, and validate_dclose will make the parse fail if a respective closing token ( ')', ']', '>>' )
- * is encountered before an lparen, array_begin, or dopen.
+ * is encountered before an lparen, array_begin, or dopen (or, more generally, when there are more closing tokens than opening ones). This is not
+ * exactly the same as checking for balanced brackets: the relative order in which the different bracket types appear is not considered. Each
+ * bracket type can be counted independently because other H_RULEs already reject interleaved opening/closing tokens such as "<< [ /Foo /Bar >> ]".
  */
 
 #define PAREN_MAX_NEST_DEPTH 256
@@ -2578,8 +2580,8 @@ init_parser(struct Env *aux)
 	H_RULE(obj,	CHX(robj, name, dobj));
 
 	/* dictionaries */
-	H_AVDRULE(dopen,	LIT("<<"), aux); // XXX: limit nesting
-	H_ADRULE(dclose,	LIT(">>"), aux);
+	H_AVDRULE(dopen,	LIT("<<"), aux);
+	H_AVDRULE(dclose,	LIT(">>"), aux);
 	H_RULE(k_v,	CHX(CHX(SEQ(name, wel,ws, obj),
 				SEQ(name, CHX(name,dobj))),
 			    VIOL(SEQ(name, wel,ws), "Key with no value (severity=2)")));
@@ -2591,8 +2593,9 @@ init_parser(struct Env *aux)
 	/* arrays */
 	H_RULE(elemd,	h_indirect());	/* elements following a delimiter */
 	H_RULE(elemr,	h_indirect());	/* elements following a regular char */
-	H_AVDRULE(array_begin, lbrack, aux); // XXX: these rules exist to hold the nesting depth counter without affecting text processing (as the rules lbrack/rbrack are used there too)
-	H_ADRULE(array_end, rbrack, aux);
+	H_AVDRULE(array_begin, lbrack, aux); /* semantic action and validation implementing a limit on how deeply arrays can be nested */
+
+	H_AVDRULE(array_end, rbrack, aux); /* decrements the array nesting counter; the validation rejects a ']' that has no matching '[' (counter underflow) */
 	H_ARULE(array_,	h_middle(array_begin, elemd, array_end));
 	H_RULE(elemd_,	CHX(SEQ(ws, dobj, elemd),
 			    SEQ(ws, name, elemr),
-- 
GitLab