From 7cd59c303c3e62c9cb13645fdb86914abdffbac3 Mon Sep 17 00:00:00 2001
From: "plvines (corpora)" <paul.vines@baesystems.com>
Date: Mon, 17 Feb 2020 17:41:44 +0000
Subject: [PATCH] Added relaxation/violation for missing or multiple endobj
 tokens. TODO: clean up so that the violation is not triggered twice (one of
 the two reports includes the object byte offset)

---
 pdf.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pdf.c b/pdf.c
index 9ad84b3..ce648cb 100644
--- a/pdf.c
+++ b/pdf.c
@@ -661,7 +661,11 @@ init_parser(struct Env *aux)
 
 	/* body */
 	H_RULE(indobj,	CHX(stream, obj));
-	H_RULE(objdef,	SEQ(ws, npair, wel, KW("obj"), ws, indobj, KW("endobj")));
+	H_RULE(objdef,	SEQ(ws, npair, wel, KW("obj"), ws, indobj,
+			    CHX(VIOL(SEQ(KW("endobj"), h_many(CHX(wel, eol)), h_many1(KW("endobj"))),
+				     "More than 1 endobj token (severity=1)"),
+				KW("endobj"),
+				VIOL(h_optional(KW("endobj")), "Missing endobj token (severity=1)"))));
 	H_RULE(body,	h_many(objdef));
 
 	/* for object streams */
-- 
GitLab