From b48d7f27b6a8f71331d25029d61b46b815762b3d Mon Sep 17 00:00:00 2001
From: "plvines (corpora)" <paul.vines@baesystems.com>
Date: Sat, 15 Feb 2020 22:29:57 +0000
Subject: [PATCH] Added relaxation/violation for ANY DATA (not just newlines)
 after final %%EOF

---
 pdf.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/pdf.c b/pdf.c
index 96f6879..01d13bc 100644
--- a/pdf.c
+++ b/pdf.c
@@ -700,9 +700,11 @@ init_parser(struct Env *aux)
 		// XXX should lws be allowed before EOF marker?
 		// NB: lws before xref offset is allowed, cf. p.48 (example 4)
 			    LIT("%%EOF"),
-			    CHX(VIOL(SEQ(nl, h_many1(nl)),
+			    CHX(VIOL(SEQ(nl, h_many1(nl), end),
 				     "(offset FROM END) Multiple newlines after final %%EOF (severity=4)"),
-				h_many(nl))));
+				SEQ(h_many(nl), end),
+				VIOL(SEQ(h_many1(h_ch_range(0, 255)), end),
+				     "(offset FROM END) Data after final  %%EOF (severity=7)"))));
 	
 	H_RULE(xr_td,	SEQ(xrefs, KW("trailer"), ws, dict));
 
@@ -711,12 +713,13 @@ init_parser(struct Env *aux)
 	H_RULE(tail,	SEQ(body, h_optional(xr_td), startxr));
 		// XXX the real world likes to omit 'startxr' from all but the
 		// last trailer. we should accept-and-warn in that case.
-	H_RULE(final_eof_junk, CHX(VIOL(h_many1(nl), "Multiple newlines after final %%EOF (severity=4)"),
+	H_RULE(final_eof_junk, CHX(VIOL(SEQ(h_many1(nl), end), "Multiple newlines after final %%EOF (severity=4)"),
+				   VIOL(h_many1(h_ch_range(0, 255)), "Data after final %%EOF (severity=7)"), 
 				   end));
 	H_RULE(pdf,	SEQ(header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
 
 	/* debug parser to consume as much as possible */
-	H_RULE(pdfdbg,	SEQ(header, h_many(tail), body, OPT(xr_td), OPT(startxr)));
+	H_RULE(pdfdbg,	SEQ(header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(startxr), final_eof_junk));
 
 
 	/* global parser variables */
-- 
GitLab