From 86cc2a8811c0ecaa347a5e512ea99e1bd18bda67 Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Fri, 19 Jun 2020 20:40:13 +0200
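Subject: [PATCH] ignore but log leading junk bytes

Add a start_junk rule that matches one or more bytes that do not begin
a header match and wraps them in VIOL with the message "Junk bytes
before %PDF header (severity=1)".  Both the pdf and pdfdbg rules now
accept OPT(start_junk) in front of header.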

---
 pdf.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pdf.c b/pdf.c
index 6050dc3..129e159 100644
--- a/pdf.c
+++ b/pdf.c
@@ -1120,6 +1120,8 @@ init_parser(struct Env *aux)
 
 	H_RULE(xr_td,	SEQ(xrefs, KW("trailer"), ws, dict));
 
+	H_RULE(start_junk,  VIOL(h_many1(h_butnot(h_ch_range(0, 255), header)),
+	                        "Junk bytes before %PDF header (severity=1)"));
 	H_RULE(hdr_junk,    CHX(comment,
 	    VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))),
 	    "Uncommented junk after header (severity=1)")));
@@ -1131,10 +1133,10 @@ init_parser(struct Env *aux)
 				   VIOL(h_many1(h_butnot(h_ch_range(0, 255), LIT("%%EOF"))),
 					"Data after final %%EOF (severity=7)"),
 				   end));
-	H_RULE(pdf,	SEQ(header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
+	H_RULE(pdf,	SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
 
 	/* debug parser to consume as much as possible */
-	H_RULE(pdfdbg,	SEQ(header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
+	H_RULE(pdfdbg,	SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
 
 	/*
 	 * filters
-- 
GitLab