From 64149ba8340282ff816910c36adef0b987e0f463 Mon Sep 17 00:00:00 2001
From: Pompolic <pompolic@special-circumstanc.es>
Date: Mon, 18 Oct 2021 21:57:54 +0200
Subject: [PATCH] Header fix from Sumit

Original message:
Author: sumit.ray@baesystems.com <sumit.ray@baesystems.com>
Date:   Wed Sep 15 15:13:25 2021 -0400

    Updated the header rule to flag a violation when the header is followed by a whitespace character rather than an EOL.
---
 pdf.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pdf.c b/pdf.c
index a54c992..0fad601 100644
--- a/pdf.c
+++ b/pdf.c
@@ -2526,7 +2526,9 @@ init_parser(struct Env *aux)
 
 	/* header */
 	H_RULE(version,	SEQ(pdigit, IGN(period), digit));
-	H_ARULE(header,	h_middle(LIT("%PDF-"), version, lws));
+	// H_ARULE(header,	h_middle(LIT("%PDF-"), version, lws));
+	H_RULE(header, h_right(LIT("%PDF-"), version));
+	H_RULE(headercmt, SEQ(header, CHX(nl, VIOL(lws, "Header comment terminated with whitespace instead of EOL (severity=1)"))));
 
 	/* body */
 	H_RULE(indobj,	CHX(stream, obj));
@@ -2587,7 +2589,7 @@ init_parser(struct Env *aux)
 
 	H_ARULE(xr_td,	SEQ(xrefs, KW("trailer"), ws, dict));
 
-	H_ARULE(start_junk,  VIOL(h_many1(h_butnot(h_ch_range(0, 255), header)),
+	H_RULE(start_junk,  VIOL(h_many1(h_butnot(h_ch_range(0, 255), headercmt)),
 	                        "Junk bytes before %PDF header (severity=1)"));
 	H_RULE(hdr_junk,    CHX(comment,
 	    VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))),
@@ -2600,10 +2602,10 @@ init_parser(struct Env *aux)
 				   VIOL(h_many1(h_butnot(h_ch_range(0, 255), LIT("%%EOF"))),
 					"Data after final %%EOF (severity=7)"),
 				   end));
-	H_RULE(pdf,	SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
+	H_RULE(pdf,	SEQ(OPT(start_junk), headercmt, OPT(hdr_junk), h_many1(tail), final_eof_junk));
 
 	/* debug parser to consume as much as possible */
-	H_RULE(pdfdbg,	SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
+	H_RULE(pdfdbg,	SEQ(OPT(start_junk), headercmt, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
 
 
 
-- 
GitLab