From 86cc2a8811c0ecaa347a5e512ea99e1bd18bda67 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" <pesco@khjk.org> Date: Fri, 19 Jun 2020 20:40:13 +0200 Subject: [PATCH] ignore but log leading junk bytes --- pdf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pdf.c b/pdf.c index 6050dc3..129e159 100644 --- a/pdf.c +++ b/pdf.c @@ -1120,6 +1120,8 @@ init_parser(struct Env *aux) H_RULE(xr_td, SEQ(xrefs, KW("trailer"), ws, dict)); + H_RULE(start_junk, VIOL(h_many1(h_butnot(h_ch_range(0, 255), header)), + "Junk bytes before %PDF header (severity=1)")); H_RULE(hdr_junk, CHX(comment, VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))), "Uncommented junk after header (severity=1)"))); @@ -1131,10 +1133,10 @@ init_parser(struct Env *aux) VIOL(h_many1(h_butnot(h_ch_range(0, 255), LIT("%%EOF"))), "Data after final %%EOF (severity=7)"), end)); - H_RULE(pdf, SEQ(header, OPT(hdr_junk), h_many1(tail), final_eof_junk)); + H_RULE(pdf, SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many1(tail), final_eof_junk)); /* debug parser to consume as much as possible */ - H_RULE(pdfdbg, SEQ(header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk)))); + H_RULE(pdfdbg, SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk)))); /* * filters -- GitLab