From b48d7f27b6a8f71331d25029d61b46b815762b3d Mon Sep 17 00:00:00 2001 From: "plvines (corpora)" <paul.vines@baesystems.com> Date: Sat, 15 Feb 2020 22:29:57 +0000 Subject: [PATCH] Added relaxation/violation for ANY DATA (not just newlines) after final %%EOF --- pdf.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pdf.c b/pdf.c index 96f6879..01d13bc 100644 --- a/pdf.c +++ b/pdf.c @@ -700,9 +700,11 @@ init_parser(struct Env *aux) // XXX should lws be allowed before EOF marker? // NB: lws before xref offset is allowed, cf. p.48 (example 4) LIT("%%EOF"), - CHX(VIOL(SEQ(nl, h_many1(nl)), + CHX(VIOL(SEQ(nl, h_many1(nl), end), "(offset FROM END) Multiple newlines after final %%EOF (severity=4)"), - h_many(nl)))); + SEQ(h_many(nl), end), + VIOL(SEQ(h_many1(h_ch_range(0, 255)), end), + "(offset FROM END) Data after final %%EOF (severity=7)")))); H_RULE(xr_td, SEQ(xrefs, KW("trailer"), ws, dict)); @@ -711,12 +713,13 @@ init_parser(struct Env *aux) H_RULE(tail, SEQ(body, h_optional(xr_td), startxr)); // XXX the real world likes to omit 'startxr' from all but the // last trailer. we should accept-and-warn in that case. - H_RULE(final_eof_junk, CHX(VIOL(h_many1(nl), "Multiple newlines after final %%EOF (severity=4)"), + H_RULE(final_eof_junk, CHX(VIOL(SEQ(h_many1(nl), end), "Multiple newlines after final %%EOF (severity=4)"), + VIOL(h_many1(h_ch_range(0, 255)), "Data after final %%EOF (severity=7)"), end)); H_RULE(pdf, SEQ(header, OPT(hdr_junk), h_many1(tail), final_eof_junk)); /* debug parser to consume as much as possible */ - H_RULE(pdfdbg, SEQ(header, h_many(tail), body, OPT(xr_td), OPT(startxr))); + H_RULE(pdfdbg, SEQ(header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(startxr), final_eof_junk)); /* global parser variables */ -- GitLab