diff --git a/pdf.c b/pdf.c index a54c9924b4a642343b754447ae9fcd65e019bfda..0fad60109d3e1fcf21aedc620bf6c245104d872c 100644 --- a/pdf.c +++ b/pdf.c @@ -2526,7 +2526,9 @@ init_parser(struct Env *aux) /* header */ H_RULE(version, SEQ(pdigit, IGN(period), digit)); - H_ARULE(header, h_middle(LIT("%PDF-"), version, lws)); + // H_ARULE(header, h_middle(LIT("%PDF-"), version, lws)); + H_RULE(header, h_right(LIT("%PDF-"), version)); + H_RULE(headercmt, SEQ(header, CHX(nl, VIOL(lws, "Header comment terminated with whitespace instead of EOL (severity=1)")))); /* body */ H_RULE(indobj, CHX(stream, obj)); @@ -2587,7 +2589,7 @@ init_parser(struct Env *aux) H_ARULE(xr_td, SEQ(xrefs, KW("trailer"), ws, dict)); - H_ARULE(start_junk, VIOL(h_many1(h_butnot(h_ch_range(0, 255), header)), + H_RULE(start_junk, VIOL(h_many1(h_butnot(h_ch_range(0, 255), headercmt)), "Junk bytes before %PDF header (severity=1)")); H_RULE(hdr_junk, CHX(comment, VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))), @@ -2600,10 +2602,10 @@ init_parser(struct Env *aux) VIOL(h_many1(h_butnot(h_ch_range(0, 255), LIT("%%EOF"))), "Data after final %%EOF (severity=7)"), end)); - H_RULE(pdf, SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many1(tail), final_eof_junk)); + H_RULE(pdf, SEQ(OPT(start_junk), headercmt, OPT(hdr_junk), h_many1(tail), final_eof_junk)); /* debug parser to consume as much as possible */ - H_RULE(pdfdbg, SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk)))); + H_RULE(pdfdbg, SEQ(OPT(start_junk), headercmt, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));