From 7cd59c303c3e62c9cb13645fdb86914abdffbac3 Mon Sep 17 00:00:00 2001 From: "plvines (corpora)" <paul.vines@baesystems.com> Date: Mon, 17 Feb 2020 17:41:44 +0000 Subject: [PATCH] Added relaxation/violation for missing or multiple endobj. TODO: would like to clean this up so the violation doesn't get triggered twice (one of the two occurrences has the object byte offset included) --- pdf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pdf.c b/pdf.c index 9ad84b3..ce648cb 100644 --- a/pdf.c +++ b/pdf.c @@ -661,7 +661,11 @@ init_parser(struct Env *aux) /* body */ H_RULE(indobj, CHX(stream, obj)); - H_RULE(objdef, SEQ(ws, npair, wel, KW("obj"), ws, indobj, KW("endobj"))); + H_RULE(objdef, SEQ(ws, npair, wel, KW("obj"), ws, indobj, + CHX(VIOL(SEQ(KW("endobj"), h_many(CHX(wel, eol)), h_many1(KW("endobj"))), + "More than 1 endobj token (severity=1)"), + KW("endobj"), + VIOL(h_optional(KW("endobj")), "Missing endobj token (severity=1)")))); H_RULE(body, h_many(objdef)); /* for object streams */ -- GitLab