From 5c354366b032fbd5d0c94300ff9e8d70690dd6a6 Mon Sep 17 00:00:00 2001
From: "plvines (corpora)" <paul.vines@baesystems.com>
Date: Thu, 27 Feb 2020 18:50:03 +0000
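Subject: [PATCH] Relaxed xref spacing

Relax the whitespace accepted inside cross-reference table entries
(xrent). Two or more lwchar characters between the offset, generation
and type fields, and trailing whitespace that does not match xreol,
are now accepted and reported through VIOL as severity-1 violations
rather than being rejected by the xrent rule.

act_viol now reads the offset from the last element of the parsed
sequence instead of element 1, and returns NULL when the sequence
holds only a single element.

The hdr_junk rule is re-indented; that part is a whitespace-only
change.
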
---
 pdf.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/pdf.c b/pdf.c
index 7ee7759..c301021 100644
--- a/pdf.c
+++ b/pdf.c
@@ -555,12 +555,15 @@ act_viol(const HParseResult *p, void *viol)
 	else {
 	        severity = severity_parse->ast->seq->elements[0]->uint;
 	}
-	offset = p->ast->seq->elements[1]->uint / 8;
+	offset = p->ast->seq->elements[p->ast->seq->used-1]->uint / 8;
 	fprintf(stderr, "VIOLATION[%d]@%d (0x%x): %s\n", severity, offset, offset, (char *) viol);
 	if (strictness && severity > strictness) {
 	        exit(1);
 	}
 	/* Just return the parse AST, drop the h_tell */
+	if (p->ast->seq->used == 1) {
+		return (HParsedToken *) NULL;
+	}
 	return (HParsedToken *) p->ast->seq->elements[0];
 }
 
@@ -1112,7 +1115,11 @@ init_parser(struct Env *aux)
 	H_RULE(xrtyp,	CHX(h_ch('n'), h_ch('f')));
 	H_ARULE(xroff,	REP(digit, 10));
 	H_ARULE(xrgen,	REP(digit, 5));
-	H_ARULE(xrent,	SEQ(xroff, IGN(sp), xrgen, IGN(sp), xrtyp, IGN(xreol)));
+	H_ARULE(xrent,	SEQ(xroff, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref offset_gen entry (severity=1)"), sp)),
+	    xrgen, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref gen_use entry (severity=1)"), sp)),
+	    xrtyp, IGN(CHX(VIOL(SEQ(wchar, wchar, h_many1(wchar)), "Greater-than-2-byte WS at end of xref entry (severity=1)"),
+	    xreol,
+	    VIOL(SEQ(h_many1(wchar)), "Nonconformant WS at end of xref entry (severity=1)")))));
 	H_RULE(xrhead,	SEQ(nat, IGN(sp), nat, nl));
 	H_RULE(xrsub,	SEQ(xrhead, h_many(xrent)));
 	H_ARULE(xrefs,	SEQ(KW("xref"), nl, h_many(xrsub)));
@@ -1148,8 +1155,8 @@ init_parser(struct Env *aux)
 	H_RULE(xr_td,	SEQ(xrefs, KW("trailer"), ws, dict));
 
 	H_RULE(hdr_junk,    CHX(comment,
-                                VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))),
-                                              "Uncommented junk after header (severity=1)")));
+	    VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))),
+	    "Uncommented junk after header (severity=1)")));
 	H_RULE(tail,	SEQ(body, CHX(SEQ(h_optional(xr_td), startxr),
 				      VIOL(SEQ(xr_td, OPT(SEQ(nl, KW("startxref"), nl, lws, nat, nl)),
 					       OPT(nl), OPT(LIT("%%EOF")), OPT(nl)),
-- 
GitLab