From 5c354366b032fbd5d0c94300ff9e8d70690dd6a6 Mon Sep 17 00:00:00 2001 From: "plvines (corpora)" <paul.vines@baesystems.com> Date: Thu, 27 Feb 2020 18:50:03 +0000 Subject: [PATCH] Relaxed xref spacing --- pdf.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pdf.c b/pdf.c index 7ee7759..c301021 100644 --- a/pdf.c +++ b/pdf.c @@ -555,12 +555,15 @@ act_viol(const HParseResult *p, void *viol) else { severity = severity_parse->ast->seq->elements[0]->uint; } - offset = p->ast->seq->elements[1]->uint / 8; + offset = p->ast->seq->elements[p->ast->seq->used-1]->uint / 8; fprintf(stderr, "VIOLATION[%d]@%d (0x%x): %s\n", severity, offset, offset, (char *) viol); if (strictness && severity > strictness) { exit(1); } /* Just return the parse AST, drop the h_tell */ + if (p->ast->seq->used == 1) { + return (HParsedToken *) NULL; + } return (HParsedToken *) p->ast->seq->elements[0]; } @@ -1112,7 +1115,11 @@ init_parser(struct Env *aux) H_RULE(xrtyp, CHX(h_ch('n'), h_ch('f'))); H_ARULE(xroff, REP(digit, 10)); H_ARULE(xrgen, REP(digit, 5)); - H_ARULE(xrent, SEQ(xroff, IGN(sp), xrgen, IGN(sp), xrtyp, IGN(xreol))); + H_ARULE(xrent, SEQ(xroff, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref offset_gen entry (severity=1)"), sp)), + xrgen, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref gen_use entry (severity=1)"), sp)), + xrtyp, IGN(CHX(VIOL(SEQ(wchar, wchar, h_many1(wchar)), "Greater-than-2-byte WS at end of xref entry (severity=1)"), + xreol, + VIOL(SEQ(h_many1(wchar)), "Nonconformant WS at end of xref entry (severity=1)"))))); H_RULE(xrhead, SEQ(nat, IGN(sp), nat, nl)); H_RULE(xrsub, SEQ(xrhead, h_many(xrent))); H_ARULE(xrefs, SEQ(KW("xref"), nl, h_many(xrsub))); @@ -1148,8 +1155,8 @@ init_parser(struct Env *aux) H_RULE(xr_td, SEQ(xrefs, KW("trailer"), ws, dict)); H_RULE(hdr_junk, CHX(comment, - VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))), - "Uncommented junk after header (severity=1)"))); + VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))), + "Uncommented junk after header (severity=1)"))); H_RULE(tail, SEQ(body, CHX(SEQ(h_optional(xr_td), startxr), VIOL(SEQ(xr_td, OPT(SEQ(nl, KW("startxref"), nl, lws, nat, nl)), OPT(nl), OPT(LIT("%%EOF")), OPT(nl)), -- GitLab