From 3a63a399696dccfa788da20621e258afb8bd0dc2 Mon Sep 17 00:00:00 2001 From: "plvines (corpora)" <paul.vines@baesystems.com> Date: Tue, 18 Feb 2020 19:48:28 +0000 Subject: [PATCH] Added relaxation/logging for stream length malformations (short-by-1, short-by-more, missing endstream); amended endobj parsing to allow whitespace before it --- pdf.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pdf.c b/pdf.c index ce648cb..bb77f32 100644 --- a/pdf.c +++ b/pdf.c @@ -647,7 +647,19 @@ init_parser(struct Env *aux) /* streams */ H_RULE(stmbeg, SEQ(dict, ws, LIT("stream"), OPT(cr), lf)); - H_RULE(stmend, SEQ(OPT(eol), LIT("endstream"))); + H_RULE(stmend, CHX(SEQ(OPT(eol), LIT("endstream")), + VIOL(SEQ(OPT(h_ch_range(0, 255)), OPT(eol), LIT("endstream")), + "Stream length 1-too-short (severity=4)"), + VIOL(SEQ(h_many1(h_butnot(h_ch_range(0, 255), CHX(KW("endobj"), + SEQ(npair, wel, KW("obj")), + KW("xref"), + LIT("endstream")))), LIT("endstream")), + "Stream length >1-too-short (severity=5)"), + VIOL(h_many1(h_butnot(h_ch_range(0, 255), CHX(KW("endobj"), + SEQ(npair, wel, KW("obj")), + KW("xref")))), + "Missing endstream token (severity=7)"))); + H_RULE(stream, h_left(h_bind(stmbeg, kstream, aux), stmend)); // XXX is whitespace allowed between the eol and "endstream"? @@ -662,9 +674,9 @@ init_parser(struct Env *aux) /* body */ H_RULE(indobj, CHX(stream, obj)); H_RULE(objdef, SEQ(ws, npair, wel, KW("obj"), ws, indobj, - CHX(VIOL(SEQ(KW("endobj"), h_many(CHX(wel, eol)), h_many1(KW("endobj"))), + CHX(VIOL(SEQ(OPT(ws), OPT(lws), KW("endobj"), h_many(CHX(wel, eol)), h_many1(KW("endobj"))), "More than 1 endobj token (severity=1)"), - KW("endobj"), + SEQ(OPT(ws), OPT(lws), KW("endobj")), VIOL(h_optional(KW("endobj")), "Missing endobj token (severity=1)")))); H_RULE(body, h_many(objdef)); -- GitLab