From d30c92ce7dfbb8573afc552014502a9c49121e2d Mon Sep 17 00:00:00 2001 From: plv_cyber <plvines@paul-desktop.cybernet.lab> Date: Wed, 19 Feb 2020 10:45:10 -0500 Subject: [PATCH] Added more endstream violation conditions Added check for severity level in VIOLATION action to avoid segfaults Added relaxation/logging for extra >> endobj --- pdf.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/pdf.c b/pdf.c index bb77f32..69c79bf 100644 --- a/pdf.c +++ b/pdf.c @@ -251,12 +251,20 @@ act_viol(const HParseResult *p, void *viol) { uint severity; uint offset; + HParseResult *severity_parse; viol = (uint8_t *) viol; H_RULE(viol_preamble, SEQ(h_many(NOT_IN("=")), LIT("="))); H_RULE(nat, h_action(h_many1(h_action(h_ch_range('0', '9'), act_digit, NULL)), act_nat, NULL)); H_RULE(p_violsev, SEQ(IGN(viol_preamble), nat)); - severity = h_parse(p_violsev, viol, strlen((char *)viol))->ast->seq->elements[0]->uint; + severity_parse = h_parse(p_violsev, viol, strlen((char *)viol)); + if (!severity_parse) { + fprintf(stderr, "Severity for violaiton %s not assigned!\n", (char *)viol); + severity = 99999; + } + else { + severity = severity_parse->ast->seq->elements[0]->uint; + } offset = p->ast->seq->elements[1]->uint / 8; fprintf(stderr, "VIOLATION[%d]@%d (0x%x): %s\n", severity, offset, offset, (char *) viol); if (strictness && severity > strictness) { @@ -647,7 +655,11 @@ init_parser(struct Env *aux) /* streams */ H_RULE(stmbeg, SEQ(dict, ws, LIT("stream"), OPT(cr), lf)); - H_RULE(stmend, CHX(SEQ(OPT(eol), LIT("endstream")), + H_RULE(stmend, CHX(SEQ(eol, LIT("endstream")), + VIOL(LIT("ndstream"), "Stream length >1-too-long (severity=10)"), + VIOL(SEQ(h_many1(wchar), LIT("endstream")), + "No newline before endstream (severity=7)"), + VIOL(LIT("endstream"), "Stream length 1-too-long (severity=9)"), VIOL(SEQ(OPT(h_ch_range(0, 255)), OPT(eol), LIT("endstream")), "Stream length 1-too-short (severity=4)"), VIOL(SEQ(h_many1(h_butnot(h_ch_range(0, 255), 
CHX(KW("endobj"), @@ -676,6 +688,8 @@ init_parser(struct Env *aux) H_RULE(objdef, SEQ(ws, npair, wel, KW("obj"), ws, indobj, CHX(VIOL(SEQ(OPT(ws), OPT(lws), KW("endobj"), h_many(CHX(wel, eol)), h_many1(KW("endobj"))), "More than 1 endobj token (severity=1)"), + VIOL(SEQ(OPT(ws), OPT(lws), KW("endobj"), h_many(CHX(wel, eol)), h_many1(SEQ(dclose, h_many1(CHX(wchar, eol)), KW("endobj")))), + "More than 1 >> and endobj token (severity=2)"), SEQ(OPT(ws), OPT(lws), KW("endobj")), VIOL(h_optional(KW("endobj")), "Missing endobj token (severity=1)")))); H_RULE(body, h_many(objdef)); @@ -711,7 +725,7 @@ init_parser(struct Env *aux) // the next 'tail' would be appended right after the 'F', // presumably because the previous version of the file // ended without a trailing newline. m) - // this is invalid per spec, because it creates a run-on + // this is invalid per spec, because it creates a run-on // comment, but we should probably accept-and-warn. // XXX should lws be allowed before EOF marker? // NB: lws before xref offset is allowed, cf. p.48 (example 4) @@ -737,7 +751,7 @@ init_parser(struct Env *aux) H_RULE(pdf, SEQ(header, OPT(hdr_junk), h_many1(tail), final_eof_junk)); /* debug parser to consume as much as possible */ - H_RULE(pdfdbg, SEQ(header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(startxr), final_eof_junk)); + H_RULE(pdfdbg, SEQ(header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk)))); /* global parser variables */ -- GitLab