From d30c92ce7dfbb8573afc552014502a9c49121e2d Mon Sep 17 00:00:00 2001
From: plv_cyber <plvines@paul-desktop.cybernet.lab>
Date: Wed, 19 Feb 2020 10:45:10 -0500
Subject: [PATCH] Added more endstream violation conditions; added check for
 severity level in VIOLATION action to avoid segfaults; added
 relaxation/logging for extra >> endobj

---
 pdf.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/pdf.c b/pdf.c
index bb77f32..69c79bf 100644
--- a/pdf.c
+++ b/pdf.c
@@ -251,12 +251,20 @@ act_viol(const HParseResult *p, void *viol)
 {
 	uint severity;
 	uint offset;
+	HParseResult *severity_parse;
 	viol = (uint8_t *) viol;
 	H_RULE(viol_preamble, SEQ(h_many(NOT_IN("=")), LIT("=")));
 	H_RULE(nat, h_action(h_many1(h_action(h_ch_range('0', '9'), act_digit, NULL)),
 			     act_nat, NULL));
 	H_RULE(p_violsev, SEQ(IGN(viol_preamble), nat));
-	severity = h_parse(p_violsev, viol, strlen((char *)viol))->ast->seq->elements[0]->uint;
+	severity_parse = h_parse(p_violsev, viol, strlen((char *)viol));
+	if (!severity_parse) {
+	  fprintf(stderr, "Severity for violaiton %s not assigned!\n", (char *)viol);
+	  severity = 99999;
+	}
+	else {
+	  severity = severity_parse->ast->seq->elements[0]->uint;
+	}
 	offset = p->ast->seq->elements[1]->uint / 8;
 	fprintf(stderr, "VIOLATION[%d]@%d (0x%x): %s\n", severity, offset, offset, (char *) viol);
 	if (strictness && severity > strictness) {
@@ -647,7 +655,11 @@ init_parser(struct Env *aux)
 
 	/* streams */
 	H_RULE(stmbeg,	SEQ(dict, ws, LIT("stream"), OPT(cr), lf));
-	H_RULE(stmend, CHX(SEQ(OPT(eol), LIT("endstream")),
+	H_RULE(stmend, CHX(SEQ(eol, LIT("endstream")),
+			   VIOL(LIT("ndstream"), "Stream length >1-too-long (severity=10)"),
+			   VIOL(SEQ(h_many1(wchar), LIT("endstream")),
+				"No newline before endstream (severity=7)"),
+			   VIOL(LIT("endstream"), "Stream length 1-too-long (severity=9)"),
 			   VIOL(SEQ(OPT(h_ch_range(0, 255)), OPT(eol), LIT("endstream")),
 				"Stream length 1-too-short (severity=4)"),
 			   VIOL(SEQ(h_many1(h_butnot(h_ch_range(0, 255), CHX(KW("endobj"),
@@ -676,6 +688,8 @@ init_parser(struct Env *aux)
 	H_RULE(objdef,	SEQ(ws, npair, wel, KW("obj"), ws, indobj,
 			    CHX(VIOL(SEQ(OPT(ws), OPT(lws), KW("endobj"), h_many(CHX(wel, eol)), h_many1(KW("endobj"))),
 				     "More than 1 endobj token (severity=1)"),
+				VIOL(SEQ(OPT(ws), OPT(lws), KW("endobj"), h_many(CHX(wel, eol)), h_many1(SEQ(dclose, h_many1(CHX(wchar, eol)), KW("endobj")))),
+				     "More than 1 >> and endobj token (severity=2)"),
 				SEQ(OPT(ws), OPT(lws), KW("endobj")),
 				VIOL(h_optional(KW("endobj")), "Missing endobj token (severity=1)"))));
 	H_RULE(body,	h_many(objdef));
@@ -711,7 +725,7 @@ init_parser(struct Env *aux)
 		//     the next 'tail' would be appended right after the 'F',
 		//     presumably because the previous version of the file
 		//     ended without a trailing newline. m)
-		//     this is invalid per spec, because it creates a run-on 
+		//     this is invalid per spec, because it creates a run-on
 		//     comment, but we should probably accept-and-warn.
 		// XXX should lws be allowed before EOF marker?
 		// NB: lws before xref offset is allowed, cf. p.48 (example 4)
@@ -737,7 +751,7 @@ init_parser(struct Env *aux)
 	H_RULE(pdf,	SEQ(header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
 
 	/* debug parser to consume as much as possible */
-	H_RULE(pdfdbg,	SEQ(header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(startxr), final_eof_junk));
+	H_RULE(pdfdbg,	SEQ(header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
 
 
 	/* global parser variables */
-- 
GitLab