diff --git a/pdf.c b/pdf.c
index 3ec9e81fdf8c554019b3cc02fc3bb61531a0a34e..fe155c14f2fb0985436f531439af76707f0c498e 100644
--- a/pdf.c
+++ b/pdf.c
@@ -1,9 +1,11 @@
 /* beginnings of a PDF parser in hammer
  * pesco 2019
  * pompolic 2020
+ * Paul Vines 2020
  */
 
 #include <string.h>	/* strncmp(), memset() */
+#include <stdlib.h>	/* exit() */
 
 #include <hammer/hammer.h>
 #include <hammer/glue.h>
@@ -18,6 +20,13 @@
 #define IN(STR)		h_in((const uint8_t *)(STR), sizeof(STR) - 1)
 #define NOT_IN(STR)	h_not_in((const uint8_t *)(STR), sizeof(STR) - 1)
 
+#ifdef LOG	/* report grammar violations: run P, record h_tell(), hand both to act_viol */
+#define VIOL(P,MSG)	h_action(h_sequence((P), h_tell(), NULL), act_viol, (MSG))
+#else		/* without LOG the tolerated form P is accepted silently */
+#define VIOL(P,MSG)	(P)
+#endif
+
+
 
 /*
  * some helpers
@@ -27,6 +36,7 @@ HParser *p_fail;
 HParser *p_epsilon;
 HParser *p_return_0;
 HParser *p_return_1;
+uint8_t strictness = 0;
 
 /* a combinator to parse a given character but return a different value */
 
@@ -411,6 +421,31 @@ act_nat(const HParseResult *p, void *u)
 #define act_xroff act_nat
 #define act_xrgen act_nat
 
+HParser *p_violsev;
+/* Action behind VIOL(): p is the sequence (P, h_tell); viol is the message text, whose severity p_violsev extracts. Logs to stderr, exits if severity exceeds a nonzero strictness, returns P's token. */
+HParsedToken *
+act_viol(const HParseResult *p, void *viol)
+{
+	const char *msg = viol;
+	unsigned int severity = 99999;	/* reported when the message carries no severity */
+	unsigned int offset;
+	HParseResult *sev = h_parse(p_violsev, (const uint8_t *)msg, strlen(msg));
+	if (sev) {
+		severity = sev->ast->seq->elements[0]->uint;
+		h_parse_result_free(sev);	/* previously leaked once per violation */
+	} else {
+		fprintf(stderr, "Severity for violation %s not assigned!\n", msg);
+	}
+	/* NOTE(review): the /8 presumably converts h_tell's bit position to bytes - confirm */
+	offset = p->ast->seq->elements[1]->uint / 8;
+	fprintf(stderr, "VIOLATION[%u]@%u (0x%x): %s\n", severity, offset, offset, msg);
+	if (strictness && severity > strictness) {
+		exit(1);
+	}
+	/* Just return the parse AST, drop the h_tell */
+	return (HParsedToken *) p->ast->seq->elements[0];
+}
+
 bool
 validate_pnat(HParseResult *p, void *u)
 {
@@ -856,8 +891,9 @@ init_parser(struct Env *aux)
 	/* dictionaries */
 	H_RULE(dopen,	LIT("<<"));
 	H_RULE(dclose,	LIT(">>"));
-	H_RULE(k_v,	CHX(SEQ(name, wel,ws, obj),
-			    SEQ(name, CHX(name,dobj))));
+	H_RULE(k_v,	CHX(CHX(SEQ(name, wel,ws, obj),
+				SEQ(name, CHX(name,dobj))),
+			    VIOL(SEQ(name, wel,ws), "Key with no value (severity=2)")));
 	H_ARULE(dict_,	h_middle(dopen, MANY_WS(k_v), dclose));
 		// XXX this allows, for instance, "<<<<" to be parsed as "<< <<". ok?
 		// XXX validate: dict keys must be unique
@@ -880,8 +916,24 @@ init_parser(struct Env *aux)
 	h_bind_indirect(array, array_);
 
 	/* streams */
-	H_RULE(stmbeg,	SEQ(dict, ws, LIT("stream"), OPT(cr), lf));
-	H_RULE(stmend,	SEQ(OPT(eol), LIT("endstream")));
+	H_RULE(stmbeg,	SEQ(dict, OPT(ws), LIT("stream"), OPT(cr), lf));
+	H_RULE(stmend, CHX(SEQ(eol, LIT("endstream")),
+			   VIOL(LIT("ndstream"), "Stream length >1-too-long (severity=10)"),
+			   VIOL(SEQ(h_many1(wchar), LIT("endstream")),
+				"No newline before endstream (severity=7)"),
+			   VIOL(LIT("endstream"), "Stream length 1-too-long (severity=9)"),
+			   VIOL(SEQ(OPT(h_ch_range(0, 255)), OPT(eol), LIT("endstream")),
+				"Stream length 1-too-short (severity=4)"),
+			   VIOL(SEQ(h_many1(h_butnot(h_ch_range(0, 255), CHX(KW("endobj"),
+									     SEQ(npair, wel, KW("obj")),
+									     KW("xref"),
+									     LIT("endstream")))), LIT("endstream")),
+				"Stream length >1-too-short (severity=5)"),
+			   VIOL(h_many1(h_butnot(h_ch_range(0, 255), CHX(KW("endobj"),
+									 SEQ(npair, wel, KW("obj")),
+									 KW("xref")))),
+				"Missing endstream token (severity=7)")));
+
 	H_RULE(stream,	h_left(h_bind(stmbeg, kstream, aux), stmend));
 		// XXX is whitespace allowed between the eol and "endstream"?
 
@@ -890,12 +942,18 @@ init_parser(struct Env *aux)
 	 */
 
 	/* header */
-	H_RULE(version,	SEQ(pdigit, IGN(period), pdigit));
+	H_RULE(version,	SEQ(pdigit, IGN(period), digit));
 	H_RULE(header,	h_middle(LIT("%PDF-"), version, nl));
 
 	/* body */
 	H_RULE(indobj,	CHX(stream, obj));
-	H_RULE(objdef,	SEQ(ws, npair, wel, KW("obj"), ws, indobj, KW("endobj")));
+	H_RULE(objdef,	SEQ(ws, npair, wel, KW("obj"), ws, indobj,
+			    CHX(VIOL(SEQ(OPT(ws), OPT(lws), KW("endobj"), h_many(CHX(wel, eol)), h_many1(KW("endobj"))),
+				     "More than 1 endobj token (severity=1)"),
+				VIOL(SEQ(OPT(ws), OPT(lws), KW("endobj"), h_many(CHX(wel, eol)), h_many1(SEQ(dclose, h_many1(CHX(wchar, eol)), KW("endobj")))),
+				     "More than 1 >> and endobj token (severity=2)"),
+				SEQ(OPT(ws), OPT(lws), KW("endobj")),
+				VIOL(h_optional(KW("endobj")), "Missing endobj token (severity=1)"))));
 	H_RULE(body,	h_many(objdef));
 
 	/* for object streams */
@@ -918,26 +976,44 @@ init_parser(struct Env *aux)
 		// XXX skip however much we consumed and check for "endstream endobj"?
 
 	/* trailer */
-	H_RULE(startxr,	SEQ(nl, KW("startxref"), nl,
+	H_RULE(startxr, SEQ(nl, KW("startxref"), nl,
+			    lws, nat, nl,
+			    LIT("%%EOF"), OPT(nl)));
+
+	/* used for the backwards search */
+	H_RULE(lasteof, SEQ(nl, KW("startxref"), nl,
 			    lws, nat, nl,
-			    LIT("%%EOF"), CHX(nl, end)));
 		// XXX the real world sometimes omits nl after %%EOF inside the file.
 		//     the next 'tail' would be appended right after the 'F',
 		//     presumably because the previous version of the file
 		//     ended without a trailing newline. m)
-		//     this is invalid per spec, because it creates a run-on 
+		//     this is invalid per spec, because it creates a run-on
 		//     comment, but we should probably accept-and-warn.
 		// XXX should lws be allowed before EOF marker?
 		// NB: lws before xref offset is allowed, cf. p.48 (example 4)
+			    LIT("%%EOF"),
+			    CHX(VIOL(SEQ(nl, h_many1(nl), end),
+				     "(offset FROM END) Multiple newlines after final %%EOF (severity=4)"),
+				SEQ(h_many(nl), end),
+				VIOL(SEQ(h_butnot(h_ch_range(0, 255), LIT("%%EOF"))),
+				     "(offset FROM END) Data after final  %%EOF (severity=7)"))));
+	
 	H_RULE(xr_td,	SEQ(xrefs, KW("trailer"), ws, dict));
 
-	H_RULE(tail,	SEQ(body, h_optional(xr_td), startxr));
-		// XXX the real world likes to omit 'startxr' from all but the
-		// last trailer. we should accept-and-warn in that case.
-	H_RULE(pdf,	SEQ(header, h_many1(tail), end));
+	H_RULE(hdr_junk,    VIOL(h_many1(h_butnot(h_ch_range(0, 255), objdef)),
+				 "Uncommented junk after header (severity=1)"));
+	H_RULE(tail,	SEQ(body, CHX(SEQ(h_optional(xr_td), startxr),
+				      VIOL(SEQ(xr_td, OPT(SEQ(nl, KW("startxref"), nl, lws, nat, nl)),
+					       OPT(nl), OPT(LIT("%%EOF")), OPT(nl)),
+					   "Improper end of trailer - missing startxref and/or %%EOF (severity=5)"))));
+	H_RULE(final_eof_junk, CHX(VIOL(SEQ(h_many1(nl), end), "Multiple newlines after final %%EOF (severity=4)"),
+				   VIOL(h_many1(h_butnot(h_ch_range(0, 255), LIT("%%EOF"))),
+					"Data after final %%EOF (severity=7)"),
+				   end));
+	H_RULE(pdf,	SEQ(header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
 
 	/* debug parser to consume as much as possible */
-	H_RULE(pdfdbg,	SEQ(header, h_many(tail), body, OPT(xr_td), OPT(startxr)));
+	H_RULE(pdfdbg,	SEQ(header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
 
 	/*
 	 * filters
@@ -979,7 +1055,7 @@ init_parser(struct Env *aux)
 	/* global parser variables */
 	p_pdf = pdf;
 	p_pdfdbg = pdfdbg;
-	p_startxref = startxr;
+	p_startxref = lasteof; //startxr;
 	p_xref = CHX(xr_td, xrstm);
 	p_objdef = objdef;
 	p_a85string = a85string;
@@ -990,6 +1066,12 @@ init_parser(struct Env *aux)
 	p_return_0 = h_action(epsilon, act_return_uint, (void *)0);
 	p_return_1 = h_action(epsilon, act_return_uint, (void *)1);
 
+	/* Parsing of severity messages */
+	H_RULE(viol_preamble, SEQ(h_many(NOT_IN("=")), LIT("=")));
+	H_RULE(severity_num, h_action(h_many1(h_action(h_ch_range('0', '9'), act_digit, NULL)),
+			     act_nat, NULL));
+	H_RULE(violsev, SEQ(IGN(viol_preamble), severity_num));
+	p_violsev = violsev;
 #if 0
 	// XXX testing
 	int r;
@@ -1563,7 +1645,7 @@ kstream(HAllocator *mm__, const HParsedToken *x, void *env)
 
 	//fprintf(stderr, "parsing stream object, length %zu.\n", sz);	// XXX debug
 
-	dict_p  = p_return__m(mm__, dict_t);
+	dict_p	= p_return__m(mm__, dict_t);
 	bytes_p = p_take__m(mm__, sz, aux);
 
 	spec = h_alloc(mm__, sizeof(struct streamspec));
@@ -1635,9 +1717,9 @@ p_xrefdata__m(HAllocator *mm__, const Dict *dict)
 	 * for field 3 (generation). in fact, these are the only defaults
 	 * defined by ISO 32000-1:2008 (PDF 1.7).
 	 *
-	 *   entry type  field no.  default value
-	 *               1 (type)   1
-	 *   1 ("n")     3 (gen.)   0
+	 *   entry type	 field no.  default value
+	 *		 1 (type)   1
+	 *   1 ("n")	 3 (gen.)   0
 	 */
 
 	/* Size (required) - total size of xref table */
@@ -1680,7 +1762,7 @@ p_xrefdata__m(HAllocator *mm__, const Dict *dict)
 	 *
 	 *   [t x y] with t,x,y > 0  full general form
 	 *   [0 x y] with x,y > 0    only type-1 ("in use") entries
-	 *   [0 x 0] with x > 0      only type-1 entries, only offsets
+	 *   [0 x 0] with x > 0	     only type-1 entries, only offsets
 	 *
 	 * however, though nonsensical, [t x 0] with t,x > 0 is not disallowed
 	 * by the spec; as long as all entries are of type 1, the xref data can
@@ -1907,10 +1989,15 @@ main(int argc, char *argv[])
 	int fd;
 
 	/* command line handling */
-	if (argc != 2) {
-		fprintf(stderr, "usage: %s file\n", argv[0]);
+	/* accept "file" or "file strictness"; strictness must be decimal, 0..255 */
+	char *strict_end = NULL;
+	unsigned long strict_val = (argc == 3) ? strtoul(argv[2], &strict_end, 10) : 0;
+	if (argc < 2 || argc > 3 || (strict_end && (strict_end == argv[2] || *strict_end || strict_val > UINT8_MAX))) {
+		fprintf(stderr, "usage: %s file [strictness]\n", argv[0]);
 		return 1;
 	}
+	/* 0 (also the default) never makes a violation fatal, cf. act_viol */
+	strictness = (uint8_t)strict_val;
 	infile = argv[1];
 
 	/* mmap the input file */