diff --git a/pdf.c b/pdf.c
index 70e7ac5a77b5619ceacc31b04c99ecb038c3f128..bd029edfd05893c5a32d3e9f090763aa9431e214 100644
--- a/pdf.c
+++ b/pdf.c
@@ -176,12 +176,15 @@ pdf_parser(void)
 	H_RULE(lf,	h_ch('\n'));		/* semantic value: \n */
 	H_RULE(crlf,	h_right(cr, lf));	/* semantic value: \n */
 	H_RULE(eol,	CHX(crlf, cr, lf));
+	H_RULE(nl,	IGN(eol));
 	H_RULE(line,	h_many(NOT_IN("\r\n")));
 
 	/* character classes */
-#define WCHARS "\0\t\n\f\r "
-#define DCHARS "()<>[]{}/%"
+#define LWCHARS	"\0\t\f "
+#define WCHARS	LWCHARS "\n\r"
+#define DCHARS	"()<>[]{}/%"
 	H_RULE(wchar,	IN(WCHARS));			/* white-space */
+	H_RULE(lwchar,	IN(LWCHARS));			/* "line" whitespace */
 	//H_RULE(dchar,	IN(DCHARS));			/* delimiter */
 	//H_RULE(rchar,	NOT_IN(WCHARS DCHARS));		/* regular */
 	H_RULE(nchar,	NOT_IN(WCHARS DCHARS "#"));	/* name */
@@ -204,8 +207,9 @@ pdf_parser(void)
 	H_RULE(rparen,	h_ch(')'));
 
 	/* whitespace */
-	H_RULE(comment,	h_right(percent, line));
-	H_RULE(ws,	h_many(CHX(wchar, comment)));
+	H_RULE(comment,	SEQ(percent, line));
+	H_RULE(ws,	IGN(h_many(CHX(wchar, comment))));
+	H_RULE(lws,	IGN(h_many(lwchar)));
 
 #define TOK(X)	h_right(ws, X)
 #define KW(S)	TOK(LIT(S))
@@ -251,9 +255,8 @@ pdf_parser(void)
 	H_RULE(bsf,	mapch('f', 0x0c));	/* FF */
 	H_RULE(escape,	CHX(bsn, bsr, bst, bsb, bsf, lparen, rparen, bslash));
 	H_ARULE(octal,	CHX(REP(odigit,3), REP(odigit,2), REP(odigit,1)));
-	H_RULE(wrap,	IGN(eol));
-	H_RULE(sesc,	h_right(bslash, CHX(escape, octal, wrap, epsilon)));
-						/* NB: a lone '\' is ignored */
+	H_RULE(sesc,	h_right(bslash, CHX(escape, octal, nl, epsilon)));
+		/* NB: lone backslashes and escaped newlines are ignored */
 	H_ARULE(schars,	h_many(CHX(schar, snest, sesc, eol)));
 	H_RULE(snest_,	SEQ(lparen, schars, rparen));
 	H_ARULE(litstr,	TOK(h_middle(lparen, schars, rparen)));
@@ -299,17 +302,22 @@ pdf_parser(void)
 	H_ARULE(xrgen,	REP(digit, 5));
 	H_RULE(xrent,	SEQ(xroff, IGN(sp), xrgen, IGN(sp), xrtyp, IGN(xreol)));
 	H_ARULE(xrnat,	h_many1(digit));
-	H_RULE(xrhead,	SEQ(xrnat, IGN(sp), xrnat, IGN(eol)));
+	H_RULE(xrhead,	SEQ(xrnat, IGN(sp), xrnat, nl));
 	H_RULE(xrsub,	SEQ(xrhead, h_many(xrent)));
 	H_ARULE(xrefs,	SEQ(KW("xref"), eol, h_many(xrsub)));
 		// XXX whitespace allowed between "xref" and eol?
+		// XXX cross-reference streams
 
 	/* trailer */
-	H_RULE(nl,	IGN(eol));
-	H_RULE(trailer,	SEQ(KW("trailer"), dict, nl,
-			    LIT("startxref"), nl, nat, nl,
-			    LIT("%%EOF"), nl));
-		// XXX be more lenient about whitespace in the trailer?
+	H_RULE(trailer,	SEQ(KW("trailer"), dict, lws, nl,
+			    KW("startxref"), lws, nl,
+			    lws, xrnat, lws, nl,
+			    LIT("%%EOF"), OPT(nl)));	// XXX require nl?
+		// XXX ws ok before startxref?
+		// XXX lws ok after startxref?
+		// XXX lws ok after xref offset?
+		// XXX lws ok around EOF marker?
+		// NB: lws before xref offset is allowed, cf. p.48 (example 4)
 
 	H_RULE(tail,	SEQ(body, xrefs, trailer));
 	H_RULE(pdf,	SEQ(header, h_many1(tail), end));
@@ -346,6 +354,7 @@ kstream(HAllocator *mm__, const HParsedToken *x, void *env)
 	if (v == NULL || v->token_type != TT_UINT)
 		goto fail;
 	sz = (size_t)v->uint;
+		// XXX support indirect objects for the Length value?!
 
 	return h_repeat_n__m(mm__, h_uint8__m(mm__), sz);
 fail: