diff --git a/pdf.c b/pdf.c
index 80e63dd0aff33a6c9bd1f408a3322427d91392e2..9cfae9b1897c4eef4b1f520979378cdd042085d5 100644
--- a/pdf.c
+++ b/pdf.c
@@ -9,6 +9,7 @@
 #define SEQ(...)	h_sequence(__VA_ARGS__, NULL)
 #define CHX(...)	h_choice(__VA_ARGS__, NULL)
 #define REP(P,N)	h_repeat_n(P, N)
+#define IGN(P)		h_ignore(P)
 #define IN(STR)		h_in(STR, sizeof(STR))
 #define NOT_IN(STR)	h_not_in(STR, sizeof(STR))
 
@@ -27,6 +28,7 @@ mapch(uint8_t c, uint8_t v)
 	return h_action(h_ch(c), act_mapch, (void *)(uintptr_t)v);
 }
 
+
 /*
  * semantic actions
  */
@@ -53,6 +55,21 @@ act_hupper(const HParseResult *p, void *u)
 
 HParsedToken *
 act_nat(const HParseResult *p, void *u)
+{	/* semantic action: fold a sequence of UINT elements into one UINT token; 'u' is not used */
+	uint64_t x = 0;	/* value accumulated so far */
+	HCountedArray *seq = H_CAST_SEQ(p->ast);	/* the parse result is read as a sequence */
+
+	for (size_t i = 0; i < seq->used; i++)	/* elements in order; the first one is most significant */
+		x = x*10 + H_CAST_UINT(seq->elements[i]);	/* base 10; NOTE(review): no overflow check, very long inputs wrap */
+
+	return H_MAKE_UINT(x);
+}
+#define act_xrnat act_nat
+#define act_xroff act_nat
+#define act_xrgen act_nat
+
+HParsedToken *
+act_pnat(const HParseResult *p, void *u)
 {
 	uint64_t x = H_FIELD_UINT(0);
 	HCountedArray *seq = H_FIELD_SEQ(1);
@@ -62,7 +79,6 @@ act_nat(const HParseResult *p, void *u)
 	
 	return H_MAKE_UINT(x);
 }
-#define act_pnat act_nat
 
 HParsedToken *
 act_intg(const HParseResult *p, void *u)
@@ -138,7 +154,14 @@ act_octal(const HParseResult *p, void *u)
 }
 
 #define act_stream act_token
+#define act_xrefs h_act_last
+
+
+/*
+ * input grammar
+ */
 
+/* continuation for h_bind() */
 HParser *kstream(HAllocator *, const HParsedToken *, void *);
 
 HParser *
@@ -168,8 +191,10 @@ pdf_parser(void)
 	H_ARULE(hupper,	h_ch_range('A', 'F'));
 	H_RULE(hdigit,	CHX(digit, hlower, hupper));
 	H_ARULE(odigit,	h_ch_range('0', '7'));
-
 	H_RULE(sign,	IN("+-"));
+
+	H_RULE(sp,	h_ch(' '));
+	H_RULE(percent,	h_ch('%'));
 	H_RULE(period,	h_ch('.'));
 	H_RULE(slash,	h_ch('/'));
 	H_RULE(hash,	h_ch('#'));
@@ -178,17 +203,18 @@ pdf_parser(void)
 	H_RULE(rparen,	h_ch(')'));
 
 	/* whitespace */
-	H_RULE(comment,	h_right(h_ch('%'), line));
+	H_RULE(comment,	h_right(percent, line));
 	H_RULE(ws,	h_many(CHX(wchar, comment)));
 
 #define TOK(X)	h_right(ws, X)
-#define KW(S)	TOK(h_ignore(h_literal(S)))
+#define KW(S)	TOK(IGN(h_literal(S)))
 // XXX this allows, for instance, "<<<<" to be parsed as "<< <<". ok?
+// XXX this allows, for instance, "endstreamendobj".
 
 	/* misc */
 	H_RULE(epsilon,	h_epsilon_p());
 	H_RULE(empty,	SEQ(epsilon));
-	H_ARULE(nat,	TOK(SEQ(digit,  h_many(digit))));
+	H_ARULE(nat,	TOK(h_many1(digit)));
 	H_ARULE(pnat,	TOK(SEQ(pdigit, h_many(digit))));
 
 #define OPT(X)	CHX(X, epsilon)
@@ -223,7 +249,7 @@ pdf_parser(void)
 	H_RULE(bsf,	mapch('f', 0x0c));	/* FF */
 	H_RULE(escape,	CHX(bsn, bsr, bst, bsb, bsf, lparen, rparen, bslash));
 	H_ARULE(octal,	CHX(REP(odigit,3), REP(odigit,2), REP(odigit,1)));
-	H_RULE(wrap,	h_ignore(eol));
+	H_RULE(wrap,	IGN(eol));
 	H_RULE(sesc,	h_right(bslash, CHX(escape, octal, wrap, epsilon)));
 						/* NB: a lone '\' is ignored */
 	H_ARULE(schars,	h_many(CHX(schar, snest, sesc, eol)));
@@ -254,15 +280,29 @@ pdf_parser(void)
 	 * file structure
 	 */
 
-	H_RULE(version,	SEQ(pdigit, h_ignore(h_ch('.')), pdigit));
+	/* header */
+	H_RULE(version,	SEQ(pdigit, IGN(period), pdigit));
 	H_RULE(header,	SEQ(h_literal("%PDF-"), version, eol));
 
+	/* body */
 	H_RULE(indobj,	CHX(stream, obj));
 	H_RULE(objdef,	SEQ(pnat, nat, KW("obj"), indobj, KW("endobj")));
 	H_RULE(body,	h_many(objdef));	// XXX object streams
 
-	H_RULE(xrefs,	epsilon);
-
+	/* cross-reference table */
+	H_RULE(xreol,	CHX(SEQ(sp, cr), SEQ(sp, lf), crlf));
+		// ^ XXX do real-world files actually follow this rule? cf. loop.pdf
+	H_RULE(xrtyp,	CHX(h_ch('n'), h_ch('f')));
+	H_ARULE(xroff,	REP(digit, 10));
+	H_ARULE(xrgen,	REP(digit, 5));
+	H_RULE(xrent,	SEQ(xroff, IGN(sp), xrgen, IGN(sp), xrtyp, IGN(xreol)));
+	H_ARULE(xrnat,	h_many1(digit));
+	H_RULE(xrhead,	SEQ(xrnat, IGN(sp), xrnat, IGN(eol)));
+	H_RULE(xrsub,	SEQ(xrhead, h_many(xrent)));
+	H_ARULE(xrefs,	SEQ(KW("xref"), eol, h_many(xrsub)));
+		// XXX is whitespace allowed between "xref" and eol?
+
+	/* trailer */
 	H_RULE(trailer,	epsilon);
 
 	H_RULE(end,	epsilon);	// XXX
@@ -310,6 +350,10 @@ fail:
 }
 
 
+/*
+ * minimal main program
+ */
+
 #include <stdio.h>
 #include <err.h>
 #include <assert.h>