diff --git a/pdf.c b/pdf.c
index 1c3441ffa543da807ea194c949fcdf7cd1275b72..b453d442f838b0c5a1a195786d0f41e68e79606e 100644
--- a/pdf.c
+++ b/pdf.c
@@ -6,10 +6,28 @@
 /* convenience macros */
 #define SEQ(...)	h_sequence(__VA_ARGS__, NULL)
 #define CHX(...)	h_choice(__VA_ARGS__, NULL)
+#define REP(P,N)	h_repeat_n(P, N)
 #define IN(STR)		h_in(STR, sizeof(STR))
 #define NOT_IN(STR)	h_not_in(STR, sizeof(STR))
 
-#include <assert.h>
+
+/* a combinator to parse a given character but return a different value */
+
+HParsedToken *
+act_mapch(const HParseResult *p, void *u)	/* u: replacement value packed by mapch() */
+{
+	return H_MAKE_UINT((uint8_t)(uintptr_t)u);	/* undo mapch's (void *)(uintptr_t) cast */
+}
+
+HParser *
+mapch(uint8_t c, uint8_t v)	/* match byte c; v rides along as act_mapch's user pointer */
+{
+	return h_action(h_ch(c), act_mapch, (void *)(uintptr_t)v);
+}
+
+/*
+ * semantic actions
+ */
 
 HParsedToken *
 act_digit(const HParseResult *p, void *u)
@@ -17,6 +35,7 @@ act_digit(const HParseResult *p, void *u)
 	return H_MAKE_UINT(H_CAST_UINT(p->ast) - '0');
 }
 #define act_pdigit act_digit
+#define act_odigit act_digit
 
 HParsedToken *
 act_hlower(const HParseResult *p, void *u)
@@ -101,6 +120,20 @@ act_nesc(const HParseResult *p, void *u)
 	return H_MAKE_UINT(H_FIELD_UINT(1)*16 + H_FIELD_UINT(2));
 }
 
+#define act_schars h_act_flatten
+#define act_litstr act_token
+
+HParsedToken *
+act_octal(const HParseResult *p, void *u)
+{
+	/* digits are most significant first; NOTE(review): three digits can exceed 255 */
+	const HCountedArray *digits = H_CAST_SEQ(p->ast);
+	uint64_t value = 0;
+
+	for (size_t k = 0; k < digits->used; k++)
+		value = 8 * value + H_CAST_UINT(digits->elements[k]);
+	return H_MAKE_UINT(value);
+}
 
 HParser *
 pdf_parser(void)
@@ -109,11 +142,11 @@ pdf_parser(void)
 	if(p) return p;
 
 	/* lines */
-	H_RULE(crlf,	h_literal("\r\n"));
-	H_RULE(cr,	h_ch('\r'));
-	H_RULE(lf,	h_ch('\n'));
+	H_RULE(cr,	mapch('\r', '\n'));	/* semantic value: \n */
+	H_RULE(lf,	h_ch('\n'));		/* semantic value: \n */
+	H_RULE(crlf,	h_right(cr, lf));	/* semantic value: \n */
 	H_RULE(eol,	CHX(crlf, cr, lf));
-	H_RULE(line,	h_many(h_not_in("\r\n", 2)));
+	H_RULE(line,	h_many(h_not_in("\r\n", 2)));	/* not NOT_IN(): its sizeof would also ban NUL */
 
 	/* character classes */
 #define WCHARS "\0\t\n\f\r "
@@ -122,11 +155,21 @@ pdf_parser(void)
 	//H_RULE(dchar,	IN(DCHARS));			/* delimiter */
 	//H_RULE(rchar,	NOT_IN(WCHARS DCHARS));		/* regular */
 	H_RULE(nchar,	NOT_IN(WCHARS DCHARS "#"));	/* name */
+	H_RULE(schar,	h_not_in("()\n\\", 4));		/* string literal; length 4 keeps NUL bytes legal */
 	H_ARULE(digit,	h_ch_range('0', '9'));
 	H_ARULE(pdigit,	h_ch_range('1', '9'));
 	H_ARULE(hlower,	h_ch_range('a', 'f'));
 	H_ARULE(hupper,	h_ch_range('A', 'F'));
 	H_RULE(hdigit,	CHX(digit, hlower, hupper));
+	H_ARULE(odigit,	h_ch_range('0', '7'));
+
+	H_RULE(sign,	h_in("+-", 2));		/* not IN(): its sizeof would accept NUL as a sign */
+	H_RULE(period,	h_ch('.'));
+	H_RULE(slash,	h_ch('/'));
+	H_RULE(hash,	h_ch('#'));
+	H_RULE(bslash,	h_ch('\\'));
+	H_RULE(lparen,	h_ch('('));
+	H_RULE(rparen,	h_ch(')'));
 
 	/* whitespace */
 	H_RULE(comment,	h_right(h_ch('%'), line));
@@ -134,8 +177,11 @@ pdf_parser(void)
 
 #define TOK(X)	h_right(ws, X)
 #define KW(S)	TOK(h_ignore(h_literal(S)))
+// XXX this allows, for instance, "<<<<" to be parsed as "<< <<". ok?
 
 	/* misc */
+	H_RULE(epsilon,	h_epsilon_p());
+	H_RULE(empty,	SEQ(epsilon));
 	H_ARULE(nat,	TOK(SEQ(digit,  h_many(digit))));
 	H_ARULE(pnat,	TOK(SEQ(pdigit, h_many(digit))));
 
@@ -148,25 +194,36 @@ pdf_parser(void)
 	H_RULE(boole,	CHX(KW("true"), KW("false")));
 
 	/* numbers */
-	H_RULE(sign,	h_in("+-", 2));
-	H_RULE(period,	h_ch('.'));
 	H_RULE(digits,	h_many1(digit));
 	H_ARULE(intg,	TOK(SEQ(h_optional(sign), digits)));
-	H_RULE(empty,	SEQ(h_epsilon_p()));
 	H_RULE(realnn,	CHX(SEQ(digits, period, digits),	/* 12.3 */
 			    SEQ(digits, period, empty),		/* 123. */
 			    SEQ(empty, period, digits)));	/* .123 */
 	H_ARULE(real,	TOK(SEQ(h_optional(sign), realnn)));
 
 	/* names */
-	H_RULE(slash,	h_ch('/'));
-	H_RULE(hash,	h_ch('#'));
 	H_ARULE(nesc,	SEQ(hash, hdigit, hdigit));
 	H_ARULE(nstr,	h_many(CHX(nchar, nesc)));	/* '/' is valid */
 	H_RULE(name,	TOK(h_right(slash, nstr)));
 
 	/* strings */
-	H_RULE(string,	h_nothing_p());	// XXX
+	H_RULE(snest,	h_indirect());
+	H_RULE(bsn,	mapch('n', 0x0a));	/* LF */
+	H_RULE(bsr,	mapch('r', 0x0d));	/* CR */
+	H_RULE(bst,	mapch('t', 0x09));	/* HT */
+	H_RULE(bsb,	mapch('b', 0x08));	/* BS (backspace) */
+	H_RULE(bsf,	mapch('f', 0x0c));	/* FF */
+	H_RULE(escape,	CHX(bsn, bsr, bst, bsb, bsf, lparen, rparen, bslash));
+	H_ARULE(octal,	CHX(REP(odigit,3), REP(odigit,2), REP(odigit,1)));
+	H_RULE(wrap,	h_ignore(eol));
+	H_RULE(sesc,	h_right(bslash, CHX(escape, octal, wrap, epsilon)));
+						/* NB: lone '\' is ignored */
+	H_ARULE(schars,	h_many(CHX(schar, snest, sesc, eol)));
+	H_RULE(snest_,	SEQ(lparen, schars, rparen));
+	H_ARULE(litstr,	TOK(h_middle(lparen, schars, rparen)));
+	H_RULE(hexstr,	h_middle(KW("<"), h_many(TOK(hdigit)), KW(">")));
+	H_RULE(string,	CHX(litstr, hexstr));
+	h_bind_indirect(snest, snest_);
 
 	/* arrays and dictionaries */
 	H_RULE(obj,	h_indirect());
@@ -191,11 +248,11 @@ pdf_parser(void)
 	H_RULE(objdef,	SEQ(pnat, nat, KW("obj"), obj, KW("endobj")));
 	H_RULE(body,	h_many(objdef));	// XXX object streams
 
-	H_RULE(xrefs,	h_epsilon_p());
+	H_RULE(xrefs,	epsilon);
 
-	H_RULE(trailer,	h_epsilon_p());
+	H_RULE(trailer,	epsilon);
 
-	H_RULE(end,	h_epsilon_p());	// XXX
+	H_RULE(end,	epsilon);	// XXX
 	H_RULE(tail,	SEQ(body, xrefs, trailer));
 	H_RULE(pdf,	SEQ(header, SEQ/*XXX h_many1*/(tail), end));