diff --git a/Makefile b/Makefile
index 6154e1d8363343e12153e52a27e9826581afdc3a..19fed2b3a1e0eeec0e8e2a3757ba60d1c2cefbff 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ CFLAGS += -std=c99 -Wall -Werror -DLOG
 # lib@ -> ../hammer/build/opt/src
 HAMMER_INCLUDE = .
 HAMMER_LIB = ./lib
-CFLAGS += -I$(HAMMER_INCLUDE)
+CFLAGS += -I$(HAMMER_INCLUDE)  -g   # (-pg :: profile using gprof) (-g :: debug info)
 LDFLAGS += -L$(HAMMER_LIB)
 SOURCES = pdf.c lzw-lib.c
 
diff --git a/pdf.c b/pdf.c
index fc31efb80fffe83c59869af82fea6e8989cab8a8..50d2e7d48a8ee09b187506fbc9e395c9fa28b25d 100644
--- a/pdf.c
+++ b/pdf.c
@@ -2,12 +2,14 @@
  * pesco 2019,2020
  * pompolic 2020
  * Paul Vines 2020
+ * Kragen Sitaker 2020, 2021
  * Sumit Ray 2021
+ *
  */
 
 #include <string.h>	/* strncmp(), memset(), memcpy() */
 #include <stdlib.h>	/* exit() */
-
+#include <strings.h> /* bcopy */
 #include <hammer/hammer.h>
 #include <hammer/glue.h>
 
@@ -16,6 +18,7 @@
 #define CHX(...)	h_choice(__VA_ARGS__, NULL)
 #define OPT(X)		h_ignore(h_optional(X))
 #define REP(P,N)	h_repeat_n(P, N)
+
 #define IGN(P)		h_ignore(P)
 #define LIT(S)		h_literal(S)
 #define IN(STR)		h_in((const uint8_t *)(STR), sizeof(STR) - 1)
@@ -110,6 +113,191 @@ validate_notnull(HParseResult *p, void *u)
 }
 
 
+
+// Token structures, defined early so the declarations below can use them
+typedef struct { size_t nr, gen; } Ref;
+
+typedef HCountedArray Dict;
+
+
+
+// Catalog Tree
+typedef struct RsrcDict_S {                     // resource information attached to a page-tree node (see PtNode_T.pgRsrc)
+	const HParsedToken  *resources;             // font references dictionary (resources == NULL) ==> inherit
+	const HParsedToken   *fonts;                // dictionary of fonts used in this page
+	size_t                numFonts;             // presumably the number of entries in 'fonts' -- TODO confirm against the code that fills it
+	const HParsedToken   *xobj;                 // xobj used in this page (open question: can a page use more than one XObject?)
+//	Dict                **seenFonts;            // (disabled) candidate fields for memoizing fonts and cmaps:
+//	size_t                numSeenFonts;
+//	const HParsedToken  **seenCmaps;            // memoized cmaps (should this be a bytestream?)
+//	size_t                numCmapsSeen;
+} RsrcDict_T;
+
+struct PtNode_S;
+
+typedef struct PtNode_S {                       // one node of the document structure, tagged by 'type'
+	enum {PG_TREE, PG_NODE, XO_NODE} type;      // presumably selects the union arm: PG_TREE -> pt, PG_NODE -> pn, XO_NODE -> xn -- confirm
+	const HParsedToken  *parent;                // Type = Page tree -- reference
+	RsrcDict_T          *pgRsrc;                // resource structure
+	const HParsedToken  *me;                    // Reference for me (handed to pp_ref when the node is printed)
+	size_t               offset;                // NOTE(review): meaning undocumented -- presumably a file/stream offset; confirm
+	union {
+		struct {
+			const Dict          *dict;          // page node dictionary
+			const HParsedToken  *textStream;    // content stream -- may be a result of concatenating array of content streams
+		} pn;
+		struct {
+			struct PtNode_S   *kids;            // page table
+			size_t             count;           // number of kids
+			size_t             leaves;          // number of pages in tree
+		} pt;
+		struct {
+			const Dict          *dict;          // NOTE(review): comment was copied from 'pn' -- presumably the XObject's dictionary; confirm
+			const HParsedToken  *textStream;    // content stream -- may be a result of concatenating array of content streams
+		} xn;
+	};
+
+} PtNode_T;
+
+struct XoNode_S;
+typedef struct XoNode_S {                // linked-list entry pairing a name with a node (Catalog_T keeps the list of XObjects)
+	char 				*name;               // name of the entry; presumably a NUL-terminated string -- confirm
+	PtNode_T			*node;               // the node this entry refers to
+	struct XoNode_S     *next;               // next entry in the list
+} XoNode_T;
+
+
+typedef struct Catalog_S {               // document structure; stored by value in struct Env as 'catalog'
+	const  HParsedToken  *catalog;   // reference
+	const  HParsedToken  *pRoot;     // reference (presumably to the root of the page tree -- confirm)
+	PtNode_T              pgTree;    // page tree
+	size_t                pgCount;   // page tree node count
+	XoNode_T              xObjs;     // list of XObjects
+	XoNode_T             *xoHead;    // presumably the first entry of the XObject list -- confirm
+	XoNode_T             *xoTail;    // presumably the last entry of the XObject list -- confirm
+	size_t				  xoCount;   // number of xobjects
+} Catalog_T;
+
+
+// Forward declaration of text extraction related structures
+struct textnode;
+struct textstr;
+struct TextEntry_S;
+
+// ***********************************************************
+/*
+ * Text data structures
+ */
+
+typedef struct {                   // text state; the text operator actions copy the current one into each TextEntry
+	struct PtNode_S    *page;      // node (page or XObject) the text belongs to
+	struct TextEntry_S *font;      // entry of the font in effect; may be NULL (pp_fontstate then prints "Font not yet specified")
+} TextState_T;
+
+
+struct textnode {                  // linked-list node; struct Env keeps the head and tail pointers
+	struct textstr  *tstr;         // the text string carried by this node
+	struct textnode *next;         // next node in the list
+};
+
+
+struct fontref {                   /* operand pair of a Tf operator, filled in by act_Tf_op */
+	const uint8_t *fontname;       /* font name bytes as they appear in the parsed token (length in namelen) */
+	uint32_t       namelen;        /* number of bytes in fontname */
+	double         fontsize;       /* font size operand */
+	char          *fn;   /* null terminated string */
+};
+struct textpos {                   /* text position operands (printed as tx/ty for Td) */
+	double tx;                     /* first operand */
+	double ty;                     /* second operand */
+};
+struct textmat { double cell[6]; };	/* six matrix coefficients; TextEntry labels its member of this type "font matrix" */
+struct textstr {                   /* counted byte string */
+	uint8_t            *text;      /* string bytes; pp_textentry writes exactly nchars of them, so no terminator is assumed there */
+	uint32_t            nchars;    /* number of bytes in text */
+	const HParsedToken *tobj;      /* presumably the token the string came from -- TODO confirm */
+};
+
+
+struct textwfmt {         /* text with formatting specifications (TextEntry member used for qq_op) */
+	double        aw;    /* word spacing */
+	double        ac;    /* character spacing */
+	struct textstr tstr;  /* the string */
+};
+struct tarrayelt {                 /* one element of a text array: either a number or a string */
+	union {
+		double          adj;       /* numeric element (presumably a position adjustment -- confirm) */
+		struct textstr  tstr;      /* string element */
+	};
+	bool                isStr;     /* presumably true when tstr is the valid member, false for adj -- confirm */
+};
+struct textarray {                 /* text contained in an array object */
+	struct tarrayelt  *elts;       /* the array elements */
+	uint32_t           nelts;      /* number of entries in elts */
+	struct textstr     flattened;  /* single string form of the array; this is what pp_textentry prints for TW_TJ */
+};
+
+
+
+/* operator::
+ * TS -- Text state    : Table 105
+ * TP -- Text position : Table 108
+ * TW -- Text showing  : Table 109
+ * */
+typedef struct TextEntry_S {                    /* one parsed text operator together with its operands */
+	enum {TS_Tc, TS_Tw, TS_Tz, TS_TL, TS_Tf, TS_Tr, TS_Ts,
+	      TP_Td, TP_TD, TP_Tm, TP_Tstar,
+		  TW_Tj, TW_Tq, TW_Tqq, TW_TJ} type;    /* operator kind; decides which union member below is meaningful */
+	TextState_T         ts;     // text state associated with this string (TBD: other state attributes)
+	union {
+		double              value; 			/* many just have a value */
+		uint8_t             mode;           /* text mode */
+		struct fontref      fref;           /* font name reference */
+		struct textpos      pos;		    /* text position */
+		struct textmat      fm;             /* font matrix */
+		struct textstr      tstr;           /* the string */
+		struct textwfmt     twfmt;          /* text with formatting -- qq_op */
+		struct textarray    tarray;         /* text contained in an array object */
+	};
+	const HParsedToken *obj;                    /* set to NULL by the operator actions visible in this file */
+} TextEntry;  // text object entries
+
+
+
+#if 0
+// This type is not used yet -- possibly obsolete (OBE, "overtaken by events")
+typedef struct {
+	struct textmat     fm;                  /* font matrix associated with this text object */
+	TextEntry        **ops;                 /* operators associated w/string */
+	uint8_t           *txt;                 /* the string associated with this object */
+} TextString;
+#endif
+
+// Initial use -- object streams
+typedef struct {                       // pairs an object id with its parsed object
+	Ref                 oid;           // object number and generation
+	const HParsedToken  *obj;          // the parsed object
+} Objref_T;
+typedef struct {                       // contents of an object stream (printed by pp_objstm)
+	size_t             numObjs;        // number of entries in tok
+	Objref_T           *tok;           // array of numObjs object entries
+	HArena             *arena;         // presumably the arena the entries were allocated from -- TODO confirm
+} Objstm;
+
+typedef struct {                       // font description; pp_fontinfo prints each member that is non-NULL
+	char *name;                        // printed with %s as "Font="
+	char *type;                        // printed with %s as "Type="
+	char *basefont;                    // printed with %s as "Basefont="
+	char *encoding;                    // printed with %s as "Encoding="
+	const HParsedToken *descriptor;    // printed through pp_dict
+	const HParsedToken *toUnicode;     // printed through pp_dict
+	const HParsedToken *descendantFonts; // printed through pp_dict
+} Fontinfo_T;
+
+
+// ***********************************************************
+
+
 /*
  * auxiliary global data structure needed by the parser
  */
@@ -120,13 +308,26 @@ struct Env {
 
 	const HParsedToken **xrefs;	/* all xref sections of the file */
 	size_t nxrefs;
+
+	struct textnode     *txthead;  /* parsed text objects from the file */
+	struct textnode     *txttail;  /* parsed text objects from the file */
+	size_t               ntextobjs;
+
+	Catalog_T            catalog;  /* Catalog object and document structure */
+	TextState_T          tstate;   /* current text state */
+
 };
 
+Fontinfo_T *lookup_font(TextState_T *state, struct Env *aux);
+
+// ***********************************************************
+
+
 
 /*
  * custom token types
  */
-HTokenType TT_XREntry, TT_Ref, TT_Dict, TT_HParseResult;
+HTokenType TT_XREntry, TT_Ref, TT_Dict, TT_HParseResult, TT_TextEntry, TT_Objstm;
 
 typedef struct {
 	enum {XR_FREE, XR_INUSE, XR_OBJSTM} type;
@@ -138,9 +339,6 @@ typedef struct {
 	const HParsedToken *obj;
 } XREntry;
 
-typedef struct { size_t nr, gen; } Ref;
-
-typedef HCountedArray Dict;
 
 /* look up a value in a dictionary */
 const HParsedToken *
@@ -230,7 +428,7 @@ pp_dict(FILE *stream, const HParsedToken *tok, int indent, int delta)
 	if (tok->seq->used > 2)
 		fprintf(stream, "\n%*s}", indent, "");
 	else
-		fprintf(stream, " }");
+		fprintf(stream, " }\n");
 }
 
 /*
@@ -580,19 +778,15 @@ act_a85string(const HParseResult *p, void *u)
 	size_t chunk_number;
 	size_t required_bytes;
 	size_t out_pos = 0;
-//	HCountedArray *seq    = H_CAST_SEQ(p->ast);
 	HCountedArray *chunks     = H_FIELD_SEQ(0);
 	HBytes        last_chunk = H_FIELD_BYTES(1);
 
 	/* Number of 4-byte chunks, minus the potential last partial group and EOD */
-//	assert(full->token_type == TT_SEQUENCE);
-//	assert(partial->->token_type == TT_BYTES);
 	chunk_number = chunks->used;
 
 	/* Special-case: last chunk before EOD may be 4, 3, 2 or 1 bytes
 	 * The latter two happening if the group was parsed from a partial
 	 * group consisting less than 5 chars */
-//	HBytes *last_chunk = &partial->elements[1]->bytes;
 	required_bytes = (chunk_number * 4 + last_chunk.len);
 
 	result_bytes = h_arena_malloc(p->arena, sizeof(uint8_t) * required_bytes);
@@ -608,6 +802,7 @@ act_a85string(const HParseResult *p, void *u)
 				result_bytes[out_pos+2], result_bytes[out_pos+3], *((unsigned int *)(chunks->elements[i]->bytes.token))); // XXX DEBUG
 		out_pos += 4;
 	}
+
 	if (last_chunk.len) {
 		memcpy(result_bytes + out_pos, last_chunk.token, last_chunk.len);
 		out_pos += last_chunk.len;
@@ -806,6 +1001,13 @@ act_xrent(const HParseResult *p, void *u)
 	return H_MAKE(XREntry, xr);
 }
 
+
+HParsedToken *
+act_xr_td(const HParseResult *p, void *u)	/* semantic action: hands the parse result's AST back unchanged; u is unused */
+{
+	return (HParsedToken*)p->ast;	/* cast to the return type, no copy is made */
+}
+
 HParsedToken *
 act_xrstment(const HParseResult *p, void *u)
 {
@@ -958,469 +1160,2552 @@ act_rldstring(const HParseResult *p, void *u)
 }
 
 
+// XXX review
+/*
+ * ********************************************************************
+ * Catalog parsing
+ * ********************************************************************
+ */
+HParsedToken *
+act_cstream(const HParseResult *p, void *u)
+{
+//	Semantic action that currently passes the AST through unchanged; u is unused.
+//	A debug print of the content stream length (number of elements in field 0)
+//	used to be here and is disabled.
+	return (HParsedToken *)p->ast;
+}
+
+
+
+bool
+validate_pgcontents(HParseResult *p, void *u)	/* validation predicate; neither argument is examined */
+{
+	return false;	/* NOTE(review): rejects unconditionally -- looks like a placeholder; confirm before attaching it to a rule */
+}
+
+HParsedToken *
+act_pgcontents(const HParseResult *p, void *u)	/* semantic action: hands the parse result's AST back unchanged; u is unused */
+{
+	return (HParsedToken *)p->ast;	/* cast to the return type, no copy is made */
+}
+
 
+HParsedToken *
+act_page(const HParseResult *p, void *u)	/* semantic action: hands the parse result's AST back unchanged; u is unused */
+{
+	return (HParsedToken *)p->ast;	/* cast to the return type, no copy is made */
+}
 
+HParsedToken *
+act_dictobj(const HParseResult *p, void *u)	/* semantic action: hands the parse result's AST back unchanged; u is unused */
+{
+	return (HParsedToken *)p->ast;	/* cast to the return type, no copy is made */
+}
 
 
 /*
- * input grammar
+ * ********************************************************************
+ * Start Text parsing
+ * ********************************************************************
  */
+void pp_fontstate(FILE *stream, const TextState_T *state)	/* print the page reference and font name of a text state to stream */
+{
+	assert(state);
+	fprintf(stream, "\nFont State: Page = ");
+	if ( (state->page->type == PG_NODE) || (state->page->type == XO_NODE) )	/* PG_TREE nodes print no reference; NOTE(review): page itself is not NULL-checked */
+		pp_ref(stream, state->page->me, 0, 0);
+	if (state->font)  // not all operators need or set this
+		fprintf(stream, ", Font = %s\n", state->font->fref.fn);	/* fn is the NUL-terminated copy of the font name */
+	else
+		fprintf(stream, ", Font not yet specified\n");
 
-HParser *p_pdf;
-HParser *p_pdfdbg;
-HParser *p_startxref;
-HParser *p_xref;
-HParser *p_objdef;
-HParser *p_a85string;
-HParser *p_ahexstream;
-HParser *p_rldstring;
-HParser *p_ws;
-HParser *p_wel;
-HParser *p_elemr;
-HParser *p_npair;
-
-/* continuations for h_bind() */
-HParser *kstream(HAllocator *, const HParsedToken *, void *);
-HParser *kxstream(HAllocator *, const HParsedToken *, void *);
+}
+void pp_fontinfo(FILE *stream, const TextState_T *state, const Fontinfo_T *fi)	/* print the text state, then every populated field of fi */
+{
+	assert(state && fi);
+	pp_fontstate(stream, state);	/* page reference and current font first */
+	fprintf(stream, "Font Info: ");
+	if (fi->name)     fprintf(stream, "Font= %s", fi->name);	/* string members are skipped when NULL */
+	if (fi->type)     fprintf(stream, ", Type= %s", fi->type);
+	if (fi->basefont) fprintf(stream, ", Basefont= %s", fi->basefont);
+	if (fi->encoding)     fprintf(stream, ", Encoding= %s", fi->encoding);
+	if (fi->descriptor) pp_dict(stream, fi->descriptor, 0, 0);	/* token members go through pp_dict; NOTE(review): confirm they are always dictionary tokens */
+	if (fi->toUnicode) pp_dict(stream, fi->toUnicode, 0, 0);
+	if (fi->descendantFonts) pp_dict(stream, fi->descendantFonts, 0, 0);
+	fprintf(stream, "\n");
+}
 
+/*
+ * Pretty printer for text components of the ast
+ */
 void
-init_runlengthdecode_parser(struct Env *aux)
+pp_textentry(FILE *stream, const HParsedToken *tok, int indent, int delta)	/* indent and delta are accepted but not used */
 {
-	H_RULE(rldeod, h_ch(0x80));
-	H_ARULE(longlength, h_ch_range(0x81, 0xFF));
-	H_ARULE(shortlength, h_ch_range(0x0, 0x7F));
-
-	H_RULE(shortdata, h_uint8());
-	H_RULE(longdata, h_uint8());
+	TextEntry *txte = H_CAST(TextEntry, tok);
 
-	H_RULE(shortrun, h_length_value(shortlength, shortdata));
-	H_ARULE(longrun, SEQ(longlength, longdata));
+	switch (txte->type) {
+	/*
+	 * The text-showing entries (TW_Tj, TW_Tq, TW_Tqq, TW_TJ) are always printed.
+	 * TS_Tf and TP_Td are printed only when TEXT_VERBOSE is defined (it is, just
+	 * below); every other entry type falls through to the empty default case.
+	 */
+#define TEXT_VERBOSE
+#ifdef TEXT_VERBOSE
+	case TS_Tf:
+		fprintf(stream, "Tf_op: fn=%.*s, fontsize=%3.3f\n",
+				txte->fref.namelen, txte->fref.fontname, txte->fref.fontsize);
+		break;
+	case TP_Td:
+		fprintf(stream, "Td_op: text position ::tx=%3.3f:ty=%3.3f\n",
+				txte->pos.tx, txte->pos.ty);
+		break;
+#endif
+	case TW_Tj:
+	case TW_Tq:
+	case TW_Tqq:	/* NOTE(review): TextEntry documents twfmt as the member for qq_op, yet tstr is read here -- confirm */
+		fprintf(stream, "len=%u, ", txte->tstr.nchars);
+		fwrite((const void *)txte->tstr.text, (int) txte->tstr.nchars, 1, stream);	/* one item of nchars bytes */
+		break;
+	case TW_TJ:
+		fprintf(stream, "len=%u, ", txte->tarray.flattened.nchars);
+		fwrite((const void *)txte->tarray.flattened.text,
+				(int) txte->tarray.flattened.nchars, 1, stream);
+		break;
 
-	H_ARULE(rldstring, SEQ(h_many(CHX(shortrun, longrun)), IGN(rldeod)));
 
-	p_rldstring = rldstring;
+	default:
+		;
+	}
 }
 
+
 void
-init_parser(struct Env *aux)
+pp_objstm(FILE *stream, const HParsedToken *tok, int indent, int delta)	/* indent and delta are accepted but not used */
 {
-	TT_HParseResult = h_allocate_token_new("HParseResult", NULL, pp_parseresult);
-	TT_XREntry =	h_allocate_token_new("XREntry", NULL, pp_xrentry);
-	TT_Ref =	h_allocate_token_new("Ref", NULL, pp_ref);
-	TT_Dict =	h_allocate_token_new("Dict", NULL, pp_dict);
+	Objstm *entry = H_CAST(Objstm, tok);
 
-	/* lines */
-	H_RULE(cr,	p_mapch('\r', '\n'));	/* semantic value: \n */
-	H_RULE(lf,	h_ch('\n'));		/* semantic value: \n */
-	H_RULE(crlf,	h_right(cr, lf));	/* semantic value: \n */
-	H_RULE(eol,	CHX(crlf, cr, lf));
-	H_RULE(end,	h_end_p());
+	/* Pretty-print an object stream: the object count, then one line per object (id, token type, dictionary body). */
+	fprintf(stream, "pp_objstm:: Num Objects = %zu\n", entry->numObjs);	/* numObjs is a size_t: %zu, not %lu */
 
-	/* character classes */
-#define LWCHARS	"\0\t\f "
-#define WCHARS	LWCHARS "\n\r"
-#define DCHARS	"()<>[]{}/%"
-	H_RULE(wchar,	IN(WCHARS));			/* white-space */
-	H_RULE(lwchar,	IN(LWCHARS));			/* "line" whitespace */
-	//H_RULE(dchar,	IN(DCHARS));			/* delimiter */
-	H_RULE(rchar,	NOT_IN(WCHARS DCHARS));		/* regular */
-	H_RULE(nchar,	NOT_IN(WCHARS DCHARS "#"));	/* name */
-	H_RULE(schar,	NOT_IN("()\n\r\\"));		/* string literal */
-	H_ARULE(digit,	h_ch_range('0', '9'));
-	H_ARULE(pdigit,	h_ch_range('1', '9'));
-	H_ARULE(hlower,	h_ch_range('a', 'f'));
-	H_ARULE(hupper,	h_ch_range('A', 'F'));
-	H_RULE(hdigit,	CHX(digit, hlower, hupper));
-	H_ARULE(odigit,	h_ch_range('0', '7'));
+	for (size_t i=0; i<entry->numObjs; i++) {	/* index has the same type as the bound */
+		fprintf(stream, "oid: <nr=%zu, gen=%zu>, ", entry->tok[i].oid.nr, entry->tok[i].oid.gen);
+		fprintf(stream, "Type = %u, ", (unsigned) entry->tok[i].obj->token_type);	/* enum cast to match %u */
+		if (entry->tok[i].obj->token_type == TT_Dict) {	/* only dictionaries are expanded */
+			pp_dict(stream, entry->tok[i].obj, 0, 0);
+		}
+		fprintf(stream, "\n");
+	}
+}
 
-	H_RULE(sp,	h_ch(' '));
-	H_RULE(percent,	h_ch('%'));
-	H_RULE(period,	h_ch('.'));
-	H_RULE(slash,	h_ch('/'));
-	H_RULE(hash,	h_ch('#'));
-	H_RULE(bslash,	h_ch('\\'));
-	H_RULE(lparen,	h_ch('('));
-	H_RULE(rparen,	h_ch(')'));
-	H_RULE(langle,	h_ch('<'));
-	H_RULE(rangle,	h_ch('>'));
-	H_RULE(lbrack,	h_ch('['));
-	H_RULE(rbrack,	h_ch(']'));
-	H_RULE(plus,	h_ch('+'));
-	H_RULE(minus,	h_ch('-'));
+/*
+ * semantic actions
+ */
 
-	/* whitespace */
-	H_RULE(comment,	SEQ(percent, h_many(NOT_IN("\r\n")), CHX(cr,lf,end)));
-		/* ^ NB: must consume cr/lf for ws to be LL(k) */
-	H_RULE(wel,	IGN(CHX(wchar, comment)));
-	H_RULE(ws,	IGN(h_many(wel)));
-	H_RULE(lws,	IGN(h_many(IGN(lwchar))));
 
-	/* tokenization */
-#define MANY_WS(X)	h_many(CHX(wel, X))
-#define TOK(X)	h_middle(ws, X, h_not(rchar))
-#define KW(S)	IGN(TOK(LIT(S)))
 
-	/* misc */
-	H_RULE(nl,	IGN(h_right(lws, eol)));
-	H_RULE(epsilon,	h_epsilon_p());
-	H_RULE(empty,	SEQ(epsilon));
-	H_RULE(digits,	h_many1(digit));
-	H_VARULE(nat,	digits);
-	H_VRULE(pnat,	nat);
-	H_RULE(npair,	SEQ(pnat, wel,ws, nat));
+/*
+ *  Simplify the code by casting the choice of integer number and real number to double
+ */
+bool
+validate_tnumb(HParseResult *p, void *u)	/* accept only integer or real tokens, the two types act_tnumb converts; u is unused */
+{
+	const HTokenType tt = p->ast->token_type;	/* type of the matched number token */
 
-	/*
-	 * objects
-	 */
-	
-	H_ARULE(ref,	SEQ(npair, wel,ws, LIT("R")));
-	H_RULE(null,	LIT("null"));
-	H_RULE(boole,	CHX(LIT("true"), LIT("false")));
+	return (tt == TT_SINT) || (tt == TT_DOUBLE);	/* report failure instead of aborting; also effective when NDEBUG disables assert */
+}
 
-	/* numbers */
-	H_ARULE(sign,	CHX(minus, IGN(plus)));
-	H_VRULE(intnn,	nat);
-	H_ARULE(realnn,	CHX(SEQ(digits, period, digits),	/* 12.3 */
-			    SEQ(digits, period, empty),		/* 123. */
-			    SEQ(empty, period, digits)));	/* .123 */
-		// XXX ^ we _could_ move the "123." case into intnn...
-	H_RULE(numbnn,	CHX(realnn, intnn));
-	H_RULE(snumb,	SEQ(sign, numbnn));
-	H_VARULE(numb,	CHX(snumb, numbnn));
 
-	/* names */
-	H_ARULE(nesc,	SEQ(hash, hdigit, hdigit));
-	H_ARULE(nstr,	h_many(CHX(nchar, nesc)));	/* '/' is valid */
-	H_RULE(name,	h_right(slash, nstr));
+HParsedToken *
+act_tnumb(const HParseResult *p, void *u)	/* semantic action: turns an integer or real token into a double token; u is unused */
+{
 
-	/* strings */
-	H_RULE(snest,	h_indirect());
-	H_RULE(bsn,	p_mapch('n', 0x0a));	/* LF */
-	H_RULE(bsr,	p_mapch('r', 0x0d));	/* CR */
-	H_RULE(bst,	p_mapch('t', 0x09));	/* HT */
-	H_RULE(bsb,	p_mapch('b', 0x08));	/* BS (backspace) */
-	H_RULE(bsf,	p_mapch('f', 0x0c));	/* FF */
-	H_RULE(escape,	CHX(bsn, bsr, bst, bsb, bsf, lparen, rparen, bslash));
-	H_ARULE(octal,	CHX(REP(odigit,3), REP(odigit,2), REP(odigit,1)));
-	H_RULE(wrap,	IGN(eol));
-	H_RULE(sesc,	h_right(bslash, CHX(escape, octal, wrap, epsilon)));
-		/* NB: lone backslashes and escaped newlines are ignored */
-	H_ARULE(schars,	h_many(CHX(schar, snest, sesc, eol)));
-	H_RULE(snest_,	SEQ(lparen, schars, rparen));
-	H_RULE(litstr,	h_middle(lparen, schars, rparen));
-	H_RULE(hexstr,	h_middle(langle, MANY_WS(hdigit), rangle));
-	H_ARULE(string,	CHX(litstr, hexstr));
-	h_bind_indirect(snest, snest_);
+	double value;	/* the number, whichever form it was parsed in */
 
-	H_RULE(array,	h_indirect());
-	H_RULE(dict,	h_indirect());
+	if (p->ast->token_type == TT_SINT)  value = (double)p->ast->sint;	/* integer: convert */
+	else                                value =         p->ast->dbl;	/* anything else is read as a double (validate_tnumb checks the type) */
 
-	/* classify objects by whether they start/end with a delimiter: */
-	H_RULE(robj,	CHX(ref, null, boole, numb));		/* rchars */
-	H_RULE(dobj,	CHX(string, array, dict));		/* dchars */
-	H_RULE(obj,	CHX(robj, name, dobj));
+	return H_MAKE_DOUBLE(value);
+}
 
-	/* dictionaries */
-	H_RULE(dopen,	LIT("<<"));
-	H_RULE(dclose,	LIT(">>"));
-	H_RULE(k_v,	CHX(CHX(SEQ(name, wel,ws, obj),
-				SEQ(name, CHX(name,dobj))),
-			    VIOL(SEQ(name, wel,ws), "Key with no value (severity=2)")));
-	H_ARULE(dict_,	h_middle(dopen, MANY_WS(k_v), dclose));
-		// XXX this allows, for instance, "<<<<" to be parsed as "<< <<". ok?
-		// XXX validate: dict keys must be unique
-	h_bind_indirect(dict, dict_);
 
-	/* arrays */
-	H_RULE(elemd,	h_indirect());	/* elements following a delimiter */
-	H_RULE(elemr,	h_indirect());	/* elements following a regular char */
-	H_ARULE(array_,	h_middle(lbrack, elemd, rbrack));
-	H_RULE(elemd_,	CHX(SEQ(ws, dobj, elemd),
-			    SEQ(ws, name, elemr),
-			    SEQ(ws, robj, elemr),
-			    ws));
-	H_RULE(elemr_,	CHX(SEQ(ws, dobj, elemd),
-			    SEQ(ws, name, elemr),
-			    SEQ(wel,ws, robj, elemr),
-			    ws));
-	h_bind_indirect(elemd, elemd_);
-	h_bind_indirect(elemr, elemr_);
-	h_bind_indirect(array, array_);
 
-	/* streams */
-	H_RULE(stmbeg,	SEQ(dict, OPT(ws), LIT("stream"), OPT(cr),
-			    CHX(lf, VIOL(epsilon, "No linefeed after 'stream' (severity=7)"))));
-	H_RULE(stmend,	CHX(SEQ(eol, LIT("endstream")),
-			   VIOL(LIT("ndstream"), "Stream length >1-too-long (severity=10)"),
-			   VIOL(SEQ(h_many(wchar), LIT("endstream")),
-				"No newline before 'endstream' (severity=7)"),
-			   VIOL(LIT("endstream"), "Stream length 1-too-long (severity=9)"),
-			   VIOL(SEQ(OPT(h_ch_range(0, 255)), OPT(eol), LIT("endstream")),
-				"Stream length 1-too-short (severity=4)"),
-			   VIOL(SEQ(h_many1(h_butnot(h_ch_range(0, 255), CHX(KW("endobj"),
-									     SEQ(npair, wel, KW("obj")),
-									     KW("xref"),
-									     LIT("endstream")))), LIT("endstream")),
-				"Stream length >1-too-short (severity=5)"),
-			   VIOL(h_many1(h_butnot(h_ch_range(0, 255), CHX(KW("endobj"),
-									 SEQ(npair, wel, KW("obj")),
-									 KW("xref")))),
-				"Missing endstream token (severity=7)")));
 
-	H_RULE(stream,	h_left(h_bind(stmbeg, kstream, aux), stmend));
-		// XXX is whitespace allowed between the eol and "endstream"?
-		// peter wyatt says no. (2020-03-25)
+/*
+ * Text state operators - Table 105
+ *   TS_Tc, TS_Tw, TS_Tz, TS_TL, TS_Tf, TS_Tr, TS_Ts
+ *
+ * *****************************************************************
+ * *****************************************************************
+ *
+ */
+HParsedToken *
+act_Tc_op(const HParseResult *p, void *u)	/* semantic action for the Tc operator: wraps its numeric operand in a TextEntry; u is the struct Env */
+{
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	struct Env         *aux    = (struct Env*)u;
+	const HParsedToken *tval = H_INDEX_TOKEN(p->ast, 0);	/* element 0 of the matched sequence: the operand */
 
-	/*
-	 * file structure
-	 */
 
-	/* header */
-	H_RULE(version,	SEQ(pdigit, IGN(period), digit));
-	H_RULE(header,	h_middle(LIT("%PDF-"), version, nl));
+	txte->type  = TS_Tc;
+	txte->obj   = NULL;
 
-	/* body */
-	H_RULE(indobj,	CHX(stream, obj));
-	H_RULE(objdef,	SEQ(ws, npair, wel, KW("obj"), ws, indobj,
-			    CHX(VIOL(SEQ(OPT(ws), OPT(lws), KW("endobj"), h_many(CHX(wel, eol)), h_many1(KW("endobj"))),
-				     "More than 1 endobj token (severity=1)"),
-				VIOL(SEQ(OPT(ws), OPT(lws), KW("endobj"), h_many(CHX(wel, eol)), h_many1(SEQ(dclose, h_many1(CHX(wchar, eol)), KW("endobj")))),
-				     "More than 1 >> and endobj token (severity=2)"),
-				SEQ(OPT(ws), OPT(lws), KW("endobj")),
-				VIOL(h_optional(KW("endobj")), "Missing endobj token (severity=1)"))));
-	H_RULE(body,	h_many(objdef));
+	assert(tval->token_type == TT_DOUBLE);	/* the operand is expected to be a double token already (presumably produced by act_tnumb -- confirm) */
+	txte->value   = tval->dbl;
 
-	/* cross-reference section */
-	H_RULE(xreol,	CHX(SEQ(sp, cr), SEQ(sp, lf), crlf));
-		// ^ XXX does the real world follow this rule?! cf. loop.pdf
-	H_RULE(xrtyp,	CHX(h_ch('n'), h_ch('f')));
-	H_ARULE(xroff,	REP(digit, 10));
-	H_ARULE(xrgen,	REP(digit, 5));
-	H_ARULE(xrent,	SEQ(xroff, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref offset_gen entry (severity=1)"), sp)),
-	    xrgen, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref gen_use entry (severity=1)"), sp)),
-	    xrtyp, IGN(CHX(VIOL(SEQ(wchar, wchar, h_many1(wchar)), "Greater-than-2-byte WS at end of xref entry (severity=1)"),
-	    xreol,
-	    VIOL(SEQ(h_many1(wchar)), "Nonconformant WS at end of xref entry (severity=1)")))));
-	H_RULE(xrhead,	SEQ(nat, IGN(sp), nat, nl));
-	H_RULE(xrsub,	SEQ(xrhead, h_many(xrent)));
-	H_ARULE(xrefs,	SEQ(KW("xref"), nl, h_many(xrsub)));
+	// associate the text with the current state
+	txte->ts.page = aux->tstate.page;
+	txte->ts.font = aux->tstate.font;
 
-	/* cross-reference streams */
-	H_RULE(xstream,	h_bind(stmbeg, kxstream, aux));
-	H_AVRULE(xrstm,	SEQ(ws, npair, wel, KW("obj"), ws, xstream));
-		// XXX skip however much we consumed and check for "endstream endobj"?
+	fprintf(stdout, "act_Tc_op:: %3.3f\n", txte->value);	/* debug trace on stdout */
+	return H_MAKE(TextEntry, txte);
+}
 
 
-	/* trailer */
-	H_RULE(startxr, SEQ(nl, KW("startxref"), nl,
-			    lws, nat, nl,
-			    LIT("%%EOF"), OPT(nl)));
+/*
+ * Tw operator: word spacing specification
+ * H_ARULE(Tw_op, SEQ(tnumb, ws, LIT("Tw")));  // 9.3.3 - wordSpace
+ */
+HParsedToken *
+act_Tw_op(const HParseResult *p, void *u)	/* semantic action for the Tw operator: wraps its numeric operand in a TextEntry; u is the struct Env */
+{
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	struct Env         *aux    = (struct Env*)u;
 
-	/* used for the backwards search */
-	H_RULE(lasteof, SEQ(nl, KW("startxref"), nl,
-			    lws, nat, nl,
-		// XXX the real world sometimes omits nl after %%EOF inside the file.
-		//     the next 'tail' would be appended right after the 'F',
-		//     presumably because the previous version of the file
-		//     ended without a trailing newline. m)
-		//     this is invalid per spec, because it creates a run-on
-		//     comment, but we should probably accept-and-warn.
-		// XXX should lws be allowed before EOF marker?
-		// NB: lws before xref offset is allowed, cf. p.48 (example 4)
-			    LIT("%%EOF"),
-			    CHX(VIOL(SEQ(nl, h_many1(nl), end),
-				     "(offset FROM END) Multiple newlines after final %%EOF (severity=4)"),
-				SEQ(h_many(nl), end),
-				VIOL(SEQ(h_butnot(h_ch_range(0, 255), LIT("%%EOF"))),
-				     "(offset FROM END) Data after final  %%EOF (severity=7)"))));
 
-	H_RULE(xr_td,	SEQ(xrefs, KW("trailer"), ws, dict));
+	txte->type  = TS_Tw;
+	txte->obj   = NULL;
+	txte->value = H_FIELD_DOUBLE(0);	/* field 0 of the matched sequence: the operand, read as a double */
 
-	H_RULE(start_junk,  VIOL(h_many1(h_butnot(h_ch_range(0, 255), header)),
-	                        "Junk bytes before %PDF header (severity=1)"));
-	H_RULE(hdr_junk,    CHX(comment,
-	    VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))),
-	    "Uncommented junk after header (severity=1)")));
-	H_RULE(tail,	SEQ(body, CHX(SEQ(h_optional(xr_td), startxr),
-				      VIOL(SEQ(xr_td, OPT(SEQ(nl, KW("startxref"), nl, lws, nat, nl)),
-					       OPT(nl), OPT(LIT("%%EOF")), OPT(nl)),
-					   "Improper end of trailer - missing startxref and/or %%EOF (severity=5)"))));
-	H_RULE(final_eof_junk, CHX(VIOL(SEQ(h_many1(nl), end), "Multiple newlines after final %%EOF (severity=4)"),
-				   VIOL(h_many1(h_butnot(h_ch_range(0, 255), LIT("%%EOF"))),
-					"Data after final %%EOF (severity=7)"),
-				   end));
-	H_RULE(pdf,	SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
+	// associate the text with the current state
+	txte->ts.page = aux->tstate.page;
+	txte->ts.font = aux->tstate.font;
 
-	/* debug parser to consume as much as possible */
-	H_RULE(pdfdbg,	SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
+	fprintf(stdout, "act_Tw_op:: %3.3f\n", txte->value);	/* debug trace on stdout */
+	return H_MAKE(TextEntry, txte);
+}
 
-	/*
-	 * filters
-	 */
 
-	/* Whitespace can occur between any digit and has to be ignored, */
-	H_RULE(aws,	IGN(h_many(wchar)));  // all white space, include CR & LF, but not comments
-	#define MANY_AWS(X) h_many(CHX(aws, X))
 
+/*
+ * Tz operator: horizontal scaling specification
+ * H_ARULE(Tz_op, SEQ(tnumb, ws, LIT("Tz")));  // 9.3.4 - horizontal scaling
+ */
+HParsedToken *
+act_Tz_op(const HParseResult *p, void *u)	/* semantic action for the Tz operator: wraps its numeric operand in a TextEntry; u is the struct Env */
+{
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	struct Env         *aux    = (struct Env*)u;
 
-	/* Ascii85Decode */
-	H_RULE(a85eod,	SEQ(h_ch('~'), aws, h_ch('>')));
-	H_ARULE(a85zero,	h_ch('z'));
-	H_ARULE(a85digit,	h_ch_range('!', 'u'));
 
-	/* Line whitespace can occur between any digit and has to be ignored, */
-	#define MANY_LWS(X) h_many(CHX(lws, X))
-	/* This encoding of zero is not allowed */
-//	H_RULE(a85fiveexcl, h_repeat_n(SEQ(h_ch('!'), aws), 5)); // seeing this is a violation
-	// Folded the test for a85fiveexcl into the validation component
-	H_VARULE(a85fivedigits,	h_repeat_n(SEQ(a85digit, aws), 5));
-	/*
-	 * Suggestion for violations. VIOL() will report error conditions, but it also relaxes validation.
-	 * A hacky way to avoid that would be to add back the validation manually to the H_RULE backing the VIOL() itself.
-	 *
-	 */
+	txte->type  = TS_Tz;
+	txte->obj   = NULL;
+	txte->value = H_FIELD_DOUBLE(0);	/* field 0 of the matched sequence: the operand, read as a double */
 
-	 /* H_RULE(a85fivedigits_viol, h_repeat_n(SEQ(h_ch('!'), aws), 5));
-	  * H_RULE(a85fivedigits_report_error, CHX(a85fivedigits, VIOL(a85fivedigits_viol, "Zero ASCII85Encoded as '!!!!!'"))); // Relaxes validation, will parse
-	  *
-	  * HParser *a85fivedigits_viol_alt = h_attr_bool(h_action(h_repeat_n(SEQ(h_ch('!'), aws), 5), act_a85fivedigits, NULL), validate_a85fivedigits, NULL);
-	  * H_RULE(a85fivedigits_report_error, CHX(a85fivedigits, VIOL(a85fivedigits_viol_alt, "Zero ASCII85Encoded as '!!!!!'"))); // Validation will run and stop the parse
-	  */
+	// associate the text with the current state
+	txte->ts.page = aux->tstate.page;
+	txte->ts.font = aux->tstate.font;
 
-//	H_VARULE(a85fivedigits,	SEQ(h_and(h_not(a85fiveexcl)), h_repeat_n(SEQ(a85digit, aws), 5)));
-	// TODO:: will need to pull out error conditions -- a85fiveexcl or 'z' as one of the digits
-	H_ARULE(a85group,	CHX(a85zero, a85fivedigits));
+	fprintf(stdout, "act_Tz_op:: %3.3f\n", txte->value);	/* debug trace on stdout */
+	return H_MAKE(TextEntry, txte);
+}
 
-	H_VARULE(a85partial2group,	h_repeat_n(SEQ(a85digit, aws), 2));
-	H_VARULE(a85partial3group,	h_repeat_n(SEQ(a85digit, aws), 3));
-	H_VARULE(a85partial4group,	h_repeat_n(SEQ(a85digit, aws), 4));
-	H_RULE(a85partialgroup,	CHX(a85partial4group, a85partial3group, a85partial2group));
 
-	H_ARULE(a85string,	SEQ(h_many(a85group), h_optional(a85partialgroup), IGN(a85eod)));
 
-	//p_test = a85group;
+/*
+ * TL operator: leading (line spacing) specification
+ * H_ARULE(TL_op, SEQ(tnumb, ws, LIT("TL")));  // 9.3.5 - leading
+ */
+HParsedToken *
+act_TL_op(const HParseResult *p, void *u)	/* semantic action for the TL operator: wraps its numeric operand in a TextEntry; u is the struct Env */
+{
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	struct Env         *aux    = (struct Env*)u;
 
 
-	/*
-	 * Not sure whether comments can be embedded within content streams
-	 * If not, use the rule aws rather than ws
-	 */
-	/*
-	 * It seems somewhat unclear. ASCII85Decode definitely can't have
-	 * comments, because % can be part of a valid ASCII85Encoded character.
-	 * However, it seems that comments are generally allowed:
-	 * "Any occurrence of the PERCENT SIGN (25h) outside a string or
-	 * inside a content stream (see 7.8.2, "Content streams") introduces
-	 * a comment." ISO32000:2-2017 7.2.4
-	 */
-	 // XXX Ask Peter Wyatt
+	txte->type  = TS_TL;
+	txte->obj   = NULL;
+	txte->value = H_FIELD_DOUBLE(0);	/* field 0 of the matched sequence: the operand, read as a double */
 
+	// associate the text with the current state
+	txte->ts.page = aux->tstate.page;
+	txte->ts.font = aux->tstate.font;
 
-	/* AsciiHexDecode */
-	H_RULE(ahexeod,	h_ch('>'));
-	H_ARULE(hdigitpair, SEQ(aws, hdigit, aws, hdigit));
-	H_ARULE(ahextruncated, SEQ(aws, hdigit));
+	fprintf(stdout, "act_TL_op:: %3.3f\n", txte->value);	/* debug trace on stdout */
+	return H_MAKE(TextEntry, txte);
+}
 
-	H_RULE(ahs_end, SEQ(h_optional(ahextruncated), aws, ahexeod));
-	H_ARULE(ahexstream, SEQ(h_many(hdigitpair), ahs_end));
 
-	init_runlengthdecode_parser(aux);
+/*
+ *  Font name and size specification
+ *  H_ARULE(Tf_op, SEQ(name, ws, nat, ws, KW("Tf"), ws));  // font and size
+ *
+ *  TODO: Verify that the name is specified in the resource dictionary
+ */
+HParsedToken *
+act_Tf_op(const HParseResult *p, void *u)	/* semantic action for Tf: records font name and size, and makes this entry the current font in the struct Env passed as u */
+{
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	struct Env         *aux    = (struct Env*)u;
+	const HParsedToken *fn_token = H_FIELD_TOKEN(0);	/* field 0: the font name */
+
+	txte->type  = TS_Tf;
+	txte->obj   = NULL;
+	txte->fref.fontsize = 0.0;	/* defined fallback: the unexpected-type branch below assigns nothing, and the size is printed afterwards */
+	txte->fref.fontname = fn_token->bytes.token;
+	txte->fref.namelen  = fn_token->bytes.len;
+	txte->fref.fn       = h_arena_malloc(p->arena, sizeof (char) * (fn_token->bytes.len + 1));	/* NUL-terminated copy of the name */
+	memcpy(txte->fref.fn, fn_token->bytes.token, fn_token->bytes.len);
+	txte->fref.fn[fn_token->bytes.len] = '\0';
+	HTokenType tokenType = p->ast->seq->elements[1]->token_type;	/* field 1: the font size, unsigned integer or double */
+	if (tokenType == TT_UINT)
+		txte->fref.fontsize = (double) H_FIELD_UINT(1);
+	else if (tokenType == TT_DOUBLE)
+		txte->fref.fontsize = (double) H_FIELD_DOUBLE(1);
+	else
+		fprintf(stderr, "act_Tf_op: Unexpected token type for fontsize - token_type=%u\n",
+					tokenType);
 
 
-	/* global parser variables */
-	p_pdf = pdf;
-	p_pdfdbg = pdfdbg;
-	p_startxref = lasteof; //startxr;
-	p_xref = CHX(xr_td, xrstm);
-	p_objdef = objdef;
-	p_a85string = a85string;
-	p_ahexstream = ahexstream;
-	p_ws = ws;
-	p_wel = wel;
-	p_elemr = h_action(elemr, h_act_flatten, NULL);
-	p_npair = npair;
+	// save this font as the current state to be used by subsequent text
+	const HParsedToken * restok = H_MAKE(TextEntry, txte);
+	aux->tstate.font            = txte;
 
-	p_fail = h_nothing_p();
-	p_epsilon = epsilon;
-	p_return_0 = h_action(epsilon, act_return_uint, (void *)0);
-	p_return_1 = h_action(epsilon, act_return_uint, (void *)1);
+	// associate the text with the current state
+	txte->ts.page = aux->tstate.page;
+	txte->ts.font = aux->tstate.font; // recursive :-) defn
 
-	/* Parsing of severity messages */
-	H_RULE(viol_preamble, SEQ(h_many(NOT_IN("=")), LIT("=")));
-	H_RULE(severity_num, h_action(h_many1(h_action(h_ch_range('0', '9'), act_digit, NULL)),
-			     act_nat, NULL));
-	H_RULE(violsev, SEQ(IGN(viol_preamble), severity_num));
-	p_violsev = violsev;
+	// DEBUG
+	fprintf(stdout, "act_Tf_op: fn=%.*s, fontsize=%3.3f, fontstate=%p, page=",
+			txte->fref.namelen, txte->fref.fontname, txte->fref.fontsize, (void*)txte);
+	if (aux->tstate.page && aux->tstate.page->type==PG_NODE)	/* debug output only: tolerate a text state without a page */
+		pp_ref(stdout, aux->tstate.page->me, 0, 0);
+	fprintf(stdout, "\n");
 
-#if 0
-	// XXX testing
-	int r;
-	void errx(int, const char *, ...);
-	HParser *p = obj;
-	if ((r = h_compile(p, PB_LALR, NULL)) != 0)
-		errx(1, "h_compile() failed: %d", r);
-	errx(0, "OK");
-#endif
+	return ((HParsedToken *)restok);
 }
 
 
+
 /*
- * lookup and resolution of indirect references
+ * Tr operator: rendering mode
+ * H_VRULE(tmode, nat);                        // True if <= 7
+ * H_ARULE(Tr_op, SEQ(tmode, ws, LIT("Tr")));  // 9.3.6 - rendering mode
+ * validate_tmode() accepts the operand only when it is <= TEXTMODE_MAX.
  *
- * ** Parameter 'gen' is unused
  */
+#define TEXTMODE_MAX 7
 
-XREntry *
-lookup_xref(struct Env *aux, size_t nr, size_t gen)
+bool
+validate_tmode(HParseResult *p, void *u)
 {
-	HParsedToken *ss;	/* xref subsection */
-	size_t base, n;
+	return H_CAST_UINT(p->ast) <= TEXTMODE_MAX;	/* u is not used */
+}
 
-	/* for each cross-reference section (i.e. update) */
-	for (size_t i = 0; i < aux->nxrefs; i++) {
-		HCountedArray *subs = H_INDEX_SEQ(aux->xrefs[i], 0);
+HParsedToken *
+act_Tr_op(const HParseResult *p, void *u)
+{
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	struct Env         *aux    = (struct Env*)u;
 
-		/* for each cross-reference subsection */
-		for (size_t j = 0; j < subs->used; j++) {
-			ss = subs->elements[j];
-			base = H_INDEX_UINT(ss, 0, 0);
-			n = H_INDEX_UINT(ss, 0, 1);
 
-			if (nr >= base && nr - base < n)
-				return H_INDEX(XREntry, ss, 1, nr - base);
-		}
-	}
+	txte->type  = TS_Tr;	// action for Tr_op: wrap the rendering mode in a TextEntry
+	txte->obj   = NULL;
+	txte->mode  = H_FIELD_UINT(0);	// first field of the parsed sequence
 
-	return NULL;
+	// record the page and font that are current when this operator is seen
+	txte->ts.page = aux->tstate.page;
+	txte->ts.font = aux->tstate.font;
+
+	fprintf(stdout, "act_Tr_op:: %d\n", txte->mode);
+	return H_MAKE(TextEntry, txte);
 }
 
-const HParsedToken *
-parse_obj(struct Env *aux, size_t nr, size_t gen, size_t offset)
+
+/*
+ * Ts operator: text rise.  The action stores the operand in a TS_Ts TextEntry.
+ * H_ARULE(Ts_op, SEQ(tnumb, ws, LIT("Ts")));  // rise
+ */
+HParsedToken *
+act_Ts_op(const HParseResult *p, void *u)
 {
-	HParseResult *res;
-	size_t def_nr, def_gen;
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	struct Env         *aux    = (struct Env*)u;
 
-	if (offset >= aux->sz) {
-		fprintf(stderr, "%s: position %zu (%#zx) for object %zu %zu is "
-		    "out of bounds\n", aux->infile, offset, offset, nr, gen);
-		return NULL;
-	}
 
-	res = h_parse(p_objdef, aux->input + offset, aux->sz - offset);
-	if (res == NULL) {
+	txte->type  = TS_Ts;
+	txte->obj   = NULL;
+	txte->value = H_FIELD_DOUBLE(0);	// the rise operand (first field)
+
+	// record the page and font that are current when this operator is seen
+	txte->ts.page = aux->tstate.page;
+	txte->ts.font = aux->tstate.font;
+
+	fprintf(stdout, "act_Ts_op:: %3.3f\n", txte->value);
+	return H_MAKE(TextEntry, txte);
+}
+
+
+
+/*
+ * 9.4.2 - Text positioning operators - Table 108
+ *   TP_Td, TP_TD, TP_Tm, TP_Tstar
+ *
+ * *****************************************************************
+ *
+ * TP_Td: move to the next line, offset by the two operands (tx, ty).
+ * The action wraps both operands and the current page/font in a TextEntry.
+ * H_ARULE(Td_op, SEQ(tnumb, ws, tnumb, ws, LIT("Td"), ws));
+ */
+HParsedToken *
+act_Td_op(const HParseResult *p, void *u)
+{
+	struct Env *env   = u;
+	TextEntry  *entry = H_ALLOC(TextEntry);
+
+	/* the two numeric operands */
+	entry->pos.tx = H_FIELD_DOUBLE(0);
+	entry->pos.ty = H_FIELD_DOUBLE(1);
+	entry->type   = TP_Td;
+	entry->obj    = NULL;
+
+	/* snapshot the page/font in effect (Td itself does not require a font) */
+	entry->ts.font = env->tstate.font;
+	entry->ts.page = env->tstate.page;
+
+	fprintf(stdout, "act_Td_op: text position ::tx=%.3f:ty=%.3f\n",
+			entry->pos.tx, entry->pos.ty);
+
+	HParsedToken *result = H_MAKE(TextEntry, entry);
+	return result;
+}
+
+
+
+
+/*
+ * TP_TD: offset to the next line (the operator also sets the leading; that state is not tracked here)
+ * H_ARULE(TD_op, SEQ(tnumb, ws, tnumb, ws, LIT("TD")));
+ */
+HParsedToken *
+act_TD_op(const HParseResult *p, void *u)
+{
+	struct Env *env   = u;
+	TextEntry  *entry = H_ALLOC(TextEntry);
+
+	entry->obj    = NULL;
+	entry->type   = TP_TD;
+	entry->pos.ty = H_FIELD_DOUBLE(1);
+	entry->pos.tx = H_FIELD_DOUBLE(0);
+
+	/* remember which page and font were active for this operator */
+	entry->ts.font = env->tstate.font;
+	entry->ts.page = env->tstate.page;
+
+	fprintf(stdout, "act_TD_op: text position ::tx=%3.3f:ty=%3.3f\n", entry->pos.tx, entry->pos.ty);
+	pp_fontstate(stdout, &entry->ts);
+
+	HParsedToken *result = H_MAKE(TextEntry, entry);
+	return result;
+}
+
+
+/*
+ * TP_Tm: text matrix specification; the six operands are stored in fm.cell[0..5]
+ * H_ARULE(Tm_op, SEQ(REP(SEQ(tnumb, ws), 6), LIT("Tm"), ws));    // set text matrix
+ */
+HParsedToken *
+act_Tm_op(const HParseResult *p, void *u)
+{
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	struct Env         *aux  = (struct Env*)u;
+
+	txte->type  = TP_Tm;
+	txte->obj   = NULL;
+	assert((p->ast->token_type == TT_SEQUENCE) &&
+			(p->ast->seq->elements[0]->token_type == TT_SEQUENCE) &&
+			(p->ast->seq->elements[0]->seq->used == 6));
+	for (int i=0; i<6; i++)
+		txte->fm.cell[i] = p->ast->seq->elements[0]->seq->elements[i]->seq->elements[0]->dbl;
+
+	// associate the entry with the current state, like the other operators do
+	txte->ts.page = aux->tstate.page;
+	txte->ts.font = aux->tstate.font;
+	fprintf(stdout, "act_Tm_op: text matrix ::\n");
+	for (int i=0; i<3; i++)
+		fprintf(stdout, "%3.3f : %3.3f\n", txte->fm.cell[i*2], txte->fm.cell[i*2+1]);
+	return H_MAKE(TextEntry, txte);
+}
+
+
+/*
+ * TP_Tstar: move to the start of the next line (no operands)
+ * H_ARULE(Tstar_op, SEQ(LIT("T*"), ws));
+ */
+HParsedToken *
+act_Tstar_op(const HParseResult *p, void *u)
+{
+	struct Env *env   = u;
+	TextEntry  *entry = H_ALLOC(TextEntry);
+
+	/* T* carries no operand; value is simply cleared */
+	entry->value = 0;
+	entry->obj   = NULL;
+	entry->type  = TP_Tstar;
+
+	entry->ts.font = env->tstate.font;	/* font in effect ... */
+	entry->ts.page = env->tstate.page;	/* ... and page */
+
+	fprintf(stdout, "act_Tstar_op: position pointer\n");
+	pp_fontstate(stdout, &entry->ts);
+
+	return H_MAKE(TextEntry, entry);
+}
+
+
+
+/*
+ * 9.4.3 - Text showing operators - Table 109
+ *   TW_Tj, TW_Tq, TW_Tqq, TW_TJ
+ *
+ * *****************************************************************
+ *
+ * TW_Tj: show a text string.
+ * The resulting TextEntry refers to the bytes of the parsed string token.
+ * H_ARULE(Tj_op, SEQ(string, ws, LIT("Tj"), ws));
+ */
+HParsedToken *
+act_Tj_op(const HParseResult *p, void *u)
+{
+	struct Env         *env   = u;
+	TextEntry          *entry = H_ALLOC(TextEntry);
+	const HParsedToken *str   = H_INDEX_TOKEN(p->ast, 0);
+
+	entry->type = TW_Tj;
+	entry->obj  = NULL;
+
+	/* the entry borrows the bytes of the parsed string; nothing is copied */
+	entry->tstr.nchars = str->bytes.len;
+	entry->tstr.text   = (uint8_t *)str->bytes.token;
+
+	/* page and font in effect when the string is shown */
+	entry->ts.font = env->tstate.font;
+	entry->ts.page = env->tstate.page;
+
+	/* debug trace */
+	fprintf(stdout, "\nact_Tj_op:: nchars=%u, txt=%.*s\n", entry->tstr.nchars,
+			entry->tstr.nchars, entry->tstr.text);
+	pp_fontstate(stdout, &entry->ts);
+
+	return H_MAKE(TextEntry, entry);
+}
+
+
+/*
+ * TW_Tq: the ' operator - move to the next line, then show the string
+ * H_ARULE(TsingleQ_op, SEQ(string, ws, LIT(quote), ws));
+ */
+HParsedToken *
+act_TsingleQ_op(const HParseResult *p, void *u)
+{
+	struct Env         *env     = u;
+	TextEntry          *entry   = H_ALLOC(TextEntry);
+	const HParsedToken *operand = H_INDEX_TOKEN(p->ast, 0);
+
+	/* string operand: referenced in place, not copied */
+	entry->tstr.text   = (uint8_t *)operand->bytes.token;
+	entry->tstr.nchars = operand->bytes.len;
+
+	entry->obj  = NULL;
+	entry->type = TW_Tq;
+
+	/* tag the entry with the page and font currently in effect */
+	entry->ts.page = env->tstate.page;
+	entry->ts.font = env->tstate.font;
+
+	fprintf(stdout, "\nact_TsingleQ_op:: nchars=%u, txt=%.*s\n", entry->tstr.nchars,
+			entry->tstr.nchars, entry->tstr.text);
+	pp_fontstate(stdout, &entry->ts);
+
+	HParsedToken *result = H_MAKE(TextEntry, entry);
+	return result;
+}
+
+
+/*
+ * TW_Tqq: the " operator - move to the next line and show a string; the two
+ * leading numbers are stored as twfmt.aw and twfmt.ac, the string in twfmt.tstr
+ * H_ARULE(TdoubleQ_op, SEQ(tnumb, ws, tnumb, ws, string, ws, LIT(dquote), ws));
+ */
+HParsedToken *
+act_TdoubleQ_op(const HParseResult *p, void *u)
+{
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	const HParsedToken *aw = H_INDEX_TOKEN(p->ast, 0);
+	const HParsedToken *ac = H_INDEX_TOKEN(p->ast, 1);
+	const HParsedToken *tstr = H_INDEX_TOKEN(p->ast, 2);
+	struct Env         *aux  = (struct Env*)u;
+
+	txte->type  = TW_Tqq;
+	txte->obj   = NULL;
+
+	txte->twfmt.aw          = aw->dbl;
+	txte->twfmt.ac          = ac->dbl;
+	txte->twfmt.tstr.text   = (uint8_t *)tstr->bytes.token;
+	txte->twfmt.tstr.nchars = tstr->bytes.len;
+
+	// associate the text with the current page and font
+	txte->ts.page = aux->tstate.page;
+	txte->ts.font = aux->tstate.font;
+
+	/*
+	 * DEBUG: report the string through twfmt.tstr, the member filled in
+	 * above; txte->tstr is never assigned for this operator.
+	 */
+	fprintf(stdout, "act_TdoubleQ_op:: aw=%3.3f, ac=%3.3f\n", txte->twfmt.aw, txte->twfmt.ac);
+	fprintf(stdout, "\nact_TdoubleQ_op:: nchars=%u, txt=%.*s\n", txte->twfmt.tstr.nchars,
+			txte->twfmt.tstr.nchars, txte->twfmt.tstr.text);
+	pp_fontstate(stdout, &txte->ts);
+	return H_MAKE(TextEntry, txte);
+}
+
+/*
+ * TW_TJ: Show an array of strings, each optionally preceded by a position adjustment
+ * H_RULE(TArr_elem, SEQ(OPT(SEQ(nanumbs)), string, ws))
+ * H_ARULE(TJ_op, SEQ(h_many(TArr_elem), LIT("TJ"), ws));  // show one or more text strings
+ * The action keeps every element and also a flattened copy of all string bytes.
+ * TODO:: Implement the array parser
+ */
+HParsedToken *
+act_TJ_op(const HParseResult *p, void *u)
+{
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	const HParsedToken *tarr = H_INDEX_TOKEN(p->ast, 0);
+	struct Env         *aux  = (struct Env*)u;
+
+	txte->type  = TW_TJ;
+	txte->obj   = NULL;
+
+	// associate the text with the current page and font
+	txte->ts.page = aux->tstate.page;
+	txte->ts.font = aux->tstate.font;
+
+	/*
+	 * Pass 1: classify each element of the array.
+	 * Numbers are kept as adjustments, strings are referenced in place, and
+	 * the running total of string bytes sizes the flattened copy below.
+	 */
+	txte->tarray.nelts = tarr->seq->used;
+	txte->tarray.elts  = h_arena_malloc(p->arena, sizeof(struct tarrayelt) * txte->tarray.nelts);
+	txte->tarray.flattened.nchars = 0;
+
+	for (int i=0; i<txte->tarray.nelts; i++) {
+		const HParsedToken *elt = tarr->seq->elements[i];
+		const HParsedToken *item;
+
+		assert( (elt->token_type == TT_SEQUENCE) && (elt->seq->used == 1) );
+		item = elt->seq->elements[0];
+
+		switch (item->token_type) {
+		case TT_DOUBLE:
+			txte->tarray.elts[i].adj         = item->dbl;
+			txte->tarray.elts[i].isStr       = false;
+			break;
+		case TT_BYTES:
+			txte->tarray.elts[i].tstr.text   = (uint8_t *)item->bytes.token;
+			txte->tarray.elts[i].tstr.nchars = item->bytes.len;
+			txte->tarray.elts[i].isStr       = true;
+			txte->tarray.flattened.nchars   += txte->tarray.elts[i].tstr.nchars;
+
+			/*
+			 * DEBUG trace.  %p takes a void * argument, and the page
+			 * reference is printed only for PG_NODE pages, matching the
+			 * guard used in act_Tf_op.
+			 */
+			fprintf(stdout, "act_TJ_op::: Using font= %p - page=", (void *)txte->ts.font);
+			if (txte->ts.page->type==PG_NODE)
+				pp_ref(stdout, txte->ts.page->me, 0, 0);
+			fprintf(stdout, "\nact_TJ_op:: nchars=%u, txt=%.*s\n", txte->tarray.elts[i].tstr.nchars,
+					txte->tarray.elts[i].tstr.nchars, txte->tarray.elts[i].tstr.text);
+			break;
+		default:
+			fprintf(stderr, "act_TJ_op:: Unexpected element type :: %d\n", item->token_type);
+			fflush(stderr);
+			assert(false);
+		}
+	}
+
+	/* Pass 2: hold on to a flattened copy of the string */
+	txte->tarray.flattened.text = h_arena_malloc(p->arena, sizeof(char) * txte->tarray.flattened.nchars);
+	int j = 0; // write offset into the flattened copy
+	for (int i=0; i<txte->tarray.nelts; i++) {
+		if (txte->tarray.elts[i].isStr) {
+			memcpy(&txte->tarray.flattened.text[j], txte->tarray.elts[i].tstr.text, txte->tarray.elts[i].tstr.nchars);
+			j += txte->tarray.elts[i].tstr.nchars;
+		}
+	}
+
+	fprintf(stdout, "\nact_TJ_op:: nchars=%u, txt=%.*s\n", txte->tarray.flattened.nchars,
+			txte->tarray.flattened.nchars, txte->tarray.flattened.text);
+	pp_fontstate(stdout, &txte->ts);
+	return H_MAKE(TextEntry, txte);
+}
+
+
+
+
+/*
+ * Text object ("BT" ... "ET"): flatten the shown strings into one TW_Tj entry
+ */
+HParsedToken *
+act_txtobj(const HParseResult *p, void *u)
+{
+	fprintf(stdout, "act_txtobj:: Here\n");
+
+	assert(p->ast->token_type == TT_SEQUENCE);
+
+	TextEntry          *txtobj   = H_ALLOC(TextEntry);
+	const HParsedToken *opstream = H_INDEX_TOKEN(p->ast, 1);
+	const HParsedToken *tt_text  = NULL;
+	TextState_T        *ts       = NULL;	// state of the last operator visited
+	uint8_t            *tstr     = NULL;
+	int                 textlen  = 0;
+	int                 idx      = 0;
+
+	fprintf(stdout, "act_txtobj:: numtokens = %zu\n", opstream->seq->used);
+
+	/*
+	 * Pass 1: size the output buffer.
+	 *  - positioning operators (Td, TD, T*) contribute one newline
+	 *  - ' and " contribute a newline followed by their string
+	 *  - Tj and TJ contribute their string only
+	 * The " operator keeps its string in twfmt.tstr (see act_TdoubleQ_op),
+	 * so it must not be read through tstr.
+	 */
+	for (size_t i = 0; i < opstream->seq->used; i++) {
+		const TextEntry *txte = H_CAST(TextEntry, opstream->seq->elements[i]);
+
+		switch (txte->type) {
+		case TP_Td:
+		case TP_TD:
+		case TP_Tstar:
+			textlen += 1;
+			break;
+		case TW_TJ:
+			textlen += txte->tarray.flattened.nchars;
+			break;
+		case TW_Tqq:
+			textlen += 1 + txte->twfmt.tstr.nchars;
+			break;
+		case TW_Tq:
+			textlen += 1;
+			/* fallthrough */
+		case TW_Tj:
+			textlen += txte->tstr.nchars;
+			break;
+		default:
+			; // ignore
+		}
+	}
+	tstr = h_arena_malloc(p->arena, sizeof(uint8_t) * textlen);
+
+	// Pass 2: concatenate the pieces in stream order
+	for (size_t i = 0; i < opstream->seq->used; i++) {
+		TextEntry *txte = H_CAST(TextEntry, opstream->seq->elements[i]);
+		ts              = &txte->ts;
+
+		switch (txte->type) {
+		case TP_Td:
+		case TP_TD:
+		case TP_Tstar:
+			tstr[idx] = '\n';
+			idx += 1;
+			break;
+		case TW_TJ:
+			memcpy(&tstr[idx], txte->tarray.flattened.text, txte->tarray.flattened.nchars);
+			idx += txte->tarray.flattened.nchars;
+			fprintf(stdout, "act_txtobj - array:: len=%u, str=", txte->tarray.flattened.nchars);
+			fwrite((const void *)txte->tarray.flattened.text, (int) txte->tarray.flattened.nchars, 1, stdout);
+			pp_fontstate(stdout, ts);
+			break;
+		case TW_Tqq:
+			tstr[idx] = '\n';
+			idx += 1;
+			memcpy(&tstr[idx], txte->twfmt.tstr.text, txte->twfmt.tstr.nchars);
+			idx += txte->twfmt.tstr.nchars;
+			fprintf(stdout, "act_txtobj:: len=%u, str=", txte->twfmt.tstr.nchars);
+			fwrite((const void *)txte->twfmt.tstr.text, (int) txte->twfmt.tstr.nchars, 1, stdout);
+			pp_fontstate(stdout, ts);
+			break;
+		case TW_Tq:
+			tstr[idx] = '\n';
+			idx += 1;
+			/* fallthrough */
+		case TW_Tj:
+			memcpy(&tstr[idx], txte->tstr.text, txte->tstr.nchars);
+			idx += txte->tstr.nchars;
+			fprintf(stdout, "act_txtobj:: len=%u, str=", txte->tstr.nchars);
+			fwrite((const void *)txte->tstr.text, (int) txte->tstr.nchars, 1, stdout);
+			pp_fontstate(stdout, ts);
+			break;
+		default:
+			; // ignore
+		}
+	}
+	assert(idx == textlen);
+
+	txtobj->type         = TW_Tj;
+	txtobj->obj          = opstream;
+	txtobj->tstr.text    = tstr;
+	txtobj->tstr.nchars  = textlen;
+	txtobj->tstr.tobj    = opstream;
+	// ts is non-NULL whenever textlen > 0, because pass 2 visited an entry
+	txtobj->ts.page = textlen ? ts->page : NULL;
+	txtobj->ts.font = textlen ? ts->font : NULL;
+	tt_text = H_MAKE(TextEntry, txtobj);
+
+	// DEBUG: pretty print the result; the font state only when a page is known
+	if (textlen) {
+		fprintf(stdout, "act_txtobj:: ");
+		pp_textentry(stdout, tt_text, 0, 0);
+		if (txtobj->ts.page)
+			pp_fontstate(stdout, &txtobj->ts);
+	}
+	return (HParsedToken *)tt_text;
+}
+
+
+
+
+/*
+ * Continuation for h_bind(): receives the sequence of text objects and appends
+ * one list node per object to the environment, for later processing (e.g.
+ * writing the text out to a file named after the pdf input file with a
+ * .psectxt suffix).
+ * The tokens are not consumed; the returned parser yields x unchanged.
+ * x = (txtobj ...)
+ */
+HParser *
+ktxtstream(HAllocator *mm__, const HParsedToken *x, void *env)
+{
+	struct Env *state = env;
+
+	assert (x->token_type == TT_SEQUENCE);
+	int count = x->seq->used;
+	fprintf(stdout, "\n\nktxtstream: Num txtobjs = %d\n", count);
+
+	for (int k = 0; k < count; k++) {
+		const HParsedToken *tok = x->seq->elements[k];
+
+		/* every element has to be a TextEntry of type TW_Tj */
+		assert(tok->token_type == TT_TextEntry);
+		TextEntry *entry = H_CAST(TextEntry, tok);
+		assert(entry->type == TW_Tj);
+		fprintf(stdout, "ktxtstream: Value = %.*s\n", entry->tstr.nchars, entry->tstr.text);
+
+		/* new list node referring to the string of this entry */
+		struct textnode *node = h_alloc(mm__, sizeof(struct textnode));
+		node->next = NULL;
+		node->tstr = &entry->tstr;
+
+		/*
+		 * append: link behind the old tail if there is one; an empty
+		 * head is filled in; the node always becomes the new tail
+		 */
+		if (state->txttail != NULL)
+			state->txttail->next = node;
+		if (state->txthead == NULL)
+			state->txthead = node;
+		state->txttail = node;
+		state->ntextobjs += 1;
+	}
+
+	return p_return__m(mm__, x);
+}
+
+
+
+/*
+ * Write the extracted text to "<infile>.psectxt" (with font information) and
+ * the flattened text of every text object to "<infile>.strtxt" (debug aid).
+ * The text objects are taken from the list kept in aux (txthead, ntextobjs).
+ */
+/* Build "<base><suffix>" in freshly malloc'ed storage; NULL when out of memory. */
+static char *
+text_extract_outname(const char *base, const char *suffix)
+{
+	size_t baselen = strlen(base);
+	size_t sfxlen  = strlen(suffix);
+	char  *name    = malloc(baselen + sfxlen + 1);
+	if (name != NULL) {
+		memcpy(name, base, baselen);
+		memcpy(name + baselen, suffix, sfxlen + 1);	// copies the terminator too
+	}
+	return name;
+}
+
+void
+text_extract(struct Env *aux)
+{
+	char            *outfn   = NULL;	// "<infile>.psectxt"
+	char            *outfn2  = NULL;	// "<infile>.strtxt" (DEBUG)
+	FILE            *stream  = NULL;
+	FILE            *stream2 = NULL;
+	struct textnode *curr    = aux->txthead;
+
+	fprintf(stdout, "text_extract:: num text objects = %ld\n", aux->ntextobjs);
+	fprintf(stdout, "text_extract:: %s\n", aux->infile);
+
+	outfn  = text_extract_outname(aux->infile, ".psectxt");
+	outfn2 = text_extract_outname(aux->infile, ".strtxt");
+	if (outfn == NULL || outfn2 == NULL) {
+		fprintf(stderr, "text_extract:: malloc() failed\n");
+		goto done;
+	}
+
+	// open both files for writing; every exit path goes through 'done'
+	if (!(stream = fopen(outfn, "w"))) {
+		fprintf(stderr,
+				"text_extract:: Failed to open file '%s' for writing\n", outfn);
+		goto done;
+	}
+	if (!(stream2 = fopen(outfn2, "w"))) {
+		fprintf(stderr,
+				"text_extract:: Failed to open file '%s' for writing\n", outfn2);
+		goto done;
+	}
+
+	for (int i = 0; i < aux->ntextobjs && curr != NULL; i++, curr = curr->next) {
+		// DEBUG: flattened text of the whole text object
+		fwrite((const void *)curr->tstr->text, (int) curr->tstr->nchars, 1, stream2);
+
+		const HParsedToken *tt_text = curr->tstr->tobj;
+		for (size_t j = 0; j < tt_text->seq->used; j++) {
+			TextEntry  *txte   = H_CAST(TextEntry, tt_text->seq->elements[j]);
+			const void *text   = NULL;
+			size_t      nchars = 0;
+			switch (txte->type) {
+			case TW_Tj:
+			case TW_Tq:
+				text   = txte->tstr.text;
+				nchars = txte->tstr.nchars;
+				break;
+			case TW_Tqq:	// the " operator stores its string in twfmt.tstr
+				text   = txte->twfmt.tstr.text;
+				nchars = txte->twfmt.tstr.nchars;
+				break;
+			case TW_TJ:
+				text   = txte->tarray.flattened.text;
+				nchars = txte->tarray.flattened.nchars;
+				break;
+			default:
+				fprintf(stderr, "text_extract:: Text token type '%u' ignored\n",
+						txte->type);
+				continue;
+			}
+			Fontinfo_T *ft = lookup_font(&txte->ts, aux);
+			if (ft) {
+				pp_fontinfo(stdout, &txte->ts, ft);
+				pp_fontinfo(stream, &txte->ts, ft);
+			}
+			else {
+				const char *estr = "\nMissing Font Info!!\n";
+				fwrite(estr, strlen(estr), 1, stdout);
+				fwrite(estr, strlen(estr), 1, stream);
+			}
+			fwrite(text, nchars, 1, stdout);
+			fwrite(text, nchars, 1, stream);
+		}
+	}
+
+done:
+	if (stream2 != NULL)
+		fclose(stream2);
+	if (stream != NULL)
+		fclose(stream);
+	free(outfn2);
+	free(outfn);
+}
+
+
+
+
+
+
+
+
+
+// *********************************************************************
+// DEBUG
+
+
+// Utility -- simplistic byte-to-char mapping for debug output (no real UTF-16
+// decoding): NUL silently becomes '?', any other value outside printable ASCII
+// (0x20..0x7E) is echoed in hex and becomes '?', the rest is echoed as-is.
+char convert2char(unsigned int b1)
+{
+	char val = '?';
+
+	if (b1 == 0)
+		return val;
+
+	// the lower bound is the space character, 0x20 (not decimal 20), and DEL
+	// (0x7F) is a control character as well
+	if ((b1 < 0x20) || (b1 > 0x7E))
+	{
+		fprintf(stdout, " 0X%02X ", b1);
+		return val;
+	}
+	val = (char)b1;
+	fprintf(stdout, "%c", val);
+	return val;
+}
+
+
+HParsedToken *
+act_txtbegin_(const HParseResult *p, void *u)
+{
+  /* debug hook: log the address of the AST and pass it through untouched */
+  HParsedToken *ast = (HParsedToken *)p->ast;
+  unsigned long addr = (unsigned long)ast;
+  fprintf(stdout, "act_txtbegin:: Here %lx\n", addr);
+  return ast;
+}
+HParsedToken *
+act_txtend(const HParseResult *p, void *u)
+{
+  /* debug hook: announce the end of a text object, the AST passes through */
+  HParsedToken *ast = (HParsedToken *)p->ast;
+  fputs("act_txtend:: Here\n", stdout);
+  return ast;
+}
+
+
+
+/*
+ * Collapse a sequence of TT_UINT tokens (one per byte) into a single
+ * TT_BYTES token whose buffer lives in the parse arena.
+ * Each value is narrowed to uint8_t.  u is unused.
+ */
+HParsedToken *
+act_bytestream(const HParseResult *p, void *u)
+{
+	const HCountedArray *seq = p->ast->seq;
+	size_t               n   = seq->used;
+
+	// n is a size_t: print it with %zu (%lu is only right where size_t is
+	// unsigned long)
+	fprintf(stdout, "\nact_bytestream: token_type: %u, size: %zu\n", p->ast->token_type, n);
+
+	uint8_t *bytebuf = h_arena_malloc(p->arena, sizeof(uint8_t) * n);
+	for (size_t i=0; i<n; i++) {
+		assert(seq->elements[i]->token_type == TT_UINT);
+		bytebuf[i] = (uint8_t)seq->elements[i]->uint;
+	}
+
+	// DEBUG: the buffer is not NUL-terminated, hence the explicit precision
+	fprintf(stdout, "act_bytestream: the string: %.*s\n", (int)n, (char *)bytebuf);
+
+	HParsedToken *bb=H_MAKE_BYTES(bytebuf, n);
+
+	return bb;
+}
+
+// *********************************************************************
+
+
+/*
+ * ********************************************************************
+ * End Text parsing
+ * ********************************************************************
+ */
+
+
+
+
+
+
+
+/*
+ * input grammar: parsers shared through file-scope variables
+ */
+
+HParser *p_pdf;
+HParser *p_pdfdbg;
+HParser *p_startxref;
+HParser *p_xref;
+HParser *p_objdef;
+HParser *p_a85string;
+HParser *p_ahexstream;
+HParser *p_rldstring;	/* assigned by init_runlengthdecode_parser() */
+HParser *p_ws;
+HParser *p_wel;
+HParser *p_elemr;
+HParser *p_npair;
+
+/*
+ * Parsers for text streams
+ */
+HParser *p_textbegin;
+HParser *p_textstream;
+HParser *p_trailer;
+HParser *p_cstream;
+HParser *p_byteostm;
+HParser *p_bytestream;
+HParser *p_dict;
+
+
+
+
+/* continuations for h_bind(): (allocator, parsed token, user data) -> next parser */
+HParser *kstream(HAllocator *, const HParsedToken *, void *);
+HParser *kxstream(HAllocator *, const HParsedToken *, void *);
+HParser *ktxtstream(HAllocator *, const HParsedToken *, void *);
+HParser *kcontentstream(HAllocator *, const HParsedToken *, void *);
+HParser *kbyteostream(HAllocator *, const HParsedToken *, void *);
+
+void
+init_runlengthdecode_parser(struct Env *aux)
+{
+	/* Builds the run-length string parser and publishes it as p_rldstring; aux is not referenced directly here. */
+	H_RULE(rldeod, h_ch(0x80));
+	H_ARULE(longlength, h_ch_range(0x81, 0xFF));
+	H_ARULE(shortlength, h_ch_range(0x0, 0x7F));
+	H_RULE(shortdata, h_uint8());
+	H_RULE(longdata, h_uint8());
+	/* NOTE(review): the A-rules presumably bind act_<name> actions defined elsewhere in the file - confirm */
+	H_RULE(shortrun, h_length_value(shortlength, shortdata));
+	H_ARULE(longrun, SEQ(longlength, longdata));
+	/* the whole string: any number of runs followed by the (ignored) 0x80 byte */
+	H_ARULE(rldstring, SEQ(h_many(CHX(shortrun, longrun)), IGN(rldeod)));
+
+	p_rldstring = rldstring;
+}
+
+void
+init_parser(struct Env *aux)
+{
+	TT_HParseResult = h_allocate_token_new("HParseResult", NULL, pp_parseresult);
+	TT_XREntry      = h_allocate_token_new("XREntry", NULL, pp_xrentry);
+	TT_Ref          = h_allocate_token_new("Ref", NULL, pp_ref);
+	TT_Dict         = h_allocate_token_new("Dict", NULL, pp_dict);
+	TT_TextEntry    = h_allocate_token_new("TextEntry", NULL, pp_textentry);
+	TT_Objstm       = h_allocate_token_new("Objstm", NULL, pp_objstm);
+
+	/* lines */
+	H_RULE(cr,	p_mapch('\r', '\n'));	/* semantic value: \n */
+	H_RULE(lf,	h_ch('\n'));		/* semantic value: \n */
+	H_RULE(crlf,	h_right(cr, lf));	/* semantic value: \n */
+	H_RULE(eol,	CHX(crlf, cr, lf));
+	H_RULE(end,	h_end_p());
+
+	/* character classes */
+#define LWCHARS	"\0\t\f "
+#define WCHARS	LWCHARS "\n\r"
+#define DCHARS	"()<>[]{}/%"
+	H_RULE(wchar,	IN(WCHARS));			/* white-space */
+	H_RULE(lwchar,	IN(LWCHARS));			/* "line" whitespace */
+	//H_RULE(dchar,	IN(DCHARS));			/* delimiter */
+	H_RULE(rchar,	NOT_IN(WCHARS DCHARS));		/* regular */
+	H_RULE(nchar,	NOT_IN(WCHARS DCHARS "#"));	/* name */
+	H_RULE(schar,	NOT_IN("()\n\r\\"));		/* string literal */
+	H_ARULE(digit,	h_ch_range('0', '9'));
+	H_ARULE(pdigit,	h_ch_range('1', '9'));
+	H_ARULE(hlower,	h_ch_range('a', 'f'));
+	H_ARULE(hupper,	h_ch_range('A', 'F'));
+	H_RULE(hdigit,	CHX(digit, hlower, hupper));
+	H_ARULE(odigit,	h_ch_range('0', '7'));
+
+	H_RULE(sp,	h_ch(' '));
+	H_RULE(percent,	h_ch('%'));
+	H_RULE(period,	h_ch('.'));
+	H_RULE(slash,	h_ch('/'));
+	H_RULE(hash,	h_ch('#'));
+	H_RULE(bslash,	h_ch('\\'));
+	H_RULE(lparen,	h_ch('('));
+	H_RULE(rparen,	h_ch(')'));
+	H_RULE(langle,	h_ch('<'));
+	H_RULE(rangle,	h_ch('>'));
+	H_RULE(lbrack,	h_ch('['));
+	H_RULE(rbrack,	h_ch(']'));
+	H_RULE(plus,	h_ch('+'));
+	H_RULE(minus,	h_ch('-'));
+
+	/* whitespace */
+	H_RULE(comment,	SEQ(percent, h_many(NOT_IN("\r\n")), CHX(cr,lf,end)));
+		/* ^ NB: must consume cr/lf for ws to be LL(k) */
+	H_RULE(wel,	IGN(CHX(wchar, comment)));
+	H_RULE(ws,	IGN(h_many(wel)));
+	H_RULE(lws,	IGN(h_many(IGN(lwchar))));
+
+	/* tokenization */
+#define MANY_WS(X)	h_many(CHX(wel, X))
+#define TOK(X)	h_middle(ws, X, h_not(rchar))
+#define KW(S)	IGN(TOK(LIT(S)))
+
+	/* misc */
+	H_RULE(nl,	IGN(h_right(lws, eol)));
+	H_RULE(epsilon,	h_epsilon_p());
+	H_RULE(empty,	SEQ(epsilon));
+	H_RULE(digits,	h_many1(digit));
+	H_VARULE(nat,	digits);
+	H_VRULE(pnat,	nat);
+	H_RULE(npair,	SEQ(pnat, wel,ws, nat));
+
+	/*
+	 * objects
+	 */
+	
+	H_ARULE(ref,	SEQ(npair, wel,ws, LIT("R")));
+	H_RULE(null,	LIT("null"));
+	H_RULE(boole,	CHX(LIT("true"), LIT("false")));
+
+	/* numbers */
+	H_ARULE(sign,	CHX(minus, IGN(plus)));
+	H_VRULE(intnn,	nat);
+	H_ARULE(realnn,	CHX(SEQ(digits, period, digits),	/* 12.3 */
+			    SEQ(digits, period, empty),		/* 123. */
+			    SEQ(empty, period, digits)));	/* .123 */
+		// XXX ^ we _could_ move the "123." case into intnn...
+	H_RULE(numbnn,	CHX(realnn, intnn));
+	H_RULE(snumb,	SEQ(sign, numbnn));
+	H_VARULE(numb,	CHX(snumb, numbnn));
+
+	/* names */
+	H_ARULE(nesc,	SEQ(hash, hdigit, hdigit));
+	H_ARULE(nstr,	h_many(CHX(nchar, nesc)));	/* '/' is valid */
+	H_RULE(name,	h_right(slash, nstr));
+
+	/* strings */
+	H_RULE(snest,	h_indirect());
+	H_RULE(bsn,	p_mapch('n', 0x0a));	/* LF */
+	H_RULE(bsr,	p_mapch('r', 0x0d));	/* CR */
+	H_RULE(bst,	p_mapch('t', 0x09));	/* HT */
+	H_RULE(bsb,	p_mapch('b', 0x08));	/* BS (backspace) */
+	H_RULE(bsf,	p_mapch('f', 0x0c));	/* FF */
+	H_RULE(escape,	CHX(bsn, bsr, bst, bsb, bsf, lparen, rparen, bslash));
+	H_ARULE(octal,	CHX(REP(odigit,3), REP(odigit,2), REP(odigit,1)));
+	H_RULE(wrap,	IGN(eol));
+	H_RULE(sesc,	h_right(bslash, CHX(escape, octal, wrap, epsilon)));
+		/* NB: lone backslashes and escaped newlines are ignored */
+	H_ARULE(schars,	h_many(CHX(schar, snest, sesc, eol)));
+	H_RULE(snest_,	SEQ(lparen, schars, rparen));
+	H_RULE(litstr,	h_middle(lparen, schars, rparen));
+	H_RULE(hexstr,	h_middle(langle, MANY_WS(hdigit), rangle));
+	H_ARULE(string,	CHX(litstr, hexstr));
+	h_bind_indirect(snest, snest_);
+
+	H_RULE(array,	h_indirect());
+	H_RULE(dict,	h_indirect());
+
+	/* classify objects by whether they start/end with a delimiter: */
+	H_RULE(robj,	CHX(ref, null, boole, numb));		/* rchars */
+	H_RULE(dobj,	CHX(string, array, dict));		/* dchars */
+	H_RULE(obj,	CHX(robj, name, dobj));
+
+	/* dictionaries */
+	H_RULE(dopen,	LIT("<<"));
+	H_RULE(dclose,	LIT(">>"));
+	H_RULE(k_v,	CHX(CHX(SEQ(name, wel,ws, obj),
+				SEQ(name, CHX(name,dobj))),
+			    VIOL(SEQ(name, wel,ws), "Key with no value (severity=2)")));
+	H_ARULE(dict_,	h_middle(dopen, MANY_WS(k_v), dclose));
+		// XXX this allows, for instance, "<<<<" to be parsed as "<< <<". ok?
+		// XXX validate: dict keys must be unique
+	h_bind_indirect(dict, dict_);
+
+	/* arrays */
+	H_RULE(elemd,	h_indirect());	/* elements following a delimiter */
+	H_RULE(elemr,	h_indirect());	/* elements following a regular char */
+	H_ARULE(array_,	h_middle(lbrack, elemd, rbrack));
+	H_RULE(elemd_,	CHX(SEQ(ws, dobj, elemd),
+			    SEQ(ws, name, elemr),
+			    SEQ(ws, robj, elemr),
+			    ws));
+	H_RULE(elemr_,	CHX(SEQ(ws, dobj, elemd),
+			    SEQ(ws, name, elemr),
+			    SEQ(wel,ws, robj, elemr),
+			    ws));
+	h_bind_indirect(elemd, elemd_);
+	h_bind_indirect(elemr, elemr_);
+	h_bind_indirect(array, array_);
+
+	/* streams */
+	H_RULE(stmbeg,	SEQ(dict, OPT(ws), LIT("stream"), OPT(cr),
+			    CHX(lf, VIOL(epsilon, "No linefeed after 'stream' (severity=7)"))));
+	H_RULE(stmend,	CHX(SEQ(eol, LIT("endstream")),
+			   VIOL(LIT("ndstream"), "Stream length >1-too-long (severity=10)"),
+			   VIOL(SEQ(h_many(wchar), LIT("endstream")),
+				"No newline before 'endstream' (severity=7)"),
+			   VIOL(LIT("endstream"), "Stream length 1-too-long (severity=9)"),
+			   VIOL(SEQ(OPT(h_ch_range(0, 255)), OPT(eol), LIT("endstream")),
+				"Stream length 1-too-short (severity=4)"),
+			   VIOL(SEQ(h_many1(h_butnot(h_ch_range(0, 255), CHX(KW("endobj"),
+									     SEQ(npair, wel, KW("obj")),
+									     KW("xref"),
+									     LIT("endstream")))), LIT("endstream")),
+				"Stream length >1-too-short (severity=5)"),
+			   VIOL(h_many1(h_butnot(h_ch_range(0, 255), CHX(KW("endobj"),
+									 SEQ(npair, wel, KW("obj")),
+									 KW("xref")))),
+				"Missing endstream token (severity=7)")));
+
+	H_RULE(stream,	h_left(h_bind(stmbeg, kstream, aux), stmend));
+		// XXX is whitespace allowed between the eol and "endstream"?
+		// peter wyatt says no. (2020-03-25)
+
+	/*
+	 * file structure
+	 */
+
+	/* header */
+	H_RULE(version,	SEQ(pdigit, IGN(period), digit));
+	H_RULE(header,	h_middle(LIT("%PDF-"), version, nl));
+
+	/* body */
+	H_RULE(indobj,	CHX(stream, obj));
+	H_RULE(objdef,	SEQ(ws, npair, wel, KW("obj"), ws, indobj,
+			    CHX(VIOL(SEQ(OPT(ws), OPT(lws), KW("endobj"), h_many(CHX(wel, eol)), h_many1(KW("endobj"))),
+				     "More than 1 endobj token (severity=1)"),
+				VIOL(SEQ(OPT(ws), OPT(lws), KW("endobj"), h_many(CHX(wel, eol)), h_many1(SEQ(dclose, h_many1(CHX(wchar, eol)), KW("endobj")))),
+				     "More than 1 >> and endobj token (severity=2)"),
+				SEQ(OPT(ws), OPT(lws), KW("endobj")),
+				VIOL(h_optional(KW("endobj")), "Missing endobj token (severity=1)"))));
+	H_RULE(body,	h_many(objdef));
+
+	/* cross-reference section */
+	H_RULE(xreol,	CHX(SEQ(sp, cr), SEQ(sp, lf), crlf));
+		// ^ XXX does the real world follow this rule?! cf. loop.pdf
+	H_RULE(xrtyp,	CHX(h_ch('n'), h_ch('f')));
+	H_ARULE(xroff,	REP(digit, 10));
+	H_ARULE(xrgen,	REP(digit, 5));
+	H_ARULE(xrent,	SEQ(xroff, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref offset_gen entry (severity=1)"), sp)),
+	    xrgen, IGN(CHX(VIOL(SEQ(lwchar, h_many1(lwchar)), "Multi-WS in xref gen_use entry (severity=1)"), sp)),
+	    xrtyp, IGN(CHX(VIOL(SEQ(wchar, wchar, h_many1(wchar)), "Greater-than-2-byte WS at end of xref entry (severity=1)"),
+	    xreol,
+	    VIOL(SEQ(h_many1(wchar)), "Nonconformant WS at end of xref entry (severity=1)")))));
+	H_RULE(xrhead,	SEQ(nat, IGN(sp), nat, nl));
+	H_RULE(xrsub,	SEQ(xrhead, h_many(xrent)));
+	H_ARULE(xrefs,	SEQ(KW("xref"), nl, h_many(xrsub)));
+
+	/* cross-reference streams */
+	H_RULE(xstream,	h_bind(stmbeg, kxstream, aux));
+	H_AVRULE(xrstm,	SEQ(ws, npair, wel, KW("obj"), ws, xstream));
+		// XXX skip however much we consumed and check for "endstream endobj"?
+
+
+	/* trailer */
+	H_RULE(startxr, SEQ(nl, KW("startxref"), nl,
+			    lws, nat, nl,
+			    LIT("%%EOF"), OPT(nl)));
+
+	/* used for the backwards search */
+	H_RULE(lasteof, SEQ(nl, KW("startxref"), nl,
+			    lws, nat, nl,
+		// XXX the real world sometimes omits nl after %%EOF inside the file.
+		//     the next 'tail' would be appended right after the 'F',
+		//     presumably because the previous version of the file
+		//     ended without a trailing newline. m)
+		//     this is invalid per spec, because it creates a run-on
+		//     comment, but we should probably accept-and-warn.
+		// XXX should lws be allowed before EOF marker?
+		// NB: lws before xref offset is allowed, cf. p.48 (example 4)
+			    LIT("%%EOF"),
+			    CHX(VIOL(SEQ(nl, h_many1(nl), end),
+				     "(offset FROM END) Multiple newlines after final %%EOF (severity=4)"),
+				SEQ(h_many(nl), end),
+				VIOL(SEQ(h_butnot(h_ch_range(0, 255), LIT("%%EOF"))),
+				     "(offset FROM END) Data after final  %%EOF (severity=7)"))));
+
+	H_ARULE(xr_td,	SEQ(xrefs, KW("trailer"), ws, dict));
+
+	H_RULE(start_junk,  VIOL(h_many1(h_butnot(h_ch_range(0, 255), header)),
+	                        "Junk bytes before %PDF header (severity=1)"));
+	H_RULE(hdr_junk,    CHX(comment,
+	    VIOL(h_many1(h_butnot(h_ch_range(0, 255), SEQ(npair, wel, KW("obj")))),
+	    "Uncommented junk after header (severity=1)")));
+	H_RULE(tail,	SEQ(body, CHX(SEQ(h_optional(xr_td), startxr),
+				      VIOL(SEQ(xr_td, OPT(SEQ(nl, KW("startxref"), nl, lws, nat, nl)),
+					       OPT(nl), OPT(LIT("%%EOF")), OPT(nl)),
+					   "Improper end of trailer - missing startxref and/or %%EOF (severity=5)"))));
+	H_RULE(final_eof_junk, CHX(VIOL(SEQ(h_many1(nl), end), "Multiple newlines after final %%EOF (severity=4)"),
+				   VIOL(h_many1(h_butnot(h_ch_range(0, 255), LIT("%%EOF"))),
+					"Data after final %%EOF (severity=7)"),
+				   end));
+	H_RULE(pdf,	SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many1(tail), final_eof_junk));
+
+	/* debug parser to consume as much as possible */
+	H_RULE(pdfdbg,	SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
+
+	/*
+	 * filters
+	 */
+
+	/* Whitespace can occur between any digit and has to be ignored, */
+	H_RULE(aws,	IGN(h_many(wchar)));  // all white space, include CR & LF, but not comments
+	#define MANY_AWS(X) h_many(CHX(aws, X))
+
+
+	/* Ascii85Decode */
+	H_RULE(a85eod,	SEQ(h_ch('~'), aws, h_ch('>')));
+	H_ARULE(a85zero,	h_ch('z'));
+	H_ARULE(a85digit,	h_ch_range('!', 'u'));
+
+	/* Line whitespace can occur between any digit and has to be ignored, */
+	#define MANY_LWS(X) h_many(CHX(lws, X))
+	// Folded the test for a85fiveexcl into the validation component
+	H_VARULE(a85fivedigits,	h_repeat_n(SEQ(a85digit, aws), 5));
+	// TODO:: will need to pull out error conditions -- a85fiveexcl or 'z' as one of the digits
+	H_ARULE(a85group,	CHX(a85zero, a85fivedigits));
+
+	H_VARULE(a85partial2group,	h_repeat_n(SEQ(a85digit, aws), 2));
+	H_VARULE(a85partial3group,	h_repeat_n(SEQ(a85digit, aws), 3));
+	H_VARULE(a85partial4group,	h_repeat_n(SEQ(a85digit, aws), 4));
+	H_RULE(a85partialgroup,	CHX(a85partial4group, a85partial3group, a85partial2group));
+
+	H_ARULE(a85string,	SEQ(h_many(a85group), h_optional(a85partialgroup), IGN(a85eod)));
+
+	//p_test = a85group;
+
+
+	/*
+	 * Not sure whether comments can be embedded within content streams
+	 * If not, use the rule aws rather than ws
+	 */
+	/*
+	 * It seems somewhat unclear. ASCII85Decode definitely can't have
+	 * comments, because % can be part of a valid ASCII85Encoded character.
+	 * However, it seems that comments are generally allowed:
+	 * "Any occurrence of the PERCENT SIGN (25h) outside a string or
+	 * inside a content stream (see 7.8.2, "Content streams") introduces
+	 * a comment." ISO32000:2-2017 7.2.4
+	 */
+	 // XXX Ask Peter Wyatt
+
+
+	/* AsciiHexDecode */
+	H_RULE(ahexeod,	h_ch('>'));
+	H_ARULE(hdigitpair, SEQ(aws, hdigit, aws, hdigit));
+	H_ARULE(ahextruncated, SEQ(aws, hdigit));
+
+	H_RULE(ahs_end, SEQ(h_optional(ahextruncated), aws, ahexeod));
+	H_ARULE(ahexstream, SEQ(h_many(hdigitpair), ahs_end));
+
+	init_runlengthdecode_parser(aux);
+
+
+	// ==========================================================================
+	/*
+	 * Text Objects Extraction - embedded in content streams
+	 *
+	 */
+	// ==========================================================================
+	/*
+	 * Text Objects Extraction - embedded in content streams
+	 */
+
+// XXX cleanup: indentation
+    H_RULE(txtbegin, h_indirect());
+    H_RULE(txt_before_junk, IGN(SEQ(h_not(LIT("BT")), CHX(comment, h_uint8()))));
+	H_ARULE(txtbegin_, SEQ(IGN(h_many(txt_before_junk)), LIT("BT"), aws));
+	h_bind_indirect(txtbegin, txtbegin_);
+	H_ARULE(txtend, KW("ET"));
+	/* 9.3 - Text state operators */
+    H_AVRULE(tnumb, numb);
+    HParser *Tc_op = h_action(SEQ(tnumb, aws, LIT("Tc"), aws), act_Tc_op, aux);  /* 9.3.2 - charSpace */
+    HParser *Tw_op = h_action(SEQ(tnumb, aws, LIT("Tw"), aws), act_Tw_op, aux);  /* 9.3.3 - wordSpace */
+    HParser *Tz_op = h_action(SEQ(tnumb, aws, LIT("Tz"), aws), act_Tz_op, aux);  /* 9.3.4 - horizontal scaling */
+    HParser *TL_op = h_action(SEQ(tnumb, aws, LIT("TL"), aws), act_TL_op, aux);  /* 9.3.5 - leading */
+	HParser *Tf_op = h_action(SEQ(name, aws, numbnn, aws, KW("Tf"), aws), act_Tf_op, aux);  /* font and size */
+	/* TDO: must map to an existing font dictionary */
+	H_VRULE(tmode, nat);                        /* True if <= 7 */
+	HParser *Tr_op = h_action(SEQ(tmode, aws, LIT("Tr"), aws), act_Tr_op, aux);  /* 9.3.6 - rendering mode */
+	HParser *Ts_op = h_action(SEQ(tnumb, aws, LIT("Ts"), aws), act_Ts_op, aux);  /* rise */
+	H_RULE(textstate_ops, CHX(Tc_op, Tw_op, Tz_op, TL_op, Tf_op, Tr_op, Ts_op));
+
+	/* 9.4.2 - Text positioning operators */
+	HParser *Td_op = h_action(SEQ(tnumb, aws, tnumb, aws, LIT("Td"), aws), act_Td_op, aux);      /* move to next line with offset */
+	HParser *TD_op = h_action(SEQ(tnumb, aws, tnumb, aws, LIT("TD"), aws), act_TD_op, aux);      /* move to next line with offset and set state */
+	HParser *Tm_op = h_action(SEQ(REP(SEQ(tnumb, aws), 6), LIT("Tm"), aws), act_Tm_op, aux);     /* set text matrix */
+	HParser *Tstar_op = h_action(SEQ(LIT("T*"), aws), act_Tstar_op, aux);                           /* move to next line */
+	H_RULE(textpos_ops, CHX(Td_op, TD_op, Tm_op, Tstar_op));
+
+	/* 9.4.3 - Text showing operators */
+	H_RULE(quote,	h_ch('\''));
+	H_RULE(dquote,	h_ch('"'));
+//	H_ARULE(Tj_op, SEQ(string, aws, LIT("Tj"), aws), aux);          /* show text string */
+	HParser *Tj_op       = h_action(SEQ(string, aws, LIT("Tj"), aws), act_Tj_op, aux);          /* show text string */
+	HParser *TsingleQ_op = h_action(SEQ(string, aws, quote, aws), act_TsingleQ_op, aux);        /* Move to next line and show text */
+	HParser *TdoubleQ_op = h_action(SEQ(tnumb, aws, tnumb, aws, string, aws, dquote, aws), act_TdoubleQ_op, aux); /* Move to next line and show formatted text */
+	H_RULE(TArr_elem, SEQ(CHX(tnumb, string), aws));
+	HParser *TJ_op       = h_action(SEQ(IGN(lbrack), aws, h_many(TArr_elem), IGN(rbrack), aws, LIT("TJ"), aws), act_TJ_op, aux);      /* show one or more text strings */
+	H_RULE(textshow_ops, CHX(Tj_op, TsingleQ_op, TdoubleQ_op, TJ_op));
+
+    H_RULE(text_inbetween_junk, IGN(SEQ(h_not(txtend), h_uint8())));
+    H_RULE(text_ops, CHX(textstate_ops, textpos_ops, textshow_ops, text_inbetween_junk));
+
+	/* Text object */
+	H_ARULE(txtobj, SEQ(txtbegin, h_many(text_ops), txtend));
+	H_RULE(txtobjs, h_many1(txtobj));
+
+
+	/* text streams */
+	H_RULE(txtstream, h_bind(txtobjs, ktxtstream, aux));
+
+	// Page Tree
+	H_RULE(contentstream, h_left(h_bind(stmbeg, kcontentstream, aux), stmend));
+	H_ARULE(cstream, SEQ(ws, npair, wel, KW("obj"), ws, contentstream,
+			OPT(ws), OPT(lws), KW("endobj")));
+	H_RULE(byteostream, h_left(h_bind(stmbeg, kbyteostream, aux), stmend));
+	H_RULE(byteostm, SEQ(ws, npair, wel, KW("obj"), ws, byteostream,
+			OPT(ws), OPT(lws), KW("endobj")));
+
+	// convenient parser to just get a chunk of bytes
+	H_ARULE(bytestream, h_many(h_uint8()));
+
+
+	/* global parser variables */
+	p_pdf        = pdf;
+	p_pdfdbg     = pdfdbg;
+	p_startxref  = lasteof; //startxr;
+	p_xref       = CHX(xr_td, xrstm);
+	p_objdef     = objdef;
+	p_a85string  = a85string;
+	p_ahexstream = ahexstream;
+	p_ws         = ws;
+	p_wel        = wel;
+	p_elemr      = h_action(elemr, h_act_flatten, NULL);
+	p_npair      = npair;
+
+	/* text parser variables */                                                  \
+	p_textbegin  = txtbegin;                                                       \
+	p_textstream = txtstream;
+	p_cstream    = cstream;
+	p_byteostm   = byteostm;
+	p_bytestream = bytestream;
+	p_dict       = dict;
+
+	p_fail = h_nothing_p();
+	p_epsilon = epsilon;
+	p_return_0 = h_action(epsilon, act_return_uint, (void *)0);
+	p_return_1 = h_action(epsilon, act_return_uint, (void *)1);
+
+	/* Parsing of severity messages */
+	H_RULE(viol_preamble, SEQ(h_many(NOT_IN("=")), LIT("=")));
+	H_RULE(severity_num, h_action(h_many1(h_action(h_ch_range('0', '9'), act_digit, NULL)),
+			     act_nat, NULL));
+	H_RULE(violsev, SEQ(IGN(viol_preamble), severity_num));
+	p_violsev = violsev;
+
+#if 0
+	// XXX testing
+	int r;
+	void errx(int, const char *, ...);
+	HParser *p = obj;
+	if ((r = h_compile(p, PB_LALR, NULL)) != 0)
+		errx(1, "h_compile() failed: %d", r);
+	errx(0, "OK");
+#endif
+}
+
+
+/*
+ * lookup and resolution of indirect references
+ *
+ * ** Parameter 'gen' is unused
+ */
+
+/*
+ * Search every cross-reference section (one per file update) and every
+ * subsection within it for object number 'nr'. Returns the matching entry
+ * or NULL when no subsection covers 'nr'. 'gen' is accepted for interface
+ * symmetry but is not consulted.
+ */
+XREntry *
+lookup_xref(struct Env *aux, size_t nr, size_t gen)
+{
+	for (size_t sec = 0; sec < aux->nxrefs; sec++) {
+		HCountedArray *subsections = H_INDEX_SEQ(aux->xrefs[sec], 0);
+
+		for (size_t k = 0; k < subsections->used; k++) {
+			HParsedToken *sub = subsections->elements[k];
+			size_t first = H_INDEX_UINT(sub, 0, 0);	/* first object nr */
+			size_t count = H_INDEX_UINT(sub, 0, 1);	/* number of entries */
+
+			/* skip subsections that do not contain 'nr' */
+			if (nr < first || nr - first >= count)
+				continue;
+
+			return H_INDEX(XREntry, sub, 1, nr - first);
+			// TODO: Generate a more meaningful error message -- p_viol, error ontology category
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Parse the indirect object definition located at byte 'offset' of the input
+ * and check that it really defines object 'nr' 'gen'.
+ *
+ * Returns the object's value token (element 1 of the objdef sequence), or
+ * NULL when the offset is out of bounds, the parse fails, or the object ID
+ * found at that position differs from the one requested.
+ */
+const HParsedToken *
+parse_obj(struct Env *aux, size_t nr, size_t gen, size_t offset)
+{
+	if (offset >= aux->sz) {
+		fprintf(stderr, "%s: position %zu (%#zx) for object %zu %zu is "
+		    "out of bounds\n", aux->infile, offset, offset, nr, gen);
+		return NULL;
+	}
+
+	/* seek to the object (h_seek counts bits) and parse the whole input */
+	HParser *at_offset = h_right(h_seek(offset * 8, SEEK_SET), p_objdef);	// XXX
+	HParseResult *parsed = h_parse(at_offset, aux->input, aux->sz); // XXX review
+	if (parsed == NULL) {
+		fprintf(stderr, "%s: error parsing object %zu %zu at position "
+		    "%zu (%#zx)\n", aux->infile, nr, gen, offset, offset);
+		return NULL;
+	}
+	assert(parsed->ast != NULL && parsed->ast->token_type == TT_SEQUENCE);
+
+	/* element 0 of the result is the (nr, gen) pair actually found */
+	const size_t found_nr = H_INDEX_UINT(parsed->ast, 0, 0);
+	const size_t found_gen = H_INDEX_UINT(parsed->ast, 0, 1);
+	if (!(found_nr == nr && found_gen == gen)) {
+		fprintf(stderr, "%s: object ID mismatch at position %zu "
+		    "(%#zx): sought %zu %zu, found %zu %zu.\n", aux->infile,
+		    offset, offset, nr, gen, found_nr, found_gen);
+		return NULL;
+	}
+
+	const HParsedToken *value = H_INDEX_TOKEN(parsed->ast, 1);
+
+	// DEBUG
+	fprintf(stdout, "\nparse_obj: Parsed Result:\n");
+	h_pprintln(stdout, parsed->ast);	// XXX debug
+	fprintf(stdout, "\nparse_obj: Returning:\n");
+	h_pprintln(stdout, value);	// XXX debug
+	// DEBUG
+
+	return value;
+}
+
+/*
+ * Fetch the object stored at index 'idx' of object stream 'stm_nr'.
+ *
+ * The stream object itself is looked up in the xref table and parsed (and
+ * memoized in its xref entry) on first use. Returns NULL when the stream is
+ * missing, free, replaced by a later generation, itself stored in an object
+ * stream, fails to parse, or when 'idx' is out of range.
+ * 'nr' (the number of the wanted object) is currently not consulted.
+ */
+const HParsedToken *
+parse_objstm_obj(struct Env *aux, size_t nr, size_t stm_nr, size_t idx)
+{
+	XREntry *ent;
+	const HParsedToken *stm;
+
+	/*
+	 * acquire the stream object
+	 */
+
+	ent = lookup_xref(aux, stm_nr, 0);
+	if (ent == NULL)
+		return NULL;		/* stream not found */
+
+	switch (ent->type)
+	{
+	case XR_FREE:
+		return NULL;		/* stream deleted */
+	case XR_INUSE:
+		if (ent->n.gen != 0)
+			return NULL;	/* stream replaced */
+		if (ent->obj == NULL) {
+			/* stm_nr is a size_t, so it must be printed with %zu */
+			fprintf(stdout, "\nparse_objstm_obj:: Parsing object stream id = %zu, %d, at offset = %zu (%#zx)\n",
+					stm_nr, 0, ent->n.offs, ent->n.offs); // XXX DEBUG
+			ent->obj = parse_obj(aux, stm_nr, 0, ent->n.offs);
+		}
+		break;
+	case XR_OBJSTM:
+		return NULL;		/* invalid: nested streams */
+	}
+
+	if ((stm = ent->obj) == NULL) {
+		fprintf(stderr, "%s: error parsing object stream at position "
+		    "%zu (%#zx)\n", aux->infile, ent->n.offs, ent->n.offs);
+		return NULL;
+	}
+	else {
+		/*
+		 * decode the stream and find the target object in it
+		 */
+		// XXX debug
+		fprintf(stdout, "\nparse_objstm_obj:: token type = %u, \n", ent->obj->token_type);
+		h_pprintln(stdout, stm);	// XXX debug
+		// XXX debug
+		// get the object at index
+
+		stm = H_INDEX_TOKEN(stm, 1);  // the first field is the stream dictionary
+		if (stm->token_type == TT_HParseResult){
+			const HParseResult *res = H_CAST(HParseResult, stm);
+			stm = res->ast;
+			// Now get the index if the index is valid
+			const Objstm *ostm = H_CAST(Objstm, stm);
+			/* idx is unsigned: only the upper bound needs checking */
+			if (idx < ostm->numObjs) {
+				stm = ostm->tok[idx].obj;
+			}
+			else
+				return NULL;
+		}
+	}
+	fprintf(stdout, "\npparse_objstm_obj:: Returning token of type = %u, \n", stm->token_type);
+	h_pprintln(stdout, stm);	// XXX debug
+	return stm;
+}
+
+/*
+ * Resolve an indirect reference to the object it designates.
+ *
+ * Direct objects (anything that is not a TT_Ref) and NULL are returned
+ * unchanged. For a reference, the xref entry is looked up; if the object has
+ * already been parsed the memoized token is resolved further (an object may
+ * itself be a reference), otherwise the object is parsed and memoized.
+ *
+ * Returns NULL when the object cannot be found, is free, has a mismatching
+ * generation, fails to parse, or is part of a reference loop.
+ */
+const HParsedToken *
+resolve(struct Env *aux, const HParsedToken *v)
+{
+	XREntry *ent = NULL;
+	Ref *r;
+
+	/* nothing to resolve; must be tested before touching v->token_type */
+	if (v == NULL)
+		return NULL;
+
+	/* direct objects pass through */
+	if (v->token_type != TT_Ref) {
+		fprintf (stdout, "resolve: Returning token of token_type = %u\n", v->token_type); // XXX DEBUG
+		return v;
+	}
+
+	/* we are looking at an indirect reference */
+	r = v->user;
+
+	/* find the xref entry for this reference */
+	fprintf(stdout, "\nresolve:: Looking up xref = %zu, %zu\n", r->nr, r->gen); // XXX DEBUG
+	ent = lookup_xref(aux, r->nr, r->gen);
+	if (ent == NULL)
+		return NULL;			/* obj not found */
+
+	/*
+	 * ent->obj == v means the entry still holds the loop-breaking sentinel
+	 * stored below: either resolution of this object is in progress further
+	 * up the call stack, or an earlier attempt bailed out. Recursing on it
+	 * would never terminate.
+	 */
+	if (ent->obj == v)
+		return NULL;
+	if (ent->obj != NULL) {
+		fprintf(stdout, "\nresolve:: ent->obj->token_type = %u\n", ent->obj->token_type); // XXX DEBUG
+		return resolve(aux, ent->obj);
+	}
+
+	/* parse the object and memoize */
+	ent->obj = v;				/* break loops */
+	switch (ent->type)
+	{
+	case XR_FREE:
+		return NULL;			/* obj deleted */
+	case XR_INUSE:
+		if (ent->n.gen != r->gen)
+			return NULL;		/* obj nr reused */
+		fprintf(stdout, "resolve:: parse object at offset = %zu (%#zx)\n", ent->n.offs, ent->n.offs);
+		ent->obj = parse_obj(aux, r->nr, r->gen, ent->n.offs);
+		break;
+	case XR_OBJSTM:
+		if (r->gen != 0)
+			return NULL;		/* invalid entry! */
+		fprintf(stdout, "resolve:: parse object stream - oid = %zu, stm_oid = %zu, stm_idx = %zu\n",
+				r->nr, (size_t)ent->o.stm, (size_t)ent->o.idx); // XXX DEBUG
+		ent->obj = parse_objstm_obj(aux, r->nr, ent->o.stm, ent->o.idx);
+		break;
+	}
+
+	/* parsing failed: there is no token to report on or to resolve further */
+	if (ent->obj == NULL)
+		return NULL;
+
+	fprintf (stdout, "resolve: Recursive call to resolve - token_type = %u\n", ent->obj->token_type); // XXX DEBUG
+	return resolve(aux, ent->obj);
+}
+
+
+/*
+ * stream object handling incl. filters and cross-reference streams
+ */
+
+#include <limits.h>	/* INT_MAX */
+#include <zlib.h>
+#include <err.h>
+
+/*
+ * Parameters and running state of a predictor stage applied to decompressed
+ * data (see depred_none() and depred_png()).
+ *
+ * NB: the first four members are filled by positional initializers
+ * ({1, 1, 8, 1}) in the filter functions, so their order must not change.
+ */
+struct predictor {
+	/* parameters */
+	int num;	/* default: 1 (no prediction) */
+	int colors;	/* default: 1 */
+	int bpc;	/* bits per component; default: 8 */
+	int columns;	/* default: 1 */
+
+	int rowsz;	/* bytes per row = ceil(colors * bpc * columns / 8) */
+
+	/* state */
+	HSuspendedParser *sp;	/* target parser; only used when ITERATIVE is defined */
+	uint8_t	(*predfun)(int, int, int);	/* predictor for the current row; NULL = row tag not read yet */
+	uint8_t	*buf;		/* previous row of input */
+	uint8_t	 c;		/* byte 'c' (upper left) */
+	int	 x;		/* current position */
+
+#ifndef ITERATIVE // XXX
+	uint8_t	*out;		/* accumulated de-predicted output (realloc'ed as it grows) */
+	size_t	 nout;		/* number of bytes stored in 'out' */
+#endif
+};
+
+/*
+ * "No prediction": pass 'sz' bytes from 'inp' through unchanged.
+ *
+ * With ITERATIVE defined the chunk is fed to the suspended parser and its
+ * result is returned; otherwise the bytes are appended to pred->out and the
+ * function returns false (0), meaning "not done, keep feeding".
+ */
+int
+depred_none(struct predictor *pred, uint8_t *inp, size_t sz)
+{
+#ifdef ITERATIVE // XXX
+	return h_parse_chunk(pred->sp, inp, sz);
+#else
+	uint8_t *grown;
+
+	/*
+	 * Nothing to append. This also avoids a zero-sized realloc(), which may
+	 * legitimately return NULL and would trip the assertion below.
+	 */
+	if (sz == 0)
+		return false;
+
+	/* keep the old pointer until the reallocation is known to have worked */
+	grown = realloc(pred->out, pred->nout + sz);
+	assert(grown != NULL);
+	pred->out = grown;
+
+	memcpy(pred->out + pred->nout, inp, sz);
+	pred->nout += sz;
+	return false;
+#endif
+}
+
+/*
+ * Per-byte predictor functions. Each receives the neighbouring values
+ * a (left), b (up) and c (upper left) and returns the predicted byte,
+ * truncated to 8 bits.
+ */
+
+/* no prediction: always predicts 0 */
+uint8_t
+pp_none(int a, int b, int c)
+{
+	return 0;
+}
+
+/* predicts the value to the left */
+uint8_t
+pp_sub(int a, int b, int c)
+{
+	return a;
+}
+
+/* predicts the value above */
+uint8_t
+pp_up(int a, int b, int c)
+{
+	return b;
+}
+
+/* predicts the (integer) mean of left and above */
+uint8_t
+pp_avg(int a, int b, int c)
+{
+	return (a + b) / 2;
+}
+
+/*
+ * Paeth predictor: of a (left), b (up) and c (upper left), return the one
+ * closest to the estimate a + b - c. Ties are broken in the order a, b, c.
+ */
+uint8_t
+pp_paeth(int a, int b, int c)
+{
+	const int estimate = a + b - c;
+	const int dist_a = abs(estimate - a);
+	const int dist_b = abs(estimate - b);
+	const int dist_c = abs(estimate - c);
+
+	if (dist_a <= dist_b && dist_a <= dist_c)
+		return a;
+
+	return dist_b <= dist_c ? b : c;
+}
+
+/*
+ * Undo row-based prediction on 'sz' bytes of 'inp'.
+ *
+ * State is carried in 'pred' across calls, so a row (and even the tag byte
+ * of a row) may be split over several chunks. Each completed row is either
+ * fed to the suspended parser (ITERATIVE) or appended to pred->out.
+ *
+ * Returns -1 on an unknown row tag; otherwise 'done', which only becomes
+ * true in the ITERATIVE build.
+ *
+ * Assumes pred->buf holds pred->rowsz bytes and pred->rowsz > 0 -- the
+ * callers are responsible for that.
+ */
+int
+depred_png(struct predictor *pred, uint8_t *inp, size_t sz)
+{
+	/* NB:
+	 * at this point, the specific value of pred->num no longer matters.
+	 * the PNG predictor tags each row with the function used for that row
+	 * and decoding always follows the tag.
+	 */
+	static uint8_t (*predfuns[])(int, int, int) =
+	    {pp_none, pp_sub, pp_up, pp_avg, pp_paeth};
+
+	bool done = false;
+	int bpp;
+
+	bpp = (pred->colors * pred->bpc + 7) / 8;	/* bytes per pixel */
+	assert (bpp > 0);
+
+	for (size_t i=0; i < sz && !done; i++) {
+		/*
+		 * pred->buf still holds the previous row at positions >= x and
+		 * already holds the decoded current row at positions < x.
+		 */
+		int x = pred->x;
+		int a = x<bpp ? 0 : pred->buf[x-bpp];	/* left */
+		int b = pred->buf[x];			/* up */
+		/*
+		 * NOTE(review): pred->c is the 'up' value of the byte handled just
+		 * before this one, i.e. position x-1 of the previous row. That is
+		 * the byte bpp positions back only when bpp == 1 -- confirm
+		 * against the PNG filter definition for bpp > 1.
+		 */
+		int c = pred->c;			/* up left */
+
+		if (pred->predfun == NULL) {	/* we are before a new row */
+			/* select predictor function */
+			if (inp[i] > 4) {
+				fprintf(stderr, "unknown PNG predictor %d\n",
+				    (int)inp[i]);
+				return -1;
+			}
+			pred->predfun = predfuns[inp[i]];
+
+			/* consume the tag */
+			/* if the chunk ends here, predfun stays set for the next call */
+			if (++i == sz)
+				break;
+		}
+
+		/* undo the prediction and save the decoded value */
+		pred->buf[x] = inp[i] + pred->predfun(a, b, c);
+
+		/* advance to the right */
+		pred->c = b;
+		pred->x = ++x;
+
+		/* when row complete, pass it to parser and start a new row */
+		if (x == pred->rowsz) {
+#ifdef ITERATIVE // XXX
+			done = h_parse_chunk(pred->sp, pred->buf, pred->rowsz);
+#else
+			pred->out = realloc(pred->out, pred->nout + pred->rowsz);
+			assert(pred->out != NULL);
+			memcpy(pred->out + pred->nout, pred->buf, pred->rowsz);
+			pred->nout += pred->rowsz;
+#endif
+			pred->c = pred->x = 0;
+			/* predictor 2 rows carry no tag byte, so keep the function */
+			if (pred->num != 2)	/* support for 8-bpc TIFF */
+				pred->predfun = NULL;
+		}
+	}
+
+	return done;
+}
+
+/*
+ * FlateDecode filter: inflate the bytes in 'b' with zlib, undo the predictor
+ * selected by 'parms' (if any) and parse the result with 'p'.
+ *
+ * Recognized parameters: /Predictor, /Colors, /BitsPerComponent, /Columns.
+ * Supported predictors are 1 (none), 2 (TIFF, 8 bits per component only)
+ * and 10..15 (PNG).
+ *
+ * Returns the parse result, or NULL when a parameter is malformed or
+ * unsupported, or when de-prediction fails. Allocation and zlib setup
+ * failures terminate the program via err()/errx().
+ */
+HParseResult *
+FlateDecode(const Dict *parms, HBytes b, HParser *p)
+{
+	size_t const BUFSIZE = 8 * 1024;
+	uint8_t *buf;
+#ifdef ITERATIVE // XXX
+	HSuspendedParser *sp;
+#endif
+	HParseResult *res;
+	const HParsedToken *v;
+	size_t sz;
+	int done;
+	z_stream strm = {0};
+	int ret;
+	struct predictor pred = {1, 1, 8, 1};
+	int (*depredict)(struct predictor *, uint8_t *, size_t);
+
+	/* set up the predictor (if any) */
+	/* values must be non-negative integers that fit the 'int' members */
+	#define SETPARM(VAR,STR) do {					\
+		v = dictentry(parms, (STR));				\
+		if (v != NULL) {					\
+			if (v->token_type != TT_SINT || v->sint < 0 ||	\
+			    v->sint > INT_MAX)				\
+				return NULL;				\
+			VAR = v->sint;					\
+		} } while(0)
+	SETPARM(pred.num,	"Predictor");
+	SETPARM(pred.colors,	"Colors");
+	SETPARM(pred.bpc,	"BitsPerComponent");
+	SETPARM(pred.columns,	"Columns");
+	#undef SETPARM
+	if (pred.num == 1)
+		depredict = depred_none;
+	else {
+		if (pred.num >= 10 && pred.num <= 15)
+			depredict = depred_png;
+		else if (pred.num == 2) {
+			/* for 8-bpc TIFF pred. 2, we can reuse PNG Sub */
+			if (pred.bpc == 8) {
+				pred.predfun = pp_sub;	/* predict left */
+				depredict = depred_png;
+			} else {
+				// XXX add general TIFF predictor (bpc != 8)
+				fprintf(stderr, "FlateDecode: /Predictor %d "
+				    "not supported for /BitsPerComponent %d\n",
+				    pred.num, pred.bpc);
+				return NULL;
+			}
+		} else {
+			fprintf(stderr, "FlateDecode: /Predictor %d"
+			    " not supported\n", pred.num);
+			return NULL;
+		}
+
+		/*
+		 * A zero here would divide by zero in the overflow check below
+		 * or yield an empty row buffer that depred_png() indexes into.
+		 */
+		if (pred.colors < 1 || pred.bpc < 1 || pred.columns < 1) {
+			fprintf(stderr, "FlateDecode: invalid predictor "
+			    "parameters\n");
+			return NULL;
+		}
+
+		/* allocate row buffer */
+		if (pred.columns > (INT_MAX - 7) / pred.colors / pred.bpc) {
+			fprintf(stderr, "FlateDecode: overflow\n");
+			return NULL;
+		}
+		pred.rowsz = (pred.colors * pred.bpc * pred.columns + 7) / 8;
+		pred.buf = calloc(1, pred.rowsz);
+		if (pred.buf == NULL)
+			err(1, "FlateDecode");
+	}
+
+	/* set up zlib */
+	// XXX pass our allocator to zlib
+	ret = inflateInit(&strm);
+	if (ret != Z_OK)
+		errx(1, "inflateInit: %s (%d)", strm.msg, ret);
+	buf = malloc(BUFSIZE);
+	if (buf == NULL)
+		err(1, "FlateDecode");
+
+#ifdef ITERATIVE // XXX
+	/* initialize target parser */
+	sp = h_parse_start(p);
+	assert(sp != NULL);
+	pred.sp = sp;
+#endif
+
+	/* inflate chunk by chunk, handing each chunk to the predictor stage */
+	done = 0;
+	strm.avail_in = b.len;
+	strm.next_in = (unsigned char *)b.token;
+	do {
+		strm.avail_out = BUFSIZE;
+		strm.next_out = buf;
+
+		ret = inflate(&strm, Z_NO_FLUSH);
+		if (ret != Z_STREAM_END && ret != Z_OK) {
+			fprintf(stderr, "inflate: %s (%d)\n", strm.msg, ret);
+			break;
+		}
+
+		sz = BUFSIZE - strm.avail_out;
+		done = depredict(&pred, buf, sz);
+	} while (done == 0 && ret == Z_OK);
+
+#ifdef ITERATIVE // XXX
+	res = h_parse_finish(sp);
+		// XXX always return NULL on error?
+#else
+
+	// DEBUG -- will not always work depending on the font encoding used
+//	fprintf (stdout, "FlateDecode:: Inflated string (%lu):\n%.*s\n", pred.nout, (int)pred.nout, pred.out);
+	// XXX DEBUG
+
+	res = h_parse(p, pred.out, pred.nout);
+	free(pred.out);
+#endif
+	inflateEnd(&strm);
+	free(pred.buf);
+	free(buf);
+
+	/* the predictor stage reported an error: discard the result */
+	if (done == -1)
+		return NULL;
+	return res;
+}
+
+
+/*
+ * Disabled debugging residue: dumps the inflated buffer.
+ * NB: this sits at file scope but refers to FlateDecode()'s locals ('pred',
+ * 'res'), so it cannot be enabled where it stands.
+ */
+#if 0
+	// decoded stream in pred.out
+//	FILE *decodef = fopen ("flatecode.out", "w");
+//	fprintf (decodef, "FlateDecode:: Inflated string (%lu)\n:%.*s\n", pred.nout, (int)pred.nout, pred.out);
+	fprintf (stdout, "FlateDecode:: Inflated string (%lu)\n:%.*s\n", pred.nout, (int)pred.nout, pred.out);
+	unsigned char *fdec = pred.out;
+//	char _l;
+	int i;
+	for (i=0; i<(pred.nout/2); ++i)
+	{
+		convert2char(*fdec);
+//		_l = convert2char(*fdec);
+//		fprintf(decodef, " %c-%d ", _l, _l);
+		fdec ++;
+	}
+	res = NULL;
+
+
+#endif
+
+
+
+
+/* LZW helpers */
+
+/*
+ * State shared between LZWDecode() and the byte-wise read/write callbacks
+ * handed to lzw_decompress().
+ */
+typedef struct
+{
+	/* output side (filled by write_lzw_buffer) */
+	uint8_t *lzw_buf;	/* decoded output buffer */
+	size_t total_buf_size;	/* allocated size of lzw_buf in bytes */
+	size_t write_head;	/* index at which the next byte is stored */
+	size_t write_tail;	/* not used by the code in this file section */
+	uint8_t write_checksum;	/* running checksum over the bytes written */
+	size_t eof_loc;		/* index at which the EOF marker was stored */
+
+	/* input side (consumed by read_lzw_buffer) */
+	HBytes *input_stream;	/* encoded input */
+	size_t read_head;	/* index of the next byte to read */
+	size_t read_tail;	/* end of readable input; set to input_stream->len */
+	uint8_t read_checksum;	/* running checksum over the bytes read */
+} lzwspec;
+
+/* spec the callbacks operate on; set with bind_lzw_spec() */
+lzwspec *cur_lzw_spec;
+
+/*
+ * used by write_lzw_buffer to get more space for decoding if needed.
+ *
+ * Enlarges cur_lzw_spec->lzw_buf by 'amount' bytes. Since there is no way to
+ * report failure to the caller, which would go on to write past the end of
+ * the old buffer, running out of memory terminates the program.
+ */
+void
+grow_lzw_buffer(size_t amount)
+{
+	size_t new_size = cur_lzw_spec->total_buf_size + amount;
+	uint8_t *ret_buf = realloc(cur_lzw_spec->lzw_buf, new_size);
+
+	if (ret_buf == NULL)
+		err(1, "LZWDecode: realloc");
+
+	cur_lzw_spec->lzw_buf = ret_buf;
+	cur_lzw_spec->total_buf_size = new_size;
+}
+
+/*
+ * Allocate a zero-initialized lzwspec that reads from 'bytes' and owns a
+ * fresh 1 KiB output buffer. Release it with delete_lzw_spec().
+ * Running out of memory terminates the program.
+ */
+lzwspec *
+new_lzw_spec(HBytes *bytes)
+{
+	size_t const BUFSIZE = sizeof(uint8_t) * 1024;
+	lzwspec *ret = calloc(1, sizeof *ret);
+
+	if (ret == NULL)
+		err(1, "LZWDecode");
+
+	ret->input_stream = bytes;
+	ret->lzw_buf = malloc(BUFSIZE);
+	if (ret->lzw_buf == NULL)
+		err(1, "LZWDecode");
+	ret->total_buf_size = BUFSIZE;
+	return ret;
+}
+
+/*
+ * Release a spec created by new_lzw_spec() together with its output buffer.
+ * Like free(), passing NULL is a no-op.
+ */
+void
+delete_lzw_spec(lzwspec *spec)
+{
+	if (spec == NULL)
+		return;
+
+	free(spec->lzw_buf);
+	free(spec);
+}
+
+/*
+ * Select the spec that write_lzw_buffer() and read_lzw_buffer() operate on.
+ * Pass NULL to unbind.
+ */
+void
+bind_lzw_spec(lzwspec *spec)
+{
+	cur_lzw_spec = spec;
+}
+
+
+#include "lzw-lib.h"
+
+/* Buffer writer function for the lzw-ab implementation, with a fixed signature.
+ * Although the type is defined as int, it is expected to write one byte at a time.
+ * Modifies cur_lzw_spec. Set up the lzw spec to use with bind_lzw_spec() */
+
+/*
+ * Output callback: store one byte (or the EOF marker) at write_head.
+ *
+ * EOF is recorded in eof_loc and stored as a byte, but does not contribute
+ * to the checksum. Room for the byte is ensured before it is stored, so
+ * neither path can write past the end of the buffer.
+ */
+void
+write_lzw_buffer(int value)
+{
+	size_t const BUFSIZE = sizeof(uint8_t) * 1024;
+
+	if(!cur_lzw_spec->lzw_buf)
+	{
+		fprintf(stderr, "LZWDecode: lzw_buf is null!");
+		assert(cur_lzw_spec->lzw_buf != NULL);
+	}
+
+	/* index write_head must be valid before anything is stored there */
+	if (cur_lzw_spec->write_head >= cur_lzw_spec->total_buf_size)
+		grow_lzw_buffer(BUFSIZE);
+	if (cur_lzw_spec->write_head >= cur_lzw_spec->total_buf_size)
+		errx(1, "LZWDecode: output buffer exhausted");
+
+	if (value == EOF) {
+		cur_lzw_spec->lzw_buf[cur_lzw_spec->write_head] = (uint8_t) value;
+		cur_lzw_spec->eof_loc = cur_lzw_spec->write_head;
+		cur_lzw_spec->write_head++;
+		return;
+	}
+
+	/* We can get away with this cast due to writing single bytes. */
+	cur_lzw_spec->lzw_buf[cur_lzw_spec->write_head++] = (uint8_t) value;
+
+	cur_lzw_spec->write_checksum = cur_lzw_spec->write_checksum * 3 + (uint8_t) value;
+}
+
+
+/*
+ * Input callback with the signature the LZW library expects: hand out the
+ * next byte of the bound spec's input stream, or EOF once it is exhausted.
+ * Operates on cur_lzw_spec; select the spec with bind_lzw_spec() first.
+ */
+int read_lzw_buffer(void)
+{
+	lzwspec *spec = cur_lzw_spec;
+
+	/* the whole input is already in memory: extend the window to its end */
+	if (spec->read_head == spec->read_tail)
+		spec->read_tail = spec->input_stream->len;
+
+	if (spec->read_head >= spec->read_tail)
+		return EOF;
+
+	uint8_t next = spec->input_stream->token[spec->read_head++];
+	spec->read_checksum = spec->read_checksum * 3 + next;
+	return next;
+}
+
+
+/*
+ * LZWDecode filter: decompress the bytes in 'b', undo the predictor selected
+ * by 'parms' (if any) and parse the result with 'p'.
+ *
+ * Parameters and supported predictors are the same as for FlateDecode.
+ * Returns the parse result, or NULL when a parameter is malformed or
+ * unsupported, decompression fails, or de-prediction fails.
+ */
+HParseResult *
+LZWDecode(const Dict *parms, HBytes b, HParser *p)
+{
+	struct predictor pred = {1, 1, 8, 1};
+	int (*depredict)(struct predictor *, uint8_t *, size_t);
+	HParseResult *res = NULL;
+	size_t declen;
+	int done;
+	int ret;
+	const HParsedToken *v;
+
+	/* set up the predictor (if any) */
+	/* values must be non-negative integers that fit the 'int' members */
+	#define SETPARM(VAR,STR) do {					\
+		v = dictentry(parms, (STR));				\
+		if (v != NULL) {					\
+			if (v->token_type != TT_SINT || v->sint < 0 ||	\
+			    v->sint > INT_MAX)				\
+				return NULL;				\
+			VAR = v->sint;					\
+		} } while(0)
+	SETPARM(pred.num,	"Predictor");
+	SETPARM(pred.colors,	"Colors");
+	SETPARM(pred.bpc,	"BitsPerComponent");
+	SETPARM(pred.columns,	"Columns");
+	#undef SETPARM
+	if (pred.num == 1)
+		depredict = depred_none;
+	else {
+		if (pred.num >= 10 && pred.num <= 15)
+			depredict = depred_png;
+		else if (pred.num == 2) {
+			/* for 8-bpc TIFF pred. 2, we can reuse PNG Sub */
+			if (pred.bpc == 8) {
+				pred.predfun = pp_sub;	/* predict left */
+				depredict = depred_png;
+			} else {
+				// XXX add general TIFF predictor (bpc != 8)
+				fprintf(stderr, "LZWDecode: /Predictor %d "
+				    "not supported for /BitsPerComponent %d\n",
+				    pred.num, pred.bpc);
+				return NULL;
+			}
+		} else {
+			fprintf(stderr, "LZWDecode: /Predictor %d"
+			    " not supported\n", pred.num);
+			return NULL;
+		}
+
+		/*
+		 * A zero here would divide by zero in the overflow check below
+		 * or yield an empty row buffer that depred_png() indexes into.
+		 */
+		if (pred.colors < 1 || pred.bpc < 1 || pred.columns < 1) {
+			fprintf(stderr, "LZWDecode: invalid predictor "
+			    "parameters\n");
+			return NULL;
+		}
+
+		/* allocate row buffer */
+		if (pred.columns > (INT_MAX - 7) / pred.colors / pred.bpc) {
+			fprintf(stderr, "LZWDecode: overflow\n");
+			return NULL;
+		}
+		pred.rowsz = (pred.colors * pred.bpc * pred.columns + 7) / 8;
+		pred.buf = calloc(1, pred.rowsz);
+		if (pred.buf == NULL)
+			err(1, "LZWDecode");
+	}
+
+	lzwspec *lzw_spec = new_lzw_spec(&b);
+	bind_lzw_spec(lzw_spec);
+
+	/* malformed input is an ordinary failure, not a reason to abort */
+	ret = lzw_decompress(write_lzw_buffer, read_lzw_buffer);
+	if (ret) {
+		fprintf(stderr, "lzw_decompress: error (%d)\n", ret);
+		goto out;
+	}
+
+	/*
+	 * The last byte stored is the EOF marker written by
+	 * write_lzw_buffer(); leave it out, without underflowing when nothing
+	 * was written at all.
+	 */
+	declen = lzw_spec->write_head > 0 ? lzw_spec->write_head - 1 : 0;
+	done = depredict(&pred, lzw_spec->lzw_buf, declen);
+	if (done != 0)	// XXX ITERATIVE
+		goto out;
+
+	// SR::TODO:: Do a H_MAKE rather than a parse and let the caller do the parse
+	res = h_parse(p, pred.out, pred.nout);
+
+out:
+	free(pred.out);
+	free(pred.buf);		/* NULL when no predictor is in use */
+
+	bind_lzw_spec(NULL);
+	delete_lzw_spec(lzw_spec);
+
+	return res;
+}
+
+/*
+ * RunLengthDecode filter: expand the run-length encoded bytes in 'b' with
+ * p_rldstring and parse the expanded data with 'p'.
+ * Returns NULL if either step fails. 'parms' is not consulted.
+ */
+HParseResult *
+RunLengthDecode(const Dict *parms, HBytes b, HParser *p)
+{
+	HParseResult *decoded = h_parse(p_rldstring, b.token, b.len);
+
+	if (decoded == NULL) {
+		fprintf(stderr, "parse error in RunLengthDecode filter\n");
+		return NULL;
+	}
+	assert(decoded->ast && decoded->ast->token_type == TT_BYTES);
+
+	// SR::TODO:: Do a H_MAKE rather than a parse and let the caller do the parse
+	const HBytes payload = decoded->ast->bytes;
+	return h_parse(p, payload.token, payload.len);
+}
+
+/*
+ * ASCIIHexDecode filter: turn ASCII hexadecimal text into binary data and
+ * parse that data with 'p'.
+ *
+ * The filter takes no parameters, so 'parms' is expected to be empty and is
+ * not consulted. Returns NULL if the input is not valid hex data. If the
+ * decoded data does not parse with 'p', the decoding result itself is
+ * returned instead.
+ */
+HParseResult *
+ASCIIHexDecode(const Dict *parms, HBytes b, HParser *p)
+{
+	// XXX debug
+	fprintf(stdout, "ASCIIHexDecode:: bytes=[%.*s]\n", (int)b.len, b.token);
+
+	HParseResult *decoded = h_parse(p_ahexstream, b.token, b.len);
+	if (decoded == NULL) {
+		fprintf(stderr, "parse error in ASCIIHexDecode filter\n");
+		return NULL;
+	}
+	assert(decoded->ast && decoded->ast->token_type == TT_BYTES);
+
+	const HBytes payload = decoded->ast->bytes;
+	fprintf(stdout, "ASCIIHexDecode::string = [%.*s]\n",
+			(int)payload.len, (char*)payload.token);
+
+	// SR::TODO:: Do a H_MAKE rather than a parse and let the caller do the parse
+	HParseResult *parsed = h_parse(p, payload.token, payload.len);
+
+	/* fall back to the decoded stream when it does not parse */
+	return parsed != NULL ? parsed : decoded;
+}
+
+/*
+ * ASCII85Decode filter: turn ASCII base-85 text into binary data and parse
+ * that data with 'p'.
+ *
+ * The filter takes no parameters, so 'parms' is expected to be empty and is
+ * not consulted. Returns NULL if the input is not valid base-85 data. If the
+ * decoded data does not parse with 'p', the decoding result itself is
+ * returned instead.
+ */
+HParseResult*
+ASCII85Decode(const Dict *parms, HBytes b, HParser *p)
+{
+	// XXX debug
+	fprintf(stdout, "ASCII85Decode:: bytes=[%.*s]\n", (int)b.len, b.token);
+
+	HParseResult *decoded = h_parse(p_a85string, b.token, b.len);
+	if (decoded == NULL) {
+		fprintf(stderr, "parse error in ASCII85Decode filter\n");
+		return NULL;
+	}
+	assert(decoded->ast && decoded->ast->token_type == TT_BYTES);
+
+	// SR::TODO:: Do a H_MAKE rather than a parse and let the caller do the parse
+	const HBytes payload = decoded->ast->bytes;
+	HParseResult *parsed = h_parse(p, payload.token, payload.len);
+
+	/* fall back to the decoded stream when it does not parse */
+	return parsed != NULL ? parsed : decoded;
+}
+
+/*
+ * Decode the stream data 'b' as described by the stream dictionary 'd' and
+ * parse the decoded data with 'p'.
+ *
+ * Without a /Filter entry the data is parsed as is. Otherwise /Filter must
+ * name a single supported filter; /DecodeParms, when present and a
+ * dictionary, is handed to it. Returns NULL for filter chains, unsupported
+ * filters and bogus /Filter values.
+ */
+HParseResult *
+decode_stream(const Dict *d, HBytes b, HParser *p)
+{
+	const HParsedToken *entry = dictentry(d, "Filter");
+
+	/* unfiltered stream */
+	if (entry == NULL)
+		return h_parse(p, b.token, b.len);
+
+#ifdef ITERATIVE // XXX
+	/* compile to a CF backend to enable incremental parsing */
+	if (h_compile(p, PB_LLk, NULL) == -1)
+		errx(1, "stream data parser: LL(1) compile failed");
+#endif
+
+	if (entry->token_type != TT_BYTES) {
+		// XXX TT_SEQUENCE would be a filter chain; that's not supported, yet.
+		// But it might also be something bogus, in which case we should fail.
+		return NULL;
+	}
+
+	/* pick the decoder by filter name; NULL = filter not supported */
+	HParseResult *(*decoder)(const Dict *, HBytes, HParser *) =
+	    bytes_eq(entry->bytes, "FlateDecode")     ? FlateDecode :
+	    bytes_eq(entry->bytes, "ASCIIHexDecode")  ? ASCIIHexDecode :
+	    bytes_eq(entry->bytes, "ASCII85Decode")   ? ASCII85Decode :
+	    bytes_eq(entry->bytes, "RunLengthDecode") ? RunLengthDecode :
+	    bytes_eq(entry->bytes, "LZWDecode")       ? LZWDecode :
+	    NULL;
+	if (decoder == NULL)
+		return NULL;
+
+	/* optional filter parameters */
+	const Dict *parms = NULL;
+	const HParsedToken *dp = dictentry(d, "DecodeParms");
+	if (dp != NULL && dp->token_type == TT_Dict)
+		parms = dp->user;
+
+	return decoder(parms, b, p);
+}
+
+/*
+ * Semantic action: turn the parsed position value into a bytes token that
+ * covers everything from that position to the end of the input held in
+ * 'env' (a struct Env).
+ */
+HParsedToken *
+act_rest(const HParseResult *p, void *env)
+{
+	struct Env *aux = env;
+	/* the position is divided by 8 to get a byte offset (bits, as with h_seek) */
+	size_t offset = H_CAST_UINT(p->ast) / 8;
+
+	return H_MAKE_BYTES(aux->input + offset, aux->sz - offset);
+}
+
+/*
+ * Build a parser from h_tell and act_rest(): it yields the remainder of the
+ * input, from the current position on, as a bytes token.
+ */
+HParser *
+p_rest__m(HAllocator *mm__, struct Env *aux)
+{
+	return h_action__m(mm__, h_tell__m(mm__), act_rest, aux);
+}
+
+/*
+ * combine current position with env=(input,sz) into HBytes
+ *
+ * 'env' points to an HBytes holding the start of the input and the number of
+ * bytes to take; the result is a bytes token of that length starting at the
+ * current position.
+ */
+HParsedToken *
+act_take_bytes(const HParseResult *p, void *env)
+{
+	const HBytes *bs = env;
+	size_t offset = H_CAST_UINT(p->ast) / 8;
+
+	/*
+	 * NB: we must allocate a new HBytes struct here because the old one is
+	 * allocated only temporarily for the lifetime of the continuation
+	 * below.
+	 */
+	// DEBUG
+	/* do the arithmetic on the byte pointer and print the size_t as such */
+	fprintf (stdout, "act_take_bytes: Current position (bytes)= %p, len=%zu\n",
+			(const void *)(bs->token + offset), bs->len);
+	return H_MAKE_BYTES(bs->token + offset, bs->len);
+}
+
+/*
+ * Build a parser that yields the next 'n' bytes of the input as a bytes
+ * token and advances past them.
+ *
+ * A request for more bytes than the whole input holds can never succeed, so
+ * it yields a parser that always fails. This also keeps 'n * 8' (h_skip
+ * counts bits) from wrapping around for huge 'n'.
+ */
+HParser *
+p_take__m(HAllocator *mm__, size_t n, struct Env *aux)
+{
+	HParser *skip, *bytes;
+	HBytes *bs;
+
+	if (n > aux->sz)
+		return h_nothing_p__m(mm__);
+
+	/* dummy struct to hold the pair (input,n) */
+	bs = h_alloc(mm__, sizeof(HBytes));
+	bs->token = aux->input;
+	bs->len = n;
+
+	bytes = h_action__m(mm__, h_tell__m(mm__), act_take_bytes, bs);
+	skip  = h_skip__m(mm__, n * 8);
+
+	return h_left__m(mm__, bytes, skip);
+}
+
+
+// Parser for object streams
+HParser *p_objstm__m(HAllocator *, const Dict *);
+
+// Action for stream continuation
+HParsedToken *act_ks_value(const HParseResult *p, void *u);
+/*
+ * Pairs a stream dictionary with the parser for the stream's data.
+ * NOTE(review): presumably the environment handed to act_ks_value() --
+ * confirm against its definition.
+ */
+struct streamspec {
+	Dict *dict;		/* stream dictionary */
+	HParser *parser;	/* data parser */
+};
+
+
+
+/*
+ * ********************************************************************
+ * Start Catalog parsing
+ * ********************************************************************
+ */
+/*
+ * decode the bytes in 'b' according to metadata in the stream dictionary 'd'
+ * and parse the result with 'p'.
+ *
+ * Without a /Filter entry the bytes are parsed as they are.  A /Filter that
+ * is not a single name, or that names a filter not handled below, yields NULL
+ * without parsing anything.
+ *
+ * Returns the parse result; NULL if the filter was rejected or if the filter
+ * function or the parser returned NULL.
+ */
+HParseResult *
+decode_contentstream(const Dict *d, HBytes b, HParser *p)
+{
+	HParseResult *(*filter)(const Dict *, HBytes, HParser *);
+	const Dict *parms = NULL;
+	const HParsedToken *v;
+	HParseResult *res = NULL;
+
+	/*
+	 *  Check if there is additional information in the dictionary
+	 *  that we should use to process the content stream
+	 *
+	 *  If the data in the stream is encoded, a filter will be specified in
+	 *  the dictionary that must be used to decode the data first
+	 *
+	 *  TODO:: Handle arrays of filters (chained) and their decode parameters
+	 */
+	v = dictentry(d, "Filter"); // look for a filter
+
+	if (v != NULL) { // data is encoded
+		if (v->token_type != TT_BYTES) {
+			// XXX TT_SEQUENCE would be a filter chain; that's not supported, yet.
+			// But it might also be something bogus, in which case we should fail.
+			return NULL;
+		}
+
+		if (bytes_eq(v->bytes, "FlateDecode"))
+			filter = FlateDecode;
+		else if (bytes_eq(v->bytes, "ASCIIHexDecode"))
+			filter = ASCIIHexDecode;
+		else if (bytes_eq(v->bytes, "ASCII85Decode"))
+			filter = ASCII85Decode;
+		else if (bytes_eq(v->bytes, "RunLengthDecode"))
+			filter = RunLengthDecode;
+		else if (bytes_eq(v->bytes, "LZWDecode"))
+			filter = LZWDecode;
+		else {		/* filter not supported */
+			fprintf(stderr, "decode_contentstream:: Unsupported Filter [%.*s]\n",
+					(int)v->bytes.len, v->bytes.token);
+			return NULL; /* the caller may still treat the stream as a byte array */
+		}
+
+		/* Check for parameters for the filter */
+		v = dictentry(d, "DecodeParms");
+		if (v && v->token_type == TT_Dict)
+			parms = v->user;
+
+		res = filter(parms, b, p);
+
+		/* Debug -- guard the ast: presumably it is NULL when the parse yields no token */
+		if (res && res->ast) {
+			fprintf(stdout, "decode_contentstream: parsed token type is = %u\n", res->ast->token_type);
+		}
+	} /* The dictionary provided direction for processing the stream */
+
+	/*
+	 * It is possible that we should always process the stream as a content stream
+	 * But not yet sure that covers all case.
+	 */
+	else { // content stream is not encoded
+		res = h_parse(p, b.token, b.len);
+		if (res == NULL) { // Probably does not need to be flagged
+			fprintf(stderr, "decode_contentstream::Text String parse failed!!\n");
+		}
+	}
+
+	/*
+	 * There are other parameters that can be passed in the dictionary
+	 * (the optional keys "F", "FDecodeParms" and "DL").
+	 * They are not being handled currently
+	 */
+	return res;
+}
+
+
+/*
+ * Action for the stream bytes taken by kcontentstream: decode and parse them
+ * as described by the streamspec in 'u'.  Yields the AST of that parse, or
+ * the undecoded bytes token (p->ast) when decode_contentstream returns NULL.
+ */
+HParsedToken *
+act_kcontentstream_value(const HParseResult *p, void *u)
+{
+	struct streamspec *spec = u;
+	HParseResult *decoded;
+
+	decoded = decode_contentstream(spec->dict, H_CAST_BYTES(p->ast), spec->parser);
+	if (decoded == NULL)
+		return (HParsedToken *)p->ast;	/* fall back to the raw bytes */
+	return (HParsedToken *)decoded->ast;
+}
+
+
+
+
+
+const HParsedToken *
+parse_item(struct Env *aux, size_t nr, size_t gen, size_t offset, HParser *p)
+{
+	HParseResult *res;
+	size_t def_nr, def_gen;
+
+	if (offset >= aux->sz) {
+		fprintf(stderr, "%s: position %zu (%#zx) for object %lu %lu is "
+		    "out of bounds\n", aux->infile, offset, offset, nr, gen);
+		return NULL;
+	}
+
+	if (p == NULL) {
+		fprintf(stderr, "parse_item: Attempt to use a NULL parser!\n");
+		return NULL;
+	}
+	fprintf(stdout, "\nparse_item:: Parsing reference = %lu, %lu, at offset = %zu (%#zx)\n",
+			nr, gen, offset, offset);
+	HParser *pItem = h_right(h_seek(offset * 8, SEEK_SET), p);
+	res = h_parse(pItem, aux->input, aux->sz);
+	if (res == NULL) {
 		fprintf(stderr, "%s: error parsing object %zu %zu at position "
 		    "%zu (%#zx)\n", aux->infile, nr, gen, offset, offset);
 		return NULL;
 	}
 	assert(res->ast != NULL && res->ast->token_type == TT_SEQUENCE);
-	/* res->ast = ((nr gen) obj) */
 
+
+
+//	size_t ntok =res->ast->seq->used;
+//	assert(ntok==2 || ntok==3);
 	def_nr = H_INDEX_UINT(res->ast, 0, 0);
 	def_gen = H_INDEX_UINT(res->ast, 0, 1);
 	if (def_nr != nr || def_gen != gen) {
 		fprintf(stderr, "%s: object ID mismatch at position %zu "
-		    "(%#zx): sought %zu %zu, found %zu %zu.\n", aux->infile,
-		    offset, offset, nr, gen, def_nr, def_gen);
+			"(%#zx): sought %zu %zu, found %zu %zu.\n", aux->infile,
+			offset, offset, nr, gen, def_nr, def_gen);
 		return NULL;
 	}
 
@@ -1428,10 +3713,12 @@ parse_obj(struct Env *aux, size_t nr, size_t gen, size_t offset)
 }
 
 const HParsedToken *
-parse_objstm_obj(struct Env *aux, size_t nr, size_t stm_nr, size_t idx)
+parse_objstm_item(struct Env *aux, size_t nr, size_t stm_nr, size_t idx, size_t *offset, HParser *p)
 {
 	XREntry *ent;
-	const HParsedToken *stm;
+	const HParsedToken *stm = NULL;
+
+	*offset = 0; // initialize the offset
 
 	/*
 	 * acquire the stream object
@@ -1448,8 +3735,15 @@ parse_objstm_obj(struct Env *aux, size_t nr, size_t stm_nr, size_t idx)
 	case XR_INUSE:
 		if (ent->n.gen != 0)
 			return NULL;	/* stream replaced */
-		if (ent->obj == NULL)
-			ent->obj = parse_obj(aux, stm_nr, 0, ent->n.offs);
+		if (ent->obj == NULL) {
+			/*
+			 * decode the stream and find the target object in it
+			 */
+			fprintf(stdout, "\nparse_objstm_item:: Parsing object stream id = %lu, %d, at offset = %zu (%#zx)\n",
+					stm_nr, 0, ent->n.offs, ent->n.offs);
+			ent->obj = parse_item(aux, stm_nr, 0, ent->n.offs, p);
+			*offset = ent->n.offs;
+		}
 		break;
 	case XR_OBJSTM:
 		return NULL;		/* invalid: nested streams */
@@ -1460,32 +3754,61 @@ parse_objstm_obj(struct Env *aux, size_t nr, size_t stm_nr, size_t idx)
 		    "%zu (%#zx)\n", aux->infile, ent->n.offs, ent->n.offs);
 		return NULL;
 	}
+	else {
+		/*
+		 * decode the stream and find the target object in it
+		 */
+		// XXX debug
+		fprintf(stdout, "\nparse_objstm_item:: Type of object looked up = %u at offset = %zu (%#zx)\n",
+				stm->token_type, ent->n.offs, ent->n.offs);
+		h_pprintln(stdout, ent->obj);
+		// XXX debug
+		// get the object at index
+
+		stm = H_INDEX_TOKEN(stm, 1);  // the first field is the stream dictionary
+		if (stm->token_type == TT_HParseResult){
+			const HParseResult *res = H_CAST(HParseResult, stm);
+			stm = res->ast;
+			// Now get the index if the index is valid
+			const Objstm *ostm = H_CAST(Objstm, stm);
+			if ( (idx>=0) && (idx < ostm->numObjs)) {
+				stm = ostm->tok[idx].obj;
+			}
+			else
+				return NULL;
+		}
+	}
 
-	/*
-	 * decode the stream and find the target object in it
-	 */
-	return NULL;	// XXX
+	fprintf(stdout, "\nparse_objstm_item:: Returning token of type = %u, \n", stm->token_type);
+	h_pprintln(stdout, stm);	// XXX debug
+	return stm;
 }
 
+
 const HParsedToken *
-resolve(struct Env *aux, const HParsedToken *v)
+resolve_item(struct Env *aux, const HParsedToken *v, size_t *offset, HParser *p)
 {
 	XREntry *ent = NULL;
 	Ref *r;
 
+
 	/* direct objects pass through */
 	if (v == NULL || v->token_type != TT_Ref)
 		return v;
 
 	/* we are looking at an indirect reference */
-	r = v->user;
+	*offset = 0; // initialize the offset
+	r       = v->user;
 
 	/* find the xref entry for this reference */
+	fprintf(stdout, "\nresolve_item:: Looking up xref = %lu, %lu\n", r->nr, r->gen);
 	ent = lookup_xref(aux, r->nr, r->gen);
 	if (ent == NULL)
-		return NULL;			/* obj not found */
-	if (ent->obj != NULL)
-		return resolve(aux, ent->obj);
+		return NULL;			/* obj not found -- xref error */
+	if (ent->obj != NULL) {
+		fprintf(stdout, "\nresolve_item:: ent->obj->token_type = %u\n", ent->obj->token_type);
+		return resolve_item(aux, ent->obj, offset, p);
+	}
 
 	/* parse the object and memoize */
 	ent->obj = v;				/* break loops */
@@ -1496,536 +3819,1120 @@ resolve(struct Env *aux, const HParsedToken *v)
 	case XR_INUSE:
 		if (ent->n.gen != r->gen)
 			return NULL;		/* obj nr reused */
-		ent->obj = parse_obj(aux, r->nr, r->gen, ent->n.offs);
+		fprintf(stdout, "resolve_item:: parse object at offset = %lu\n", ent->n.offs);
+		ent->obj = parse_item(aux, r->nr, r->gen, ent->n.offs, p);
+		*offset = ent->n.offs;
 		break;
 	case XR_OBJSTM:
 		if (r->gen != 0)
 			return NULL;		/* invalid entry! */
-		ent->obj = parse_objstm_obj(aux, r->nr, ent->o.stm, ent->o.idx);
+		fprintf(stdout, "resolve_item:: parse object stream - oid = %lu, stm_oid = %lu, stm_idx = %lu\n",
+				r->nr, ent->o.stm, ent->o.idx);
+		ent->obj = parse_objstm_item(aux, r->nr, ent->o.stm, ent->o.idx, offset, p);
 		break;
 	}
 
-	return resolve(aux, ent->obj);
+	// DEBUG
+	if (ent->obj)
+		fprintf (stdout, "resolve_item: Recursive call to resolve - token_type = %u  at offset = %zu (%#zx)\n",
+				ent->obj->token_type, *offset, *offset);
+	return resolve_item(aux, ent->obj, offset, p);
 }
 
 
-/*
- * stream object handling incl. filters and cross-reference streams
- */
-
-#include <limits.h>	/* INT_MAX */
-#include <stdlib.h>	/* abs() */
-#include <zlib.h>
-#include <err.h>
 
-struct predictor {
-	/* parameters */
-	int num;	/* default: 1 (no prediction) */
-	int colors;	/* default: 1 */
-	int bpc;	/* bits per component; default: 8 */
-	int columns;	/* default: 1 */
+/*
+ * true iff dict's /Parent entry agrees with 'expected': both absent (NULL),
+ * or both references with the same object and generation number.
+ */
+bool
+is_parent(Dict *dict, const HParsedToken *expected)
+{
+	const HParsedToken *parent = dictentry(dict, "Parent");
+	const Ref *have, *want;
 
-	int rowsz;	/* bytes per row = ceil(colors * bpc * columns / 8) */
+	/* no /Parent entry: consistent only if none was expected */
+	if (parent == NULL)
+		return expected == NULL;
+	if (parent->token_type != TT_Ref) {
+		fprintf(stderr, "is_parent: Unexpected token type = %u!\n", parent->token_type);
+		return false;
+	}
+
+	have = H_CAST(Ref, parent);
+	if (expected == NULL) {
+		fprintf(stderr, "is_parent: Inconsistent parent field=<%zu, %zu>, expected = NULL!\n",
+					have->nr, have->gen);
+		return false;
+	}
 
-	/* state */
-	HSuspendedParser *sp;
-	uint8_t	(*predfun)(int, int, int);
-	uint8_t	*buf;		/* previous row of input */
-	uint8_t	 c;		/* byte 'c' (upper left) */
-	int	 x;		/* current position */
-#ifndef ITERATIVE // XXX
-	uint8_t	*out;
-	size_t	 nout;
-#endif
-};
+	assert(expected->token_type == TT_Ref);
+	want = (const Ref *)expected->user;
+	return (have->nr == want->nr) && (have->gen == want->gen);
+}
 
-int
-depred_none(struct predictor *pred, uint8_t *inp, size_t sz)
+/*
+ * true iff dictionary entry 'fn' exists, is a TT_BYTES token and its bytes
+ * equal 'value'.  A missing or non-TT_BYTES entry is reported on stderr.
+ */
+bool
+has_value(Dict *dict, char *fn, char *value)
 {
-#ifdef ITERATIVE // XXX
-	return h_parse_chunk(pred->sp, inp, sz);
-#else
-	pred->out = realloc(pred->out, pred->nout + sz);
-	assert(pred->out != NULL);
-	memcpy(pred->out + pred->nout, inp, sz);
-	pred->nout += sz;
-	return false;
-#endif
+	const HParsedToken *entry = dictentry(dict, fn);
+
+	if (entry == NULL) {
+		// DEBUG
+		fprintf(stderr, "has_value: No such field (%s) in dictionary!\n", fn);
+		return false;
+	}
+	if (entry->token_type != TT_BYTES) {
+		// DEBUG
+		fprintf(stderr, "has_value: Field:%s has token type %u for value!\n",
+				fn, entry->token_type);
+		return false;
+	}
+
+	/* a present byte-string entry: compare it with the wanted value */
+	if (bytes_eq(entry->bytes, value))
+		return true;
+	return false;
 }
 
-uint8_t pp_none(int a, int b, int c)	{ return 0; }
-uint8_t pp_sub(int a, int b, int c)	{ return a; }
-uint8_t pp_up(int a, int b, int c)	{ return b; }
-uint8_t pp_avg(int a, int b, int c)	{ return (a + b) / 2; }
 
-uint8_t
-pp_paeth(int a, int b, int c)
+
+/*
+ * Resolve the reference 'obj' and return the dictionary it designates, provided
+ * that dictionary has /Type /Font; otherwise NULL.  If the reference resolves to
+ * an object stream, the member with obj's object id is examined instead.
+ */
+Dict *
+get_fontdict(const HParsedToken *obj, struct Env* aux)
 {
-	int p	= a + b - c;
-	int pa	= abs(p - a);
-	int pb	= abs(p - b);
-	int pc	= abs(p - c);
+	const HParsedToken *target = NULL;	/* candidate font dictionary token */
+	const HParsedToken *item;
+	Ref *ref;
+
+	assert(obj->token_type == TT_Ref);
+	ref = H_CAST(Ref, obj);
+	fprintf(stdout, "\n\nget_fontdict: Ref = [%zu, %zu]\n\n", ref->nr, ref->gen);
+	item = resolve(aux, obj);
+	if (item == NULL)
+		return NULL;
+	if (item->token_type == TT_Dict)
+		target = item;
+	else if (item->token_type == TT_Objstm) {
+		const Objstm *stm = H_CAST(Objstm, item);
+		for (int i=0; i<stm->numObjs; i++) {
+			if ( (stm->tok[i].oid.nr != ref->nr) ||
+					(stm->tok[i].oid.gen != ref->gen) )
+				continue;
+			target = stm->tok[i].obj;
+			if (target->token_type != TT_Dict) {
+				fprintf(stdout, "\nget_fontdict:Expected Dictionary, Got a token of type=%u\n",
+						target->token_type);
+				return NULL;
+			}
+			break;
+		}
+	}
 
-	if (pa <= pb && pa <= pc) return a;
-	if (pb <= pc) return b;
-	return c;
+	if (target == NULL)
+		return NULL;
+	Dict *dict = H_CAST(Dict, target);
+	return has_value(dict, "Type", "Font") ? dict : NULL;
 }
 
-int
-depred_png(struct predictor *pred, uint8_t *inp, size_t sz)
+/*
+ * Return 'obj' -- or, when obj is an object stream, the first resolved member
+ * of it -- that is a dictionary whose /Parent matches pRefT (see is_parent)
+ * and whose /Type equals 'value'.  Returns NULL when there is no such
+ * dictionary.  Other token types are only reported on stdout.
+ */
+const HParsedToken *
+get_dictoftype(
+		const HParsedToken *obj,
+		const HParsedToken *pRefT,
+		char               *value,
+		struct Env         *aux)
 {
-	/* NB:
-	 * at this point, the specific value of pred->num no longer matters.
-	 * the PNG predictor tags each row with the function used for that row
-	 * and decoding always follows the tag.
-	 */
-	static uint8_t (*predfuns[])(int, int, int) =
-	    {pp_none, pp_sub, pp_up, pp_avg, pp_paeth};
-
-	bool done = false;
-	int bpp;
+	const HParsedToken *found = NULL;
+
+	if (obj->token_type == TT_Dict) {
+		Dict *d = H_CAST(Dict, obj);
+		if (is_parent(d, pRefT) && has_value(d, "Type", value))
+			found = obj;
+	}
+	else if (obj->token_type == TT_Objstm) {
+		Objstm *members = H_CAST(Objstm, obj);
+		for (int i=0; i<members->numObjs; i++) {
+			size_t ioff = 0;
+			const HParsedToken *cand;
+			h_pprintln(stdout, members->tok[i].obj);
+			cand = resolve_item(aux, members->tok[i].obj, &ioff, p_objdef);
+			if (cand == NULL || cand->token_type != TT_Dict)
+				continue;
+			Dict *d = H_CAST(Dict, cand);
+			if (is_parent(d, pRefT) && has_value(d, "Type", value)) {
+				found = cand;
+				break;
+			}
+		}
+	}
+	else {
+		fprintf(stdout, "get_dictoftype: token type not yet handled: %u\n",
+				obj->token_type);
+		fprintf(stdout, "get_dictoftype: Possibly needed for CMAPS\n");
+		h_pprintln(stdout, obj);
+	}
 
-	bpp = (pred->colors * pred->bpc + 7) / 8;	/* bytes per pixel */
-	assert (bpp > 0);
+	// DEBUG: report the parent that was asked for
+	if (pRefT) {
+		fprintf(stdout, "\nget_dictoftype: Parent = ");
+		pp_ref(stdout, pRefT, 0, 0);
+	}
+	else
+		fprintf(stdout, "get_dictoftype: Parent = NULL");
 
-	for (size_t i=0; i < sz && !done; i++) {
-		int x = pred->x;
-		int a = x<bpp ? 0 : pred->buf[x-bpp];	/* left */
-		int b = pred->buf[x];			/* up */
-		int c = pred->c;			/* up left */
+	// DEBUG: report the outcome
+	if (found) {
+		fprintf(stdout, "\nget_dictoftype: Type = %s\n", value);
+		pp_dict(stdout, found, 0, 0);
+	}
+	else {
+		fprintf(stdout, "\nget_dictoftype: Null dictionary of Type = %s\n", value);
+	}
+	return found;
+}
 
-		if (pred->predfun == NULL) {	/* we are before a new row */
-			/* select predictor function */
-			if (inp[i] > 4) {
-				fprintf(stderr, "unknown PNG predictor %d\n",
-				    (int)inp[i]);
-				return -1;
-			}
-			pred->predfun = predfuns[inp[i]];
 
-			/* consume the tag */
-			if (++i == sz)
-				break;
-		}
+/*
+ * This continuation takes the content stream, decodes it if necessary and returns
+ * the byte stream for concatenation with other byte streams prior to text extraction.
+ * It is very similar to kstream in approach.
+ *
+ * x = (dict ...); the result is the sequence (dict, value).  The continuation
+ * fails (p_fail) unless /Length resolves to a non-negative integer.
+ */
+HParser *
+kbyteostream(HAllocator *mm__, const HParsedToken *x, void *env)
+{
 
-		/* undo the prediction and save the decoded value */
-		pred->buf[x] = inp[i] + pred->predfun(a, b, c);
+	struct Env *aux = env;
+	HParsedToken *dict_t = H_INDEX_TOKEN(x, 0);
+	Dict *dict = H_CAST(Dict, dict_t);
+	const HParsedToken *v = NULL;
+	HParser *bytes_p, *dict_p, *value_p;
+	struct streamspec *spec;
+	size_t sz=0, nOffset=0;
 
-		/* advance to the right */
-		pred->c = b;
-		pred->x = ++x;
+	fprintf(stdout, "kbyteostream: dictionary\n");
+	pp_dict(stdout, dict_t, 5, 0);
 
-		/* when row complete, pass it to parser and start a new row */
-		if (x == pred->rowsz) {
-#ifdef ITERATIVE // XXX
-			done = h_parse_chunk(pred->sp, pred->buf, pred->rowsz);
-#else
-			pred->out = realloc(pred->out, pred->nout + pred->rowsz);
-			assert(pred->out != NULL);
-			memcpy(pred->out + pred->nout, pred->buf, pred->rowsz);
-			pred->nout += pred->rowsz;
-#endif
-			pred->c = pred->x = 0;
-			if (pred->num != 2)	/* support for 8-bpc TIFF */
-				pred->predfun = NULL;
-		}
+	/* look for the Length entry -- could be a reference */
+	v = dictentry(dict, "Length");
+	v = resolve_item(aux, v, &nOffset, p_objdef);		/* resolve indirect references */
+	if (v == NULL || v->token_type != TT_SINT || v->sint < 0) {
+		if (v == NULL)
+			fprintf(stderr, "kbyteostream: stream /Length missing\n");
+		else if (v -> token_type != TT_SINT)
+			fprintf(stderr, "kbyteostream: stream /Length not an integer\n");
+		else	/* remaining case: v->sint < 0 */
+			fprintf(stderr, "kbyteostream: stream /Length negative\n");
+		return p_fail;
 	}
 
-	return done;
+	sz = (size_t)v->sint;
+
+	dict_p	= p_return__m(mm__, dict_t);
+	bytes_p = p_take__m(mm__, sz, aux);
+
+	spec = h_alloc(mm__, sizeof(struct streamspec));
+	spec->dict = dict;
+	spec->parser = p_bytestream;
+
+	value_p = h_action__m(mm__, bytes_p, act_ks_value, spec);
+
+	return h_sequence__m(mm__, dict_p, value_p, NULL);
 }
 
-HParseResult *
-FlateDecode(const Dict *parms, HBytes b, HParser *p)
+
+
+
+
+/*
+ * This continuation takes the content stream and processes it for text extraction.
+ * It is very similar to kstream in approach: it takes /Length bytes and has
+ * act_kcontentstream_value decode and parse them -- with the text stream parser
+ * when the dictionary has no /Type, with the object stream parser for
+ * /Type /ObjStm.  The result is the sequence (dict, value).
+ * The continuation fails (p_fail) for any other /Type, or unless /Length
+ * resolves to a non-negative integer.
+ *
+ * x = (txtobj ...)
+ */
+HParser *
+kcontentstream(HAllocator *mm__, const HParsedToken *x, void *env)
 {
-	size_t const BUFSIZE = 8 * 1024;
-	uint8_t *buf;
-#ifdef ITERATIVE // XXX
-	HSuspendedParser *sp;
-#endif
-	HParseResult *res;
-	const HParsedToken *v;
-	size_t sz;
-	int done;
-	z_stream strm = {0};
-	int ret;
-	struct predictor pred = {1, 1, 8, 1};
-	int (*depredict)(struct predictor *, uint8_t *, size_t);
 
-	/* set up the predictor (if any) */
-	#define SETPARM(VAR,STR) do {					\
-		v = dictentry(parms, (STR));				\
-		if (v != NULL) {					\
-			if (v->token_type != TT_SINT || v->sint < 0)	\
-				return NULL;				\
-			VAR = v->sint;					\
-		} } while(0)
-	SETPARM(pred.num,	"Predictor");
-	SETPARM(pred.colors,	"Colors");
-	SETPARM(pred.bpc,	"BitsPerComponent");
-	SETPARM(pred.columns,	"Columns");
-	#undef SETPARM
-	if (pred.num == 1)
-		depredict = depred_none;
+	struct Env *aux = env;
+	HParsedToken *dict_t = H_INDEX_TOKEN(x, 0);
+	Dict *dict = H_CAST(Dict, dict_t);
+	const HParsedToken *v = NULL;
+	HParser *bytes_p, *dict_p, *value_p;
+	struct streamspec *spec;
+	size_t sz=0, nOffset=0;
+
+	fprintf(stdout, "kcontentstream: dictionary\n");
+	pp_dict(stdout, dict_t, 5, 0);
+
+	/* look for the Length entry -- could be a reference */
+	v = dictentry(dict, "Length");
+	v = resolve_item(aux, v, &nOffset, p_objdef);		/* resolve indirect references */
+	if (v == NULL || v->token_type != TT_SINT || v->sint < 0) {
+		if (v == NULL)
+			fprintf(stderr, "kcontentstream: stream /Length missing\n");
+		else if (v -> token_type != TT_SINT)
+			fprintf(stderr, "kcontentstream: stream /Length not an integer\n");
+		else	/* remaining case: v->sint < 0 */
+			fprintf(stderr, "kcontentstream: stream /Length negative\n");
+		return p_fail;
+	}
+
+	sz = (size_t)v->sint;
+
+	dict_p	= p_return__m(mm__, dict_t);
+	bytes_p = p_take__m(mm__, sz, aux);
+
+	spec = h_alloc(mm__, sizeof(struct streamspec));
+	spec->dict = dict;
+
+	v = dictentry(dict, "Type");
+	if (v == NULL)	// XXX -> custom type
+		spec->parser = p_textstream;
+	else if ( (v->token_type == TT_BYTES) && bytes_eq(v->bytes, "ObjStm") )
+		spec->parser = p_objstm__m(mm__, dict);
 	else {
-		if (pred.num >= 10 && pred.num <= 15)
-			depredict = depred_png;
-		else if (pred.num == 2) {
-			/* for 8-bpc TIFF pred. 2, we can reuse PNG Sub */
-			if (pred.bpc == 8) {
-				pred.predfun = pp_sub;	/* predict left */
-				depredict = depred_png;
-			} else {
-				// XXX add general TIFF predictor (bpc != 8)
-				fprintf(stderr, "FlateDecode: /Predictor %d "
-				    "not supported for /BitsPerComponent %d\n",
-				    pred.num, pred.bpc);
-				return NULL;
+		fprintf(stdout, "kcontentstream: Not a text or object stream!\n");
+		return p_fail;
+	}
+
+	value_p = h_action__m(mm__, bytes_p, act_kcontentstream_value, spec);
+
+	return h_sequence__m(mm__, dict_p, value_p, NULL);
+}
+
+/*
+ * Build a stream dictionary token "<< /Length len >>" by formatting the text
+ * into a buffer from 'arena' and parsing it with p_dict.
+ */
+const
+HParsedToken *create_strmdict(HArena *arena, size_t len)
+{
+	const size_t bufSz = 48;	/* ample: the fixed text plus any 64-bit length */
+	char *buf = h_arena_malloc(arena, bufSz);
+	assert(buf);
+	int n = snprintf(buf, bufSz, "<< /Length %zu >>", len);	/* len is a size_t */
+	assert(n > 0 && (size_t)n < bufSz);	/* formatted completely, not truncated */
+	const HParseResult *res = h_parse(p_dict, (const uint8_t *)buf, (size_t)n);
+	assert (res && res->ast);
+	return res->ast;
+}
+
+/*
+ * Collect the attributes of the font dictionary 'fontdict' into a Fontinfo_T
+ * allocated from fontdict's arena.  Fields whose entry is absent (or cannot
+ * be resolved) stay NULL; the name is only recorded if /Name equals 'name'.
+ * /Encoding, /FontDescriptor, /ToUnicode and /DescendantFonts are passed
+ * through resolve_item first, since they may be indirect references.
+ */
+Fontinfo_T *
+getFontinfo(const Dict *fontdict, char *name, struct Env *aux)
+{
+	assert (fontdict);
+	Fontinfo_T *fontinfo = h_arena_malloc(fontdict->arena, sizeof(Fontinfo_T));
+	assert(fontinfo);	/* must hold before the fields are written */
+	fontinfo->name            = NULL;
+	fontinfo->type            = NULL;
+	fontinfo->basefont        = NULL;
+	fontinfo->encoding        = NULL;
+	fontinfo->descriptor      = NULL;
+	fontinfo->toUnicode       = NULL;
+	fontinfo->descendantFonts = NULL;
+
+	const HParsedToken *item = dictentry(fontdict, "Name");
+	if (item) {
+		assert(item->token_type == TT_BYTES);
+		if (bytes_eq(item->bytes, name)) {
+			fontinfo->name = h_arena_malloc(fontdict->arena, sizeof(char)*(item->bytes.len+1));
+			memcpy(fontinfo->name, (char *)item->bytes.token, item->bytes.len);
+			fontinfo->name[item->bytes.len] = '\0';
+			fprintf(stdout, "getFontinfo: Name = %s\n", fontinfo->name);
+		}
+	}
+	item = dictentry(fontdict, "Subtype");
+	if (item) {
+		assert (item->token_type == TT_BYTES);
+		fontinfo->type = h_arena_malloc(fontdict->arena, sizeof(char)*(item->bytes.len+1));
+		memcpy(fontinfo->type, (char *)item->bytes.token, item->bytes.len);
+		fontinfo->type[item->bytes.len] = '\0';
+		fprintf(stdout, "getFontinfo: Subtype = %s\n", fontinfo->type);
+	}
+	item = dictentry(fontdict, "BaseFont");
+	if (item) {
+		assert (item->token_type == TT_BYTES);
+		fontinfo->basefont = h_arena_malloc(fontdict->arena, sizeof(char)*(item->bytes.len+1));
+		memcpy(fontinfo->basefont, (char *)item->bytes.token, item->bytes.len);
+		fontinfo->basefont[item->bytes.len] = '\0';
+		fprintf(stdout, "getFontinfo: Basefont = %s\n", fontinfo->basefont);
+	}
+	size_t offset; // This is available if needed
+	item = dictentry(fontdict, "Encoding");
+	if (item) {
+		// dereference it if it is a reference
+		offset = 0;
+		item = resolve_item(aux, item, &offset, p_objdef);
+		if (item) {  // TODO: Failure ==> xref error -- Figure out how to handle
+			if (item->token_type == TT_BYTES) {
+				fontinfo->encoding = h_arena_malloc(fontdict->arena, sizeof(char)*(item->bytes.len+1));
+				memcpy(fontinfo->encoding, (char *)item->bytes.token, item->bytes.len);
+				fontinfo->encoding[item->bytes.len] = '\0';
+				fprintf(stdout, "getFontinfo: encoding = %s at offset %zu (%#zx)\n",
+						fontinfo->encoding, offset, offset);
+			}
+			else if (item->token_type == TT_Dict) {
+				pp_dict(stdout, item, 0, 0);
+				const Dict *encodingDict = H_CAST(Dict, item);
+				item = dictentry(encodingDict, "BaseEncoding");
+				if (item && item->token_type == TT_BYTES) {	/* only a byte string can be copied */
+					fontinfo->encoding = h_arena_malloc(fontdict->arena, sizeof(char)*(item->bytes.len+1));
+					memcpy(fontinfo->encoding, (char *)item->bytes.token, item->bytes.len);
+					fontinfo->encoding[item->bytes.len] = '\0';
+					fprintf(stdout, "getFontinfo: encoding = %s\n", fontinfo->encoding);
+				}
+			}
+			else {
+				fprintf(stdout, "\nUnexpected token type in parsing font -Encoding- attribute -"
+						"token_type = %u\n", item->token_type);
+			}
-		} else {
-			fprintf(stderr, "FlateDecode: /Predictor %d"
-			    " not supported\n", pred.num);
-			return NULL;
 		}
+	}
 
-		/* allocate row buffer */
-		if (pred.columns > (INT_MAX - 7) / pred.colors / pred.bpc) {
-			fprintf(stderr, "FlateDecode: overflow\n");
-			return NULL;
+	item = dictentry(fontdict, "FontDescriptor");
+	if (item) {
+		// dereference the reference
+		offset = 0;
+		item = resolve_item(aux, item, &offset, p_objdef);
+		if (item) { // TODO: Failure ==> xref error -- Figure out how to handle
+			fprintf(stdout, "getFontinfo: FontDescriptor item description:\n");
+			h_pprintln(stdout, item);
+			item = get_dictoftype(item, NULL, "FontDescriptor", aux);
+			if (item) {
+				fontinfo->descriptor = item;
+				fprintf(stdout, "getFontinfo: FontDescriptor at offset %zu (%#zx):\n",
+						offset, offset);
+				pp_dict(stdout, item, 0, 0);
+			}
 		}
-		pred.rowsz = (pred.colors * pred.bpc * pred.columns + 7) / 8;
-		pred.buf = calloc(1, pred.rowsz);
-		if (pred.buf == NULL)
-			err(1, "FlateDecode");
 	}
 
-	/* set up zlib */
-	// XXX pass our allocator to zlib
-	ret = inflateInit(&strm);
-	if (ret != Z_OK)
-		errx(1, "inflateInit: %s (%d)", strm.msg, ret);
-	buf = malloc(BUFSIZE);
-	if (buf == NULL)
-		err(1, "FlateDecode");
+	item = dictentry(fontdict, "ToUnicode");
+	if (item) {
+		// dereference the reference
+		offset = 0;
+		item = resolve_item(aux, item, &offset, p_objdef);
+		if (item) { // TODO: Failure ==> xref error -- Figure out how to handle
+			fprintf(stdout, "getFontinfo: toUnicode item description:\n");
+			h_pprintln(stdout, item);
+			item = get_dictoftype(item, NULL, "ToUnicode", aux);
+			if (item) {
+				fontinfo->toUnicode = item;
+				fprintf(stdout, "getFontinfo: toUnicode at offset %zu (%#zx):\n",
+						offset, offset);
+				pp_dict(stdout, item, 0, 0);
+			}
+		}
+	}
 
-#ifdef ITERATIVE // XXX
-	/* initialize target parser */
-	sp = h_parse_start(p);
-	assert(sp != NULL);
-	pred.sp = sp;
-#endif
+	item = dictentry(fontdict, "DescendantFonts");
+	if (item) {
+		// dereference the reference
+		offset = 0;
+		item = resolve_item(aux, item, &offset, p_objdef);
+		if (item) { // TODO: Failure ==> xref error -- Figure out how to handle
+			fprintf(stdout, "getFontinfo: descendantFonts item description:\n");
+			h_pprintln(stdout, item);
+			item = get_dictoftype(item, NULL, "DescendantFonts", aux);
+			if (item) {
+				fontinfo->descendantFonts = item;
+				fprintf(stdout, "getFontinfo: descendantFonts at offset %zu (%#zx):\n",
+						offset, offset);
+				pp_dict(stdout, item, 0, 0);
+			}
+		}
+	}
 
-	done = 0;
-	strm.avail_in = b.len;
-	strm.next_in = (unsigned char *)b.token;
-	do {
-		strm.avail_out = BUFSIZE;
-		strm.next_out = buf;
+	return fontinfo;
+}
 
-		ret = inflate(&strm, Z_NO_FLUSH);
-		if (ret != Z_STREAM_END && ret != Z_OK) {
-			fprintf(stderr, "inflate: %s (%d)\n", strm.msg, ret);
-			break;
+/*
+ * Look up the font selected in the text state (state->font) in the font list
+ * of the state's page resources and return its attributes (see getFontinfo).
+ * Returns NULL if no font is selected, if the page has no fonts of its own
+ * (inheritance is not yet supported), or if the name does not lead to a
+ * font dictionary.
+ */
+Fontinfo_T *
+lookup_font(TextState_T *state, struct Env *aux)
+{
+	Fontinfo_T         *fontinfo = NULL;
+	PtNode_T           *page     = state->page;
+	struct TextEntry_S *fentry   = state->font;
+
+	assert ( (page->type == PG_NODE) || (page->type == XO_NODE) );
+	if (fentry) {
+		assert ( fentry->type == TS_Tf);
+		char *fn = fentry->fref.fn;
+		if (page->pgRsrc  && (page->pgRsrc->numFonts > 0)) {
+			Dict *fontlist = H_CAST(Dict, page->pgRsrc->fonts); // font list dictionary in page
+			assert(page->pgRsrc->numFonts == fontlist->used);
+			// the name is a dictionary key: one lookup suffices, no need
+			// to repeat it once per font in the list
+			const HParsedToken *item = dictentry(fontlist, fn);
+			Dict *dict = item ? get_fontdict(item, aux) : NULL;
+			if (dict) {
+				fontinfo = getFontinfo(dict, fn, aux);
+				fprintf(stdout, "\n\nlookup_font: fontinfo = %p\n\n", (void*)fontinfo);
+			}
+		}
+		else { // inherit
+			// DEBUG
+			fprintf(stdout, "\n\nlookup_font: Font Resource not found for FontState:\n");
+			pp_fontstate(stdout, state);
+			fprintf(stdout, "\n\nlookup_font: Inheritance not yet supported!\n\n");
 		}
+	}
+	return (fontinfo);
+}
 
-		sz = BUFSIZE - strm.avail_out;
-		done = depredict(&pred, buf, sz);
-	} while (done == 0 && ret == Z_OK);
+/*
+ * Resolve the /Font entry of the resource dictionary 'dict_t' and record it in
+ * pgRsrc (fonts, numFonts) unless pgRsrc already has fonts.
+ */
+void parse_fonts(const HParsedToken *dict_t, RsrcDict_T *pgRsrc, struct Env *aux)
+{
+	Dict               *rsrcdict = H_CAST(Dict, dict_t);
+	size_t              offset = 0;
+	const HParsedToken *item = resolve_item(aux, dictentry(rsrcdict, "Font"), &offset, p_objdef);
+	if (item == NULL)	// TODO: Failure ==> xref error -- Figure out how to handle
+		return;
+	assert(item->token_type == TT_Dict);
+	Dict *fontlist = H_CAST(Dict, item);
+	fprintf(stdout, "parse_fonts: Num fonts used in page = %zu \n", fontlist->used);	/* size_t: %zu */
+	pp_dict(stdout, item, 0, 0);
+	if (pgRsrc->fonts) {
+		fprintf(stderr, "\n\nparse_fonts: Attempt to add fonts -- Supported??\n\n");
+		return;
+	}
+	pgRsrc->fonts = item;
+	pgRsrc->numFonts = fontlist->used;
+	fprintf(stdout, "\n\nparse_fonts: Number of fonts used = %zu\n\n", pgRsrc->numFonts);
+}
 
-#ifdef ITERATIVE // XXX
-	res = h_parse_finish(sp);
-		// XXX always return NULL on error?
-#else
-	res = h_parse(p, pred.out, pred.nout);
-	free(pred.out);
-#endif
-	inflateEnd(&strm);
-	free(pred.buf);
-	free(buf);
+void parse_rsrcdict(HArena *arena, const HParsedToken *dict_t, PtNode_T *pgNode, struct Env *aux);
 
-	if (done == -1)
-		return NULL;
-	return res;
+
+/*
+ * Allocate an empty XoNode_T from 'arena', append it to the catalog's
+ * xobject list (xoHead/xoTail) and count it in xoCount.
+ */
+XoNode_T*
+create_XoNode(HArena *arena, Catalog_T *catalog)
+{
+	XoNode_T *fresh = h_arena_malloc(arena, sizeof(XoNode_T));
+	fresh->name = NULL;
+	fresh->node = NULL;
+	fresh->next = NULL;
+	if (catalog->xoHead == NULL)
+		catalog->xoHead = fresh;
+	if (catalog->xoTail != NULL)
+		catalog->xoTail->next = fresh;	/* link behind the current tail */
+	catalog->xoTail = fresh;
+	catalog->xoCount += 1;
+	return fresh;
 }
 
-/* LZW helpers */
 
-typedef struct
+/*
+ * parse_xobject -- append one XoNode_T to the catalog for every entry of the
+ * "XObject" sub-dictionary of the resource dictionary dict_t.  Each node records
+ * parent->me as its parent and pgRsrc as its resources.  For an entry whose
+ * Subtype is "Form" and which has "Resources", those are passed to parse_rsrcdict
+ * (XObject resources can be recursively specified) and the stream content is
+ * parsed with p_textstream; a successful parse is stored in xn.textStream.
+ */
+void parse_xobject(
+		const HParsedToken *dict_t,
+		PtNode_T           *parent,
+		RsrcDict_T         *pgRsrc,
+		struct Env         *aux)
 {
-	uint8_t *lzw_buf;
-	size_t total_buf_size;
-	size_t write_head;
-	size_t write_tail;
-	uint8_t write_checksum;
-	size_t eof_loc;
 
-	HBytes *input_stream;
-	size_t read_head;
-	size_t read_tail;
-	uint8_t read_checksum;
-} lzwspec;
+	Dict               *xodict = H_CAST(Dict, dict_t);
+	const HParsedToken *item   = NULL;   // generic token
+	const HParsedToken *tok    = NULL;   // resolved token
+	const Dict         *xobj_d = NULL;  // dictionary associated with reference token
+	const HParsedToken *xobj_t = NULL;  // xobject resource token
+	size_t              offset = 0;
+
+	item = dictentry(xodict, "XObject");
+	if (item) item = resolve_item(aux, item, &offset, p_objdef);   // like the other call sites: resolve only an existing entry
+	if (item) {  // test for no XObject key (TODO: Failure ==> xref error)
+		assert(item->token_type == TT_Dict);
+		Dict *xolist = H_CAST(Dict, item);
+		// DEBUG
+		fprintf(stdout, "\nparse_xobject: Node for Parent = ");
+		if (parent->me) pp_ref(stdout, parent->me, 0, 0);
+		fprintf(stdout, "\nOld XO Count = %lu, Num xobjects used in page = %zu \n",
+				aux->catalog.xoCount, xolist->used);
+		pp_dict(stdout, item, 0, 0);
+		// DEBUG
+		// NB: create_XoNode() bumps catalog.xoCount once per node, so the count
+		// must not also be incremented here (that counted every XObject twice).
+		// work on each element of the dictionary
+		for (size_t i=0; i<xolist->used; i++) {
+			XoNode_T *xobj_r = create_XoNode(xodict->arena, &aux->catalog);
+			HBytes k = H_INDEX_BYTES(xolist->elements[i], 0);
+			xobj_r->name = h_arena_malloc(xodict->arena, k.len+1);
+			memcpy(xobj_r->name, k.token, k.len);
+			xobj_r->name[k.len] = '\0';
+			const HParsedToken *ref = H_INDEX_TOKEN(xolist->elements[i], 1);
+			assert(ref->token_type == TT_Ref);
+			xobj_r->node =  h_arena_malloc(xodict->arena, sizeof(PtNode_T));
+			xobj_r->node->type   = XO_NODE;
+			xobj_r->node->parent = parent->me;
+			xobj_r->node->pgRsrc = pgRsrc;
+			xobj_r->node->me     = ref;
+			xobj_r->node->offset = 0;    // TODO: get the offset to the stream
+			tok = resolve_item(aux, ref, &xobj_r->node->offset, p_objdef);
+			if (tok == NULL) continue;
+
+			// DEBUG
+			fprintf(stdout, "\nparse_xobject: XObject Reference = : ");
+			pp_ref(stdout, ref, 0, 0);
+			fprintf(stdout, "\n");
+			h_pprintln(stdout, tok);
+			// DEBUG
+
+			// tok can be an image dictionary -- which we are ignoring.
+			// xobj_t is recomputed for every entry so that a value left over from
+			// a previous iteration is never mistaken for this entry's dictionary.
+			xobj_t = (tok->token_type == TT_SEQUENCE) ? H_INDEX_TOKEN(tok, 0) : NULL;
+			if (xobj_t == NULL) continue;   // expecting an XObject dictionary token
+
+			xobj_t = get_dictoftype(xobj_t, NULL, "XObject", aux); // test it
+			if (xobj_t == NULL) continue;
+
+			xobj_d = H_CAST(Dict, xobj_t);
+			item = dictentry(xobj_d, "Subtype");
+			if (item == NULL || item->token_type != TT_BYTES)
+				continue; // no "Subtype" field
+
+			/*
+			 *  TODO:: external objects can be images, forms, or postscript objects
+			 *  We are only handling forms at the moment
+			 */
+			if (bytes_eq(item->bytes, "Form")) {
+				fprintf(stdout, "\n\nparse_xobject:: Parsing Form XObject\n");
+				const HParsedToken *xoRsrc_t;
+				const HParsedToken *rsrcdict_t;
+				size_t offset = 0;
+				xoRsrc_t = dictentry(xobj_d, "Resources");
+				if (xoRsrc_t) {
+					fprintf(stdout, "\n\nparse_xobject: Found resources in node\n");
+					rsrcdict_t = resolve_item(aux, xoRsrc_t, &offset, p_objdef);
+					if (rsrcdict_t == NULL) continue;   // unresolvable reference: skip this XObject
+					fprintf(stdout, "\nparse_xobject: Resource token type = %u\n",rsrcdict_t->token_type);
+					h_pprintln(stdout, rsrcdict_t);
+
+					parse_rsrcdict(xodict->arena, rsrcdict_t, xobj_r->node, aux);
+					// set the text state to this xobject
+					// parse the text stream, which is field 2 of the sequence
+					aux->tstate.page = xobj_r->node;
+					xobj_t = H_INDEX_TOKEN(tok, 1);  // expecting an HParseResult token
+					const HParseResult *res = H_CAST(HParseResult, xobj_t);
+					if (res == NULL || res->ast == NULL) continue;   // act_ks_value wraps a NULL result when decoding fails
+					fprintf(stdout, "\nparse_xobject: Byte Stream = : ");
+					h_pprintln(stdout, res->ast);
+					HBytes stm = H_CAST_BYTES(res->ast);
+					res = h_parse(p_textstream, stm.token, stm.len);
+
+					if (res) { // text found in stream
+						// DEBUG
+						fprintf(stdout, "\nparse_xobject: Parsing text : ");
+						h_pprintln(stdout, res->ast);
+//						xobj_r->node->xn.dict       = 0;
+						xobj_r->node->xn.textStream = res->ast;
+					}
+				}
+			}
+		}
+	}
+}
+void parse_rsrcdict(HArena *arena, const HParsedToken *dict_t, PtNode_T *pgNode, struct Env *aux)
+{
+	RsrcDict_T        *rsrc = NULL;
+	// Build the resource record for pgNode from dict_t (a TT_Dict, or a TT_Objstm whose
+	// TT_Dict members are each processed) and store it in pgNode->pgRsrc.
+	// Allocate the record only for the two supported token types
+	if ( (dict_t->token_type == TT_Dict) || (dict_t->token_type == TT_Objstm) ) {
+		rsrc = h_arena_malloc(arena, sizeof(RsrcDict_T));
+		rsrc->resources   = dict_t;
+		rsrc->fonts       = NULL;
+		rsrc->numFonts    = 0;
+		rsrc->xobj        = NULL;
+//		rsrc->seenCmaps   = NULL;
+//		rsrc->numCmapsSeen = 0;
+	}
+	else {
+		fprintf (stderr, "\nparse_rsrcdict: What token type is this? - %u\n",
+				dict_t->token_type);
+	}
 
-lzwspec *cur_lzw_spec;
+	// Resource is a simple dictionary
+	if (dict_t->token_type == TT_Dict) {
+		// DEBUG
+		fprintf(stdout, "\nparse_rsrcdict: Simple dictionary:\n");
+		h_pprintln(stdout, dict_t);
 
-/* used by write_lzw_buffer to get more space for decoding if needed */
-void
-grow_lzw_buffer(size_t amount)
-{
-	uint8_t *ret_buf = realloc(cur_lzw_spec->lzw_buf, (cur_lzw_spec->total_buf_size+amount) * sizeof(uint8_t));
-	if(ret_buf != NULL)
-	{
-		cur_lzw_spec->total_buf_size += amount;
-		cur_lzw_spec->lzw_buf = ret_buf;
+		parse_fonts(dict_t, rsrc, aux);
+		parse_xobject(dict_t, pgNode, rsrc, aux);   // may call back into parse_rsrcdict for Form XObjects
 	}
-	else
-	{
-		fprintf(stderr, "LZWDecode: h_arena_realloc() failed");
-		return;
+	else if (dict_t->token_type == TT_Objstm) {   // object stream: handle every dictionary it contains
+		const Objstm *strmc = H_CAST(Objstm, dict_t);
+		fprintf(stdout, "\nparse_rsrcdict: stream object -numObjs = %lu\n", strmc->numObjs);
+		h_pprintln(stdout, dict_t);
+
+		for (int i=0; i<strmc->numObjs; i++) {
+			fprintf(stdout, "\nparse_rsrcdict: oid = [%zu, %zu]\n",
+					strmc->tok[i].oid.nr, strmc->tok[i].oid.gen);
+			if (strmc->tok[i].obj->token_type == TT_Dict) {   // non-dictionary members are skipped
+				parse_fonts(strmc->tok[i].obj, rsrc, aux);
+				parse_xobject(strmc->tok[i].obj, pgNode, rsrc, aux);
+			}
+		}
 	}
+
+	pgNode->pgRsrc = rsrc;   // NULL when dict_t had neither of the supported token types
+	return;
+
 }
 
-lzwspec *
-new_lzw_spec(HBytes *bytes)
+/* pp_pgrsrc -- print the font count and the resources/fonts/xobj tokens of pgRsrc (no-op for NULL). */
+void pp_pgrsrc(FILE *stream, const RsrcDict_T *pgRsrc)
 {
-	size_t const BUFSIZE = sizeof(uint8_t) * 1024;
-	lzwspec *ret = malloc(sizeof(lzwspec));
-	memset(ret, 0, sizeof(lzwspec));
-	ret->input_stream = bytes;
-	ret->lzw_buf = malloc(BUFSIZE);
-	ret->total_buf_size = BUFSIZE;
-	return ret;
+	if (pgRsrc == NULL) return;
+	fprintf(stream, "\npp_pgrsrc: Num fonts used in this page = %zu\n", pgRsrc->numFonts);   // numFonts is a size_t
+	fprintf(stream, "pp_pgrsrc: Resources\n");
+	if (pgRsrc->resources) h_pprintln(stream, pgRsrc->resources);
+	fprintf(stream, "pp_pgrsrc: Fonts\n");
+	if (pgRsrc->fonts)     h_pprintln(stream, pgRsrc->fonts);
+	fprintf(stream, "pp_pgrsrc: XObjects\n");
+	if (pgRsrc->xobj)      h_pprintln(stream, pgRsrc->xobj);
 }
 
-void
-delete_lzw_spec(lzwspec *spec)
+/* pp_ptnode -- dump a page tree node: its parent token, its own token and its resources, each only when set. */
+void pp_ptnode(FILE *stream, const PtNode_T *node)
 {
-	free(spec->lzw_buf);
-	free(spec);
+	fputs("\nPage Tree Node Info:\n" "pp_ptnode: parent = ", stream);
+	if (node->parent != NULL) { h_pprintln(stream, node->parent); }
+	fputs("\npp_ptnode: me = ", stream);
+	if (node->me != NULL)     { h_pprintln(stream, node->me); }
+	if (node->pgRsrc != NULL) { pp_pgrsrc(stream, node->pgRsrc); }
 }
 
-void
-bind_lzw_spec(lzwspec *spec)
+/*
+ * parse_pagenode -- fill in myNode for the page whose dictionary is myDict.
+ * Verifies that the page's "Parent" entry refers to `parent`, parses the page's
+ * own "Resources" (pgRsrc stays NULL when there are none ==> inherited) and
+ * stores the text stream found via "Contents" in pn.textStream (else NULL).
+ */
+void parse_pagenode (
+		struct Env         *aux,
+		PtNode_T           *myNode,
+		const HParsedToken *myRef,     // my page node reference
+		const Dict         *myDict,    // my page node specification
+		const HParsedToken *parent,
+		HArena             *arena)
 {
-	cur_lzw_spec = spec;
-}
 
+	const HParsedToken *item        = NULL;
+	const HParsedToken *contents_t  = NULL; // dictionary token
+	Ref                *contents_r  = NULL;
+	const HParsedToken *contents    = NULL; // resolved token
+	const HParsedToken *entry       = NULL;
+	const HParsedToken *rsrcdict_t  = NULL;
+	size_t             nOffset = 0;
 
-#include "lzw-lib.h"
+	fprintf(stdout, "\nparse_pagenode: parsing Page Node = ");
+	pp_ref(stdout, myRef, 0, 0);
 
-/* Buffer writer function for the lzw-ab implementation, with a fixed signature.
- * Although the type is defined as int, it is expected to write one byte at a time.
- * Modifies cur_lzw_spec. Set up the lzw spec to use with bind_lzw_spec() */
 
-void
-write_lzw_buffer(int value)
-{
-	size_t const BUFSIZE = sizeof(uint8_t) * 1024;
+	// set some global state variables
+	aux->tstate.page = myNode;
+	myNode->type     = PG_NODE;
+	myNode->me       = myRef;
+	myNode->pn.dict  = myDict;
+	myNode->pn.textStream = NULL;   // stays NULL unless a text stream is found below
+	myNode->pgRsrc   = NULL;        // NULL ==> inherit resources from parent; set before any early exit
 
-	if(!cur_lzw_spec->lzw_buf)
-	{
-		fprintf(stderr, "LZWDecode: lzw_buf is null!");
-		assert(cur_lzw_spec->lzw_buf != NULL);
+	item = dictentry(myDict, "Parent");
+	if ( item == NULL || item->token_type != TT_Ref || parent == NULL ||
+			((Ref*)item->user)->nr  != ((Ref*)parent->user)->nr ||
+			((Ref*)item->user)->gen != ((Ref*)parent->user)->gen ) {
+		fprintf(stderr, "parse_pagenode: Inconsistent parent pointer [p = %p]!\n",
+				(void *)item);
+		// should this just be a warning?
+		goto end;
+	}
+	myNode->parent = item;
+
+	// Hold on to the Resources dictionary
+	// This dictionary may be empty
+	// If there is no dictionary ==> inherit resources from parent
+	item = dictentry(myDict, "Resources");
+	if (item) {
+		fprintf(stdout, "\n\nparse_pagenode: Found resources in node\n");
+		rsrcdict_t = resolve(aux, item);
+		if (rsrcdict_t != NULL) {   // an unresolvable reference is treated like a missing entry
+			fprintf(stdout, "\nparse_pagenode: Resource token type = %u\n",rsrcdict_t->token_type);
+			parse_rsrcdict(arena, rsrcdict_t, myNode, aux);
+			pp_ptnode(stdout, myNode);
+		}
 	}
 
-	assert(cur_lzw_spec->write_head <= cur_lzw_spec->total_buf_size);
 
-	if (value == EOF) {
-        cur_lzw_spec->lzw_buf[cur_lzw_spec->write_head] = (uint8_t) value;
-        cur_lzw_spec->eof_loc = cur_lzw_spec->write_head;
-        cur_lzw_spec->write_head++;
-        return;
-    }
+	// Process the contents stream or array
+	contents_t = dictentry(myDict, "Contents");
+	if (contents_t == NULL) {
+			fprintf(stderr, "parse_pagenode: Page node without contents!\n");
+			goto end;
+		}
+	else if (contents_t->token_type == TT_Ref) {
+		contents_r = H_CAST(Ref, contents_t);
+		fprintf(stdout, "parse_pagenode: ref.nr = %zu, ref.gen=%zu\n", contents_r->nr, contents_r->gen);
 
-	/* We can get away with this cast due to writing single bytes. */
-    cur_lzw_spec->lzw_buf[cur_lzw_spec->write_head++] = (uint8_t) value;
+		contents = resolve_item(aux, contents_t, &myNode->offset, p_cstream);
+		if (!contents) {  // TODO: Failure ==> xref error -- Figure out how to handle
+			goto end;
+		}
+		if (contents->token_type == TT_Objstm) { // Resources for the page node
+			parse_rsrcdict(arena, contents, myNode, aux);
+			pp_ptnode(stdout, myNode);
+		}
+		else {
+			fprintf(stdout, "\n\nparse_pagenode: What is token 0 anyway?\n");
+			HParsedToken *tok0     = H_INDEX_TOKEN(contents, 0);
+			h_pprintln(stdout, tok0);
 
-	/* If you looked at lzw-ab's code, the write head is reset here
-	 * This function uses write_head as the offset of the last written item */
-    if (cur_lzw_spec->write_head >= cur_lzw_spec->total_buf_size)
-    {
-        grow_lzw_buffer(BUFSIZE);
-    }
+			HParsedToken *res_strm = H_INDEX_TOKEN(contents, 1);
+			if (res_strm->token_type == TT_SEQUENCE) { // this seems like a big assumption
+				myNode->pn.textStream = res_strm;
 
-    cur_lzw_spec->write_checksum = cur_lzw_spec->write_checksum * 3 + (uint8_t) value;
-}
+				fprintf(stdout, "parse_pagenode: Page node contents = %p\n", (void *)contents);
+			}
+			else
+				myNode->pn.textStream = NULL;
+		}
+	}
+	else if (contents_t->token_type == TT_SEQUENCE) {
+		size_t   numelts = contents_t->seq->used;
+		size_t   bufsz = 0;
+		HBytes   bstrm;
+		const HParsedToken  **pieces = h_arena_malloc(arena, sizeof *pieces * numelts);
+		for (int i=0; i<numelts; i++) {
+			entry = H_INDEX_TOKEN(contents_t, i);
+			contents_r = H_CAST(Ref, entry);
+			fprintf(stdout, "\n\nparse_pagenode: objstream contents: strm obj#:%d, oid=<%zu, %zu>\n",
+					i+1, contents_r->nr, contents_r->gen);
+			contents   = resolve_item(aux, entry, &nOffset, p_byteostm);
+			if (!contents) {  // TODO: Failure ==> xref error -- Figure out how to handle
+				goto end;
+			}
+			HParsedToken *res_strm = H_INDEX_TOKEN(contents, 1);
+			fprintf(stdout, "\nparse_pagenode: Field 2 type = %u\n", res_strm->token_type);
+			HParseResult *bstrm_r = H_CAST(HParseResult, res_strm);
+			if (bstrm_r == NULL || bstrm_r->ast == NULL) goto end;   // act_ks_value wraps a NULL result when decoding fails
+			pieces[i] = bstrm_r->ast;
+			bstrm = H_CAST_BYTES(bstrm_r->ast);
+			bufsz += bstrm.len;
+			fprintf(stdout, "\n\nparse_pagenode: the extracted byte stream:\n%.*s, lensofar = %zu\n",
+					(int)bstrm.len, (char*)bstrm.token, bufsz);
 
+			fprintf(stdout, "\n\nparse_pagenode: Done parsing strm obj# = %d\n\n", i+1);
 
-/* Fixed signature function for reading bytes. Modifies cur_lzw_spec. Set cur_lzw_spec
- * with bind_lzw_spec() */
-int read_lzw_buffer(void)
-{
-	uint8_t byte_read;
-	int ret_value;
+		}
+		uint8_t *whole = h_arena_malloc(arena, sizeof(uint8_t) * bufsz);
+		size_t offset=0;
+		for (int i=0; i<numelts; i++) {
+			bstrm = H_CAST_BYTES(pieces[i]);
+			memcpy(&whole[offset], bstrm.token, bstrm.len);
+			offset+=bstrm.len;
+			fprintf(stdout, "\n**** index=%d, offset=%zu\n", i, offset);
+		}
+		assert(offset == bufsz);
+		fprintf(stdout, "\n\nparse_pagenode: the extracted byte stream array:\n%.*s, bufsz = %zu\n",
+				(int)bufsz, (char*)whole, bufsz);
+		HParseResult *tstrm=h_parse(p_textstream, whole, bufsz);
+		if (tstrm) {
+			fprintf(stdout, "\n\nparse_pagenode: textstream token_type = %u\n\n", tstrm->ast->token_type);
+			myNode->pn.textStream = tstrm->ast;
+		}
+	}
+	else {
+		fprintf(stdout, "parse_pagenode: Unexpected page node contents token type = %u\n", contents_t->token_type);
+		goto end;
+	}
 
-	/* Input data is already waiting in the buffer */
-    if (cur_lzw_spec->read_head == cur_lzw_spec->read_tail)
-        cur_lzw_spec->read_tail = cur_lzw_spec->input_stream->len;
 
-    if (cur_lzw_spec->read_head < cur_lzw_spec->read_tail)
-    {
-        byte_read = cur_lzw_spec->input_stream->token[cur_lzw_spec->read_head++];
-        cur_lzw_spec->read_checksum = cur_lzw_spec->read_checksum * 3 + byte_read;
-        ret_value = byte_read;
-    }
-    else
-        ret_value = EOF;
 
-    return ret_value;
+end:
+	return;
 }
 
 
-HParseResult *
-LZWDecode(const Dict *parms, HBytes b, HParser *p)
-{
-	struct predictor pred = {1, 1, 8, 1};
-	int (*depredict)(struct predictor *, uint8_t *, size_t);
-	HParseResult *res;
-	int done;
-	int ret;
-	const HParsedToken *v;
 
-	/* set up the predictor (if any) */
-	#define SETPARM(VAR,STR) do {					\
-		v = dictentry(parms, (STR));				\
-		if (v != NULL) {					\
-			if (v->token_type != TT_SINT || v->sint < 0)	\
-				return NULL;				\
-			VAR = v->sint;					\
-		} } while(0)
-	SETPARM(pred.num,	"Predictor");
-	SETPARM(pred.colors,	"Colors");
-	SETPARM(pred.bpc,	"BitsPerComponent");
-	SETPARM(pred.columns,	"Columns");
-	#undef SETPARM
-	if (pred.num == 1)
-		depredict = depred_none;
-	else {
-		if (pred.num >= 10 && pred.num <= 15)
-			depredict = depred_png;
-		else if (pred.num == 2) {
-			/* for 8-bpc TIFF pred. 2, we can reuse PNG Sub */
-			if (pred.bpc == 8) {
-				pred.predfun = pp_sub;	/* predict left */
-				depredict = depred_png;
-			} else {
-				// XXX add general TIFF predictor (bpc != 8)
-				fprintf(stderr, "LZWDecode: /Predictor %d "
-				    "not supported for /BitsPerComponent %d\n",
-				    pred.num, pred.bpc);
-				return NULL;
-			}
-		} else {
-			fprintf(stderr, "LZWDecode: /Predictor %d"
-			    " not supported\n", pred.num);
-			return NULL;
-		}
+/*
+ * parse_pagetree -- elaborate one page tree node (Type "Pages").
+ *
+ * Records the node's "Parent" and "Count" entries in myNode, allocates one
+ * PtNode_T per element of "Kids" and descends into each kid: nested tree nodes
+ * via parse_pagetree, leaves via parse_pagenode.  The node's own "Resources",
+ * if present, are parsed once with parse_rsrcdict.
+ *
+ * Nothing is returned.  A missing "Count" or "Kids" entry, or Resources that
+ * cannot be resolved, end the processing of this node early.
+ */
+// need to maintain information about pages
+void
+parse_pagetree(
+		struct Env         *aux,
+		PtNode_T           *myNode,
+		const HParsedToken *myRef,     // my page tree node reference
+		const Dict         *myDict,    // my page tree specification
+		const HParsedToken *pRefT,     // parent reference token
+		size_t              curr       // number of pages seen so far (by value: counts from nested trees do not propagate)
+		)
+{
 
-		/* allocate row buffer */
-		if (pred.columns > (INT_MAX - 7) / pred.colors / pred.bpc) {
-			fprintf(stderr, "LZWDecode: overflow\n");
-			return NULL;
-		}
-		pred.rowsz = (pred.colors * pred.bpc * pred.columns + 7) / 8;
-		pred.buf = calloc(1, pred.rowsz);
-		if (pred.buf == NULL)
-			err(1, "LZWDecode");
+	const HParsedToken *item      = NULL;
+	const HParsedToken *kids      = NULL;
+	PtNode_T           *kid       = NULL;
+	const HParsedToken *kidRef    = NULL; // page tree or page node reference
+	const HParsedToken *kidDict_t = NULL;
+	const HParsedToken *pageDict_t = NULL;
+	const HParsedToken *treeDict_t = NULL;
+	const Dict         *kidDict   = NULL;
+	const HParsedToken *rsrcdict_t = NULL;
+
+	fprintf(stdout, "\nparse_pagetree: parsing Page Tree Node = ");
+	pp_ref(stdout, myRef, 0, 0);
+
+	myNode->type     = PG_TREE;
+	myNode->me       = myRef;      // parse_xobject reads parent->me, also for tree nodes
+	myNode->pgRsrc   = NULL;       // set below if this node has Resources
+	myNode->pt.kids  = NULL;       // keep the node well-formed on the early exits below
+	myNode->pt.count = 0;
+	item = dictentry(myDict, "Parent");  // if root node ==> parent should be NULL
+	myNode->parent = item;
+
+	// Count is a required field except for the root
+	item = dictentry(myDict, "Count");
+	if ( (item == NULL) || (item->token_type != TT_SINT) ) {
+		fprintf(stderr, "parse_pagetree: Required page node count missing!\n");
+		goto end; // This should just be a warning
+	}
+	else {
+		myNode->pt.leaves = H_CAST_SINT(item);
+		if (aux->catalog.pgCount == 0)
+			aux->catalog.pgCount = myNode->pt.leaves;
 	}
 
-	lzwspec *lzw_spec = new_lzw_spec(&b);
-	bind_lzw_spec(lzw_spec);
 
-	ret = lzw_decompress(write_lzw_buffer, read_lzw_buffer);
-	if (ret) {
-		fprintf(stderr, "lzw_decompress: error (%d)\n", ret);
-		assert(!"LZWDecode: failed to decompress\n");
+
+
+	// Kids is a required field
+	kids = dictentry(myDict, "Kids");  // array of references to page or page tree nodes
+	if ( (kids == NULL) || (kids->token_type != TT_SEQUENCE) ) {
+		fprintf(stderr, "parse_pagetree: This tree node has no pages!\n");
+		goto end; // Nothing more to do here
 	}
-	done = depredict(&pred, cur_lzw_spec->lzw_buf, cur_lzw_spec->write_head-1);
-	assert(!done);	// XXX ITERATIVE
 
-	res = h_parse(p, pred.out, pred.nout);
-	free(pred.out);
 
-	bind_lzw_spec(NULL);
-	delete_lzw_spec(lzw_spec);
 
-	return res;
-}
 
-HParseResult *
-RunLengthDecode(const Dict *parms, HBytes b, HParser *p)
-{
-	HParseResult *res;
+	// get the kids (pgTable)
+	HCountedArray  *pgTable = H_CAST_SEQ(kids);
+	size_t          pgtSz   = pgTable->used;
+	myNode->pt.kids  = (PtNode_T*)h_arena_malloc(pgTable->arena, pgtSz * sizeof(PtNode_T));
+	myNode->pt.count = pgtSz;
 
-	res = h_parse(p_rldstring, b.token, b.len);
-	if(!res)
+	// Process the kids
+	for (size_t i=0; i<pgtSz; i++)
 	{
-		fprintf(stderr, "parse error in RunLengthDecode filter\n");
-		return NULL;
-	}
+		kid         = &myNode->pt.kids[i];
+		kid->parent = myRef;
+		kidRef      = pgTable->elements[i];
+		kidDict_t   = resolve(aux, kidRef);     // page or tree node dictionary or object stream token
+
+		if (kidDict_t) {
+			// Look for a tree node
+			treeDict_t = get_dictoftype(kidDict_t, myRef, "Pages", aux);
+			if (treeDict_t) {
+				kidDict = H_CAST(Dict, treeDict_t);
+				parse_pagetree(aux, kid, kidRef, kidDict, myRef, curr);
+			}
+			// Look for a page node
+			pageDict_t = get_dictoftype(kidDict_t, myRef, "Page", aux);
+			if (pageDict_t) {
+				kidDict = H_CAST(Dict, pageDict_t);
+				if (++curr > aux->catalog.pgCount) {
+					fprintf(stderr, "parse_pagetree: More kids then specified leaves!\n");
+					// TODO:: probably just a warning is enough here -- run the VIOL parser?
+				}
+				parse_pagenode(aux, kid, kidRef, kidDict, myRef, pgTable->arena);
+			}
 
-	assert(res->ast && res->ast->token_type == TT_BYTES);
-	res = h_parse(p, res->ast->bytes.token, res->ast->bytes.len);
+			// This tree node's own Resources do not depend on the kid: parse them only
+			// once, otherwise every kid would register the same XObjects again.
+			item = (myNode->pgRsrc == NULL) ? dictentry(myDict, "Resources") : NULL;
+			if (item) {
+				fprintf(stdout, "\n\nparse_pagetree: Found resources in node\n");
+				size_t offset = 0;
+				rsrcdict_t = resolve_item(aux, item, &offset, p_objdef);
+				if (!rsrcdict_t) {  // TODO: Failure ==> xref error -- Figure out how to handle
+					goto end;
+				}
+				fprintf(stdout, "\nparse_pagetree: Resource token type = %u\n",rsrcdict_t->token_type);
+				parse_rsrcdict(pgTable->arena, rsrcdict_t, myNode, aux);
+				pp_ptnode(stdout, myNode);
+			}
 
-	return res;
+		}
+		else {
+			Ref *ref = (Ref *)kidRef->user;
+			fprintf(stderr, "parse_pagetree: Reference <%zu, %zu> not found -- Deleted?!\n",
+					ref->nr, ref->gen);
+		}
+
+	} // end loop
+
+
+
+end:
+	return;
 }
 
+
+
+
 /*
- * Decodes ASCII hexadecimal data into binary data.
- * parms should be empty, because the filter has no parameters
+ * parse_catalog -- start elaborating the page tree, beginning at `root`.
+ *
+ * Resets aux->catalog, resolves `root` to the dictionary of Type "Catalog",
+ * follows its "Pages" reference to the page tree root and hands that to
+ * parse_pagetree.  Returns early, leaving the catalog partly filled, when any
+ * of these steps fails.
  */
-HParseResult *
-ASCIIHexDecode(const Dict *parms, HBytes b, HParser *p)
+void
+parse_catalog(struct Env *aux, const HParsedToken *root)
 {
-	HParseResult *f_res, *res;
+	const HParsedToken *dict_t  = NULL;
+	const Dict         *catalog = NULL;
+	const HParsedToken *ptRef   = NULL; // page tree reference
+	const Dict         *ptRoot  = NULL; // page tree root Dictionary
+
+	// initialize the catalog structure
+	aux->catalog.catalog = NULL;
+	aux->catalog.pRoot   = NULL;
+	aux->catalog.pgCount = 0;
+	// Initialize the xobject structure
+	aux->catalog.xObjs.name = NULL;
+	aux->catalog.xObjs.node = NULL;
+	aux->catalog.xObjs.next = NULL;
+	aux->catalog.xoHead     = NULL;
+	aux->catalog.xoTail     = NULL;
+	aux->catalog.xoCount    = 0;
+
+	// DEBUG
+	fprintf(stdout, "\nparse_catalog: parsing Catalog = ");
+	if (root->token_type == TT_Ref)
+		pp_ref(stdout, root, 0, 0);
+	else if (root->token_type == TT_Dict)
+		pp_dict(stdout, root, 0, 0);
+
+	// Ensure the reference is to the catalog dictionary
+	size_t offset = 0;
+	dict_t = resolve_item(aux, root, &offset, p_objdef);
+	if (!dict_t) { // TODO: Failure ==> xref error -- Figure out how to handle
+		goto end;
+	}
 
-	// XXX debug
-	fprintf(stdout, "ASCIIHexDecode:: bytes=[%.*s]\n", (int)b.len, b.token);
+	aux->catalog.catalog = get_dictoftype(dict_t, NULL, "Catalog", aux); // catalog dictionary token
+	if (aux->catalog.catalog) { // Caution:: relying on the short-circuiting behavior here
+		catalog = H_CAST(Dict, aux->catalog.catalog);
 
-	f_res = h_parse(p_ahexstream, b.token, b.len);
-	if(!f_res)
-	{
-		fprintf(stderr, "parse error in ASCIIHexDecode filter\n");
-		return NULL;
-	}
 
-	assert(f_res->ast && f_res->ast->token_type == TT_BYTES);
-	fprintf(stdout, "ASCIIHexDecode::string = [%.*s]\n",
-			(int)f_res->ast->bytes.len, (char*)f_res->ast->bytes.token);
-	res = h_parse(p, f_res->ast->bytes.token, f_res->ast->bytes.len);
+		// Catalog found -- Now get the root of the page tree associated with the catalog
+		ptRef = dictentry(catalog, "Pages"); // indirect reference to a dictionary
+		if ( (ptRef == NULL) || (ptRef->token_type != TT_Ref) ) {
+			fprintf(stderr, "parse_catalog: Page Tree not found!\n");
+			goto end;
+		}
+		aux->catalog.pRoot = ptRef; // indirect reference to the page tree
 
-	if (res == NULL)
-		res = f_res; // return the undecoded stream
 
-	return res;
+		/* resolve and process the page tree root reference to extract the dictionary --> Page Tree Object */
+		dict_t = resolve_item(aux, ptRef, &offset, p_objdef);                     // page tree root node
+		if (!dict_t) { // TODO: Failure ==> xref error -- Figure out how to handle
+			goto end;
+		}
+		dict_t = get_dictoftype(dict_t, NULL, "Pages", aux);   // page tree root dictionary (parent is NULL)
+		if (dict_t == NULL) {   // must be tested before H_CAST, which inspects the token
+			fprintf(stderr, "parse_catalog: No page table!\n");
+			goto end; // Nothing more to do here
+		}
+		ptRoot = H_CAST(Dict, dict_t);
+
+		// parse_pagetree
+		parse_pagetree(aux, &aux->catalog.pgTree, ptRef, ptRoot, NULL, 0);
+	}
+	else {   // looks like the field "Type:Catalog" is a hint, not a requirement for a valid pdf
+		fprintf (stdout, "\n\nThe Catalog is missing!!");
+		goto end;
+	}
+
+	end:
+	return;
 }
 
 /*
- * Decodes ASCII base-85 encoded data and produces binary data.
- * parms should be empty, because the filter has no parameters
+ * ********************************************************************
+ * End Catalog parsing
+ * ********************************************************************
  */
-HParseResult*
-ASCII85Decode(const Dict *parms, HBytes b, HParser *p)
-{
-	HParseResult *f_res, *res;
 
-	// XXX debug
-	fprintf(stdout, "ASCII85Decode:: bytes=[%.*s]\n", (int)b.len, b.token);
 
-	f_res = h_parse(p_a85string, b.token, b.len);
-	if(!f_res)
-	{
-		fprintf(stderr, "parse error in ASCII85Decode filter\n");
-		return NULL;
-	}
 
-	assert(f_res->ast && f_res->ast->token_type == TT_BYTES);
-	res = h_parse(p, f_res->ast->bytes.token, f_res->ast->bytes.len);
 
-	if (res == NULL)
-		res = f_res; // return the undecoded stream
 
-	return res;
-}
 
+
+
+/*
+ * ********************************************************************
+ * Start xref parsing
+ * ********************************************************************
+ */
 /*
  * decode the bytes in 'b' according to metadata in the stream dictionary 'd'
  * and parse the result with 'p'.
@@ -2063,8 +4970,11 @@ decode_stream(const Dict *d, HBytes b, HParser *p)
 		filter = RunLengthDecode;
 	else if (bytes_eq(v->bytes, "LZWDecode"))
 		filter = LZWDecode;
-	else
-		return NULL;		/* filter not supported */
+	else {		/* filter not supported */
+		fprintf(stderr, "decode_stream:: Unsupported Filter [%.*s]\n",
+				(int)v->bytes.len, v->bytes.token);
+		return NULL; /* Treat the stream as a byte array */
+	}
 
 	v = dictentry(d, "DecodeParms");
 	if (v && v->token_type == TT_Dict)
@@ -2073,87 +4983,52 @@ decode_stream(const Dict *d, HBytes b, HParser *p)
 	return filter(parms, b, p);
 }
 
-HParsedToken *
-act_rest(const HParseResult *p, void *env)
-{
-	struct Env *aux = env;
-	size_t offset = H_CAST_UINT(p->ast) / 8;
-
-	return H_MAKE_BYTES(aux->input + offset, aux->sz - offset);
-}
-
-HParser *
-p_rest__m(HAllocator *mm__, struct Env *aux)
-{
-	return h_action__m(mm__, h_tell__m(mm__), act_rest, aux);
-}
-
-/* combine current position with env=(input,sz) into HBytes */
-HParsedToken *
-act_take_bytes(const HParseResult *p, void *env)
-{
-	const HBytes *bs = env;
-	size_t offset = H_CAST_UINT(p->ast) / 8;
-
-	/*
-	 * NB: we must allocate a new HBytes struct here because the old one is
-	 * allocated only temporarily for the lifetime of the continuation
-	 * below.
-	 */
-	return H_MAKE_BYTES(bs->token + offset, bs->len);
-}
-
-HParser *
-p_take__m(HAllocator *mm__, size_t n, struct Env *aux)
-{
-	HParser *skip, *bytes;
-	HBytes *bs;
-
-	/* dummy struct to hold the pair (input,n) */
-	bs = h_alloc(mm__, sizeof(HBytes));
-	bs->token = aux->input;
-	bs->len = n;
-
-	bytes = h_action__m(mm__, h_tell__m(mm__), act_take_bytes, bs);
-	skip  = h_skip__m(mm__, n * 8);
-
-	return h_left__m(mm__, bytes, skip);
-}
 
 HParser *p_xrefdata__m(HAllocator *, const Dict *);
-HParser *p_objstm__m(HAllocator *, const Dict *);
-HParser *p_raw_test__m(HAllocator *, const Dict *);
 
-/*
- * Look into the dictionary associated with the stream to see if there is data
- * needed to interpret the stream
- */
+/* Select the parser for a stream's data from the "Type" (and, for an XObject, the
+ * "Subtype") entry of its dictionary; NULL is returned for a missing or unrecognized type. */
 HParser *
-p_stream_data__m(HAllocator *mm__, const Dict *dict)
+p_stream_data__m(HAllocator *mm__, const Dict *dict, struct Env *aux)
 {
 	const HParsedToken *v;
 
 	v = dictentry(dict, "Type");
 	if (v == NULL || v->token_type != TT_BYTES)	// XXX -> custom type
-		//return p_raw_test__m(mm__, dict);				/* no /Type field */
-		return NULL;									/* no /Type field */
+		return NULL;				/* no /Type field */
 
 	/* interpret known stream types */
 	if (bytes_eq(v->bytes, "XRef"))
 		return p_xrefdata__m(mm__, dict);
-#ifndef NOOBJSTM
-	if (bytes_eq(v->bytes, "ObjStm"))
+	if (bytes_eq(v->bytes, "ObjStm")) {
+		fprintf(stdout, "\np_stream_data__m:: Parsing object stream\n");
 		return p_objstm__m(mm__, dict);
-#endif
+	}
 
+	if (bytes_eq(v->bytes, "XObject")) {
+		/*
+		 *  external objects can be images, forms, or postscript objects
+		 *  Forms and postscript objects can be handled as bytestreams
+		 *  Additional XObject Forms processing will be handled during page traversal
+		 *  Explicitly avoid parsing Image objects to improve speed -- send back NULL
+		 */
+		v = dictentry(dict, "Subtype");
+		if (v != NULL && v->token_type == TT_BYTES && bytes_eq(v->bytes, "Form")) {   // a missing or non-name Subtype falls through to NULL
+			fprintf(stdout, "\n\np_stream_data_m:: Found Form XObject\n");
+			fprintf(stdout, "p_stream_data_m:: Current XObject count = %lu\n", aux->catalog.xoCount);
+//			parse_xobject(mm__, dict, aux);
+			return p_bytestream;
+#if 0
+		if (bytes_eq(v->bytes, "Image")) {
+			fprintf(stdout, "\n\np_stream_data_m:: Found XObject - Image\n");
+			return p_fail;
+		}
+#endif
+		}
+	}
 	return NULL;					/* unrecognized type */
 }
 
-struct streamspec {
-	Dict *dict;		/* stream dictionary */
-	HParser *parser;	/* data parser */
-};
-
 HParsedToken *
 act_ks_value(const HParseResult *p, void *u)
 {
@@ -2163,19 +5038,20 @@ act_ks_value(const HParseResult *p, void *u)
 
 	/* decode and parse the stream data */
 	res = decode_stream(spec->dict, bytes, spec->parser);
-	// XXX: test a85_integration branch's version
 	if (res == NULL) {
 		HBytes b = {NULL, 0};
 		const HParsedToken *v = dictentry(spec->dict, "Type");
-		if (v != NULL && v->token_type == TT_BYTES)
-			b = v->bytes;
+		if (v != NULL && v->token_type == TT_BYTES) {
+			b.token = v->bytes.token;
+			b.len   = v->bytes.len;
+		}
 		if (b.len > INT_MAX)
 			b.len = INT_MAX;
 		fprintf(stderr, "parse error in stream (%*s)\n",
 		    (int)b.len, b.token);
 		// XXX return the undecoded stream (p->ast)?
 	}
-
+	fprintf(stdout, "\n\nact_ks_value\n\n");
 	return H_MAKE(HParseResult, res);
 }
 
@@ -2189,6 +5065,12 @@ act_ks_value(const HParseResult *p, void *u)
 HParser *
 kstream(HAllocator *mm__, const HParsedToken *x, void *env)
 {
+    // DEBUG
+    fprintf (stdout, "\n\nkstream:");
+    h_pprintln(stdout, x);
+    // DEBUG
+
+
 	struct Env *aux = env;
 	HParsedToken *dict_t = H_INDEX_TOKEN(x, 0);
 	Dict *dict = H_CAST(Dict, dict_t);
@@ -2204,9 +5086,10 @@ kstream(HAllocator *mm__, const HParsedToken *x, void *env)
 		goto fail;
 	sz = (size_t)v->sint;
 
+	//fprintf(stderr, "parsing stream object, length %zu.\n", sz);	// XXX debug
 
 	dict_p	= p_return__m(mm__, dict_t);
-	bytes_p = p_take__m(mm__, sz, aux);  // parser for the byte stream
+	bytes_p = p_take__m(mm__, sz, aux);
 
 	spec = h_alloc(mm__, sizeof(struct streamspec));
 	spec->dict = dict;
@@ -2223,15 +5106,15 @@ kstream(HAllocator *mm__, const HParsedToken *x, void *env)
 		value_p = bytes_p;
 
 	return h_sequence__m(mm__, dict_p, value_p, NULL);
+
 fail:
-#if 0
 	if (v == NULL)
 		fprintf(stderr, "stream /Length missing\n");
 	else if (v -> token_type != TT_SINT)
 		fprintf(stderr, "stream /Length not an integer\n");
 	else if (v < 0)
 		fprintf(stderr, "stream /Length negative\n");
-#endif
+
 	//h_pprintln(stderr, p);	// XXX debug
 	return p_fail;
 }
@@ -2402,6 +5285,39 @@ p_xrefdata__m(HAllocator *mm__, const Dict *dict)
 	return h_sequence__ma(mm__, (void **)p_subs);
 }
 
+
+/*
+ * Semantic action for an object stream: builds an Objstm holding N entries,
+ * where N is the value of the TT_SINT token passed as user data.  Entry i takes
+ * its object number from element 2*i of field 0 (generation fixed at 0) and
+ * its object from element i of field 1.
+ */
+HParsedToken *
+act_ostm(const HParseResult *p, void *u)
+{
+	const HParsedToken *count_t = u;
+	assert(count_t->token_type == TT_SINT);
+	size_t N = count_t->sint;
+
+	Objstm *stm  = H_ALLOC(Objstm);
+	stm->numObjs = N;
+	stm->tok     = h_arena_malloc(p->arena, N* sizeof(Objref_T));
+	stm->arena   = p->arena;
+
+	for (int i=0; i<stm->numObjs; i++) {
+		const HParsedToken *nr_t = H_FIELD_TOKEN(0, 2*i);
+		assert(nr_t->token_type == TT_UINT);
+		stm->tok[i].oid.nr  = H_CAST_UINT(nr_t);
+		stm->tok[i].oid.gen = 0;
+		stm->tok[i].obj     = H_FIELD_TOKEN(1, i);
+	}
+
+	const HParsedToken *result = H_MAKE(Objstm, stm);
+	fprintf (stdout, "act_ostm:: Object Stream Details:\n");
+	pp_objstm(stdout, result, 0, 0);
+	return (HParsedToken *)result;
+}
+
 HParser *
 p_objstm__m(HAllocator *mm__, const Dict *dict)
 {
@@ -2411,7 +5327,7 @@ p_objstm__m(HAllocator *mm__, const Dict *dict)
 	v = dictentry(dict, "N");
 	if (v == NULL || v->token_type != TT_SINT || v->sint < 0 ||
 	    (uint64_t)v->sint > SIZE_MAX) {
-		fprintf(stderr, "missing /N on object stream\n");
+		fprintf(stderr, "p_objstm__m: missing /N on object stream\n");
 		return p_fail;
 	}
 	N = v->sint;
@@ -2419,7 +5335,10 @@ p_objstm__m(HAllocator *mm__, const Dict *dict)
 	HParser *wel_ws = h_sequence__m(mm__, p_wel, p_ws, NULL);
 	HParser *idx = p_sepBy_n__m(mm__, p_npair, wel_ws, N);
 
-	return h_sequence__m(mm__, p_ws, idx, p_elemr, p_ws, NULL);
+	HParser *p_ostm = h_sequence__m(mm__, p_ws, idx, p_elemr, p_ws, NULL);
+	HParser *ostm_p = h_action__m(mm__, p_ostm, act_ostm, (void *)v);
+
+	return ostm_p;
 		// XXX leading and trailing ws OK?
 
 	// XXX consistency-check against /First, idx, /N
@@ -2466,9 +5385,6 @@ kxstream(HAllocator *mm__, const HParsedToken *x, void *env)
 	/* construct the parser for the stream data */
 	spec = h_alloc(mm__, sizeof(struct streamspec));
 	spec->dict = dict;
-	// TODO: Seems the assumption is that this form of content stream is
-	//       strictly used for xrefs. Is that true? Ask Peter Wyatt
-	//       Also, do we have an instance of a pdf file that uses this feature?
 	spec->parser = p_xrefdata__m(mm__, dict);
 	assert (spec->parser != NULL);
 
@@ -2479,18 +5395,6 @@ kxstream(HAllocator *mm__, const HParsedToken *x, void *env)
 }
 
 
-/*
- * main program
- */
-
-#include <stdio.h>
-#include <inttypes.h>
-#include <stdlib.h>	/* realloc() */
-#include <fcntl.h>	/* open() */
-#include <unistd.h>	/* lseek() */
-#include <sys/mman.h>	/* mmap() */
-
-const char *infile = NULL;
 
 /*
  * This helper implements the standard backwards parsing strategy to read all
@@ -2500,17 +5404,20 @@ const char *infile = NULL;
  * Allocates and returns an array of HParsedTokens, each containing the result
  * of a successful 'p_xref' parse. Sets the output parameter 'nxrefs' to the
  * number of elements.
- *
- * A return value of NULL indicates an empty result.
  */
 const HParsedToken **
 parse_xrefs(const uint8_t *input, size_t sz, size_t *nxrefs)
 {
+	const uint8_t *input = aux->input;
+	size_t         sz    = aux->sz;
 	HParseResult *res = NULL;
 	const HParsedToken **xrefs = NULL;	/* empty result */
 	const HParsedToken *tok = NULL;
 	size_t n = 0, nfwd = 0;
 	size_t offset = 0;
+	bool processRoot = true;
+	size_t maxObjNum = 0;
+	Dict *trailer = NULL;
 
 	// XXX try formulating this as a parser using h_seek()
 
@@ -2545,9 +5452,19 @@ parse_xrefs(const uint8_t *input, size_t sz, size_t *nxrefs)
 			err(1, "realloc");
 		xrefs[n++] = res->ast;
 
+
+		/* process the root */
+		if (processRoot) {
+			// Size is a required field in the trailer dictionary
+			trailer = H_INDEX(Dict, res->ast, 1);
+			maxObjNum = H_CAST_SINT(dictentry(trailer, "Size"));
+
+			processRoot = false;
+		}
+
+
 		/* look up the next offset (to the previous xref section) */
 		tok = dictentry(H_INDEX(Dict, res->ast, 1), "Prev");
-
 		if (tok == NULL)
 			break;
 		if (tok->token_type != TT_SINT) {
@@ -2579,10 +5496,50 @@ parse_xrefs(const uint8_t *input, size_t sz, size_t *nxrefs)
 	}
 
 end:
-	*nxrefs = n;
-	return xrefs;
+	aux->xrefs = xrefs;
+	aux->nxrefs = n;
+	if (n > maxObjNum) {
+		fprintf(stderr, "%s: Number of xrefs found -%ld- "
+						"Greater than specified /Size -%ld-.\n"
+						"Ignoring objects numberd greater than -%ld-!\n",
+						infile, n, maxObjNum, n);
+		aux->nxrefs = maxObjNum;
+	}
+
+
+	// Process the trailer dictionary
+	if (trailer) { // trailer==NULL or n==0 ==> xrefs were not parsed correctly
+		const HParsedToken *root = dictentry(trailer, "Root");
+		assert(root->token_type == TT_Ref);
+		parse_catalog(aux, root);
+	}
+	return;
 }
 
+
+
+/*
+ * ********************************************************************
+ * End xref parsing
+ * ********************************************************************
+ */
+
+
+
+
+/*
+ * main program
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <fcntl.h>	/* open() */
+#include <unistd.h>	/* lseek() */
+#include <sys/mman.h>	/* mmap() */
+
+
+
+
 int
 main(int argc, char *argv[])
 {
@@ -2621,7 +5578,10 @@ main(int argc, char *argv[])
 	init_parser(&aux);
 
 	/* parse all cross-reference sections and trailer dictionaries */
-	aux.xrefs = parse_xrefs(input, sz, &aux.nxrefs);
+	parse_xrefs(&aux);
+
+	fprintf(stdout, "\n\nmain:: Done parsing xrefs and page tree. Starting main parser.\n\n");
+
 
 	/* run the main parser */
 	res = h_parse(p_pdf, input, sz);
@@ -2644,5 +5604,10 @@ main(int argc, char *argv[])
 	/* print result */
 	h_pprintln(stdout, res->ast);
 
+	/* Save the extracted text */
+	if (aux.ntextobjs > 0) {
+		text_extract(&aux);
+	}
+
 	return 0;
 }