diff --git a/pdf.c b/pdf.c
index e096822e8718a4a63e7227e540d86d88a9c1838e..b2508a3735d3bfd073e2cd55623faa08af109cef 100644
--- a/pdf.c
+++ b/pdf.c
@@ -2,6 +2,7 @@
  * pesco 2019,2020
  * pompolic 2020
  * Paul Vines 2020
+ * Kragen Sitaker 2020, 2021
  * Sumit Ray 2021
  *
  */
@@ -111,6 +112,43 @@ validate_notnull(HParseResult *p, void *u)
 }
 
 
+
+// Forward declaration of Token structures
+typedef struct { size_t nr, gen; } Ref;
+
+typedef HCountedArray Dict;
+
+
+
+// Catalog Tree -- for now, just make it a table
+struct PtNode_T;
+typedef struct PtNode_T {
+	enum {PG_TREE, PG_NODE} type;
+	const HParsedToken  *parent;                // Type = Page tree -- reference
+	union {
+		struct {
+			const HParsedToken  *pageRef;
+			Dict                *page;          // page node dictionary
+			const HParsedToken  *resources;     // font references dictionary (resources == NULL) ==> inherit
+			const HParsedToken  *textStream;    // content stream -- may be a result of concatenating array of content streams
+		} pn;
+		struct PtNode_T   *kids;                // page table
+	};
+
+} PtNode_S;
+
+typedef struct {
+	const  HParsedToken  *catalog;   // reference
+	const  HParsedToken  *pRoot;     // reference
+	PtNode_S              pgTree;
+	size_t                pgCount;
+} Catalog_S;
+
+
+// Forward declaration of text extraction related structures
+struct textnode;
+struct textstr;
+
 /*
  * auxiliary global data structure needed by the parser
  */
@@ -121,13 +159,97 @@ struct Env {
 
 	const HParsedToken **xrefs;	/* all xref sections of the file */
 	size_t nxrefs;
+
+	struct textnode  *txthead;  /* parsed text objects from the file */
+	struct textnode  *txttail;  /* parsed text objects from the file */
+	size_t ntextobjs;
+
+	Catalog_S  catalog;         /* Catalog object and document structure */
 };
 
 
+
+// ***********************************************************
+/*
+ * Text data structures
+ */
+struct textnode {
+	struct textstr  *tstr;
+	struct textnode *next;
+};
+
+
+struct fontref {
+	const char   *fontname;
+	uint32_t      namelen;
+	uint32_t      fontsize;
+};
+struct textpos {
+	double	tx;
+	double ty;
+};
+struct textmat { double cell[6]; };
+struct textstr {
+	char     *text;
+	uint32_t  nchars;
+};
+struct textwfmt {         /* text with formatting specifications */
+	double        aw;    /* word spacing */
+	double        ac;    /* character spacing */
+	struct textstr tstr;  /* the string */
+};
+struct tarrayelt {
+	union {
+		double          adj;
+		struct textstr  tstr;
+	};
+	bool                isStr;
+};
+struct textarray {
+	struct tarrayelt  *elts;
+	uint32_t            nelts;
+	struct textstr     flattened;
+};
+
+
+
+/* operator::
+ * TS -- Text state    : Table 105
+ * TP -- Text position : Table 108
+ * TW -- Text showing  : Table 109
+ * */
+typedef struct {
+	enum {TS_Tc, TS_Tw, TS_Tz, TS_TL, TS_Tf, TS_Tr, TS_Ts,
+	      TP_Td, TP_TD, TP_Tm, TP_Tstar,
+		  TW_Tj, TW_Tq, TW_Tqq, TW_TJ} type;
+	union {
+		double              value; 			/* many just have a value */
+		uint8_t             mode;           /* text mode */
+		struct fontref      fref;           /* font name reference */
+		struct textpos      pos;		    /* text position */
+		struct textmat      fm;             /* font matrix */
+		struct textstr      tstr;           /* the string */
+		struct textwfmt     twfmt;          /* text with formatting -- qq_op */
+		struct textarray    tarray;         /* text contained in an array object */
+	};
+	const HParsedToken *obj;
+} TextEntry;  // text object entries
+
+// Haven't used this type yet - maybe OBE
+typedef struct {
+	struct textmat   fm;                    /* font matrix associated with this text object */
+	TextEntry        **ops;                 /* operators associated w/string */
+	char             *txt;                  /* the string associated with this object */
+} TextString;
+
+// ***********************************************************
+
+
+
 /*
  * custom token types
  */
-HTokenType TT_XREntry, TT_Ref, TT_Dict, TT_HParseResult;
+HTokenType TT_XREntry, TT_Ref, TT_Dict, TT_HParseResult, TT_TextEntry;
 
 typedef struct {
 	enum {XR_FREE, XR_INUSE, XR_OBJSTM} type;
@@ -139,9 +261,6 @@ typedef struct {
 	const HParsedToken *obj;
 } XREntry;
 
-typedef struct { size_t nr, gen; } Ref;
-
-typedef HCountedArray Dict;
 
 /* look up a value in a dictionary */
 const HParsedToken *
@@ -546,7 +665,7 @@ act_a85partialgroup(const HParseResult *p, void *u)
 	assert(bytes_used > 0);
 
 	size_t shift = 4 - bytes_used;
-	 	bytes = h_arena_malloc(p->arena, bytes_used);
+	bytes = h_arena_malloc(p->arena, bytes_used);
 	for (int i=0; i<bytes_used; i++) {
 		bytes[i] = bytes_helper[shift + i];
 	}
@@ -850,90 +969,806 @@ validate_xrstm(HParseResult *p, void *u)
 	const Dict *tdict = H_FIELD(Dict, 1, 0);
 	const HParsedToken *v = dictentry(tdict, "Type");
 
-	if (!xrefs)
-	{
-		return false;
-	}
+	if (!xrefs)
+	{
+		return false;
+	}
+
+#if 0
+	if (v == NULL)
+		fprintf(stderr, "stream dict has no /Type\n");
+	else if (v->token_type != TT_BYTES)
+		fprintf(stderr, "stream /Type is no name object\n");
+	else if (bytes_eq(v->bytes, "XRef"))
+		return true;
+	return false;
+#endif	// XXX this block can be removed
+
+	return (v != NULL && v->token_type == TT_BYTES &&
+	    bytes_eq(v->bytes, "XRef"));
+}
+
+HParsedToken *
+act_dict_(const HParseResult *p, void *env)
+{
+	Dict *dict = H_CAST_SEQ(p->ast);
+
+	return H_MAKE(Dict, dict);
+}
+
+#define act_array_ h_act_flatten
+
+HParsedToken *
+act_shortlength(const HParseResult *p, void *u)
+{
+	uint8_t length = H_CAST_UINT(p->ast);
+	/* Length can range from 0-127, corresponding to the range 1-128, inclusive */
+	uint8_t finallength = length+1;
+
+	return H_MAKE_UINT(finallength);
+}
+
+HParsedToken *
+act_longlength(const HParseResult *p, void *u)
+{
+	uint8_t length = H_CAST_UINT(p->ast);
+	uint8_t finallength = 257-length;
+
+	return H_MAKE_UINT(finallength);
+}
+
+HParsedToken *
+act_longrun(const HParseResult *p, void *u)
+{
+	HParsedToken **elements = h_seq_elements(p->ast);
+	HParsedToken *res = H_MAKE_SEQ();
+
+	uint8_t length = H_CAST_UINT(elements[0]);
+	uint8_t data = H_CAST_UINT(elements[1]);
+
+	for (size_t len = 0; len < length; ++len)
+	{
+		h_seq_snoc(res, H_MAKE_UINT(data));
+	}
+
+	return res;
+}
+
+HParsedToken *
+act_rldstring(const HParseResult *p, void *u)
+{
+	const HParsedToken *flattened = h_seq_flatten(p->arena, p->ast);
+	HCountedArray *flattened_seq = H_CAST_SEQ(flattened);
+	size_t bytes_required;
+	uint8_t *result_bytes;
+
+	bytes_required = flattened_seq->used - 1;
+	result_bytes = h_arena_malloc(p->arena, sizeof(uint8_t) * bytes_required);
+
+	for (size_t i = 0; i < flattened_seq->used-1; ++i)
+	{
+		result_bytes[i] = H_CAST_UINT(flattened_seq->elements[i]);
+	}
+
+	return H_MAKE_BYTES(result_bytes, bytes_required);
+}
+
+
+
+/*
+ * ********************************************************************
+ * Catalog parsing
+ * ********************************************************************
+ */
+HParsedToken *
+act_contentstream(const HParseResult *p, void *u)
+{
+//	HCountedArray *contents = H_FIELD_SEQ(0);
+//
+//	fprintf(stdout, "act_contentstream:: stream length = %ld\n", contents->used);
+	return (HParsedToken *)p->ast;
+}
+
+
+
+bool
+validate_pgcontents(HParseResult *p, void *u)
+{
+	return false;
+}
+
+HParsedToken *
+act_pgcontents(const HParseResult *p, void *u)
+{
+	return (HParsedToken *)p->ast;
+}
+
+
+HParsedToken *
+act_page(const HParseResult *p, void *u)
+{
+	return (HParsedToken *)p->ast;
+}
+
+
+
+/*
+ * ********************************************************************
+ * Start Text parsing
+ * ********************************************************************
+ */
+
+/*
+ * Pretty printer for text components of the ast
+ */
+void
+pp_textentry(FILE *stream, const HParsedToken *tok, int indent, int delta)
+{
+	/* Print one TextEntry token to stream; indent and delta are unused. */
+	TextEntry *txte = H_CAST(TextEntry, tok);
+
+	switch (txte->type) {
+	/*
+	 * Always pretty print the text show operators
+	 *
+	 * If TEXT_VERBOSE is set, pretty-print the other operators
+	 */
+#define TEXT_VERBOSE
+#ifdef TEXT_VERBOSE
+	case TS_Tf:
+		fprintf(stream, "Tf_op: fn=%.*s, fontsize=%d\n",
+				txte->fref.namelen, txte->fref.fontname, txte->fref.fontsize);
+		break;
+	case TP_Td:
+		fprintf(stream, "Td_op: text position ::tx=%3.3f:ty=%3.3f\n",
+				txte->pos.tx, txte->pos.ty);
+		break;
+#endif
+	case TW_Tj:
+	case TW_Tq:
+		fprintf(stream, "%.*s\n", txte->tstr.nchars, txte->tstr.text);
+		break;
+	case TW_Tqq:
+		/* the " operator stores its string in twfmt (see act_TdoubleQ_op),
+		 * so reading tstr here would alias the aw/ac doubles */
+		fprintf(stream, "%.*s\n", txte->twfmt.tstr.nchars,
+				txte->twfmt.tstr.text);
+		break;
+	case TW_TJ:
+		fprintf(stream, "%.*s\n", txte->tarray.flattened.nchars,
+				txte->tarray.flattened.text);
+		break;
+	default:
+		;
+	}
+}
+
+/*
+ * semantic actions
+ */
+
+
+
+/*
+ *  Simplify the code by casting the choice of integer number and real number to double
+ */
+bool
+validate_tnumb(HParseResult *p, void *u)
+{
+	/* accept only integer or real tokens; reject instead of aborting, so a
+	 * malformed number fails the parse even when asserts are compiled out */
+	return (p->ast->token_type == TT_SINT) || (p->ast->token_type == TT_DOUBLE);
+}
+
+
+HParsedToken *
+act_tnumb(const HParseResult *p, void *u)
+{
+
+	double value;
+
+	if (p->ast->token_type == TT_SINT)  value = (double)p->ast->sint;
+	else                                value =         p->ast->dbl;
+
+	return H_MAKE_DOUBLE(value);
+}
+
+
+
+
+/*
+ * Text state operators - Table 105
+ *   TS_Tc, TS_Tw, TS_Tz, TS_TL, TS_Tf, TS_Tr, TS_Ts
+ *
+ * *****************************************************************
+ * *****************************************************************
+ *
+ */
+HParsedToken *
+act_Tc_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_Tc_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	const HParsedToken *tval = H_INDEX_TOKEN(p->ast, 0);
+
+
+	txte->type  = TS_Tc;
+	txte->obj   = NULL;
+
+	assert(tval->token_type == TT_DOUBLE);
+	txte->value   = tval->dbl;
+
+	fprintf(stdout, "act_Tc_op:: %3.3f\n", txte->value);
+	return H_MAKE(TextEntry, txte);
+}
+
+
+/*
+ * Tw operator: word spacing specification
+ * H_ARULE(Tw_op, SEQ(tnumb, ws, LIT("Tw")));  // 9.3.3 - wordSpace
+ */
+HParsedToken *
+act_Tw_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_Tw_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+
+
+	txte->type  = TS_Tw;
+	txte->obj   = NULL;
+	txte->value = H_FIELD_DOUBLE(0);
+
+	fprintf(stdout, "act_Tw_op:: %3.3f\n", txte->value);
+	return H_MAKE(TextEntry, txte);
+}
+
+
+
+/*
+ * Tz operator: horizontal scaling specification
+ * H_ARULE(Tz_op, SEQ(tnumb, ws, LIT("Tz")));  // 9.3.4 - horizontal scaling
+ */
+HParsedToken *
+act_Tz_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_Tz_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+
+
+	txte->type  = TS_Tz;
+	txte->obj   = NULL;
+	txte->value = H_FIELD_DOUBLE(0);
+
+	fprintf(stdout, "act_Tz_op:: %3.3f\n", txte->value);
+	return H_MAKE(TextEntry, txte);
+}
+
+
+
+/*
+ * TL operator: leading (line spacing) specification
+ * H_ARULE(TL_op, SEQ(tnumb, ws, LIT("TL")));  // 9.3.5 - leading
+ */
+HParsedToken *
+act_TL_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_TL_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+
+
+	txte->type  = TS_TL;
+	txte->obj   = NULL;
+	txte->value = H_FIELD_DOUBLE(0);
+
+	fprintf(stdout, "act_TL_op:: %3.3f\n", txte->value);
+	return H_MAKE(TextEntry, txte);
+}
+
+
+/*
+ *  Font name and size specification
+ *  H_ARULE(Tf_op, SEQ(name, ws, nat, ws, KW("Tf"), ws));  // font and size
+ *
+ *  TODO: Verify that the name is specified in the resource dictionary
+ */
+HParsedToken *
+act_Tf_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_Tf_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	const HParsedToken *fn_token = H_FIELD_TOKEN(0);
+
+	txte->type  = TS_Tf;
+	txte->obj   = NULL;
+
+	txte->fref.fontname = (char *)fn_token->bytes.token;
+	txte->fref.namelen  = fn_token->bytes.len;
+	txte->fref.fontsize = H_FIELD_UINT(1);
+
+	fprintf(stdout, "act_Tf_op: fn=%.*s, fontsize=%d\n",
+			txte->fref.namelen, txte->fref.fontname, txte->fref.fontsize);
+
+	return H_MAKE(TextEntry, txte);
+}
+
+
+
+/*
+ * Tr operator: rendering mode
+ * H_VRULE(tmode, nat);                        // True if <= 7
+ * H_ARULE(Tr_op, SEQ(tmode, ws, LIT("Tr")));  // 9.3.6 - rendering mode
+ *
+ *
+ */
+#define TEXTMODE_MAX 7
+
+bool
+validate_tmode(HParseResult *p, void *u)
+{
+	return H_CAST_UINT(p->ast) <= TEXTMODE_MAX;
+}
+
+HParsedToken *
+act_Tr_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_Tr_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+
+
+	txte->type  = TS_Tr;
+	txte->obj   = NULL;
+	txte->mode  = H_FIELD_UINT(0);
+
+	fprintf(stdout, "act_Tr_op:: %d\n", txte->mode);
+	return H_MAKE(TextEntry, txte);
+}
+
+
+/*
+ * Ts operator: rise specification
+ * H_ARULE(Ts_op, SEQ(tnumb, ws, LIT("Ts")));  // rise
+ */
+HParsedToken *
+act_Ts_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_Ts_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+
+
+	txte->type  = TS_Ts;
+	txte->obj   = NULL;
+	txte->value = H_FIELD_DOUBLE(0);
+
+	fprintf(stdout, "act_Ts_op:: %3.3f\n", txte->value);
+	return H_MAKE(TextEntry, txte);
+}
+
+
+
+/*
+ * 9.4.2 - Text positioning operators - Table 108
+ *   TP_Td, TP_TD, TP_Tm, TP_Tstar
+ *
+ * *****************************************************************
+ * *****************************************************************
+ *
+ * TP_Td: String position - Translation specification
+ * H_ARULE(Td_op, SEQ(tnumb, ws, tnumb, ws, LIT("Td"), ws));   // move to next line with offset
+ */
+HParsedToken *
+act_Td_op(const HParseResult *p, void *u)
+{
+	fprintf(stdout, "act_Td_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+
+
+	txte->type   = TP_Td;
+	txte->obj    = NULL;
+	txte->pos.tx = H_FIELD_DOUBLE(0);
+	txte->pos.ty = H_FIELD_DOUBLE(1);
+
+	fprintf(stdout, "act_Td_op: text position ::tx=%.3f:ty=%.3f\n",
+			txte->pos.tx, txte->pos.ty);
+
+	return H_MAKE(TextEntry, txte);
+}
+
+
+
+
+/*
+ * TP_TD: Offset to next line and set the leading parameter state
+ * H_ARULE(TD_op, SEQ(tnumb, ws, tnumb, ws, LIT("TD")));               // move to next line with offset and set state
+ */
+HParsedToken *
+act_TD_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_TD_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+
+
+	txte->type   = TP_TD;
+	txte->obj    = NULL;
+	txte->pos.tx = H_FIELD_DOUBLE(0);
+	txte->pos.ty = H_FIELD_DOUBLE(1);
+
+	fprintf(stdout, "act_TD_op: text position ::tx=%3.3f:ty=%3.3f\n", txte->pos.tx, txte->pos.ty);
+
+	return H_MAKE(TextEntry, txte);
+}
+
+
+/*
+ * TP_Tm: Text matrix specification
+ * H_ARULE(Tm_op, SEQ(REP(SEQ(tnumb, ws), 6), LIT("Tm"), ws));    // set text matrix
+ */
+HParsedToken *
+act_Tm_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_Tm_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+
+
+	txte->type  = TP_Tm;
+	txte->obj   = NULL;
+
+	assert((p->ast->token_type == TT_SEQUENCE) &&
+			(p->ast->seq->elements[0]->token_type == TT_SEQUENCE) &&
+			(p->ast->seq->elements[0]->seq->used == 6));
+	for (int i=0; i<6; i++)
+
+		txte->fm.cell[i] = p->ast->seq->elements[0]->seq->elements[i]->seq->elements[0]->dbl;
+
+	fprintf(stdout, "act_Tm_op: text matrix ::\n");
+	for (int i=0; i<3; i++)
+		fprintf(stdout, "%3.3f : %3.3f\n", txte->fm.cell[i*2], txte->fm.cell[i*2+1]);
+
+	return H_MAKE(TextEntry, txte);
+}
+
+
+/*
+ * TP_Tstar: Move to the next line
+ * H_ARULE(Tstar_op, SEQ(LIT("T*"), ws));                     // move to next line
+ */
+HParsedToken *
+act_Tstar_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_Tstar_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+
+	txte->type  = TP_Tstar;
+	txte->obj   = NULL;
+	txte->value = 0;
+
+	fprintf(stdout, "act_Tstar_op: position pointer\n");
+
+	return H_MAKE(TextEntry, txte);
+}
+
+
+
+/*
+ * 9.4.3 - Text showing operators - Table 109
+ *   TW_Tj, TW_Tq, TW_Tqq, TW_TJ
+ *
+ * *****************************************************************
+ * *****************************************************************
+ *
+ * TW_Tj: Show string
+ * H_ARULE(Tj_op, SEQ(string, ws, LIT("Tj"), ws));          // show text string
+ */
+HParsedToken *
+act_Tj_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_Tj_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	const HParsedToken *tstr = H_INDEX_TOKEN(p->ast, 0);
+
+
+	txte->type  = TW_Tj;
+	txte->obj   = NULL;
+
+	txte->tstr.text   = (char *)tstr->bytes.token;
+	txte->tstr.nchars = tstr->bytes.len;
+
+	fprintf(stdout, "act_Tj_op:: %.*s\n", txte->tstr.nchars, txte->tstr.text);
+	return H_MAKE(TextEntry, txte);
+}
+
+
+/*
+ * TW_Tq: Offset to next line then show string
+ * H_ARULE(TsingleQ_op, SEQ(string, ws, LIT(quote), ws));   // Move to next line and show text
+ */
+HParsedToken *
+act_TsingleQ_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_TsingleQ_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	const HParsedToken *tstr = H_INDEX_TOKEN(p->ast, 0);
+
+
+	txte->type  = TW_Tq;
+	txte->obj   = NULL;
+
+	txte->tstr.text   = (char *)tstr->bytes.token;
+	txte->tstr.nchars = tstr->bytes.len;
+
+	fprintf(stdout, "act_TsingleQ_op:: %.*s\n", txte->tstr.nchars, txte->tstr.text);
+	return H_MAKE(TextEntry, txte);
+}
+
+
+/*
+ * TW_Tqq: Offset to next line then show string, apply formatting specifications
+ * H_ARULE(TdoubleQ_op, SEQ(tnumb, ws, tnumb, ws, string, ws, LIT(dquote), ws)); // Move to next line and show formatted text
+ *
+ */
+HParsedToken *
+act_TdoubleQ_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_TdoubleQ_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	const HParsedToken *aw = H_INDEX_TOKEN(p->ast, 0);
+	const HParsedToken *ac = H_INDEX_TOKEN(p->ast, 1);
+	const HParsedToken *tstr = H_INDEX_TOKEN(p->ast, 2);
+
+
+	txte->type  = TW_Tqq;
+	txte->obj   = NULL;
+
+	txte->twfmt.aw          = aw->dbl;
+	txte->twfmt.ac          = ac->dbl;
+	txte->twfmt.tstr.text   = (char *)tstr->bytes.token;
+	txte->twfmt.tstr.nchars = tstr->bytes.len;
+
+	fprintf(stdout, "act_TdoubleQ_op:: aw=%3.3f, ac=%3.3f\n", txte->twfmt.aw, txte->twfmt.ac);
+	fprintf(stdout, "act_TdoubleQ_op:: %.*s\n", txte->twfmt.tstr.nchars, txte->twfmt.tstr.text);
+
+	return H_MAKE(TextEntry, txte);
+}
+
+/*
+ * TW_TJ: Show array of strings, with potentially re-positioning specifications for each string
+ * H_RULE(TArr_elem, SEQ(CHX(tnumb, string), aws));
+ * H_ARULE(TJ_op, SEQ(IGN(lbrack), aws, h_many(TArr_elem), IGN(rbrack), aws, LIT("TJ"), aws));  // show one or more text strings
+ *
+ * TODO:: Implement the array parser
+ */
+HParsedToken *
+act_TJ_op(const HParseResult *p, void *u)
+{
+
+	fprintf(stdout, "act_TJ_op:: Here\n");
+
+	TextEntry          *txte = H_ALLOC(TextEntry);
+	const HParsedToken *tarr = H_INDEX_TOKEN(p->ast, 0);
+
+	txte->type  = TW_TJ;
+	txte->obj   = NULL;
+
+
+	/*
+	 * Parse each element of the array
+	 * Build up the pointers to each of the string pieces
+	 */
+	txte->tarray.nelts = tarr->seq->used;
+	txte->tarray.elts  = h_arena_malloc(p->arena, sizeof(struct tarrayelt) * txte->tarray.nelts);
+	txte->tarray.flattened.nchars = 0;
+
+	for (int i=0; i<txte->tarray.nelts; i++) {
+		const HParsedToken *elt = tarr->seq->elements[i];
+		assert( (elt->token_type == TT_SEQUENCE) && (elt->seq->used == 1) );
+		switch (elt->seq->elements[0]->token_type) {
+		case TT_DOUBLE:
+			txte->tarray.elts[i].adj         = elt->seq->elements[0]->dbl;
+			txte->tarray.elts[i].isStr       = false;
+			break;
+		case TT_BYTES:
+			txte->tarray.elts[i].tstr.text   = (char *)elt->seq->elements[0]->bytes.token;
+			txte->tarray.elts[i].tstr.nchars = elt->seq->elements[0]->bytes.len;
+			txte->tarray.elts[i].isStr       = true;
+			txte->tarray.flattened.nchars   += txte->tarray.elts[i].tstr.nchars;
+			// Debug
+//			fprintf(stdout, "act_TJ_op:Cumulative=%d/0x%x bytes,   Additional:%d bytes\n",
+//					txte->tarray.flattened.nchars, txte->tarray.flattened.nchars, txte->tarray.elts[i].tstr.nchars);
+			break;
+		default:
+			fprintf(stderr, "act_TJ_op:: Unexpected element type :: %d\n", elt->seq->elements[0]->token_type);
+			fflush(stderr);
+			assert(false);
+		}
+	}
+
+	/* hold on to a flattened copy of the string */
+	txte->tarray.flattened.text = h_arena_malloc(p->arena, sizeof(char) * txte->tarray.flattened.nchars);
+	int j = 0; // current index
+	for (int i=0; i<txte->tarray.nelts; i++) {
+		if (txte->tarray.elts[i].isStr) {
+			// Debug
+//			fprintf(stdout, "act_TJ_op:Start=%p-%d/0x%xbytes,   Writing to:%p-%dbytes\n",
+//					(void *)txte->tarray.flattened.text, txte->tarray.flattened.nchars, txte->tarray.flattened.nchars,
+//					(void *)&txte->tarray.flattened.text[j], txte->tarray.elts[i].tstr.nchars);
+//			fprintf(stdout, "act_TJ_op: %.*s\n", txte->tarray.elts[i].tstr.nchars, txte->tarray.elts[i].tstr.text);
+			memcpy(&txte->tarray.flattened.text[j], txte->tarray.elts[i].tstr.text, txte->tarray.elts[i].tstr.nchars);
+			j += txte->tarray.elts[i].tstr.nchars;
+		}
+	}
+
+	fprintf(stdout, "act_TJ_op:: %.*s\n", txte->tarray.flattened.nchars, txte->tarray.flattened.text);
+	return H_MAKE(TextEntry, txte);
+}
+
+
+
+
+/*
+ * Parse the text object delimited by "BT" and "ET"
+ */
+HParsedToken *
+act_txtobj(const HParseResult *p, void *u)
+{
+	/* Concatenate every string shown inside one BT..ET object into one entry */
+	fprintf(stdout, "act_txtobj:: Here\n");
+	assert(p->ast->token_type == TT_SEQUENCE);
+
+	TextEntry          *txtobj = H_ALLOC(TextEntry);
+	const HParsedToken *opstream = H_INDEX_TOKEN(p->ast, p->ast->seq->used-1);
+	const HParsedToken *tt_text=NULL;
+	char *tstr=NULL;
+	size_t textlen=0;
+
+	fprintf(stdout, "act_txtobj:: numtokens = %zu\n", opstream->seq->used);
+
+	// Pass 1: walk the operators and add up the length of every shown
+	// string so the concatenation buffer can be allocated in one go.
+	// Operators that show no text contribute nothing.
+	for (size_t i = 0; i < opstream->seq->used; i++) {
+		TextEntry *txte = H_CAST(TextEntry, opstream->seq->elements[i]);
+
+		switch (txte->type) {
+		case TW_TJ:
+			textlen += txte->tarray.flattened.nchars;
+			break;
+		case TW_Tj:
+		case TW_Tq:
+			textlen += txte->tstr.nchars;
+			break;
+		case TW_Tqq:	/* act_TdoubleQ_op stores the string in twfmt, not tstr */
+			textlen += txte->twfmt.tstr.nchars;
+			break;
+		default:
+			; // ignore
+		}
+	}
+	tstr = h_arena_malloc(p->arena, sizeof(uint8_t) * textlen);
+	size_t idx=0;
+	// Pass 2: copy the pieces, in stream order, into the buffer
+	for (size_t i = 0; i < opstream->seq->used; i++) {
+		TextEntry *txte = H_CAST(TextEntry, opstream->seq->elements[i]);
+
+		switch (txte->type) {
+		case TW_TJ:
+			memcpy(&tstr[idx], txte->tarray.flattened.text, txte->tarray.flattened.nchars);
+			idx += txte->tarray.flattened.nchars;
+			break;
+		case TW_Tj:
+		case TW_Tq:
+			memcpy(&tstr[idx], txte->tstr.text, txte->tstr.nchars);
+			idx += txte->tstr.nchars;
+			break;
+		case TW_Tqq:	/* act_TdoubleQ_op stores the string in twfmt, not tstr */
+			memcpy(&tstr[idx], txte->twfmt.tstr.text, txte->twfmt.tstr.nchars);
+			idx += txte->twfmt.tstr.nchars;
+			break;
+		default:
+			; // ignore
+		}
+	}
+	assert(idx == textlen);
+
+	// The result is tagged TW_TJ; only its flattened string is filled in
+	txtobj->type  = TW_TJ;
+	txtobj->obj   = NULL;
+	txtobj->tarray.flattened.text   = tstr;
+	txtobj->tarray.flattened.nchars = textlen;
+	// pretty print the information
+	tt_text = H_MAKE(TextEntry, txtobj);
+	pp_textentry(stdout, tt_text, 0, 0);
+
+	return (HParsedToken *)tt_text;
+}
+
+
+
+
 
-#if 0
-	if (v == NULL)
-		fprintf(stderr, "stream dict has no /Type\n");
-	else if (v->token_type != TT_BYTES)
-		fprintf(stderr, "stream /Type is no name object\n");
-	else if (bytes_eq(v->bytes, "XRef"))
-		return true;
-	return false;
-#endif	// XXX this block can be removed
 
-	return (v != NULL && v->token_type == TT_BYTES &&
-	    bytes_eq(v->bytes, "XRef"));
-}
 
-HParsedToken *
-act_dict_(const HParseResult *p, void *env)
-{
-	Dict *dict = H_CAST_SEQ(p->ast);
 
-	return H_MAKE(Dict, dict);
-}
 
-#define act_array_ h_act_flatten
 
-HParsedToken *
-act_shortlength(const HParseResult *p, void *u)
+
+// Utility -- map a code to printable ASCII, '?' otherwise; echoes to stdout (simplistic UTF-16 handling)
+char convert2char(unsigned int b1)
 {
-	uint8_t length = H_CAST_UINT(p->ast);
-	/* Length can range from 0-127, corresponding to the range 1-128, inclusive */
-	uint8_t finallength = length+1;
+	char val;
 
-	return H_MAKE_UINT(finallength);
+	if (b1 == 0)
+	{
+		val = '?';
+	}
+	else if ( (b1 < 0x20) || ( b1 >= 0x7F ) )	/* control byte, DEL or non-ASCII */
+	{
+		fprintf(stdout, " 0X%02X ", b1);
+		val = '?';
+	}
+	else
+	{
+		val = b1;
+		fprintf(stdout, "%c", val);
+	}
+	return val;
 }
 
-HParsedToken *
-act_longlength(const HParseResult *p, void *u)
-{
-	uint8_t length = H_CAST_UINT(p->ast);
-	uint8_t finallength = 257-length;
 
-	return H_MAKE_UINT(finallength);
-}
+// *********************************************************************
+// DEBUG
 
 HParsedToken *
-act_longrun(const HParseResult *p, void *u)
+act_txtbegin_(const HParseResult *p, void *u)
 {
-	HParsedToken **elements = h_seq_elements(p->ast);
-	HParsedToken *res = H_MAKE_SEQ();
-
-	uint8_t length = H_CAST_UINT(elements[0]);
-	uint8_t data = H_CAST_UINT(elements[1]);
+  const HParsedToken *tok=p->ast;
 
-	for (size_t len = 0; len < length; ++len)
-	{
-		h_seq_snoc(res, H_MAKE_UINT(data));
-	}
+  fprintf(stdout, "act_txtbegin:: Here %lx\n", (long unsigned int)tok);
 
-	return res;
+  return (HParsedToken *)tok;
 }
-
 HParsedToken *
-act_rldstring(const HParseResult *p, void *u)
+act_txtend(const HParseResult *p, void *u)
 {
-	const HParsedToken *flattened = h_seq_flatten(p->arena, p->ast);
-	HCountedArray *flattened_seq = H_CAST_SEQ(flattened);
-	size_t bytes_required;
-	uint8_t *result_bytes;
-
-	bytes_required = flattened_seq->used - 1;
-	result_bytes = h_arena_malloc(p->arena, sizeof(uint8_t) * bytes_required);
 
-	for (size_t i = 0; i < flattened_seq->used-1; ++i)
-	{
-		result_bytes[i] = H_CAST_UINT(flattened_seq->elements[i]);
-	}
+  fprintf(stdout, "act_txtend:: Here\n");
 
-	return H_MAKE_BYTES(result_bytes, bytes_required);
+  return (HParsedToken *)p->ast;
 }
 
+
+/*
+ * ********************************************************************
+ * End Text parsing
+ * ********************************************************************
+ */
+
+
 /*
  * input grammar
  */
@@ -951,9 +1786,20 @@ HParser *p_wel;
 HParser *p_elemr;
 HParser *p_npair;
 
+/*
+ * Parsers for text streams
+ */
+HParser *p_textbegin;
+HParser *p_textstream;
+HParser *p_trailer;
+HParser *p_page;
+
+
 /* continuations for h_bind() */
 HParser *kstream(HAllocator *, const HParsedToken *, void *);
 HParser *kxstream(HAllocator *, const HParsedToken *, void *);
+HParser *ktxtstream(HAllocator *, const HParsedToken *, void *);
+HParser *kcontentstream(HAllocator *, const HParsedToken *, void *);
 
 void
 init_runlengthdecode_parser(struct Env *aux)
@@ -977,9 +1823,9 @@ void
 init_parser(struct Env *aux)
 {
 	TT_HParseResult = h_allocate_token_new("HParseResult", NULL, pp_parseresult);
-	TT_XREntry =	h_allocate_token_new("XREntry", NULL, pp_xrentry);
-	TT_Ref =	h_allocate_token_new("Ref", NULL, pp_ref);
-	TT_Dict =	h_allocate_token_new("Dict", NULL, pp_dict);
+	TT_XREntry      = h_allocate_token_new("XREntry", NULL, pp_xrentry);
+	TT_Ref          = h_allocate_token_new("Ref", NULL, pp_ref);
+	TT_Dict         = h_allocate_token_new("Dict", NULL, pp_dict);
 
 	/* lines */
 	H_RULE(cr,	p_mapch('\r', '\n'));	/* semantic value: \n */
@@ -1041,6 +1887,13 @@ init_parser(struct Env *aux)
 	H_VRULE(pnat,	nat);
 	H_RULE(npair,	SEQ(pnat, wel,ws, nat));
 
+	/* Whitespace can occur between any digit and has to be ignored, */
+	/* Comments are not allowed inside streams, and % character should cause
+	 * a parse error. */
+	H_RULE(aws,	IGN(h_many(wchar)));  // all white space, include CR & LF, but not comments
+	#define MANY_AWS(X) h_many(CHX(aws, X))
+
+
 	/*
 	 * objects
 	 */
@@ -1225,16 +2078,12 @@ init_parser(struct Env *aux)
 	/* debug parser to consume as much as possible */
 	H_RULE(pdfdbg,	SEQ(OPT(start_junk), header, OPT(hdr_junk), h_many(tail), body, OPT(xr_td), OPT(SEQ(startxr, final_eof_junk))));
 
+
+
+
 	/*
 	 * filters
 	 */
-	/* Whitespace can occur between any digit and has to be ignored, */
-	/* Comments are not allowed inside streams, and % character should cause
-	 * a parse error. */
-	H_RULE(aws,	IGN(h_many(wchar)));  // all white space, include CR & LF, but not comments
-	#define MANY_AWS(X) h_many(CHX(aws, X))
-
-
 
 	/* Ascii85Decode */
 	H_RULE(a85eod,	SEQ(h_ch('~'), aws, h_ch('>')));
@@ -1246,7 +2095,7 @@ init_parser(struct Env *aux)
 	 * a parse error. */
 	#define MANY_LWS(X) h_many(CHX(lws, X))
 
-	/* This encoding of zero is not allowed */
+	/* Encoding of zero is not allowed */
 	// Folded the test for a85fiveexcl into the validation component
 	H_VARULE(a85fivedigits,	h_repeat_n(SEQ(a85digit, aws), 5));
 	// TODO:: will need to pull out error conditions -- a85fiveexcl or 'z' as one of the digits
@@ -1270,6 +2119,73 @@ init_parser(struct Env *aux)
 	init_runlengthdecode_parser(aux);
 
 
+	// ==========================================================================
+	/*
+	 * Text Objects Extraction - embedded in content streams
+	 *
+	 */
+	// ==========================================================================
+	/*                                                                             \
+	 * Text Objects Extraction - embedded in content streams                       \
+	 */                                                                            \
+                                                                                   \
+    H_RULE(txtbegin, h_indirect());                                                \
+    H_RULE(txt_before_junk, IGN(SEQ(h_not(LIT("BT")), CHX(comment, h_uint8()))));                 \
+	H_ARULE(txtbegin_, SEQ(IGN(h_many(txt_before_junk)), LIT("BT"), aws));                       \
+	h_bind_indirect(txtbegin, txtbegin_);                                          \
+	H_ARULE(txtend, KW("ET"));                                                     \
+	/* 9.3 - Text state operators */                                                        \
+    H_AVRULE(tnumb, numb);                                                                   \
+	H_ARULE(Tc_op, SEQ(tnumb, aws, LIT("Tc"), aws));  /* 9.3.2 - charSpace */                \
+	H_ARULE(Tw_op, SEQ(tnumb, aws, LIT("Tw"), aws));  /* 9.3.3 - wordSpace */                \
+	H_ARULE(Tz_op, SEQ(tnumb, aws, LIT("Tz"), aws));  /* 9.3.4 - horizontal scaling */       \
+	H_ARULE(TL_op, SEQ(tnumb, aws, LIT("TL"), aws));  /* 9.3.5 - leading */                  \
+	H_ARULE(Tf_op, SEQ(name, aws, nat, aws, KW("Tf"), aws));  /* font and size */     \
+	/* TODO: must map to an existing font dictionary */                               \
+	H_VRULE(tmode, nat);                        /* True if <= 7 */                    \
+	H_ARULE(Tr_op, SEQ(tmode, aws, LIT("Tr"), aws));  /* 9.3.6 - rendering mode */         \
+	H_ARULE(Ts_op, SEQ(tnumb, aws, LIT("Ts"), aws));  /* rise */                             \
+	H_RULE(textstate_ops, CHX(Tc_op, Tw_op, Tz_op, TL_op, Tf_op, Tr_op, Ts_op));    \
+                                                                                   \
+	/* 9.4.2 - Text positioning operators */                                          \
+	H_ARULE(Td_op, SEQ(tnumb, aws, tnumb, aws, LIT("Td"), aws));      /* move to next line with offset */                \
+	H_ARULE(TD_op, SEQ(tnumb, aws, tnumb, aws, LIT("TD"), aws));      /* move to next line with offset and set state */  \
+	H_ARULE(Tm_op, SEQ(REP(SEQ(tnumb, aws), 6), LIT("Tm"), aws));  /* set text matrix */                               \
+	H_ARULE(Tstar_op, SEQ(LIT("T*"), aws));                     /* move to next line */                              \
+	H_RULE(textpos_ops, CHX(Td_op, TD_op, Tm_op, Tstar_op));                       \
+                                                                                   \
+	/* 9.4.3 - Text showing operators */                                              \
+	H_RULE(quote,	h_ch('\''));                                                   \
+	H_RULE(dquote,	h_ch('"'));                                                    \
+	H_ARULE(Tj_op, SEQ(string, aws, LIT("Tj"), aws));          /* show text string */ \
+	H_ARULE(TsingleQ_op, SEQ(string, aws, quote, aws));   /* Move to next line and show text */                \
+	H_ARULE(TdoubleQ_op, SEQ(tnumb, aws, tnumb, aws, string, aws, dquote, aws)); /* Move to next line and show formatted text */ \
+	H_RULE(TArr_elem, SEQ(CHX(tnumb, string), aws));                       \
+	H_ARULE(TJ_op, SEQ(IGN(lbrack), aws, h_many(TArr_elem), IGN(rbrack), aws, LIT("TJ"), aws));      /* show one or more text strings */ \
+	H_RULE(textshow_ops, CHX(Tj_op, TsingleQ_op, TdoubleQ_op, TJ_op));             \
+                                                                                   \
+    H_RULE(text_inbetween_junk, IGN(SEQ(h_not(txtend), h_uint8())));                \
+                                                                                   \
+    H_RULE(text_ops, CHX(textstate_ops, textpos_ops, textshow_ops, text_inbetween_junk));               \
+                                                                                   \
+	/* Text object */                                                              \
+	H_ARULE(txtobj, SEQ(txtbegin, h_many(text_ops), txtend));   \
+                                                                                   \
+	/* text streams */                                                             \
+	H_RULE(txtstream, h_bind(h_many1(txtobj), ktxtstream, aux));                   \
+                                                                                   \
+                                                                                   \
+	// Page Tree
+	H_ARULE(contentstream, h_middle(stmbeg, h_many1(h_uint8()), stmend));
+//	H_ARULE(contentstream, h_middle(stmbeg, h_many(SEQ(h_not(stmend), h_uint8())), stmend));
+	H_ARULE(pgcontents, CHX(array, contentstream));
+	H_ARULE(page, SEQ(ws, npair, wel, KW("obj"), ws, pgcontents,
+			OPT(ws), OPT(lws), KW("endobj")));
+//	H_ARULE(page, CHX(ref, array));
+	p_page = page;
+
+
+
 	/* global parser variables */
 	p_pdf = pdf;
 	p_pdfdbg = pdfdbg;
@@ -1283,6 +2199,10 @@ init_parser(struct Env *aux)
 	p_elemr = h_action(elemr, h_act_flatten, NULL);
 	p_npair = npair;
 
+	/* text parser variables */                                                  \
+	p_textbegin  = txtbegin;                                                       \
+	p_textstream = txtstream;                                                      \
+
 	p_fail = h_nothing_p();
 	p_epsilon = epsilon;
 	p_return_0 = h_action(epsilon, act_return_uint, (void *)0);
@@ -1455,7 +2375,6 @@ resolve(struct Env *aux, const HParsedToken *v)
  */
 
 #include <limits.h>	/* INT_MAX */
-#include <stdlib.h>	/* abs() */
 #include <zlib.h>
 #include <err.h>
 
@@ -1883,6 +2802,7 @@ LZWDecode(const Dict *parms, HBytes b, HParser *p)
 	done = depredict(&pred, cur_lzw_spec->lzw_buf, cur_lzw_spec->write_head-1);
 	assert(!done);	// XXX ITERATIVE
 
+	// SR::TODO:: Do a H_MAKE rather than a parse and let the caller do the parse
 	res = h_parse(p, pred.out, pred.nout);
 	free(pred.out);
 
@@ -1905,6 +2825,7 @@ RunLengthDecode(const Dict *parms, HBytes b, HParser *p)
 	}
 
 	assert(res->ast && res->ast->token_type == TT_BYTES);
+	// SR::TODO:: Do a H_MAKE rather than a parse and let the caller do the parse
 	res = h_parse(p, res->ast->bytes.token, res->ast->bytes.len);
 
 	return res;
@@ -1932,6 +2853,7 @@ ASCIIHexDecode(const Dict *parms, HBytes b, HParser *p)
 	assert(f_res->ast && f_res->ast->token_type == TT_BYTES);
 	fprintf(stdout, "ASCIIHexDecode::string = [%.*s]\n",
 			(int)f_res->ast->bytes.len, (char*)f_res->ast->bytes.token);
+	// SR::TODO:: Do a H_MAKE rather than a parse and let the caller do the parse
 	res = h_parse(p, f_res->ast->bytes.token, f_res->ast->bytes.len);
 
 	if (res == NULL)
@@ -1957,6 +2879,7 @@ ASCII85Decode(const Dict *parms, HBytes b, HParser *p)
 	}
 
 	assert(f_res->ast && f_res->ast->token_type == TT_BYTES);
+	// SR::TODO:: Do a H_MAKE rather than a parse and let the caller do the parse
 	res = h_parse(p, f_res->ast->bytes.token, f_res->ast->bytes.len);
 
 	if (res == NULL)
@@ -2002,8 +2925,11 @@ decode_stream(const Dict *d, HBytes b, HParser *p)
 		filter = RunLengthDecode;
 	else if (bytes_eq(v->bytes, "LZWDecode"))
 		filter = LZWDecode;
-	else
-		return NULL;		/* filter not supported */
+	else {		/* filter not supported */
+		fprintf(stderr, "decode_stream:: Unsupported Filter [%.*s\n]",
+				(int)v->bytes.len, v->bytes.token);
+		return NULL; /* Treat the stream as a byte array */
+	}
 
 	v = dictentry(d, "DecodeParms");
 	if (v && v->token_type == TT_Dict)
@@ -2039,6 +2965,9 @@ act_take_bytes(const HParseResult *p, void *env)
 	 * allocated only temporarily for the lifetime of the continuation
 	 * below.
 	 */
+	// DEBUG
+	fprintf (stdout, "act_take_bytes: Current position (bytes)= %p, len=%ld\n",
+			(void *)bs->token + offset, bs->len);
 	return H_MAKE_BYTES(bs->token + offset, bs->len);
 }
 
@@ -2078,7 +3007,14 @@ p_stream_data__m(HAllocator *mm__, const Dict *dict)
 	if (bytes_eq(v->bytes, "ObjStm"))
 		return p_objstm__m(mm__, dict);
 #endif
-
+	if (bytes_eq(v->bytes, "XObject")) {
+		/*
+		 *  TODO:: external objects can be images, forms, or postscript objects
+		 *  We are not handling them at the moment
+		 */
+		fprintf (stdout, "p_stream_data__m: XObject parsing is not yet supported!\n");
+		return NULL;
+	}
 	return NULL;					/* unrecognized type */
 }
 
@@ -2099,8 +3035,10 @@ act_ks_value(const HParseResult *p, void *u)
 	if (res == NULL) {
 		HBytes b = {NULL, 0};
 		const HParsedToken *v = dictentry(spec->dict, "Type");
-		if (v != NULL && v->token_type == TT_BYTES)
-			b = v->bytes;
+		if (v != NULL && v->token_type == TT_BYTES) {
+			b.token = v->bytes.token;
+			b.len   = v->bytes.len;
+		}
 		if (b.len > INT_MAX)
 			b.len = INT_MAX;
 		fprintf(stderr, "parse error in stream (%*s)\n",
@@ -2391,6 +3329,384 @@ kxstream(HAllocator *mm__, const HParsedToken *x, void *env)
 }
 
 
+
+
+
+/*
+ * Continuation for the text stream rule: walks the parsed text objects and
+ * appends each text-showing entry to the list kept in the environment
+ * (txthead/txttail/ntextobjs) for further processing, e.g. writing it out
+ * to a file with the same name as the pdf input filename but with a
+ * .psectxt suffix.
+ * It does not consume the string and returns the token as the output.
+ *
+ * x = (txtobj ...)
+ *
+ * Entries whose type is none of the operators handled in the switch are
+ * reported and skipped. The list nodes are malloc'ed here; the strings they
+ * point to are not copied, they stay in the token memory.
+ */
+HParser *
+ktxtstream(HAllocator *mm__, const HParsedToken *x, void *env)
+{
+	struct Env *aux = env;
+
+	assert (x->token_type == TT_SEQUENCE);
+	size_t n_tobjs = x->seq->used;
+
+	for (size_t n = 0; n < n_tobjs; n++) {
+		assert(x->seq->elements[n]->token_type == TT_TextEntry);
+		TextEntry *tste = H_CAST(TextEntry, x->seq->elements[n]);
+		struct textstr *tstr = NULL;
+		/*
+		 *  To save all of the operators along with the text string, we have to walk
+		 *  through all of the tokens and keep a table of pointers to them
+		 *  For now, just keep a pointer to the text string in the environment
+		 *
+		 */
+		switch (tste->type) {
+		case TW_Tj:
+		case TW_Tq:
+		case TW_Tqq:
+			tstr = &tste->tstr;
+			break;
+		case TW_TJ:
+			tstr = &tste->tarray.flattened;
+			break;
+		default:
+			fprintf(stderr, "ktxtstream:: Text token type '%u' ignored\n",
+					tste->type);
+			break;
+		}
+		if (tstr == NULL)
+			continue;	/* nothing to record; avoids a NULL dereference below */
+
+		fprintf(stdout, "ktxtstream: Value = %.*s\n",
+				(int) tstr->nchars, tstr->text);
+
+		// store the string in the environment
+		// not sure whether we need to actually store the string in malloc'ed area
+		// currently, we are reusing the token memory previously created
+		struct textnode *txtnd = malloc(sizeof *txtnd);
+		if (txtnd == NULL) {
+			fprintf(stderr, "ktxtstream:: out of memory, remaining text entries dropped\n");
+			break;
+		}
+		txtnd->tstr = tstr;
+		txtnd->next = NULL;
+		if (aux->txthead == NULL)
+			aux->txthead = txtnd;
+		if (aux->txttail == NULL)
+			aux->txttail = txtnd;
+		else {
+			aux->txttail->next = txtnd;
+			aux->txttail       = txtnd;
+		}
+		aux->ntextobjs += 1;
+	}
+
+	return p_return__m(mm__, x);
+}
+
+
+
+/*
+ * ********************************************************************
+ * Start Catalog parsing
+ * ********************************************************************
+ */
+
+/*
+ * Process one page node: remember its Resources entry and, when Contents is
+ * an indirect reference to an in-use object, run p_page on the input from
+ * that object's xref offset. The parse result is only logged for now.
+ *
+ * aux    - parser environment (input buffer, its size, xref lookup)
+ * pgNode - page node; pn.page must already hold the page dictionary
+ */
+void parse_pagenode(
+		struct Env         *aux,
+		PtNode_S           *pgNode   // node
+		)
+{
+	Dict               *pageD       = pgNode->pn.page;
+	const HParsedToken *contents_t  = NULL; // dictionary token
+	Ref                *contents_r  = NULL;
+	XREntry            *ent         = NULL;
+	HParseResult       *res         = NULL;
+
+	// Hold on to the Resources dictionary
+	// This dictionary may be empty
+	// If there is no dictionary ==> inherit resources from parent
+	// TODO:: Capture a list of fonts associated with this page
+	pgNode->pn.resources = dictentry(pageD, "Resources");
+
+	// Process the contents stream or array (this will need a parser)
+	contents_t = dictentry(pageD, "Contents");
+	if (contents_t == NULL) {
+		fprintf(stderr, "parse_pagenode: Page node without contents!\n");
+		return;
+	}
+	if (contents_t->token_type != TT_Ref) {
+		fprintf(stderr, "parse_pagenode: Page node is not a reference ... may be an array!\n");
+		// TODO:: Haven't handle this case
+		return;
+	}
+
+	contents_r = H_CAST(Ref, contents_t);
+	ent        = lookup_xref(aux, contents_r->nr, contents_r->gen);
+	if (ent == NULL || ent->type != XR_INUSE)
+		return;		/* no usable xref entry for the contents object */
+
+	size_t offset = ent->n.offs;
+	if (offset >= aux->sz) {	/* would make the remaining length wrap around */
+		fprintf(stderr, "parse_pagenode: Contents offset %zu out of range!\n", offset);
+		return;
+	}
+	fprintf (stdout, "parse_pagenode:: Offset = %zu\n", offset);
+	res = h_parse(p_page, aux->input + offset, aux->sz - offset);
+	fprintf (stdout, "parse_pagenode:: res = %p\n", (void *) res);
+}
+
+
+
+/*
+ * Walk one level of the page tree. For every entry of the Kids array of the
+ * tree node referenced by ptRef_t: resolve the kid, check that its Parent
+ * entry points back at ptRef_t, then either recurse (Type = Pages) or hand
+ * the page dictionary to parse_pagenode() (Type = Page). The per-kid
+ * records are stored in treeNode->kids.
+ *
+ * aux      - parser environment; aux->catalog.pgCount is the expected leaf count
+ * treeNode - node whose kids table is allocated and filled in here
+ * ptRef_t  - reference token of this tree node (compared with each kid's Parent)
+ * kids     - the Kids array token of this tree node
+ * curr     - running page count accumulated by the caller
+ *
+ * Returns curr plus the Count of every Pages node found directly under this
+ * node. Most structural errors are reported on stderr and end the walk of
+ * this node early.
+ */
+// need to maintain information about pages
+size_t
+parse_pagetree(
+		struct Env         *aux,
+		PtNode_S           *treeNode,
+		const HParsedToken *ptRef_t,     // my page tree node reference
+		const HParsedToken *kids,        // my kids
+		size_t              curr
+		)
+{
+	PtNode_S           *node      = NULL;
+	const HParsedToken *kidRef    = NULL; // page tree or page node reference
+	const HParsedToken *kidDict_t = NULL;
+	Dict               *kidDict   = NULL;
+	const HParsedToken *nType     = NULL; // node type
+	const HParsedToken *meRef_t   = NULL; // this page tree node
+	const HParsedToken *grandKids = NULL;
+	const HParsedToken *item      = NULL;
+	size_t              npages    = 0;
+	Ref     *ptRef=NULL, *meRef=NULL;
+
+	// get the kids (pgTable)
+	HCountedArray  *pgTable = H_CAST_SEQ(kids);
+	size_t          pgtSz   = pgTable->used;
+	if (curr + pgtSz > aux->catalog.pgCount) {
+		fprintf(stderr, "parse_pagetree: More kids then specified leaves!\n");
+		// TODO:: probably just a warning is enough here -- run the VIOL parser?
+	}
+	treeNode->kids = (PtNode_S*)h_arena_malloc(pgTable->arena, pgtSz * sizeof(PtNode_S));
+
+	// Process the kids
+	for (size_t i=0; i<pgtSz; i++)
+	{
+		node      = &treeNode->kids[i];
+		kidRef    = pgTable->elements[i];
+		kidDict_t = resolve(aux, kidRef);     // page or tree node dictionary token
+		if ( (kidDict_t == NULL) || (kidDict_t->token_type != TT_Dict) ) {
+			// the H_CAST below must only ever see a dictionary token
+			fprintf(stderr, "parse_pagetree: Not a page or page tree node!\n");
+			goto end;
+		}
+		kidDict   = H_CAST(Dict, kidDict_t);  // page or tree node dictionary
+
+		// check if this is a page node or tree node
+		// if tree node, call parse_pagetree recursively
+		// otherwise, process the page
+		nType = dictentry(kidDict, "Type");
+		if ( (nType == NULL) || (nType->token_type != TT_BYTES)  ) {
+			fprintf(stderr, "parse_pagetree: Not a page or page tree node!\n");
+			// TODO:: Call VIOL
+			goto end;	// nType may be NULL here, so it must not be inspected
+		}
+
+		// Parent is a required field, for every page & page tree node except root
+		// this will be passed in to the child
+		meRef_t = dictentry(kidDict, "Parent");
+		if ( (meRef_t == NULL) || (meRef_t->token_type != TT_Ref) ) {
+			fprintf(stderr, "parse_pagetree: Invalid parent tree node!\n");
+			// TODO:: Call VIOL
+		}
+		else {
+			meRef   = H_CAST(Ref, meRef_t);
+			ptRef   = H_CAST(Ref, ptRef_t);
+			if ( (meRef->nr != ptRef->nr) || (meRef->gen != ptRef->gen) ) {
+				fprintf(stderr, "parse_pagetree: I am not the parent of my child! "
+						"[kids parent = (%zu, %zu),  me = (%zu, %zu)]\n",
+						meRef->nr, meRef->gen, ptRef->nr, ptRef->gen);
+				// TODO -- Use VIOL
+				goto end;
+			}
+		}
+
+		// the child node is a page tree node
+		if (bytes_eq(nType->bytes, "Pages")) { // tree node
+			/* resolve and process the page tree root reference to extract the dictionary --> Page Tree Object */
+			grandKids   = dictentry(kidDict, "Kids");
+			if (grandKids == NULL) {
+				fprintf(stderr, "parse_pagetree: page tree node with no grand kids!\n");
+				// TODO:: Violation ...this node should not be NULL
+				goto end;
+			}
+			else if (grandKids->token_type != TT_SEQUENCE) {
+				fprintf(stderr, "parse_pagetree: Token type error!! type = %d\n", grandKids->token_type);
+				// TODO:: Violation ...
+				goto end;
+			}
+
+			// Count is a required field
+			item = dictentry(kidDict, "Count");
+			if ( (item == NULL) || (item->token_type != TT_SINT) ) {
+				fprintf(stderr, "parse_pagetree: Required page node count missing!\n");
+				goto end;
+			}
+			// verify the count
+			npages = H_CAST_SINT(item);
+			if (npages+curr > aux->catalog.pgCount) {
+				fprintf(stderr, "parse_pagetree: page count greater than anticipated leaves:: "
+						"computed = %zu, max expected = %zu\n", npages+curr, aux->catalog.pgCount);
+				// TODO:: probably just a warning is enough here -- run the VIOL parser?
+			}
+			curr += npages;
+
+			// parse_pagetree
+			node->type   = PG_TREE;
+			node->parent = meRef_t;
+			parse_pagetree(aux, node, kidRef, grandKids, curr);
+		}
+
+		// process a page node
+		else if (bytes_eq(nType->bytes, "Page")) { // page node
+			node->type       = PG_NODE;
+			node->parent     = meRef_t;
+			node->pn.pageRef = kidRef;
+			node->pn.page    = kidDict;
+
+			parse_pagenode(aux, node);
+		}
+	} // end loop
+
+	end:
+	// return to the caller instead of exit(0): terminating the process here
+	// cut the recursion short after the first nested subtree
+	return curr;
+}
+
+
+
+
+/*
+ * This helper starts the process of elaborating the page tree
+ * starting with the trailer dictionary: root is resolved to the catalog and
+ * the catalog token, page tree root reference and page count are stored in
+ * aux->catalog before the tree is walked with parse_pagetree().
+ * NOTE(review): every path still ends in exit(0), so the bool result never
+ * reaches the caller -- looks like a debugging stop, confirm before use.
+ */
+bool
+parse_catalog(struct Env *aux, const HParsedToken *root)
+{
+	bool success = false;
+	const HParsedToken *dict_t  = NULL;
+	const Dict         *catalog = NULL;
+	const HParsedToken *ptRef   = NULL; // page tree reference
+	const Dict         *ptRoot  = NULL; // page tree root Dictionary
+	const HParsedToken *kids    = NULL;
+	const HParsedToken *item    = NULL;
+
+	// initialize the catalog structure
+	aux->catalog.catalog = NULL;
+	aux->catalog.pRoot   = NULL;
+	aux->catalog.pgCount = 0;
+
+	// Ensure the reference is to the catalog dictionary
+	dict_t  = resolve(aux, root);     // token
+	if ( (dict_t == NULL) || (dict_t->token_type != TT_Dict) ) {
+		fprintf(stderr, "parse_catalog: Catalog not found!\n");
+		goto end;
+	}
+	catalog = H_CAST(Dict, dict_t);   // catalog dictionary
+	item = dictentry(catalog, "Type");
+	if ( (item == NULL) || (item->token_type != TT_BYTES) ||
+			(! bytes_eq(item->bytes, "Catalog")) ) {
+		fprintf(stderr, "parse_catalog: Catalog not found!\n");
+		goto end;
+	}
+	aux->catalog.catalog = dict_t; // catalog dictionary token
+
+	// Catalog found -- Now get the root of the page tree associated with the catalog
+	ptRef = dictentry(catalog, "Pages"); // indirect reference to a dictionary
+	if ( (ptRef == NULL) || (ptRef->token_type != TT_Ref) ) {
+		fprintf(stderr, "parse_catalog: Page Tree not found!\n");
+		goto end;
+	}
+	aux->catalog.pRoot = ptRef; // indirect reference to the page tree
+
+	/* resolve and process the page tree root reference to extract the dictionary --> Page Tree Object */
+	dict_t = resolve(aux, ptRef);    // page tree root node
+	if ( (dict_t == NULL) || (dict_t->token_type != TT_Dict) ) {
+		fprintf(stderr, "parse_catalog: Page Tree not found!\n");
+		goto end;
+	}
+	ptRoot = H_CAST(Dict, dict_t);   // page tree root dictionary
+
+	// Count is a required field (a negative value would wrap in the size_t pgCount)
+	item = dictentry(ptRoot, "Count");
+	if ( (item == NULL) || (item->token_type != TT_SINT) || (H_CAST_SINT(item) < 0) ) {
+		fprintf(stderr, "parse_catalog: Required page node count missing!\n");
+		goto end;
+	}
+	aux->catalog.pgCount = H_CAST_SINT(item);
+
+	item = dictentry(ptRoot, "Parent");  // root node ==> parent should be NULL
+	if (item != NULL) {
+		fprintf(stderr, "parse_catalog: Parent of root page tree node is not NULL [p = %p]!\n",
+				(void *)item);
+		goto end;
+	}
+
+	// Kids is a required field
+	kids = dictentry(ptRoot, "Kids");  // array of references to page or page tree nodes
+	if ( (kids == NULL) || (kids->token_type != TT_SEQUENCE) ) {
+		fprintf(stderr, "parse_catalog: There are no kids!\n");
+		goto end;
+	}
+	aux->catalog.pgTree.type   = PG_TREE;
+	aux->catalog.pgTree.parent = NULL;
+	parse_pagetree(aux, &aux->catalog.pgTree, ptRef, kids, 0);
+	success = true;
+
+	end:
+	exit(0);
+	return success;
+}
+
+/*
+ * ********************************************************************
+ * End Catalog parsing
+ * ********************************************************************
+ */
+
 /*
  * main program
  */
@@ -2494,6 +3810,52 @@ end:
 	return xrefs;
 }
 
+
+
+/*
+ * This utility extracts the text stream from the global environment
+ * writes it out to a file with the same name as the pdf input filename
+ * but with a .psectxt suffix. Each string is also echoed to stdout.
+ */
+void
+text_extract(const struct Env *aux)
+{
+	static const char suffix[] = ".psectxt";
+
+	fprintf(stdout, "text_extract:: num text objects = %zu\n", aux->ntextobjs);
+	fprintf(stdout, "text_extract:: %s\n", aux->infile);
+
+	size_t infnlen = strlen(aux->infile);
+	char  *outfn   = malloc(infnlen + sizeof suffix);	/* sizeof counts the NUL */
+	if (outfn == NULL) {
+		fprintf(stderr, "text_extract:: malloc() failed\n");
+		return;
+	}
+	memcpy(outfn, aux->infile, infnlen);
+	memcpy(&outfn[infnlen], suffix, sizeof suffix);
+
+	// open the file for writing
+	FILE *stream = fopen(outfn, "w");
+	if (stream == NULL) {
+		fprintf(stderr,
+				"text_extract:: Failed to open file '%s' for writing\n", outfn);
+		free(outfn);	/* was leaked on this path */
+		return;
+	}
+	/* stop at the end of the list even if ntextobjs claims more nodes */
+	const struct textnode *curr = aux->txthead;
+	for (size_t i = 0; i < aux->ntextobjs && curr != NULL; i++) {
+		fprintf(stdout, "%.*s\n", (int) curr->tstr->nchars, curr->tstr->text);
+		fprintf(stream, "%.*s\n", (int) curr->tstr->nchars, curr->tstr->text);
+		curr = curr->next;
+	}
+	if (fclose(stream) != 0)
+		fprintf(stderr, "text_extract:: Failed to write file '%s'\n", outfn);
+	free(outfn);
+}
+
+
+
 int
 main(int argc, char *argv[])
 {