diff --git a/pdf.c b/pdf.c
index 59ed8b077837d7427c4d07fef5ccd3e0d460324c..c7d99574018a16d7a53491479e120787080407ab 100644
--- a/pdf.c
+++ b/pdf.c
@@ -255,78 +255,30 @@ typedef HCountedArray Dict;
 
 
 
-// Catalog Tree
-typedef struct RsrcDict_S {
-	const HParsedToken  *resources;             // font references dictionary (resources == NULL) ==> inherit
-	const HParsedToken   *fonts;                // dictonary of fonts used in this page
-	size_t                numFonts;
-	const HParsedToken   *xobj;                 // xobj used in this page (?? is this <=1??, can page use multiple xobjects??)
-//	Dict                **seenFonts;
-//	size_t                numSeenFonts;
-//	const HParsedToken  **seenCmaps;            // memoized cmaps (should this be a bytestream?
-//	size_t                numCmapsSeen;
-} RsrcDict_T;
-
-struct PtNode_S;
-
-typedef struct PtNode_S {
-	enum {PG_TREE, PG_NODE, XO_NODE} type;
-	const HParsedToken  *parent;                // Type = Page tree -- reference
-	RsrcDict_T          *pgRsrc;                // resource structure
-	const HParsedToken  *me;                    // Reference for me
-	size_t               offset;                //
-	union {
-		struct {
-			const Dict          *dict;          // page node dictionary
-			const HParsedToken  *textStream;    // content stream -- may be a result of concatenating array of content streams
-		} pn;
-		struct {
-			struct PtNode_S   *kids;            // page table
-			size_t             count;           // number of kids
-			size_t             leaves;          // number of pages in tree
-		} pt;
-		struct {
-			const Dict          *dict;          // page node dictionary
-			const HParsedToken  *textStream;    // content stream -- may be a result of concatenating array of content streams
-		} xn;
-	};
-
-} PtNode_T;
-
-struct XoNode_S;
-typedef struct XoNode_S {
-	char 				*name;
-	PtNode_T			*node;
-	struct XoNode_S     *next;
-} XoNode_T;
-
-
-typedef struct Catalog_S {
-	const  HParsedToken  *catalog;   // reference
-	const  HParsedToken  *pRoot;     // reference
-	PtNode_T              pgTree;    // page tree
-	size_t                pgCount;   // page tree node count
-	XoNode_T              xObjs;     // list of XObjects
-	XoNode_T             *xoHead;
-	XoNode_T             *xoTail;
-	size_t				  xoCount;   // number of xobjects
-} Catalog_T;
 
 
 // Forward declaration of text extraction related structures
 struct textnode;
 struct textstr;
 struct TextEntry_S;
+struct PtNode_S;
 
 // ***********************************************************
 /*
  * Text data structures
  */
+typedef struct RsrcDict_S {                     // resource data attached to a PtNode_T through its pgRsrc member
+    const HParsedToken   *resources;            // font references dictionary (resources == NULL) ==> inherit
+    const HParsedToken   *fonts;                // dictionary of fonts used in this page
+    size_t                numFonts;             // presumably the number of entries in fonts -- TODO confirm
+    const HParsedToken   *xobj;                 // xobj used in this page (?? is this <=1??, can page use multiple xobjects??)
+//  Dict                **seenFonts;
+//  size_t                numSeenFonts;
+//  const HParsedToken  **seenCmaps;            // memoized cmaps (should this be a bytestream?)
+//  size_t                numCmapsSeen;
+} RsrcDict_T;
+
 
-typedef struct {
-	struct PtNode_S    *page;
-	struct TextEntry_S *font;
-} TextState_T;
 
 
 struct textnode {
@@ -345,7 +297,9 @@ struct textpos {
 	double tx;
 	double ty;
 };
+
 struct textmat { double cell[6]; };
+
 struct textstr {
 	uint8_t            *text;
 	uint32_t            nchars;
@@ -354,8 +308,8 @@ struct textstr {
 
 
 struct textwfmt {         /* text with formatting specifications */
-	double        aw;    /* word spacing */
-	double        ac;    /* character spacing */
+	double         aw;    /* word spacing */
+	double         ac;    /* character spacing */
 	struct textstr tstr;  /* the string */
 };
 struct tarrayelt {
@@ -371,6 +325,17 @@ struct textarray {
 	struct textstr     flattened;
 };
 
+typedef struct {                       // text state kept per page/XObject node (PtNode_T.ts)
+    struct PtNode_S    *node;          // node this state is looked up against (lookup_font expects PG_NODE or XO_NODE)
+    struct TextEntry_S *font;          // font name reference: the TextEntry of the latest Tf operator
+    double              char_spacing;  // width adjustment for characters (Tc operator)
+    double              word_spacing;  // width adjustment for the space character (Tw operator)
+    double              line_spacing;  // line adjustment (TL operator)
+    double              horiz_scaling; // horizontal scaling (Tz operator)
+    double              font_size;     // font size, copied from the Tf operator's fontsize
+    struct textpos      curr_pos;      // text position on page
+} TextState_T;
+
 
 
 /* operator::
@@ -382,7 +347,8 @@ typedef struct TextEntry_S {
 	enum {TS_Tc, TS_Tw, TS_Tz, TS_TL, TS_Tf, TS_Tr, TS_Ts,
 	      TP_Td, TP_TD, TP_Tm, TP_Tstar,
 		  TW_Tj, TW_Tq, TW_Tqq, TW_TJ} type;
-	TextState_T         ts;     // text state associated with this string (TBD: other state attributes)
+//	TextState_T         ts;     // text state associated with this string (TBD: other state attributes)
+    struct PtNode_S        *node;           // page or XObject node the string is associated with
 	union {
 		double              value; 			/* many just have a value */
 		uint8_t             mode;           /* text mode */
@@ -397,15 +363,64 @@ typedef struct TextEntry_S {
 } TextEntry;  // text object entries
 
 
+// Page Node
+typedef struct PtNode_S {
+    enum {PG_TREE, PG_NODE, XO_NODE} type;      // node kind; PG_NODE uses pn, the others presumably pt / xn -- TODO confirm
+    struct PtNode_S     *parent_n;              // reference to the parent node struct
+    const HParsedToken  *parent_t;              // Type = Page tree -- reference
+    const HParsedToken  *me_t;                  // Reference for me (*(Ref*)me_t->user ==> Ref structure)
+    unsigned int         nodeNum;               // number in my parent's kids array
+    RsrcDict_T          *pgRsrc;                // resource structure
+    struct textpos       mediaBox;              // page dimensions: act_txtobj reads tx as the width and ty as the height
+    TextState_T          ts;                    // text state associated with this node
+    size_t               offset;                // This may not be useful -- often within encoded streams
+    struct {
+        unsigned int    width;                  // page width described by MediaBox
+        unsigned int    height;                 // page height described by MediaBox
+    } pagesz;
+    union {
+        struct {
+            const Dict          *dict;          // page node dictionary
+            unsigned int         page_num;      // page number; presumably taken from Catalog lastPage -- TODO confirm
+            const HParsedToken  *textStream;    // content stream -- may be a result of concatenating array of content streams
+        } pn;
+        struct {
+            struct PtNode_S     *kids;          // page tree array
+            unsigned int         numPages;         // NOTE(review): was commented "number of kids" but is named as a page count -- confirm
+       } pt;
+        struct {
+            const Dict          *dict;          // dictionary for this XObject node -- TODO confirm
+            struct textpos       curr_pos;      // text position on page
+            const HParsedToken  *textStream;    // content stream -- may be a result of concatenating array of content streams
+        } xn;                                   // content within an xobject
+    };
+
+} PtNode_T;
+
+struct XoNode_S;
+typedef struct XoNode_S {               // one entry in the catalog's list of XObjects
+    char                *name;          // presumably the XObject's name -- TODO confirm against parse_xobject
+    PtNode_T            *node;          // node for this XObject (parse_xobject sets its type to XO_NODE)
+    struct XoNode_S     *next;          // next entry in the list
+} XoNode_T;
+
+
+typedef struct Catalog_S {           // document catalog plus page tree / XObject bookkeeping (held in Env.catalog)
+    const  HParsedToken  *catalog;   // reference
+    const  HParsedToken  *pRoot;     // reference
+    PtNode_T              pgTree;    // page tree
+    unsigned int          lastPage;  // incremented on visit of a page node
+    unsigned int          lastNode;  // incremented on visit of any PtNode_T
+
+    // TODO:: Determine if this should be moved to the PageTree
+    // XoNode is one type of PageNode
+    XoNode_T              xObjs;     // list of XObjects
+    XoNode_T             *xoHead;    // presumably the first entry of the XObject list -- TODO confirm
+    XoNode_T             *xoTail;    // presumably the last entry of the XObject list -- TODO confirm
+    unsigned int          xoCount;   // number of xobjects; parse_xobject adds each XObject dictionary's entry count
+} Catalog_T;
+
 
-#if 0
-// Haven't used this type yet - maybe OBE
-typedef struct {
-	struct textmat     fm;                  /* font matrix associated with this text object */
-	TextEntry        **ops;                 /* operators associated w/string */
-	uint8_t           *txt;                 /* the string associated with this object */
-} TextString;
-#endif
 
 // Initial use -- object streams
 typedef struct {
@@ -447,12 +462,12 @@ struct Env {
 	struct textnode     *txttail;  /* parsed text objects from the file */
 	size_t               ntextobjs;
 
-	Catalog_T            catalog;  /* Catalog object and document structure */
-	TextState_T          tstate;   /* current text state */
+	Catalog_T            catalog;      /* Catalog object and document structure */
+	PtNode_T             *curr_node;   /* current page/XObject node; its ts member holds the current text state */
 
 	unsigned int paren_nest_depth; /* String nesting depth */
 	unsigned int array_nest_depth; /* Array nesting depth */
-	unsigned int dict_nest_depth; /* Dictionary nesting depth */
+	unsigned int dict_nest_depth;  /* Dictionary nesting depth */
 
 };
 
@@ -1477,9 +1492,9 @@ void pp_fontstate(FILE *stream, const TextState_T *state)
 {
 	assert(state);
 	fprintf(stream, "\nFont State: Page = ");
-	if ( (state->page) &&
-			( (state->page->type == PG_NODE) || (state->page->type == XO_NODE) ) )
-		pp_ref(stream, state->page->me, 0, 0);
+	if ( (state->node) &&
+			( (state->node->type == PG_NODE) || (state->node->type == XO_NODE) ) )
+		pp_ref(stream, state->node->me_t, 0, 0);
 	else
 		fprintf(stream, ", Text not associated with a page or xobject\n");
 	if (state->font)  // not all operators need or set this
@@ -1621,8 +1636,7 @@ act_Tc_op(const HParseResult *p, void *u)
 	txte->value   = tval->dbl;
 
 	// associate the text with the current state
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font;
+	txte->node = aux->curr_node;
 
 	//fprintf(stdout, "act_Tc_op:: %3.3f\n", txte->value);
 	return H_MAKE(TextEntry, txte);
@@ -1645,8 +1659,7 @@ act_Tw_op(const HParseResult *p, void *u)
 	txte->value = H_FIELD_DOUBLE(0);
 
 	// associate the text with the current state
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font;
+	txte->node = aux->curr_node;
 
 	//fprintf(stdout, "act_Tw_op:: %3.3f\n", txte->value);
 	return H_MAKE(TextEntry, txte);
@@ -1655,7 +1668,7 @@ act_Tw_op(const HParseResult *p, void *u)
 
 
 /*
- * Tz operator: horizintal scaling specification
+ * Tz operator: horizontal scaling specification
  * H_ARULE(Tz_op, SEQ(tnumb, ws, LIT("Tz")));  // 9.3.4 - horizontal scaling
  */
 HParsedToken *
@@ -1670,8 +1683,7 @@ act_Tz_op(const HParseResult *p, void *u)
 	txte->value = H_FIELD_DOUBLE(0);
 
 	// associate the text with the current state
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font;
+	txte->node = aux->curr_node;
 
 	//fprintf(stdout, "act_Tz_op:: %3.3f\n", txte->value);
 	return H_MAKE(TextEntry, txte);
@@ -1695,8 +1707,7 @@ act_TL_op(const HParseResult *p, void *u)
 	txte->value = H_FIELD_DOUBLE(0);
 
 	// associate the text with the current state
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font;
+	txte->node = aux->curr_node;
 
 	//fprintf(stdout, "act_TL_op:: %3.3f\n", txte->value);
 	return H_MAKE(TextEntry, txte);
@@ -1712,8 +1723,8 @@ act_TL_op(const HParseResult *p, void *u)
 HParsedToken *
 act_Tf_op(const HParseResult *p, void *u)
 {
-	TextEntry          *txte = H_ALLOC(TextEntry);
-	struct Env         *aux    = (struct Env*)u;
+	TextEntry          *txte     = H_ALLOC(TextEntry);
+	struct Env         *aux      = (struct Env*)u;
 	const HParsedToken *fn_token = H_FIELD_TOKEN(0);
 
 	txte->type  = TS_Tf;
@@ -1729,27 +1740,12 @@ act_Tf_op(const HParseResult *p, void *u)
 		txte->fref.fontsize = (double) H_FIELD_UINT(1);
 	else if (tokenType == TT_DOUBLE)
 		txte->fref.fontsize = (double) H_FIELD_DOUBLE(1);
-	//else
-	//	fprintf(stderr, "act_Tf_op: Unexpected token type for fontsize - token_type=%u\n",
-	//				tokenType);
 
+    // associate the text with the current state
+    txte->node = aux->curr_node;
 
-	// save this foont as the current state to be used by subsequent text
-	const HParsedToken * restok = H_MAKE(TextEntry, txte);
-	aux->tstate.font            = txte;
 
-	// associate the text with the current state
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font; // recursive :-) defn
-
-	// DEBUG
-	//fprintf(stdout, "act_Tf_op: fn=%.*s, fontsize=%3.3f, fontstate=%p, page=",
-	//		txte->fref.namelen, txte->fref.fontname, txte->fref.fontsize, (void*)txte);
-	//if (aux->tstate.page->type==PG_NODE)
-	//	pp_ref(stdout, aux->tstate.page->me, 0, 0);
-	//fprintf(stdout, "\n");
-
-	return ((HParsedToken *)restok);
+	return H_MAKE(TextEntry, txte);
 }
 
 
@@ -1781,8 +1777,7 @@ act_Tr_op(const HParseResult *p, void *u)
 	txte->mode  = H_FIELD_UINT(0);
 
 	// associate the text with the current state
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font;
+	txte->node = aux->curr_node;
 
 	//fprintf(stdout, "act_Tr_op:: %d\n", txte->mode);
 	return H_MAKE(TextEntry, txte);
@@ -1805,8 +1800,7 @@ act_Ts_op(const HParseResult *p, void *u)
 	txte->value = H_FIELD_DOUBLE(0);
 
 	// associate the text with the current state
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font;
+	txte->node = aux->curr_node;
 
 	//fprintf(stdout, "act_Ts_op:: %3.3f\n", txte->value);
 	return H_MAKE(TextEntry, txte);
@@ -1838,8 +1832,7 @@ act_Td_op(const HParseResult *p, void *u)
 
 	// associate the text with the current state
 	// NOTE: This operator does not require a font
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font;
+	txte->node = aux->curr_node;
 
 	//fprintf(stdout, "act_Td_op: text position ::tx=%.3f:ty=%.3f\n",
 	//		txte->pos.tx, txte->pos.ty);
@@ -1867,8 +1860,7 @@ act_TD_op(const HParseResult *p, void *u)
 	txte->pos.ty = H_FIELD_DOUBLE(1);
 
 	// associate the text with the current state
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font;
+	txte->node = aux->curr_node;
 
 	//fprintf(stdout, "act_TD_op: text position ::tx=%3.3f:ty=%3.3f\n", txte->pos.tx, txte->pos.ty);
 	//pp_fontstate(stdout, &txte->ts);
@@ -1885,6 +1877,7 @@ HParsedToken *
 act_Tm_op(const HParseResult *p, void *u)
 {
 	TextEntry          *txte = H_ALLOC(TextEntry);
+    struct Env         *aux    = (struct Env*)u;
 
 
 	txte->type  = TP_Tm;
@@ -1897,6 +1890,9 @@ act_Tm_op(const HParseResult *p, void *u)
 
 		txte->fm.cell[i] = p->ast->seq->elements[0]->seq->elements[i]->seq->elements[0]->dbl;
 
+    // associate the text with the current state
+    txte->node = aux->curr_node;
+
 	//fprintf(stdout, "act_Tm_op: text matrix ::\n");
 	//for (int i=0; i<3; i++)
 	//	fprintf(stdout, "%3.3f : %3.3f\n", txte->fm.cell[i*2], txte->fm.cell[i*2+1]);
@@ -1920,8 +1916,7 @@ act_Tstar_op(const HParseResult *p, void *u)
 	txte->value = 0;
 
 	// associate the text with the current state
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font;
+	txte->node = aux->curr_node;
 
 	//fprintf(stdout, "act_Tstar_op: position pointer\n");
 	//pp_fontstate(stdout, &txte->ts);
@@ -1956,8 +1951,7 @@ act_Tj_op(const HParseResult *p, void *u)
 	txte->tstr.nchars = tstr->bytes.len;
 
 	// associate the text with the current state
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font;
+	txte->node = aux->curr_node;
 
 
 	//fprintf(stdout, "\nact_Tj_op:: nchars=%u, txt=%.*s\n", txte->tstr.nchars,
@@ -1987,8 +1981,7 @@ act_TsingleQ_op(const HParseResult *p, void *u)
 	txte->tstr.nchars = tstr->bytes.len;
 
 	// associate the text wth the current font
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font;
+	txte->node = aux->curr_node;
 
 
 	//fprintf(stdout, "\nact_TsingleQ_op:: nchars=%u, txt=%.*s\n", txte->tstr.nchars,
@@ -2023,8 +2016,7 @@ act_TdoubleQ_op(const HParseResult *p, void *u)
 	txte->twfmt.tstr.nchars = tstr->bytes.len;
 
 	// associate the text wth the current font
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font;
+	txte->node = aux->curr_node;
 
 
 
@@ -2056,8 +2048,7 @@ act_TJ_op(const HParseResult *p, void *u)
 
 
 	// associate the text wth the current font
-	txte->ts.page = aux->tstate.page;
-	txte->ts.font = aux->tstate.font;
+	txte->node = aux->curr_node;
 
 	/*
 	 * Parse each element of the array
@@ -2081,14 +2072,6 @@ act_TJ_op(const HParseResult *p, void *u)
 			txte->tarray.elts[i].isStr       = true;
 			txte->tarray.flattened.nchars   += txte->tarray.elts[i].tstr.nchars;
 
-
-			// Debug
-//			fprintf(stdout, "act_TJ_op:Cumulative=%d/0x%x bytes,   Additional:%d bytes\n",
-//					txte->tarray.flattened.nchars, txte->tarray.flattened.nchars, txte->tarray.elts[i].tstr.nchars);
-			//fprintf(stdout, "act_TJ_op::: Using font= %p - page=", txte->ts.font);
-			//pp_ref(stdout, txte->ts.page->me, 0, 0);
-			//fprintf(stdout, "\nact_TJ_op:: nchars=%u, txt=%.*s\n", txte->tarray.elts[i].tstr.nchars,
-			//		txte->tarray.elts[i].tstr.nchars, txte->tarray.elts[i].tstr.text);
 			break;
 		default:
 			log_message(SEV_DONTCARE, "act_TJ_op:: Unexpected element type :: %d\n", elt->seq->elements[0]->token_type);
@@ -2123,24 +2106,57 @@ act_TJ_op(const HParseResult *p, void *u)
 
 /*
  * Parse the text object delimited by "BT" and "ET"
+ * Text Matrix and Text Line Matrix are reinitialized at each invocation
+ *
+ * TODO:: Maintain line number to vertically (horizontally) sort the text
  */
 HParsedToken *
 act_txtobj(const HParseResult *p, void *u)
 {
-
-	//fprintf(stdout, "act_txtobj:: Here\n");
-
-	assert(p->ast->token_type == TT_SEQUENCE);
-
+    // we will need to handle the case when the text string is longer than 8192 characters
+    struct Env         *aux    = (struct Env*)u;
 	TextEntry          *txtobj = H_ALLOC(TextEntry);
 	TextEntry          *txte   = NULL;
 	const HParsedToken *opstream = H_INDEX_TOKEN(p->ast, 1);
 	const HParsedToken *tt_text=NULL;
 	uint8_t            *tstr=NULL;
 	int                 textlen=0;
+	struct textmat      tm;
+    PtNode_T           *node = aux->curr_node;
+    double              cs, ws, ls;     // text state saved here and restored after the sizing pass
+    double             *px=&tm.cell[4];
+    double             *py=&tm.cell[5];
+
+    // bail out before the first dereference of node; the NULL check further down came too late
+    if (node == NULL) {
+        return ((HParsedToken *) NULL);
+    }
+    cs = node->ts.char_spacing;
+    ws = node->ts.word_spacing;
+    ls = node->ts.line_spacing;
+    // initialize the text matrix to the identity [1 0 0 1 0 0]
+    tm.cell[0] = 1;  tm.cell[1] = 0;  tm.cell[2] = 0;
+    tm.cell[3] = 1;  tm.cell[4] = 0;  tm.cell[5] = 0;
+    { // debug
+        if (node->type == PG_NODE) {
+            fprintf(stdout, "\nPage Num = %u,    ", node->pn.page_num);
+        }
+        else if (node->type == XO_NODE) {
+            fprintf(stdout, "Is a XO_NODE:\n");
+            h_pprintln(stdout, node->me_t);
+        }
+        fprintf(stdout, "Starting Position: (x, y) = (%f, %f), Font = %s\n",
+                node->ts.curr_pos.tx, node->ts.curr_pos.ty,
+                (node->ts.font)?node->ts.font->fref.fn:"null");
+    }
 
+    // if the current page/XObject is not set, return a NULL token
+    if (aux->curr_node == NULL) {
+        return ((HParsedToken *) NULL);
+    }
+
+	// TODO:: Handle non-horizontal text
 
-	//fprintf(stdout, "act_txtobj:: numtokens = %lu\n", opstream->seq->used);
 
 	// Walk through the tokens to determine how much space to allocate
 	// Count the number of characters in the stream
@@ -2148,75 +2164,202 @@ act_txtobj(const HParseResult *p, void *u)
 	for (int i =0; i < opstream->seq->used; i++) {
 
 		txte = H_CAST(TextEntry, opstream->seq->elements[i]);
-		// Process the text showing operators
+
+		// make sure we are working on the same node as the current node
+		assert(txte && (txte->node == node));
+
 		switch (txte->type) {
-		case TP_Td:
-		case TP_TD:
+		// text state operators
+		case TS_Tc:
+		    node->ts.char_spacing = txte->value;
+		    break;
+
+		case TS_Tw:
+		    node->ts.word_spacing = txte->value;
+            break;
+
+        case TS_Tz:
+            node->ts.horiz_scaling = txte->value;
+            break;
+
+        case TS_TL:
+            node->ts.line_spacing = txte->value;
+            break;
+
+        case TS_Tf:
+            node->ts.font = txte;
+            node->ts.font_size = txte->fref.fontsize;
+            break;
+
+
+        // text positioning and showing operators
+        case TP_TD:
+		    node->ts.line_spacing = txte->pos.ty;
+        case TP_Td:
+		    if ( (*px == 0.0) && (*py == 0.0) ) { // initialize
+		        *px = txte->pos.tx;
+		        *py = txte->pos.ty;
+		        // check to see if we are starting a new line
+		        if ( (node->ts.curr_pos.ty != 0.0) &&
+		                (node->ts.curr_pos.ty != *py) ) {
+	                 textlen += 1; // add a newline
+		        }
+		    } else {
+		        if (txte->pos.ty != 0.0) {
+		            //we are not rendering -- we just know it is not in the same line if y not equal
+		            textlen += 1; // add a newline
+		            *py -= txte->pos.ty; // should this be a +=??
+		        }
+		        if (txte->pos.tx) { // handle x -- when should we add a space
+		            // TODO:: handle x -- not sure .. for now, ignore
+		            *px += txte->pos.tx;
+		        }
+		    }
+            break;
 		case TP_Tstar:
-			textlen += 1;
+			*py -= node->ts.line_spacing;
+            textlen += 1;
 			break;
 
+
 		case TW_Tqq:
-			textlen += 1;
+		    node->ts.word_spacing = txte->twfmt.aw;
+            node->ts.char_spacing = txte->twfmt.ac;
+        case TW_Tq:
+            *py -= node->ts.line_spacing;
+            textlen += 1;
+        case TW_Tj:
+            textlen += txte->tstr.nchars;
+            *px += txte->tstr.nchars * (node->ts.font ? node->ts.font->fref.fontsize : 0.0); // no Tf seen yet => no advance; TODO:: handle character width from font description
+            break;
+
 		case TW_TJ:
 			textlen += txte->tarray.flattened.nchars;
+            *px += txte->tarray.flattened.nchars * (node->ts.font ? node->ts.font->fref.fontsize : 0.0); // no Tf seen yet => no advance; TODO:: handle character width from font description
 			break;
 
-		case TW_Tq:
-			textlen += 1;
-		case TW_Tj:
-			textlen += txte->tstr.nchars;
-			break;
-			break;
 		default:
 			; // ignore
 		}
 	}
+
+	// Are we within the page bounds? If not, generate a warning
+	if ( (*px < 0) || (*px > aux->curr_node->mediaBox.tx) ) {
+	    fprintf (stdout, "Final position of the text string is outside media box bounds.\n"
+	            "Media Box-page width=%f, String end position-@width=%f\n",
+	            aux->curr_node->mediaBox.tx, *px);
+	}
+    if ( (*py < 0) || (*py > aux->curr_node->mediaBox.ty) ) {
+        fprintf (stdout, "Final position of the text string is outside media box bounds.\n"
+                "Media Box-page height=%f, String end position-@height=%f\n",
+                aux->curr_node->mediaBox.ty, *py);   // report the vertical values, not the horizontal ones
+    }
+
+    // reset text state
+    *px = *py = 0.0;
+    node->ts.char_spacing = cs;
+    node->ts.word_spacing = ws;
+    node->ts.line_spacing = ls;
+
 	tstr = h_arena_malloc(p->arena, sizeof(uint8_t) * textlen);
 	int idx=0;
 	TextState_T      *ts;
 	// Now concatenate the pieces
 	for (int i =0; i < opstream->seq->used; i++) {
 		txte = H_CAST(TextEntry, opstream->seq->elements[i]);
-		ts   = &txte->ts;
+		ts   = &txte->node->ts;
 
-		// Process the text operators
+        { // debug -- the font may still be unset if no Tf operator has been seen
+            fprintf(stdout, "Position: (x, y) = (%f, %f), Font = %s\n", *px, *py,
+                    (ts->font)?ts->font->fref.fn:"null");
+        }
+
+        // Process the text operators
 		switch (txte->type) {
-		case TP_Td:
-		case TP_TD:
-		case TP_Tstar:
-			tstr[idx] = '\n';
-			idx += 1;
+        // text state operators
+        case TS_Tc:
+            node->ts.char_spacing = txte->value;
+            break;
+
+        case TS_Tw:
+            node->ts.word_spacing = txte->value;
+            break;
+
+        case TS_Tz:
+            node->ts.horiz_scaling = txte->value;
+            break;
+
+        case TS_TL:
+            node->ts.line_spacing = txte->value;
+            break;
+
+        case TS_Tf:
+            ts->font      = txte;
+            node->ts.font_size = txte->fref.fontsize;
+            break;
+
+
+        // text positioning and showing operators
+        case TP_TD:
+        case TP_Td:
+            if ( (*px == 0.0) && (*py == 0.0) ) { // initialize
+                *px = txte->pos.tx;
+                *py = txte->pos.ty;
+                if ( (node->ts.curr_pos.ty != 0.0) &&
+                        (node->ts.curr_pos.ty != *py) ) {
+                    tstr[idx] = '\n'; idx += 1;
+                }
+            } else {
+                if (txte->pos.ty != 0.0) {
+                    tstr[idx] = '\n'; idx += 1;
+                    *py -= txte->pos.ty; // should this be a +=??
+                }
+                if (txte->pos.tx) { // handle x -- when should we add a space
+                    // TODO:: handle x -- not sure .. for now, ignore
+                    *px += txte->pos.tx;
+                }
+            }
+            break;
+
+        case TP_Tstar:
+			tstr[idx] = '\n'; idx += 1;
+            *py -= node->ts.line_spacing;
 			break;
 
-		case TW_Tqq:
-			tstr[idx] = '\n';
-			idx += 1;
-		case TW_TJ:
-			memcpy(&tstr[idx], txte->tarray.flattened.text, txte->tarray.flattened.nchars);
-			idx += txte->tarray.flattened.nchars;
-			//fprintf(stdout, "act_txtobj - array:: len=%u, str=", txte->tarray.flattened.nchars);
-			//fwrite((const void *)txte->tarray.flattened.text, (int) txte->tarray.flattened.nchars, 1, stdout);
-			//pp_fontstate(stdout, ts);
-			break;
 
-		case TW_Tq:
-			tstr[idx] = '\n';
-			idx += 1;
+        case TW_Tqq:
+            node->ts.word_spacing = txte->twfmt.aw;
+            node->ts.char_spacing = txte->twfmt.ac;
+        case TW_Tq:
+            *py -= node->ts.line_spacing;
+            tstr[idx] = '\n'; idx += 1;
+        case TW_Tj:
+            memcpy(&tstr[idx], txte->tstr.text, txte->tstr.nchars);
+            idx += txte->tstr.nchars;
+            *px += txte->tstr.nchars * (node->ts.font ? node->ts.font->fref.fontsize : 0.0); // tstr (not tarray) is the member in use here, as in the sizing pass; TODO:: handle character width from font description
+            break;
+
+
+        case TW_TJ:
+            memcpy(&tstr[idx], txte->tarray.flattened.text, txte->tarray.flattened.nchars);
+            idx += txte->tarray.flattened.nchars;
+            *px += txte->tarray.flattened.nchars * (node->ts.font ? node->ts.font->fref.fontsize : 0.0); // no Tf seen yet => no advance; TODO:: handle character width from font description
+            break;
 
-		case TW_Tj:
-			memcpy(&tstr[idx], txte->tstr.text, txte->tstr.nchars);
-			idx += txte->tstr.nchars;
-			//fprintf(stdout, "act_txtobj:: len=%u, str=", txte->tstr.nchars);
-			//fwrite((const void *)txte->tstr.text, (int) txte->tstr.nchars, 1, stdout);
-			//pp_fontstate(stdout, ts);
-			break;
 		default:
 			; // ignore
 		}
 	}
 	assert(idx == textlen);
 
+	// update the position on the page
+    node->ts.curr_pos.tx  = *px;
+    node->ts.curr_pos.ty  = *py;
+
+    { // debug -- the font may still be unset if no Tf operator has been seen
+        fprintf(stdout, "Ending Position: (x, y) = (%f, %f), Font = %s\n", *px, *py,
+                (node->ts.font)?node->ts.font->fref.fn:"null");
+    }
 
 	txtobj->type         = TW_Tj;
 	txtobj->obj          = opstream;
@@ -2224,25 +2367,14 @@ act_txtobj(const HParseResult *p, void *u)
 	txtobj->tstr.nchars  = textlen;
 	txtobj->tstr.tobj    = opstream;
 	if (textlen) {
-		txtobj->ts.page = ts->page;
-		txtobj->ts.font = ts->font;
+		txtobj->node = node;   // every entry was asserted to belong to node; avoids relying on ts->node being set
 	}
 	else {
-		txtobj->ts.page = NULL;
-		txtobj->ts.font = NULL;
+		txtobj->node = NULL;
 	}
 	// pretty print the information
 	tt_text = H_MAKE(TextEntry, txtobj);
 
-	// DEBUG
-	/*
-	if (textlen) {
-		fprintf(stdout, "act_txtobj:: ");
-		pp_textentry(stdout, tt_text, 0, 0);
-		if (&txtobj->ts.page)
-			pp_fontstate(stdout, &txtobj->ts);
-	}
-	*/
 	return (HParsedToken *)tt_text;
 }
 
@@ -2250,7 +2382,7 @@ act_txtobj(const HParseResult *p, void *u)
 /*
  * This continuation takes the text stream and saves it in the environment for further
  * processing, e.g. writing it out to a file with the same name as the pdf input filename
- * but woth a .psectxt suffix.
+ * but with a .psectxt suffix.
  * It does not consume the string and returns the parser as the output.
  *
  * x = (txtobj ...)
@@ -2415,10 +2547,10 @@ text_extract(struct Env *aux)
 				break;
 			}
 			if (tstr) {
-				ft = lookup_font(&txte->ts, aux);
+				ft = lookup_font(&txte->node->ts, aux);
 				if (ft) {
 					//pp_fontinfo(stdout, &txte->ts, ft);
-					pp_fontinfo(stream, &txte->ts, ft);
+					pp_fontinfo(stream, &txte->node->ts, ft);
 				}
 				else {
 					char *estr = "\nMissing Font Info!!\n";
@@ -2915,9 +3047,11 @@ init_parser(struct Env *aux)
 
     H_RULE(txtbegin, h_indirect());
     H_RULE(txt_before_junk, IGN(SEQ(h_not(LIT("BT")), CHX(comment, h_uint8()))));
-	H_ARULE(txtbegin_, SEQ(IGN(h_many(txt_before_junk)), LIT("BT"), aws));
+    HParser *txtbegin_ = h_action(SEQ(IGN(h_many(txt_before_junk)), LIT("BT"), aws), act_txtbegin_, aux);  /* Text Begin  */
+//	H_ARULE(txtbegin_, SEQ(IGN(h_many(txt_before_junk)), LIT("BT"), aws));
 	h_bind_indirect(txtbegin, txtbegin_);
-	H_ARULE(txtend, KW("ET"));
+	HParser *txtend = h_action(KW("ET"), act_txtend, aux);
+//    H_ARULE(txtend, KW("ET"));
 	/* 9.3 - Text state operators */
     H_AVRULE(tnumb, numb);
     HParser *Tc_op = h_action(SEQ(tnumb, aws, LIT("Tc"), aws), act_Tc_op, aux);  /* 9.3.2 - charSpace */
@@ -2953,7 +3087,7 @@ init_parser(struct Env *aux)
     H_RULE(text_ops, CHX(textstate_ops, textpos_ops, textshow_ops, text_inbetween_junk));
 
 	/* Text object */
-	H_ARULE(txtobj, SEQ(txtbegin, h_many(text_ops), txtend));
+    HParser *txtobj = h_action(SEQ(txtbegin, h_many(text_ops), txtend), act_txtobj, aux);
 	H_RULE(txtobjs, h_many1(txtobj));
 
 
@@ -4164,7 +4298,7 @@ get_fontdict(const HParsedToken *obj, struct Env* aux)
 const HParsedToken *
 get_dictoftype(
 		const HParsedToken *obj,
-		const HParsedToken *pRefT,
+		const HParsedToken *parent_t,
 		char               *value,
 		struct Env         *aux)
 {
@@ -4174,7 +4308,7 @@ get_dictoftype(
 
 	if (obj->token_type == TT_Dict) {
 		dict = H_CAST(Dict, obj);
-		if (is_parent(dict, pRefT) && has_value(dict, "Type", value))
+		if (is_parent(dict, parent_t) && has_value(dict, "Type", value))
 			tok = obj;
 		else
 			dict = NULL;
@@ -4187,7 +4321,7 @@ get_dictoftype(
 			const HParsedToken *sitem = resolve_item(aux, stm->tok[i].obj, &ioff, p_objdef);
 			if ((sitem) && (sitem->token_type == TT_Dict)) {
 				dict = H_CAST(Dict, sitem);
-				if (is_parent(dict, pRefT) && has_value(dict, "Type", value)) {
+				if (is_parent(dict, parent_t) && has_value(dict, "Type", value)) {
 					tok = sitem;
 					break;
 				}
@@ -4196,34 +4330,10 @@ get_dictoftype(
 			}
 		}
 	}
-	/*
-	else {
-		fprintf(stdout, "get_dictoftype: token type not yet handled: %u\n",
-				obj->token_type);
-		fprintf(stdout, "get_dictoftype: Possibly needed for CMAPS\n");
-		h_pprintln(stdout, obj);
-	}
-	*/
 
 	if (dict == NULL)
 		tok = NULL;
 
-	// DEBUG
-	/*
-	if (pRefT) {
-		fprintf(stdout, "\nget_dictoftype: Parent = ");
-		pp_ref(stdout, pRefT, 0, 0);
-	}
-	else
-		fprintf(stdout, "get_dictoftype: Parent = NULL");
-	if (tok) {
-		fprintf(stdout, "\nget_dictoftype: Type = %s\n", value);
-		pp_dict(stdout, tok, 0, 0);
-	}
-	else {
-		fprintf(stdout, "\nget_dictoftype: Null dictionary of Type = %s\n", value);
-	}
-	*/
 	return tok;
 }
 
@@ -4344,6 +4454,8 @@ kcontentstream(HAllocator *mm__, const HParsedToken *x, void *env)
 
 }
 
+#if 0
+// unused function
 const
 HParsedToken *create_strmdict(HArena *arena, size_t len)
 {
@@ -4360,6 +4472,7 @@ HParsedToken *create_strmdict(HArena *arena, size_t len)
 	tok = res->ast;
 	return tok;
 }
+#endif
 
 Fontinfo_T *
 getFontinfo(const Dict *fontdict, char *name, struct Env *aux)
@@ -4504,7 +4617,7 @@ lookup_font(TextState_T *state, struct Env *aux)
 	Dict               *fontlist; // font list dictionary in page
 	Fontinfo_T         *fontinfo = NULL;
 
-	PtNode_T *page   = state->page;
+	PtNode_T *page   = state->node;
 	assert ( (page->type == PG_NODE) || (page->type == XO_NODE) );
 	struct TextEntry_S *fentry = state->font;
 	if (fentry) {
@@ -4584,7 +4697,7 @@ bool parse_fonts(const HParsedToken *dict_t, RsrcDict_T *pgRsrc, struct Env *aux
 	return processed;
 }
 
-void parse_rsrcdict(HArena *arena, const HParsedToken *dict_t, PtNode_T *pgNode, struct Env *aux);
+RsrcDict_T *parse_rsrcdict(HArena *arena, const HParsedToken *dict_t, PtNode_T *pgNode, struct Env *aux);
 
 
 XoNode_T*
@@ -4638,14 +4751,15 @@ void parse_xobject(
 
 		Dict *xolist = H_CAST(Dict, item);
 
-		// DEBUG
-		fprintf(stdout, "\nparse_xobject: Node for Parent = ");
-		if (parent->me) pp_ref(stdout, parent->me, 0, 0);
-		fprintf(stdout, "\nOld XO Count = %lu, Num xobjects used in page = %lu \n",
-				aux->catalog.xoCount, xolist->used);
-		pp_dict(stdout, item, 0, 0);
-		// DEBUG
-
+		{
+            // DEBUG
+            fprintf(stdout, "\nparse_xobject: Node for Parent = ");
+            if (parent->me_t) pp_ref(stdout, parent->me_t, 0, 0);
+            fprintf(stdout, "\nOld XO Count = %u, Num xobjects used in page = %lu \n",
+                    aux->catalog.xoCount, xolist->used);
+            pp_dict(stdout, item, 0, 0);
+            // DEBUG
+		}
 		aux->catalog.xoCount += xolist->used;
 		// work on each element of the dictionary
 		for (int i=0; i<xolist->used; i++) {
@@ -4658,11 +4772,12 @@ void parse_xobject(
 			const HParsedToken *ref = H_INDEX_TOKEN(xolist->elements[i], 1);
 			assert(ref->token_type == TT_Ref);
 			xobj_r->node =  h_arena_malloc(xodict->arena, sizeof(PtNode_T));
-			xobj_r->node->type   = XO_NODE;
-			xobj_r->node->parent = parent->me;
-			xobj_r->node->pgRsrc = pgRsrc;
-			xobj_r->node->me     = ref;
-			xobj_r->node->offset = 0;    // TODO: get the offset to the stream
+			xobj_r->node->type     = XO_NODE;
+			xobj_r->node->parent_n = parent;
+			xobj_r->node->parent_t = parent->me_t;
+			xobj_r->node->pgRsrc   = pgRsrc;
+			xobj_r->node->me_t     = ref;
+			xobj_r->node->offset   = 0;    // TODO: get the offset to the stream
 			tok = resolve_item(aux, ref, &xobj_r->node->offset, p_objdef);
 
 			if (tok == NULL) continue;
@@ -4711,12 +4826,15 @@ void parse_xobject(
 						xobj_r->node->xn.textStream = NULL;
 						continue;
 					}
+		            { // DEBUG
+		                fprintf(stdout, "parse_xobject: **! XObject redefining resources for the page\n");
+		            }
 
-					parse_rsrcdict(xodict->arena, rsrcdict_t, xobj_r->node, aux);
+					xobj_r->node->pgRsrc = parse_rsrcdict(xodict->arena, rsrcdict_t, xobj_r->node, aux);
 
 					// set the text state to this xobject
 					// parse the text stream, which is field 2 of the sequence
-					aux->tstate.page = xobj_r->node;
+					aux->curr_node = xobj_r->node;
 					xobj_t = H_INDEX_TOKEN(tok, 1);  // expecting an HParseResult token
 					const HParseResult *res = H_CAST(HParseResult, xobj_t);
 					// DEBUG
@@ -4750,9 +4868,9 @@ void parse_xobject(
 }
 
 
-void parse_rsrcdict(HArena *arena, const HParsedToken *dict_t, PtNode_T *pgNode, struct Env *aux)
+RsrcDict_T *parse_rsrcdict(HArena *arena, const HParsedToken *dict_t, PtNode_T *pgNode, struct Env *aux)
 {
-	RsrcDict_T        *rsrc = NULL;
+	RsrcDict_T *rsrc = NULL;
 
 
 	// Process the dictionary
@@ -4796,11 +4914,11 @@ void parse_rsrcdict(HArena *arena, const HParsedToken *dict_t, PtNode_T *pgNode,
 		}
 	}
 
-	pgNode->pgRsrc = rsrc;
-	return;
+	return (rsrc);
 
 }
 
+
 void pp_pgrsrc(FILE *stream, const RsrcDict_T *pgRsrc)
 {
 	if (pgRsrc) {
@@ -4818,9 +4936,10 @@ void pp_ptnode(FILE *stream, const PtNode_T *node)
 {
 	fprintf(stream, "\nPage Tree Node Info:\n");
 	fprintf(stream, "pp_ptnode: parent = ");
-	if (node->parent) h_pprintln(stream, node->parent);
-	fprintf(stream, "\npp_ptnode: me = ");
-	if (node->me)     h_pprintln(stream, node->me);
+	fprintf(stream, "pp_ptnode: parent_ref = %p\n", node->parent_n);
+	if (node->parent_t) h_pprintln(stream, node->parent_t);
+	fprintf(stream, "\npp_ptnode: me_ref = ");
+	if (node->me_t)     h_pprintln(stream, node->me_t);
 	if (node->pgRsrc) pp_pgrsrc(stream, node->pgRsrc);
 }
 
@@ -4829,58 +4948,102 @@ void parse_pagenode (
 		PtNode_T           *myNode,
 		const HParsedToken *myRef,     // my page node reference
 		const Dict         *myDict,    // my page node specification
-		const HParsedToken *parent,
+		const HParsedToken *parent_t,  // parent reference token
+		PtNode_T           *parent_n,  // my parent node
 		HArena             *arena
 		)
 {
 
-	const HParsedToken *item        = NULL;
 	const HParsedToken *contents_t  = NULL; // dictionary token
-	//Ref                *contents_r  = NULL;
 	const HParsedToken *contents    = NULL; // resolved token
 	const HParsedToken *entry       = NULL;
-	const HParsedToken *rsrcdict_t  = NULL;
-	size_t             nOffset = 0;
-
-	// DEBUG
-	//fprintf(stdout, "\nparse_pagenode: parsing Page Node = ");
-	//pp_ref(stdout, myRef, 0, 0);
-
-
-	// set some global state variables
-	aux->tstate.page = myNode;
-	myNode->type     = PG_NODE;
-	myNode->me       = myRef;
-	myNode->pn.dict  = myDict;
+	size_t              nOffset     = 0;
+
+
+    // Initialize this page node
+	myNode->type               = PG_NODE;
+    myNode->parent_n           = parent_n;  // NULL for root
+    myNode->parent_t           = parent_t;
+	myNode->me_t               = myRef;
+    aux->catalog.lastNode++;  // keep track of the node count
+    aux->catalog.lastPage++;  // keep track of the page count
+    aux->curr_node            = myNode;
+    myNode->ts.node           = myNode;  // set the current page to me
+    myNode->ts.font           = NULL;
+    myNode->ts.char_spacing   = 0.0;
+    myNode->ts.word_spacing   = 0.0;
+    myNode->ts.line_spacing   = 0.0;
+    // locate the starting point at top left of a 8.5x11" paper
+    myNode->ts.curr_pos.tx    = 0.0;   // 8.5" = 612 points
+    myNode->ts.curr_pos.ty    = 0.0; // 11"  = 792 points
+    myNode->pn.dict            = myDict;
+    myNode->nodeNum            = aux->catalog.lastNode;
+    myNode->pn.page_num        = aux->catalog.lastPage;
+    // initialize by inheriting -- override with local
+    myNode->pgRsrc             = parent_n->pgRsrc;
+    myNode->mediaBox.tx        = parent_n->mediaBox.tx;
+    myNode->mediaBox.ty        = parent_n->mediaBox.ty;
+
+
+
+    const HParsedToken *item        = NULL;
+	item = dictentry(myDict, "Parent");
+    { // Debug
+        fprintf(stdout, "PageNode:: Parent (from caller) =");
+        h_pprintln(stdout, parent_t);
+        fprintf(stdout, "PageNode:: Parent (from node) =");
+        h_pprintln(stdout, item);
+        fprintf(stdout, "PageNode:: Me =");
+        h_pprintln(stdout, myRef);
+    }
+    bool matched = false;
+    if (item && parent_t) { // neither item nor parent_t should be NULL
+       assert(item->token_type == TT_Ref);
+       if ( ( ((Ref*)item->user)->nr == ((Ref*)parent_t->user)->nr ) &&
+             ( ((Ref*)item->user)->gen == ((Ref*)parent_t->user)->gen ) ) {
+            matched = true;
+        }
+    }
+    if (! matched) {
+        fprintf(stdout, "parse_pagenode: Inconsistent or corrupt parent key!\n");
+        fprintf(stdout, "Parent (from caller) =\n");
+        h_pprintln(stdout, parent_t);
+        fprintf(stdout, "Parent (from node) =\n");
+        h_pprintln(stdout, item);
+
+        // for now, give up on this node and skip to the end label
+        goto end;
+    }
 
 
 
-	item = dictentry(myDict, "Parent");
-	assert(item->token_type == TT_Ref);
-	if ( !( ( ((Ref*)item->user)->nr == ((Ref*)parent->user)->nr ) &&
-			( ((Ref*)item->user)->gen == ((Ref*)parent->user)->gen ) ) ) {
-		//fprintf(stderr, "parse_pagenode: Inconsistent parent pointer [p = %p]!\n",
-		//		(void *)item);
-		// should this just be a warning?
-		goto end;
-	}
-	myNode->parent = item;
+    // Hold on to the mediaBox (page dimensions)-- required
+    item = dictentry(myDict, "MediaBox"); // inheritable if not NULL
+    if (item) {
+        item = resolve(aux, item);
+        if (item->token_type == TT_SEQUENCE) {
+            int llx, lly, urx, ury; // typical but can be any diagonal
+            llx = H_INDEX_SINT(item, 0);
+            lly = H_INDEX_SINT(item, 1);
+            urx = H_INDEX_SINT(item, 2);
+            ury = H_INDEX_SINT(item, 3);
+            myNode->mediaBox.tx = abs(llx - urx);
+            myNode->mediaBox.ty = abs(lly - ury);;
+        }
+    }
 
-	// Hold on to the Resources dictionary
-	// This dictionary may be empty
-	// If there is no dictionary ==> inherit resources from parent
-	myNode->pgRsrc = NULL;
+	// Hold on to the Resources dictionary -- required
+    const HParsedToken *rsrcDictT;
 	item = dictentry(myDict, "Resources");
 	if (item) {
-		//fprintf(stdout, "\n\nparse_pagenode: Found resources in node\n");
-		rsrcdict_t = resolve(aux, item);
-		//fprintf(stdout, "\nparse_pagenode: Resource token type = %u\n",rsrcdict_t->token_type);
-		parse_rsrcdict(arena, rsrcdict_t, myNode, aux);
-		//pp_ptnode(stdout, myNode);
+	    rsrcDictT = resolve(aux, item);
+        myNode->pgRsrc = parse_rsrcdict(myDict->arena, rsrcDictT, myNode, aux);
 	}
 
 
-	// Process the contents stream or array
+
+
+	// Process the contents stream or array  -- optional
 	contents_t = dictentry(myDict, "Contents");
 	if (contents_t == NULL) {
 			//fprintf(stderr, "parse_pagenode: Page node without contents!\n");
@@ -4895,7 +5058,11 @@ void parse_pagenode (
 			goto end;
 		}
 		if (contents->token_type == TT_Objstm) { // Resources for the page node
-			parse_rsrcdict(arena, contents, myNode, aux);
+		    { // DEBUG
+		        fprintf(stdout, "parse_pagenode: **! Contents redefining resources for the page\n");
+		    }
+
+			myNode->pgRsrc = parse_rsrcdict(arena, contents, myNode, aux);
 			//pp_ptnode(stdout, myNode);
 		}
 		/* Indirect reference to an array, which may in turn have indirect references */
@@ -4979,13 +5146,6 @@ void parse_pagenode (
 			myNode->pn.textStream = tstrm->ast;
 		}
 	}
-	else {
-		//fprintf(stdout, "parse_pagenode: Unexpected page node contents token type = %u\n", contents_t->token_type);
-		goto end;
-	}
-
-//	fprintf(stdout, "parse_pagenode:: Contents token type = %d\n",
-//			contents->token_type);
 
 
 end:
@@ -5004,78 +5164,150 @@ end:
  * identifying text streams and contents streams, saving the information to support
  * text extraction in the environment structure.
  *
+ * Parent reference and parent node are both NULL for the root of the page tree
  *
  * A return value of false indicates some parsing error.
 */
-// need to maintain information about pages
 void
 parse_pagetree(
 		struct Env         *aux,
-		PtNode_T           *myNode,
+		PtNode_T           *myNode,    // my page tree node
 		const HParsedToken *myRef,     // my page tree node reference
 		const Dict         *myDict,    // my page tree specification
-		const HParsedToken *pRefT,     // parent reference token
-		size_t              curr       // number of pages seen so far
+		const HParsedToken *parent_t,     // my parent reference token
+		PtNode_T           *parent_n    // my parent node
 		)
 {
-
-	const HParsedToken *item      = NULL;
-	const HParsedToken *kids      = NULL;
-	PtNode_T           *kid       = NULL;
-	const HParsedToken *kidRef    = NULL; // page tree or page node reference
-	const HParsedToken *kidDict_t = NULL;
-	const HParsedToken *pageDict_t = NULL;
-	const HParsedToken *treeDict_t = NULL;
-	const Dict         *kidDict   = NULL;
-	const HParsedToken *rsrcdict_t = NULL;
-
-
-	//fprintf(stdout, "\nparse_pagetree: parsing Page Tree Node = ");
-	//pp_ref(stdout, myRef, 0, 0);
-
-
+	// Initialize the page tree node
     myNode->type     = PG_TREE;
+    myNode->parent_n = parent_n;  // NULL for root
+    myNode->parent_t = parent_t;
+    myNode->me_t     = myRef;
+    aux->catalog.lastNode++;  // keep track of the node count
+    myNode->nodeNum  = aux->catalog.lastNode;
+    if (parent_n) { // inheritable if in a tree node
+        myNode->pgRsrc      = parent_n->pgRsrc;
+        myNode->mediaBox.tx = parent_n->mediaBox.tx;
+        myNode->mediaBox.ty = parent_n->mediaBox.ty;
+    }
+    else {
+        myNode->pgRsrc      = NULL;
+        myNode->mediaBox.tx = 0;
+        myNode->mediaBox.ty = 0;
+    }
+    myNode->offset   = 0;
 
-	item = dictentry(myDict, "Parent");  // if root node ==> parent should be NULL
-	myNode->parent = item;
 
-	// Count is a required field except for the root
+    // make sure that the parent node matches
+    const HParsedToken *item       = NULL;
+	item = dictentry(myDict, "Parent");  // if root node ==> parent should be NULL
+	{ // Debug
+	    h_pprintln(stdout, NULL); // will this generate an exception??
+        fprintf(stdout, "PageTree:: Parent (from caller) =");
+        h_pprintln(stdout, parent_t);
+        fprintf(stdout, "PageTree:: Parent (from node) =");
+        h_pprintln(stdout, item);
+	}
+	bool matched = false;
+	if (item && parent_t) {
+        if ( ( ((Ref*)item->user)->nr == ((Ref*)parent_t->user)->nr ) &&
+             ( ((Ref*)item->user)->gen == ((Ref*)parent_t->user)->gen ) ) {
+            matched = true;
+        }
+	}
+	else {   // either the parent is the page tree root or the node does not have a valid Parent key
+	    if (item == parent_t) {// if item is NULL, we expect parent_t to also be NULL (==> root)
+	        matched = true;
+	    }
+	}
+	if (! matched) {
+        fprintf(stdout, "parse_pagetree: Inconsistent or corrupt parent key!\n");
+        fprintf(stdout, "Parent (from caller) =\n");
+        h_pprintln(stdout, parent_t);
+        fprintf(stdout, "Parent (from node) =\n");
+        h_pprintln(stdout, item);
+
+        // for now, give up on this node and skip to the end label
+        goto end;
+	}
+
+
+	// Count is a required field
 	item = dictentry(myDict, "Count");
-	if ( (item == NULL) || (item->token_type != TT_SINT) ) {
-		//fprintf(stderr, "parse_pagetree: Required page node count missing!\n");
-		goto end; // This should just be a warning
+	bool parsed = false;
+	if (item) {
+	    item = resolve(aux, item);  // anything can be a ref
+	    if ( item && (item->token_type == TT_SINT) ) parsed = true;
 	}
-	else {
-		myNode->pt.leaves = H_CAST_SINT(item);
-		if (aux->catalog.pgCount == 0)
-			aux->catalog.pgCount = myNode->pt.leaves;
+	if ( ! parsed ) {
+		fprintf(stdout, "parse_pagetree: Required page node count missing or corrupt!\n");
+		goto end; // Should just be a warning?
 	}
+    myNode->pt.numPages = H_CAST_SINT(item);
+
+
 
+	// page tree node may have resources and media box that are inheritable
+	item = dictentry(myDict, "MediaBox"); // inheritable if not NULL
+	if (item) {
+	    item = resolve(aux, item);
+	    if (item->token_type == TT_SEQUENCE) {
+	        int llx, lly, urx, ury; // typical but can be any diagonal
+            llx = H_INDEX_SINT(item, 0);
+            lly = H_INDEX_SINT(item, 1);
+            urx = H_INDEX_SINT(item, 2);
+            ury = H_INDEX_SINT(item, 3);
+            myNode->mediaBox.tx = abs(llx - urx);
+            myNode->mediaBox.ty = abs(lly - ury);;
+	    }
+	}
+
+    const HParsedToken *rsrcDictT  = NULL;
+   item = dictentry(myDict, "Resources"); // inheritable if not NULL
+    if (item) {
+        rsrcDictT = resolve(aux, item);
+        myNode->pgRsrc = parse_rsrcdict(myDict->arena, rsrcDictT, myNode, aux);
+    }
 
 
 
 
-	// Kids is a required field
-	kids = dictentry(myDict, "Kids");  // array of references to page or page tree nodes
-	if ( (kids == NULL) || (kids->token_type != TT_SEQUENCE) ) {
-		//fprintf(stderr, "parse_pagetree: This tree node has no pages!\n");
-		goto end; // Nothing more to do here
-	}
 
+    // Kids array is a required field -- process the kids (pgTable)
+     const HParsedToken *kids = NULL;
+     parsed = false;
+     kids = dictentry(myDict, "Kids");  // array of references to page or page tree nodes
+     if (kids) {
+         kids = resolve(aux, kids);
+         if (kids->token_type == TT_SEQUENCE) parsed = true;
+     }
+     if ( ! parsed ) {
+         fprintf(stdout, "parse_pagetree: This tree node has no pages!\n");
+         goto end; // Nothing more to do here
+     }
 
 
+    PtNode_T           *kid       = NULL;
+    const HParsedToken *kidRef    = NULL; // page tree or page node reference
+    const HParsedToken *kidDict_t = NULL;
+    const HParsedToken *pageDict_t = NULL;
+    const HParsedToken *treeDict_t = NULL;
+    const Dict         *kidDict   = NULL;
 
-	// get the kids (pgTable)
 	HCountedArray  *pgTable = H_CAST_SEQ(kids);
 	size_t          pgtSz   = pgTable->used;
+	if (pgtSz != myNode->pt.numPages) {
+	    fprintf(stdout, "parse_pagetree: Size mismatch:: "
+	            "Kids array size = %lu, Page Count in node = %u\n",
+	            pgtSz, myNode->pt.numPages);
+	    // continue processing the pages
+	}
 	myNode->pt.kids  = (PtNode_T*)h_arena_malloc(pgTable->arena, pgtSz * sizeof(PtNode_T));
-	myNode->pt.count = pgtSz;
 
 	// Process the kids
 	for (int i=0; i<pgtSz; i++)
 	{
 		kid         = &myNode->pt.kids[i];
-		kid->parent = myRef;
 		kidRef      = pgTable->elements[i];
 		kidDict_t   = resolve(aux, kidRef);     // page or tree node dictionary or object stream token
 
@@ -5084,43 +5316,15 @@ parse_pagetree(
 			treeDict_t = get_dictoftype(kidDict_t, myRef, "Pages", aux);
 			if (treeDict_t) {
 				kidDict = H_CAST(Dict, treeDict_t);
-				parse_pagetree(aux, kid, kidRef, kidDict, myRef, curr);
+				parse_pagetree(aux, kid, kidRef, kidDict, myRef, myNode);
 			}
 			// Look for a page node
 			pageDict_t = get_dictoftype(kidDict_t, myRef, "Page", aux);
 			if (pageDict_t) {
 				kidDict = H_CAST(Dict, pageDict_t);
-				if (++curr > aux->catalog.pgCount) {
-					//fprintf(stderr, "parse_pagetree: More kids then specified leaves!\n");
-					// TODO:: probably just a warning is enough here -- run the VIOL parser?
-				}
-				parse_pagenode(aux, kid, kidRef, kidDict, myRef, pgTable->arena);
+				parse_pagenode(aux, kid, kidRef, kidDict, myRef, myNode, pgTable->arena);
 			}
-
-			// Look for Resources dictionary
-			myNode->pgRsrc = NULL;
-			item = dictentry(myDict, "Resources");
-			if (item) {
-				//fprintf(stdout, "\n\nparse_pagetree: Found resources in node\n");
-				size_t offset = 0;
-				rsrcdict_t = resolve_item(aux, item, &offset, p_objdef);
-				if (!rsrcdict_t) {  // TODO: Failure ==> xref error -- Figure out how to handle
-					goto end;
-				}
-				//fprintf(stdout, "\nparse_pagetree: Resource token type = %u\n",rsrcdict_t->token_type);
-				parse_rsrcdict(pgTable->arena, rsrcdict_t, myNode, aux);
-				//pp_ptnode(stdout, myNode);
-			}
-
 		}
-		/*
-		else {
-			Ref *ref = (Ref *)kidRef->user;
-			fprintf(stderr, "parse_pagetree: Reference <%zu, %zu> not found -- Deleted?!\n",
-					ref->nr, ref->gen);
-		}
-		*/
-
 	} // end loop
 
 
@@ -5146,9 +5350,10 @@ parse_catalog(struct Env *aux, const HParsedToken *root)
 
 
 	// initialize the catalog structure
-	aux->catalog.catalog = NULL;
-	aux->catalog.pRoot   = NULL;
-	aux->catalog.pgCount = 0;
+	aux->catalog.catalog    = NULL;
+	aux->catalog.pRoot      = NULL;
+	aux->catalog.lastPage  = 0;
+    aux->catalog.lastNode  = 0;
     // Initialize the xobject structure
 	aux->catalog.xObjs.name = NULL;
 	aux->catalog.xObjs.node = NULL;
@@ -5157,15 +5362,6 @@ parse_catalog(struct Env *aux, const HParsedToken *root)
 	aux->catalog.xoTail     = NULL;
 	aux->catalog.xoCount    = 0;
 
-	// DEBUG
-	/*
-	fprintf(stdout, "\nparse_catalog: parsing Catalog = ");
-	if (root->token_type == TT_Ref)
-		pp_ref(stdout, root, 0, 0);
-	else if (root->token_type == TT_Dict)
-		pp_dict(stdout, root, 0, 0);
-	*/
-
 
 	// Ensure the reference is to the catalog dictionary
 	size_t offset = 0;
@@ -5173,6 +5369,7 @@ parse_catalog(struct Env *aux, const HParsedToken *root)
 	if (!dict_t) { // TODO: Failure ==> xref error -- Figure out how to handle
 		goto end;
 	}
+    fprintf(stdout, "Trailer at offset = %lu\n", offset); // TODO:: Remove me
 
 	aux->catalog.catalog = get_dictoftype(dict_t, NULL, "Catalog", aux); // catalog dictionary token
 	if (aux->catalog.catalog) { // Caution:: relying on the short-circuiting behavior here
@@ -5182,29 +5379,27 @@ parse_catalog(struct Env *aux, const HParsedToken *root)
 		// Catalog found -- Now get the root of the page tree associated with the catalog
 		ptRef = dictentry(catalog, "Pages"); // indirect reference to a dictionary
 		if ( (ptRef == NULL) || (ptRef->token_type != TT_Ref) ) {
-			//fprintf(stderr, "parse_catalog: Page Tree not found!\n");
 			goto end;
 		}
 		aux->catalog.pRoot = ptRef; // indirect reference to the page tree
 
 
 		/* resolve and process the page tree root reference to extract the dictionary --> Page Tree Object */
-		dict_t = resolve_item(aux, ptRef, &offset, p_objdef);                     // page tree root node
-		if (!dict_t) { // TODO: Failure ==> xref error -- Figure out how to handle
+		dict_t = resolve_item(aux, ptRef, &offset, p_objdef);  // page tree root node
+		if (!dict_t) {
 			goto end;
 		}
+		// make sure the retrieved token is a dictionary of /Type "Pages"
 		dict_t = get_dictoftype(dict_t, NULL, "Pages", aux);   // page tree root dictionary (parent is NULL)
 		if (!dict_t) {
-			//fprintf(stderr, "parse_catalog: No page table!\n");
 			goto end; // Nothing more to do here
 		}
 		ptRoot = H_CAST(Dict, dict_t);
 
 		// parse_pagetree
-		parse_pagetree(aux, &aux->catalog.pgTree, ptRef, ptRoot, NULL, 0);
+		parse_pagetree(aux, &aux->catalog.pgTree, ptRef, ptRoot, NULL, NULL);
 	}
 	else {   // looks like the field "Type:Catalog" is a hint, not a requirement for a valid pdf
-		//fprintf (stdout, "\n\nThe Catalog is missing!!");
 		goto end;
 
 	}