From 534fd269ea1d68160d5e231b5a81f78af3f4784d Mon Sep 17 00:00:00 2001
From: "sumit.ray@baesystems.com" <sumit.ray@baesystems.com>
Date: Mon, 26 Jul 2021 00:23:44 -0400
Subject: [PATCH] Broken -- contains hack to try to pass in stream length to
 act_ostm

---
 pdf.c | 131 ++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 96 insertions(+), 35 deletions(-)

diff --git a/pdf.c b/pdf.c
index 31dab43..419e1b5 100644
--- a/pdf.c
+++ b/pdf.c
@@ -295,6 +295,27 @@ typedef struct {
 } Fontinfo_T;
 
 
+
+// Used by act_ostm to ensure indices are reasonable; p_objstm__m passes it as the action's user data
+typedef struct {
+	const Dict   *dict;		/* stream dictionary; p_objstm__m reads /N from it */
+	size_t	      numobjs;		/* number of objects, taken from the /N entry */
+	size_t        stmlength;	/* length of the decoded stream data; 0 until a filter records it */
+} ostm_spec_T;
+
+
+// Parser for object streams
+HParser *p_objstm__m(HAllocator *, ostm_spec_T *);
+
+// Action for stream continuation
+HParsedToken *act_ks_value(const HParseResult *p, void *u);
+struct streamspec {
+	Dict *dict;		/* stream dictionary */
+	HParser *parser;	/* data parser */
+};
+
+
+
 // ***********************************************************
 
 
@@ -2247,6 +2268,7 @@ HParser *ktxtstream(HAllocator *, const HParsedToken *, void *);
 HParser *kcontentstream(HAllocator *, const HParsedToken *, void *);
 HParser *kbyteostream(HAllocator *, const HParsedToken *, void *);
 
+
 void
 init_runlengthdecode_parser(struct Env *aux)
 {
@@ -2996,8 +3018,9 @@ depred_png(struct predictor *pred, uint8_t *inp, size_t sz)
 	return done;
 }
 
+// SR:: Fix Hacky code - N is only useful for object streams
 HParseResult *
-FlateDecode(const Dict *parms, HBytes b, HParser *p)
+FlateDecode(const Dict *parms, HBytes b, HParser *p, size_t N)
 {
 	size_t const BUFSIZE = 8 * 1024;
 	uint8_t *buf;
@@ -3098,10 +3121,15 @@ FlateDecode(const Dict *parms, HBytes b, HParser *p)
 		// XXX always return NULL on error?
 #else
 
-	// DEBUG -- will not always work depending on the font encoding used
-//	fprintf (stdout, "FlateDecode:: Inflated string (%lu):\n%.*s\n", pred.nout, (int)pred.nout, pred.out);
-	//DEBUG
-
+	// SR :: Hacky code for Object Streams -- assumes p is the h_action parser
+	// built by p_objstm__m and that its env is laid out {parser, action, user_data}.
+	// env + offsets is the ADDRESS of the user_data slot: load it to reach the spec.
+	if (N > 0 && p->env != NULL) { // object streams
+		ostm_spec_T *ospec = *(ostm_spec_T **)((char *)p->env + sizeof(HParser*) + sizeof(HAction));
+		ospec->stmlength = pred.nout;
+		fprintf(stdout, "\n\nFlateDecode: %p, N = (%zu, %zu), stmlength = %zu\n",
+				(void *)ospec, N, ospec->numobjs, ospec->stmlength);
+	}
 	res = h_parse(p, pred.out, pred.nout);
 	free(pred.out);
 #endif
@@ -3265,7 +3293,7 @@ int read_lzw_buffer(void)
 
 
 HParseResult *
-LZWDecode(const Dict *parms, HBytes b, HParser *p)
+LZWDecode(const Dict *parms, HBytes b, HParser *p, size_t numobjs)
 {
 	struct predictor pred = {1, 1, 8, 1};
 	int (*depredict)(struct predictor *, uint8_t *, size_t);
@@ -3343,7 +3371,7 @@ LZWDecode(const Dict *parms, HBytes b, HParser *p)
 }
 
 HParseResult *
-RunLengthDecode(const Dict *parms, HBytes b, HParser *p)
+RunLengthDecode(const Dict *parms, HBytes b, HParser *p, size_t numobjs)
 {
 	HParseResult *res;
 
@@ -3366,7 +3394,7 @@ RunLengthDecode(const Dict *parms, HBytes b, HParser *p)
  * parms should be empty, because the filter has no parameters
  */
 HParseResult *
-ASCIIHexDecode(const Dict *parms, HBytes b, HParser *p)
+ASCIIHexDecode(const Dict *parms, HBytes b, HParser *p, size_t numobjs)
 {
 	HParseResult *f_res, *res;
 
@@ -3397,7 +3425,7 @@ ASCIIHexDecode(const Dict *parms, HBytes b, HParser *p)
  * parms should be empty, because the filter has no parameters
  */
 HParseResult*
-ASCII85Decode(const Dict *parms, HBytes b, HParser *p)
+ASCII85Decode(const Dict *parms, HBytes b, HParser *p, size_t numobjs)
 {
 	HParseResult *f_res, *res;
 
@@ -3473,16 +3501,6 @@ p_take__m(HAllocator *mm__, size_t n, struct Env *aux)
 }
 
 
-// Parser for object streams
-HParser *p_objstm__m(HAllocator *, const Dict *);
-
-// Action for stream continuation
-HParsedToken *act_ks_value(const HParseResult *p, void *u);
-struct streamspec {
-	Dict *dict;		/* stream dictionary */
-	HParser *parser;	/* data parser */
-};
-
 
 
 /*
@@ -3497,7 +3515,7 @@ struct streamspec {
 HParseResult *
 decode_contentstream(const Dict *d, HBytes b, HParser *p)
 {
-	HParseResult *(*filter)(const Dict *, HBytes, HParser *);
+	HParseResult *(*filter)(const Dict *, HBytes, HParser *, size_t);
 	const Dict *parms = NULL;
 	const HParsedToken *v;
 	HParseResult *res = NULL;
@@ -3534,7 +3552,7 @@ decode_contentstream(const Dict *d, HBytes b, HParser *p)
 		else if (bytes_eq(v->bytes, "LZWDecode"))
 			filter = LZWDecode;
 		else {		/* filter not supported */
-			fprintf(stderr, "decode_stream:: Unsupported Filter [%.*s]\n",
+			fprintf(stderr, "decode_contentstream:: Unsupported Filter [%.*s]\n",
 					(int)v->bytes.len, v->bytes.token);
 			return NULL; /* Treat the stream as a byte array */
 		}
@@ -3543,7 +3561,14 @@ decode_contentstream(const Dict *d, HBytes b, HParser *p)
 		if (v && v->token_type == TT_Dict)
 			parms = v->user;
 
-		res = filter(parms, b, p);
+		// SR:: Hacky code -- N is set only for a /Type /ObjStm dictionary with a positive /N
+		size_t N = 0;	// stays 0 otherwise: other streams (e.g. ICC profiles) also carry /N
+		const HParsedToken *ty = dictentry(d, "Type");
+		v = dictentry(d, "N");
+		if (ty && ty->token_type == TT_BYTES && bytes_eq(ty->bytes, "ObjStm") &&
+		    v && v->token_type == TT_SINT && v->sint > 0)	// negative /N would wrap in size_t
+			N = v->sint;
+		res = filter(parms, b, p, N);
 
 		/* Debug */
 		if (res){
@@ -4036,8 +4061,14 @@ kcontentstream(HAllocator *mm__, const HParsedToken *x, void *env)
 	v = dictentry(dict, "Type");
 	if (v == NULL)	// XXX -> custom type
 		spec->parser = p_textstream;
-	else if ( (v->token_type == TT_BYTES) && bytes_eq(v->bytes, "ObjStm") )
-		spec->parser = p_objstm__m(mm__, dict);
+	else if ( (v->token_type == TT_BYTES) && bytes_eq(v->bytes, "ObjStm") ) {
+		ostm_spec_T *ospec = h_alloc(mm__, sizeof(ostm_spec_T ));
+		ospec->dict      = dict;
+		ospec->numobjs   = 0;
+		ospec->stmlength = 0;
+
+		spec->parser = p_objstm__m(mm__, ospec);
+	}
 	else {
 		fprintf(stdout, "kcontentstream: Not a text or object stream!\n");
 		return p_fail;
@@ -4868,7 +4899,7 @@ parse_catalog(struct Env *aux, const HParsedToken *root)
 HParseResult *
 decode_stream(const Dict *d, HBytes b, HParser *p)
 {
-	HParseResult *(*filter)(const Dict *, HBytes, HParser *);
+	HParseResult *(*filter)(const Dict *, HBytes, HParser *, size_t);
 	const Dict *parms = NULL;
 	const HParsedToken *v;
 
@@ -4908,7 +4939,14 @@ decode_stream(const Dict *d, HBytes b, HParser *p)
 	if (v && v->token_type == TT_Dict)
 		parms = v->user;
 
-	return filter(parms, b, p);
+	// SR:: Hacky code -- N is set only for a /Type /ObjStm dictionary with a positive /N
+	size_t N = 0;	// stays 0 otherwise: other streams (e.g. ICC profiles) also carry /N
+	const HParsedToken *ty = dictentry(d, "Type");
+	v = dictentry(d, "N");
+	if (ty && ty->token_type == TT_BYTES && bytes_eq(ty->bytes, "ObjStm") &&
+	    v && v->token_type == TT_SINT && v->sint > 0)	// negative /N would wrap in size_t
+		N = v->sint;
+	return filter(parms, b, p, N);
 }
 
 
@@ -4930,7 +4968,17 @@ p_stream_data__m(HAllocator *mm__, const Dict *dict, struct Env *aux)
 
 	if (bytes_eq(v->bytes, "ObjStm")) {
 		fprintf(stdout, "\np_stream_data__m:: Parsing object stream\n");
-		return p_objstm__m(mm__, dict);
+		ostm_spec_T *ospec = h_alloc(mm__, sizeof(ostm_spec_T ));
+		ospec->dict      = dict;
+		v = dictentry(dict, "N");
+		if ( (v == NULL) || (v->token_type != TT_SINT)) {
+			fprintf(stderr, "\np_stream_data__m:: Missing # of objects -N- in stream!!\n");
+			return NULL;
+		}
+		ospec->numobjs   = v->sint;
+		ospec->stmlength = 0;
+
+		return p_objstm__m(mm__, ospec);
 	}
 
 	if (bytes_eq(v->bytes, "XObject")) {
@@ -5219,20 +5267,28 @@ p_xrefdata__m(HAllocator *mm__, const Dict *dict)
 HParsedToken *
 act_ostm(const HParseResult *p, void *u)
 {
-	assert(((HParsedToken *)u)->token_type == TT_SINT);
-	size_t N = ((HParsedToken *)u)->sint;
+	ostm_spec_T *spec = u;	/* object-stream context registered by p_objstm__m */
+	fprintf(stdout, "\nact_ostm:: N objects = %zu, Stream Length = %zu\n",	/* %zu: both are size_t */
+			spec->numobjs, spec->stmlength);
 
 	Objstm   *ostrm = H_ALLOC(Objstm);
-	ostrm->numObjs  = N;
-	ostrm->tok      = h_arena_malloc(p->arena, N* sizeof(Objref_T));
+	ostrm->numObjs  = spec->numobjs;
+	ostrm->tok      = h_arena_malloc(p->arena, ostrm->numObjs* sizeof(Objref_T));
 	ostrm->arena    = p->arena;
 
 	for (int i=0; i<ostrm->numObjs; i++) {
 		const HParsedToken *num = H_FIELD_TOKEN(0, 2*i);
-		assert(num->token_type == TT_UINT);
+		const HParsedToken *offt = H_FIELD_TOKEN(0, 2*i+1);
+		assert((num->token_type == TT_UINT) && (offt->token_type == TT_UINT));
 		ostrm->tok[i].oid.nr  = H_CAST_UINT(num);
 		ostrm->tok[i].oid.gen = 0;
 		ostrm->tok[i].obj     = H_FIELD_TOKEN(1, i);
+		size_t offs = H_CAST_UINT(offt);
+		if (spec->stmlength > 0 && offs >= spec->stmlength) { // stmlength == 0: length unknown, skip
+			fprintf(stdout, "\nact_ostm:: object offset %zu is beyond the stream length %zu!\n\n", offs, spec->stmlength);
+			// NOTE(review): confirm how hammer treats a NULL token returned from an action
+			return NULL;
+		}
 	}
 
 //	const HCountedArray *indices = H_FIELD_SEQ(0);
@@ -5248,24 +5304,29 @@ act_ostm(const HParseResult *p, void *u)
 }
 
 HParser *
-p_objstm__m(HAllocator *mm__, const Dict *dict)
+p_objstm__m(HAllocator *mm__, ostm_spec_T *spec)
 {
 	const HParsedToken *v;
 	size_t N;
 
-	v = dictentry(dict, "N");
+	if ((!spec) || (!spec->dict)) {
+		fprintf(stdout, "\np_objstm__m: The object stream dictionary missing!\n");
+		return p_fail;
+	}
+	v = dictentry(spec->dict, "N");
 	if (v == NULL || v->token_type != TT_SINT || v->sint < 0 ||
 	    (uint64_t)v->sint > SIZE_MAX) {
 		fprintf(stderr, "p_objstm__m: missing /N on object stream\n");
 		return p_fail;
 	}
 	N = v->sint;
+	spec->numobjs = N;
 
 	HParser *wel_ws = h_sequence__m(mm__, p_wel, p_ws, NULL);
 	HParser *idx = p_sepBy_n__m(mm__, p_npair, wel_ws, N);
 
 	HParser *p_ostm = h_sequence__m(mm__, p_ws, idx, p_elemr, p_ws, NULL);
-	HParser *ostm_p = h_action__m(mm__, p_ostm, act_ostm, (void *)v);
+	HParser *ostm_p = h_action__m(mm__, p_ostm, act_ostm, (void *)spec);
 
 	return ostm_p;
 		// XXX leading and trailing ws OK?
-- 
GitLab