From 6b54ebfa3261e225ceeb55557879f1fbd5bda222 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" <pesco@khjk.org> Date: Wed, 5 Feb 2020 21:08:24 +0100 Subject: [PATCH] generally parse stream objects (only XRef for now) --- pdf.c | 464 +++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 294 insertions(+), 170 deletions(-) diff --git a/pdf.c b/pdf.c index 0520696..5795ee8 100644 --- a/pdf.c +++ b/pdf.c @@ -116,7 +116,7 @@ struct Env { /* * custom token types */ -HTokenType TT_XREntry, TT_Ref; +HTokenType TT_XREntry, TT_Ref, TT_HParseResult; typedef struct { enum {XR_FREE, XR_INUSE, XR_OBJSTM} type; @@ -161,6 +161,14 @@ pp_ref(FILE *stream, const HParsedToken *tok, int indent, int delta) fprintf(stream, "[%zu,%zu]", r->nr, r->gen); } +void +pp_parseresult(FILE *stream, const HParsedToken *tok, int indent, int delta) +{ + HParseResult *res = H_CAST(HParseResult, tok); + + h_pprint(stream, res->ast, indent, delta); +} + /* * semantic actions @@ -355,18 +363,54 @@ act_xrstment(const HParseResult *p, void *u) #define act_xrefs h_act_last +/* + * return a cross-reference stream in the same form as xref sections. + * + * p = (pnat nat (dict [HParseResult: xrefs])) + * result = (xrefs dict) + */ HParsedToken * -act_rest(const HParseResult *p, void *env) +act_xrstm(const HParseResult *p, void *u) { - struct Env *aux = env; - size_t offset = H_CAST_UINT(p->ast) / 8; + const HParsedToken *xrefs, *dict; + HParsedToken *tok; + HParseResult *res; - return H_MAKE_BYTES(aux->input + offset, aux->sz - offset); + dict = H_INDEX_TOKEN(p->ast, 2, 0); + res = H_FIELD(HParseResult, 2, 1); // XXX free this + xrefs = res->ast; + + tok = H_MAKE_SEQN(2); + tok->seq->elements[0] = (HParsedToken *)xrefs; + tok->seq->elements[1] = (HParsedToken *)dict; + tok->seq->used = 2; + return tok; } -/* stream semantics (defined further below) */ -bool validate_xrstm(HParseResult *, void *); -HParsedToken *act_xrstm(const HParseResult *, void *); +/* + * validate the /Type field on a cross-reference stream. 
+ * + * p = pnat nat (dict offs offs) + */ +bool +validate_xrstm(HParseResult *p, void *u) +{ + const HCountedArray *tdict = H_FIELD_SEQ(2, 0); + const HParsedToken *v = dictentry(tdict, "Type"); + +#if 0 + if (v == NULL) + fprintf(stderr, "stream dict has no /Type\n"); + else if (v->token_type != TT_BYTES) + fprintf(stderr, "stream /Type is no name object\n"); + else if (bytes_eq(v->bytes, "XRef")) + return true; + return false; +#endif + + return (v != NULL && v->token_type == TT_BYTES && + bytes_eq(v->bytes, "XRef")); +} /* @@ -386,6 +430,7 @@ HParser *kxstream(HAllocator *, const HParsedToken *, void *); void init_parser(struct Env *aux) { + TT_HParseResult = h_allocate_token_new("HParseResult", NULL, pp_parseresult); TT_XREntry = h_allocate_token_new("XREntry", NULL, pp_xrentry); TT_Ref = h_allocate_token_new("Ref", NULL, pp_ref); @@ -518,7 +563,10 @@ init_parser(struct Env *aux) /* body */ H_RULE(indobj, CHX(stream, obj)); H_RULE(objdef, SEQ(pnat, nat, KW("obj"), indobj, KW("endobj"))); - H_RULE(body, h_many(objdef)); // XXX object streams + H_RULE(body, h_many(objdef)); + + /* for object streams */ + //H_RULE(osidx, h_many(SEQ(pnat, nat))); /* cross-reference section */ H_RULE(xreol, CHX(SEQ(sp, cr), SEQ(sp, lf), crlf)); @@ -533,8 +581,7 @@ init_parser(struct Env *aux) H_ARULE(xrefs, SEQ(KW("xref"), nl, h_many(xrsub))); /* cross-reference streams */ - H_RULE(rest, h_action(h_tell(), act_rest, aux)); - H_RULE(xstream, h_bind(SEQ(stmbeg, rest), kxstream, aux)); + H_RULE(xstream, h_bind(stmbeg, kxstream, aux)); H_AVRULE(xrstm, SEQ(pnat, nat, KW("obj"), xstream)); // XXX skip however much we consumed and check for "endstream endobj"? @@ -568,28 +615,9 @@ init_parser(struct Env *aux) /* - * stream object handling incl. cross-reference streams + * lookup and resolution of indirect references */ -#include <limits.h> /* INT_MAX */ -#include <zlib.h> -#include <err.h> - -/* combine current position with env=(input,sz) into HBytes */ -HParsedToken * -act_ks_bytes(const HParseResult *p, void *env) -{ - const HBytes *bs = env; - size_t offset = H_CAST_UINT(p->ast) / 8; - - /* - * NB: we must allocate a new HBytes struct here because the old one is - * allocated only temporarily for the lifetime of the continuation - * below. 
- */ - return H_MAKE_BYTES(bs->token + offset, bs->len); -} - XREntry * lookup_xref(struct Env *aux, size_t nr, size_t gen) { @@ -644,13 +672,43 @@ parse_obj(struct Env *aux, size_t nr, size_t gen, size_t offset) } const HParsedToken * -parse_obj_stm(struct Env *aux, size_t nr, size_t stm_nr, size_t idx) +parse_objstm_obj(struct Env *aux, size_t nr, size_t stm_nr, size_t idx) { - //const HParsedToken *stm; + XREntry *ent; + const HParsedToken *stm; - // XXX find the stream object, decode it, parse the offset at idx, - // and parse the target object at it - return NULL; + /* + * acquire the stream object + */ + + ent = lookup_xref(aux, stm_nr, 0); + if (ent == NULL) + return NULL; /* stream not found */ + + switch (ent->type) + { + case XR_FREE: + return NULL; /* stream deleted */ + case XR_INUSE: + if (ent->n.gen != 0) + return NULL; /* stream replaced */ + if (ent->obj == NULL) + ent->obj = parse_obj(aux, stm_nr, 0, ent->n.offs); + break; + case XR_OBJSTM: + return NULL; /* invalid: nested streams */ + } + + if ((stm = ent->obj) == NULL) { + fprintf(stderr, "%s: error parsing object stream at position " + "%zu (%#zx)\n", aux->infile, ent->n.offs, ent->n.offs); + return NULL; + } + + /* + * decode the stream and find the target object in it + */ + return NULL; // XXX } const HParsedToken * @@ -687,84 +745,22 @@ resolve(struct Env *aux, const HParsedToken *v) case XR_OBJSTM: if (r->gen != 0) return NULL; /* invalid entry! */ - ent->obj = parse_obj_stm(aux, r->nr, ent->o.stm, ent->o.idx); + ent->obj = parse_objstm_obj(aux, r->nr, ent->o.stm, ent->o.idx); break; } return resolve(aux, ent->obj); } -/* - * This continuation takes the stream dictionary (as first element of x) and - * should return a parser that consumes exactly the bytes that make up the - * stream data. - */ -HParser * -kstream(HAllocator *mm__, const HParsedToken *x, void *env) -{ - struct Env *aux = env; - const HParsedToken *dict_t = H_INDEX_TOKEN(x, 0); - const HCountedArray *dict = H_CAST_SEQ(dict_t); - const HParsedToken *v = NULL; - size_t sz; - - /* look for the Length entry */ - v = dictentry(dict, "Length"); - v = resolve(aux, v); /* resolve indirect references */ - if (v == NULL || v->token_type != TT_SINT || v->sint < 0) - goto fail; - sz = (size_t)v->sint; - - //fprintf(stderr, "parsing stream object, length %zu.\n", sz); // XXX debug - - /* dummy struct to hold the pair (input,sz) */ - HBytes *bytes = h_alloc(mm__, sizeof(HBytes)); - bytes->token = aux->input; - bytes->len = sz; - - HParser *tell = h_tell__m(mm__); - HParser *skip = h_skip__m(mm__, sz * 8); - - HParser *bytes_p = h_action__m(mm__, tell, act_ks_bytes, bytes); - HParser *dict_p = p_return__m(mm__, dict_t); - return h_sequence__m(mm__, dict_p, bytes_p, skip, NULL); -fail: -#if 0 - if (v == NULL) - fprintf(stderr, "stream /Length missing\n"); - else if (v -> token_type != TT_SINT) - fprintf(stderr, "stream /Length not an integer\n"); - else if (v < 0) - fprintf(stderr, "stream /Length negative\n"); -#endif - //h_pprintln(stderr, p); // XXX debug - return p_fail; -} /* - * validate the /Type field on a cross-reference stream. - * - * p = pnat nat (dict offs offs) + * stream object handling incl. 
filters and cross-reference streams */ -bool -validate_xrstm(HParseResult *p, void *u) -{ - const HCountedArray *tdict = H_FIELD_SEQ(2, 0); - const HParsedToken *v = dictentry(tdict, "Type"); - -#if 0 - if (v == NULL) - fprintf(stderr, "stream dict has no /Type\n"); - else if (v->token_type != TT_BYTES) - fprintf(stderr, "stream /Type is no name object\n"); - else if (bytes_eq(v->bytes, "XRef")) - return true; - return false; -#endif - return (v != NULL && v->token_type == TT_BYTES && - bytes_eq(v->bytes, "XRef")); -} +#include <limits.h> /* INT_MAX */ +#include <stdlib.h> /* abs() */ +#include <zlib.h> +#include <err.h> struct predictor { /* parameters */ @@ -794,8 +790,6 @@ uint8_t pp_sub(int a, int b, int c) { return a; } uint8_t pp_up(int a, int b, int c) { return b; } uint8_t pp_avg(int a, int b, int c) { return (a + b) / 2; } -#include <stdlib.h> /* abs() */ - uint8_t pp_paeth(int a, int b, int c) { @@ -866,7 +860,7 @@ depred_png(struct predictor *pred, uint8_t *inp, size_t sz) } HParseResult * -FlateDecode(HAllocator *mm__, HCountedArray *parms, HBytes b, HParser *p) +FlateDecode(HCountedArray *parms, HBytes b, HParser *p) { size_t const BUFSIZE = 8 * 1024; uint8_t *buf; @@ -922,8 +916,9 @@ FlateDecode(HAllocator *mm__, HCountedArray *parms, HBytes b, HParser *p) return NULL; } pred.rowsz = (pred.colors * pred.bpc * pred.columns + 7) / 8; - pred.buf = h_alloc(mm__, pred.rowsz); - memset(pred.buf, 0, pred.rowsz); + pred.buf = calloc(1, pred.rowsz); + if (pred.buf == NULL) + err(1, "FlateDecode"); } /* set up zlib */ @@ -931,10 +926,12 @@ FlateDecode(HAllocator *mm__, HCountedArray *parms, HBytes b, HParser *p) ret = inflateInit(&strm); if (ret != Z_OK) errx(1, "inflateInit: %s (%d)", strm.msg, ret); - buf = h_alloc(mm__, BUFSIZE); + buf = malloc(BUFSIZE); + if (buf == NULL) + err(1, "FlateDecode"); /* initialize target parser */ - sp = h_parse_start__m(mm__, p); + sp = h_parse_start(p); assert(sp != NULL); pred.sp = sp; @@ -958,8 +955,8 @@ FlateDecode(HAllocator *mm__, HCountedArray *parms, HBytes b, HParser *p) res = h_parse_finish(sp); // XXX always return NULL on error? inflateEnd(&strm); - mm__->free(mm__, pred.buf); - mm__->free(mm__, buf); + free(pred.buf); + free(buf); if (done == -1) return NULL; @@ -967,23 +964,23 @@ FlateDecode(HAllocator *mm__, HCountedArray *parms, HBytes b, HParser *p) } /* - * decode the byte stream 'b' according to metadata in its stream dictionary - * 'd' and parse the result with 'p'. + * decode the bytes in 'b' according to metadata in the stream dictionary 'd' + * and parse the result with 'p'. 
*/ HParseResult * -parse_stream(HAllocator *mm__, HCountedArray *d, HBytes b, HParser *p) +decode_stream(const HCountedArray *d, HBytes b, HParser *p) { - HParseResult *(*filter)(HAllocator *, HCountedArray *, HBytes, HParser *); + HParseResult *(*filter)(HCountedArray *, HBytes, HParser *); HCountedArray *parms = NULL; const HParsedToken *v; v = dictentry(d, "Filter"); if (v == NULL) - return h_parse__m(mm__, p, b.token, b.len); + return h_parse(p, b.token, b.len); /* compile to a CF backend to enable incremental parsing */ if (h_compile(p, PB_LLk, NULL) == -1) - errx(1, "xref data parser: LL(1) compile failed"); + errx(1, "stream data parser: LL(1) compile failed"); if (v->token_type == TT_SEQUENCE) return NULL; // XXX filter chains not supported, yet @@ -997,29 +994,146 @@ parse_stream(HAllocator *mm__, HCountedArray *d, HBytes b, HParser *p) if (v && v->token_type == TT_SEQUENCE) parms = v->seq; - return filter(mm__, parms, b, p); + return filter(parms, b, p); +} + +HParsedToken * +act_rest(const HParseResult *p, void *env) +{ + struct Env *aux = env; + size_t offset = H_CAST_UINT(p->ast) / 8; + + return H_MAKE_BYTES(aux->input + offset, aux->sz - offset); +} + +HParser * +p_rest__m(HAllocator *mm__, struct Env *aux) +{ + return h_action__m(mm__, h_tell__m(mm__), act_rest, aux); +} + +/* combine current position with env=(input,sz) into HBytes */ +HParsedToken * +act_take_bytes(const HParseResult *p, void *env) +{ + const HBytes *bs = env; + size_t offset = H_CAST_UINT(p->ast) / 8; + + /* + * NB: we must allocate a new HBytes struct here because the old one is + * allocated only temporarily for the lifetime of the continuation + * below. + */ + return H_MAKE_BYTES(bs->token + offset, bs->len); +} + +HParser * +p_take__m(HAllocator *mm__, size_t n, struct Env *aux) +{ + HParser *skip, *bytes; + HBytes *bs; + + /* dummy struct to hold the pair (input,n) */ + bs = h_alloc(mm__, sizeof(HBytes)); + bs->token = aux->input; + bs->len = n; + + bytes = h_action__m(mm__, h_tell__m(mm__), act_take_bytes, bs); + skip = h_skip__m(mm__, n * 8); + + return h_left__m(mm__, bytes, skip); +} + +HParser * +p_xrefdata__m(HAllocator *mm__, const HCountedArray *dict); + +HParser * +p_stream_data__m(HAllocator *mm__, const HCountedArray *dict) +{ + const HParsedToken *v; + + v = dictentry(dict, "Type"); + if (v == NULL || v->token_type != TT_BYTES) // XXX -> custom type + return NULL; /* no /Type field */ + + /* interpret known stream types */ + if (bytes_eq(v->bytes, "XRef")) + return p_xrefdata__m(mm__, dict); + // XXX + //if (bytes_eq(v->bytes, "ObjStm")) + // return p_objstm__m(mm__, dict); + + return NULL; /* unrecognized type */ +} + +struct streamspec { + HCountedArray *dict; /* stream dictionary */ + HParser *parser; /* data parser */ +}; + +HParsedToken * +act_ks_value(const HParseResult *p, void *u) +{ + struct streamspec *spec = u; + HBytes bytes = H_CAST_BYTES(p->ast); + HParseResult *res; + + /* decode and parse the stream data */ + res = decode_stream(spec->dict, bytes, spec->parser); + + return H_MAKE(HParseResult, res); } /* - * interpret a cross-reference stream and return it in the same form as other - * cross-reference sections: + * This continuation takes the stream dictionary (as first element of x) and + * should return a parser that consumes exactly the bytes that make up the + * stream data. * - * p = (pnat nat (dict xrefs)) - * result = (xrefs dict) + * x = (dict ...) 
*/ -HParsedToken * -act_xrstm(const HParseResult *p, void *u) +HParser * +kstream(HAllocator *mm__, const HParsedToken *x, void *env) { - HParsedToken *xrefs, *dict, *result; + struct Env *aux = env; + HParsedToken *dict_t = H_INDEX_TOKEN(x, 0); + HCountedArray *dict = H_CAST_SEQ(dict_t); + const HParsedToken *v = NULL; + HParser *bytes_p, *dict_p, *value_p; + struct streamspec *spec; + size_t sz; - dict = H_INDEX_TOKEN(p->ast, 2, 0); - xrefs = H_INDEX_TOKEN(p->ast, 2, 1); + /* look for the Length entry */ + v = dictentry(dict, "Length"); + v = resolve(aux, v); /* resolve indirect references */ + if (v == NULL || v->token_type != TT_SINT || v->sint < 0) + goto fail; + sz = (size_t)v->sint; + + //fprintf(stderr, "parsing stream object, length %zu.\n", sz); // XXX debug + + dict_p = p_return__m(mm__, dict_t); + bytes_p = p_take__m(mm__, sz, aux); + + spec = h_alloc(mm__, sizeof(struct streamspec)); + spec->dict = dict; + spec->parser = p_stream_data__m(mm__, dict); + if (spec->parser != NULL) + value_p = h_action__m(mm__, bytes_p, act_ks_value, spec); + else + value_p = bytes_p; - result = H_MAKE_SEQN(2); - result->seq->elements[0] = xrefs; - result->seq->elements[1] = dict; - result->seq->used = 2; - return result; + return h_sequence__m(mm__, dict_p, value_p, NULL); +fail: +#if 0 + if (v == NULL) + fprintf(stderr, "stream /Length missing\n"); + else if (v -> token_type != TT_SINT) + fprintf(stderr, "stream /Length not an integer\n"); + else if (v < 0) + fprintf(stderr, "stream /Length negative\n"); +#endif + //h_pprintln(stderr, p); // XXX debug + return p_fail; } HParser * @@ -1035,22 +1149,13 @@ p_xrefsub__m(HAllocator *mm__, size_t base, size_t count, HParser *p_entry) return h_sequence__m(mm__, p_header, p_entries, NULL); } -/* x = ((dict ...) bytes) */ HParser * -kxstream(HAllocator *mm__, const HParsedToken *x, void *env) +p_xrefdata__m(HAllocator *mm__, const HCountedArray *dict) { - //struct Env *aux = env; - const HParsedToken *v, *dict_t; - const HParseResult *res; - HCountedArray *dict; - HBytes bytes; + const HParsedToken *v; + HParser *p_field[3], *p_entry, **p_subs; size_t W[3]; size_t Size, Wn, Wskip; - HParser *p_field[3], *p_entry, **p_subs, *p_xrefdata; - - dict_t = H_INDEX_TOKEN(x, 0, 0); - dict = H_CAST_SEQ(dict_t); - bytes = H_INDEX_BYTES(x, 1); /* * what follows is a horrible bunch of code that builds, from the @@ -1086,35 +1191,35 @@ kxstream(HAllocator *mm__, const HParsedToken *x, void *env) /* Size (required) - total size of xref table */ v = dictentry(dict, "Size"); if (v == NULL || v->token_type != TT_SINT || v->sint < 1) - goto fail; + return p_fail; Size = v->sint; /* W (required) - field widths for each xref entry */ v = dictentry(dict, "W"); if (v == NULL || v->token_type != TT_SEQUENCE) - goto fail; + return p_fail; if ((Wn = v->seq->used) < 3) - goto fail; + return p_fail; Wskip = 0; for (size_t i = 0; i < Wn; i++) { HTokenType tt = v->seq->elements[i]->token_type; int64_t w = v->seq->elements[i]->sint; if (tt != TT_SINT || w < 0) - goto fail; + return p_fail; if (i < 3) { /* we can't take >64 bits and want to use size_t */ if (w > 8 || w > sizeof(size_t)) - goto fail; + return p_fail; W[i] = (size_t)w; } else { if (w > SIZE_MAX - Wskip) - goto fail; /* overflow */ + return p_fail; /* overflow */ Wskip += w; } } if (Wskip > SIZE_MAX / 8) - goto fail; + return p_fail; /* * build the parser for one xref entry. 
@@ -1160,7 +1265,7 @@ kxstream(HAllocator *mm__, const HParsedToken *x, void *env) #endif } if (Wskip > 0) // XXX h_skip does not work with CF, yet - goto fail; + return p_fail; p_entry = h_sequence__m(mm__, p_field[0], p_field[1], p_field[2], NULL); p_entry = h_action__m(mm__, p_entry, act_xrstment, NULL); @@ -1172,13 +1277,13 @@ kxstream(HAllocator *mm__, const HParsedToken *x, void *env) p_subs[0] = p_xrefsub__m(mm__, 0, Size, p_entry); p_subs[1] = NULL; } else if (v->token_type != TT_SEQUENCE) { - goto fail; + return p_fail; } else { size_t nsubs = v->seq->used / 2; /* build a parser for each subsection */ if (nsubs >= SIZE_MAX / sizeof(HParser *)) - goto fail; + return p_fail; p_subs = h_alloc(mm__, (nsubs + 1) * sizeof(HParser *)); for (size_t i = 0; i < nsubs; i++) { HParsedToken *base = v->seq->elements[2 * i]; @@ -1187,33 +1292,52 @@ kxstream(HAllocator *mm__, const HParsedToken *x, void *env) if (base->token_type != TT_SINT || base->sint < 0 || n->token_type != TT_SINT || n->sint < 0 || n->sint > SIZE_MAX) - goto fail; + return p_fail; p_subs[i] = p_xrefsub__m(mm__, base->sint, n->sint, p_entry); } p_subs[nsubs] = NULL; } - p_xrefdata = h_sequence__ma(mm__, (void **)p_subs); + return h_sequence__ma(mm__, (void **)p_subs); +} + +/* + * This continuation is very similar to kstream, except that it does not + * rely on /Length to consume the right amount of input. If /Length is + * not present or indirect, it will operate on the entire rest of the input. + * This is permissible, other than for general streams, because the XRef data + * is always self-delimiting. + * + * x = (dict ...) + */ +HParser * +kxstream(HAllocator *mm__, const HParsedToken *x, void *env) +{ + struct Env *aux = env; + HParsedToken *dict_t = H_INDEX_TOKEN(x, 0); + HCountedArray *dict = H_CAST_SEQ(dict_t); + const HParsedToken *v; + HParser *bytes_p, *dict_p, *value_p; + struct streamspec *spec; /* restrict bytes to Length if present (and not indirect) */ v = dictentry(dict, "Length"); if (v != NULL && v->token_type == TT_SINT && v->sint >= 0) - bytes.len = v->sint; + bytes_p = p_take__m(mm__, v->sint, aux); + else + bytes_p = p_rest__m(mm__, aux); // XXX consume the proper amount - /* decode and parse the stream data */ - res = parse_stream(mm__, dict, bytes, p_xrefdata); - if (res == NULL) - goto fail; + /* construct the parser for the stream data */ + spec = h_alloc(mm__, sizeof(struct streamspec)); + spec->dict = dict; + spec->parser = p_xrefdata__m(mm__, dict); + assert (spec->parser != NULL); - HParser *dict_p = p_return__m(mm__, dict_t); - HParser *xref_p = p_return__m(mm__, res->ast); - HParser *skip_p = h_skip__m(mm__, bytes.len * 8); - // XXX skip only as much as parse_stream consumed + dict_p = p_return__m(mm__, dict_t); + value_p = h_action__m(mm__, bytes_p, act_ks_value, spec); - return h_sequence__m(mm__, dict_p, xref_p, skip_p, NULL); -fail: - return p_fail; + return h_sequence__m(mm__, dict_p, value_p, NULL); } -- GitLab
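
Note (illustration only, not part of the diff): downstream code that wants the
decoded stream data now has to unwrap the TT_HParseResult token that
act_ks_value produces as the second element of the (dict value) sequence built
by kstream/kxstream. The sketch below shows one way to do that, mirroring what
act_xrstm and pp_parseresult above already do; the helper name stream_data_ast
is made up for the example and assumes pdf.c's existing includes (hammer glue
macros, custom token types).

/*
 * sketch: given the token for a parsed stream object, i.e. the
 * (dict value) sequence produced by kstream/kxstream, return the AST of
 * the decoded stream data, or NULL if the value was left as raw bytes
 * because no data parser was attached (cf. p_stream_data__m) or the
 * decode failed.
 */
const HParsedToken *
stream_data_ast(const HParsedToken *stm)
{
	const HParsedToken *value = H_INDEX_TOKEN(stm, 1);
	HParseResult *res;

	if (value->token_type != TT_HParseResult)
		return NULL;		/* raw bytes, not parsed */

	res = H_CAST(HParseResult, value);
	return res == NULL ? NULL : res->ast;
}

Once an ObjStm data parser (the p_objstm__m stub mentioned in
p_stream_data__m) exists, parse_objstm_obj() could use the same access
pattern to pull individual objects out of a decoded object stream.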