From 0398c35b3027cd85459bf41162da3173e12b171b Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Tue, 28 Jan 2020 14:07:07 +0100
Subject: [PATCH] implement resolve() sans object streams

---
 pdf.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 95 insertions(+), 3 deletions(-)

diff --git a/pdf.c b/pdf.c
index 37eaa1f..32f4974 100644
--- a/pdf.c
+++ b/pdf.c
@@ -108,7 +108,7 @@ struct Env {
 	const char *input;
 	size_t sz;
 
-	const HParsedToken **xrefs;
+	const HParsedToken **xrefs;	/* all xref sections of the file */
 	size_t nxrefs;
 	const HParsedToken **objs;
 	size_t nobjs;
@@ -127,6 +127,7 @@ typedef struct {
 		struct { size_t offs, gen; } n;		/* inuse */
 		struct { size_t stm, idx; } o;		/* objstm */
 	};
+	const HParsedToken *obj;
 } XREntry;
 
 typedef struct { size_t nr, gen; } Ref;
@@ -308,6 +309,7 @@ act_xrent(const HParseResult *p, void *u)
 	XREntry *xr = H_ALLOC(XREntry);
 	char c = H_FIELD_UINT(2);
 
+	xr->obj = NULL;
 	switch (c) {
 	case 'f':
 		xr->type = XR_FREE;
@@ -331,6 +333,7 @@ act_xrstment(const HParseResult *p, void *u)
 {
 	XREntry *xr = H_ALLOC(XREntry);
 
+	xr->obj = NULL;
 	xr->type = H_FIELD_UINT(0);
 	switch (xr->type) {
 	case XR_FREE:
@@ -376,6 +379,7 @@ HParser *p_pdf;
 HParser *p_pdfdbg;
 HParser *p_startxref;
 HParser *p_xref;
+HParser *p_objdef;
 
 /* continuations for h_bind() */
 HParser *kstream(HAllocator *, const HParsedToken *, void *);
@@ -556,6 +560,7 @@ init_parser(struct Env *aux)
 	p_pdfdbg = pdfdbg;
 	p_startxref = startxr;
 	p_xref = CHX(xr_td, xrstm);
+	p_objdef = objdef;
 
 	p_fail = h_nothing_p();
 	p_epsilon = epsilon;
@@ -587,11 +592,98 @@ act_ks_bytes(const HParseResult *p, void *env)
 	return H_MAKE_BYTES(bs->token + offset, bs->len);
 }
 
+/* Find the xref entry for object number nr; NULL if no subsection has it.
+ * gen is not consulted here; the caller checks it against the entry. */
+XREntry *
+lookup_xref(struct Env *aux, size_t nr, size_t gen)
+{
+	/* for each cross-reference section (i.e. update) */
+	for (size_t i = 0; i < aux->nxrefs; i++) {
+		HCountedArray *subs = H_INDEX_SEQ(aux->xrefs[i], 0);
+		/* for each cross-reference subsection */
+		for (size_t j = 0; j < subs->used; j++) {
+			/* subsections are indexed by j, not by section i */
+			HParsedToken *ss = subs->elements[j];
+			size_t base = H_INDEX_UINT(ss, 0, 0);
+			size_t n = H_INDEX_UINT(ss, 0, 1);
+			if (nr >= base && nr - base < n)
+				return H_INDEX(XREntry, ss, 1, nr - base);
+		}
+	}
+	return NULL;
+}
+
+/* Parse the object definition at input position 'offset', which must carry
+ * the ID given by 'r'. Returns its element at index 2, or NULL after
+ * reporting the problem on stderr. */
+const HParsedToken *
+parse_obj(struct Env *aux, Ref r, size_t offset)
+{
+	if (offset >= aux->sz) {
+		fprintf(stderr, "%s: obj %zu %zu: position %zu (0x%zx) out of "
+		    "bounds\n", aux->infile, r.nr, r.gen, offset, offset);
+		return NULL;
+	}
+
+	HParseResult *parsed =
+	    h_parse(p_objdef, aux->input + offset, aux->sz - offset);
+	if (!parsed) {
+		fprintf(stderr, "%s: error parsing object %zu %zu at position "
+		    "%zu (0x%zx)\n", aux->infile, r.nr, r.gen, offset, offset);
+		return NULL;
+	}
+
+	size_t found_nr = H_INDEX_UINT(parsed->ast, 0);
+	size_t found_gen = H_INDEX_UINT(parsed->ast, 1);
+	if (!(found_nr == r.nr && found_gen == r.gen)) {
+		fprintf(stderr, "%s: object ID mismatch at position %zu "
+		    "(%#zx): sought %zu %zu, found %zu %zu.\n", aux->infile,
+		    offset, offset, r.nr, r.gen, found_nr, found_gen);
+		return NULL;
+	}
+	return H_INDEX_TOKEN(parsed->ast, 2);
+}
+
 const HParsedToken *
 resolve(struct Env *aux, const HParsedToken *v)
 {
-	// XXX look up in cross-reference table
-	return v;
+	XREntry *entry = NULL;
+	Ref *r;
+
+	/* direct objects pass through */
+	if (v == NULL || v->token_type != TT_Ref)
+		return v;
+
+	/* we are looking at an indirect reference */
+	r = v->user;
+
+	/* find the xref entry for this reference */
+	entry = lookup_xref(aux, r->nr, r->gen);
+	if (entry == NULL)
+		return NULL;			/* obj not found */
+	if (entry->obj != NULL)
+		return resolve(aux, entry->obj);
+
+	/* parse the object and memoize */
+	entry->obj = v;				/* break loops */
+	switch (entry->type)
+	{
+	case XR_FREE:
+		return NULL;			/* obj deleted */
+	case XR_INUSE:
+		if (entry->n.gen != r->gen)
+			return NULL;		/* obj nr reused */
+		entry->obj = parse_obj(aux, *r, entry->n.offs);
+		break;
+	case XR_OBJSTM:
+		if (r->gen != 0)
+			return NULL;		/* invalid entry! */
+		//XXX entry->obj = parse_obj_stm(aux, entry->o.stm, entry->o.idx);
+		//XXX break;
+		return NULL;
+	}
+
+	return resolve(aux, entry->obj);
 }
 
 /*
-- 
GitLab