From 0398c35b3027cd85459bf41162da3173e12b171b Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Tue, 28 Jan 2020 14:07:07 +0100
Subject: [PATCH] implement resolve() sans object streams

---
 pdf.c | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 139 insertions(+), 3 deletions(-)

diff --git a/pdf.c b/pdf.c
index 37eaa1f..32f4974 100644
--- a/pdf.c
+++ b/pdf.c
@@ -108,7 +108,7 @@ struct Env {
 	const char *input;
 	size_t sz;
 
-	const HParsedToken **xrefs;
+	const HParsedToken **xrefs;	/* all xref sections of the file */
 	size_t nxrefs;
 	const HParsedToken **objs;
 	size_t nobjs;
@@ -127,6 +127,7 @@ typedef struct {
 		struct { size_t offs, gen; } n;		/* inuse */
 		struct { size_t stm, idx; } o;		/* objstm */
 	};
+	const HParsedToken *obj;
 } XREntry;
 
 typedef struct { size_t nr, gen; } Ref;
@@ -308,6 +309,7 @@ act_xrent(const HParseResult *p, void *u)
 	XREntry *xr = H_ALLOC(XREntry);
 	char c = H_FIELD_UINT(2);
 
+	xr->obj = NULL;
 	switch (c) {
 	case 'f':
 		xr->type = XR_FREE;
@@ -331,6 +333,7 @@ act_xrstment(const HParseResult *p, void *u)
 {
 	XREntry *xr = H_ALLOC(XREntry);
 
+	xr->obj = NULL;
 	xr->type = H_FIELD_UINT(0);
 	switch (xr->type) {
 	case XR_FREE:
@@ -376,6 +379,7 @@ HParser *p_pdf;
 HParser *p_pdfdbg;
 HParser *p_startxref;
 HParser *p_xref;
+HParser *p_objdef;
 
 /* continuations for h_bind() */
 HParser *kstream(HAllocator *, const HParsedToken *, void *);
@@ -556,6 +560,7 @@ init_parser(struct Env *aux)
 	p_pdfdbg = pdfdbg;
 	p_startxref = startxr;
 	p_xref = CHX(xr_td, xrstm);
+	p_objdef = objdef;
 
 	p_fail = h_nothing_p();
 	p_epsilon = epsilon;
@@ -587,11 +592,142 @@ act_ks_bytes(const HParseResult *p, void *env)
 	return H_MAKE_BYTES(bs->token + offset, bs->len);
 }
 
+/*
+ * Find the cross-reference entry for object number 'nr'.
+ *
+ * Sections are searched in the order they are stored in aux->xrefs and the
+ * first subsection whose range [base, base+n) covers 'nr' wins.  'gen' is
+ * not consulted here; resolve() compares generations itself.
+ *
+ * Returns NULL if no subsection covers 'nr'.
+ */
+XREntry *
+lookup_xref(struct Env *aux, size_t nr, size_t gen)
+{
+	/* for each cross-reference section (i.e. update) */
+	for (size_t i = 0; i < aux->nxrefs; i++) {
+		HCountedArray *subs = H_INDEX_SEQ(aux->xrefs[i], 0);
+
+		/* for each cross-reference subsection */
+		for (size_t j = 0; j < subs->used; j++) {
+			HParsedToken *ss = subs->elements[j];
+			size_t base = H_INDEX_UINT(ss, 0, 0);
+			size_t n = H_INDEX_UINT(ss, 0, 1);
+
+			if (nr >= base && nr - base < n)
+				return H_INDEX(XREntry, ss, 1, nr - base);
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Parse the object definition found 'offset' bytes into the input and check
+ * that it is labelled with the object and generation number given in 'r'.
+ *
+ * Returns the object's value, or NULL after printing a diagnostic to stderr
+ * if 'offset' is out of bounds, the parse fails, or the label does not match.
+ */
+const HParsedToken *
+parse_obj(struct Env *aux, Ref r, size_t offset)
+{
+	HParseResult *res;
+	size_t nr, gen;
+
+	if (offset >= aux->sz) {
+		fprintf(stderr, "%s: obj %zu %zu: position %zu (0x%zx) out of "
+		    "bounds\n", aux->infile, r.nr, r.gen, offset, offset);
+		return NULL;
+	}
+
+	res = h_parse(p_objdef, aux->input + offset, aux->sz - offset);
+	if (res == NULL) {
+		fprintf(stderr, "%s: error parsing object %zu %zu at position "
+		    "%zu (0x%zx)\n", aux->infile, r.nr, r.gen, offset, offset);
+		return NULL;
+	}
+
+	nr = H_INDEX_UINT(res->ast, 0);
+	gen = H_INDEX_UINT(res->ast, 1);
+	if (nr != r.nr || gen != r.gen) {
+		fprintf(stderr, "%s: object ID mismatch at position %zu "
+		    "(%#zx): sought %zu %zu, found %zu %zu.\n", aux->infile,
+		    offset, offset, r.nr, r.gen, nr, gen);
+		return NULL;
+	}
+
+	return H_INDEX_TOKEN(res->ast, 2);
+}
+
+/*
+ * Resolve 'v' to a direct object.
+ *
+ * Anything that is not an indirect reference (including NULL) is returned
+ * unchanged.  A reference is looked up in the cross-reference data, the
+ * object it names is parsed, and chains of references are followed until a
+ * direct object is reached; the result is memoized in the xref entry.
+ *
+ * Returns NULL if the object is missing, free, of the wrong generation,
+ * lives in an object stream (not implemented yet), fails to parse, or is
+ * part of a reference cycle.
+ */
 const HParsedToken *
 resolve(struct Env *aux, const HParsedToken *v)
 {
-	// XXX look up in cross-reference table
-	return v;
+	XREntry *entry = NULL;
+	const HParsedToken *obj = NULL;
+	Ref *r;
+
+	/* direct objects pass through */
+	if (v == NULL || v->token_type != TT_Ref)
+		return v;
+
+	/* we are looking at an indirect reference */
+	r = v->user;
+
+	/* find the xref entry for this reference */
+	entry = lookup_xref(aux, r->nr, r->gen);
+	if (entry == NULL)
+		return NULL;			/* obj not found */
+
+	/*
+	 * entry->obj holds either the fully resolved object or, while the
+	 * object is being parsed and after that has failed, the reference
+	 * that triggered the parse.  Following such a marker would only
+	 * lead back here, so treat it as "cannot be resolved".
+	 */
+	if (entry->obj != NULL) {
+		if (entry->obj->token_type == TT_Ref)
+			return NULL;		/* cycle or earlier failure */
+		return entry->obj;
+	}
+
+	/* parse the object and memoize */
+	switch (entry->type)
+	{
+	case XR_FREE:
+		return NULL;			/* obj deleted */
+	case XR_INUSE:
+		if (entry->n.gen != r->gen)
+			return NULL;		/* obj nr reused */
+		entry->obj = v;			/* break loops */
+		obj = parse_obj(aux, *r, entry->n.offs);
+		break;
+	case XR_OBJSTM:
+		if (r->gen != 0)
+			return NULL;		/* invalid entry! */
+		//XXX entry->obj = v;
+		//XXX obj = parse_obj_stm(aux, entry->o.stm, entry->o.idx);
+		//XXX break;
+		return NULL;
+	}
+
+	/* the object may itself be a reference; follow the chain */
+	obj = resolve(aux, obj);
+	if (obj != NULL)
+		entry->obj = obj;
+	return obj;
 }
 
 /*
-- 
GitLab