diff --git a/pdf.c b/pdf.c index dd6855686b3efc46c94b7d1113c6962714182231..85bae4cdebbc8d775a0a4073b8cbb967becbde0f 100644 --- a/pdf.c +++ b/pdf.c @@ -54,6 +54,13 @@ p_return__m(HAllocator *mm__, const HParsedToken *tok) return h_action__m(mm__, p_epsilon, act_return, (void *)tok); } +/* a parser that just returns a given uint */ +HParser * +p_return_uint__m(HAllocator *mm__, uint64_t x) +{ + return h_action__m(mm__, p_epsilon, act_return_uint, (void *)x); +} + /* a helper to compare an HBytes to a string */ bool bytes_eq(HBytes b, const char *s) @@ -108,6 +115,44 @@ struct Env { }; +/* + * custom token types + */ +HTokenType TT_XREntry; + +typedef struct { + enum {XR_FREE, XR_INUSE, XR_OBJSTM} type; + union { + struct { size_t next, ngen; } f; /* free */ + struct { size_t offs, gen; } n; /* inuse */ + struct { size_t stm, idx; } o; /* objstm */ + }; +} XREntry; + +void +pp_xrentry(FILE *stream, const HParsedToken *tok, int indent, int delta) +{ + XREntry *xr = H_CAST(XREntry, tok); + + switch (xr->type) { + case XR_FREE: + fprintf(stream, "{ \"type\":\"free\", \"next\":%zu, " + "\"ngen\":%zu }", xr->f.next, xr->f.ngen); + break; + case XR_INUSE: + fprintf(stream, "{ \"type\":\"inuse\", \"offs\":%zu, " + "\"gen\":%zu }", xr->n.offs, xr->n.gen); + break; + case XR_OBJSTM: + fprintf(stream, "{ \"type\":\"objstm\", \"stm\":%zu, " + "\"idx\":%zu }", xr->o.stm, xr->o.idx); + break; + default: + assert(!"reached"); + } +} + + /* * semantic actions */ @@ -138,6 +183,7 @@ act_nat(const HParseResult *p, void *u) uint64_t x = 0; HCountedArray *seq = H_CAST_SEQ(p->ast); + // XXX check for overflow for (size_t i = 0; i < seq->used; i++) x = x*10 + H_CAST_UINT(seq->elements[i]); @@ -153,6 +199,7 @@ act_pnat(const HParseResult *p, void *u) uint64_t x = H_FIELD_UINT(0); HCountedArray *seq = H_FIELD_SEQ(1); + // XXX check for overflow for (size_t i = 0; i < seq->used; i++) x = x*10 + H_CAST_UINT(seq->elements[i]); @@ -165,6 +212,7 @@ act_intg(const HParseResult *p, void *u) int64_t x = 0; HCountedArray *seq = H_FIELD_SEQ(1); + // XXX check for overflow for (size_t i = 0; i < seq->used; i++) x = x*10 + H_CAST_UINT(seq->elements[i]); @@ -184,6 +232,7 @@ act_real(const HParseResult *p, void *u) HCountedArray *whole = H_FIELD_SEQ(1, 0); HCountedArray *fract = H_FIELD_SEQ(1, 2); + // XXX check for overflow for (size_t i = 0; i < whole->used; i++) x = x*10 + H_CAST_UINT(whole->elements[i]); for (size_t i = 0; i < fract->used; i++) @@ -232,6 +281,56 @@ act_octal(const HParseResult *p, void *u) return H_MAKE_UINT(x); } +HParsedToken * +act_xrent(const HParseResult *p, void *u) +{ + XREntry *xr = H_ALLOC(XREntry); + char c = H_FIELD_UINT(2); + + switch (c) { + case 'f': + xr->type = XR_FREE; + xr->f.next = H_FIELD_UINT(0); + xr->f.ngen = H_FIELD_UINT(1); + break; + case 'n': + xr->type = XR_INUSE; + xr->n.offs = H_FIELD_UINT(0); + xr->n.gen = H_FIELD_UINT(1); + break; + default: + assert(!"reached"); + } + + return H_MAKE(XREntry, xr); +} + +HParsedToken * +act_xrstment(const HParseResult *p, void *u) +{ + XREntry *xr = H_ALLOC(XREntry); + + xr->type = H_FIELD_UINT(0); + switch (xr->type) { + case XR_FREE: + xr->f.next = H_FIELD_UINT(1); + xr->f.ngen = H_FIELD_UINT(2); + break; + case XR_INUSE: + xr->n.offs = H_FIELD_UINT(1); + xr->n.gen = H_FIELD_UINT(2); + break; + case XR_OBJSTM: + xr->o.stm = H_FIELD_UINT(1); + xr->o.idx = H_FIELD_UINT(2); + break; + default: + assert(!"reached"); + } + + return H_MAKE(XREntry, xr); +} + #define act_xrefs h_act_last HParsedToken * @@ -264,6 +363,8 @@ HParser *kxstream(HAllocator *, const HParsedToken *, void *); void init_parser(struct Env *aux) { + TT_XREntry = h_allocate_token_new("XREntry", NULL, pp_xrentry); + /* lines */ H_RULE(cr, p_mapch('\r', '\n')); /* semantic value: \n */ H_RULE(lf, h_ch('\n')); /* semantic value: \n */ @@ -401,7 +502,7 @@ init_parser(struct Env *aux) H_RULE(xrtyp, CHX(h_ch('n'), h_ch('f'))); H_ARULE(xroff, REP(digit, 10)); H_ARULE(xrgen, REP(digit, 5)); - H_RULE(xrent, SEQ(xroff, IGN(sp), xrgen, IGN(sp), xrtyp, IGN(xreol))); + H_ARULE(xrent, SEQ(xroff, IGN(sp), xrgen, IGN(sp), xrtyp, IGN(xreol))); H_ARULE(xrnat, h_many1(digit)); H_RULE(xrhead, SEQ(xrnat, IGN(sp), xrnat, nl)); H_RULE(xrsub, SEQ(xrhead, h_many(xrent))); @@ -445,7 +546,6 @@ init_parser(struct Env *aux) * stream object handling incl. cross-reference streams */ -#include <inttypes.h> #include <limits.h> /* INT_MAX */ #include <zlib.h> #include <err.h> @@ -801,9 +901,16 @@ act_xrstm(const HParseResult *p, void *u) } HParser * -p_xrefsub__m(HAllocator *mm__, size_t base, size_t n, HParser *p_entry) +p_xrefsub__m(HAllocator *mm__, size_t base, size_t count, HParser *p_entry) { - return h_repeat_n__m(mm__, p_entry, n); + HParser *ret_base, *ret_count, *p_header, *p_entries; + + ret_base = p_return_uint__m(mm__, base); + ret_count = p_return_uint__m(mm__, count); + p_header = h_sequence__m(mm__, ret_base, ret_count, NULL); + p_entries = h_repeat_n__m(mm__, p_entry, count); + + return h_sequence__m(mm__, p_header, p_entries, NULL); } /* x = ((dict ...) bytes) */ @@ -856,10 +963,9 @@ kxstream(HAllocator *mm__, const HParsedToken *x, void *env) /* Size (required) - total size of xref table */ v = dictentry(dict, "Size"); - if (v == NULL || v->token_type != TT_SINT) - goto fail; - if ((Size = v->sint) < 1) + if (v == NULL || v->token_type != TT_SINT || v->sint < 1) goto fail; + Size = v->sint; /* W (required) - field widths for each xref entry */ v = dictentry(dict, "W"); @@ -869,17 +975,20 @@ kxstream(HAllocator *mm__, const HParsedToken *x, void *env) goto fail; Wskip = 0; for (size_t i = 0; i < Wn; i++) { - if (v->seq->elements[i]->token_type != TT_SINT || - v->seq->elements[i]->sint < 0) + HTokenType tt = v->seq->elements[i]->token_type; + int64_t w = v->seq->elements[i]->sint; + + if (tt != TT_SINT || w < 0) goto fail; if (i < 3) { - if (v->seq->elements[i]->sint > 8) - goto fail; /* can't take >64 bits */ - W[i] = (size_t)v->seq->elements[i]->sint; + /* we can't take >64 bits and want to use size_t */ + if (w > 8 || w > sizeof(size_t)) + goto fail; + W[i] = (size_t)w; } else { - if (v->seq->elements[i]->sint > SIZE_MAX - Wskip) + if (w > SIZE_MAX - Wskip) goto fail; /* overflow */ - Wskip += v->seq->elements[i]->sint; + Wskip += w; } } if (Wskip > SIZE_MAX / 8) @@ -924,6 +1033,7 @@ kxstream(HAllocator *mm__, const HParsedToken *x, void *env) if (Wskip > 0) // XXX h_skip does not work with CF, yet goto fail; p_entry = h_sequence__m(mm__, p_field[0], p_field[1], p_field[2], NULL); + p_entry = h_action__m(mm__, p_entry, act_xrstment, NULL); /* Index (optional) - subsections [base count ...] */ v = dictentry(dict, "Index"); @@ -983,6 +1093,7 @@ fail: */ #include <stdio.h> +#include <inttypes.h> #include <stdlib.h> /* realloc() */ #include <fcntl.h> /* open() */ #include <unistd.h> /* lseek() */