diff --git a/pdf.c b/pdf.c index 63863af7537a76c35b77a584d345dc6165f0578f..72b33ef24ec6e203af931b86c77aab94c9282404 100644 --- a/pdf.c +++ b/pdf.c @@ -2,7 +2,7 @@ * pesco 2019 */ -#include <string.h> /* strncmp() */ +#include <string.h> /* strncmp(), memset() */ #include <hammer/hammer.h> #include <hammer/glue.h> @@ -440,6 +440,7 @@ init_parser(struct Env *aux) */ #include <inttypes.h> +#include <limits.h> /* INT_MAX */ #include <zlib.h> #include <err.h> @@ -530,13 +531,104 @@ validate_xrstm(HParseResult *p, void *u) bytes_eq(v->bytes, "XRef")); } -struct Predictor { +struct predictor { + /* parameters */ int num; /* default: 1 (no prediction) */ int colors; /* default: 1 */ int bpc; /* bits per component; default: 8 */ int columns; /* default: 1 */ + + int rowsz; /* bytes per row = ceil(colors * bpc * columns / 8) */ + + /* state */ + HSuspendedParser *sp; + uint8_t (*predfun)(int, int, int); + uint8_t *buf; /* previous row of input */ + uint8_t c; /* byte 'c' (upper left) */ + int x; /* current position */ }; +int +depred_none(struct predictor *pred, uint8_t *inp, size_t sz) +{ /* no prediction: pass the chunk to the suspended parser as is */ + return h_parse_chunk(pred->sp, inp, sz); +} + /* PNG filter functions, in the order of the predfuns table in depred_png (row tags 0..4). a = left, b = up, c = upper left; the result is added to the input byte to undo the prediction. */ +uint8_t pp_none(int a, int b, int c) { return 0; } +uint8_t pp_sub(int a, int b, int c) { return a; } +uint8_t pp_up(int a, int b, int c) { return b; } +uint8_t pp_avg(int a, int b, int c) { return (a + b) / 2; } + +#include <stdlib.h> /* abs() */ + /* Paeth: with p = a + b - c, return whichever of a, b, c is nearest to p; ties go to a, then b */ +uint8_t +pp_paeth(int a, int b, int c) +{ + int p = a + b - c; + int pa = abs(p - a); + int pb = abs(p - b); + int pc = abs(p - c); + + if (pa <= pb && pa <= pc) return a; + if (pb <= pc) return b; + return c; +} + +int +depred_png(struct predictor *pred, uint8_t *inp, size_t sz) +{ + /* NB: + * at this point, the specific value of pred->num no longer matters. + * the PNG predictor tags each row with the function used for that row + * and decoding always follows the tag. 
+ */ + static uint8_t (*predfuns[])(int, int, int) = + {pp_none, pp_sub, pp_up, pp_avg, pp_paeth}; + + bool done = false; + int bpp; + + bpp = (pred->colors * pred->bpc + 7) / 8; /* bytes per pixel */ + assert (bpp > 0); + + for (size_t i=0; i < sz && !done; i++) { + int x = pred->x; + int a = x<bpp ? 0 : pred->buf[x-bpp]; /* left */ + int b = pred->buf[x]; /* up */ + int c = pred->c; /* up left */ /* NOTE(review): pred->c is the b of the previous iteration, i.e. the byte above x-1 (0 at row start); that is the byte above x-bpp only when bpp == 1 -- confirm the intent for bpp > 1 */ + + if (pred->predfun == NULL) { /* we are before a new row */ + /* select predictor function */ + if (inp[i] > 4) { + fprintf(stderr, "unknown PNG predictor %d\n", + (int)inp[i]); + return -1; + } + pred->predfun = predfuns[inp[i]]; + + /* consume the tag */ + if (++i == sz) + break; /* the tag was the last byte of this chunk; predfun is already set, so the next call starts directly on the data of this row */ + } + + /* undo the prediction and save the decoded value */ + pred->buf[x] = inp[i] + pred->predfun(a, b, c); + + /* advance to the right */ + pred->c = b; + pred->x = ++x; + + /* when row complete, pass it to parser and start a new row */ + if (x == pred->rowsz) { + done = h_parse_chunk(pred->sp, pred->buf, pred->rowsz); + pred->c = pred->x = 0; + pred->predfun = NULL; + } + } + + return done; /* 0 unless an h_parse_chunk() call on a completed row returned nonzero; -1 is returned above for an unknown row tag */ +} + HParseResult * FlateDecode(HAllocator *mm__, HCountedArray *parms, HBytes b, HParser *p) { @@ -546,12 +638,13 @@ FlateDecode(HAllocator *mm__, HCountedArray *parms, HBytes b, HParser *p) HParseResult *res; const HParsedToken *v; size_t sz; - bool done; + int done; z_stream strm = {0}; int ret; - struct Predictor pred = {1, 1, 8, 1}; + struct predictor pred = {1, 1, 8, 1}; /* members after columns (rowsz, sp, predfun, buf, c, x) are zero-initialized */ + int (*depredict)(struct predictor *, uint8_t *, size_t); - /* determine the predictor algorithm to use (if any) */ + /* set up the predictor (if any) */ #define SETPARM(VAR,STR) do { \ v = dictentry(parms, (STR)); \ if (v != NULL) { \ @@ -559,26 +652,46 @@ FlateDecode(HAllocator *mm__, HCountedArray *parms, HBytes b, HParser *p) return NULL; \ VAR = v->sint; \ } } while(0) - SETPARM(pred.num, "Predictor"); - SETPARM(pred.colors, "Colors"); - SETPARM(pred.bpc, "BitsPerComponent"); - SETPARM(pred.columns, "Columns"); + 
+ SETPARM(pred.num, "Predictor"); + SETPARM(pred.colors, "Colors"); + SETPARM(pred.bpc, "BitsPerComponent"); + SETPARM(pred.columns, "Columns"); #undef SETPARM - if (pred.num != 1) { // XXX - fprintf(stderr, "FlateDecode: /Predictor %d unimplemented\n", - pred.num); - return NULL; + if (pred.num == 1) + depredict = depred_none; + else { + // XXX add TIFF predictor + if (pred.num >= 10 && pred.num <= 15) + depredict = depred_png; + else { + fprintf(stderr, "FlateDecode: /Predictor %d" + " not supported\n", pred.num); + return NULL; + } + + /* allocate row buffer; NOTE(review): pred.colors and pred.bpc are copied from the decode parameters by SETPARM and are used as divisors in the check below -- a value of 0 would divide by zero and negative values are not rejected here; confirm they are validated */ + if (pred.columns > (INT_MAX - 7) / pred.colors / pred.bpc) { + fprintf(stderr, "FlateDecode: overflow\n"); + return NULL; + } + pred.rowsz = (pred.colors * pred.bpc * pred.columns + 7) / 8; + pred.buf = h_alloc(mm__, pred.rowsz); + memset(pred.buf, 0, pred.rowsz); } + /* set up zlib */ // XXX pass our allocator to zlib ret = inflateInit(&strm); if (ret != Z_OK) errx(1, "inflateInit: %s (%d)", strm.msg, ret); buf = h_alloc(mm__, BUFSIZE); + + /* initialize target parser */ sp = h_parse_start__m(mm__, p); assert(sp != NULL); + pred.sp = sp; - done = false; + done = 0; strm.avail_in = b.len; strm.next_in = (unsigned char *)b.token; do { @@ -592,13 +705,17 @@ FlateDecode(HAllocator *mm__, HCountedArray *parms, HBytes b, HParser *p) } sz = BUFSIZE - strm.avail_out; - done = h_parse_chunk(sp, buf, sz); - } while (!done && ret == Z_OK); + done = depredict(&pred, buf, sz); /* 0: keep feeding; -1: predictor error; other nonzero: h_parse_chunk() returned true */ + } while (done == 0 && ret == Z_OK); res = h_parse_finish(sp); // XXX always return NULL on error? 
+ inflateEnd(&strm); + mm__->free(mm__, pred.buf); /* pred.buf is still NULL when no predictor was set up -- presumably the free function of this allocator tolerates NULL; verify */ mm__->free(mm__, buf); + + if (done == -1) + return NULL; /* NOTE(review): res obtained from h_parse_finish() above is dropped on this path -- confirm it does not need to be released */ return res; } @@ -646,13 +763,13 @@ parse_stream(HAllocator *mm__, HCountedArray *d, HBytes b, HParser *p) HParsedToken * act_xrstm(const HParseResult *p, void *u) { - HParsedToken *bytes, *dict, *result; + HParsedToken *xrefs, *dict, *result; dict = H_INDEX_TOKEN(p->ast, 2, 0); - bytes = H_INDEX_TOKEN(p->ast, 2, 1); + xrefs = H_INDEX_TOKEN(p->ast, 2, 1); result = H_MAKE_SEQN(2); - result->seq->elements[0] = bytes; + result->seq->elements[0] = xrefs; result->seq->elements[1] = dict; result->seq->used = 2; return result; @@ -926,10 +1043,6 @@ parse_xrefs(const char *input, size_t sz, size_t *nxrefs) offset = (size_t)tok->sint; } - // XXX debug - //fprintf(stderr, "%s: %zu xref sections parsed\n", infile, n); - //for (size_t i = 0; i < n; i++) - // h_pprintln(stderr, xrefs[i]); end: *nxrefs = n; @@ -970,6 +1083,10 @@ main(int argc, char *argv[]) /* parse all cross-reference sections and trailer dictionaries */ xrefs = parse_xrefs(input, sz, &nxrefs); + // XXX debug + //fprintf(stderr, "%s: %zu xref sections parsed\n", infile, nxrefs); + //for (size_t i = 0; i < nxrefs; i++) + // h_pprintln(stderr, xrefs[i]); (void)xrefs; // shut up, gcc /* run the main parser */