diff --git a/pdf.c b/pdf.c
index 446699f97bb227cb4dbb35e3cd7814fdf98c6c79..50186a4098ee6e8d1ab54548c6611c1ea921e323 100644
--- a/pdf.c
+++ b/pdf.c
@@ -1618,6 +1618,273 @@ FlateDecode(const Dict *parms, HBytes b, HParser *p)
 	return res;
 }
 
+/* LZW helpers */
+
+/* Size of the initial output buffer and of every later growth step. */
+enum { LZW_CHUNK = 1024 };
+
+/*
+ * Decoder state shared between LZWDecode() and the callbacks passed to
+ * lzw_decompress(). The callbacks take no context argument, so they reach
+ * the active instance through the global cur_lzw_spec.
+ */
+typedef struct
+{
+	uint8_t *lzw_buf;	/* decoded output, grown on demand */
+	size_t total_buf_size;	/* bytes allocated for lzw_buf */
+	size_t write_head;	/* offset of the next byte to write */
+	size_t write_tail;	/* not touched by the helpers below */
+	uint8_t write_checksum;	/* running checksum of written bytes */
+	size_t eof_loc;		/* offset where the EOF marker was stored */
+
+	HBytes *input_stream;	/* compressed input */
+	size_t read_head;	/* offset of the next input byte */
+	size_t read_tail;	/* end of input, filled in lazily */
+	uint8_t read_checksum;	/* running checksum of bytes read */
+} lzwspec;
+
+lzwspec *cur_lzw_spec;
+
+/*
+ * Used by write_lzw_buffer() to get more space for decoding if needed.
+ * Enlarges cur_lzw_spec->lzw_buf by amount bytes. Failure is fatal:
+ * carrying on with the old size would let the writer run off the end of
+ * the buffer.
+ */
+void
+grow_lzw_buffer(size_t amount)
+{
+	uint8_t *ret_buf;
+
+	if (amount > SIZE_MAX - cur_lzw_spec->total_buf_size)
+		errx(1, "LZWDecode: output buffer too large");
+	ret_buf = realloc(cur_lzw_spec->lzw_buf,
+	    cur_lzw_spec->total_buf_size + amount);
+	if (ret_buf == NULL)
+		err(1, "LZWDecode: realloc()");
+	cur_lzw_spec->lzw_buf = ret_buf;
+	cur_lzw_spec->total_buf_size += amount;
+}
+
+/*
+ * Allocate a decoder state that reads from bytes. Every cursor and
+ * checksum starts at zero and the output buffer holds LZW_CHUNK bytes.
+ * The caller releases the result with delete_lzw_spec(). Allocation
+ * failure is fatal.
+ */
+lzwspec *
+new_lzw_spec(HBytes *bytes)
+{
+	lzwspec *ret;
+
+	/* calloc: the callbacks use the cursors as indices straight away */
+	ret = calloc(1, sizeof *ret);
+	if (ret == NULL)
+		err(1, "LZWDecode");
+	ret->lzw_buf = malloc(LZW_CHUNK);
+	if (ret->lzw_buf == NULL)
+		err(1, "LZWDecode");
+	ret->total_buf_size = LZW_CHUNK;
+	ret->input_stream = bytes;
+	return ret;
+}
+
+/* Release a state made by new_lzw_spec(). NULL is accepted. */
+void
+delete_lzw_spec(lzwspec *spec)
+{
+	if (spec == NULL)
+		return;
+	free(spec->lzw_buf);
+	free(spec);
+}
+
+/* Select the state used by write_lzw_buffer() and read_lzw_buffer(). */
+void
+bind_lzw_spec(lzwspec *spec)
+{
+	cur_lzw_spec = spec;
+}
+
+
+#include "lzw-lib.h"
+
+/*
+ * Buffer writer function for the lzw-ab implementation, with a fixed signature.
+ * Although the type is defined as int, it is expected to write one byte at a time.
+ * Modifies cur_lzw_spec. Set up the lzw spec to use with bind_lzw_spec().
+ *
+ * EOF is stored as a single marker byte whose offset is kept in eof_loc;
+ * it does not enter the checksum.
+ */
+void
+write_lzw_buffer(int value)
+{
+	lzwspec *spec = cur_lzw_spec;
+
+	if (!spec->lzw_buf)
+	{
+		fprintf(stderr, "LZWDecode: lzw_buf is null!\n");
+		assert(spec->lzw_buf != NULL);
+	}
+
+	/* Make room before storing, so the slot at write_head always exists. */
+	if (spec->write_head >= spec->total_buf_size)
+		grow_lzw_buffer(LZW_CHUNK);
+
+	if (value == EOF) {
+		spec->eof_loc = spec->write_head;
+		spec->lzw_buf[spec->write_head++] = (uint8_t) value;
+		return;
+	}
+
+	/* We can get away with this cast due to writing single bytes. */
+	spec->lzw_buf[spec->write_head++] = (uint8_t) value;
+	spec->write_checksum = spec->write_checksum * 3 + (uint8_t) value;
+}
+
+
+/*
+ * Fixed signature function for reading bytes. Modifies cur_lzw_spec.
+ * Set cur_lzw_spec with bind_lzw_spec().
+ *
+ * Returns the next byte of the input stream, or EOF once it is used up.
+ */
+int
+read_lzw_buffer(void)
+{
+	lzwspec *spec = cur_lzw_spec;
+	uint8_t byte_read;
+
+	/* Input data is already waiting in the buffer */
+	if (spec->read_head == spec->read_tail)
+		spec->read_tail = spec->input_stream->len;
+
+	if (spec->read_head >= spec->read_tail)
+		return EOF;
+
+	byte_read = spec->input_stream->token[spec->read_head++];
+	spec->read_checksum = spec->read_checksum * 3 + byte_read;
+	return byte_read;
+}
+
+
+/*
+ * Decode the /LZWDecode-filtered stream data in b, undo the predictor
+ * described by parms (if any) and parse the result with p.
+ *
+ * Returns the parse result, or NULL if a parameter is invalid or
+ * unsupported, the LZW data cannot be decompressed, or parsing fails.
+ * Exits via err() when out of memory.
+ */
+HParseResult *
+LZWDecode(const Dict *parms, HBytes b, HParser *p)
+{
+	struct predictor pred = {1, 1, 8, 1};
+	int (*depredict)(struct predictor *, uint8_t *, size_t);
+	HParseResult *res = NULL;
+	lzwspec *lzw_spec;
+	size_t nbytes;
+	int done;
+	int ret;
+	const HParsedToken *v;
+
+	/* set up the predictor (if any) */
+	#define SETPARM(VAR,STR) do {					\
+		v = dictentry(parms, (STR));				\
+		if (v != NULL) {					\
+			if (v->token_type != TT_SINT || v->sint < 0)	\
+				return NULL;				\
+			VAR = v->sint;					\
+		} } while(0)
+	SETPARM(pred.num,	"Predictor");
+	SETPARM(pred.colors,	"Colors");
+	SETPARM(pred.bpc,	"BitsPerComponent");
+	SETPARM(pred.columns,	"Columns");
+	#undef SETPARM
+	if (pred.num == 1)
+		depredict = depred_none;
+	else {
+		if (pred.num >= 10 && pred.num <= 15)
+			depredict = depred_png;
+		else if (pred.num == 2) {
+			/* for 8-bpc TIFF pred. 2, we can reuse PNG Sub */
+			if (pred.bpc == 8) {
+				pred.predfun = pp_sub;	/* predict left */
+				depredict = depred_png;
+			} else {
+				// XXX add general TIFF predictor (bpc != 8)
+				fprintf(stderr, "LZWDecode: /Predictor %d "
+				    "not supported for /BitsPerComponent %d\n",
+				    pred.num, pred.bpc);
+				return NULL;
+			}
+		} else {
+			fprintf(stderr, "LZWDecode: /Predictor %d"
+			    " not supported\n", pred.num);
+			return NULL;
+		}
+
+		/* zero would divide by zero below and yield an empty row */
+		if (pred.colors < 1 || pred.bpc < 1 || pred.columns < 1) {
+			fprintf(stderr, "LZWDecode: invalid predictor parameters\n");
+			return NULL;
+		}
+
+		/* allocate row buffer */
+		if (pred.columns > (INT_MAX - 7) / pred.colors / pred.bpc) {
+			fprintf(stderr, "LZWDecode: overflow\n");
+			return NULL;
+		}
+		pred.rowsz = (pred.colors * pred.bpc * pred.columns + 7) / 8;
+		pred.buf = calloc(1, pred.rowsz);
+		if (pred.buf == NULL)
+			err(1, "LZWDecode");
+	}
+
+	lzw_spec = new_lzw_spec(&b);
+	bind_lzw_spec(lzw_spec);
+
+	ret = lzw_decompress(write_lzw_buffer, read_lzw_buffer);
+	if (ret) {
+		/* malformed input: report it to the caller rather than abort */
+		fprintf(stderr, "lzw_decompress: error (%d)\n", ret);
+		goto out;
+	}
+
+	/*
+	 * The final slot is left out, presumably because it holds the EOF
+	 * marker stored by write_lzw_buffer() -- TODO confirm that
+	 * lzw_decompress() always emits it. Guard the empty case so the
+	 * length cannot wrap around.
+	 */
+	nbytes = lzw_spec->write_head > 0 ? lzw_spec->write_head - 1 : 0;
+	done = depredict(&pred, lzw_spec->lzw_buf, nbytes);
+	if (!done)
+		fprintf(stderr, "LZWDecode: unexpected end of input "
+		    "(depred returns 0, but there are no more bytes)\n");
+
+	res = h_parse(p, pred.out, pred.nout);
+	if (res == NULL)
+		goto out;
+
+	/*
+	 * NOTE(review): p has already run over the decoded data above;
+	 * feeding its TT_BYTES result to p a second time looks unintended.
+	 * Kept as is -- confirm against the intended filter pipeline.
+	 */
+	assert(res->ast && res->ast->token_type == TT_BYTES);
+	res = h_parse(p, res->ast->bytes.token, res->ast->bytes.len);
+
+out:
+	/* pred.out stays alive until here in case the first result points into it */
+	free(pred.out);
+	free(pred.buf);
+	bind_lzw_spec(NULL);
+	delete_lzw_spec(lzw_spec);
+	return res;
+}
+
 HParseResult *
 RunLengthDecode(const Dict *parms, HBytes b, HParser *p)
 {