From 534fd269ea1d68160d5e231b5a81f78af3f4784d Mon Sep 17 00:00:00 2001 From: "sumit.ray@baesystems.com" <sumit.ray@baesystems.com> Date: Mon, 26 Jul 2021 00:23:44 -0400 Subject: [PATCH] Broken -- contains hack to try to pass in stream length to act_ostm --- pdf.c | 131 ++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 96 insertions(+), 35 deletions(-) diff --git a/pdf.c b/pdf.c index 31dab43..419e1b5 100644 --- a/pdf.c +++ b/pdf.c @@ -295,6 +295,27 @@ typedef struct { } Fontinfo_T; + +// Used by act_ostm to ensure indices are reasonable +typedef struct { + const Dict *dict; + size_t numobjs; + size_t stmlength; +} ostm_spec_T; + + +// Parser for object streams +HParser *p_objstm__m(HAllocator *, ostm_spec_T *); + +// Action for stream continuation +HParsedToken *act_ks_value(const HParseResult *p, void *u); +struct streamspec { + Dict *dict; /* stream dictionary */ + HParser *parser; /* data parser */ +}; + + + // *********************************************************** @@ -2247,6 +2268,7 @@ HParser *ktxtstream(HAllocator *, const HParsedToken *, void *); HParser *kcontentstream(HAllocator *, const HParsedToken *, void *); HParser *kbyteostream(HAllocator *, const HParsedToken *, void *); + void init_runlengthdecode_parser(struct Env *aux) { @@ -2996,8 +3018,9 @@ depred_png(struct predictor *pred, uint8_t *inp, size_t sz) return done; } +// SR:: Fix Hacky code - N is only useful for object streams HParseResult * -FlateDecode(const Dict *parms, HBytes b, HParser *p) +FlateDecode(const Dict *parms, HBytes b, HParser *p, size_t N) { size_t const BUFSIZE = 8 * 1024; uint8_t *buf; @@ -3098,10 +3121,15 @@ FlateDecode(const Dict *parms, HBytes b, HParser *p) // XXX always return NULL on error? #else - // DEBUG -- will not always work depending on the font encoding used -// fprintf (stdout, "FlateDecode:: Inflated string (%lu):\n%.*s\n", pred.nout, (int)pred.nout, pred.out); - //DEBUG - + // DEBUG -- + // SR :: Hacky code for Object Streams + // If I understood Marcell correctly, I should be able to access the ostrm_spec + if (N > 0) { // object streams + ostm_spec_T *ospec = (ostm_spec_T *)(p->env + sizeof(HParser*) + sizeof(HAction)); + ospec->stmlength = pred.nout; + fprintf(stdout, "\n\nFlateDecode: %p, N = (%lu, %lu), stmlength = %lu\n", + ospec, N, ospec->numobjs, ospec->stmlength); + } res = h_parse(p, pred.out, pred.nout); free(pred.out); #endif @@ -3265,7 +3293,7 @@ int read_lzw_buffer(void) HParseResult * -LZWDecode(const Dict *parms, HBytes b, HParser *p) +LZWDecode(const Dict *parms, HBytes b, HParser *p, size_t numobjs) { struct predictor pred = {1, 1, 8, 1}; int (*depredict)(struct predictor *, uint8_t *, size_t); @@ -3343,7 +3371,7 @@ LZWDecode(const Dict *parms, HBytes b, HParser *p) } HParseResult * -RunLengthDecode(const Dict *parms, HBytes b, HParser *p) +RunLengthDecode(const Dict *parms, HBytes b, HParser *p, size_t numobjs) { HParseResult *res; @@ -3366,7 +3394,7 @@ RunLengthDecode(const Dict *parms, HBytes b, HParser *p) * parms should be empty, because the filter has no parameters */ HParseResult * -ASCIIHexDecode(const Dict *parms, HBytes b, HParser *p) +ASCIIHexDecode(const Dict *parms, HBytes b, HParser *p, size_t numobjs) { HParseResult *f_res, *res; @@ -3397,7 +3425,7 @@ ASCIIHexDecode(const Dict *parms, HBytes b, HParser *p) * parms should be empty, because the filter has no parameters */ HParseResult* -ASCII85Decode(const Dict *parms, HBytes b, HParser *p) +ASCII85Decode(const Dict *parms, HBytes b, HParser *p, size_t numobjs) { HParseResult *f_res, *res; @@ -3473,16 +3501,6 @@ p_take__m(HAllocator *mm__, size_t n, struct Env *aux) } -// Parser for object streams -HParser *p_objstm__m(HAllocator *, const Dict *); - -// Action for stream continuation -HParsedToken *act_ks_value(const HParseResult *p, void *u); -struct streamspec { - Dict *dict; /* stream dictionary */ - HParser *parser; /* data parser */ -}; - /* @@ -3497,7 +3515,7 @@ struct streamspec { HParseResult * decode_contentstream(const Dict *d, HBytes b, HParser *p) { - HParseResult *(*filter)(const Dict *, HBytes, HParser *); + HParseResult *(*filter)(const Dict *, HBytes, HParser *, size_t); const Dict *parms = NULL; const HParsedToken *v; HParseResult *res = NULL; @@ -3534,7 +3552,7 @@ decode_contentstream(const Dict *d, HBytes b, HParser *p) else if (bytes_eq(v->bytes, "LZWDecode")) filter = LZWDecode; else { /* filter not supported */ - fprintf(stderr, "decode_stream:: Unsupported Filter [%.*s]\n", + fprintf(stderr, "decode_contentstream:: Unsupported Filter [%.*s]\n", (int)v->bytes.len, v->bytes.token); return NULL; /* Treat the stream as a byte array */ } @@ -3543,7 +3561,14 @@ decode_contentstream(const Dict *d, HBytes b, HParser *p) if (v && v->token_type == TT_Dict) parms = v->user; - res = filter(parms, b, p); + // SR:: Hacky code -- see if we are processing an object stream + // if we are, set N + size_t N = 0; + v = dictentry(d, "N"); + if ( (v) && (v->token_type == TT_SINT) ) { + N = v->sint; + } + res = filter(parms, b, p, N); /* Debug */ if (res){ @@ -4036,8 +4061,14 @@ kcontentstream(HAllocator *mm__, const HParsedToken *x, void *env) v = dictentry(dict, "Type"); if (v == NULL) // XXX -> custom type spec->parser = p_textstream; - else if ( (v->token_type == TT_BYTES) && bytes_eq(v->bytes, "ObjStm") ) - spec->parser = p_objstm__m(mm__, dict); + else if ( (v->token_type == TT_BYTES) && bytes_eq(v->bytes, "ObjStm") ) { + ostm_spec_T *ospec = h_alloc(mm__, sizeof(ostm_spec_T )); + ospec->dict = dict; + ospec->numobjs = 0; + ospec->stmlength = 0; + + spec->parser = p_objstm__m(mm__, ospec); + } else { fprintf(stdout, "kcontentstream: Not a text or object stream!\n"); return p_fail; @@ -4868,7 +4899,7 @@ parse_catalog(struct Env *aux, const HParsedToken *root) HParseResult * decode_stream(const Dict *d, HBytes b, HParser *p) { - HParseResult *(*filter)(const Dict *, HBytes, HParser *); + HParseResult *(*filter)(const Dict *, HBytes, HParser *, size_t); const Dict *parms = NULL; const HParsedToken *v; @@ -4908,7 +4939,14 @@ decode_stream(const Dict *d, HBytes b, HParser *p) if (v && v->token_type == TT_Dict) parms = v->user; - return filter(parms, b, p); + // SR:: Hacky code -- see if we are processing an object stream + // if we are, set N + size_t N = 0; + v = dictentry(d, "N"); + if ( (v) && (v->token_type == TT_SINT) ) { + N = v->sint; + } + return filter(parms, b, p, N); } @@ -4930,7 +4968,17 @@ p_stream_data__m(HAllocator *mm__, const Dict *dict, struct Env *aux) if (bytes_eq(v->bytes, "ObjStm")) { fprintf(stdout, "\np_stream_data__m:: Parsing object stream\n"); - return p_objstm__m(mm__, dict); + ostm_spec_T *ospec = h_alloc(mm__, sizeof(ostm_spec_T )); + ospec->dict = dict; + v = dictentry(dict, "N"); + if ( (v == NULL) || (v->token_type != TT_SINT)) { + fprintf(stderr, "\np_stream_data__m:: Missing # of objects -N- in stream!!\n"); + return NULL; + } + ospec->numobjs = v->sint; + ospec->stmlength = 0; + + return p_objstm__m(mm__, ospec); } if (bytes_eq(v->bytes, "XObject")) { @@ -5219,20 +5267,28 @@ p_xrefdata__m(HAllocator *mm__, const Dict *dict) HParsedToken * act_ostm(const HParseResult *p, void *u) { - assert(((HParsedToken *)u)->token_type == TT_SINT); - size_t N = ((HParsedToken *)u)->sint; + ostm_spec_T *spec = (ostm_spec_T *)u; + fprintf(stdout, "\nact_ostm:: N objects = %lu, Stream Length = %lu\n", + spec->numobjs, spec->stmlength); Objstm *ostrm = H_ALLOC(Objstm); - ostrm->numObjs = N; - ostrm->tok = h_arena_malloc(p->arena, N* sizeof(Objref_T)); + ostrm->numObjs = spec->numobjs; + ostrm->tok = h_arena_malloc(p->arena, ostrm->numObjs* sizeof(Objref_T)); ostrm->arena = p->arena; for (int i=0; i<ostrm->numObjs; i++) { const HParsedToken *num = H_FIELD_TOKEN(0, 2*i); - assert(num->token_type == TT_UINT); + const HParsedToken *offt = H_FIELD_TOKEN(0, 2*i+1); + assert((num->token_type == TT_UINT) && (offt->token_type == TT_UINT)); ostrm->tok[i].oid.nr = H_CAST_UINT(num); ostrm->tok[i].oid.gen = 0; ostrm->tok[i].obj = H_FIELD_TOKEN(1, i); + size_t offs = H_CAST_UINT(offt); + if (offs < 0) { // need to figure out a way to get the stream + fprintf(stdout, "\nact_ostm:: This should be warning and not generate an excption!\n\n"); + // not sure whether returning NULL will generate an exception + return NULL; + } } // const HCountedArray *indices = H_FIELD_SEQ(0); @@ -5248,24 +5304,29 @@ act_ostm(const HParseResult *p, void *u) } HParser * -p_objstm__m(HAllocator *mm__, const Dict *dict) +p_objstm__m(HAllocator *mm__, ostm_spec_T *spec) { const HParsedToken *v; size_t N; - v = dictentry(dict, "N"); + if ((!spec) || (!spec->dict)) { + fprintf(stdout, "\np_objstm__m: The object stream dictionary missing!\n"); + return p_fail; + } + v = dictentry(spec->dict, "N"); if (v == NULL || v->token_type != TT_SINT || v->sint < 0 || (uint64_t)v->sint > SIZE_MAX) { fprintf(stderr, "p_objstm__m: missing /N on object stream\n"); return p_fail; } N = v->sint; + spec->numobjs = N; HParser *wel_ws = h_sequence__m(mm__, p_wel, p_ws, NULL); HParser *idx = p_sepBy_n__m(mm__, p_npair, wel_ws, N); HParser *p_ostm = h_sequence__m(mm__, p_ws, idx, p_elemr, p_ws, NULL); - HParser *ostm_p = h_action__m(mm__, p_ostm, act_ostm, (void *)v); + HParser *ostm_p = h_action__m(mm__, p_ostm, act_ostm, (void *)spec); return ostm_p; // XXX leading and trailing ws OK? -- GitLab