From ddf109c3328e811604ec75a9834861aefc0d6b40 Mon Sep 17 00:00:00 2001 From: Pompolic <pompolic@special-circumstanc.es> Date: Mon, 21 Feb 2022 18:51:40 +0100 Subject: [PATCH] Workaround: avoid crashing if /Contents in a page object is an indirect array --- pdf.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/pdf.c b/pdf.c index 817fdcd..b78944e 100644 --- a/pdf.c +++ b/pdf.c @@ -4747,20 +4747,33 @@ void parse_pagenode ( parse_rsrcdict(arena, contents, myNode, aux); //pp_ptnode(stdout, myNode); } + /* Indirect reference to an array, which may in turn have indirect references */ + // XXX: possible refactor: the branch with contents_t being a TT_SEQUENCE is meant to do the same thing, could be factored out into a function of its own + else if (contents->token_type == TT_SEQUENCE) + { + myNode->pn.textStream = NULL; + goto end; //XXX: This avoids crashing + } else { // DEBUG //fprintf(stdout, "\n\nparse_pagenode: What is token 0 anyway?\n"); - HParsedToken *tok0 = H_INDEX_TOKEN(contents, 0); - h_pprintln(stdout, tok0); + if(contents->seq->used >= 1) + { + HParsedToken *tok0 = H_INDEX_TOKEN(contents, 0); + h_pprintln(stdout, tok0); + } - HParsedToken *res_strm = H_INDEX_TOKEN(contents, 1); - if (res_strm->token_type == TT_SEQUENCE) { // this seems like a big assumption - myNode->pn.textStream = res_strm; + if(contents->seq->used >= 2) + { + HParsedToken *res_strm = H_INDEX_TOKEN(contents, 1); + if (res_strm->token_type == TT_SEQUENCE) { // this seems like a big assumption + myNode->pn.textStream = res_strm; - //fprintf(stdout, "parse_pagenode: Page node contents = %p\n", (void *)contents); + //fprintf(stdout, "parse_pagenode: Page node contents = %p\n", (void *)contents); + } + else + myNode->pn.textStream = NULL; } - else - myNode->pn.textStream = NULL; } } else if (contents_t->token_type == TT_SEQUENCE) { -- GitLab