diff --git a/pdf.c b/pdf.c index 0569ac8a3beff00aa16ae8be435f8b1e021c2ced..f2121fca28c4e3790f2d77dcf7ddf5d2077e2d15 100644 --- a/pdf.c +++ b/pdf.c @@ -5407,7 +5407,11 @@ parse_xrefs(struct Env *aux) size_t maxObjNum = 0; Dict *trailer = NULL; - // XXX try formulating this as a parser using h_seek() + // Initialize the environment variables + aux->xrefs = xrefs; + aux->nxrefs = n; + + /* search for the "startxref" section from the back of the file */ HParser *p = h_left(p_startxref, h_end_p()); @@ -5417,20 +5421,28 @@ parse_xrefs(struct Env *aux) break; } if (res == NULL) { - fprintf(stderr, "%s: startxref not found\n", infile); - goto end; + fprintf(stderr, "VIOLATION[5]: startxref not found\n"); + return; } offset = H_INDEX_UINT(res->ast, 0); + // verify the offset recovered is bounded to be in the file + if ( (offset <=0) || (offset >= aux->sz) ) { + fprintf(stderr, "VIOLATION[5]: Invalid xref table offset = %ld. Valid range <0, %ld>\n", + offset, aux->sz); + return; + } + + for (;;) { //res = h_parse(p_xref, input + offset, sz - offset); p = NULL; p = h_right(h_seek(offset * 8, SEEK_SET), p_xref); // XXX res = h_parse(p, input, sz); if (res == NULL || res->ast == NULL || H_INDEX_TOKEN(res->ast, 0) == NULL) { - fprintf(stderr, "%s: error parsing xref section at " - "position %zu (%#zx)\n", infile, offset, offset); - break; + fprintf(stderr, "VIOLATION[5]: error parsing xref section at " + "position %zu (%#zx)\n", offset, offset); + return; } /* save this section in xrefs */ @@ -5484,24 +5496,36 @@ parse_xrefs(struct Env *aux) offset = (size_t)tok->sint; } -end: + + + // Make sure we parsed a valid trailer section + if (! trailer) { + fprintf(stderr, "VIOLATION[7]: Invalid Trailer Section or Trailer Section not found\n"); + return; + } + + + + // Process the document structure aux->xrefs = xrefs; aux->nxrefs = n; + + + if (n > maxObjNum) { fprintf(stderr, "%s: Number of xrefs found -%ld- " "Greater than specified /Size -%ld-.\n" - "Ignoring objects numberd greater than -%ld-!\n", + "Ignoring objects numbered greater than -%ld-!\n", infile, n, maxObjNum, n); aux->nxrefs = maxObjNum; } // Process the trailer dictionary - if (trailer) { // trailer==NULL or n==0 ==> xrefs were not parsed correctly - const HParsedToken *root = dictentry(trailer, "Root"); - assert(root->token_type == TT_Ref); - parse_catalog(aux, root); - } + const HParsedToken *root = dictentry(trailer, "Root"); + assert(root->token_type == TT_Ref); + parse_catalog(aux, root); + return; } @@ -5568,6 +5592,9 @@ main(int argc, char *argv[]) /* parse all cross-reference sections and trailer dictionaries */ parse_xrefs(&aux); + if ( (aux.nxrefs == 0) || (aux.xrefs == NULL) ) { + return 2; + } fprintf(stdout, "\n\nmain:: Done parsing xrefs and page tree. Starting main parser.\n\n");