From a1014f81d804955bb38b434865b733271aa3d7a7 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" <pesco@khjk.org> Date: Thu, 30 Mar 2023 16:52:28 +0000 Subject: [PATCH] improve handling of parse errors in xref stream data Improve on the bugfix in commit a5abf1e2: - Reinstate the assert for 'res->ast != NULL'. If it fails, there is a bug in the parser, not an error in the input file. - Provide a distinct error message for the case where p_xref fails on a cross-reference stream because of invalid data. - Only skip storing the invalid section. Try to follow the /Prev entry in the stream dictionary to find more sections. --- pdf.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/pdf.c b/pdf.c index afd483c..b377e34 100644 --- a/pdf.c +++ b/pdf.c @@ -4984,19 +4984,26 @@ parse_xrefs(const uint8_t *input, size_t sz, size_t *nxrefs) for (;;) { assert(offset <= sz); res = h_parse(p_xref, input + offset, sz - offset); - if (res == NULL || res->ast == NULL || H_INDEX_TOKEN(res->ast, 0) == NULL) { + if (res == NULL) { log_message(5, "%s: error parsing xref section at " "position %zu (%#zx)\n", infile, offset, offset); break; } + assert(res->ast != NULL); - /* save this section in xrefs */ - if (n >= SIZE_MAX / sizeof(HParsedToken *)) - errx(2, "parse_xrefs: realloc: size would overflow"); - xrefs = realloc(xrefs, (n + 1) * sizeof(HParsedToken *)); - if (xrefs == NULL) - err(2, "parse_xrefs"); - xrefs[n++] = res->ast; + if (H_INDEX_TOKEN(res->ast, 0) == NULL) { + log_message(5, "%s: error parsing xref stream data at " + "position %zu (%#zx)\n", infile, offset, offset); + /* skip this section, but continue following /Prev */ + } else { + /* data is valid, save this section in xrefs */ + if (n >= SIZE_MAX / sizeof(HParsedToken *)) + errx(2, "parse_xrefs: realloc: size overflow"); + xrefs = realloc(xrefs, (n + 1) * sizeof *xrefs); + if (xrefs == NULL) + err(2, "parse_xrefs"); + xrefs[n++] = res->ast; + } /* look up the next offset (to the previous xref section) */ tok = dictentry(H_INDEX(Dict, res->ast, 1), "Prev"); -- GitLab