From a0874f2656316bca9042b7c7a1926c2ae75e33c5 Mon Sep 17 00:00:00 2001
From: "Sven M. Hallberg" <pesco@khjk.org>
Date: Tue, 28 Jan 2020 23:06:03 +0100
Subject: [PATCH] handle linearized PDFs with updates

---
 pdf.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pdf.c b/pdf.c
index 550f6ee..6f342c6 100644
--- a/pdf.c
+++ b/pdf.c
@@ -1247,7 +1247,7 @@ parse_xrefs(const char *input, size_t sz, size_t *nxrefs)
 	HParseResult *res = NULL;
 	const HParsedToken **xrefs = NULL;	/* empty result */
 	const HParsedToken *tok = NULL;
-	size_t n = 0;
+	size_t n = 0, nfwd = 0;
 	size_t offset = 0;
 
 	// XXX try formulating this as a parser using h_seek()
@@ -1296,10 +1296,12 @@ parse_xrefs(const char *input, size_t sz, size_t *nxrefs)
 		 * validate the new offset. we don't want to get caught in a
 		 * loop. the offsets should strictly decrease, unless the file
 		 * is a "linearized" PDF. in that case there should be exactly
-		 * two xref sections in the reverse order, so we allow the
-		 * first section to point forward.
+		 * one xref section at the beginning of the file that may
+		 * point forward. a second forward /Prev pointer is an error.
 		 */
-		if (n > 1 && tok->sint >= offset) {
+		if (tok->sint >= offset)
+			nfwd++;
+		if (nfwd > 1) {
 			fprintf(stderr, "%s: /Prev pointer of xref section at "
 			    "%zu (%#zx) points forward\n", infile, offset,
 			    offset);
-- 
GitLab