From 517b81ad9afe5c322e4f04aa94c0c7e6ffe267f6 Mon Sep 17 00:00:00 2001
From: Pompolic <pompolic@special-circumstanc.es>
Date: Thu, 10 Feb 2022 20:17:24 +0100
Subject: [PATCH] Merge fix for double-free

Original commit messages:
commit 8a1b05c3a8b6a46dee59e4fd48a0eea2c09a91a4
Author: sumit.ray@baesystems.com <sumit.ray@baesystems.com>
Date:   Fri Feb 4 16:19:35 2022 -0500

    Removed check for null trailer - files using xref content streams may not have the keyword trailer

commit 98378f2603fd1b4b93ca06d06b079ed81f284e0c
Author: sumit.ray@baesystems.com <sumit.ray@baesystems.com>
Date:   Fri Feb 4 10:25:15 2022 -0500

    Added commentary for a semantically more meaningful error message when the xref offset specified is outside bounds

commit 7cc9be2beea026219f6c2b851eaac0d5bf8d8cba
Author: sumit.ray@baesystems.com <sumit.ray@baesystems.com>
Date:   Thu Feb 3 18:14:55 2022 -0500

    Fix for double-free - added guard for invalid trailer section
---
 pdf.c | 53 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 13 deletions(-)

diff --git a/pdf.c b/pdf.c
index 0569ac8..f2121fc 100644
--- a/pdf.c
+++ b/pdf.c
@@ -5407,7 +5407,11 @@ parse_xrefs(struct Env *aux)
 	size_t maxObjNum = 0;
 	Dict *trailer = NULL;
 
-	// XXX try formulating this as a parser using h_seek()
+	// Initialize the environment variables
+	aux->xrefs = xrefs;
+	aux->nxrefs = n;
+
+
 
 	/* search for the "startxref" section from the back of the file */
 	HParser *p = h_left(p_startxref, h_end_p());
@@ -5417,20 +5421,28 @@ parse_xrefs(struct Env *aux)
 			break;
 	}
 	if (res == NULL) {
-		fprintf(stderr, "%s: startxref not found\n", infile);
-		goto end;
+		fprintf(stderr, "VIOLATION[5]: startxref not found\n");
+		return;
 	}
 	offset = H_INDEX_UINT(res->ast, 0);
 
+	// verify the offset recovered is bounded to be in the file
+	if ( (offset <=0) || (offset >= aux->sz) ) {
+		fprintf(stderr, "VIOLATION[5]: Invalid xref table offset = %ld. Valid range <0, %ld>\n",
+				offset, aux->sz);
+		return;
+	}
+
+
 	for (;;) {
 		//res = h_parse(p_xref, input + offset, sz - offset);
 		p = NULL;
 		p = h_right(h_seek(offset * 8, SEEK_SET), p_xref);	// XXX
 		res = h_parse(p, input, sz);
 		if (res == NULL || res->ast == NULL || H_INDEX_TOKEN(res->ast, 0) == NULL) {
-			fprintf(stderr, "%s: error parsing xref section at "
-			    "position %zu (%#zx)\n", infile, offset, offset);
-			break;
+			fprintf(stderr, "VIOLATION[5]: error parsing xref section at "
+			    "position %zu (%#zx)\n", offset, offset);
+			return;
 		}
 
 		/* save this section in xrefs */
@@ -5484,24 +5496,36 @@ parse_xrefs(struct Env *aux)
 		offset = (size_t)tok->sint;
 	}
 
-end:
+
+
+	// Make sure we parsed a valid trailer section
+	if (! trailer) {
+		fprintf(stderr, "VIOLATION[7]: Invalid Trailer Section or Trailer Section not found\n");
+		return;
+	}
+
+
+
+	// Process the document structure
 	aux->xrefs = xrefs;
 	aux->nxrefs = n;
+
+
+
 	if (n > maxObjNum) {
 		fprintf(stderr, "%s: Number of xrefs found -%ld- "
 						"Greater than specified /Size -%ld-.\n"
-						"Ignoring objects numberd greater than -%ld-!\n",
+						"Ignoring objects numbered greater than -%ld-!\n",
 						infile, n, maxObjNum, n);
 		aux->nxrefs = maxObjNum;
 	}
 
 
 	// Process the trailer dictionary
-	if (trailer) { // trailer==NULL or n==0 ==> xrefs were not parsed correctly
-		const HParsedToken *root = dictentry(trailer, "Root");
-		assert(root->token_type == TT_Ref);
-		parse_catalog(aux, root);
-	}
+	const HParsedToken *root = dictentry(trailer, "Root");
+	assert(root->token_type == TT_Ref);
+	parse_catalog(aux, root);
+
 	return;
 }
 
@@ -5568,6 +5592,9 @@ main(int argc, char *argv[])
 	/* parse all cross-reference sections and trailer dictionaries */
 	parse_xrefs(&aux);
 
+	if ( (aux.nxrefs == 0) || (aux.xrefs == NULL) ) {
+		return 2;
+	}
 	fprintf(stdout, "\n\nmain:: Done parsing xrefs and page tree. Starting main parser.\n\n");
 
 
-- 
GitLab