From f432fa306ad37a634aad7390cc08cb617e37e488 Mon Sep 17 00:00:00 2001
From: Pompolic <pompolic@special-circumstanc.es>
Date: Tue, 26 Oct 2021 20:57:59 +0200
Subject: [PATCH] EarlyChange support

---
 lzw.c | 63 +++++++++++++++++++++++------------------------------------
 lzw.h |  7 ++++---
 pdf.c | 17 +++++++++++-----
 3 files changed, 40 insertions(+), 47 deletions(-)

diff --git a/lzw.c b/lzw.c
index c80eefd..304c88d 100644
--- a/lzw.c
+++ b/lzw.c
@@ -1,7 +1,6 @@
 /* Table for storing sequences represented by an LZW code */
 // XXX lookup is O(1) like this, but maybe memory use will be bad
 // XXX unify lzw_context_t and lzwspec
-// XXX EarlyChange support
 #include <hammer/hammer.h>
 #include <hammer/glue.h>
 // malloc, free
@@ -10,30 +9,18 @@
 #include <string.h>
 
 #include "lzw.h"
-//XXX lzw.h
-/*
-typedef struct LZW_context_S
-{
-	*
-	 * Table for storing sequences represented by an LZW code
-	 * 0-255, and 256 are special, representing literals, and the reset code. We could explicitly pre-fill them, but it's probably not necessary.
-	 *
-	HBytes * lzw_code_table[4096];
-
-	*
-	 * Holds the next expected LZW code. We also use this for telling LZW_9bitcodeword, LZW_10bitcodeword, etc. apart. Parses fail if "next" is larger than what can be represented on that many bits.
-	 *
-	int next;
-
-	*
-	 * Previous LZW code, used to construct the next string added to the table.
-	 *
-	uint64_t old;
-} LZW_context_T;
-*/
 
 FILE *debug; // DEBUG
 
+#define BITLIMIT_9 (ctx->earlychange ? 511 : 512)
+#define BITLIMIT_10 (ctx->earlychange ? 1023 : 1024)
+#define BITLIMIT_11 (ctx->earlychange ? 2047 : 2048)
+/*
+ * Code widths above 12 bits are not allowed, so there is no later width switch for EarlyChange to shift; this limit is the same in both modes.
+ */
+#define BITLIMIT_12 4096
+
+
 void LZW_clear_table(LZW_context_T *ctx)
 {
 	/*
@@ -113,9 +100,9 @@ validate_LZW_9bitcodeword(HParseResult *p, void *u)
 	LZW_context_T * ctx = (LZW_context_T *) u;
 	//fprintf(debug, "9 bit code: %lu, next: %u\n", p->ast->uint, ctx->next); // DEBUG
 	//fflush(debug); // DEBUG
-	if (ctx->next < 511) // DEBUG
+	if (ctx->next < BITLIMIT_9) // DEBUG
 		assert(H_CAST_UINT(p->ast) <= ctx->next); // DEBUG
-	return (ctx->next < 511); // XXX: parameterize codeword boundaries via EarlyChange
+	return (ctx->next < BITLIMIT_9);
 }
 
 bool
@@ -124,9 +111,9 @@ validate_LZW_10bitcodeword(HParseResult *p, void *u)
 	LZW_context_T * ctx = (LZW_context_T *) u;
 	//fprintf(debug, "10 bit code: %lu, next: %u\n", p->ast->uint, ctx->next); // DEBUG
 	//fflush(debug); // DEBUG
-	if (ctx->next >= 511 && ctx->next < 1023) // DEBUG
+	if (ctx->next >= BITLIMIT_9 && ctx->next < BITLIMIT_10) // DEBUG
 		assert(H_CAST_UINT(p->ast) <= ctx->next); // DEBUG
-	return (ctx->next >= 511 && ctx->next < 1023);
+	return (ctx->next >= BITLIMIT_9 && ctx->next < BITLIMIT_10);
 }
 
 bool
@@ -135,9 +122,9 @@ validate_LZW_11bitcodeword(HParseResult *p, void *u)
 	LZW_context_T * ctx = (LZW_context_T *) u;
 	//fprintf(debug, "11 bit code: %lu, next: %u\n", p->ast->uint, ctx->next); // DEBUG
 	//fflush(debug); // DEBUG
-	if (ctx->next >= 1023 && ctx->next < 2047) // DEBUG
+	if (ctx->next >= BITLIMIT_10 && ctx->next < BITLIMIT_11) // DEBUG
 		assert(H_CAST_UINT(p->ast) <= ctx->next); // DEBUG
-	return (ctx->next >= 1023 && ctx->next < 2047);
+	return (ctx->next >= BITLIMIT_10 && ctx->next < BITLIMIT_11);
 }
 
 bool
@@ -146,9 +133,9 @@ validate_LZW_12bitcodeword(HParseResult *p, void *u)
 	LZW_context_T * ctx = (LZW_context_T *) u;
 	//fprintf(debug, "12 bit code: %lu, next: %u\n", p->ast->uint, ctx->next); // DEBUG
 	//fflush(debug); // DEBUG
-	if (ctx->next >= 2047 && ctx->next < 4095) // DEBUG
+	if (ctx->next >= BITLIMIT_11 && ctx->next < BITLIMIT_12) // DEBUG
 		assert(H_CAST_UINT(p->ast) <= ctx->next); // DEBUG
-	return (ctx->next >= 2047 && ctx->next < 4095);
+	return (ctx->next >= BITLIMIT_11 && ctx->next < BITLIMIT_12);
 }
 
 bool
@@ -158,7 +145,7 @@ validate_LZW_9bitlitspec(HParseResult *p, void *u)
 	//fprintf(debug, "9 bit lit: %lu, next: %u\n", p->ast->uint, ctx->next); // DEBUG
 	//fflush(debug); // DEBUG
 	uint64_t code = H_CAST_UINT(p->ast);
-	return (ctx->next < 511 && code < 258);
+	return (ctx->next < BITLIMIT_9 && code < 258);
 }
 
 bool
@@ -168,7 +155,7 @@ validate_LZW_10bitlitspec(HParseResult *p, void *u)
 	//fprintf(debug, "10 bit lit: %lu, next: %u\n", p->ast->uint, ctx->next); // DEBUG
 	//fflush(debug); // DEBUG
 	uint64_t code = H_CAST_UINT(p->ast);
-	return (ctx->next >= 511 && ctx->next < 1023 && code < 258);
+	return (ctx->next >= BITLIMIT_9 && ctx->next < BITLIMIT_10 && code < 258);
 }
 
 bool
@@ -178,7 +165,7 @@ validate_LZW_11bitlitspec(HParseResult *p, void *u)
 	//fprintf(debug, "11 bit lit: %lu, next: %u\n", p->ast->uint, ctx->next); // DEBUG
 	//fflush(debug); // DEBUG
 	uint64_t code = H_CAST_UINT(p->ast);
-	return (ctx->next >= 1023 && ctx->next < 2047 && code < 258);
+	return (ctx->next >= BITLIMIT_10 && ctx->next < BITLIMIT_11 && code < 258);
 }
 
 bool
@@ -188,7 +175,7 @@ validate_LZW_12bitlitspec(HParseResult *p, void *u)
 	//fprintf(debug, "12 bit lit: %lu, next: %u\n", p->ast->uint, ctx->next); // DEBUG
 	//fflush(debug); // DEBUG
 	uint64_t code = H_CAST_UINT(p->ast);
-	return (ctx->next >= 2047 && ctx->next < 4095 && code < 258);
+	return (ctx->next >= BITLIMIT_11 && ctx->next < BITLIMIT_12 && code < 258);
 }
 
 bool
@@ -429,6 +416,7 @@ void init_LZW_parser()
 		lit->len = 1;
 		context->lzw_code_table[i] = lit;
 	}
+	context->earlychange = 1;
 
 	H_VDRULE(LZW_9bitcodeword, h_bits(9, false), context);
 	H_VDRULE(LZW_10bitcodeword, h_bits(10, false), context);
@@ -466,12 +454,8 @@ HParseResult* parse_LZW_data(const uint8_t* input, size_t length)
 	return res;
 }
 
-void set_LZW_context(LZW_context_T *ctx)
-{
-	*context = *ctx; // XXX unnecessary, just clear context before each parse
-}
 
-void clear_LZW_context()
+void init_LZW_context(int earlychange)
 {
 	for(int i = 258; i < 4096; ++i)
 	{
@@ -484,4 +468,5 @@ void clear_LZW_context()
 	}
 	context->next = 258;
 	context->old = 257; //XXX: guaranteed to segfault if old isn't set before
+	context->earlychange = earlychange;
 }
diff --git a/lzw.h b/lzw.h
index d4f4308..2525004 100644
--- a/lzw.h
+++ b/lzw.h
@@ -25,14 +25,15 @@ typedef struct LZW_context_S
 	/*
 	 * EarlyChange = 1 means the bit size is increased "one code early" (Early change = 0 is "code length increases shall be postponed as long as possible"
 	 */
-	//int earlychange;
+	int earlychange;
 } LZW_context_T;
 
 HParser * p_lzwdata; // XXX can be internal
 
 void init_LZW_parser();
 HParseResult * parse_LZW_data(const uint8_t* input, size_t length);
-void set_LZW_context(LZW_context_T *ctx);
-void clear_LZW_context();
+//void set_LZW_context(LZW_context_T *ctx);
+//void clear_LZW_context();
+void init_LZW_context(int earlychange);
 
 #endif // PDF_LZW_H
diff --git a/pdf.c b/pdf.c
index 8e38bbb..976433a 100644
--- a/pdf.c
+++ b/pdf.c
@@ -3337,6 +3337,7 @@ LZWDecode(const Dict *parms, HBytes b, HParser *p)
 	int done;
 	//int ret;
 	const HParsedToken *v;
+	int earlychange;
 
 	/* set up the predictor (if any) */
 	#define SETPARM(VAR,STR) do {					\
@@ -3385,6 +3386,16 @@ LZWDecode(const Dict *parms, HBytes b, HParser *p)
 			err(1, "LZWDecode");
 	}
 
+	v = dictentry(parms, "EarlyChange");
+	if(v != NULL && v->token_type == TT_SINT && v->sint == 0)
+	{
+		earlychange = 0;
+	}
+	else
+	{
+		earlychange = 1;
+	}
+
 	//lzwspec *lzw_spec = new_lzw_spec(&b);
 	//bind_lzw_spec(lzw_spec);
 
@@ -3395,12 +3406,8 @@ LZWDecode(const Dict *parms, HBytes b, HParser *p)
 	//}
 	//done = depredict(&pred, cur_lzw_spec->lzw_buf, cur_lzw_spec->write_head-1);
 	//assert(!done);	// XXX ITERATIVE
-	LZW_context_T * ctx = malloc(sizeof(LZW_context_T));
-	ctx->next = 258;
-	clear_LZW_context();
+	init_LZW_context(earlychange);
 	tmp_res = parse_LZW_data(b.token, b.len);
-	//clear_LZW_context();
-	free(ctx);
 
 	if(!tmp_res)
 	{
-- 
GitLab