diff --git a/pdf.c b/pdf.c
index 05d0d3d19cffca89e7dd58bd5c8c764a71daf2f5..cff81e4a32c15667f81b96a0f359ea1da82f8bf0 100644
--- a/pdf.c
+++ b/pdf.c
@@ -3326,22 +3326,80 @@ int read_lzw_buffer(void)
 }
 
 /* Table for storing sequences represented by an LZW code */
-char * lzw_code_table[4096];
-int next;
-uint64_t old;
+// XXX: a flat 4096-entry table makes lookup O(1), but memory use may be bad
+// XXX: unify LZW_context_T and lzwspec
+
+typedef struct LZW_context_S /* State handed to the LZW semantic actions through their user-data pointer */
+{
+	/*
+	 * Table for storing sequences represented by an LZW code.
+	 * Codes 0-255 stand for literal bytes and 256 is the reset code, so those slots are special. We could explicitly pre-fill them, but it's probably not necessary.
+	 */
+	const char * lzw_code_table[4096];
+
+	/*
+	 * Holds the next expected LZW code; act_LZW_clear restarts it at 258. We also use this for telling LZW_9bitcodeword, LZW_10bitcodeword, etc. apart: the parse is meant to fail if "next" is larger than what can be represented on that many bits.
+	 */
+	int next;
+
+	/*
+	 * Previous LZW code, used to construct the next string added to the table.
+	 */
+	uint64_t old;
+} LZW_context_T;
+
+/* Free every sequence stored in ctx's code table and reset its slot to NULL. ctx->next is left untouched; the caller resets it afterwards. */
+void LZW_clear_table(LZW_context_T *ctx)
+{
+	/*
+	 *  Optimizations: since we leave the entries 0-257 empty, we don't need to free() them explicitly.
+	 *  And since codes are added to the table sequentially, we don't need to look past ctx->next;
+	 */
+	for(int i = 257; i < ctx->next; ++i)
+	{
+		/* free(NULL) is a no-op, so no guard is needed; the cast drops the const qualifier that free() does not accept. */
+		free((void *) ctx->lzw_code_table[i]);
+		/* Reset the slot: table lookups test entries against NULL, so a stale pointer would be read as a live entry. */
+		ctx->lzw_code_table[i] = NULL;
+	}
+}
 
 HParser *p_lzwdata;
 
+/* First "code" in input: output as a one-byte literal and stored in "old". H_MAKE_BYTES takes a pointer to the bytes (not the value), hence the copy into p->arena. */
+HParsedToken*
+act_LZW_firstcode(const HParseResult *p, void *u)
+{
+	LZW_context_T * ctx = (LZW_context_T *) u;
+	uint64_t code = H_CAST_UINT(p->ast);
+	uint8_t * literal = h_arena_malloc(p->arena, sizeof *literal);
+	*literal = (uint8_t) code;
+	ctx->old = code;
+	return H_MAKE_BYTES(literal, 1);
+}
+
+HParsedToken*
+act_LZW_clear(const HParseResult *p, void *u)
+{
+	LZW_context_T * const state = u;
+	LZW_clear_table(state); /* must run while "next" still marks the end of the used entries, otherwise they leak */
+	state->next = 258;      /* code assignment restarts at 258 */
+	return H_MAKE_BYTES(NULL, 0);
+}
+
+// XXX: validations
+// compare against the expected next code; fail the parse if it doesn't fit the bit length
 
 // TODO: maybe a continuation can be used to remember the previous code
 // But then each codeword would need to get used as input twice
 HParsedToken*
-act_lzw_codeword(const HParseResult *p, void *u)
+act_LZW_codeword(const HParseResult *p, void *u)
 {
 	char * string;
 	char * output;
 	char * entry;
 	uint64_t code = H_CAST_UINT(p->ast);
+	LZW_context_T * ctx = (LZW_context_T *) u;
 
 
 	if(lzw_code_table[code] != NULL) // code is in the table
@@ -3353,6 +3411,7 @@ act_lzw_codeword(const HParseResult *p, void *u)
 		strncpy(output, entry, strlen(entry));
 		output[strlen(entry)] = postfix;
 		output[strlen(entry)+1] = '\0';
+		ctx->old = code;
 		return H_MAKE_BYTES(string, strlen(string));
 	}
 	else // code is not in the table
@@ -3364,14 +3423,18 @@ act_lzw_codeword(const HParseResult *p, void *u)
 		output[strlen(entry)] = postfix;
 		output[strlen(entry)+1] = '\0';
 		lzw_code_table[next] = output;
+		ctx->old = code;
 		return H_MAKE_BYTES(output, strlen(output)); //XXX: strlen and null-terminated strings may not be appropriate here. using fixed size strings would be preferable (HCountedArray?)
 	}
-	old = code;
 }
 
 void init_lzw_parser()
 {
-
+	H_RULE(LZW_9bitcodeword, h_nothing_p()); // XXX grammar: every rule here is still h_nothing_p(), presumably a placeholder until the LZW grammar is written
+	H_RULE(LZW_10bitcodeword, h_nothing_p());
+	H_RULE(LZW_11bitcodeword, h_nothing_p());
+	H_RULE(LZW_12bitcodeword, h_nothing_p());
+	H_ARULE(LZW_codeword, h_nothing_p()); // NOTE(review): presumably attaches act_LZW_codeword; confirm that action receives an LZW_context_T as its user data
 }
 
 HParseResult *