diff --git a/Makefile b/Makefile
index 0aaf446eb971efa7ca3f7513a4b005037e7943cd..04612316734667ac8e9f41fee599c36cad1de7a1 100644
--- a/Makefile
+++ b/Makefile
@@ -12,7 +12,7 @@ HAMMER_INCLUDE = .
 HAMMER_LIB = ./lib
 CFLAGS += -I$(HAMMER_INCLUDE)   # (-pg :: profile using gprof) (-g :: debug info)
 LDFLAGS += -L$(HAMMER_LIB)
-SOURCES = pdf.c lzw-lib.c
+SOURCES = pdf.c lzw.c
 
 .PHONY: all test clean
 all: pdf
diff --git a/lzw-ab-license.txt b/lzw-ab-license.txt
deleted file mode 100644
index 65d4a2e4b96304208852290e3d3bf6ee7ce3dde8..0000000000000000000000000000000000000000
--- a/lzw-ab-license.txt
+++ /dev/null
@@ -1,25 +0,0 @@
-                       Copyright (c) David Bryant
-                          All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright notice,
-      this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-    * Neither the name of Conifer Software nor the names of its contributors
-      may be used to endorse or promote products derived from this software
-      without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/lzw-lib.c b/lzw-lib.c
deleted file mode 100644
index 38ecd30a3ed38605d2cb752efb66dc8ecb4861c6..0000000000000000000000000000000000000000
--- a/lzw-lib.c
+++ /dev/null
@@ -1,318 +0,0 @@
-////////////////////////////////////////////////////////////////////////////
-//                            **** LZW-AB ****                            //
-//               Adjusted Binary LZW Compressor/Decompressor              //
-//                     Copyright (c) 2016 David Bryant                    //
-//                           All Rights Reserved                          //
-//      Distributed under the BSD Software License (see license.txt)      //
-////////////////////////////////////////////////////////////////////////////
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "lzw-lib.h"
-
-/* This library implements the LZW general-purpose data compression algorithm.
- * The algorithm was originally described as a hardware implementation by
- * Terry Welsh here:
- *
- *   Welch, T.A. â€œA Technique for High-Performance Data Compression.â€
- *   IEEE Computer 17,6 (June 1984), pp. 8-19.
- *
- * Since then there have been enumerable refinements and variations on the
- * basic technique, and this implementation is no different. The target of
- * the present implementation is embedded systems, and so emphasis was placed
- * on simplicity, fast execution, and minimal RAM usage.
- *
- * The symbols are stored in adjusted binary, which provides considerably
- * better compression performance with virtually no speed penalty compared to
- * the fixed sizes normally used. To ensure good performance on data with
- * varying characteristics (like executable images) the encoder resets as
- * soon as the dictionary is full. Also, worst-case performance is limited
- * to about 8% inflation by catching poor performance and forcing an early
- * reset before longer symbols are sent.
- *
- * The maximum symbol size is configurable on the encode side (from 9 bits
- * to 12 bits) and determines the RAM footprint required by both sides and,
- * to a large extent, the compression performance. This information is
- * communicated to the decoder in the first stream byte so that it can
- * allocate accordingly. The RAM requirements are as follows:
- *
- *    maximum    encoder RAM   decoder RAM
- *  symbol size  requirement   requirement
- * -----------------------------------------
- *     9-bit     1792 bytes    1024 bytes
- *    10-bit     4352 bytes    3072 bytes
- *    11-bit     9472 bytes    7168 bytes
- *    12-bit     19712 bytes   15360 bytes
- * 
- * This implementation uses malloc(), but obviously an embedded version could
- * use static arrays instead if desired (assuming that the maxbits was
- * controlled outside).
- */
-
-#define NULL_CODE       -1      // indicates a NULL prefix
-#define CLEAR_CODE      256     // code to flush dictionary and restart decoder
-#define EOD_CODE        257     // used in PDF's LZWDecode to signal end of data
-#define FIRST_STRING    258     // code of first dictionary string, PDF edition
-
-/* This macro writes the adjusted-binary symbol "code" given the maximum
- * symbol "maxcode". A macro is used here just to avoid the duplication in
- * the lzw_compress() function. The idea is that if "maxcode" is not one
- * less than a power of two (which it rarely will be) then this code can
- * often send fewer bits that would be required with a fixed-sized code.
- *
- * For example, the first code we send will have a "maxcode" of 257, so
- * every "code" would normally consume 9 bits. But with adjusted binary we
- * can actually represent any code from 0 to 253 with just 8 bits -- only
- * the 4 codes from 254 to 257 take 9 bits.
- */
-
-#define WRITE_CODE(code,maxcode) do {                           \
-    int code_bits = (maxcode) < 1024 ?                          \
-        ((maxcode) < 512 ? 8 : 9) :                             \
-        ((maxcode) < 2048 ? 10 : 11);                           \
-    int extras = (1 << (code_bits + 1)) - (maxcode) - 1;        \
-    if ((code) < extras) {                                      \
-        shifter |= ((long)(code) << bits);                      \
-        bits += code_bits;                                      \
-    }                                                           \
-    else {                                                      \
-        shifter |= ((long)(((code) + extras) >> 1) << bits);    \
-        bits += code_bits;                                      \
-        shifter |= ((long)(((code) + extras) & 1) << bits++);   \
-    }                                                           \
-    do { (*dst)(shifter); shifter >>= 8; output_bytes++;        \
-    } while ((bits -= 8) >= 8);                                 \
-} while (0)
-
-/* LZW compression function. Bytes (8-bit) are read and written through callbacks and the
- * "maxbits" parameter specifies the maximum symbol size (9-12), which in turn determines
- * the RAM requirement and, to a large extent, the level of compression achievable. A return
- * value of EOF from the "src" callback terminates the compression process. A non-zero return
- * value indicates one of the two possible errors -- bad "maxbits" param or failed malloc().
- */
-
-int lzw_compress (void (*dst)(int), int (*src)(void), int maxbits)
-{
-    int next = FIRST_STRING, prefix = NULL_CODE, bits = 0, total_codes, c;
-    unsigned long input_bytes = 0, output_bytes = 0;
-    short *first_references, *next_references;
-    unsigned char *terminators;
-    unsigned long shifter = 0;
-
-    if (maxbits < 9 || maxbits > 12)    // check for valid "maxbits" setting
-        return 1;
-
-    // based on the "maxbits" parameter, compute total codes and allocate dictionary storage
-
-    total_codes = 1 << maxbits;
-    first_references = malloc (total_codes * sizeof (first_references [0]));
-    next_references = malloc ((total_codes - 256) * sizeof (next_references [0]));
-    terminators = malloc ((total_codes - 256) * sizeof (terminators [0]));
-
-    if (!first_references || !next_references || !terminators)
-        return 1;                       // failed malloc()
-
-    // clear the dictionary
-
-    memset (first_references, 0, total_codes * sizeof (first_references [0]));
-    memset (next_references, 0, (total_codes - 256) * sizeof (next_references [0]));
-    memset (terminators, 0, (total_codes - 256) * sizeof (terminators [0]));
-
-    (*dst)(maxbits - 9);    // first byte in output stream indicates the maximum symbol bits
-
-    // This is the main loop where we read input bytes and compress them. We always keep track of the
-    // "prefix", which represents a pending byte (if < 256) or string entry (if >= FIRST_STRING) that
-    // has not been sent to the decoder yet. The output symbols are kept in the "shifter" and "bits"
-    // variables and are sent to the output every time 8 bits are available (done in the macro).
-
-    while ((c = (*src)()) != EOF) {
-        int cti;                            // coding table index
-
-        input_bytes++;
-
-        if (prefix == NULL_CODE) {          // this only happens the very first byte when we don't yet have a prefix
-            prefix = c;
-            continue;
-        }
-
-        if ((cti = first_references [prefix])) {    // if any longer strings are built on the current prefix...
-            while (1)
-                if (terminators [cti - 256] == c) { // we found a matching string, so we just update the prefix
-                    prefix = cti;                   // to that string and continue without sending anything
-                    break;
-                }
-                else if (!next_references [cti - 256]) {    // this string did not match the new character and
-                    next_references [cti - 256] = next;     // there aren't any more, so we'll add a new string
-                    cti = 0;                                // and point to it with "next_reference"
-                    break;
-                }
-                else
-                    cti = next_references [cti - 256];      // there are more possible matches to check, so loop back
-        }
-        else                                        // no longer strings are based on the current prefix, so now
-            first_references [prefix] = next;       // the current prefix plus the new byte will be the next string
-
-        // If "cti" is zero, we could not simply extend our "prefix" to a longer string because we did not find a
-        // dictionary match, so we send the symbol representing the current "prefix" and add the new string to the
-        // dictionary. Since the current byte "c" was not included in the prefix, that now becomes our new prefix.
-
-        if (!cti) {
-            WRITE_CODE (prefix, next);              // send symbol for current prefix (0 to next-1)
-            terminators [next - 256] = c;           // newly created string has current byte as the terminator
-            prefix = c;                             // current byte also becomes new prefix for next string
-
-            // This is where we bump the next string index and decide whether to clear the dictionary and start over.
-            // The triggers for that are either the dictionary is full or we've been outputting too many bytes and
-            // decide to cut our losses before the symbols get any larger. Note that for the dictionary full case we
-            // do NOT send the CLEAR_CODE because the decoder knows about this and we don't want to be redundant.
-
-            if (++next == total_codes || output_bytes > 8 + input_bytes + (input_bytes >> 4)) {
-                if (next < total_codes)
-                    WRITE_CODE (CLEAR_CODE, next);
-
-                // clear the dictionary and reset the byte counters -- basically everything starts over
-                // except that we keep the last pending "prefix" (which, of course, was never sent)
-
-                memset (first_references, 0, total_codes * sizeof (first_references [0]));
-                memset (next_references, 0, (total_codes - 256) * sizeof (next_references [0]));
-                memset (terminators, 0, (total_codes - 256) * sizeof (terminators [0]));
-                input_bytes = output_bytes = 0;
-                next = FIRST_STRING;
-            }
-        }
-    }
-
-    // we're done with input, so if we've received anything we still need to send that pesky pending prefix...
-
-    if (prefix != NULL_CODE) {
-        WRITE_CODE (prefix, next);
-
-        if (++next == total_codes)  // watch for clearing to the first string to stay in step with the decoder!
-            next = FIRST_STRING;    // (this was actually a corner-case bug that did not trigger often)
-    }
-
-    WRITE_CODE (next, next);        // the maximum possible code is always reserved for our END_CODE
-
-    if (bits)                       // finally, flush any pending bits from the shifter
-        (*dst)(shifter);
-
-    free (terminators); free (next_references); free (first_references);
-    return 0;
-}
-
-/* LZW decompression function. Bytes (8-bit) are read and written through callbacks.
- * A return value of EOF from the "src" callback terminates the compression process 
- * (although this should not normally occur). A non-zero return value
- * indicates an error, which in this case can be  a
- * failed malloc(), or if an EOF is read from the input stream before the compression
- * terminates naturally with END_CODE.
- */
-
-int lzw_decompress (void (*dst)(int), int (*src)(void))
-{
-    int read_byte, next = FIRST_STRING, prefix = CLEAR_CODE, bits = 0, total_codes;
-    unsigned char *terminators, *reverse_buffer;
-    unsigned long shifter = 0;
-    short *prefixes;
-
-    // PDF specific change: maxbits is not in the input stream
-    // we'll just be pessimistic and allocate the maximal size buffer
-
-    total_codes = 4096;
-    reverse_buffer = malloc ((total_codes - 256) * sizeof (reverse_buffer [0]));
-    prefixes = malloc ((total_codes - 256) * sizeof (prefixes [0]));
-    terminators = malloc ((total_codes - 256) * sizeof (terminators [0]));
-
-    if (!reverse_buffer || !prefixes || !terminators)       // check for mallco() failure
-        return 1;
-
-    // This is the main loop where we read input symbols. The values range from 0 to the code value
-    // of the "next" string in the dictionary. Note that receiving an EOF from the input
-    // stream is actually an error because we should have gotten the END_CODE first.
-
-    while (1) {
-        int code_bits = next < 512 ? 9 : (next < 1024 ? 10 : (next < 2048 ? 11 : 12) ), code;
-
-        #define TOP_BITMASK  (((1 << code_bits) - 1) << (bits - code_bits) )
-        #define BOTTOM_BITMASK ((1 << (bits - code_bits)) - 1)
-
-        do {
-            if ((read_byte = ((*src)())) == EOF) {
-                free (terminators); free (prefixes); free (reverse_buffer);
-                return 1;
-            }
-
-            /* shifter reworked: everything shifted left by a byte,
-             * and the byte we just read becomes the least significant
-             * byte */
-
-            // prepare to shift in next byte
-            shifter <<= 8;
-            /* the bitstrings forming the symbols are stored MSB first,
-            *  so we can just OR in the next */
-            shifter |= (unsigned long) read_byte;
-        } while ((bits += 8) < code_bits);
-
-
-        /* for a 12-bit code, the shifter's bits now look like 
-         * from MSB to LSB: 00...0cccccccccn...n
-         * where c are the bits of our code
-         * and n are the bits we're not yet interested in
-         * the number of times n is repeated is bits - code_bits 
-         * ie. the number of bits read in minus the bits we're interested in */
-
-        // shift our code bits into thier proper place, and save it as the final code
-        code = (int) shifter >> (bits - code_bits);
-        /* we can now clear the shifter's top bits. the result looks like:
-         * 00...0n...n
-         * number of n is bits-code_bits
-         * */
-        shifter &= BOTTOM_BITMASK;
-        // update the count of bytes in the shifter
-        bits -= code_bits;
-
-        if (code == EOD_CODE)                   // In PDF, EOD is signalled by 257, rather than the max code
-            break;
-        else if (code == CLEAR_CODE)        // otherwise check for a CLEAR_CODE to start over early
-            next = FIRST_STRING;
-        else if (prefix == CLEAR_CODE) {    // this only happens at the first symbol which is always sent
-            (*dst)(code);                   // literally and becomes our initial prefix
-            next++;
-        }
-        // Otherwise we have a valid prefix so we step through the string from end to beginning storing the
-        // bytes in the "reverse_buffer", and then we send them out in the proper order. One corner-case
-        // we have to handle here is that the string might be the same one that is actually being defined
-        // now (code == next-1). Also, the first 256 entries of "terminators" and "prefixes" are fixed and
-        // not allocated, so that messes things up a bit.
-        else {
-            int cti = (code == next-1) ? prefix : code;
-            unsigned char *rbp = reverse_buffer, c;
-
-            do *rbp++ = cti < 256 ? cti : terminators [cti - 256];      // step backward through string...
-            while ((cti = (cti < 256) ? NULL_CODE : prefixes [cti - 256]) != NULL_CODE);
-
-            c = *--rbp;     // the first byte in this string is the terminator for the last string, which is
-                            // the one that we'll create a new dictionary entry for this time
-
-            do (*dst)(*rbp);                        // send string in corrected order (except for the terminator
-            while (rbp-- != reverse_buffer);        // which we don't know yet)
-
-            if (code == next-1)
-                (*dst)(c);
-
-            prefixes [next - 1 - 256] = prefix;     // now update the next dictionary entry with the new string
-            terminators [next - 1 - 256] = c;       // (but we're always one behind, so it's not the string just sent)
-
-            if (++next == total_codes)              // check for full dictionary, which forces a reset (and, BTW,
-                next = FIRST_STRING;                // means we'll never use the dictionary entry we just wrote)
-        }
-
-        prefix = code;      // the code we just received becomes the prefix for the next dictionary string entry
-                            // (which we'll create once we find out the terminator)
-    }
-
-    free (terminators); free (prefixes); free (reverse_buffer);
-    return 0;
-}
diff --git a/lzw-lib.h b/lzw-lib.h
deleted file mode 100644
index 81fdeb15e6ade7ef0fd6089a4fdc3d3f2d593578..0000000000000000000000000000000000000000
--- a/lzw-lib.h
+++ /dev/null
@@ -1,15 +0,0 @@
-////////////////////////////////////////////////////////////////////////////
-//                            **** LZW-AB ****                            //
-//               Adjusted Binary LZW Compressor/Decompressor              //
-//                     Copyright (c) 2016 David Bryant                    //
-//                           All Rights Reserved                          //
-//      Distributed under the BSD Software License (see license.txt)      //
-////////////////////////////////////////////////////////////////////////////
-
-#ifndef LZWLIB_H_
-#define LZWLIB_H_
-
-int lzw_compress (void (*dst)(int), int (*src)(void), int maxbits);
-int lzw_decompress (void (*dst)(int), int (*src)(void));
-
-#endif /* LZWLIB_H_ */
diff --git a/lzw.c b/lzw.c
new file mode 100644
index 0000000000000000000000000000000000000000..364e18a97fec77cb665323fead35475c9ecf7450
--- /dev/null
+++ b/lzw.c
@@ -0,0 +1,455 @@
+/* LZW decoder for PDF LZWDecode streams, built on Hammer parser combinators */
+#include <hammer/hammer.h>
+#include <hammer/glue.h>
+// malloc, free
+#include <stdlib.h>
+// memcpy, memset
+#include <string.h>
+
+#include "lzw.h"
+
+FILE *debug; // DEBUG: trace stream; every use in this file is currently commented out
+
+#define BITLIMIT_9 (ctx->earlychange ? 511 : 512)
+#define BITLIMIT_10 (ctx->earlychange ? 1023 : 1024)
+#define BITLIMIT_11 (ctx->earlychange ? 2047 : 2048)
+/*
+ * The three limits above read `ctx` from the scope they are expanded in and drop by one when
+ * ctx->earlychange is set. Bit lengths larger than 12 aren't allowed, so EarlyChange doesn't matter below. */
+#define BITLIMIT_12 4096
+
+
+void LZW_clear_table(LZW_context_T *ctx)
+{
+	/*
+	 * Release every dynamically added dictionary string.
+	 * Entries 0-257 are fixed or empty and codes are added sequentially,
+	 * so only the slots in [258, ctx->next) can hold anything to free.
+	 */
+	int code = 258;
+	while (code < ctx->next)
+	{
+		HBytes ** slot = &ctx->lzw_code_table[code++];
+		HBytes * entry = *slot;
+		*slot = NULL;
+		if (entry == NULL)
+			continue;
+		/* Assumption: only the HBytes in the LZW table refers to this uint8_t array. */
+		free((uint8_t *)entry->token);
+		free(entry);
+	}
+}
+
+/*
+ * Creates a HBytes from an array of bytes and its length, and stores it in the LZW dictionary slot ctx->next.
+ * Also increments ctx->next. The HBytes keeps the token pointer, to be freed later in LZW_clear_table or init_LZW_context.
+ */
+void lzw_table_insert(LZW_context_T *ctx, uint8_t *token, size_t token_len)
+{
+	HBytes * next_entry = malloc(sizeof(HBytes)); // NOTE(review): result is not checked for NULL
+	next_entry->token = token;
+	next_entry->len = token_len;
+	ctx->lzw_code_table[ctx->next] = next_entry; // NOTE(review): no bounds check -- presumably the width validators keep ctx->next below 4096; confirm
+	ctx->next++;
+}
+
+HParser *p_lzwdata; // top-level LZW_data parser; assigned in init_LZW_parser, used by parse_LZW_data
+LZW_context_T * context; // shared decoder state; allocated in init_LZW_parser, reset by init_LZW_context
+
+/*
+ * Semantic action for the first code of a stream (the one following the initial clear code).
+ * The code is emitted as a single literal byte and recorded in ctx->old; nothing is added to
+ * the dictionary here.
+ * p: parse result whose AST is the unsigned code value
+ * u: the LZW_context_T shared by all LZW actions
+ * Returns a one-byte TT_BYTES token allocated from the parse arena.
+ * NOTE(review): the rule feeding this action accepts any code below 258, so 256/257 would be
+ * truncated to a byte here -- confirm whether that can occur in practice.
+ */
+HParsedToken*
+act_LZW_firstcode(const HParseResult *p, void *u)
+{
+	LZW_context_T * state = (LZW_context_T *) u;
+	uint64_t first = H_CAST_UINT(p->ast);
+	uint8_t *byte = H_ALLOC(uint8_t);
+	state->old = first;
+	*byte = (uint8_t) first;
+	return H_MAKE_BYTES(byte, 1);
+}
+
+/*
+ * Semantic action for the clear code (256): empties the dynamic part of the dictionary and
+ * restarts code assignment at 258. ctx->old is reset to 257 ("no previous code") so that the
+ * code following a clear is emitted without adding a dictionary entry built from a stale string.
+ *
+ * p: parse result for the clear code (only its arena is used)
+ * u: the LZW_context_T shared by all LZW actions
+ * Returns an empty TT_BYTES token.
+ */
+HParsedToken*
+act_LZW_clear(const HParseResult *p, void *u)
+{
+	LZW_context_T * ctx = (LZW_context_T *) u;
+
+	LZW_clear_table(ctx);
+	ctx->next = 258; // Caution: moving this before the call to LZW_clear_table() will cause a memory leak
+	/* 257 is the "no previous code" value that init_LZW_parser/init_LZW_context also store. */
+	ctx->old = 257;
+	return H_MAKE_BYTES(NULL, 0);
+}
+
+/*
+ * Width validators for dictionary codes: an N-bit read is accepted only while ctx->next lies in
+ * the range served by N-bit codes (see the BITLIMIT_* macros). The asserts are debug checks that
+ * an accepted code never exceeds the next code to be assigned.
+ */
+bool
+validate_LZW_9bitcodeword(HParseResult *p, void *u)
+{
+	LZW_context_T * ctx = (LZW_context_T *) u;
+	if (ctx->next < BITLIMIT_9) // DEBUG
+		assert(H_CAST_UINT(p->ast) <= ctx->next); // DEBUG
+	return (ctx->next < BITLIMIT_9);
+}
+
+/* 10-bit codes: BITLIMIT_9 <= ctx->next < BITLIMIT_10 */
+bool
+validate_LZW_10bitcodeword(HParseResult *p, void *u)
+{
+	LZW_context_T * ctx = (LZW_context_T *) u;
+	if (ctx->next >= BITLIMIT_9 && ctx->next < BITLIMIT_10) // DEBUG
+		assert(H_CAST_UINT(p->ast) <= ctx->next); // DEBUG
+	return (ctx->next >= BITLIMIT_9 && ctx->next < BITLIMIT_10);
+}
+
+/* 11-bit codes: BITLIMIT_10 <= ctx->next < BITLIMIT_11 */
+bool
+validate_LZW_11bitcodeword(HParseResult *p, void *u)
+{
+	LZW_context_T * ctx = (LZW_context_T *) u;
+	if (ctx->next >= BITLIMIT_10 && ctx->next < BITLIMIT_11) // DEBUG
+		assert(H_CAST_UINT(p->ast) <= ctx->next); // DEBUG
+	return (ctx->next >= BITLIMIT_10 && ctx->next < BITLIMIT_11);
+}
+
+/* 12-bit codes: BITLIMIT_11 <= ctx->next < BITLIMIT_12 */
+bool
+validate_LZW_12bitcodeword(HParseResult *p, void *u)
+{
+	LZW_context_T * ctx = (LZW_context_T *) u;
+	if (ctx->next >= BITLIMIT_11 && ctx->next < BITLIMIT_12) // DEBUG
+		assert(H_CAST_UINT(p->ast) <= ctx->next); // DEBUG
+	return (ctx->next >= BITLIMIT_11 && ctx->next < BITLIMIT_12);
+}
+
+/*
+ * Width validators for literal/special codes: same width windows as above, and the value must
+ * additionally be below 258 (a literal byte, the clear code or the EOD code).
+ */
+bool
+validate_LZW_9bitlitspec(HParseResult *p, void *u)
+{
+	LZW_context_T * ctx = (LZW_context_T *) u;
+	uint64_t code = H_CAST_UINT(p->ast);
+	return (ctx->next < BITLIMIT_9 && code < 258);
+}
+
+/* 10-bit literal/special codes */
+bool
+validate_LZW_10bitlitspec(HParseResult *p, void *u)
+{
+	LZW_context_T * ctx = (LZW_context_T *) u;
+	uint64_t code = H_CAST_UINT(p->ast);
+	return (ctx->next >= BITLIMIT_9 && ctx->next < BITLIMIT_10 && code < 258);
+}
+
+/* 11-bit literal/special codes */
+bool
+validate_LZW_11bitlitspec(HParseResult *p, void *u)
+{
+	LZW_context_T * ctx = (LZW_context_T *) u;
+	uint64_t code = H_CAST_UINT(p->ast);
+	return (ctx->next >= BITLIMIT_10 && ctx->next < BITLIMIT_11 && code < 258);
+}
+
+/* 12-bit literal/special codes */
+bool
+validate_LZW_12bitlitspec(HParseResult *p, void *u)
+{
+	LZW_context_T * ctx = (LZW_context_T *) u;
+	uint64_t code = H_CAST_UINT(p->ast);
+	return (ctx->next >= BITLIMIT_11 && ctx->next < BITLIMIT_12 && code < 258);
+}
+
+/* Accepts only the clear code. */
+bool
+validate_LZW_clear(HParseResult *p, void *u)
+{
+	uint64_t code = H_CAST_UINT(p->ast);
+	return (code == 256);
+}
+
+/* Accepts only the end-of-data code. */
+bool
+validate_LZW_eod(HParseResult *p, void *u)
+{
+	uint64_t code = H_CAST_UINT(p->ast);
+	return (code == 257);
+}
+
+/* Accepts only literal byte values. */
+bool
+validate_LZW_literal(HParseResult *p, void *u)
+{
+	uint64_t code = H_CAST_UINT(p->ast);
+	return (code < 256);
+}
+
+/*
+ * Semantic action for a literal code (0-255): emits the byte and, when a previous code exists,
+ * adds "previous string + this byte" to the dictionary.
+ *
+ * p: parse result whose AST is the unsigned code value
+ * u: the LZW_context_T shared by all LZW actions
+ * Returns a one-byte TT_BYTES token allocated from the parse arena.
+ * NOTE(review): the calloc() result below is still not checked for NULL.
+ */
+HParsedToken*
+act_LZW_literal(const HParseResult *p, void *u)
+{
+	uint64_t code = H_CAST_UINT(p->ast);
+	LZW_context_T * ctx = (LZW_context_T *) u;
+	HBytes * prev_string;
+	/*
+	 * Literals go from 0-255, so they are guaranteed to fit into 1 byte. See also: validate_LZW_literal
+	 */
+	uint8_t *output = H_ALLOC(uint8_t);
+	*output = (uint8_t) code;
+
+	/*
+	 * ctx->old == 257 means no code has been seen since the last clear: there is no string to
+	 * extend, so the dictionary is left alone. The NULL test also covers a previous code that
+	 * has no table entry, which would otherwise be dereferenced.
+	 */
+	prev_string = (ctx->old == 257) ? NULL : ctx->lzw_code_table[ctx->old];
+	if (prev_string != NULL)
+	{
+		/*
+		 * Update the dictionary with the new string. Use of system allocator
+		 * here and in act_LZW_codeword is intentional, as LZW_clear_table/init_LZW_context free these
+		 */
+		size_t next_entry_size = prev_string->len + 1;
+		uint8_t * next_entry_token = calloc(next_entry_size, sizeof(uint8_t));
+		memcpy(next_entry_token, prev_string->token, prev_string->len);
+		next_entry_token[next_entry_size - 1] = (uint8_t) code;
+		lzw_table_insert(ctx, next_entry_token, next_entry_size);
+	}
+
+	ctx->old = code;
+	return H_MAKE_BYTES(output, 1);
+}
+
+/*
+ * Semantic action for a dictionary code (normally 258 and up): emits the string the code stands
+ * for and adds "previous string + first byte of the emitted string" to the dictionary.
+ *
+ * Two decodable situations exist:
+ *  - the code is already in the table: its string is copied to the output;
+ *  - the code is the one about to be assigned (code == ctx->next): the string is the previous
+ *    string followed by its own first byte, which is also the entry lzw_table_insert adds.
+ * Anything else cannot be decoded (the reserved code 257, a code with neither a table entry nor
+ * a previous string, or a missing code other than ctx->next). An empty token is returned in
+ * that case and the decoder state is left untouched.
+ *
+ * p: parse result whose AST is the unsigned code value
+ * u: the LZW_context_T shared by all LZW actions
+ * Returns a TT_BYTES token whose bytes come from the parse arena; dictionary strings use the
+ * system allocator because LZW_clear_table/init_LZW_context free() them.
+ * NOTE(review): the calloc() result below is still not checked for NULL.
+ */
+HParsedToken*
+act_LZW_codeword(const HParseResult *p, void *u)
+{
+	LZW_context_T * ctx = (LZW_context_T *) u;
+	uint64_t code = H_CAST_UINT(p->ast);
+	HBytes * code_str = ctx->lzw_code_table[code];
+	HBytes * prev_string = (ctx->old == 257) ? NULL : ctx->lzw_code_table[ctx->old];
+	uint8_t * output_token;
+	size_t output_length;
+
+	/* The EOD code is never a dictionary string (the codeword rule itself only checks the width). */
+	if (code == 257)
+		return H_MAKE_BYTES(H_ALLOC(uint8_t), 0);
+	/* A missing code is only decodable when it is exactly the next code and a previous string exists. */
+	if (code_str == NULL && (prev_string == NULL || code != (uint64_t) ctx->next))
+		return H_MAKE_BYTES(H_ALLOC(uint8_t), 0);
+
+	if (code_str != NULL) // code is in the table
+	{
+		output_length = code_str->len;
+		output_token = h_arena_malloc(p->arena, output_length);
+		memcpy(output_token, code_str->token, output_length);
+	}
+	else // code is not in the table yet: previous string plus its own first byte
+	{
+		output_length = prev_string->len + 1;
+		output_token = h_arena_malloc(p->arena, output_length);
+		memcpy(output_token, prev_string->token, prev_string->len);
+		output_token[prev_string->len] = prev_string->token[0];
+	}
+
+	/*
+	 * Update the dictionary. Skipped when there is no previous string to extend, e.g. for the
+	 * first code after a clear.
+	 */
+	if (prev_string != NULL)
+	{
+		size_t next_entry_size = prev_string->len + 1;
+		uint8_t * next_entry_token = calloc(next_entry_size, sizeof(uint8_t));
+		memcpy(next_entry_token, prev_string->token, prev_string->len);
+		next_entry_token[prev_string->len] = output_token[0];
+		lzw_table_insert(ctx, next_entry_token, next_entry_size);
+	}
+
+	ctx->old = code;
+	return H_MAKE_BYTES(output_token, output_length);
+}
+
+/*
+ * Semantic action for the stream body: concatenates the TT_BYTES fragments produced by the
+ * per-code actions into one buffer allocated from the parse arena.
+ * p: parse result whose AST is the sequence of fragments; u: unused.
+ * Returns a TT_BYTES token holding the concatenation.
+ */
+HParsedToken*
+act_LZW_body(const HParseResult *p, void *u)
+{
+	size_t num_fragments = h_seq_len(p->ast);
+	size_t total_buffer_size = 0;
+	size_t index = 0;
+	uint8_t * buffer;
+
+	/* first pass: total size (size_t counter, matching the type of num_fragments) */
+	for(size_t i = 0; i < num_fragments; i++)
+		total_buffer_size += H_FIELD_BYTES(i).len;
+	buffer = h_arena_malloc(p->arena, sizeof(uint8_t) * total_buffer_size);
+
+	/* second pass: append each fragment; an empty fragment may carry a NULL token, so skip it */
+	for(size_t i = 0; i < num_fragments; i++)
+	{
+		HBytes fragment = H_FIELD_BYTES(i);
+		if(fragment.len == 0)
+			continue;
+		memcpy(&buffer[index], fragment.token, fragment.len);
+		index += fragment.len;
+	}
+	return H_MAKE_BYTES(buffer, total_buffer_size);
+}
+
+
+/*
+ * Semantic action for a complete LZW stream: joins the first decoded byte with the rest of
+ * the decompressed body and returns the result as one TT_BYTES token.
+ *
+ * The AST this semantic action receives is a sequence that looks something like this:
+ *	elements[0] -> TT_BYTES representing the initial clear code
+ *	elements[1] -> TT_BYTES representing the first code (should be a literal)
+ *	elements[2] -> TT_BYTES containing the decompressed data (except for the first code)
+ *	elements[3] -> TT_UINT representing the EOD code
+ *	elements[4] -> TT_UINT representing the remaining bits from EOD to the end of the byte, should be 0
+ * Only elements[1] and elements[2] are read here.
+ *
+ * p: parse result carrying the sequence described above
+ * u: unused
+ * Returns a TT_BYTES token whose buffer comes from calloc(), not from the parse arena.
+ *
+ * NOTE(review): the calloc() result is not checked, and nothing in this file frees the
+ * returned buffer -- presumably the caller takes ownership; confirm.
+ *
+ * XXX: Memory use would be greatly decreased if the two source buffers could be freed here
+ * (allocated in act_LZW_firstcode and act_LZW_body)
+ */
+HParsedToken*
+act_LZW_data(const HParseResult *p, void *u)
+{
+	HBytes head = H_FIELD_BYTES(1);
+	HBytes tail = H_FIELD_BYTES(2);
+	size_t joined_len = head.len + tail.len;
+	uint8_t * joined = calloc(joined_len, sizeof(uint8_t));
+
+	/* head first, tail immediately after it */
+	memcpy(joined, head.token, head.len);
+	memcpy(joined + head.len, tail.token, tail.len);
+
+	return H_MAKE_BYTES(joined, joined_len);
+}
+
+/* Allocates the shared LZW context, pre-fills the 256 literal entries and builds the grammar stored in p_lzwdata. */
+void init_LZW_parser()
+{
+	context = malloc(sizeof(LZW_context_T)); // NOTE(review): none of the malloc() results in this function are checked
+	memset(context, 0, sizeof(*context));
+	context->next = 258;
+	/* set up literals in LZW code table: entry i holds the single byte i */
+	for(int i = 0; i < 256; i++)
+	{
+		uint8_t *token = malloc(sizeof(uint8_t));
+		*token = i;
+		HBytes *lit = malloc(sizeof(HBytes));
+		lit->token = token;
+		lit->len = 1;
+		context->lzw_code_table[i] = lit;
+	}
+	context->earlychange = 1;
+	context->old = 257;
+	/* Raw N-bit reads; each is accepted or rejected by the matching validate_LZW_<N>bitcodeword. */
+	H_VDRULE(LZW_9bitcodeword, h_bits(9, false), context);
+	H_VDRULE(LZW_10bitcodeword, h_bits(10, false), context);
+	H_VDRULE(LZW_11bitcodeword, h_bits(11, false), context);
+	H_VDRULE(LZW_12bitcodeword, h_bits(12, false), context);
+	/* Same reads, additionally restricted to codes below 258 by validate_LZW_<N>bitlitspec. */
+	H_VDRULE(LZW_9bitlitspec, h_bits(9, false), context);
+	H_VDRULE(LZW_10bitlitspec, h_bits(10, false), context);
+	H_VDRULE(LZW_11bitlitspec, h_bits(11, false), context);
+	H_VDRULE(LZW_12bitlitspec, h_bits(12, false), context);
+
+	H_RULE(LZW_remainingbits, h_many(h_bits(1, false))); //XXX: could validate that these bits are 0?
+
+	H_ADRULE(LZW_firstcode, LZW_9bitlitspec, context); // First code is always a literal, sets ctx->old
+
+	H_AVDRULE(LZW_clear, h_choice(LZW_9bitlitspec, LZW_10bitlitspec, LZW_11bitlitspec, LZW_12bitlitspec, NULL), context);
+	H_VDRULE(LZW_eod, h_choice(LZW_9bitlitspec, LZW_10bitlitspec, LZW_11bitlitspec, LZW_12bitlitspec, NULL), context);
+	H_AVDRULE(LZW_literal, h_choice(LZW_9bitlitspec, LZW_10bitlitspec, LZW_11bitlitspec, LZW_12bitlitspec, NULL), context);
+	H_ADRULE(LZW_codeword, h_choice(LZW_9bitcodeword, LZW_10bitcodeword, LZW_11bitcodeword, LZW_12bitcodeword, NULL), context);
+	/* Body: one or more literal / clear / codeword items, none of which may match LZW_eod. */
+	H_ADRULE(LZW_body, h_many1(h_butnot(h_choice(LZW_literal, h_ignore(LZW_clear), LZW_codeword, NULL), LZW_eod)), context);
+	/* Whole stream: clear, first code, body, EOD, then any leftover bits. */
+	H_ADRULE(LZW_data, h_sequence(LZW_clear, LZW_firstcode, LZW_body, LZW_eod, LZW_remainingbits, NULL), context);
+	p_lzwdata = LZW_data;
+}
+
+
+/* Runs the LZW_data grammar stored in p_lzwdata (set by init_LZW_parser) over input[0..length)
+ * and returns whatever h_parse() returns. Decoder state is not reset here; see init_LZW_context. */
+HParseResult* parse_LZW_data(const uint8_t* input, size_t length)
+{
+	HParseResult *result = h_parse(p_lzwdata, input, length);
+	return result;
+}
+
+
+/* Prepares the shared context for a new stream: frees every dynamic dictionary entry, stores earlychange. */
+void init_LZW_context(int earlychange)
+{
+	for(HBytes ** slot = &context->lzw_code_table[258]; slot != &context->lzw_code_table[4096]; ++slot)
+	{
+		if(*slot == NULL)
+			continue;
+		free((uint8_t *) (*slot)->token); // outputs handed to the parser are copies (see act_LZW_codeword), so nothing dangles
+		free(*slot);
+		*slot = NULL;
+	}
+	context->earlychange = earlychange;
+	context->old = 257;
+	context->next = 258;
+}
diff --git a/lzw.h b/lzw.h
new file mode 100644
index 0000000000000000000000000000000000000000..183ee301cbdf38fca9bbb6bfc66d5adff28cda39
--- /dev/null
+++ b/lzw.h
@@ -0,0 +1,35 @@
+#ifndef PDF_LZW_H
+#define PDF_LZW_H
+
+#include <hammer/hammer.h>
+
+
+typedef struct LZW_context_S	/* Mutable state of the LZW decoder; reset by init_LZW_context() */
+{
+	/*
+	 * Table of the byte sequences represented by each LZW code, indexed by code (4096 slots, one per 12-bit code).
+	 * 0-255 are literals and 256 is the reset code; those slots are special and are not pre-filled (we could, but it's probably not necessary). init_LZW_context() only ever frees slots 258 and up.
+	 */
+	HBytes * lzw_code_table[4096];
+
+	/*
+	 * Holds the next expected LZW code (reset to 258 by init_LZW_context()). We also use this for telling LZW_9bitcodeword, LZW_10bitcodeword, etc. apart. Parses fail if "next" is larger than what can be represented on that many bits.
+	 */
+	int next;
+
+	/*
+	 * Previous LZW code, used to construct the next string added to the table. Reset to 257 by init_LZW_context().
+	 */
+	uint64_t old;
+
+	/*
+	 * EarlyChange = 1 means the bit size is increased "one code early"; EarlyChange = 0 means "code length increases shall be postponed as long as possible".
+	 */
+	int earlychange;
+} LZW_context_T;
+
+void init_LZW_parser(void);	/* Takes no arguments ("(void)" makes this a real prototype); main() in pdf.c calls it right after init_parser() */
+HParseResult * parse_LZW_data(const uint8_t* input, size_t length);	/* Parses input[0..length) as LZW data; LZWDecode treats a NULL return as a parse error */
+void init_LZW_context(int earlychange);	/* Resets the decoder state; LZWDecode calls it before every parse_LZW_data() with earlychange 0 or 1 */
+
+#endif // PDF_LZW_H
diff --git a/pdf.c b/pdf.c
index 78f284d1e65765df0ff023b57d03751bf1a39321..3268e0775b4e5044902bf3718d91cf5b8912934b 100644
--- a/pdf.c
+++ b/pdf.c
@@ -3197,133 +3197,7 @@ FlateDecode(const Dict *parms, HBytes b, HParser *p)
 #endif
 
 
-
-
-/* LZW helpers */
-
-typedef struct
-{
-	uint8_t *lzw_buf;
-	size_t total_buf_size;
-	size_t write_head;
-	size_t write_tail;
-	uint8_t write_checksum;
-	size_t eof_loc;
-
-	HBytes *input_stream;
-	size_t read_head;
-	size_t read_tail;
-	uint8_t read_checksum;
-} lzwspec;
-
-lzwspec *cur_lzw_spec;
-
-/* used by write_lzw_buffer to get more space for decoding if needed */
-void
-grow_lzw_buffer(size_t amount)
-{
-	uint8_t *ret_buf = realloc(cur_lzw_spec->lzw_buf, (cur_lzw_spec->total_buf_size+amount) * sizeof(uint8_t));
-	if(ret_buf != NULL)
-	{
-		cur_lzw_spec->total_buf_size += amount;
-		cur_lzw_spec->lzw_buf = ret_buf;
-	}
-	else
-	{
-		fprintf(stderr, "LZWDecode: h_arena_realloc() failed");
-		return;
-	}
-}
-
-lzwspec *
-new_lzw_spec(HBytes *bytes)
-{
-	size_t const BUFSIZE = sizeof(uint8_t) * 1024;
-	lzwspec *ret = malloc(sizeof(lzwspec));
-	memset(ret, 0, sizeof(lzwspec));
-	ret->input_stream = bytes;
-	ret->lzw_buf = malloc(BUFSIZE);
-	ret->total_buf_size = BUFSIZE;
-	return ret;
-}
-
-void
-delete_lzw_spec(lzwspec *spec)
-{
-	free(spec->lzw_buf);
-	free(spec);
-}
-
-void
-bind_lzw_spec(lzwspec *spec)
-{
-	cur_lzw_spec = spec;
-}
-
-
-#include "lzw-lib.h"
-
-/* Buffer writer function for the lzw-ab implementation, with a fixed signature.
- * Although the type is defined as int, it is expected to write one byte at a time.
- * Modifies cur_lzw_spec. Set up the lzw spec to use with bind_lzw_spec() */
-
-void
-write_lzw_buffer(int value)
-{
-	size_t const BUFSIZE = sizeof(uint8_t) * 1024;
-
-	if(!cur_lzw_spec->lzw_buf)
-	{
-		fprintf(stderr, "LZWDecode: lzw_buf is null!");
-		assert(cur_lzw_spec->lzw_buf != NULL);
-	}
-
-	assert(cur_lzw_spec->write_head <= cur_lzw_spec->total_buf_size);
-
-	if (value == EOF) {
-        cur_lzw_spec->lzw_buf[cur_lzw_spec->write_head] = (uint8_t) value;
-        cur_lzw_spec->eof_loc = cur_lzw_spec->write_head;
-        cur_lzw_spec->write_head++;
-        return;
-    }
-
-	/* We can get away with this cast due to writing single bytes. */
-    cur_lzw_spec->lzw_buf[cur_lzw_spec->write_head++] = (uint8_t) value;
-
-	/* If you looked at lzw-ab's code, the write head is reset here
-	 * This function uses write_head as the offset of the last written item */
-    if (cur_lzw_spec->write_head >= cur_lzw_spec->total_buf_size)
-    {
-        grow_lzw_buffer(BUFSIZE);
-    }
-
-    cur_lzw_spec->write_checksum = cur_lzw_spec->write_checksum * 3 + (uint8_t) value;
-}
-
-
-/* Fixed signature function for reading bytes. Modifies cur_lzw_spec. Set cur_lzw_spec
- * with bind_lzw_spec() */
-int read_lzw_buffer(void)
-{
-	uint8_t byte_read;
-	int ret_value;
-
-	/* Input data is already waiting in the buffer */
-    if (cur_lzw_spec->read_head == cur_lzw_spec->read_tail)
-        cur_lzw_spec->read_tail = cur_lzw_spec->input_stream->len;
-
-    if (cur_lzw_spec->read_head < cur_lzw_spec->read_tail)
-    {
-        byte_read = cur_lzw_spec->input_stream->token[cur_lzw_spec->read_head++];
-        cur_lzw_spec->read_checksum = cur_lzw_spec->read_checksum * 3 + byte_read;
-        ret_value = byte_read;
-    }
-    else
-        ret_value = EOF;
-
-    return ret_value;
-}
-
+#include "lzw.h"
 
 HParseResult *
 LZWDecode(const Dict *parms, HBytes b, HParser *p)
@@ -3331,9 +3205,11 @@ LZWDecode(const Dict *parms, HBytes b, HParser *p)
 	struct predictor pred = {1, 1, 8, 1};
 	int (*depredict)(struct predictor *, uint8_t *, size_t);
 	HParseResult *res;
+	HParseResult *tmp_res;
 	int done;
-	int ret;
+	//int ret;
 	const HParsedToken *v;
+	int earlychange;
 
 	/* set up the predictor (if any) */
 	#define SETPARM(VAR,STR) do {					\
@@ -3382,23 +3258,40 @@ LZWDecode(const Dict *parms, HBytes b, HParser *p)
 			err(1, "LZWDecode");
 	}
 
-	lzwspec *lzw_spec = new_lzw_spec(&b);
-	bind_lzw_spec(lzw_spec);
+	v = dictentry(parms, "EarlyChange");
+	if(v != NULL && v->token_type == TT_SINT && v->sint == 0)
+	{
+		earlychange = 0;
+	}
+	else
+	{
+		earlychange = 1;
+	}
+
+	init_LZW_context(earlychange);
+	tmp_res = parse_LZW_data(b.token, b.len);
 
-	ret = lzw_decompress(write_lzw_buffer, read_lzw_buffer);
-	if (ret) {
-		fprintf(stderr, "lzw_decompress: error (%d)\n", ret);
-		assert(!"LZWDecode: failed to decompress\n");
+	if(!tmp_res)
+	{
+		fprintf(stderr, "parse error in LZWDecode filter");
+		return NULL;
 	}
-	done = depredict(&pred, cur_lzw_spec->lzw_buf, cur_lzw_spec->write_head-1);
-	assert(!done);	// XXX ITERATIVE
+
+	assert(tmp_res->ast->token_type == TT_BYTES);
+
+	uint8_t * tmp_buf = malloc(sizeof(uint8_t) * tmp_res->ast->bytes.len);
+	memcpy(tmp_buf, tmp_res->ast->bytes.token, tmp_res->ast->bytes.len);
+	done = depredict(&pred, tmp_buf, tmp_res->ast->bytes.len);
+	assert(!done);
+
+	//done = depredict(&pred, res->ast->bytes.token, res->ast->bytes.len);
+	//assert(!done);
 
 	// SR::TODO:: Do a H_MAKE rather than a parse and let the caller do the parse
-	res = h_parse(p, pred.out, pred.nout);
+	res = h_parse(p, pred.out, pred.nout); // XXX: should kstream try to decode streams with no Type?
+	//res = h_parse(p, tmp_res->ast->bytes.token, tmp_res->ast->bytes.len); // XXX depred buffer
 	free(pred.out);
-
-	bind_lzw_spec(NULL);
-	delete_lzw_spec(lzw_spec);
+	free(tmp_buf);
 
 	return res;
 }
@@ -5622,6 +5515,7 @@ main(int argc, char *argv[])
 	/* build parsers */
 	aux = (struct Env){infile, input, sz};
 	init_parser(&aux);
+	init_LZW_parser();
 
 
 	/* parse all cross-reference sections and trailer dictionaries */