diff --git a/lzw.c b/lzw.c
index cf6a221a0365f4ffbe8a0d56ac46a36d76ded1e0..995a9856c44a68d9abcc3fbb6bf8651e39597d8e 100644
--- a/lzw.c
+++ b/lzw.c
@@ -81,8 +81,9 @@ lzw_table_extend(LZW_context_T *ctx, int code)
 /*
  * Assemble the output sequence represented by the given code word.
  * The given buffer must have the appropriate size.
+ * Returns the number of bytes written.
  */
-static void
+static size_t
 lzw_code_string(LZW_context_T *ctx, int code, uint8_t *buf)
 {
 	size_t i, n;
@@ -94,6 +95,8 @@ lzw_code_string(LZW_context_T *ctx, int code, uint8_t *buf)
 		code = ctx->table[code].prefix;
 	}
 	assert(code == -1);	/* reached the end */
+
+	return n;
 }
 
 HParser *p_lzwdata;
@@ -181,8 +184,6 @@ static HParsedToken*
 act_output(const HParseResult *p, void *u)
 {
 	uint64_t code = H_CAST_UINT(p->ast);
-	uint8_t * output_token;
-	size_t output_length;
 	LZW_context_T * ctx = (LZW_context_T *) u;
 
 	//fprintf(debug, "code: %lu, next: %u\n", code, ctx->next); // DEBUG
@@ -209,68 +210,80 @@ act_output(const HParseResult *p, void *u)
 	lzw_table_extend(ctx, code);
 
 	/*
-	 * Assemble and return the output string.
+	 * Just return the code again.
+	 * We will assemble the output in act_lzwblock() below.
 	 */
-	output_length = ctx->table[code].len;
-	output_token = h_arena_malloc(p->arena, output_length);
-	lzw_code_string(ctx, code, output_token);
-	return H_MAKE_BYTES(output_token, output_length);
+	return (HParsedToken *)p->ast;	// XXX casting away the const OK?
 }
 
-static HParsedToken*
-act_lzwbody(const HParseResult *p, void *u)
+/*
+ * Semantic action: expand one block of code words into its output bytes.
+ * p->ast is a sequence of TT_UINT codes; u is the LZW_context_T with the table.
+ */
+static HParsedToken *
+act_lzwblock(const HParseResult *p, void *u)
 {
-	size_t index = 0;
-	size_t total_buffer_size = 0;
-	size_t num_fragments = h_seq_len(p->ast);
-	uint8_t * buffer;
-
-	/* sum total bytes in array, alloc buffer */
-	for(int i = 0; i < num_fragments; i++)
-	{
-		total_buffer_size += H_FIELD_BYTES(i).len;
+	HCountedArray *seq = H_CAST_SEQ(p->ast);	/* the code words */
+	LZW_context_T *ctx = u;
+	uint8_t *buf, *cur;	/* output buffer, current write position */
+	size_t sz, i;		/* total output size, loop index */
+	int code;
+
+	/* first pass: sum the string lengths of all codes, then allocate */
+	sz = 0;
+	for (i = 0; i < seq->used; i++) {
+		code = (int) H_CAST_UINT(seq->elements[i]);
+		sz += ctx->table[code].len;
 	}
+	buf = h_arena_malloc(p->arena, sz);	/* taken from p->arena */
 
-	buffer = h_arena_malloc(p->arena, sizeof(uint8_t) * total_buffer_size); // XXX arena alloc, calloc
-
-	/* go through parse result, merge bytes */
-	for(int i = 0; i < num_fragments; i++)
-	{
-		size_t len = H_FIELD_BYTES(i).len;
-		memcpy(&buffer[index], H_FIELD_BYTES(i).token, len);
-		index += len;
+	/* second pass: write each code's string into buf, back to back */
+	cur = buf;
+	for (i = 0; i < seq->used; i++) {
+		code = (int) H_CAST_UINT(seq->elements[i]);
+		cur += lzw_code_string(ctx, code, cur);	/* returns bytes written */
 	}
+	assert(cur == buf + sz);	/* both passes must agree on the size */
 
 	//fprintf(debug, "\n\n"); // DEBUG
-	//fwrite(buffer, 1, total_buffer_size, debug); // DEBUG
+	//fwrite(buf, 1, sz, debug); // DEBUG
 	//fflush(debug); // DEBUG
 
-	return H_MAKE_BYTES(buffer, total_buffer_size);
+	return H_MAKE_BYTES(buf, sz);	/* bytes token over the assembled buffer */
 }
 
-
-static HParsedToken*
+/*
+ * Semantic action: concatenate the per-block outputs into one bytes token.
+ * p->ast is a sequence of bytes tokens (HBytes); u is not used here.
+ */
+static HParsedToken *
 act_lzwdata(const HParseResult *p, void *u)
 {
-	/* The AST this semantic action receives is a sequence that looks something like this:
-		elements[0] -> TT_BYTES representing the initial clear code
-		elements[1] -> TT_BYTES containing the decompressed data
-		elements[2] -> TT_UINT representing the EOD code
-	*/
-
-	//HCountedArray * seq = H_CAST_SEQ(p->ast);
-	//LZW_context_T *ctx = (LZW_context_T*) u; // DEBUG
-
-	//fprintf(debug, "\n\n"); // DEBUG
-	/*for(int i = 258; i < ctx->next; ++i) // DEBUG
-	{
-		fprintf(debug, "i: %u, str: ", i);
-		fwrite(ctx->lzw_code_table[i].token, ctx->lzw_code_table[i].len, 1, debug);
-		fprintf(debug, "\n");
+	HCountedArray *seq = H_CAST_SEQ(p->ast);	/* the blocks */
+	HBytes bs;		/* block currently being copied */
+	uint8_t *buf, *cur;	/* output buffer, current write position */
+	size_t sz, i;		/* total output size, loop index */
+
+	/* fast path: a single block is already the result; return it uncopied */
+	if (seq->used == 1)
+		return seq->elements[0];
+
+	/* first pass: add up the block lengths, then allocate the result */
+	sz = 0;
+	for (i = 0; i < seq->used; i++)
+		sz += H_CAST_BYTES(seq->elements[i]).len;
+	buf = h_arena_malloc(p->arena, sz);	/* taken from p->arena */
+
+	/* second pass: copy each block's bytes into buf, back to back */
+	cur = buf;
+	for (i = 0; i < seq->used; i++) {
+		bs = H_CAST_BYTES(seq->elements[i]);
+		memcpy(cur, bs.token, bs.len);
+		cur += bs.len;
 	}
-	fflush(debug); // DEBUG */
+	assert(cur == buf + sz);	/* every byte counted was copied */
 
-	return H_FIELD_TOKEN(1);
+	return H_MAKE_BYTES(buf, sz);	/* bytes token over the joined buffer */
 }
 
 
@@ -299,8 +312,8 @@ void init_LZW_parser()
 	H_VDRULE (eod,		codeword, context);
 	H_AVDRULE(output,	codeword, context);
 
-	H_ARULE(lzwbody,	h_many(h_choice(clear, output, NULL)));
-	H_ARULE(lzwdata,	h_sequence(clear, lzwbody, eod, NULL));
+	H_ADRULE(lzwblock,	h_right(clear, h_many(output)), context);
+	H_ARULE (lzwdata,	h_left(h_many1(lzwblock), eod));
 	    // XXX validate that the last byte is zero-padded?
 	    // XXX require h_end_p()?