From be7dc08f416a652dc6fcfa43a44329a8ec3a6bfb Mon Sep 17 00:00:00 2001
From: Pompolic <pompolic@special-circumstanc.es>
Date: Sat, 23 Oct 2021 20:01:37 +0200
Subject: [PATCH] (WIP) Rework act_LZW_data

---
 lzw.c | 39 ++++++++++++++++++++++++++++++++-------
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/lzw.c b/lzw.c
index bc0df2b..ebe3ea0 100644
--- a/lzw.c
+++ b/lzw.c
@@ -341,13 +341,9 @@ act_LZW_codeword(const HParseResult *p, void *u)
 	}
 }
 
-
 HParsedToken*
-act_LZW_data(const HParseResult *p, void *u)
+act_LZW_body(const HParseResult *p, void *u)
 {
-	// XXX: There's probably a better way to merge a lot of HBytes into one large HBytes
-	// XXX: does it need flattening beforehand?
-	//HCountedArray * seq = H_CAST_SEQ(p->ast);
 	size_t index = 0;
 	size_t total_buffer_size = 0;
 	size_t num_fragments = h_seq_len(p->ast);
@@ -356,7 +352,7 @@ act_LZW_data(const HParseResult *p, void *u)
 	/* sum total bytes in array, alloc buffer */
 	for(int i = 0; i < num_fragments; i++)
 	{
-		total_buffer_size += H_FIELD_BYTES(i).len;
+		total_buffer_size += H_FIELD_BYTES(i).len; // XXX can seq->elements[i] be NULL due to h_ignore?
 	}
 
 	buffer = malloc(sizeof(uint8_t) * total_buffer_size); // XXX arena alloc, calloc
@@ -373,6 +369,33 @@ act_LZW_data(const HParseResult *p, void *u)
 }
 
 
+HParsedToken*
+act_LZW_data(const HParseResult *p, void *u)
+{
+	/* Joins the bytes of the first code and of the body into one bytes token. The AST received is a sequence that looks something like this:
+		elements[0] -> TT_BYTES representing the initial clear code (not read here)
+		elements[1] -> TT_BYTES representing the first code (should be a literal)
+		elements[2] -> TT_BYTES containing the decompressed data (except for the first code)
+		elements[3] -> TT_UINT representing the EOD code (not read here)
+		elements[4] -> TT_UINT representing the remaining bits from EOD to the end of the byte, should be 0 (not read here)
+	*/
+
+	// Only elements[1] and elements[2] are read below; the u argument is unused.
+	size_t total_buffer_size = 0;
+	uint8_t * buffer;
+	HBytes first = H_FIELD_BYTES(1);
+	HBytes rest = H_FIELD_BYTES(2);
+
+	total_buffer_size = first.len + rest.len;
+
+	buffer = malloc(sizeof(uint8_t) * total_buffer_size); // XXX arena alloc, calloc. NOTE(review): result is not NULL-checked, and nothing in this function frees it -- confirm who owns the buffer
+	memcpy(buffer, first.token, first.len);
+	memcpy(buffer+first.len, rest.token, rest.len); // appended directly after the first code's bytes
+
+	return H_MAKE_BYTES(buffer, total_buffer_size);
+}
+
+
 void init_LZW_parser()
 {
 	context = malloc(sizeof(LZW_context_T));
@@ -409,7 +432,9 @@ void init_LZW_parser()
 	H_AVDRULE(LZW_literal, h_choice(LZW_9bitlitspec, LZW_10bitlitspec, LZW_11bitlitspec, LZW_12bitlitspec, NULL), context);
 	H_ADRULE(LZW_codeword, h_choice(LZW_9bitcodeword, LZW_10bitcodeword, LZW_11bitcodeword, LZW_12bitcodeword, NULL), context);
 
-	H_ADRULE(LZW_data, h_sequence(LZW_clear, LZW_firstcode, h_many1(h_butnot(h_choice(LZW_literal, LZW_clear, LZW_codeword, NULL), LZW_eod)), LZW_eod, LZW_remainingbits, NULL), context);
+	H_ADRULE(LZW_body, h_many1(h_butnot(h_choice(LZW_literal, h_ignore(LZW_clear), LZW_codeword, NULL), LZW_eod)), context);
+
+	H_ADRULE(LZW_data, h_sequence(LZW_clear, LZW_firstcode, LZW_body, LZW_eod, LZW_remainingbits, NULL), context);
 	p_lzwdata = LZW_data;
 }
 
-- 
GitLab