From 58caea11bf4cabe76499cfa218461a1cf0cbe6ba Mon Sep 17 00:00:00 2001
From: Pompolic <pompolic@special-circumstanc.es>
Date: Fri, 29 Oct 2021 19:22:31 +0200
Subject: [PATCH] Use arena alloc for most variables in semantic actions

---
 lzw.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/lzw.c b/lzw.c
index cfda29b..eaa543c 100644
--- a/lzw.c
+++ b/lzw.c
@@ -54,7 +54,7 @@ act_LZW_firstcode(const HParseResult *p, void *u)
 	uint8_t * next_entry_token;*/
 	LZW_context_T * ctx = (LZW_context_T *) u;
 	uint64_t code = H_CAST_UINT(p->ast);
-	uint8_t *output = malloc(sizeof(uint8_t));
+	uint8_t *output = H_ALLOC(uint8_t);
 	*output = (uint8_t) code;
 	//fprintf(debug, "firstcode code: %lu, next: %u\n", p->ast->uint, ctx->next); // DEBUG
 	//fprintf(debug, "%lu ", p->ast->uint); // DEBUG
@@ -193,12 +193,16 @@ act_LZW_literal(const HParseResult *p, void *u)
 	/*
 	 * Literals go from 0-255, so they are guaranteed to fit into 1 byte. See also: validate_LZW_literal
 	 */
-	uint8_t *output = malloc(sizeof(uint8_t));
+	uint8_t *output = H_ALLOC(uint8_t);
 	*output = (uint8_t) code;
 	//fprintf(debug, "lit: %lu, next: %u\n", code, ctx->next); // DEBUG
 	//fprintf(debug, "%lu ", code); // DEBUG
 	//fflush(debug); // DEBUG
 
+	/*
+	 * Update the dictionary with the new string. Dictionary entries are deliberately allocated with system malloc()
+	 * (here and in act_LZW_codeword) rather than from the parse arena, because LZW_clear_table/init_LZW_context free() them
+	 */
 	prev_string = ctx->lzw_code_table[ctx->old]; //XXX: insert_table(ctx, uint8_t*) function
 	next_entry_size = prev_string->len + 1;
 	next_entry_token = malloc(sizeof(uint8_t) * next_entry_size);
@@ -329,7 +333,7 @@ act_LZW_body(const HParseResult *p, void *u)
 		total_buffer_size += H_FIELD_BYTES(i).len; // XXX can seq->elements[i] be NULL due to h_ignore?
 	}
 
-	buffer = malloc(sizeof(uint8_t) * total_buffer_size); // XXX arena alloc, calloc
+	buffer = h_arena_malloc(p->arena, sizeof(uint8_t) * total_buffer_size); // XXX calloc
 
 	/* go through parse result, merge bytes */
 	for(int i = 0; i < num_fragments; i++)
@@ -370,6 +374,7 @@ act_LZW_data(const HParseResult *p, void *u)
 	buffer = malloc(sizeof(uint8_t) * total_buffer_size); // XXX arena alloc, calloc
 	memcpy(buffer, first.token, first.len);
 	memcpy(buffer+first.len, rest.token, rest.len);
+	// XXX: Memory use would be greatly decreased if first.token and rest.token could be freed here (allocated in act_LZW_firstcode and act_LZW_body)
 
 	//fprintf(debug, "\n\n"); // DEBUG
 	/*for(int i = 258; i < ctx->next; ++i) // DEBUG
@@ -443,7 +448,7 @@ void init_LZW_context(int earlychange)
 	{
 		if(context->lzw_code_table[i] != NULL)
 		{
-			free((uint8_t *) context->lzw_code_table[i]->token); //XXX: assumption: we copy strings into hbytes instead of pointing to them directly
+			free((uint8_t *) context->lzw_code_table[i]->token); // These can be freed without issue, because HParsedTokens containing them have separate deep copies
 			free(context->lzw_code_table[i]);
 		}
 		context->lzw_code_table[i] = NULL;
-- 
GitLab