diff --git a/lzw.c b/lzw.c
index cf6a221a0365f4ffbe8a0d56ac46a36d76ded1e0..995a9856c44a68d9abcc3fbb6bf8651e39597d8e 100644
--- a/lzw.c
+++ b/lzw.c
@@ -81,8 +81,9 @@ lzw_table_extend(LZW_context_T *ctx, int code)
 /*
  * Assemble the output sequence represented by the given code word.
  * The given buffer must have the appropriate size.
+ * Returns the number of bytes written.
  */
-static void
+static size_t
 lzw_code_string(LZW_context_T *ctx, int code, uint8_t *buf)
 {
 	size_t i, n;
@@ -94,6 +95,8 @@ lzw_code_string(LZW_context_T *ctx, int code, uint8_t *buf)
 		code = ctx->table[code].prefix;
 	}
 	assert(code == -1); /* reached the end */
+
+	return n;
 }
 
 HParser *p_lzwdata;
@@ -181,8 +184,6 @@ static HParsedToken*
 act_output(const HParseResult *p, void *u)
 {
 	uint64_t code = H_CAST_UINT(p->ast);
-	uint8_t * output_token;
-	size_t output_length;
 	LZW_context_T * ctx = (LZW_context_T *) u;
 
 	//fprintf(debug, "code: %lu, next: %u\n", code, ctx->next); // DEBUG
@@ -209,68 +210,80 @@ act_output(const HParseResult *p, void *u)
 	lzw_table_extend(ctx, code);
 
 	/*
-	 * Assemble and return the output string.
+	 * Just return the code again.
+	 * We will assemble the output in act_lzwblock() below.
 	 */
-	output_length = ctx->table[code].len;
-	output_token = h_arena_malloc(p->arena, output_length);
-	lzw_code_string(ctx, code, output_token);
-	return H_MAKE_BYTES(output_token, output_length);
+	return (HParsedToken *)p->ast; // XXX casting away the const OK?
 }
 
-static HParsedToken*
-act_lzwbody(const HParseResult *p, void *u)
+/*
+ * Assemble the string represented by a block of code words under a given
+ * table. The incoming HParsedToken is a sequence of code words (TT_UINT).
+ */
+static HParsedToken *
+act_lzwblock(const HParseResult *p, void *u)
 {
-	size_t index = 0;
-	size_t total_buffer_size = 0;
-	size_t num_fragments = h_seq_len(p->ast);
-	uint8_t * buffer;
-
-	/* sum total bytes in array, alloc buffer */
-	for(int i = 0; i < num_fragments; i++)
-	{
-		total_buffer_size += H_FIELD_BYTES(i).len;
+	HCountedArray *seq = H_CAST_SEQ(p->ast);
+	LZW_context_T *ctx = u;
+	uint8_t *buf, *cur;
+	size_t sz, i;
+	int code;
+
+	/* determine total output size, alloc buffer */
+	sz = 0;
+	for (i = 0; i < seq->used; i++) {
+		code = (int) H_CAST_UINT(seq->elements[i]);
+		sz += ctx->table[code].len;
 	}
+	buf = h_arena_malloc(p->arena, sz);
 
-	buffer = h_arena_malloc(p->arena, sizeof(uint8_t) * total_buffer_size); // XXX arena alloc, calloc
-
-	/* go through parse result, merge bytes */
-	for(int i = 0; i < num_fragments; i++)
-	{
-		size_t len = H_FIELD_BYTES(i).len;
-		memcpy(&buffer[index], H_FIELD_BYTES(i).token, len);
-		index += len;
+	/* go through sequence, merge output bytes into buf */
+	cur = buf;
+	for (i = 0; i < seq->used; i++) {
+		code = (int) H_CAST_UINT(seq->elements[i]);
+		cur += lzw_code_string(ctx, code, cur);
 	}
+	assert(cur == buf + sz);
 
 	//fprintf(debug, "\n\n"); // DEBUG
-	//fwrite(buffer, 1, total_buffer_size, debug); // DEBUG
+	//fwrite(buf, 1, sz, debug); // DEBUG
 	//fflush(debug); // DEBUG
 
-	return H_MAKE_BYTES(buffer, total_buffer_size);
+	return H_MAKE_BYTES(buf, sz);
 }
 
-
-static HParsedToken*
+/*
+ * Concatenate blocks to form the final output string.
+ * The incoming HParsedToken is a sequence of HBytes.
+ */
+static HParsedToken *
 act_lzwdata(const HParseResult *p, void *u)
 {
-	/* The AST this semantic action receives is a sequence that looks something like this:
-		elements[0] -> TT_BYTES representing the initial clear code
-		elements[1] -> TT_BYTES containing the decompressed data
-		elements[2] -> TT_UINT representing the EOD code
-	*/
-
-	//HCountedArray * seq = H_CAST_SEQ(p->ast);
-	//LZW_context_T *ctx = (LZW_context_T*) u; // DEBUG
-
-	//fprintf(debug, "\n\n"); // DEBUG
-	/*for(int i = 258; i < ctx->next; ++i) // DEBUG
-	{
-		fprintf(debug, "i: %u, str: ", i);
-		fwrite(ctx->lzw_code_table[i].token, ctx->lzw_code_table[i].len, 1, debug);
-		fprintf(debug, "\n");
+	HCountedArray *seq = H_CAST_SEQ(p->ast);
+	HBytes bs;
+	uint8_t *buf, *cur;
+	size_t sz, i;
+
+	/* fast path: single element? nothing to do */
+	if (seq->used == 1)
+		return seq->elements[0];
+
+	/* determine total output size, alloc buffer */
+	sz = 0;
+	for (i = 0; i < seq->used; i++)
+		sz += H_CAST_BYTES(seq->elements[i]).len;
+	buf = h_arena_malloc(p->arena, sz);
+
+	/* go through sequence, copying bytes into buf */
+	cur = buf;
+	for (i = 0; i < seq->used; i++) {
+		bs = H_CAST_BYTES(seq->elements[i]);
+		memcpy(cur, bs.token, bs.len);
+		cur += bs.len;
 	}
-	fflush(debug); // DEBUG */
+	assert(cur == buf + sz);
 
-	return H_FIELD_TOKEN(1);
+	return H_MAKE_BYTES(buf, sz);
 }
 
 
@@ -299,8 +312,8 @@ void init_LZW_parser()
 	H_VDRULE (eod, codeword, context);
 	H_AVDRULE(output, codeword, context);
 
-	H_ARULE(lzwbody, h_many(h_choice(clear, output, NULL)));
-	H_ARULE(lzwdata, h_sequence(clear, lzwbody, eod, NULL));
+	H_ADRULE(lzwblock, h_right(clear, h_many(output)), context);
+	H_ARULE (lzwdata, h_left(h_many1(lzwblock), eod));
 	// XXX validate that the last byte is zero-padded?
 	// XXX require h_end_p()?
 