diff --git a/pdf.c b/pdf.c
index 04bb83cad66ec455d844da6e74db28d9238f62a2..800aa0d31fe56ae0471110a98ff797407052e66b 100644
--- a/pdf.c
+++ b/pdf.c
@@ -4,7 +4,7 @@
  * Paul Vines 2020
  */
 
-#include <string.h> /* strncmp(), memset() */
+#include <string.h> /* strncmp(), memset(), memcpy() */
 #include <stdlib.h> /* exit() */
 
 #include <hammer/hammer.h>
@@ -250,6 +250,34 @@ act_hupper(const HParseResult *p, void *u)
 	return H_MAKE_UINT(H_CAST_UINT(p->ast) - 'A');
 }
 
+/*
+ * Semantic action for hdigitpair: combines the two hex digit values found in
+ * the parsed sequence into one byte, first digit in the high nibble.
+ * Elements that are not TT_UINT are skipped; exactly two TT_UINT elements are
+ * expected.
+ */
+HParsedToken *
+act_hdigitpair(const HParseResult *p, void *u)
+{
+	HCountedArray *seq = H_CAST_SEQ(p->ast);
+	uint8_t digits[2] = {0, 0};
+	size_t digits_processed = 0;
+
+	for (size_t i = 0; i < seq->used; ++i) {
+		if (seq->elements[i]->token_type != TT_UINT)
+			continue;
+		/* guard the store so a surplus digit can never overrun digits[] */
+		if (digits_processed < 2)
+			digits[digits_processed] = H_CAST_UINT(seq->elements[i]);
+		digits_processed++;
+	}
+
+	/* checked once, after the whole sequence has been scanned */
+	assert(digits_processed == 2);
+
+	return H_MAKE_UINT((uint8_t)((digits[0] << 4) + digits[1]));
+}
+
 HParsedToken *
 act_ahextruncated(const HParseResult *p, void *u)
 {
@@ -265,6 +293,108 @@ act_ahextruncated(const HParseResult *p, void *u)
 	return H_MAKE_UINT(b);
 }
 
+/*
+ * Number of bytes that token t contributes to a flattened byte string: one
+ * for a TT_UINT, 'len' for a TT_BYTES, and the sum over all elements for a
+ * TT_SEQUENCE. Tokens of any other type contribute nothing.
+ */
+static size_t
+flat_nbytes(const HParsedToken *t)
+{
+	size_t n = 0;
+
+	switch (t->token_type) {
+	case TT_UINT:
+		return 1;
+	case TT_BYTES:
+		return H_CAST_BYTES(t).len;
+	case TT_SEQUENCE: {
+		const HCountedArray *seq = H_CAST_SEQ(t);
+
+		for (size_t i = 0; i < seq->used; i++)
+			n += flat_nbytes(seq->elements[i]);
+		return n;
+	}
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Appends the bytes of token t to dst in parse order, using the same rules as
+ * flat_nbytes(), and returns the position just past the last byte written.
+ * dst must have room for flat_nbytes(t) bytes.
+ */
+static uint8_t *
+flat_copy(uint8_t *dst, const HParsedToken *t)
+{
+	switch (t->token_type) {
+	case TT_UINT:
+		*dst++ = (uint8_t)H_CAST_UINT(t);
+		break;
+	case TT_BYTES: {
+		const HBytes *b = &H_CAST_BYTES(t);
+
+		/* memcpy() must not be handed a null pointer, even for 0 bytes */
+		if (b->len > 0) {
+			memcpy(dst, b->token, b->len);
+			dst += b->len;
+		}
+		break;
+	}
+	case TT_SEQUENCE: {
+		const HCountedArray *seq = H_CAST_SEQ(t);
+
+		for (size_t i = 0; i < seq->used; i++)
+			dst = flat_copy(dst, seq->elements[i]);
+		break;
+	}
+	default:
+		break;
+	}
+
+	return dst;
+}
+
+/*
+ * Semantic action for hs_end: returns the first element of the parsed
+ * sequence, a TT_UINT holding the final data byte. The EOD marker that
+ * follows it in the rule is left out of the result.
+ */
+HParsedToken *
+act_hs_end(const HParseResult *p, void *u)
+{
+	HCountedArray *seq = H_CAST_SEQ(p->ast);
+
+	assert(seq->used >= 1);
+
+	return H_MAKE_UINT(H_CAST_UINT(seq->elements[0]));
+}
+
+/*
+ * Semantic action for ahexstream: flattens the parsed byte values into one
+ * TT_BYTES token whose buffer is allocated from the parse arena.
+ *
+ * The value produced by hs_end is the last data byte, not the EOD marker
+ * (act_hs_end() has already left that out), so it is part of the output.
+ * Nested sequences are flattened.
+ *
+ * NOTE(review): h_many(hdigitpair) presumably consumes every complete digit
+ * pair; confirm that hs_end can still match when the stream holds an even
+ * number of digits.
+ */
+HParsedToken *
+act_ahexstream(const HParseResult *p, void *u)
+{
+	size_t required_bytes = flat_nbytes(p->ast);
+	uint8_t *result_bytes;
+
+	result_bytes = h_arena_malloc(p->arena, required_bytes);
+	flat_copy(result_bytes, p->ast);
+
+	return H_MAKE_BYTES(result_bytes, required_bytes);
+}
+
 HParsedToken *
 act_a85zero(const HParseResult *p, void *u)
 {
@@ -317,7 +447,7 @@ validate_a85fivedigits(HParseResult *p, void *u)
 HParsedToken *
 act_a85group(const HParseResult *p, void *u)
 {
-	uint8_t *bytes = h_arena_malloc(p->arena, 4);
+	uint8_t *bytes = h_arena_malloc(p->arena, sizeof(uint8_t) * 4);
 	uint32_t fourbytes = H_CAST_UINT(p->ast);
 
 	bytes[0] = (fourbytes & 0xFF000000) >> 24;
@@ -422,7 +552,25 @@ act_a85partialgroup(const HParseResult *p, void *u)
 	return H_MAKE_BYTES(bytes, bytes_used);
 }
 
-// TODO: flatten sequence in a85string semantic action
+/*
+ * Semantic action for a85string: concatenates the bytes of all decoded groups
+ * into one TT_BYTES token whose buffer is allocated from the parse arena.
+ *
+ * Each TT_BYTES element contributes exactly its own 'len' bytes, so a final
+ * partial group shorter than 4 bytes needs no special case. Nested sequences
+ * are flattened, and elements that carry no bytes are skipped.
+ */
+HParsedToken *
+act_a85string(const HParseResult *p, void *u)
+{
+	size_t required_bytes = flat_nbytes(p->ast);
+	uint8_t *result_bytes;
+
+	result_bytes = h_arena_malloc(p->arena, required_bytes);
+	flat_copy(result_bytes, p->ast);
+
+	return H_MAKE_BYTES(result_bytes, required_bytes);
+}
 
 HParsedToken *
 act_nat(const HParseResult *p, void *u)
@@ -1057,15 +1205,15 @@ init_parser(struct Env *aux)
 	H_VARULE(a85partial4group,
 	    h_repeat_n(MANY_LWS(a85digit), 4));
 	H_ARULE(a85partialgroup, CHX(a85partial4group, a85partial3group, a85partial2group));
-	H_RULE(a85string, SEQ(h_many(a85group), OPT(a85partialgroup), IGN(a85eod)));
+	H_ARULE(a85string, SEQ(h_many(a85group), OPT(a85partialgroup), IGN(a85eod)));
 
 	/* AsciiHexDecode */
 	H_RULE(ahexeod, h_ch('>'));
-	H_RULE(hdigitpair, SEQ(IGN(OPT(h_many(lwchar))), hdigit, IGN(OPT(h_many(lwchar))), hdigit));
-	H_ARULE(ahextruncated, SEQ(IGN(OPT(h_many(lwchar))), hdigit, IGN(OPT(h_many(lwchar))), ahexeod));
+	H_ARULE(hdigitpair, SEQ(IGN(OPT(h_many(lwchar))), hdigit, IGN(OPT(h_many(lwchar))), hdigit));
+	H_ARULE(ahextruncated, SEQ(IGN(OPT(h_many(lwchar))), hdigit, IGN(OPT(h_many(lwchar)))));
 
-	H_RULE(hs_end, CHX(hdigitpair, ahextruncated));
-	H_RULE(hexstream, SEQ(h_many(hdigitpair), hs_end));
+	H_ARULE(hs_end, SEQ(CHX(hdigitpair, ahextruncated), ahexeod));
+	H_ARULE(ahexstream, SEQ(h_many(hdigitpair), hs_end));
 
 	init_runlengthdecode_parser(aux);
 
@@ -1077,7 +1225,7 @@ init_parser(struct Env *aux)
 	p_xref = CHX(xr_td, xrstm);
 	p_objdef = objdef;
 	p_a85string = a85string;
-	p_ahexstream = hexstream;
+	p_ahexstream = ahexstream;
 	p_ws = ws;
 	p_wel = wel;
 	p_elemr = h_action(elemr, h_act_flatten, NULL);