diff --git a/pdf.c b/pdf.c
index 800aa0d31fe56ae0471110a98ff797407052e66b..18ebac16e37b3b2f2156452168e06e3dc405a5ed 100644
--- a/pdf.c
+++ b/pdf.c
@@ -786,7 +786,21 @@ act_dict_(const HParseResult *p, void *env)
 #define act_array_ h_act_flatten
 
+/*
+ * Semantic action for the length byte of a literal run: maps the parsed
+ * value (0-127) to the number of data bytes that follow (1-128).
+ */
 HParsedToken *
-act_longlength(const HParseResult *p, void *env)
+act_shortlength(const HParseResult *p, void *u)
+{
+	uint8_t length = H_CAST_UINT(p->ast);
+	/* Length can range from 0-127, corresponding to the range 1-128, inclusive */
+	uint8_t finallength = length+1;
+
+	return H_MAKE_UINT(finallength);
+}
+
+HParsedToken *
+act_longlength(const HParseResult *p, void *u)
 {
 	uint8_t length = H_CAST_UINT(p->ast);
 	uint8_t finallength = 257-length;
@@ -795,7 +809,7 @@ act_longlength(const HParseResult *p, void *env)
 }
 
 HParsedToken *
-act_longrun(const HParseResult *p, void *env)
+act_longrun(const HParseResult *p, void *u)
 {
 	HParsedToken **elements = h_seq_elements(p->ast);
 	HParsedToken *res = H_MAKE_SEQ();
@@ -811,6 +825,36 @@ act_longrun(const HParseResult *p, void *env)
 	return res;
 }
 
+/*
+ * Semantic action for a complete RunLengthDecode stream: flattens the
+ * parsed runs and copies their byte values into a single byte string.
+ */
+HParsedToken *
+act_rldstring(const HParseResult *p, void *u)
+{
+	const HParsedToken *flattened = h_seq_flatten(p->arena, p->ast);
+	HCountedArray *flattened_seq = H_CAST_SEQ(flattened);
+	size_t bytes_required;
+	uint8_t *result_bytes;
+
+	/*
+	 * Hold the count in a size_t: a uint8_t wraps once more than 255
+	 * elements are present, undersizing the buffer filled below.  The
+	 * guard keeps 'used - 1' from wrapping on an empty sequence.
+	 * NOTE(review): the last flattened element is excluded from the
+	 * output; confirm that it is never a data byte.
+	 */
+	bytes_required = flattened_seq->used > 0 ? flattened_seq->used - 1 : 0;
+	result_bytes = h_arena_malloc(p->arena, sizeof(uint8_t) * bytes_required);
+
+	for (size_t i = 0; i < bytes_required; ++i)
+	{
+		result_bytes[i] = H_CAST_UINT(flattened_seq->elements[i]);
+	}
+
+	return H_MAKE_BYTES(result_bytes, bytes_required);
+}
+
 /*
  * input grammar
  */
@@ -837,7 +881,7 @@ init_runlengthdecode_parser(struct Env *aux)
 {
 	H_RULE(rldeod, h_ch(0x80));
 	H_ARULE(longlength, h_ch_range(0x81, 0xFF));
-	H_RULE(shortlength, h_ch_range(0x1, 0x7F));
+	H_ARULE(shortlength, h_ch_range(0x0, 0x7F));
 	H_RULE(shortdata, h_uint8());
 	H_RULE(longdata, h_uint8());
 
@@ -845,7 +889,7 @@ init_runlengthdecode_parser(struct Env *aux)
 	H_RULE(shortrun, h_length_value(shortlength, shortdata));
 	H_ARULE(longrun, SEQ(longlength, longdata));
 
-	H_RULE(rldstring, SEQ(CHX(shortrun, longrun), IGN(rldeod)));
+	H_ARULE(rldstring, SEQ(h_many(CHX(shortrun, longrun)), IGN(rldeod)));
 
 	p_rldstring = rldstring;
 }
diff --git a/t/rld_pdf.pdf b/t/rld_pdf.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..49116628c303ea5fe2284e44e20f99f85872dc65
--- /dev/null
+++ b/t/rld_pdf.pdf
@@ -0,0 +1,62 @@
+%PDF-1.4
+1 0 obj
+<< /Type /Catalog
+/Outlines 2 0 R
+/Pages 3 0 R
+>>
+endobj
+2 0 obj
+<< /Type /Outlines
+/Count 0
+>>
+endobj
+3 0 obj
+<< /Type /Pages
+/Kids [ 4 0 R ]
+/Count 1
+>>
+endobj
+4 0 obj
+<< /Type /Page
+/Parent 3 0 R
+/MediaBox [ 0 0 612 792 ]
+/Contents 5 0 R
+/Resources << /ProcSet 6 0 R
+/Font << /F1 7 0 R >>
+>>
+>>
+endobj
+5 0 obj
+<< /Length 23 /Filter /RunLengthDecode>>
+stream
+ AAAAAAAAAAAAAAAAAAAA@B
+endstream
+endobj
+6 0 obj
+[ /PDF /Text ]
+endobj
+7 0 obj
+<< /Type /Font
+/Subtype /Type1
+/Name /F1
+/BaseFont /Helvetica
+/Encoding /MacRomanEncoding
+>>
+endobj
+xref
+0 8
+0000000000 65535 f
+0000000009 00000 n
+0000000074 00000 n
+0000000120 00000 n
+0000000179 00000 n
+0000000322 00000 n
+0000000419 00000 n
+0000000450 00000 n
+trailer
+<< /Size 8
+/Root 1 0 R
+>>
+startxref
+557
+%%EOF