From 61f42f8863330652b6d9f0a822e1c90ffb8e5439 Mon Sep 17 00:00:00 2001 From: "Sven M. Hallberg" <pesco@khjk.org> Date: Thu, 20 Feb 2020 21:01:04 +0100 Subject: [PATCH] add (disabled) object stream parser --- pdf.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/pdf.c b/pdf.c index 3197c20..3f276a0 100644 --- a/pdf.c +++ b/pdf.c @@ -73,6 +73,20 @@ p_return_uint__m(HAllocator *mm__, uint64_t x) return h_action__m(mm__, p_epsilon, act_return_uint, (void *)x); } +/* like h_sepBy but parses a fixed number of elements */ +HParser * +p_sepBy_n__m(HAllocator *mm__, HParser *p, HParser *sep, size_t n) +{ + if (n == 0) + return p_epsilon; + + HParser *sep_p = h_sequence__m(mm__, sep, p, NULL); + HParser *tail = h_repeat_n__m(mm__, sep_p, n - 1); + HParser *seq = h_sequence__m(mm__, p, tail, NULL); + + return h_action__m(mm__, seq, h_act_flatten, NULL); +} + /* a helper to compare an HBytes to a string */ bool bytes_eq(HBytes b, const char *s) @@ -705,8 +719,11 @@ HParser *p_xref; HParser *p_objdef; HParser *p_a85string; HParser *p_ahexstream; - HParser *p_rldstring; +HParser *p_ws; +HParser *p_wel; +HParser *p_elemr; +HParser *p_npair; /* continuations for h_bind() */ HParser *kstream(HAllocator *, const HParsedToken *, void *); @@ -957,11 +974,6 @@ init_parser(struct Env *aux) VIOL(h_optional(KW("endobj")), "Missing endobj token (severity=1)")))); H_RULE(body, h_many(objdef)); - /* for object streams XXX -> p_objstm__m(...) */ - // sepBy_n(p, sep,n) = SEQ(p, REP(SEQ(sep, p), n-1)) - //H_RULE(osidx, sepBy_n(npair, SEQ(wel,ws), N)) - //H_RULE(objstm, SEQ(ws, osidx, elemr, ws)); -- elemr from array - /* cross-reference section */ H_RULE(xreol, CHX(SEQ(sp, cr), SEQ(sp, lf), crlf)); // ^ XXX does the real world follow this rule?! cf. loop.pdf @@ -1063,6 +1075,10 @@ init_parser(struct Env *aux) p_objdef = objdef; p_a85string = a85string; p_ahexstream = hexstream; + p_ws = ws; + p_wel = wel; + p_elemr = h_action(elemr, h_act_flatten, NULL); + p_npair = npair; p_fail = h_nothing_p(); p_epsilon = epsilon; @@ -1075,6 +1091,7 @@ init_parser(struct Env *aux) act_nat, NULL)); H_RULE(violsev, SEQ(IGN(viol_preamble), severity_num)); p_violsev = violsev; + #if 0 // XXX testing int r; @@ -1617,8 +1634,8 @@ p_take__m(HAllocator *mm__, size_t n, struct Env *aux) return h_left__m(mm__, bytes, skip); } -HParser * -p_xrefdata__m(HAllocator *mm__, const Dict *dict); +HParser *p_xrefdata__m(HAllocator *, const Dict *); +HParser *p_objstm__m(HAllocator *, const Dict *); HParser * p_stream_data__m(HAllocator *mm__, const Dict *dict) @@ -1886,6 +1903,29 @@ p_xrefdata__m(HAllocator *mm__, const Dict *dict) return h_sequence__ma(mm__, (void **)p_subs); } +HParser * +p_objstm__m(HAllocator *mm__, const Dict *dict) +{ + const HParsedToken *v; + size_t N; + + v = dictentry(dict, "N"); + if (v == NULL || v->token_type != TT_SINT || v->sint < 0 || + (uint64_t)v->sint > SIZE_MAX) { + fprintf(stderr, "missing /N on object stream\n"); + return p_fail; + } + N = v->sint; + + HParser *wel_ws = h_sequence__m(mm__, p_wel, p_ws, NULL); + HParser *idx = p_sepBy_n__m(mm__, p_npair, wel_ws, N); + + return h_sequence__m(mm__, p_ws, idx, p_elemr, p_ws, NULL); + // XXX leading and trailing ws OK? + + // XXX consistency-check against /First, idx, /N +} + /* * This continuation is very similar to kstream, except that it does not * rely on /Length to consume the right amount of input. If /Length is -- GitLab