diff --git a/pdf.c b/pdf.c
index 9d851f1432f267ba683ac3509ccc7209f8ea8c5a..e811113a13fea9477ead5eb15bfff3d4b1225f61 100644
--- a/pdf.c
+++ b/pdf.c
@@ -68,9 +68,13 @@ bytes_eq(HBytes b, const char *s)
 	return strncmp(s, (const char *)(b.token), b.len) == 0 && b.len == strlen(s);
 }
 
+/* This probably belongs next to Ref and XREntry,
+ * but dictentry() needs to know about this type */
+typedef HCountedArray Dict;
+
 /* a helper to look up a value in a dictionary */
 const HParsedToken *
-dictentry(const HCountedArray *dict, const char *key)
+dictentry(const Dict *dict, const char *key)
 {
 	HParsedToken *ent;
 	HBytes k;
@@ -116,7 +120,7 @@ struct Env {
 /*
  * custom token types
  */
-HTokenType TT_XREntry, TT_Ref, TT_HParseResult;
+HTokenType TT_XREntry, TT_Ref, TT_Dict, TT_HParseResult;
 
 typedef struct {
 	enum {XR_FREE, XR_INUSE, XR_OBJSTM} type;
@@ -169,6 +173,45 @@ pp_parseresult(FILE *stream, const HParsedToken *tok, int indent, int delta)
 	h_pprint(stream, res->ast, indent, delta);
 }
 
+/*
+ * Pretty-printer for Dict tokens, registered for TT_Dict in init_parser().
+ * Prints the entries as "key:value" enclosed in braces; every entry after
+ * the first starts on a new line with a leading comma.  Each element of
+ * tok->seq is expected to be a two-element (key, value) sequence.
+ *
+ * Can the default behavior be overridden to only print the V: field?
+ * Warning: boolean keywords get printed as (null),
+ * since the LIT() macro parses them with h_ignore()
+ */
+void
+pp_dict(FILE *stream, const HParsedToken *tok, int indent, int delta)
+{
+	const HCountedArray *dict = tok->seq;
+
+	/* An empty dictionary has no entries to index; print it and stop. */
+	if (dict->used == 0) {
+		fprintf(stream, "{ }");
+		return;
+	}
+
+	fprintf(stream, "{%*s", delta - 1, "");
+	for (size_t i = 0; i < dict->used; i++) {
+		const HCountedArray *k_v = dict->elements[i]->seq;
+
+		/* every entry after the first starts a new line with a comma */
+		if (i > 0)
+			fprintf(stream, "\n%*s, ", indent, "");
+		h_pprint(stream, k_v->elements[0], indent + delta, delta);
+		fprintf(stream, ":");
+		h_pprint(stream, k_v->elements[1], indent + delta, delta);
+	}
+
+	/* more than two entries: closing brace goes on its own line */
+	if (dict->used > 2)
+		fprintf(stream, "\n%*s}", indent, "");
+	else
+		fprintf(stream, " }");
+}
 
 /*
  * semantic actions
@@ -412,6 +455,13 @@ validate_xrstm(HParseResult *p, void *u)
 	        bytes_eq(v->bytes, "XRef"));
 }
 
+/* Semantic action for the dict rule: wraps the parsed sequence of
+ * key-value pairs in a token of the custom Dict type. */
+HParsedToken *
+act_dict(const HParseResult *p, void *env)
+{
+	return H_MAKE(Dict, (void*)p->ast->seq);
+}
 
 /*
  * input grammar
@@ -433,6 +483,7 @@ init_parser(struct Env *aux)
 	TT_HParseResult = h_allocate_token_new("HParseResult", NULL, pp_parseresult);
 	TT_XREntry = h_allocate_token_new("XREntry", NULL, pp_xrentry);
 	TT_Ref = h_allocate_token_new("Ref", NULL, pp_ref);
+	TT_Dict = h_allocate_token_new("Dict", NULL, pp_dict);
 
 	/* lines */
 	H_RULE(cr,	p_mapch('\r', '\n'));	/* semantic value: \n */
@@ -538,7 +589,7 @@ init_parser(struct Env *aux)
 	H_RULE(dclose,	LIT(">>"));
 	H_RULE(obj,	h_indirect());
 	H_RULE(k_v,	SEQ(name, obj));
-	H_RULE(dict,	h_middle(TOKD(dopen), h_many(k_v), TOKD(dclose)));
+	H_ARULE(dict,	h_middle(TOKD(dopen), h_many(k_v), TOKD(dclose)));
 	H_RULE(array,	h_middle(TOKD(lbrack), h_many(obj), TOKD(rbrack)));
 		// XXX validate: dict keys must be unique
 
@@ -1408,7 +1459,7 @@ parse_xrefs(const uint8_t *input, size_t sz, size_t *nxrefs)
 		xrefs[n++] = res->ast;
 
 		/* look up the next offset (to the previous xref section) */
-		tok = dictentry(H_INDEX_SEQ(res->ast, 1), "Prev");
+		tok = dictentry(H_INDEX(Dict, res->ast, 1), "Prev");
 		if (tok == NULL)
 			break;
 		if (tok->token_type != TT_SINT) {