/*
 * Reviewer notes for this patch to pdf.c (everything below is the patch
 * itself, unchanged).
 *
 * NOTE(review): this copy appears to have lost its line breaks; the hunk
 * headers still carry multi-line counts (e.g. 13 old / 16 new lines for the
 * first hunk), so the newlines must be restored before it can be applied.
 *
 * Hunk 1 (after validate_xrstm):
 *   - Renames the action act_dict to act_dict_; the body is unchanged.
 *   - Adds "#define act_array_ h_act_flatten".
 *   - Presumably both names exist so that H_ARULE(dict_, ...) and
 *     H_ARULE(array_, ...) in the third hunk find their actions by rule
 *     name -- confirm against the H_ARULE macro definition.
 *
 * Hunk 2 (init_parser, whitespace rules):
 *   - Fixes unbalanced parentheses: the old "wel" rule was missing one
 *     closing parenthesis and the old "ws" rule had one too many.
 *
 * Hunk 3 (init_parser, arrays and dictionaries):
 *   - "array" and "dict" are now declared up front as h_indirect() and are
 *     bound later to array_ and dict_ via h_bind_indirect().
 *   - Objects are split into robj (ref, null, boole, real, intg) and
 *     dobj (string, array, dict); obj becomes CHX(robj, name, dobj),
 *     replacing the former indirect "obj" rule.
 *   - k_v now accepts either name, wel, ws, obj -- or a name followed
 *     directly (no whitespace rule in between) by another name or a dobj.
 *   - Array elements are parsed by two mutually recursive rules, elemd and
 *     elemr. Both accept ws + dobj and ws + name; they differ only for robj:
 *     elemd_ takes "ws, robj" while elemr_ takes "wel, ws, robj", i.e. an
 *     robj that follows a name or robj must be preceded by at least one wel.
 *
 * Hunk 4 (init_parser, streams):
 *   - Removes the old obj_ choice and its h_bind_indirect(obj, obj_), which
 *     the new direct definition of obj makes unnecessary.
 */
diff --git a/pdf.c b/pdf.c index 1f869c19c4970a8e9d2c955ffbe6a61bed6d0be6..686cd7bb319ad59d1cd7a24651c73fb131c60dab 100644 --- a/pdf.c +++ b/pdf.c @@ -441,13 +441,16 @@ validate_xrstm(HParseResult *p, void *u) } HParsedToken * -act_dict(const HParseResult *p, void *env) +act_dict_(const HParseResult *p, void *env) { Dict *dict = H_CAST_SEQ(p->ast); return H_MAKE(Dict, dict); } +#define act_array_ h_act_flatten + + /* * input grammar */ @@ -511,8 +514,8 @@ init_parser(struct Env *aux) /* whitespace */ H_RULE(comment, SEQ(percent, line)); - H_RULE(wel, IGN(CHX(wchar, comment)); - H_RULE(ws, IGN(h_many(wel)))); + H_RULE(wel, IGN(CHX(wchar, comment))); + H_RULE(ws, IGN(h_many(wel))); H_RULE(lws, IGN(h_many(IGN(lwchar)))); /* tokenization */ @@ -571,17 +574,39 @@ init_parser(struct Env *aux) H_ARULE(string, CHX(litstr, hexstr)); h_bind_indirect(snest, snest_); - /* arrays and dictionaries */ + H_RULE(array, h_indirect()); + H_RULE(dict, h_indirect()); + + /* classify objects by whether they start/end with a delimiter: */ + H_RULE(robj, CHX(ref, null, boole, real, intg)); /* rchars */ + H_RULE(dobj, CHX(string, array, dict)); /* dchars */ + H_RULE(obj, CHX(robj, name, dobj)); + + /* dictionaries */ H_RULE(dopen, LIT("<<")); H_RULE(dclose, LIT(">>")); - H_RULE(obj, h_indirect()); - H_RULE(k_v, SEQ(name, ws, obj)); - //H_RULE(k_v, CHX(SEQ(name, wel,ws, obj), - // SEQ(name, dobj))); - H_ARULE(dict, h_middle(dopen, MANY_WS(k_v), dclose)); + H_RULE(k_v, CHX(SEQ(name, wel,ws, obj), + SEQ(name, CHX(name,dobj)))); + H_ARULE(dict_, h_middle(dopen, MANY_WS(k_v), dclose)); // XXX this allows, for instance, "<<<<" to be parsed as "<< <<". ok? 
- H_RULE(array, h_middle(lbrack, MANY_WS(obj), rbrack)); // XXX validate: dict keys must be unique + h_bind_indirect(dict, dict_); + + /* arrays */ + H_RULE(elemd, h_indirect()); /* elements following a delimiter */ + H_RULE(elemr, h_indirect()); /* elements following a regular char */ + H_ARULE(array_, h_middle(lbrack, elemd, rbrack)); + H_RULE(elemd_, CHX(SEQ(ws, dobj, elemd), + SEQ(ws, name, elemr), + SEQ(ws, robj, elemr), + SEQ(ws))); // XXX ws without SEQ() should suffice + H_RULE(elemr_, CHX(SEQ(ws, dobj, elemd), + SEQ(ws, name, elemr), + SEQ(wel,ws, robj, elemr), + SEQ(ws))); // XXX ws without SEQ() should suffice + h_bind_indirect(elemd, elemd_); + h_bind_indirect(elemr, elemr_); + h_bind_indirect(array, array_); /* streams */ H_RULE(stmbeg, SEQ(dict, ws, LIT("stream"), OPT(cr), lf)); @@ -589,10 +614,6 @@ init_parser(struct Env *aux) H_RULE(stream, h_left(h_bind(stmbeg, kstream, aux), stmend)); // XXX is whitespace allowed between the eol and "endstream"? - H_RULE(obj_, CHX(ref, null, boole, real, intg, name, string, - array, dict)); - h_bind_indirect(obj, obj_); - /* * file structure */