diff --git a/pdf.c b/pdf.c index 6bcd7e408759004c4223f4630e5857cea85ad3ef..817fdcd21ed5c36977763b553cde811b890899a2 100644 --- a/pdf.c +++ b/pdf.c @@ -319,6 +319,10 @@ struct Env { Catalog_T catalog; /* Catalog object and document structure */ TextState_T tstate; /* current text state */ + unsigned int paren_nest_depth; /* current nesting depth of literal strings (open parentheses) */ + unsigned int array_nest_depth; /* current nesting depth of arrays (open square brackets) */ + unsigned int dict_nest_depth; /* current nesting depth of dictionaries (open double angle brackets) */ + + }; Fontinfo_T *lookup_font(TextState_T *state, struct Env *aux); @@ -1163,7 +1167,126 @@ act_rldstring(const HParseResult *p, void *u) return H_MAKE_BYTES(result_bytes, bytes_required); } +/* + * The following functions limit how deeply literal strings, arrays and dictionaries may nest, so that deep nesting cannot be used to exhaust memory. + * struct Env holds one counter per bracket type (paren_nest_depth, array_nest_depth, dict_nest_depth); every callback reads it through its user-data pointer u. The act_ function of each opening token (sopen, array_begin, dopen) increments its counter and the act_ function of each closing token (sclose, array_end, dclose) decrements it. + * The counters are not decremented when they are 0 to avoid underflow. + * + * The counters are global across the document (but respect document structure, + * e.g. parentheses in streams don't count, unless for some reason they're parsed with the sopen rule, which wraps lparen.) + * validate_sopen, validate_array_begin and validate_dopen make the parse fail once the respective counter has reached its limit (PAREN_MAX_NEST_DEPTH, ARRAY_MAX_NEST_DEPTH or DICT_MAX_NEST_DEPTH). + * Because currently there are no diagnostic messages, this can probably result in unexpected parses. NOTE(review): the limit check relies on H_AVDRULE running validate_X before act_X - confirm against the macro definition. + * + * validate_sclose, validate_array_end and validate_dclose make the parse fail if the respective closing token ( ')', ']', '>>' ) + * is encountered while its counter is 0, i.e. before any sopen, array_begin or dopen, or after more closing than opening tokens. This is not exactly + * the same as balanced parentheses: the order different types of parentheses appear in is not considered. Different brackets can be considered + * independently because other H_RULEs already reject cases with interleaved opening/closing tokens such as "<< [ /Foo /Bar >> ]". 
+ */ + +#define PAREN_MAX_NEST_DEPTH 256 + +bool +validate_sopen(HParseResult *p, void *u) +{ + struct Env *aux = (struct Env*) u; + return aux->paren_nest_depth < PAREN_MAX_NEST_DEPTH; +} + +HParsedToken * +act_sopen(const HParseResult *p, void *u) +{ + struct Env *aux = (struct Env*) u; + aux->paren_nest_depth += 1; + + return H_MAKE_UINT(H_CAST_UINT(p->ast)); +} + +bool +validate_sclose(HParseResult *p, void *u) +{ + struct Env *aux = (struct Env*) u; + return aux->paren_nest_depth > 0; +} + +HParsedToken* +act_sclose(const HParseResult *p, void *u) +{ + struct Env *aux = (struct Env*) u; + if(aux->paren_nest_depth > 0) + aux->paren_nest_depth -= 1; + + return H_MAKE_UINT(H_CAST_UINT(p->ast)); +} + +#define ARRAY_MAX_NEST_DEPTH 256 + +bool +validate_array_begin(HParseResult *p, void *u) +{ + struct Env *aux = (struct Env*) u; + return aux->array_nest_depth < ARRAY_MAX_NEST_DEPTH; +} + +HParsedToken * +act_array_begin(const HParseResult *p, void *u) +{ + struct Env *aux = (struct Env*) u; + aux->array_nest_depth += 1; + + return H_MAKE_UINT(H_CAST_UINT(p->ast)); +} + +bool +validate_array_end(HParseResult *p, void *u) +{ + struct Env *aux = (struct Env*) u; + return aux->array_nest_depth > 0; +} + +HParsedToken * +act_array_end(const HParseResult *p, void *u) +{ + struct Env *aux = (struct Env*) u; + if(aux->array_nest_depth > 0) + aux->array_nest_depth -= 1; + + return H_MAKE_UINT(H_CAST_UINT(p->ast)); +} +#define DICT_MAX_NEST_DEPTH 256 + +bool +validate_dopen(HParseResult *p, void *u) +{ + struct Env *aux = (struct Env*) u; + return aux->dict_nest_depth < DICT_MAX_NEST_DEPTH; +} + +HParsedToken* +act_dopen(const HParseResult *p, void *u) +{ + struct Env *aux = (struct Env*) u; + aux->dict_nest_depth += 1; + + return H_MAKE_BYTES(H_CAST_BYTES(p->ast).token, H_CAST_BYTES(p->ast).len); +} + +bool +validate_dclose(HParseResult *p, void *u) +{ + struct Env *aux = (struct Env*) u; + return aux->dict_nest_depth > 0; +} + +HParsedToken* +act_dclose(const HParseResult 
*p, void *u) +{ + struct Env *aux = (struct Env*) u; + if(aux->dict_nest_depth > 0) + aux->dict_nest_depth -= 1; + + return H_MAKE_BYTES(H_CAST_BYTES(p->ast).token, H_CAST_BYTES(p->ast).len); +} /* * ******************************************************************** @@ -2442,8 +2565,10 @@ init_parser(struct Env *aux) H_RULE(sesc, h_right(bslash, CHX(escape, octal, wrap, epsilon))); /* NB: lone backslashes and escaped newlines are ignored */ H_ARULE(schars, h_many(CHX(schar, snest, sesc, eol))); - H_RULE(snest_, SEQ(lparen, schars, rparen)); - H_RULE(litstr, h_middle(lparen, schars, rparen)); + H_AVDRULE(sopen, lparen, aux); + H_AVDRULE(sclose, rparen, aux); + H_RULE(snest_, SEQ(sopen, schars, sclose)); + H_RULE(litstr, h_middle(sopen, schars, sclose)); H_RULE(hexstr, h_middle(langle, MANY_WS(hdigit), rangle)); H_ARULE(string, CHX(litstr, hexstr)); h_bind_indirect(snest, snest_); @@ -2457,8 +2582,8 @@ init_parser(struct Env *aux) H_RULE(obj, CHX(robj, name, dobj)); /* dictionaries */ - H_RULE(dopen, LIT("<<")); - H_RULE(dclose, LIT(">>")); + H_AVDRULE(dopen, LIT("<<"), aux); + H_AVDRULE(dclose, LIT(">>"), aux); H_RULE(k_v, CHX(CHX(SEQ(name, wel,ws, obj), SEQ(name, CHX(name,dobj))), VIOL(SEQ(name, wel,ws), "Key with no value (severity=2)"))); @@ -2470,7 +2595,10 @@ init_parser(struct Env *aux) /* arrays */ H_RULE(elemd, h_indirect()); /* elements following a delimiter */ H_RULE(elemr, h_indirect()); /* elements following a regular char */ - H_ARULE(array_, h_middle(lbrack, elemd, rbrack)); + H_AVDRULE(array_begin, lbrack, aux); /* validate_array_begin enforces ARRAY_MAX_NEST_DEPTH; act_array_begin counts the opening bracket in aux->array_nest_depth */ + + H_AVDRULE(array_end, rbrack, aux); /* validate_array_end rejects a closing bracket while the depth is 0 (no limit check here); act_array_end decrements the depth */ + H_ARULE(array_, h_middle(array_begin, elemd, array_end)); H_RULE(elemd_, CHX(SEQ(ws, dobj, elemd), SEQ(ws, name, elemr), SEQ(ws, robj, elemr),