diff --git a/pdf.c b/pdf.c index 7ab7a1d96c51bad5e8b99b4c79ef4062837686bb..27135177d3011e86db92c93762835f491da51da2 100644 --- a/pdf.c +++ b/pdf.c @@ -657,7 +657,7 @@ act_nesc(const HParseResult *p, void *u) return H_MAKE_UINT(H_FIELD_UINT(1)*16 + H_FIELD_UINT(2)); } -#define act_str_ h_act_flatten +#define act_schars h_act_flatten #define act_string act_token HParsedToken * @@ -671,9 +671,6 @@ act_octal(const HParseResult *p, void *u) return H_MAKE_UINT(x); } -#define act_oct3 act_octal -#define act_oct2 act_octal -#define act_oct1 act_octal HParsedToken * act_xrent(const HParseResult *p, void *u) @@ -906,6 +903,7 @@ init_parser(struct Env *aux) //H_RULE(dchar, IN(DCHARS)); /* delimiter */ H_RULE(rchar, NOT_IN(WCHARS DCHARS)); /* regular */ H_RULE(nchar, NOT_IN(WCHARS DCHARS "#")); /* name */ + H_RULE(schar, NOT_IN("()\n\r\\")); /* string literal */ H_ARULE(digit, h_ch_range('0', '9')); H_ARULE(pdigit, h_ch_range('1', '9')); H_ARULE(hlower, h_ch_range('a', 'f')); @@ -960,16 +958,10 @@ init_parser(struct Env *aux) /* numbers */ H_ARULE(sign, CHX(minus, IGN(plus))); H_VRULE(intnn, nat); - #if 1 H_ARULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */ SEQ(digits, period, empty), /* 123. */ SEQ(empty, period, digits))); /* .123 */ // XXX ^ we _could_ move the "123." case into intnn... - #else - // XXX the .123 case above somehow leads to a conflict with litstr... - H_ARULE(realnn, CHX(SEQ(digits, period, digits), /* 12.3 */ - SEQ(digits, period, empty))); /* 123. */ - #endif H_RULE(numbnn, CHX(realnn, intnn)); H_RULE(snumb, SEQ(sign, numbnn)); H_ARULE(numb, CHX(snumb, numbnn)); @@ -979,59 +971,24 @@ init_parser(struct Env *aux) H_ARULE(nstr, h_many(CHX(nchar, nesc))); /* '/' is valid */ H_RULE(name, h_right(slash, nstr)); - /* strings - * - * this is so convoluted in order to make it LALR including the - * precedence rules for octal escapes ("\123" vs "\12 3" vs "\1 23") - * and end-of-line ("CRLF" vs "CR LF"). - * - * we have to split the base rule 'str' into variants 'str_o' and - * 'str_l' depending on whether they may start with an octal digit or - * linefeed, respectively. - */ - H_RULE(str_ol, h_indirect()); - H_RULE(str_o, h_indirect()); - H_RULE(str_l, h_indirect()); - H_RULE(str, h_indirect()); + /* strings */ + H_RULE(snest, h_indirect()); H_RULE(bsn, p_mapch('n', 0x0a)); /* LF */ H_RULE(bsr, p_mapch('r', 0x0d)); /* CR */ H_RULE(bst, p_mapch('t', 0x09)); /* HT */ H_RULE(bsb, p_mapch('b', 0x08)); /* BS (backspace) */ H_RULE(bsf, p_mapch('f', 0x0c)); /* FF */ H_RULE(escape, CHX(bsn, bsr, bst, bsb, bsf, lparen, rparen, bslash)); - H_ARULE(oct3, REP(odigit,3)); - H_ARULE(oct2, REP(odigit,2)); - H_ARULE(oct1, REP(odigit,1)); - H_RULE(octesc, CHX(SEQ(oct3, str), - SEQ(oct2, str_o), - SEQ(oct1, str_o))); - H_RULE(eolesc, CHX(SEQ(IGN(crlf), str), - SEQ(IGN(cr), str_l), - SEQ(IGN(lf), str))); - H_RULE(schar_o, NOT_IN("()\n\r\\" "01234567")); - H_RULE(schar_e, NOT_IN("()\n\r\\" "01234567" "nrtbf")); - H_RULE(str_o_, CHX(SEQ(lf, str), str_ol)); /* str "but not" odigit */ - H_RULE(str_l_, CHX(SEQ(odigit, str), str_ol)); /* str "but not" lf */ - H_RULE(str_ol_, CHX(SEQ(cr, str_l), /* str "but neither" */ - SEQ(crlf, str), - SEQ(schar_o, str), - SEQ(lparen, str, rparen, str), - SEQ(IGN(bslash), escape, str), - SEQ(IGN(bslash), schar_e, str), /* "lone" bs */ - /* NB: ^ lone backslashes are to be ignored per spec, but we - * let them "escape" with the following character. this works - * because they are never truly alone. */ - SEQ(IGN(bslash), octesc), - SEQ(IGN(bslash), eolesc), /* line split */ - epsilon)); - H_ARULE(str_, CHX(SEQ(lf, str), SEQ(odigit, str), str_ol)); - H_RULE(litstr, h_middle(lparen, str, rparen)); + H_ARULE(octal, CHX(REP(odigit,3), REP(odigit,2), REP(odigit,1))); + H_RULE(wrap, IGN(eol)); + H_RULE(sesc, h_right(bslash, CHX(escape, octal, wrap, epsilon))); + /* NB: lone backslashes and escaped newlines are ignored */ + H_ARULE(schars, h_many(CHX(schar, snest, sesc, eol))); + H_RULE(snest_, SEQ(lparen, schars, rparen)); + H_RULE(litstr, h_middle(lparen, schars, rparen)); H_RULE(hexstr, h_middle(langle, MANY_WS(hdigit), rangle)); H_ARULE(string, CHX(litstr, hexstr)); - h_bind_indirect(str_ol, str_ol_); - h_bind_indirect(str_o, str_o_); - h_bind_indirect(str_l, str_l_); - h_bind_indirect(str, str_); + h_bind_indirect(snest, snest_); H_RULE(array, h_indirect()); H_RULE(dict, h_indirect());