Skip to content
Snippets Groups Projects
Commit 76bf6567 authored by Sven M. Hallberg
Browse files

parse xref streams without looking at /Length

parent 5d138cd7
No related branches found
No related tags found
No related merge requests found
......@@ -72,6 +72,15 @@ dictentry(const HCountedArray *dict, const char *key)
}
/*
 * Parser environment: describes the complete input buffer. A pointer to one
 * of these is handed to the semantic actions and h_bind() continuations as
 * their user-data argument.
 *
 * The field order matters: main() fills the struct positionally.
 * NOTE(review): main() stores a 'const uint8_t *' in 'input'; confirm that
 * the implicit pointer conversion is intended.
 */
struct Env {
	const char *input;	/* start of the input buffer */
	size_t sz;		/* size of the input buffer in bytes */
};
/*
* semantic actions
*/
......@@ -198,6 +207,15 @@ act_octal(const HParseResult *p, void *u)
#define act_xrefs h_act_last
HParsedToken *
act_rest(const HParseResult *p, void *env)
{
struct Env *aux = env;
size_t offset = H_CAST_UINT(p->ast) / 8;
return H_MAKE_BYTES(aux->input + offset, aux->sz - offset);
}
/* stream semantics (defined further below) */
bool validate_xrstm(HParseResult *, void *);
HParsedToken *act_xrstm(const HParseResult *, void *);
......@@ -212,11 +230,12 @@ HParser *p_pdfdbg;
HParser *p_startxref;
HParser *p_xref;
/* continuation for h_bind() */
/* continuations for h_bind() */
HParser *kstream(HAllocator *, const HParsedToken *, void *);
HParser *kxstream(HAllocator *, const HParsedToken *, void *);
void
init_parser(const char *input)
init_parser(struct Env *aux)
{
/* lines */
H_RULE(cr, p_mapch('\r', '\n')); /* semantic value: \n */
......@@ -329,7 +348,7 @@ init_parser(const char *input)
/* streams */
H_RULE(stmbeg, SEQ(dict, KW("stream"), OPT(cr), lf));
H_RULE(stmend, SEQ(OPT(eol), LIT("endstream")));
H_RULE(stream, h_left(h_bind(stmbeg, kstream, (void *)input), stmend));
H_RULE(stream, h_left(h_bind(stmbeg, kstream, aux), stmend));
// XXX is whitespace allowed between the eol and "endstream"?
H_RULE(obj_, CHX(ref, null, boole, real, intg, name, string,
......@@ -349,7 +368,7 @@ init_parser(const char *input)
H_RULE(objdef, SEQ(pnat, nat, KW("obj"), indobj, KW("endobj")));
H_RULE(body, h_many(objdef)); // XXX object streams
/* cross-reference table */
/* cross-reference section */
H_RULE(xreol, CHX(SEQ(sp, cr), SEQ(sp, lf), crlf));
// ^ XXX does the real world follow this rule?! cf. loop.pdf
H_RULE(xrtyp, CHX(h_ch('n'), h_ch('f')));
......@@ -360,7 +379,11 @@ init_parser(const char *input)
H_RULE(xrhead, SEQ(xrnat, IGN(sp), xrnat, nl));
H_RULE(xrsub, SEQ(xrhead, h_many(xrent)));
H_ARULE(xrefs, SEQ(KW("xref"), nl, h_many(xrsub)));
H_AVRULE(xrstm, SEQ(pnat, nat, KW("obj"), stream, KW("endobj")));
/* cross-reference streams */
H_RULE(rest, h_action(h_tell(), act_rest, aux));
H_AVRULE(xrstm, SEQ(pnat, nat, KW("obj"), stmbeg, rest));
// XXX skip however much we consumed and check for "endstream endobj"?
/* trailer */
H_RULE(startxr, SEQ(nl, KW("startxref"), nl,
......@@ -368,7 +391,7 @@ init_parser(const char *input)
LIT("%%EOF"), CHX(nl, end)));
// XXX should lws be allowed before EOF marker?
// NB: lws before xref offset is allowed, cf. p.48 (example 4)
H_RULE(xr_td, CHX(SEQ(xrefs, KW("trailer"), dict), xrstm));
H_RULE(xr_td, SEQ(xrefs, KW("trailer"), dict));
H_RULE(tail, SEQ(body, h_optional(xr_td), startxr));
H_RULE(pdf, SEQ(header, h_many1(tail), end));
......@@ -379,10 +402,15 @@ init_parser(const char *input)
p_pdf = pdf;
p_pdfdbg = pdfdbg;
p_startxref = startxr;
p_xref = xr_td;
p_xref = CHX(xr_td, xrstm);
}
/* combine current position combined with env=(input,sz) into HBytes */
/*
* stream object handling incl. cross-reference streams
*/
/* combine current position with env=(input,sz) into HBytes */
HParsedToken *
act_ks_bytes(const HParseResult *p, void *env)
{
......@@ -405,7 +433,7 @@ act_ks_bytes(const HParseResult *p, void *env)
HParser *
kstream(HAllocator *mm__, const HParsedToken *x, void *env)
{
const char *input = env;
struct Env *aux = env;
const HParsedToken *dict_t = H_INDEX_TOKEN(x, 0);
const HCountedArray *dict = H_CAST_SEQ(dict_t);
const HParsedToken *v = NULL;
......@@ -422,7 +450,7 @@ kstream(HAllocator *mm__, const HParsedToken *x, void *env)
/* dummy struct to hold the pair (input,sz) */
HBytes *bytes = h_alloc(mm__, sizeof(HBytes));
bytes->token = input;
bytes->token = aux->input;
bytes->len = sz;
HParser *tell = h_tell__m(mm__);
......@@ -473,7 +501,7 @@ validate_xrstm(HParseResult *p, void *u)
* interpret a cross-reference stream and return it in the same form as other
* cross-reference sections:
*
* p = pnat nat (dict bytes)
* p = (pnat nat (dict ...) bytes)
* result = (xrefs dict)
*/
HParsedToken *
......@@ -482,7 +510,7 @@ act_xrstm(const HParseResult *p, void *u)
HParsedToken *bytes, *dict, *result;
dict = H_INDEX_TOKEN(p->ast, 2, 0);
bytes = H_INDEX_TOKEN(p->ast, 2, 1);
bytes = H_INDEX_TOKEN(p->ast, 3);
// XXX decode XRefStm
......@@ -597,6 +625,7 @@ end:
int
main(int argc, char *argv[])
{
struct Env aux;
HParseResult *res = NULL;
const HParsedToken **xrefs;
const uint8_t *input;
......@@ -622,7 +651,8 @@ main(int argc, char *argv[])
err(1, "mmap");
/* build parsers */
init_parser(input);
aux = (struct Env){input, sz};
init_parser(&aux);
/* parse all cross-reference sections and trailer dictionaries */
xrefs = parse_xrefs(input, sz, &nxrefs);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment