diff --git a/izodparse/ok.py b/izodparse/ok.py deleted file mode 100644 index ee916463a6a7ed964a267dccccafeb57fd792811..0000000000000000000000000000000000000000 --- a/izodparse/ok.py +++ /dev/null @@ -1,8 +0,0 @@ -"Testing utilities." - -def ok(a, b): assert a == b, (a, b) - -def please_be(a, b): - if a != b: - raise ValueError(a, '!=', b) - diff --git a/izodparse/pdf.py b/izodparse/pdf.py index 71b1e564c16de7a444b07e26ee84905658db04e6..1d81799e0f4a7c4a7280c54f636e8ac1a7555d95 100755 --- a/izodparse/pdf.py +++ b/izodparse/pdf.py @@ -148,7 +148,7 @@ import zlib from . import peg from .peg import Any, Charset, Lit, Thunk, Parse -from .ok import ok +from .util import ok, memoprop ### PDF and PostScript and CMap parsing ### @@ -265,9 +265,18 @@ class Pdf: self.xref_start = int(blob[sx:].split()[1]) # XXX there could be many sections self.xrefs = XrefSection(self, self.xref_start) - self.trailer_plumbing = self.parse(self.xrefs.end, drop_ws(b'trailer') + dictionary) - self.trailer = porcelainize(self, self.trailer_plumbing[1][1]) - self.catalog = self.trailer['Root'] + + @memoprop + def trailer_plumbing(self): + return self.parse(self.xrefs.end, drop_ws(b'trailer') + dictionary) + + @memoprop + def trailer(self): + return porcelainize(self, self.trailer_plumbing[1][1]) + + @memoprop + def catalog(self): + return self.trailer['Root'] def read(self, offset, size=64): return self.blob[offset:offset+size] diff --git a/izodparse/peg.py b/izodparse/peg.py index 42cfaa3f47ce01f52286a08fe773fdf767d1409e..5e4889c7ca54b77e4dfb33029cc49a4396306998 100644 --- a/izodparse/peg.py +++ b/izodparse/peg.py @@ -1,5 +1,5 @@ #!/usr/bin/python3 -r"""A small PEG parser generator. +r"""Packrat parsing engine with predictive lookahead parsing. Too slow to use in practice (30 kilobytes/sec), but hopefully validates our understanding of the problem and communicates it more @@ -28,28 +28,9 @@ along with izodparse. If not, see <http://www.gnu.org/licenses/>. """ import types -from .ok import ok, please_be +from .util import ok, please_be, memoprop -class memoprop: - """"Simplified, non-multithreaded version of functools.cached_property. - - For Pythons earlier than 3.8. Doesn't support __slots__, custom - __dict__, etc. - - """ - def __init__(self, func): - self.func = func - - def __get__(self, instance, cls): - cache = instance.__dict__ - if self not in cache: - cache[self] = self.func(instance) - return cache[self] - - -### Packrat parsing engine with predictive lookahead parsing ### - # First, some debugging utilities: def represent_cset(cs): "Debugging helper for understanding first sets; takes a set() of ints or chars." diff --git a/izodparse/util.py b/izodparse/util.py new file mode 100644 index 0000000000000000000000000000000000000000..761320845243aa229552c8d5698d84856e8687cc --- /dev/null +++ b/izodparse/util.py @@ -0,0 +1,24 @@ +"Some basic utilities for testing and lazy evaluation." + +def ok(a, b): assert a == b, (a, b) + +def please_be(a, b): + if a != b: + raise ValueError(a, '!=', b) + +class memoprop: + """"Simplified, non-multithreaded version of functools.cached_property. + + For Pythons earlier than 3.8. Doesn't support __slots__, custom + __dict__, etc. + + """ + def __init__(self, func): + self.func = func + + def __get__(self, instance, cls): + cache = instance.__dict__ + if self not in cache: + cache[self] = self.func(instance) + return cache[self] + diff --git a/plans.org b/plans.org index 9eedac711decc78e95358947cdb5b7b0a481cb17..94a84195cdfa540571c4dd84713b88d119241b1f 100644 --- a/plans.org +++ b/plans.org @@ -1,4 +1,4 @@ -* things I am thinking of doing [3/14] +* things I am thinking of doing [4/15] ** DONE split out pdftour into separate repo. charpy? (too popular) rebound? (too popular) schmidtconcrete? (ok) sclerometer? (wrong test) izod? (seems okay) 1zod or iz0d? better. @@ -73,7 +73,10 @@ This file has a bunch of stuff in it about my local machine config. ** DONE make izodparse an installable Python package Man, I forgot all about the distutils/setuptools mess. ** DONE split out parsing-engine stuff from PDF/PS stuff -** TODO make xrefs, etc., lazy properties +** DONE make trailer lazy +This way I can open, for example, Dercuano, even though I can't parse +its trailer yet. +** TODO add PDF support for fractions ** TODO examine example PDF file with compressed object streams and no fonts in page resource dictionaries [0/3] *** TODO find it *** TODO copy it