Assertion "idx == textlen" failed in act_txtobj (pdf.c:2079).
Seen with 20221020/00013ea1eb09151f390745c4519ecbd39539ca174ae662e252cdce656e6b8ec0
from the instigator corpus.
Stack trace:
(gdb) bt
#0 thrkill () at /tmp/-:3
#1 0x5882dc5563a8502e in ?? ()
#2 0x0000063f761422ae in _libc_abort () at /usr/src/lib/libc/stdlib/abort.c:51
#3 0x0000063f7615c712 in _libc___assert2 (file=Variable "file" is not available.
)
at /usr/src/lib/libc/gen/assert.c:52
#4 0x0000063c89865136 in act_txtobj (p=0x63f45a0f318, u=0x7f7ffffbeb98)
at pdf.c:2079
#5 0x0000063f790743e3 in parse_action (env=0x63f2bd9d5e0, state=0x63f3b52a018)
at build/debug/src/parsers/action.c:16
#6 0x0000063f790855f7 in perform_lowlevel_parse (state=0x63f3b52a018,
parser=0x63f2bd8bd00) at build/debug/src/backends/packrat.c:49
#7 0x0000063f79085e33 in h_do_parse (parser=0x63f2bd8bd00,
state=0x63f3b52a018) at build/debug/src/backends/packrat.c:230
#8 0x0000063f7907e28e in parse_many (env=0x63f2bd7ca80, state=0x63f3b52a018)
at build/debug/src/parsers/many.c:26
#9 0x0000063f790855f7 in perform_lowlevel_parse (state=0x63f3b52a018,
parser=0x63f2bd71900) at build/debug/src/backends/packrat.c:49
#10 0x0000063f79085e33 in h_do_parse (parser=0x63f2bd71900,
state=0x63f3b52a018) at build/debug/src/backends/packrat.c:230
#11 0x0000063f7907595a in parse_bind (be_=0x63f2bd871e0, state=0x63f3b52a018)
at build/debug/src/parsers/bind.c:36
#12 0x0000063f790855f7 in perform_lowlevel_parse (state=0x63f3b52a018,
parser=0x63f2bd71e80) at build/debug/src/backends/packrat.c:49
#13 0x0000063f79085e33 in h_do_parse (parser=0x63f2bd71e80,
state=0x63f3b52a018) at build/debug/src/backends/packrat.c:230
#14 0x0000063f7908622d in h_packrat_parse (mm__=0x63f7909fec0,
parser=0x63f2bd71e80, input_stream=0x7f7ffffbe5b0)
at build/debug/src/backends/packrat.c:330
#15 0x0000063f790986fe in h_parse__m (mm__=0x63f7909fec0,
parser=0x63f2bd71e80,
input=0x63ed3161018 "BT\n/F1 1 Tf\n10 0 0 10 40.39 740.95 Tm\n0 g\n/GS1 gs\n0 Tc\n0 Tw\n( Employment status of the civilian noninstitutional population by sex, age, race, and Hispanic origin, 2001 annual)Tj\n0 -1.149 TD\n(averages"..., length=12002) at build/debug/src/hammer.c:588
#16 0x0000063f7909867a in h_parse (parser=0x63f2bd71e80,
input=0x63ed3161018 "BT\n/F1 1 Tf\n10 0 0 10 40.39 740.95 Tm\n0 g\n/GS1 gs\n0 Tc\n0 Tw\n( Employment status of the civilian noninstitutional population by sex, age, race, and Hispanic origin, 2001 annual)Tj\n0 -1.149 TD\n(averages"..., length=12002) at build/debug/src/hammer.c:573
#17 0x0000063c8986e8f8 in parse_pagenode (aux=0x7f7ffffbeb98,
myNode=0x63ee9425018, myRef=0x63eaa6fe360, myDict=0x63e8a00a648,
parent_t=0x63e99330360, parent_n=0x63f679a8398, arena=0x63f2bd78740)
at pdf.c:4081
#18 0x0000063c8986f2b8 in parse_pagetree (aux=0x7f7ffffbeb98,
myNode=0x63f679a8398, myRef=0x63e99330360, myDict=0x63f61cc7628,
parent_t=0x63ed6e23980, parent_n=0x7f7ffffbebf0) at pdf.c:4255
#19 0x0000063c8986f206 in parse_pagetree (aux=0x7f7ffffbeb98,
myNode=0x7f7ffffbebf0, myRef=0x63ed6e23980, myDict=0x63e916ef628,
parent_t=0x0, parent_n=0x0) at pdf.c:4249
#20 0x0000063c8986f55d in parse_catalog (aux=0x7f7ffffbeb98,
root=0x63eb8cff260) at pdf.c:4326
#21 0x0000063c89870c1f in parse_xrefs (aux=0x7f7ffffbeb98) at pdf.c:4947
#22 0x0000063c89871531 in main (argc=1, argv=0x7f7ffffbed70) at pdf.c:5128
Context:
#4 0x0000063c89865136 in act_txtobj (p=0x63f45a0f318, u=0x7f7ffffbeb98)
at pdf.c:2079
2079 assert(idx == textlen);
(gdb) list
2074
2075 default:
2076 ; // ignore
2077 }
2078 }
2079 assert(idx == textlen);
2080
2081 // update the position on the page
2082 node->ts.curr_pos.tx = *px;
2083 node->ts.curr_pos.ty = *py;
(gdb) print idx
$1 = 157
(gdb) print textlen
$2 = 158
Off by one: idx (157) is one less than textlen (158). Hm, cause not yet determined.