Forked from
Hammer / hammer
151 commits behind the upstream repository.
-
xentrac authored
In `h_sequence__ma` the same bug occurs, and it manifests as a crash in the same way, so I've added a test for it. In `h_permutation__ma` it evidently exists in the same form, but I haven't figured out how to reproduce it; in that case I added a fix to the implementation, but no test.
t_regression.c 10.69 KiB
#include <glib.h>
#include <stdint.h>
#include "glue.h"
#include "hammer.h"
#include "test_suite.h"
#include "internal.h"
static void test_bug118(void) {
  // Regression test for
  // https://github.com/UpstandingHackers/hammer/issues/118
  // (adapted from https://gist.github.com/mrdomino/c6bc91a7cb3b9817edb5).
  //
  // Two grammars that differ only in how many 4-bit header fields they
  // start with are run over the same six bytes; each result must report
  // the bit length checked below.
  const uint8_t input[] = { 0x69, 0x5A, 0x6A, 0x7A, 0x8A, 0x9A };

#define MY_ENDIAN (BIT_BIG_ENDIAN | BYTE_LITTLE_ENDIAN)
  H_RULE(nibble, h_with_endianness(MY_ENDIAN, h_bits(4, false)));
  H_RULE(sample, h_with_endianness(MY_ENDIAN, h_bits(10, false)));
#undef MY_ENDIAN

  // Payload: three 10-bit samples followed by 2 ignored bits.
  H_RULE(samples, h_sequence(h_repeat_n(sample, 3), h_ignore(h_bits(2, false)), NULL));

  // Header variants: two nibbles ("ok") and three nibbles ("weird").
  H_RULE(header_ok, h_sequence(nibble, nibble, NULL));
  H_RULE(header_weird, h_sequence(nibble, nibble, nibble, NULL));

  H_RULE(parser_ok, h_sequence(header_ok, samples, NULL));
  H_RULE(parser_weird, h_sequence(header_weird, samples, NULL));

  // Three-nibble header first: 12 header bits + 32 payload bits.
  HParseResult *p = h_parse(parser_weird, input, sizeof input);
  g_check_cmp_int32(p->bit_length, ==, 44);
  h_parse_result_free(p);

  // Two-nibble header: 8 header bits + 32 payload bits.
  p = h_parse(parser_ok, input, sizeof input);
  g_check_cmp_int32(p->bit_length, ==, 40);
  h_parse_result_free(p);
}
static void test_seq_index_path(void) {
  // Builds the token tree ((41 42)) by hand and checks that
  // h_seq_index_path() reaches both the nested sequence and its elements.
  // Every index path passed below is terminated by -1.
  HArena *arena = h_new_arena(&system_allocator, 0);

  HParsedToken *seq = h_make_seqn(arena, 1);
  HParsedToken *seq2 = h_make_seqn(arena, 2);
  HParsedToken *tok1 = h_make_uint(arena, 41);
  HParsedToken *tok2 = h_make_uint(arena, 42);

  // outer sequence: one element, the inner sequence
  seq->seq->elements[0] = seq2;
  seq->seq->used = 1;

  // inner sequence: the two unsigned integers
  seq2->seq->elements[0] = tok1;
  seq2->seq->elements[1] = tok2;
  seq2->seq->used = 2;

  g_check_cmp_int(h_seq_index_path(seq, 0, -1)->token_type, ==, TT_SEQUENCE);
  g_check_cmp_int(h_seq_index_path(seq, 0, 0, -1)->token_type, ==, TT_UINT);
  g_check_cmp_int64(h_seq_index_path(seq, 0, 0, -1)->uint, ==, 41);
  g_check_cmp_int64(h_seq_index_path(seq, 0, 1, -1)->uint, ==, 42);

  // All tokens above were created from this arena and are not used past
  // this point; release it so the test does not leak on every run.
  h_delete_arena(arena);
}
// Brace initializer for an input stream positioned at the first bit of a
// buffer.
//   buf         - the bytes to read (converted to uint8_t*)
//   len         - stored in .length
//   endianness_ - stored in .endianness
// .index and .bit_offset start at 0.  Each argument is parenthesized so
// that a compound expression (e.g. a conditional) is used as a whole
// rather than being split by the cast or the surrounding initializer.
#define MK_INPUT_STREAM(buf,len,endianness_)   \
  {                                            \
      .input = (uint8_t*)(buf),                \
      .length = (len),                         \
      .index = 0,                              \
      .bit_offset = 0,                         \
      .endianness = (endianness_)              \
  }
static void test_read_bits_48(void) {
  // Reads the same six bytes (48 bits) through h_read_bits() with
  // little-endian bit and byte order, split at a different bit position in
  // each case.  The two counts in every case add up to exactly 48, so no
  // read runs past the end of the buffer.

  // 32 + 16
  {
    HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
    g_check_cmp_int64(h_read_bits(&is, 32, false), ==, 0x78563412);
    g_check_cmp_int64(h_read_bits(&is, 16, false), ==, 0xBC9A);
  }

  // 31 + 17
  {
    HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
    g_check_cmp_int64(h_read_bits(&is, 31, false), ==, 0x78563412);
    g_check_cmp_int64(h_read_bits(&is, 17, false), ==, 0x17934);
  }

  // 33 + 15: after 33 bits only 15 remain.  The second read previously
  // asked for 17 bits, i.e. two more than the buffer holds; 0x5E4D is the
  // 15-bit value (0xBC9A >> 1).
  {
    HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
    g_check_cmp_int64(h_read_bits(&is, 33, false), ==, 0x78563412);
    g_check_cmp_int64(h_read_bits(&is, 15, false), ==, 0x5E4D);
  }

  // 36 + 12
  {
    HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
    g_check_cmp_int64(h_read_bits(&is, 36, false), ==, 0xA78563412);
    g_check_cmp_int64(h_read_bits(&is, 12, false), ==, 0xBC9);
  }

  // 40 + 8
  {
    HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
    g_check_cmp_int64(h_read_bits(&is, 40, false), ==, 0x9A78563412);
    g_check_cmp_int64(h_read_bits(&is, 8, false), ==, 0xBC);
  }

  // all 48 bits in a single read
  {
    HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
    g_check_cmp_int64(h_read_bits(&is, 48, false), ==, 0xBC9A78563412);
  }
}
static void test_llk_zero_end(void) {
    // Regression test for the LL(k) backend treating end of input as a
    // zero character.
    HParserBackend be = PB_LLk;
    HParser *z = h_ch('\x00');
    HParser *az = h_sequence(h_ch('a'), z, NULL);
    HParser *ze = h_sequence(z, h_end_p(), NULL);
    HParser *aze = h_sequence(h_ch('a'), z, h_end_p(), NULL);

    // some cases surrounding the bug
    //
    // NOTE: the inputs "NUL followed by 'b'" are written as two adjacent
    // string literals on purpose.  A hexadecimal escape extends over every
    // hex digit that follows it, so "\x00b" would be the single byte 0x0B
    // and the test would never feed a zero byte followed by 'b'.
    g_check_parse_match (z, be, "\x00", 1, "u0");
    g_check_parse_failed(z, be, "", 0);
    g_check_parse_match (ze, be, "\x00", 1, "(u0)");
    g_check_parse_failed(ze, be, "\x00" "b", 2);
    g_check_parse_failed(ze, be, "", 0);
    g_check_parse_match (az, be, "a\x00", 2, "(u0x61 u0)");
    g_check_parse_match (aze, be, "a\x00", 2, "(u0x61 u0)");
    g_check_parse_failed(aze, be, "a\x00" "b", 3);

    // the following should not parse but did when the LL(k) backend failed to
    // check for the end of input, mistaking it for a zero character.
    g_check_parse_failed(az, be, "a", 1);
    g_check_parse_failed(aze, be, "a", 1);
}
// Continuation passed to h_bind() in test_wrong_bit_length.  Regardless of
// the token it receives, it returns a parser for the single character 'b',
// created through the allocator it was given.
HParser *k_test_wrong_bit_length(HAllocator *mm__, const HParsedToken *tok, void *env)
{
    // neither the bound token nor the user environment is consulted
    (void)tok;
    (void)env;

    HParser *follow_up = h_ch__m(mm__, 'b');
    return follow_up;
}
static void test_wrong_bit_length(void) {
    // Both parsers below are run over the two bytes "ab"; each result is
    // expected to report a bit length of 16.
    const uint8_t ab[] = { 'a', 'b' };

    // 'a' then 'b' via h_right
    HParseResult *r = h_parse(h_right(h_ch('a'), h_ch('b')), ab, sizeof ab);
    g_check_cmp_int64(r->bit_length, ==, 16);
    h_parse_result_free(r);

    // 'a' then 'b', with the second parser supplied through h_bind
    r = h_parse(h_bind(h_ch('a'), k_test_wrong_bit_length, NULL), ab, sizeof ab);
    g_check_cmp_int64(r->bit_length, ==, 16);
    h_parse_result_free(r);
}
static void test_lalr_charset_lhs(void) {
    HParserBackend be = PB_LALR;

    // many( "AB" as a sequence | one character out of {A, B} )
    HParser *ab_sequence = h_sequence(h_ch('A'), h_ch('B'), NULL);
    HParser *ab_charset = h_in((uint8_t*)"AB", 2);
    HParser *p = h_many(h_choice(ab_sequence, ab_charset, NULL));

    // the above would abort because of an unhandled case in trying to resolve
    // a conflict where an item's left-hand-side was an HCF_CHARSET.
    // however, the compile should fail - the conflict cannot be resolved.
    if (h_compile(p, be, NULL) != 0) {
        // compile was rejected, which is the expected outcome
        return;
    }

    g_test_message("LALR compile didn't detect ambiguous grammar");

    // it says it compiled it - well, then it should parse it!
    // (this helps us see what it thinks it should be doing.)
    g_check_parse_match(p, be, "AA",2, "(u0x41 u0x41)");
    g_check_parse_match(p, be, "AB",2, "((u0x41 u0x42))");

    g_test_fail();
}
static void test_cfg_many_seq(void) {
    // h_many over a two-character sequence must keep one nested sequence per
    // repetition.  A faulty reshape on h_many used to flatten the result to
    // (u0x41 u0x42 u0x41 u0x42) instead.
    HParser *ab_pair = h_sequence(h_ch('A'), h_ch('B'), NULL);
    HParser *p = h_many(ab_pair);

    // same input and expected output for each backend checked here
    g_check_parse_match(p, PB_LLk, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
    g_check_parse_match(p, PB_LALR, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
    g_check_parse_match(p, PB_GLR, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
}
// Fixed backing store handed out by test_charset_bits__alloc.
static uint8_t test_charset_bits__buf[256];

// Allocation hook for test_charset_bits: records a test failure unless the
// request is exactly 256/8 bytes, asserts that the request cannot exceed
// the 256-byte buffer, and always returns that buffer.
static void *test_charset_bits__alloc(HAllocator *allocator, size_t size) {
    (void)allocator;  // the allocator handle itself is not needed

    g_check_cmp_uint64(size, ==, 256/8);
    assert(size <= 256);

    void *storage = test_charset_bits__buf;
    return storage;
}
static void test_charset_bits(void) {
    // charset would allocate 256 bytes instead of 256 bits (= 32 bytes)

    // Only .alloc is provided; the members not named here (.realloc and
    // .free) are left as null pointers by the designated initializer.
    HAllocator alloc = { .alloc = test_charset_bits__alloc };

    // Sentinel immediately after the 32 bytes a charset is expected to use.
    test_charset_bits__buf[32] = 0xAB;

    HCharset cs = new_charset(&alloc);
    (void)cs;  // only the effect on the backing buffer is inspected

    // the first 32 bytes are expected to read as zero ...
    for (size_t i = 0; i < 32; i++) {
        g_check_cmp_uint32(test_charset_bits__buf[i], ==, 0);
    }
    // ... and the sentinel must be left untouched
    g_check_cmp_uint32(test_charset_bits__buf[32], ==, 0xAB);
}
// Allocator for reproducing error 19.
// The bug is a result of uninitialized data being used, initially
// assumed to be zero. Unfortunately, this assumption is often true,
// so reproducing the bug reliably and in a minimal fashion requires
// making it false. Fortunately, glibc malloc has an M_PERTURB option
// for making that assumption false. Unfortunately, we want the test
// to reproduce the bug on systems that don't use glibc. Fortunately,
// the standard Hammer system allocator has a DEBUG__MEMFILL option to
// fill uninitialized memory with a fill byte. Unfortunately, you
// have to recompile Hammer with that symbol #defined in order to
// enable it. Fortunately, hammer allows you to supply your own
// allocator. So this is a simple non-#define-dependent allocator
// that writes 0xbabababa† over all the memory it allocates. (But not
// the memory it reallocs, because, as it happens, the uninitialized
// memory in this case didn't come from a realloc.)
//
// Honestly I think we ought to remove the #ifdefs from
// system_allocator and always compile both the DEBUG__MEMFILL version
// and the non-DEBUG__MEMFILL version, merely changing which one is
// system_allocator, which is after all a struct of three pointers
// that can even be modified at run-time.
//
// † Can you hear it, Mr. Toot?
static void* deadbeefing_malloc(HAllocator *allocator, size_t size) {
    (void)allocator;

    void *fresh = malloc(size);
    if (fresh == NULL) {
        return NULL;
    }
    // Fill the new block with 0xba bytes so that it never reads as zero;
    // memset hands back the pointer it was given.
    return memset(fresh, 0xba, size);
}
// Plain pass-through to realloc(): the resized block is not filled with the
// 0xba pattern, because that isn't necessary to reproduce this bug.
static void* deadbeefing_realloc(HAllocator *allocator, void *uptr, size_t size) {
    (void)allocator;

    void *resized = realloc(uptr, size);
    return resized;
}
// Release hook of the deadbeefing allocator: hands the block straight to
// free().
static void deadbeefing_free(HAllocator *allocator, void *uptr)
{
    (void)allocator;
    free(uptr);
}
// Allocator built from the three deadbeefing_* hooks: allocation fills the
// new block with 0xba bytes, while realloc and free go straight to the C
// library.  test_bug_19 passes it to the *__m / *__ma parser constructors.
static HAllocator deadbeefing_allocator = {
.alloc = deadbeefing_malloc,
.realloc = deadbeefing_realloc,
.free = deadbeefing_free,
};
// Regression test for bug 19 ("GLR backend reaches unreachable code").
// Passes simply by not crashing; the result of h_compile is ignored.
// Declared with a (void) parameter list like every other test in this file,
// so that it is a proper prototype rather than an unspecified-argument
// declaration.
static void test_bug_19(void) {
    // Three character-range parsers, all allocated through the allocator
    // that fills new memory with 0xba; the list is NULL-terminated.
    void *args[] = {
        h_ch_range__m(&deadbeefing_allocator, '0', '9'),
        h_ch_range__m(&deadbeefing_allocator, 'A', 'Z'),
        h_ch_range__m(&deadbeefing_allocator, 'a', 'z'),
        NULL,
    };

    HParser *parser = h_choice__ma(&deadbeefing_allocator, args);

    // In bug 19 ("GLR backend reaches unreachable code"), this call
    // would fail because h_choice__ma allocated an HParser with h_new
    // and didn't initialize its ->desugared field; consequently in
    // the call chain h_compile ... h_lalr_compile ... h_desugar,
    // h_desugar would find that ->desugared was already non-NULL (set
    // to 0xbabababa in the above deadbeefing_malloc), and just return
    // it, leading to a crash immediately afterwards in collect_nts.
    // We don't actually care if the compile succeeds or fails, just
    // that it doesn't crash.
    h_compile(parser, PB_GLR, NULL);

    // The same bug happened in h_sequence__ma.
    h_compile(h_sequence__ma(&deadbeefing_allocator, args), PB_GLR, NULL);

    // It also exists in h_permutation__ma, but it doesn't happen to
    // manifest in the same way.  I don't know how to write a test for
    // the h_permutation__ma case.

    // Reaching this line without crashing is the success criterion.
    g_assert_true(1);
}
void register_regression_tests(void) {
g_test_add_func("/core/regression/bug118", test_bug118);
g_test_add_func("/core/regression/seq_index_path", test_seq_index_path);
g_test_add_func("/core/regression/read_bits_48", test_read_bits_48);
g_test_add_func("/core/regression/llk_zero_end", test_llk_zero_end);
g_test_add_func("/core/regression/wrong_bit_length", test_wrong_bit_length);
g_test_add_func("/core/regression/lalr_charset_lhs", test_lalr_charset_lhs);
g_test_add_func("/core/regression/cfg_many_seq", test_cfg_many_seq);
g_test_add_func("/core/regression/charset_bits", test_charset_bits);
g_test_add_func("/core/regression/bug19", test_bug_19);
}