/*
 * t_regression.c (10.69 KiB)
 *
 * Forked from Hammer / hammer; 151 commits behind the upstream repository.
 *
 * Last commit: 032430e3, authored by xentrac
 *
 *     Fix bug #19 in permutations and sequences too
 *
 *     In `h_sequence__ma` the same bug occurs, and it manifests as a crash
 *     in the same way, so I've added a test for it.  In `h_permutation__ma`
 *     it evidently exists in the same form, but I haven't figured out how to
 *     reproduce it; in that case I added a fix to the implementation, but no
 *     test.
 */
#include <glib.h>
#include <stdint.h>
#include "glue.h"
#include "hammer.h"
#include "test_suite.h"
#include "internal.h"

static void test_bug118(void) {
  // https://github.com/UpstandingHackers/hammer/issues/118
  // Adapted from https://gist.github.com/mrdomino/c6bc91a7cb3b9817edb5
  //
  // Parses the same six input bytes twice: once with a header of two 4-bit
  // nibbles and once with a header of three.  Both headers are followed by
  // three 10-bit samples plus two ignored bits (32 bits), so the expected
  // bit lengths are 8 + 32 = 40 and 12 + 32 = 44 respectively.

  HParseResult* p;
  const uint8_t *input = (uint8_t*)"\x69\x5A\x6A\x7A\x8A\x9A";

#define MY_ENDIAN (BIT_BIG_ENDIAN | BYTE_LITTLE_ENDIAN)
  H_RULE(nibble, h_with_endianness(MY_ENDIAN, h_bits(4, false)));
  H_RULE(sample, h_with_endianness(MY_ENDIAN, h_bits(10, false)));
#undef MY_ENDIAN

  H_RULE(samples, h_sequence(h_repeat_n(sample, 3), h_ignore(h_bits(2, false)), NULL));

  H_RULE(header_ok, h_sequence(nibble, nibble, NULL));
  H_RULE(header_weird, h_sequence(nibble, nibble, nibble, NULL));

  H_RULE(parser_ok, h_sequence(header_ok, samples, NULL));
  H_RULE(parser_weird, h_sequence(header_weird, samples, NULL));

  // A failed parse yields a NULL result.  Substitute -1 for the bit length in
  // that case so the check reports an ordinary test failure instead of
  // crashing the whole test binary on a NULL dereference.
  p = h_parse(parser_weird, input, 6);
  g_check_cmp_int32(p ? p->bit_length : -1, ==, 44);
  if (p) {
    h_parse_result_free(p);
  }

  p = h_parse(parser_ok, input, 6);
  g_check_cmp_int32(p ? p->bit_length : -1, ==, 40);
  if (p) {
    h_parse_result_free(p);
  }
}

// Checks h_seq_index_path() on a hand-built nested token: an outer sequence
// whose only element is an inner sequence holding the uints 41 and 42.
// As used here, the index path is a list of ints terminated by -1.
static void test_seq_index_path(void) {
  HArena *arena = h_new_arena(&system_allocator, 0);

  HParsedToken *seq = h_make_seqn(arena, 1);
  HParsedToken *seq2 = h_make_seqn(arena, 2);
  HParsedToken *tok1 = h_make_uint(arena, 41);
  HParsedToken *tok2 = h_make_uint(arena, 42);

  // Wire up seq = ((41 42)) by filling the element arrays directly.
  seq->seq->elements[0] = seq2;
  seq->seq->used = 1;
  seq2->seq->elements[0] = tok1;
  seq2->seq->elements[1] = tok2;
  seq2->seq->used = 2;

  g_check_cmp_int(h_seq_index_path(seq, 0, -1)->token_type, ==, TT_SEQUENCE);
  g_check_cmp_int(h_seq_index_path(seq, 0, 0, -1)->token_type, ==, TT_UINT);
  g_check_cmp_int64(h_seq_index_path(seq, 0, 0, -1)->uint, ==, 41);
  g_check_cmp_int64(h_seq_index_path(seq, 0, 1, -1)->uint, ==, 42);

  // Every token above was allocated from `arena`; release it now that the
  // checks are done so the test does not leak.
  h_delete_arena(arena);
}

// Brace initializer for an HInputStream that starts at the first bit of `buf`.
//   buf         - start of the input bytes (cast to uint8_t*)
//   len         - value stored in .length
//   endianness_ - value stored in .endianness
// Every argument is parenthesized so that expression arguments such as
// `base + 1` are evaluated as a whole before the cast/assignment is applied.
// Members not named here are zero-initialized by the usual C rules.
#define MK_INPUT_STREAM(buf,len,endianness_)  \
  {                                           \
      .input = (uint8_t*)(buf),               \
      .length = (len),                        \
      .index = 0,                             \
      .bit_offset = 0,                        \
      .endianness = (endianness_)             \
  }

// Exercises h_read_bits() on a 6-byte (48-bit) stream with little-endian bit
// and byte order, splitting the 48 bits at various points around the 32-bit
// mark.  In every case the requested bit counts add up to exactly 48, so no
// read runs past the end of the input.
static void test_read_bits_48(void) {
  {
    // 32 + 16
    HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
    g_check_cmp_int64(h_read_bits(&is, 32, false), ==, 0x78563412);
    g_check_cmp_int64(h_read_bits(&is, 16, false), ==, 0xBC9A);
  }
  {
    // 31 + 17
    HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
    g_check_cmp_int64(h_read_bits(&is, 31, false), ==, 0x78563412);
    g_check_cmp_int64(h_read_bits(&is, 17, false), ==, 0x17934);
  }
  {
    // 33 + 15.  Only 15 bits remain after the first read, and 0x5E4D
    // (0xBC9A >> 1) is exactly those 15 bits; asking for 17 here would read
    // past the end of the 48-bit input.
    HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
    g_check_cmp_int64(h_read_bits(&is, 33, false), ==, 0x78563412);
    g_check_cmp_int64(h_read_bits(&is, 15, false), ==, 0x5E4D);
  }
  {
    // 36 + 12
    HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
    g_check_cmp_int64(h_read_bits(&is, 36, false), ==, 0xA78563412);
    g_check_cmp_int64(h_read_bits(&is, 12, false), ==, 0xBC9);
  }
  {
    // 40 + 8
    HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
    g_check_cmp_int64(h_read_bits(&is, 40, false), ==, 0x9A78563412);
    g_check_cmp_int64(h_read_bits(&is, 8, false), ==, 0xBC);
  }
  {
    // all 48 bits in one read
    HInputStream is = MK_INPUT_STREAM("\x12\x34\x56\x78\x9A\xBC", 6, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN);
    g_check_cmp_int64(h_read_bits(&is, 48, false), ==, 0xBC9A78563412);
  }
}

// LL(k) regression: a NUL byte in the grammar must not be confused with the
// end of the input.
static void test_llk_zero_end(void) {
    HParserBackend llk = PB_LLk;

    // Grammars under test: "\0", "a\0", "\0<end>" and "a\0<end>".
    HParser *nul       = h_ch('\x00');
    HParser *a_nul     = h_sequence(h_ch('a'), nul, NULL);
    HParser *nul_eof   = h_sequence(nul, h_end_p(), NULL);
    HParser *a_nul_eof = h_sequence(h_ch('a'), nul, h_end_p(), NULL);

    // Sanity checks in the neighbourhood of the bug.
    g_check_parse_match (nul, llk, "\x00", 1, "u0");
    g_check_parse_failed(nul, llk, "", 0);
    g_check_parse_match (nul_eof, llk, "\x00", 1, "(u0)");
    g_check_parse_failed(nul_eof, llk, "\x00b", 2);
    g_check_parse_failed(nul_eof, llk, "", 0);
    g_check_parse_match (a_nul, llk, "a\x00", 2, "(u0x61 u0)");
    g_check_parse_match (a_nul_eof, llk, "a\x00", 2, "(u0x61 u0)");
    g_check_parse_failed(a_nul_eof, llk, "a\x00b", 3);

    // The actual regression: these inputs end right after the 'a'.  They
    // must be rejected, but used to be accepted when the LL(k) backend
    // skipped the end-of-input check and took the end for a zero character.
    g_check_parse_failed(a_nul, llk, "a", 1);
    g_check_parse_failed(a_nul_eof, llk, "a", 1);
}

// Continuation passed to h_bind() by test_wrong_bit_length(): whatever token
// was bound, the next parser always expects the single character 'b'.
//   mm__ - allocator used to build the returned parser
//   tok  - token produced by the bound parser (unused)
//   env  - user data supplied to h_bind() (unused)
HParser *k_test_wrong_bit_length(HAllocator *mm__, const HParsedToken *tok, void *env)
{
    (void)tok;
    (void)env;

    HParser *expect_b = h_ch__m(mm__, 'b');
    return expect_b;
}

// Checks that parsing the two bytes "ab" reports a bit_length of 16, both for
// h_right('a', 'b') and for h_bind('a', k) where the continuation k yields a
// parser for 'b'.
static void test_wrong_bit_length(void) {
    HParseResult *r;
    HParser *p;

    // A failed parse yields a NULL result.  Substitute -1 for the bit length
    // in that case so the check reports an ordinary test failure instead of
    // crashing the whole test binary on a NULL dereference.
    p = h_right(h_ch('a'), h_ch('b'));
    r = h_parse(p, (const uint8_t *)"ab", 2);
    g_check_cmp_int64(r ? r->bit_length : -1, ==, 16);
    if (r) {
        h_parse_result_free(r);
    }

    p = h_bind(h_ch('a'), k_test_wrong_bit_length, NULL);
    r = h_parse(p, (const uint8_t *)"ab", 2);
    g_check_cmp_int64(r ? r->bit_length : -1, ==, 16);
    if (r) {
        h_parse_result_free(r);
    }
}
// LALR regression: compiling many(choice("AB", [AB])) used to abort because
// conflict resolution hit an unhandled case when an item's left-hand side was
// an HCF_CHARSET.  The grammar is ambiguous, so the expected outcome is a
// compile *failure* (the conflict cannot be resolved) rather than an abort.
static void test_lalr_charset_lhs(void) {
    HParserBackend be = PB_LALR;

    HParser *ab_pair = h_sequence(h_ch('A'), h_ch('B'), NULL);
    HParser *a_or_b  = h_in((uint8_t*)"AB",2);
    HParser *grammar = h_many(h_choice(ab_pair, a_or_b, NULL));

    if(h_compile(grammar, be, NULL) != 0) {
        // Rejected, as it should be: nothing more to check.
        return;
    }

    g_test_message("LALR compile didn't detect ambiguous grammar");

    // The backend claims it compiled the grammar, so make it parse something;
    // the output shows what it believes the grammar means.
    g_check_parse_match(grammar, be, "AA",2, "(u0x41 u0x41)");
    g_check_parse_match(grammar, be, "AB",2, "((u0x41 u0x42))");

    g_test_fail();
}

// CFG backends regression: many(sequence('A', 'B')) must keep each "AB" pair
// as its own nested sequence.  A faulty reshape on h_many used to flatten the
// result to (u0x41 u0x42 u0x41 u0x42).
static void test_cfg_many_seq(void) {
    HParser *ab_pair = h_sequence(h_ch('A'), h_ch('B'), NULL);
    HParser *ab_many = h_many(ab_pair);

    // Same input and same expected AST for each context-free backend.
    g_check_parse_match(ab_many, PB_LLk,  "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
    g_check_parse_match(ab_many, PB_LALR, "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
    g_check_parse_match(ab_many, PB_GLR,  "ABAB",4, "((u0x41 u0x42) (u0x41 u0x42))");
}

// Backing store handed out by test_charset_bits__alloc().
static uint8_t test_charset_bits__buf[256];

// Allocation hook for test_charset_bits(): records a test failure unless
// exactly 256/8 bytes are requested, and always returns the static buffer
// above.  The assert keeps an oversized request from being served out of a
// buffer that is too small for it.
//   allocator - the HAllocator this hook belongs to (unused)
//   size      - number of bytes requested
static void *test_charset_bits__alloc(HAllocator *allocator, size_t size)
{
    (void)allocator;

    g_check_cmp_uint64(size, ==, 256/8);
    assert(size <= sizeof test_charset_bits__buf);

    return &test_charset_bits__buf[0];
}
// Regression: new_charset() used to allocate 256 bytes where 256 bits
// (= 32 bytes) are enough.  The allocation hook checks the requested size;
// this test additionally plants a sentinel right behind the first 32 bytes
// and verifies that those bytes read as zero while the sentinel survives.
static void test_charset_bits(void) {
    // Only .alloc is provided; the remaining callbacks are left
    // zero-initialized, i.e. NULL.
    HAllocator alloc = { .alloc = test_charset_bits__alloc };

    test_charset_bits__buf[32] = 0xAB;

    HCharset cs = new_charset(&alloc);
    (void)cs;   // only the effect on the buffer is of interest

    size_t i = 0;
    while (i < 32) {
        g_check_cmp_uint32(test_charset_bits__buf[i], ==, 0);
        i++;
    }
    g_check_cmp_uint32(test_charset_bits__buf[32], ==, 0xAB);
}


// Allocator for reproducing error 19.

// The bug is a result of uninitialized data being used, initially
// assumed to be zero.  Unfortunately, this assumption is often true,
// so reproducing the bug reliably and in a minimal fashion requires
// making it false.  Fortunately, glibc malloc has an M_PERTURB option
// for making that assumption false.  Unfortunately, we want the test
// to reproduce the bug on systems that don't use glibc.  Fortunately,
// the standard Hammer system allocator has a DEBUG__MEMFILL option to
// fill uninitialized memory with a fill byte.  Unfortunately, you
// have to recompile Hammer with that symbol #defined in order to
// enable it.  Fortunately, hammer allows you to supply your own
// allocator.  So this is a simple non-#define-dependent allocator
// that writes 0xbabababa† over all the memory it allocates.  (But not
// the memory it reallocs, because, as it happens, the uninitialized
// memory in this case didn't come from a realloc.)
//
// Honestly I think we ought to remove the #ifdefs from
// system_allocator and always compile both the DEBUG__MEMFILL version
// and the non-DEBUG__MEMFILL version, merely changing which one is
// system_allocator, which is after all a struct of three pointers
// that can even be modified at run-time.
//
// † Can you hear it, Mr. Toot?

// HAllocator .alloc hook: malloc() `size` bytes and overwrite every one of
// them with 0xba, so callers cannot rely on fresh memory being zero.
// Returns NULL, without touching anything, when malloc() fails.
//   allocator - the HAllocator this hook belongs to (unused)
static void* deadbeefing_malloc(HAllocator *allocator, size_t size) {
    (void)allocator;

    void *fresh = malloc(size);
    if (fresh == NULL) {
        return NULL;
    }
    return memset(fresh, 0xba, size);
}

// HAllocator .realloc hook: a plain pass-through to realloc().  The grown
// region is deliberately not filled with 0xba, since reproducing this bug
// does not need it.
//   allocator - the HAllocator this hook belongs to (unused)
static void* deadbeefing_realloc(HAllocator *allocator, void *uptr, size_t size) {
    (void)allocator;

    void *resized = realloc(uptr, size);
    return resized;
}

// HAllocator .free hook: hands the block straight back to free().
//   allocator - the HAllocator this hook belongs to (unused)
static void deadbeefing_free(HAllocator *allocator, void *uptr) {
    (void)allocator;
    free(uptr);
}

// Allocator whose fresh allocations come pre-filled with 0xba bytes; realloc
// and free simply forward to the C library.
static HAllocator deadbeefing_allocator = {
    .alloc   = &deadbeefing_malloc,
    .realloc = &deadbeefing_realloc,
    .free    = &deadbeefing_free,
};

// Regression test for bug 19 ("GLR backend reaches unreachable code").
// Builds a choice and a sequence over three character ranges with the
// 0xba-filling allocator and compiles both for the GLR backend.  The test
// passes as long as neither compile crashes.
//
// Declared with a (void) parameter list, like the other tests in this file,
// so its type matches the no-argument callback it is registered as.
static void test_bug_19(void) {
    void *args[] = {
        h_ch_range__m(&deadbeefing_allocator, '0', '9'),
        h_ch_range__m(&deadbeefing_allocator, 'A', 'Z'),
        h_ch_range__m(&deadbeefing_allocator, 'a', 'z'),
        NULL,
    };

    HParser *parser = h_choice__ma(&deadbeefing_allocator, args);

    // In bug 19 ("GLR backend reaches unreachable code"), this call
    // would fail because h_choice__ma allocated an HParser with h_new
    // and didn't initialize its ->desugared field; consequently in
    // the call chain h_compile ... h_lalr_compile ... h_desugar,
    // h_desugar would find that ->desugared was already non-NULL (set
    // to 0xbabababa in the above deadbeefing_malloc), and just return
    // it, leading to a crash immediately afterwards in collect_nts.
    // We don't actually care if the compile succeeds or fails, just
    // that it doesn't crash.
    h_compile(parser, PB_GLR, NULL);

    // The same bug happened in h_sequence__ma.
    h_compile(h_sequence__ma(&deadbeefing_allocator, args), PB_GLR, NULL);

    // It also exists in h_permutation__ma, but it doesn't happen to
    // manifest in the same way.  I don't know how to write a test for
    // the h_permutation__ma case.
    g_assert_true(1);
}

// Registers every regression test in this file with the GLib test framework,
// under the /core/regression/ prefix and in the order listed below.
void register_regression_tests(void) {
  static const struct {
    const char *path;     // GLib test path
    void (*run)(void);    // test body
  } cases[] = {
    { "/core/regression/bug118",           test_bug118 },
    { "/core/regression/seq_index_path",   test_seq_index_path },
    { "/core/regression/read_bits_48",     test_read_bits_48 },
    { "/core/regression/llk_zero_end",     test_llk_zero_end },
    { "/core/regression/wrong_bit_length", test_wrong_bit_length },
    { "/core/regression/lalr_charset_lhs", test_lalr_charset_lhs },
    { "/core/regression/cfg_many_seq",     test_cfg_many_seq },
    { "/core/regression/charset_bits",     test_charset_bits },
    { "/core/regression/bug19",            test_bug_19 },
  };

  for (size_t k = 0; k < sizeof cases / sizeof cases[0]; k++) {
    g_test_add_func(cases[k].path, cases[k].run);
  }
}