Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • hammer/hammer
  • mlp/hammer
  • xentrac/hammer
  • pesco/hammer
  • letitiali/hammer
  • nobody/hammer
  • kia/hammer-sandbox
  • vyrus001/hammer
  • denleylam/hammer
9 results
Show changes
Showing with 2089 additions and 90 deletions
.TH HAMMER 1 2014-04-23 Hammer
.SH NAME
Hammer \- a bit-oriented parsing library
.SH SYNOPSIS
#include <hammer\/hammer.h>
.SH DESCRIPTION
.B Hammer
is a parsing library. Like many modern parsing libraries, it provides a parser combinator interface for writing grammars as inline domain-specific languages, but Hammer also provides a variety of parsing backends. It's also bit-oriented rather than character-oriented, making it ideal for parsing binary data such as images, network packets, audio, and executables.
Hammer is written in C, but will provide bindings for other languages. If you don't see a language you're interested in on the list, just ask.
Hammer currently builds under Linux, OS X, and Windows.
.SH NOTES
Bit-oriented -- grammars can include single-bit flags or multi-bit constructs that span character boundaries, with no hassle
Thread-safe, reentrant
Benchmarking for parsing backends -- determine empirically which backend will be most time-efficient for your grammar
Parsing backends:
Packrat parsing
LL(k)
GLR
LALR
Regular expressions
Language bindings:
C++
Java (not currently building; give us a few days)
Python
Ruby
Perl
Go
PHP
.NET
.SH EXAMPLE
.nf
1 #include <hammer/hammer.h>
2 #include <stdio.h>
3
4 int main(int argc, char *argv[]) {
5 uint8_t input[1024];
6 size_t inputsize;
7
8 HParser *hello_parser = h_token("Hello World", 11);
9
10 inputsize = fread(input, 1, sizeof(input), stdin);
11
12 HParseResult *result = h_parse(hello_parser, input, inputsize);
13 if(result) {
14 printf("yay!\n");
15 } else {
16 printf("boo!\n");
17 }
18 }
.fi
'\" t
.\" Title: hammer
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets v1.76.1 <http://docbook.sf.net/>
.\" Date: 29 April 2014
.\" Manual: \ \&
.\" Source: \ \& 8.6.9
.\" Language: English
.\"
.TH "HAMMER" "3" "29 April 2014" "\ \& 8\&.6\&.9" "\ \&"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.\" http://bugs.debian.org/507673
.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.ie \n(.g .ds Aq \(aq
.el .ds Aq '
.\" -----------------------------------------------------------------
.\" * set default formatting
.\" -----------------------------------------------------------------
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
.ad l
.\" -----------------------------------------------------------------
.\" * MAIN CONTENT STARTS HERE *
.\" -----------------------------------------------------------------
.SH "NAME"
Hammer \- a bit-oriented parsing library
.SH "SYNOPSIS"
.sp
.B #include <hammer/hammer.h>
.SH "DESCRIPTION"
.sp
.B Hammer(3)
is a parsing library. Like many modern parsing libraries, it provides a parser combinator interface for writing grammars as inline domain-specific languages, but Hammer also provides a variety of parsing backends. It's also bit-oriented rather than character-oriented, making it ideal for parsing binary data such as images, network packets, audio, and executables.
Hammer is written in C, but will provide bindings for other languages. If you don't see a language you're interested in on the list, just ask.
Hammer currently builds under Linux, OS X, and Windows.
.SH "NOTES"
Bit-oriented -- grammars can include single-bit flags or multi-bit constructs that span character boundaries, with no hassle
Thread-safe, reentrant
Benchmarking for parsing backends -- determine empirically which backend will be most time-efficient for your grammar
Parsing backends:
Packrat parsing
LL(k)
GLR
LALR
Regular expressions
Language bindings:
C++
Java (not currently building; give us a few days)
Python
Ruby
Perl
Go
PHP
.NET
.SH "EXAMPLE"
.nf
1 #include <hammer/hammer.h>
2 #include <stdio.h>
3
4 int main(int argc, char *argv[]) {
5 uint8_t input[1024];
6 size_t inputsize;
7
8 HParser *hello_parser = h_token("Hello World", 11);
9
10 inputsize = fread(input, 1, sizeof(input), stdin);
11
12 HParseResult *result = h_parse(hello_parser, input, inputsize);
13 if(result) {
14 printf("yay!\\n");
15 } else {
16 printf("boo!\\n");
17 }
18 h_parse_result_free(result);
19 return 0 == result;
20 }
.fi
.SH "AUTHOR"
.sp
Hammer was originally written by Meredith Patterson and TQ Hirsch\&. Many people have contributed to it\&.
.SH "RESOURCES"
.sp
github: https://github\&.com/upstandinghackers/hammer/
.SH "COPYING"
.sp
Free use of this software is granted under the terms of the GNU General Public License (GPL)\& v2.
from __future__ import absolute_import, division, print_function
Import('env')
example = env.Clone()
example.Append(LIBS="hammer", LIBPATH="../src")
if 'GPROF' in env and env['GPROF'] == 1:
hammer_lib_name="hammer_pg"
else:
hammer_lib_name="hammer"
example.Append(LIBS=hammer_lib_name, LIBPATH="../src")
dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c'])
ttuser = example.Program('ttuser', 'ttuser.c')
base64 = example.Program('base64', 'base64.c')
base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c')
base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c')
env.Alias("examples", [dns, base64, base64_sem1, base64_sem2])
\ No newline at end of file
ties = example.Program('ties', ['ties.c', 'grammar.c'])
env.Alias("examples", [dns, ttuser, base64, base64_sem1, base64_sem2, ties])
......@@ -45,7 +45,7 @@ int main(int argc, char **argv)
{
uint8_t input[102400];
size_t inputsize;
const HParseResult *result;
HParseResult *result;
init_parser();
......@@ -57,6 +57,7 @@ int main(int argc, char **argv)
if(result) {
fprintf(stderr, "parsed=%" PRId64 " bytes\n", result->bit_length/8);
h_pprint(stdout, result->ast, 0, 0);
h_parse_result_free(result);
return 0;
} else {
return 1;
......
#!/usr/bin/env python2
# Example parser: Base64, syntax only.
#
# Demonstrates how to construct a Hammer parser that recognizes valid Base64
# sequences.
#
# Note that no semantic evaluation of the sequence is performed, i.e. the
# byte sequence being represented is not returned, or determined. See
# base64_sem1.py and base64_sem2.py for examples how to attach appropriate
# semantic actions to the grammar.
from __future__ import absolute_import, division, print_function
import sys
import hammer as h
def init_parser():
    """Build and return the syntax-only Base64 recognizer.

    The returned parser accepts a Base64 document, optionally preceded by
    whitespace, followed by optional whitespace and end of input. No
    semantic actions are attached.
    """
    # CORE
    decimal = h.ch_range(0x30, 0x39)
    letter = h.choice(h.ch_range(0x41, 0x5a), h.ch_range(0x61, 0x7a))

    # AUX.
    plus_sign = h.ch(b'+')
    slash_sign = h.ch(b'/')
    pad = h.ch(b'=')

    # Any of the 64 digits, and the restricted digit sets allowed directly
    # before one or two padding characters.
    any_digit = h.choice(letter, decimal, plus_sign, slash_sign)
    digit_before_one_pad = h.in_(b'AEIMQUYcgkosw048')
    digit_before_two_pads = h.in_(b'AQgw')

    # A full 4-digit block, and the two possible padded final blocks.
    full_block = h.repeat_n(any_digit, 4)
    two_byte_block = h.sequence(any_digit, any_digit,
                                digit_before_one_pad, pad)
    one_byte_block = h.sequence(any_digit, digit_before_two_pads, pad, pad)

    body = h.sequence(h.many(full_block),
                      h.optional(h.choice(two_byte_block, one_byte_block)))

    return h.sequence(h.whitespace(body), h.whitespace(h.end_p()))
def main():
    """Parse stdin as Base64 and print the parse result on success.

    The input size and the raw input are echoed to stderr first.
    """
    parser = init_parser()
    data = sys.stdin.read()

    print('inputsize=%i' % len(data), file=sys.stderr)
    print('input=%s' % data, file=sys.stderr, end='')

    outcome = parser.parse(data)
    if not outcome:
        return
    # The C example also reports the number of parsed bytes here; that line
    # is disabled in this port:
    #print('parsed=%i bytes', result.bit_length/8, file=sys.stderr)
    print(outcome)


if __name__ == '__main__':
    main()
......@@ -29,9 +29,9 @@ HParsedToken *act_bsfdig(const HParseResult *p, void* user_data)
uint8_t c = H_CAST_UINT(p->ast);
if(c >= 0x40 && c <= 0x5A) // A-Z
if(c >= 0x41 && c <= 0x5A) // A-Z
res->uint = c - 0x41;
else if(c >= 0x60 && c <= 0x7A) // a-z
else if(c >= 0x61 && c <= 0x7A) // a-z
res->uint = c - 0x61 + 26;
else if(c >= 0x30 && c <= 0x39) // 0-9
res->uint = c - 0x30 + 52;
......@@ -149,12 +149,13 @@ HParser *init_parser(void)
#include <stdio.h>
const HParser *parser; // Allocated statically to suppress leak warnings
int main(int argc, char **argv)
{
uint8_t input[102400];
size_t inputsize;
const HParser *parser;
const HParseResult *result;
HParseResult *result;
parser = init_parser();
......@@ -166,6 +167,7 @@ int main(int argc, char **argv)
if(result) {
fprintf(stderr, "parsed=%" PRId64 " bytes\n", result->bit_length/8);
h_pprint(stdout, result->ast, 0, 0);
h_parse_result_free(result);
return 0;
} else {
return 1;
......
#!/usr/bin/env python2
# Example parser: Base64, with fine-grained semantic actions
#
# Demonstrates how to attach semantic actions to grammar rules and piece by
# piece transform the parse tree into the desired semantic representation,
# in this case a sequence of 8-bit values.
#
# Those rules using h.action get an attached action, which must be declared
# (as a function).
#
# This variant of the example uses fine-grained semantic actions that
# transform the parse tree in small steps in a bottom-up fashion. Compare
# base64_sem2.py for an alternative approach using a single top-level action.
from __future__ import absolute_import, division, print_function
import functools
import sys
import hammer as h
# Semantic actions for the grammar below, each corresponds to an "ARULE".
# They must be named act_<rulename>.
def act_bsfdig(p, user_data=None):
    """Semantic action: map one parsed Base64 digit to its 6-bit value.

    p is the parsed character, either as an integer code or as a
    one-character string/bytes object. Returns an int in 0..63.
    Raises ValueError if the character is not a Base64 digit.
    user_data is unused.
    """
    # FIXME See the note in init_parser()
    c = p if isinstance(p, h.INTEGER_TYPES) else ord(p)

    if 0x41 <= c <= 0x5A: # A-Z
        return c - 0x41
    elif 0x61 <= c <= 0x7A: # a-z
        return c - 0x61 + 26
    elif 0x30 <= c <= 0x39: # 0-9
        return c - 0x30 + 52
    # c is always an integer at this point, so it must be compared with the
    # character codes; comparing against b'+' / b'/' can never match.
    elif c == 0x2B: # '+'
        return 62
    elif c == 0x2F: # '/'
        return 63
    else:
        raise ValueError('not a Base64 digit: %r' % (p,))
# Hammer's Python bindings don't currently expose h_act_index or h_act_ignore
def act_index0(p, user_data=None):
    """Semantic action: replace a sequence by its first element."""
    first = p[0]
    return first
def act_ignore(p, user_data=None):
    """Semantic action: discard the parsed value by returning None."""
    return None
act_bsfdig_4bit = act_bsfdig
act_bsfdig_2bit = act_bsfdig
act_equals = act_ignore
act_ws = act_ignore
act_document = act_index0
def act_base64_n(n, p, user_data=None):
    """General-form action to turn a block of base64 digits into bytes.

    n is the number of output bytes (1..3); the first n+1 items of p are
    the 6-bit digit values, with None treated as 0. Returns a tuple of n
    integers in 0..255, most significant byte first.
    """
    # Pack the n+1 six-bit digits into one integer, first digit highest.
    packed = 0
    for idx in range(n + 1):
        packed = (packed << 6) | (p[idx] or 0)

    # align, i.e. cut off the extra low bits that do not fill a whole byte
    packed >>= (6 * (n + 1)) % 8

    # Peel off the bytes from the most significant end.
    return tuple((packed >> shift) & 0xFF
                 for shift in range(8 * (n - 1), -1, -8))
act_base64_3 = functools.partial(act_base64_n, 3)
act_base64_2 = functools.partial(act_base64_n, 2)
act_base64_1 = functools.partial(act_base64_n, 1)
def act_base64(p, user_data=None):
    """Semantic action: flatten decoded blocks into one tuple of bytes.

    p is a 2-tuple: p[0] is a tuple of already-decoded 3-byte blocks, and
    p[1] is the decoded trailing block (a tuple) or a non-tuple value when
    there is no trailing block.
    """
    assert isinstance(p, tuple)
    assert len(p) == 2
    assert isinstance(p[0], tuple)

    # concatenate base64_3 blocks
    decoded = [byte for block in p[0] for byte in block]

    # append one trailing base64_2 or _1 block
    trailing = p[1]
    if isinstance(trailing, tuple):
        decoded.extend(trailing)

    return tuple(decoded)
def init_parser():
    """Return a parser with the grammar to be recognized.

    Every rule that produces a value carries a fine-grained semantic
    action, so the result is built bottom-up into a tuple of byte values.
    """
    # CORE

    # This is a direct translation of the C example. In C the literal 0x30
    # is interchangeable with the char literal '0' (note the single quotes).
    # This is not the case in Python.

    # TODO In the interests of being more Pythonic settle on either string
    #      literals, or integers
    decimal = h.ch_range(0x30, 0x39)
    letter = h.choice(h.ch_range(0x41, 0x5a), h.ch_range(0x61, 0x7a))
    blank = h.in_(b" \t\n\r\f\v")

    # AUX.
    plus_sign = h.ch(b'+')
    slash_sign = h.ch(b'/')
    pad = h.action(h.ch(b'='), act_equals)

    any_digit = h.action(h.choice(letter, decimal, plus_sign, slash_sign),
                         act_bsfdig)
    digit_before_one_pad = h.action(h.in_(b"AEIMQUYcgkosw048"),
                                    act_bsfdig_4bit)
    digit_before_two_pads = h.action(h.in_(b"AQgw"), act_bsfdig_2bit)

    three_bytes = h.action(h.repeat_n(any_digit, 4), act_base64_3)
    two_bytes = h.action(
        h.sequence(any_digit, any_digit, digit_before_one_pad, pad),
        act_base64_2)
    one_byte = h.action(
        h.sequence(any_digit, digit_before_two_pads, pad, pad),
        act_base64_1)

    payload = h.action(
        h.sequence(h.many(three_bytes),
                   h.optional(h.choice(two_bytes, one_byte))),
        act_base64)

    # TODO This is not quite the same as the C example, which uses act_ignore.
    #      But I can't get hammer to filter any value returned by act_ignore.
    skip_ws = h.ignore(h.many(blank))

    document = h.action(h.sequence(skip_ws, payload, skip_ws, h.end_p()),
                        act_document)

    # BUG Sometimes inputs that should parse just don't.
    #     It *seemed* to happen mostly with things like "bbbbaaaaBA==".
    #     Using fewer actions seemed to make it less likely.

    return document
def main():
    """Parse stdin as Base64 and print the decoded result on success.

    The input size and the raw input are echoed to stderr first.
    """
    parser = init_parser()
    data = sys.stdin.read()

    print('inputsize=%i' % len(data), file=sys.stderr)
    print('input=%s' % data, file=sys.stderr, end='')

    outcome = parser.parse(data)
    if not outcome:
        return
    # The C example also reports the number of parsed bytes here; that line
    # is disabled in this port:
    #print('parsed=%i bytes', result.bit_length/8, file=sys.stderr)
    print(outcome)


if __name__ == '__main__':
    main()
......@@ -31,9 +31,9 @@ uint8_t bsfdig_value(const HParsedToken *p)
if(p && p->token_type == TT_UINT) {
uint8_t c = p->uint;
if(c >= 0x40 && c <= 0x5A) // A-Z
if(c >= 0x41 && c <= 0x5A) // A-Z
value = c - 0x41;
else if(c >= 0x60 && c <= 0x7A) // a-z
else if(c >= 0x61 && c <= 0x7A) // a-z
value = c - 0x61 + 26;
else if(c >= 0x30 && c <= 0x39) // 0-9
value = c - 0x30 + 52;
......@@ -153,12 +153,13 @@ const HParser *init_parser(void)
#include <stdio.h>
const HParser *parser; // Allocated statically to suppress leak warnings
int main(int argc, char **argv)
{
uint8_t input[102400];
size_t inputsize;
const HParser *parser;
const HParseResult *result;
HParseResult *result;
parser = init_parser();
......@@ -170,6 +171,7 @@ int main(int argc, char **argv)
if(result) {
fprintf(stderr, "parsed=%" PRId64 " bytes\n", result->bit_length/8);
h_pprint(stdout, result->ast, 0, 0);
h_parse_result_free(result);
return 0;
} else {
return 1;
......
#!/usr/bin/env python2
# Example parser: Base64, with coarse-grained semantic actions
#
# Demonstrates how to attach semantic actions to a grammar and transform the
# parse tree into the desired semantic representation, in this case a sequence
# of 8-bit values.
#
# Those rules using h.action get an attached action, which must be declared
# (as a function).
#
# This variant of the example uses coarse-grained semantic actions,
# transforming the entire parse tree in one big step. Compare base64_sem1.py
# for an alternative approach using a fine-grained piece-by-piece
# transformation.
from __future__ import absolute_import, division, print_function
import functools
import sys
import hammer as h
# Semantic actions for the grammar below, each corresponds to an "ARULE".
# They must be named act_<rulename>.
def bsfdig_value(p):
    """Return the numeric value of a parsed base64 digit.

    p is the parsed character, either as an integer code or as a
    one-character string/bytes object. Returns an int in 0..63, or 0 when
    the character is not a Base64 digit.
    """
    c = p if isinstance(p, h.INTEGER_TYPES) else ord(p)
    if c:
        if 0x41 <= c <= 0x5A: # A-Z
            return c - 0x41
        elif 0x61 <= c <= 0x7A: # a-z
            return c - 0x61 + 26
        elif 0x30 <= c <= 0x39: # 0-9
            return c - 0x30 + 52
        # c is always an integer at this point, so it must be compared with
        # the character codes; comparing against b'+' / b'/' never matches
        # and would make both digits decode as 0.
        elif c == 0x2B: # '+'
            return 62
        elif c == 0x2F: # '/'
            return 63
    return 0
def act_base64(p, user_data=None):
    """Semantic action: decode the whole Base64 parse tree in one step.

    p is a 2-tuple: p[0] is a tuple of 4-digit blocks (each a tuple of
    parsed digits) and p[1] is the optional trailing padded block, a
    4-tuple of parsed tokens, or a non-tuple value when absent.
    Returns the decoded data as a tuple of integers in 0..255.
    """
    assert isinstance(p, tuple)
    assert len(p) == 2
    assert isinstance(p[0], tuple)

    # grab b64_3 block sequence
    # grab and analyze b64 end block (_2 or _1)
    b64_3 = p[0]
    tail = p[1]
    b64_2 = b64_1 = None
    if isinstance(tail, tuple):
        # The third token tells the two padded forms apart: it is '=' only
        # in a base64_1 block. The token may arrive as a str, a bytes
        # object or an integer code (bsfdig_value accepts the same range
        # of representations), so accept all three spellings of '='.
        if tail[2] in ('=', b'=', 0x3D):
            b64_1 = tail
        else:
            b64_2 = tail

    # allocate result sequence
    res = []

    # concatenate base64_3 blocks
    for digits in b64_3:
        assert isinstance(digits, tuple)

        x = bsfdig_value(digits[0])
        x <<= 6; x |= bsfdig_value(digits[1])
        x <<= 6; x |= bsfdig_value(digits[2])
        x <<= 6; x |= bsfdig_value(digits[3])
        res.append((x >> 16) & 0xFF)
        res.append((x >> 8) & 0xFF)
        res.append(x & 0xFF)

    # append one trailing base64_2 or _1 block
    if b64_2:
        digits = b64_2
        x = bsfdig_value(digits[0])
        x <<= 6; x |= bsfdig_value(digits[1])
        x <<= 6; x |= bsfdig_value(digits[2])
        # 18 bits collected: drop the 2 surplus low bits, emit 2 bytes
        res.append((x >> 10) & 0xFF)
        res.append((x >> 2) & 0xFF)
    elif b64_1:
        digits = b64_1
        x = bsfdig_value(digits[0])
        x <<= 6; x |= bsfdig_value(digits[1])
        # 12 bits collected: drop the 4 surplus low bits, emit 1 byte
        res.append((x >> 4) & 0xFF)

    return tuple(res)
# Hammer's Python bindings don't currently expose h_act_index or h_act_ignore
def act_index0(p, user_data=None):
    """Semantic action: replace a sequence by its first element."""
    first = p[0]
    return first
def act_ignore(p, user_data=None):
    """Semantic action: discard the parsed value by returning None."""
    return None
act_ws = act_ignore
act_document = act_index0
def init_parser():
    """Set up the parser with the grammar to be recognized.

    Only the base64 rule and the document rule carry semantic actions; the
    whole parse tree is decoded in one step by act_base64.
    """
    # CORE
    decimal = h.ch_range(0x30, 0x39)
    letter = h.choice(h.ch_range(0x41, 0x5a), h.ch_range(0x61, 0x7a))
    blank = h.in_(b" \t\n\r\f\v")

    # AUX.
    plus_sign = h.ch(b'+')
    slash_sign = h.ch(b'/')
    pad = h.ch(b'=')

    any_digit = h.choice(letter, decimal, plus_sign, slash_sign)
    digit_before_one_pad = h.in_(b"AEIMQUYcgkosw048")
    digit_before_two_pads = h.in_(b"AQgw")

    three_bytes = h.repeat_n(any_digit, 4)
    two_bytes = h.sequence(any_digit, any_digit, digit_before_one_pad, pad)
    one_byte = h.sequence(any_digit, digit_before_two_pads, pad, pad)

    payload = h.action(
        h.sequence(h.many(three_bytes),
                   h.optional(h.choice(two_bytes, one_byte))),
        act_base64)

    # TODO This is not quite the same as the C example, which uses act_ignore.
    #      But I can't get hammer to filter any value returned by act_ignore.
    skip_ws = h.ignore(h.many(blank))

    document = h.action(h.sequence(skip_ws, payload, skip_ws, h.end_p()),
                        act_document)

    # BUG Sometimes inputs that should parse just don't.
    #     It *seemed* to happen mostly with things like "bbbbaaaaBA==".
    #     Using fewer actions seemed to make it less likely.

    return document
def main():
    """Parse stdin as Base64 and print the decoded result on success.

    The input size and the raw input are echoed to stderr first.
    """
    parser = init_parser()
    data = sys.stdin.read()

    print('inputsize=%i' % len(data), file=sys.stderr)
    print('input=%s' % data, file=sys.stderr, end='')

    outcome = parser.parse(data)
    if not outcome:
        return
    # The C example also reports the number of parsed bytes here; that line
    # is disabled in this port:
    #print('parsed=%i bytes', result.bit_length/8, file=sys.stderr)
    print(outcome)


if __name__ == '__main__':
    main()
// Generates a system of equations for generating functions from a grammar.
//
// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org>
//
// If a desugared parser has user_data set, the generating function systems will try
// to interpret it as a string:
//
// If this string for an h_ch starts with the character 0, then that character
// will have weight 0 in the generating function.
//
// Use the remaining string to set the preferred name of that parser in the
// generating function.
//
#include <inttypes.h>
#include "../src/backends/contextfree.h"
#include "../src/backends/lr.h"
#include "grammar.h"
#include <stdio.h>
// Returns a printable name for the symbol nt of grammar g.
//
// If nt->user_data points to a string whose first character is printable,
// that string is the name; a leading '0' (the zero-weight marker that
// readsequence tests for) is stripped first. Otherwise a name is derived
// from nt's number in g->nts, written in base 26 with the letters A-Z.
//
// The derived name lives in a static buffer that is overwritten by the next
// call, so callers must use or copy it before calling again.
const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) {
  // if user_data exists and is printable:
  const char *user_str = (const char *)nt->user_data;
  if(user_str != NULL && *user_str > ' ' && *user_str < 127) {
    // The marker character is the digit '0', not the NUL terminator: the
    // printable check above already rules out an empty string, so a test
    // against '\0' could never select the "skip the marker" branch.
    if(*user_str != '0') {
      return user_str;
    } else {
      return user_str+1;
    }
  }

  static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits

  // find nt's number in g
  size_t n = (uintptr_t)h_hashtable_get(g->nts, nt);

  // NB the start symbol (number 0) is always "A".
  int i;
  // Fill the buffer from the right; always emit at least one digit.
  for(i=14; i>=0 && (n>0 || i==14); i--) {
    buf[i] = 'A' + n%26;
    n = n/26; // shift one digit
  }

  return buf+i+1;
}
// Prints the symbolic part of the monomial for one production (sequence) and
// tallies its numeric part.
//
//   file   - output stream; receives "1" followed by "*<name>" for every
//            nonterminal in the sequence
//   count  - multiplied by the number of characters in each charset item
//   length - incremented once per single-character item, unless that item's
//            user_data string starts with '0' (zero weight)
//   g, seq - the grammar and the sequence to read
//
// The caller is expected to initialise *count and *length and to print the
// scalar and the power of t afterwards.
void readsequence(FILE *file, uint32_t *count, uint32_t *length,
                  const HCFGrammar *g, const HCFSequence *seq) {
  // tally up numbers of choices, and lengths of emitted strings.
  // Immediately emit any nonterminals encountered.
  fprintf(file, "1");

  // An empty sequence has generating function 1: the loop body never runs.
  for(HCFChoice **x = seq->items; *x; x++) {
    switch((*x)->type) {
    case HCF_CHAR: {
      // The zero-weight marker is looked up on this item itself; items
      // without user_data always count.
      const char *tag = (const char *)(*x)->user_data;
      if(tag == NULL || *tag != '0') {
        (*length)++;
      }
      break;
    }
    case HCF_END:
      break;
    case HCF_CHARSET: {
      // Count the members of this charset only; the tally starts from zero
      // for every charset so earlier charsets do not inflate the factor.
      HCharset cs = (*x)->charset;
      uint32_t cscount = 0;
      for(unsigned int i=0; i<256; i++) {
        if (charset_isset(cs, i)) {
          cscount++;
        }
      }
      *count *= cscount;
      break;
    }
    default: // HCF_CHOICE, non-terminal symbol
      fprintf(file, "*%s", nonterminal_name(g, *x));
      break;
    }
  }
}
// For each nt in g->nts
// For each choice in nt->key->seq
// For all elements in sequence
// Accumulate counts
// Accumulate string lengths
// Emit count*t^length
// Prints, as SageMath input, a polynomial ring over QQ in t and one variable
// per named nonterminal of g, followed by an ideal "ID" with one generator
// "<name> - (<sum of monomials>)" per nonterminal, one monomial per
// production. Nothing is printed for a grammar without nonterminals.
// Nonterminals whose name is empty are left out of both lists.
void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) {
  if (g->nts->used < 1) {
    return;
  }

  // emit the SageMath ring init string
  // iterate over g->nts, output symbols
  size_t i;
  HHashTableEntry *hte;
  fprintf(file, "ring.<t");
  for(i=0; i < g->nts->capacity; i++) {
    for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
      if (hte->key == NULL) {
        continue;
      }
      const HCFChoice *nt = hte->key;
      const char *ntn = nonterminal_name(g, nt);
      if(*ntn == 0) {
        continue; // no variable for unnamed symbols; avoids an empty ",,"
      }
      fprintf(file, ",%s", ntn);
    }
  }
  fprintf(file, "> = QQ[]\n");

  // iterate over g->nts
  // emit a Sage ideal definition
  int emitted = 0;
  fprintf(file, "ID = ring.ideal(");
  for(i=0; i < g->nts->capacity; i++) {
    for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
      if (hte->key == NULL) {
        continue;
      }

      const HCFChoice *nt = hte->key;
      const char *ntn = nonterminal_name(g, nt);
      if(*ntn == 0) {
        continue;
      }

      // Separator goes out only once we know an equation follows, so a
      // skipped symbol cannot leave a dangling comma behind.
      if(emitted > 0) {
        fprintf(file, ",");
      }
      emitted++;

      // ntn may point into nonterminal_name's static buffer; print it
      // before readsequence calls nonterminal_name again.
      fprintf(file, "%s - (", ntn);

      for(HCFSequence **seq = nt->seq; *seq; seq++) {
        if (seq != nt->seq) {
          fprintf(file, " + ");
        }
        uint32_t count=1, length=0;
        readsequence(file, &count, &length, g, *seq);

        // Complete the monomial: scalar factor (omitted when 1), then the
        // power of t (omitted when 0). A zero count is printed too, so a
        // production through an empty charset contributes nothing.
        if(count != 1) {
          fprintf(file, "*%" PRIu32, count);
        }
        if(length == 1) {
          fprintf(file, "*t");
        } else if(length > 1) {
          fprintf(file, "*t^%" PRIu32, length);
        }
      }

      fprintf(file, ")");
    }
  }
  fprintf(file, ")\n");
}
// Generates a system of equations for generating functions from a grammar.
//
// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org>
//
// Currently there is no attempt at elegance and no caching of information; the
// generating functions are simply printed to a provided FILE*.
//
// If a desugared parser has user_data set, the generating function systems will try
// to interpret it as a string:
//
// If this string for an h_ch starts with the character 0, then that character
// will have weight 0 in the generating function.
//
// Use the remaining string to set the preferred name of that parser in the
// generating function.
//
#ifndef HAMMER_GRAMMAR__H
#define HAMMER_GRAMMAR__H
#include "../src/backends/contextfree.h"
#include "../src/backends/lr.h"
// Filched from cfgrammar.c this function extracts the name from user_data if it
// is set; otherwise assigns a name automatically from its position in some
// ordering of non-terminals.
const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt);
// This function prints out the monomial generated by a single HCFSequence.
// It returns the resulting exponent for t in length and the number of alternatives
// accumulated in count. The monomial is (mostly) printed out to the provided FILE*;
// the caller is responsible for adding a scalar and a power of t to the printout.
void readsequence(FILE *file, uint32_t *count, uint32_t *length,
const HCFGrammar *g, const HCFSequence *seq);
// This function walks through a grammar and generates an equation for each
// production rule. The results are printed out to the provided FILE*.
void h_pprint_gfeqns(FILE *file, const HCFGrammar *g);
#endif
// Intention: read in a parser, generate the system of equations for its
// generating functions
//
#include <inttypes.h>
#include "../src/backends/contextfree.h"
#include "../src/backends/lr.h"
#include "grammar.h"
#include <stdio.h>
HAllocator *mm__;
// Builds a small left-recursive expression grammar and returns its start
// parser E:
//
//   E -> E '-' T | T
//   T -> '(' E ')' | 'n'
//
// E is created as an indirect parser first so that T and E_ can refer to it
// before it is bound.
HParser* cfExample() {
HParser *n = h_ch('n');
HParser *E = h_indirect();
HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL);
HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL);
// Close the recursion: E now stands for E_.
h_bind_indirect(E, E_);
return E;
}
// The tie knot parsers below would work better if we could patch the gen.function
// code above to allow user specification of non-default byte string "lengths",
// so that U symbols don't contribute with factors of t to the gen. function.
//
// Alternatively: use multivariate generating functions to spit out different
// variables for different terminals. This gets really messy with bigger alphabets.
// Builds a tie-knot grammar over the characters L, R, C and U and returns
// its start parser:
//
//   tie   -> L Lnext
//   Lnext -> R Rnext | C Cnext | R C U
//   Rnext -> L Lnext | C Cnext | L C U
//   Cnext -> R Rnext | L Lnext
//
// After desugaring, each parser's desugared form gets a user_data string
// that the generating-function printer uses as its name; "0U" carries the
// leading '0' that gives U weight 0 there.
HParser* finkmao() {
HParser *L = h_ch('L');
HParser *R = h_ch('R');
HParser *C = h_ch('C');
HParser *U = h_ch('U');
// Placeholders for the mutually recursive rules, bound further down.
HParser *Lnext = h_indirect();
HParser *Rnext = h_indirect();
HParser *Cnext = h_indirect();
HParser *L_ = h_choice(h_sequence(R, Rnext, NULL),
h_sequence(C, Cnext, NULL),
h_sequence(R, C, U, NULL), NULL);
HParser *R_ = h_choice(h_sequence(L, Lnext, NULL),
h_sequence(C, Cnext, NULL),
h_sequence(L, C, U, NULL), NULL);
HParser *C_ = h_choice(h_sequence(R, Rnext, NULL),
h_sequence(L, Lnext, NULL), NULL);
h_bind_indirect(Lnext, L_);
h_bind_indirect(Rnext, R_);
h_bind_indirect(Cnext, C_);
HParser *tie = h_sequence(L, Lnext, NULL);
// Desugar first; the ->desugared members are dereferenced right below.
// NOTE(review): presumably this call is what populates ->desugared for all
// sub-parsers -- confirm against the desugaring code.
h_desugar_augmented(mm__, tie);
L->desugared->user_data = "L";
R->desugared->user_data = "R";
C->desugared->user_data = "C";
Lnext->desugared->user_data = "Ln";
Rnext->desugared->user_data = "Rn";
Cnext->desugared->user_data = "Cn";
tie->desugared->user_data = "tie";
U->desugared->user_data = "0U";
return tie;
}
// Builds a tie-knot grammar over the characters T, W and U and returns its
// start parser:
//
//   tie      -> prefix pairstar tuck
//   prefix   -> T | W | (empty)
//   pair     -> T T | W T | T W | W W
//   tuck     -> T T U | W W U
//   pairstar -> pair pairstar | (empty)
//
// After desugaring, the desugared forms are given user_data names for the
// generating-function printer; "0U" carries the leading '0' that gives U
// weight 0 there.
HParser* finkmaoTW() {
HParser *T = h_ch('T');
HParser *W = h_ch('W');
HParser *U = h_ch('U');
HParser *prefix = h_choice(T, W, h_epsilon_p(),
NULL);
HParser *pair = h_choice(h_sequence(T, T, NULL),
h_sequence(W, T, NULL),
h_sequence(T, W, NULL),
h_sequence(W, W, NULL), NULL);
HParser *tuck = h_choice(h_sequence(T, T, U, NULL),
h_sequence(W, W, U, NULL),
NULL);
// pairstar is recursive, so it is declared indirect and bound afterwards.
HParser *pairstar = h_indirect();
HParser *pstar_ = h_choice(h_sequence(pair, pairstar, NULL),
h_epsilon_p(),
NULL);
h_bind_indirect(pairstar, pstar_);
HParser* tie = h_sequence(prefix, pairstar, tuck, NULL);
// Desugar first; the ->desugared members are dereferenced right below.
h_desugar_augmented(mm__, tie);
T->desugared->user_data = "T";
W->desugared->user_data = "W";
U->desugared->user_data = "0U";
prefix->desugared->user_data = "prefix";
pair->desugared->user_data = "pair";
tuck->desugared->user_data = "tuck";
// The name is attached to the bound choice, not to the indirect parser.
pstar_->desugared->user_data = "pairstar";
tie->desugared->user_data = "tie";
return tie;
}
// Builds a tie-knot grammar over the characters T, W and U in which tucks
// may also occur inside the repeated part, and returns its start parser:
//
//   tie          -> prefix tuckpairstar tuck
//   prefix       -> T | W | (empty)
//   pair         -> T T | W T | T W | W W
//   tuck         -> T T U | W W U
//   tuckpairstar -> pair tuckpairstar | tuck tuckpairstar | (empty)
//
// After desugaring, the desugared forms are given user_data names for the
// generating-function printer; "0U" carries the leading '0' that gives U
// weight 0 there.
HParser* depth1TW() {
HParser *T = h_ch('T');
HParser *W = h_ch('W');
HParser *U = h_ch('U');
HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL);
HParser *pair = h_choice(h_sequence(T, T, NULL),
h_sequence(W, T, NULL),
h_sequence(T, W, NULL),
h_sequence(W, W, NULL), NULL);
HParser *tuck = h_choice(h_sequence(T, T, U, NULL),
h_sequence(W, W, U, NULL),
NULL);
// tuckpairstar is recursive, so it is declared indirect and bound afterwards.
HParser *tuckpairstar = h_indirect();
HParser *tpstar_ = h_choice(h_sequence(pair, tuckpairstar, NULL),
h_sequence(tuck, tuckpairstar, NULL),
h_epsilon_p(),
NULL);
h_bind_indirect(tuckpairstar, tpstar_);
// tie is a choice with a single alternative.
HParser *tie = h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL);
// Desugar first; the ->desugared members are dereferenced right below.
h_desugar_augmented(mm__, tie);
T->desugared->user_data = "T";
W->desugared->user_data = "W";
U->desugared->user_data = "0U";
prefix->desugared->user_data = "prefix";
pair->desugared->user_data = "pair";
tuck->desugared->user_data = "tuck";
// The name is attached to the bound choice, not to the indirect parser.
tpstar_->desugared->user_data = "tuckpairstar";
tie->desugared->user_data = "tie";
return tie;
}
// Builds a tie-knot grammar over the characters L, R, C and U with three
// mutually recursive rules (lastL, lastR, lastC) and returns its start
// parser:
//
//   tie -> L lastL | R lastR | C lastC
//
// Each of lastR/lastL/lastC is a choice of eight sequences: two characters,
// optionally followed by U, optionally followed by another last* rule.
//
// After desugaring, the desugared forms are given user_data names for the
// generating-function printer; "0U" carries the leading '0' that gives U
// weight 0 there.
//
// NOTE(review): in R_ every recursive alternative whose last direction
// character is X continues with lastX. Four alternatives below break that
// pattern: (R, L, lastR) and (C, L, lastR) in L_, and (L, C, lastR) and
// (R, C, lastR) in C_. This may be a copy-paste slip or may be intended --
// confirm against the tie-knot model before relying on the output.
HParser* depth1() {
HParser *L = h_ch('L');
HParser *R = h_ch('R');
HParser *C = h_ch('C');
HParser *U = h_ch('U');
// Placeholders for the mutually recursive rules, bound further down.
HParser *lastR = h_indirect();
HParser *lastL = h_indirect();
HParser *lastC = h_indirect();
HParser *R_ = h_choice(h_sequence(L, R, lastR, NULL),
h_sequence(C, R, lastR, NULL),
h_sequence(L, C, lastC, NULL),
h_sequence(L, C, U, lastC, NULL),
h_sequence(L, C, U, NULL),
h_sequence(C, L, lastL, NULL),
h_sequence(C, L, U, lastL, NULL),
h_sequence(C, L, U, NULL),
NULL);
HParser *L_ = h_choice(h_sequence(R, L, lastR, NULL),
h_sequence(C, L, lastR, NULL),
h_sequence(R, C, lastC, NULL),
h_sequence(R, C, U, lastC, NULL),
h_sequence(R, C, U, NULL),
h_sequence(C, R, lastR, NULL),
h_sequence(C, R, U, lastR, NULL),
h_sequence(C, R, U, NULL),
NULL);
HParser *C_ = h_choice(h_sequence(L, C, lastR, NULL),
h_sequence(R, C, lastR, NULL),
h_sequence(L, R, lastR, NULL),
h_sequence(L, R, U, lastR, NULL),
h_sequence(L, R, U, NULL),
h_sequence(R, L, lastL, NULL),
h_sequence(R, L, U, lastL, NULL),
h_sequence(R, L, U, NULL),
NULL);
h_bind_indirect(lastR, R_);
h_bind_indirect(lastL, L_);
h_bind_indirect(lastC, C_);
HParser* tie = h_choice(h_sequence(L, lastL, NULL),
h_sequence(R, lastR, NULL),
h_sequence(C, lastC, NULL),
NULL);
// Desugar first; the ->desugared members are dereferenced right below.
h_desugar_augmented(mm__, tie);
L->desugared->user_data = "L";
R->desugared->user_data = "R";
C->desugared->user_data = "C";
U->desugared->user_data = "0U";
lastL ->desugared->user_data = "Ln";
lastR->desugared->user_data = "Rn";
lastC->desugared->user_data = "Cn";
tie->desugared->user_data = "tie";
return tie;
}
// Builds a tie-knot grammar over the characters T, W and U with nested
// tucks and returns its start parser:
//
//   tie          -> prefix tuckpairstar tuck
//   prefix       -> T | W | (empty)
//   pair         -> T T | W T | T W | W W
//   tuck         -> tstart | wstart
//   tuckpairstar -> pair tuckpairstar | tuck tuckpairstar | (empty)
//
// tstart/wstart and the rule families tw0..tw2 / wt0..wt2 are mutually
// recursive; every one of their non-empty alternatives ends in U, and only
// tw2 and wt2 have an empty alternative.
// NOTE(review): the index 0..2 presumably tracks a count modulo 3 -- the
// code does not say of what; confirm against the tie-knot model.
//
// After desugaring, some desugared forms are given user_data names for the
// generating-function printer; "0U" carries the leading '0' that gives U
// weight 0 there. The tstart/wstart/tw*/wt* rules are not given names here.
HParser* depthNTW() {
HParser *T = h_ch('T');
HParser *W = h_ch('W');
HParser *U = h_ch('U');
HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL);
HParser *pair = h_choice(h_sequence(T, T, NULL),
h_sequence(W, T, NULL),
h_sequence(T, W, NULL),
h_sequence(W, W, NULL), NULL);
// Placeholders for the mutually recursive rules, bound further down.
HParser *tstart = h_indirect();
HParser *tw0 = h_indirect();
HParser *tw1 = h_indirect();
HParser *tw2 = h_indirect();
HParser *wstart = h_indirect();
HParser *wt0 = h_indirect();
HParser *wt1 = h_indirect();
HParser *wt2 = h_indirect();
HParser *T_ = h_choice(h_sequence(T, T, tw2, U, NULL),
h_sequence(T, W, tw0, U, NULL),
NULL);
HParser *tw0_ = h_choice(h_sequence(T, T, tw2, U, NULL),
h_sequence(T, W, tw0, U, NULL),
h_sequence(W, T, tw0, U, NULL),
h_sequence(W, W, tw1, U, NULL),
h_sequence(tstart, tw2, U, NULL),
h_sequence(wstart, tw1, U, NULL),
NULL);
HParser *tw1_ = h_choice(h_sequence(T, T, tw0, U, NULL),
h_sequence(T, W, tw1, U, NULL),
h_sequence(W, T, tw1, U, NULL),
h_sequence(W, W, tw2, U, NULL),
h_sequence(tstart, tw0, U, NULL),
h_sequence(wstart, tw2, U, NULL),
NULL);
HParser *tw2_ = h_choice(h_sequence(T, T, tw1, U, NULL),
h_sequence(T, W, tw2, U, NULL),
h_sequence(W, T, tw2, U, NULL),
h_sequence(W, W, tw0, U, NULL),
h_sequence(tstart, tw1, U, NULL),
h_sequence(wstart, tw0, U, NULL),
h_epsilon_p(),
NULL);
// The W-side rules mirror the T-side rules with T and W exchanged.
HParser *W_ = h_choice(h_sequence(W, W, wt2, U, NULL),
h_sequence(W, T, wt0, U, NULL),
NULL);
HParser *wt0_ = h_choice(h_sequence(W, W, wt2, U, NULL),
h_sequence(W, T, wt0, U, NULL),
h_sequence(T, W, wt0, U, NULL),
h_sequence(T, T, wt1, U, NULL),
h_sequence(wstart, wt2, U, NULL),
h_sequence(tstart, wt1, U, NULL),
NULL);
HParser *wt1_ = h_choice(h_sequence(W, W, wt0, U, NULL),
h_sequence(W, T, wt1, U, NULL),
h_sequence(T, W, wt1, U, NULL),
h_sequence(T, T, wt2, U, NULL),
h_sequence(wstart, wt0, U, NULL),
h_sequence(tstart, wt2, U, NULL),
NULL);
HParser *wt2_ = h_choice(h_sequence(W, W, wt1, U, NULL),
h_sequence(W, T, wt2, U, NULL),
h_sequence(T, W, wt2, U, NULL),
h_sequence(T, T, wt0, U, NULL),
h_sequence(wstart, wt1, U, NULL),
h_sequence(tstart, wt0, U, NULL),
h_epsilon_p(),
NULL);
h_bind_indirect(tstart, T_);
h_bind_indirect(tw0, tw0_);
h_bind_indirect(tw1, tw1_);
h_bind_indirect(tw2, tw2_);
h_bind_indirect(wstart, W_);
h_bind_indirect(wt0, wt0_);
h_bind_indirect(wt1, wt1_);
h_bind_indirect(wt2, wt2_);
HParser *tuck = h_choice(tstart, wstart, NULL);
HParser *tuckpairstar = h_indirect();
HParser *tpstar_ = h_choice(h_sequence(pair, tuckpairstar, NULL),
h_sequence(tuck, tuckpairstar, NULL),
h_epsilon_p(),
NULL);
h_bind_indirect(tuckpairstar, tpstar_);
// tie is a choice with a single alternative.
HParser *tie = h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL);
// Desugar first; the ->desugared members are dereferenced right below.
h_desugar_augmented(mm__, tie);
T->desugared->user_data = "T";
W->desugared->user_data = "W";
U->desugared->user_data = "0U";
prefix->desugared->user_data = "prefix";
pair->desugared->user_data = "pair";
tuck->desugared->user_data = "tuck";
// The name is attached to the bound choice, not to the indirect parser.
tpstar_->desugared->user_data = "tuckpairstar";
tie->desugared->user_data = "tie";
return tie;
}
// Builds the finkmao() grammar, converts it to a context-free grammar and
// prints its generating-function equations followed by the grammar itself.
// Returns 1 if the grammar could not be built, 0 otherwise.
int main(int argc, char **argv) {
  mm__ = &system_allocator;

  HParser *parser = finkmao();
  HCFGrammar *grammar = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser));
  if (!grammar) {
    fprintf(stderr, "h_cfgrammar failed\n");
    return 1;
  }

  printf("\n==== Generating functions ====\n");
  h_pprint_gfeqns(stdout, grammar);

  printf("\n==== Grammar ====\n");
  h_pprint_grammar(stdout, grammar, 0);

  return 0;
}
/*
* Example parser that demonstrates the use of user-defined token types.
*
* Note the custom printer function that hooks into h_pprint().
*/
#include "../src/hammer.h"
#include "../src/glue.h"
/*
* custom tokens
*/
HTokenType TT_SUBJ, TT_PRED, TT_OBJ, TT_ADJ, TT_ADVC;
void
pprint(FILE *stream, const HParsedToken *tok, int indent, int delta)
{
/*
* Pretty-printer rules:
*
* - Output 'indent' spaces after every newline you produce.
* - Do not add indent on the first line of output.
* - Do not add a trailing newline.
* - Indent sub-objects by adding 'delta' to 'indent'.
*/
if (((HParsedToken *)tok->user)->token_type == TT_SEQUENCE)
fprintf(stream, "\n%*s", indent, "");
h_pprint(stream, tok->user, indent, delta);
}
/* XXX define unamb_sub as well */
void
init(void)
{
TT_SUBJ = h_allocate_token_new("subject", NULL, pprint);
TT_PRED = h_allocate_token_new("predicate", NULL, pprint);
TT_OBJ = h_allocate_token_new("object", NULL, pprint);
TT_ADJ = h_allocate_token_new("adjective", NULL, pprint);
TT_ADVC = h_allocate_token_new("adverbial clause", NULL, pprint);
}
/*
* semantic actions
*
* Normally these would be more interesting, but for this example, we just wrap
* our tokens in their intended types.
*/
/*
 * Each action below receives the parse result 'p' of its rule and returns a
 * new token of the matching user-defined type (TT_SUBJ, TT_PRED, ...) whose
 * payload is the rule's original AST, p->ast.  The user-data argument 'u' is
 * not used by any of them.
 *
 * NOTE(review): H_MAKE comes from glue.h and presumably relies on the
 * parameter being named 'p' -- confirm before renaming it.
 */
/* Wrap the AST as a "subject" token. */
HParsedToken *act_subj(const HParseResult *p, void *u) {
return H_MAKE(SUBJ, (void *)p->ast);
}
/* Wrap the AST as a "predicate" token. */
HParsedToken *act_pred(const HParseResult *p, void *u) {
return H_MAKE(PRED, (void *)p->ast);
}
/* Wrap the AST as an "object" token. */
HParsedToken *act_obj(const HParseResult *p, void *u) {
return H_MAKE(OBJ, (void *)p->ast);
}
/* Wrap the AST as an "adjective" token. */
HParsedToken *act_adj(const HParseResult *p, void *u) {
return H_MAKE(ADJ, (void *)p->ast);
}
/* Wrap the AST as an "adverbial clause" token. */
HParsedToken *act_advc(const HParseResult *p, void *u) {
return H_MAKE(ADVC, (void *)p->ast);
}
/*
* grammar
*/
/*
 * Build the parser for a tiny English-like sentence grammar and return its
 * top-level rule:
 *
 *   sentnc  := subj pred [obj] [advc]
 *   subj    := nphrase
 *   pred    := verb
 *   obj     := nphrase
 *   advc    := adverb nphrase
 *   nphrase := art adj* noun
 *
 * NOTE(review): H_RULE / H_ARULE come from glue.h; H_ARULE presumably also
 * attaches the act_<name> semantic action defined above -- confirm there.
 */
HParser *
build_parser(void)
{
/* words */
/* W(X): the literal word X (stringified), wrapped in h_whitespace(). */
#define W(X) h_whitespace(h_literal((const uint8_t *)(#X)))
H_RULE(art, h_choice(W(a), W(the), NULL));
H_RULE(noun, h_choice(W(cat), W(dog), W(fox), W(tiger), W(lion),
W(bear), W(fence), W(tree), W(car), W(cow), NULL));
H_RULE(verb, h_choice(W(eats), W(jumps), W(falls), NULL));
H_ARULE(adj, h_choice(W(quick), W(slow), W(happy), W(lazy), W(cyan),
W(magenta), W(yellow), W(black), W(brown), NULL));
H_RULE(adverb, h_choice(W(with), W(over), W(after), NULL));
/* W is only meaningful for the word lists above. */
#undef W
/* phrases */
/* noun phrase: article, any number of adjectives, then a noun */
H_RULE(nphrase, h_sequence(art, h_many(adj), noun, NULL));
/* sentence structure */
H_ARULE(subj, nphrase);
H_ARULE(pred, verb);
H_ARULE(obj, nphrase);
H_ARULE(advc, h_sequence(adverb, nphrase, NULL));
/* object and adverbial clause are both optional */
H_RULE(sentnc, h_sequence(subj, pred,
h_optional(obj), h_optional(advc), NULL));
return sentnc;
}
/*
* main routine: read, parse, print
*
* input e.g.:
* "the quick brown fox jumps the fence with a cyan lion"
*/
#include <stdio.h>
#include <inttypes.h>
/*
 * Read up to 1024 bytes from stdin, parse them with the sentence grammar and
 * pretty-print the resulting AST on stdout.
 *
 * Returns 1 when end-of-file was not reached within the buffer ("too much
 * input") or when the input does not parse; returns 0 on success, after
 * reporting on stderr how many bytes were consumed.
 */
int
main(int argc, char **argv)
{
  uint8_t buf[1024];
  size_t nread;
  const HParser *grammar;
  const HParseResult *res;

  init();
  grammar = build_parser();

  nread = fread(buf, 1, sizeof(buf), stdin);
  if (feof(stdin) == 0) {
    /* the buffer filled up (or the read stopped) before EOF */
    fprintf(stderr, "too much input\n");
    return 1;
  }

  res = h_parse(grammar, buf, nread);
  if (res == NULL) {
    fprintf(stderr, "no parse\n");
    return 1;
  }

  h_pprintln(stdout, res->ast);
  /* bit_length is in bits; report whole bytes */
  fprintf(stderr, "consumed %" PRId64 "/%zu bytes.\n",
          res->bit_length / 8, nread);
  return 0;
}
......@@ -19,7 +19,7 @@
token {
parser token("95\xa2");
test "95\xa2" --> "95\xa2";
test "95\xa2" --> fail;
test "95\xa3" --> fail;
}
ch {
......@@ -87,7 +87,7 @@ uint8 {
}
int_range {
parser int_range(uint8(), 0x3, 0x10);
parser int_range(uint8(), 0x3, 0xa);
test <05> --> u0x05;
test <0b> --> fail;
}
......@@ -215,7 +215,7 @@ many1 {
test "daabbabadef" --> fail;
}
repeat-n {
repeat_n {
parser repeat_n(choice(ch('a'),ch('b')),0x2);
test "adef" --> fail;
test "abdef" --> ['a','b'];
......@@ -270,24 +270,27 @@ and {
}
not {
parser sequence(ch('a'), choice(token('+'), token("++")), ch('b'));
parser sequence(ch('a'), choice(token("+"), token("++")), ch('b'));
test "a+b" --> ['a',"+",'b'];
test "a++b" --> fail;
parser sequence(ch('a'), choice(sequence(token('+'), not(ch('+'))),
parser sequence(ch('a'), choice(sequence(token("+"), not(ch('+'))),
token("++")),
ch('b'));
test "a+b" --> ['a', ["+"], 'b'];
test "a++b" --> ['a', "++", 'b'];
}
leftrec {
subparser $lr = choice(sequence($lr, ch('a')), epsilon_p());
parser $lr;
test "a" --> ['a'];
test "aa" --> [['a'],'a'];
test "aaa" --> [[['a'],'a'],'a'];
}
## This doesn't work for some reason; it segfaults. We'll leave it for
## later.
#
#leftrec {
# subparser $lr = choice(sequence($lr, ch('a')), epsilon_p());
# parser $lr;
# test "a" --> ['a'];
# #test "aa" --> [['a'],'a'];
# #test "aaa" --> [[['a'],'a'],'a'];
#}
rightrec {
subparser $rr = choice(sequence(ch('a'), $rr), epsilon_p());
......@@ -296,17 +299,17 @@ rightrec {
test "aa" --> ['a',['a']];
test "aaa" --> ['a',['a',['a']]];
}
ambiguous {
subparser $d = ch('d');
subparser $p = ch('+');
subparser $e = choice(sequence($e, $p, $e), $d);
# TODO: implement action/h_act_flatten
parser $e;
test "d" --> 'd';
test "d+d" --> ['d','+','d'];
test "d+d+d" --> [['d','+','d'],'+','d'];
}
## Only for GLR
#ambiguous {
# subparser $d = ch('d');
# subparser $p = ch('+');
# subparser $e = choice(sequence($e, $p, $e), $d);
# # TODO: implement action/h_act_flatten
# parser $e;
#
# test "d" --> 'd';
# test "d+d" --> ['d','+','d'];
# test "d+d+d" --> [['d','+','d'],'+','d'];
#}
% -*- prolog -*-
% Run with:
% $ swipl -q -t halt -g tsgencsharp:prolog tsgencsharp.pl >output-file
% Note: this needs to be run from the lib/ directory.
% So,
% swipl -q -t halt -g tsgencsharp:prolog tsgencsharp.pl >../src/bindings/dotnet/test/hammer_tests.cs
:- module(tsgencsharp,
[gen_ts/2]).
:- expects_dialect(swi).
:- use_module(tsparser).
% TODO: build a Box-like pretty-printer
% format_parser_name(+Name, -Result)
%   Result is the text "Hammer." followed by Name, with Name's first
%   character mapped through code_type/2's to_upper relation (evidently to
%   capitalise it).  Fails for an empty Name.
format_parser_name(Name, Result) :-
atom_codes(Name, [CInit|CName]),
code_type(RInit, to_upper(CInit)),
append("Hammer.", [RInit|CName], Result), !.
% format_test_name(+Name, -Result)
%   Same transformation as format_parser_name/2, but with the prefix "Test".
format_test_name(Name, Result) :-
atom_codes(Name, [CInit|CName]),
code_type(RInit, to_upper(CInit)),
append("Test", [RInit|CName], Result), !.
% indent(+N)//
%   Emits the indentation literal N times (nothing for N = 0).
%   The {N > 0} guard makes the rule fail for negative N instead of looping.
indent(0) --> "", !.
indent(N) -->
{N > 0},
" ",
{Np is N - 1},
indent(Np).
% pp_char_guts(+Code)//
%   Emits the body of a C# character/string literal for one byte value:
%     - double quote, single quote and backslash are backslash-escaped;
%     - other printable ASCII (0x20..0x7E) is emitted as-is;
%     - everything else becomes a two-digit \xHH escape.
%   The backslash clause is required: emitted raw, a backslash would either
%   swallow the following character as an escape or terminate the literal
%   incorrectly (e.g. "\").
pp_char_guts(0x22) -->
    "\\\"", !.
pp_char_guts(0x27) -->
    "\\'", !.
pp_char_guts(0x5C) -->
    "\\\\", !.
pp_char_guts(A) -->
    { A >= 0x20, A < 0x7F } ->
    [A];
    "\\x",
    { H is A >> 4, L is A /\ 0xF,
      code_type(Hc, xdigit(H)),
      code_type(Lc, xdigit(L)) },
    [Hc,Lc].
% pp_hexnum_guts(+N)//
%   Emits the hexadecimal digits of the non-negative integer N, most
%   significant digit first.  Emits nothing for 0, so callers handle the
%   literal "0" and the "0x" prefix themselves.
pp_hexnum_guts(0) --> !.
pp_hexnum_guts(A) -->
{ L is A /\ 0xF,
H is A >> 4,
code_type(Lc, xdigit(L)) },
pp_hexnum_guts(H),
[Lc], !.
% pp_string_guts(+Codes)//
%   Emits every code of Codes through pp_char_guts//1, in order.
pp_string_guts([]) --> !.
pp_string_guts([X|Xs]) -->
pp_char_guts(X),
pp_string_guts(Xs), !.
% pp_parser_args(+Args)//
%   Emits the parser expressions in Args separated by ", ".
pp_parser_args([]) --> !.
pp_parser_args([X|Rest]) -->
pp_parser(X),
pp_parser_args_rest(Rest).
% pp_parser_args_rest(+Args)//
%   Emits each remaining argument preceded by ", ".
pp_parser_args_rest([]) --> !.
pp_parser_args_rest([X|Xs]) -->
", ",
pp_parser(X),
pp_parser_args_rest(Xs).
% pp_parser(+Term)//
%   Emits the C# expression for one parser-description term:
%     parser(Name, Args) -> Hammer.<Name>(<args>)   (name via format_parser_name/2)
%     string(Codes)      -> double-quoted string literal
%     num(N)             -> 0, 0x<hex> or -0x<hex>
%     char(C)            -> single-quoted character literal
%     ref(Name)          -> sp_<Name>
%   Any other term prints a diagnostic and fails.
pp_parser(parser(Name, Args)) -->
!,
{format_parser_name(Name,Fname)},
Fname,
"(",
pp_parser_args(Args),
")".
pp_parser(string(Str)) --> !,
"\"",
pp_string_guts(Str),
"\"", !.
% zero is special-cased because pp_hexnum_guts//1 emits no digits for 0
pp_parser(num(0)) --> "0", !.
pp_parser(num(Num)) --> !,
( {Num < 0} ->
"-0x", {RNum is -Num}; "0x", {RNum = Num} ),
pp_hexnum_guts(RNum).
pp_parser(char(C)) --> !,
"'", pp_char_guts(C), "'", !.
pp_parser(ref(Name)) -->
{atom_codes(Name,CName)},
"sp_", CName, !.
% catch-all: report the unrecognised term and fail
pp_parser(A) -->
{ writef("WTF is a %w?\n", [A]),
!, fail
}.
% pp_test_elem(+Phase, +Elem)//
%   Emits the C# code that test-case element Elem contributes in Phase.
%   A test case is rendered in three passes (decl, init, exec):
%     decl: subparsers are declared as IndirectParser variables;
%     init: subparsers are bound to their definitions;
%     exec: "parser = ..." assignments and the CheckParseOK/CheckParseFail
%           calls are emitted in source order.
%   Combinations not listed emit nothing.
pp_test_elem(decl, parser(_)) --> !.
pp_test_elem(init, parser(_)) --> !.
pp_test_elem(exec, parser(P)) -->
!, indent(3),
"parser = ",
pp_parser(P),
";\n".
pp_test_elem(decl, subparser(Name,_)) -->
!, indent(3),
"IndirectParser ", pp_parser(ref(Name)),
" = Hammer.Indirect();\n".
pp_test_elem(init, subparser(Name, Parser)) -->
!, indent(3),
pp_parser(ref(Name)), ".Bind(",
pp_parser(Parser),
");\n".
pp_test_elem(exec, subparser(_,_)) --> !.
pp_test_elem(decl, test(_,_)) --> !.
pp_test_elem(init, test(_,_)) --> !.
pp_test_elem(decl, testFail(_)) --> !.
pp_test_elem(init, testFail(_)) --> !.
% expected-success test: input string plus expected parse result
pp_test_elem(exec, test(Str, Result)) -->
!, indent(3),
" CheckParseOK(parser, ", pp_parser(string(Str)),
", ",
pp_parse_result(Result),
");\n".
% expected-failure test: input string only
pp_test_elem(exec, testFail(Str)) -->
!, indent(3),
" CheckParseFail(parser, ", pp_parser(string(Str)),
");\n".
% pp_test_elem(_, _) --> !.
% pp_result_seq(+Results)//
%   Emits the expected-result terms in Results separated by ", ".
pp_result_seq([]) --> !.
pp_result_seq([X|Xs]) --> !,
pp_parse_result(X),
pp_result_seq_r(Xs).
pp_result_seq_r([]) --> !.
pp_result_seq_r([X|Xs]) --> !,
", ",
pp_parse_result(X),
pp_result_seq_r(Xs).
% pp_byte_seq(+Bytes)//
%   Emits the integers in Bytes as numeric literals separated by ", ".
pp_byte_seq([]) --> !.
pp_byte_seq([X|Xs]) --> !,
pp_parser(num(X)),
pp_byte_seq_r(Xs).
pp_byte_seq_r([]) --> !.
pp_byte_seq_r([X|Xs]) --> !,
", ",
pp_parser(num(X)),
pp_byte_seq_r(Xs).
% pp_parse_result(+Result)//
%   Emits the C# expression for an expected parse result:
%     char(C)   -> character literal
%     seq(Rs)   -> new object[]{ ... }
%     none      -> null
%     uint(V)   -> (System.UInt64) literal
%     sint(V)   -> (System.Int64)(literal)
%     string(B) -> new byte[]{ ... }
%   There is no catch-all clause, so any other term makes the rule fail.
pp_parse_result(char(C)) --> !,
%"(System.UInt64)",
pp_parser(char(C)).
pp_parse_result(seq(Args)) --> !,
"new object[]{ ", pp_result_seq(Args), "}".
pp_parse_result(none) --> !,
"null".
pp_parse_result(uint(V)) --> !,
"(System.UInt64)", pp_parser(num(V)).
pp_parse_result(sint(V)) --> !,
"(System.Int64)(", pp_parser(num(V)), ")".
pp_parse_result(string(A)) --> !,
"new byte[]{ ", pp_byte_seq(A), "}".
%pp_parse_result(A) -->
% "\x1b[1;31m",
% {with_output_to(codes(C), write(A))},
% C,
% "\x1b[0m".
% pp_test_elems(+Phase, +Elems)//
%   Runs pp_test_elem//2 for Phase over every element, in order.
pp_test_elems(_, []) --> !.
pp_test_elems(Phase, [X|Xs]) -->
!,
pp_test_elem(Phase,X),
pp_test_elems(Phase,Xs).
% pp_test_case(+TestCase)//
%   Emits one [Test] method named after the test case.  The element list is
%   walked three times (decl, init, exec) so that all subparser declarations
%   and bindings precede the first parser assignment or check.
pp_test_case(testcase(Name, Elems)) -->
!,
indent(2), "[Test]\n",
{ format_test_name(Name, TName) },
indent(2), "public void ", TName, "() {\n",
indent(3), "Parser parser;\n",
pp_test_elems(decl, Elems),
pp_test_elems(init, Elems),
pp_test_elems(exec, Elems),
indent(2), "}\n".
% pp_test_cases(+Cases)//
%   Emits every test case in order.
pp_test_cases([]) --> !.
pp_test_cases([A|As]) -->
pp_test_case(A),
pp_test_cases(As).
% pp_test_suite(+Suite)//
%   Emits the complete C# file: the Hammer.Test namespace containing the
%   NUnit fixture class HammerTest (declared partial) with one method per
%   test case.
pp_test_suite(Suite) -->
"namespace Hammer.Test {\n",
indent(1), "using NUnit.Framework;\n",
%indent(1), "using Hammer;\n",
indent(1), "[TestFixture]\n",
indent(1), "public partial class HammerTest {\n",
pp_test_cases(Suite),
indent(1), "}\n",
"}\n".
% gen_ts(+Suite, -Str)
%   Str is the generated C# source for the parsed test suite Suite.
gen_ts(Foo,Str) :-
phrase(pp_test_suite(Foo),Str).
% prolog
%   Command-line entry point (see the "Run with" header): obtains the test
%   cases from read_tc/1 and writes the generated code to standard output.
%   NOTE(review): read_tc/1 is presumably exported by tsparser -- confirm.
prolog :-
read_tc(A),
gen_ts(A, Res),
writef("%s", [Res]).
% -*- prolog -*-
% Run with:
% $ swipl -q -t halt -g tsgenruby:prolog tsgenruby.pl >output-file
% Note: this needs to be run from the lib/ directory.
% So, from the ruby directory
% (cd ../../../lib && swipl -q -t halt -g tsgenruby:prolog tsgenruby.pl ) >test/autogen_test.rb
:- module(tsgenruby,
[gen_ts/2]).
:- expects_dialect(swi).
:- use_module(tsparser).
:- use_module(library(record)).
:- record testsuite_state(parser_no:integer = 0, test_no:integer=0).
% TODO: build a Box-like pretty-printer
% to_title_case(+Codes, -Result)
%   Removes every '_' or '-' separator and maps the character that follows
%   it through code_type/2's to_upper relation (evidently to capitalise it);
%   all other characters are copied unchanged.
%   Fails if the input ends in a separator.
to_title_case([], []) :- !.
to_title_case([WSep,S0|Ss], [R0|Rs]) :-
memberchk(WSep, "_-"), !,
code_type(R0, to_upper(S0)),
to_title_case(Ss,Rs).
to_title_case([S0|Ss], [S0|Rs]) :-
\+ memberchk(S0, "_-"),
!, to_title_case(Ss,Rs).
% format_parser_name(+Name, -Result)
%   Result is "h." followed by Name, i.e. a method call on the local `h`
%   that the generated setup method assigns.
format_parser_name(Name, Result) :-
atom_codes(Name, CName),
append("h.", CName, Result), !.
% format_test_name(+Name, -Result)
%   Result is "Test" followed by Name in title case.  A leading '_' (0x5f)
%   is prepended so that the first character is treated like any other
%   word start.
format_test_name(Name, Result) :-
atom_codes(Name, CName),
to_title_case([0x5f|CName], RName),
append("Test", RName, Result), !.
% indent(+N)//
%   Emits the indentation literal N times (nothing for N = 0).
%   The {N > 0} guard makes the rule fail for negative N instead of looping.
indent(0) --> "", !.
indent(N) -->
{N > 0},
" ",
{Np is N - 1},
indent(Np).
% pp_char_guts(+Code)//
%   Emits the body of a Ruby double-quoted string literal for one byte value:
%     - double quote, single quote and backslash are backslash-escaped;
%     - other printable ASCII (0x20..0x7E) is emitted as-is;
%     - everything else becomes a two-digit \xHH escape.
%   The backslash clause is required: emitted raw, a backslash would either
%   swallow the following character as an escape or terminate the literal
%   incorrectly (e.g. "\").
pp_char_guts(0x22) -->
    "\\\"", !.
pp_char_guts(0x27) -->
    "\\'", !.
pp_char_guts(0x5C) -->
    "\\\\", !.
pp_char_guts(A) -->
    { A >= 0x20, A < 0x7F } ->
    [A];
    "\\x",
    { H is A >> 4, L is A /\ 0xF,
      code_type(Hc, xdigit(H)),
      code_type(Lc, xdigit(L)) },
    [Hc,Lc].
% pp_hexnum_guts(+N)//
%   Emits the hexadecimal digits of the non-negative integer N, most
%   significant digit first.  Emits nothing for 0, so callers handle the
%   literal "0" and the "0x" prefix themselves.
pp_hexnum_guts(0) --> !.
pp_hexnum_guts(A) -->
{ L is A /\ 0xF,
H is A >> 4,
code_type(Lc, xdigit(L)) },
pp_hexnum_guts(H),
[Lc], !.
% pp_string_guts(+Codes)//
%   Emits every code of Codes through pp_char_guts//1, in order.
pp_string_guts([]) --> !.
pp_string_guts([X|Xs]) -->
pp_char_guts(X),
pp_string_guts(Xs), !.
% pp_parser_args(+Args)//
%   Emits the parser expressions in Args separated by ", ".
pp_parser_args([]) --> !.
pp_parser_args([X|Rest]) -->
pp_parser(X),
pp_parser_args_rest(Rest).
% pp_parser_args_rest(+Args)//
%   Emits each remaining argument preceded by ", ".
pp_parser_args_rest([]) --> !.
pp_parser_args_rest([X|Xs]) -->
", ",
pp_parser(X),
pp_parser_args_rest(Xs).
% pp_parser(+Term)//
%   Emits the Ruby expression for one parser-description term:
%     parser(Name, Args) -> h.<Name>(<args>); the parentheses are omitted
%                           when Args is empty
%     string(Codes)      -> double-quoted string literal
%     num(N)             -> 0, 0x<hex> or -0x<hex>
%     char(C)            -> <numeric literal>.chr
%     ref(Name)          -> @sp_<Name>
%   Any other term prints a diagnostic and fails.
pp_parser(parser(Name, Args)) -->
!,
{format_parser_name(Name,Fname)},
Fname,
({Args \= []} ->
"(", pp_parser_args(Args), ")"
; "") .
pp_parser(string(Str)) --> !,
"\"",
pp_string_guts(Str),
"\"", !.
% zero is special-cased because pp_hexnum_guts//1 emits no digits for 0
pp_parser(num(0)) --> "0", !.
pp_parser(num(Num)) --> !,
( {Num < 0} ->
"-0x", {RNum is -Num}; "0x", {RNum = Num} ),
pp_hexnum_guts(RNum).
pp_parser(char(C)) --> !,
pp_parser(num(C)), ".chr". % Ruby is encoding-aware; this is a
% more reasonable implementation
pp_parser(ref(Name)) -->
{atom_codes(Name,CName)},
"@sp_", CName, !.
% catch-all: report the unrecognised term and fail
pp_parser(A) -->
{ writef("WTF is a %w?\n", [A]),
!, fail
}.
% upd_state_test_elem(+Elem, +OldState, -NewState)
%   Advances the testsuite_state counters for one test-case element:
%   a parser(_) element increments parser_no, a test(_,_) or testFail(_)
%   element increments test_no, and any other element leaves the state
%   unchanged.
upd_state_test_elem(parser(_), OldSt, NewSt) :- !,
testsuite_state_parser_no(OldSt, OldRNo),
NewRNo is OldRNo + 1,
set_parser_no_of_testsuite_state(NewRNo, OldSt, NewSt).
upd_state_test_elem(test(_, _), OldSt, NewSt) :- !,
testsuite_state_test_no(OldSt, OldTNo),
NewTNo is OldTNo + 1,
set_test_no_of_testsuite_state(NewTNo, OldSt, NewSt).
upd_state_test_elem(testFail(_), OldSt, NewSt) :- !,
testsuite_state_test_no(OldSt, OldTNo),
NewTNo is OldTNo + 1,
set_test_no_of_testsuite_state(NewTNo, OldSt, NewSt).
upd_state_test_elem(_, St, St).
% curparser_name(+State)//
%   Emits the instance-variable name "@parser_<parser_no>".
curparser_name(St) --> !,
{ testsuite_state_parser_no(St, RNo),
format(string(X), "@parser_~w", RNo) },
X.
% curtest_name(+State)//
%   Emits the test-method name "test_<test_no>".
curtest_name(St) --> !,
{ testsuite_state_test_no(St, RNo),
format(string(X), "test_~w", RNo) },
X.
% pp_test_elem(+Phase, +Elem, +State)//
%   Emits the Ruby code that test-case element Elem contributes in Phase.
%   A test case is rendered in three passes (decl, init, exec):
%     decl: each subparser is created as an indirect parser (@sp_<Name>);
%     init: subparsers are bound and each parser is assigned to the
%           @parser_<n> variable named by State;
%     exec: every test becomes its own test_<n> method that checks the
%           parser that is current in State.
%   Combinations not listed emit nothing.
pp_test_elem(decl, parser(_), _) --> !.
pp_test_elem(init, parser(P), St) -->
!, indent(2),
curparser_name(St), " = ",
pp_parser(P),
"\n".
pp_test_elem(exec, parser(_), _) --> !.
pp_test_elem(decl, subparser(Name,_), _) -->
!, indent(2),
pp_parser(ref(Name)),
" = ",
pp_parser(parser(indirect,[])),
"\n".
pp_test_elem(init, subparser(Name, Parser), _) -->
!, indent(2),
pp_parser(ref(Name)), ".bind ",
pp_parser(Parser),
"\n".
pp_test_elem(exec, subparser(_,_), _) --> !.
pp_test_elem(decl, test(_,_), _) --> !.
pp_test_elem(init, test(_,_), _) --> !.
pp_test_elem(decl, testFail(_), _) --> !.
pp_test_elem(init, testFail(_), _) --> !.
% expected-success test: input string plus expected parse result
pp_test_elem(exec, test(Str, Result), St) -->
!,
"\n",
indent(1), "def ", curtest_name(St), "\n",
indent(2), "assert_parse_ok ", curparser_name(St), ", ", pp_parser(string(Str)),
", ",
pp_parse_result(Result),
"\n",
indent(1), "end\n".
% expected-failure test: input string only
pp_test_elem(exec, testFail(Str), St) -->
!,
"\n",
indent(1), "def ", curtest_name(St), "\n",
indent(2), "refute_parse_ok ", curparser_name(St), ", ", pp_parser(string(Str)), "\n",
indent(1), "end\n".
% pp_test_elem(_, _) --> !.
% pp_result_seq(+Results)//
%   Emits the expected-result terms in Results separated by ", ".
pp_result_seq([]) --> !.
pp_result_seq([X|Xs]) --> !,
pp_parse_result(X),
pp_result_seq_r(Xs).
pp_result_seq_r([]) --> !.
pp_result_seq_r([X|Xs]) --> !,
", ",
pp_parse_result(X),
pp_result_seq_r(Xs).
% pp_byte_seq(+Bytes)//
%   Emits the integers in Bytes as numeric literals separated by ", ".
pp_byte_seq([]) --> !.
pp_byte_seq([X|Xs]) --> !,
pp_parser(num(X)),
pp_byte_seq_r(Xs).
pp_byte_seq_r([]) --> !.
pp_byte_seq_r([X|Xs]) --> !,
", ",
pp_parser(num(X)),
pp_byte_seq_r(Xs).
% pp_parse_result(+Result)//
%   Emits the Ruby expression for an expected parse result:
%     char(C)   -> <numeric literal>.chr
%     seq(Rs)   -> [ ... ]
%     none      -> nil
%     uint(V)   -> numeric literal
%     sint(V)   -> numeric literal
%     string(B) -> double-quoted string literal
%   There is no catch-all clause, so any other term makes the rule fail.
pp_parse_result(char(C)) --> !,
%"(System.UInt64)",
pp_parser(char(C)).
pp_parse_result(seq(Args)) --> !,
"[", pp_result_seq(Args), "]".
pp_parse_result(none) --> !,
"nil".
pp_parse_result(uint(V)) --> !,
pp_parser(num(V)).
pp_parse_result(sint(V)) --> !,
pp_parser(num(V)).
pp_parse_result(string(A)) --> !,
pp_parser(string(A)).
%pp_parse_result(A) -->
% "\x1b[1;31m",
% {with_output_to(codes(C), write(A))},
% C,
% "\x1b[0m".
% pp_test_elems(+Phase, +Elems)//
%   Renders Elems for Phase, starting from a fresh default testsuite_state.
%   Because every phase restarts the counters, the same element receives the
%   same parser/test number in each pass.
pp_test_elems(Phase, Elems) -->
{ default_testsuite_state(State) },
pp_test_elems(Phase, Elems, State).
% pp_test_elems(+Phase, +Elems, +State)//
%   The state is advanced *before* an element is printed, so the element is
%   rendered with counters that already include itself.
pp_test_elems(_, [], _) --> !.
pp_test_elems(Phase, [X|Xs], St) -->
!,
{ upd_state_test_elem(X, St, NewSt) },
%{NewSt = St},
pp_test_elem(Phase,X, NewSt),
pp_test_elems(Phase,Xs, NewSt).
% pp_test_case(+TestCase)//
%   Emits one Minitest class per test case: a setup method containing the
%   decl and init passes, followed by one test method per test element
%   (the exec pass).
pp_test_case(testcase(Name, Elems)) -->
!,
{ format_test_name(Name, TName) },
indent(0), "class ", TName, " < Minitest::Test\n",
indent(1), "def setup\n",
indent(2), "super\n",
indent(2), "h = Hammer::Parser\n",
pp_test_elems(decl, Elems),
pp_test_elems(init, Elems),
indent(1), "end\n",
pp_test_elems(exec, Elems),
indent(0), "end\n\n".
% pp_test_cases(+Cases)//
%   Emits every test case in order.
pp_test_cases([]) --> !.
pp_test_cases([A|As]) -->
pp_test_case(A),
pp_test_cases(As).
% pp_test_suite(+Suite)//
%   Emits the complete Ruby file: the require lines followed by all
%   test-case classes.
pp_test_suite(Suite) -->
"require 'bundler/setup'\n",
"require 'minitest/autorun'\n",
"require 'hammer'\n",
pp_test_cases(Suite).
% gen_ts(+Suite, -Str)
%   Str is the generated Ruby source for the parsed test suite Suite.
gen_ts(Foo,Str) :-
phrase(pp_test_suite(Foo),Str).
% prolog
%   Command-line entry point (see the "Run with" header): obtains the test
%   cases from read_tc/1 and writes the generated code to standard output.
%   NOTE(review): read_tc/1 is presumably exported by tsparser -- confirm.
prolog :-
read_tc(A),
gen_ts(A, Res),
writef("%s", [Res]).
File moved
# -*- python -*-
from __future__ import absolute_import, division, print_function
import os.path
Import('env testruns')
# Bump this if you break binary compatibility (e.g. renumber backends)
hammer_shlib_version = "1.0.0"
dist_headers = [
"hammer.h",
"allocator.h",
"glue.h",
"internal.h"
'hammer.h',
'allocator.h',
'compiler_specifics.h',
'glue.h',
'internal.h',
'platform.h'
]
parsers_headers = [
"parsers/parser_internal.h"
'parsers/parser_internal.h'
]
backends_headers = [
"backends/regex.h",
"backends/contextfree.h"
'backends/regex.h',
'backends/contextfree.h',
'backends/missing.h',
'backends/params.h'
]
parsers = ['parsers/%s.c'%s for s in
['action',
'and',
'attr_bool',
'bind',
'bits',
'bytes',
'butnot',
'ch',
'charset',
'choice',
'difference',
'end',
'endianness',
'epsilon',
'ignore',
'ignoreseq',
......@@ -38,14 +52,17 @@ parsers = ['parsers/%s.c'%s for s in
'not',
'nothing',
'optional',
'permutation',
'sequence',
'token',
'unimplemented',
'whitespace',
'xor']]
'xor',
'value',
'seek']]
backends = ['backends/%s.c' % s for s in
['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']]
['missing', 'packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0', 'params']]
misc_hammer_parts = [
'allocator.c',
......@@ -59,34 +76,79 @@ misc_hammer_parts = [
'hammer.c',
'pprint.c',
'registry.c',
'system_allocator.c']
'system_allocator.c',
'sloballoc.c']
if env['PLATFORM'] == 'win32':
misc_hammer_parts += [
'platform_win32.c',
'tsearch.c',
]
else:
misc_hammer_parts += ['platform_bsdlike.c']
ctests = ['t_benchmark.c',
't_bitreader.c',
't_bitwriter.c',
't_parser.c',
't_grammar.c',
't_misc.c']
't_misc.c',
't_mm.c',
't_names.c',
't_regression.c']
static_library_name = 'hammer'
build_shared_library=True
if env['PLATFORM'] == 'win32':
# FIXME(windows): symbols in hammer are not exported yet, a shared lib would be useless
build_shared_library=False
# prevent collision between .lib from dll and .lib for static lib
static_library_name = 'hammer_s'
if 'GPROF' in env and env['GPROF'] == 1:
# Disable the shared library (it won't work with gprof) and rename the static one
build_shared_library=False
static_library_name = 'hammer_pg'
# Markers for later
libhammer_static = None
libhammer_shared = None
if build_shared_library:
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts, \
SHLIBVERSION=hammer_shlib_version)
libhammer_static = env.StaticLibrary(static_library_name, parsers + backends + misc_hammer_parts)
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts)
libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts)
Default(libhammer_shared, libhammer_static)
if libhammer_shared is not None:
Default(libhammer_shared, libhammer_static)
env.Install('$libpath', [libhammer_static, libhammer_shared])
else:
Default(libhammer_static)
env.Install('$libpath', [libhammer_static])
env.Install("$libpath", [libhammer_static, libhammer_shared])
env.Install("$incpath", dist_headers)
env.Install("$parsersincpath", parsers_headers)
env.Install("$backendsincpath", backends_headers)
env.Install("$pkgconfigpath", "../../../libhammer.pc")
env.Install('$incpath', dist_headers)
env.Install('$parsersincpath', parsers_headers)
env.Install('$backendsincpath', backends_headers)
env.Install('$pkgconfigpath', '../../../libhammer.pc')
testenv = env.Clone()
testenv.ParseConfig('pkg-config --cflags --libs glib-2.0')
testenv.Append(LIBS=['hammer'], LIBPATH=['.'])
ctestexec = testenv.Program('test_suite', ctests + ['test_suite.c'])
ctest = Alias('testc', [ctestexec], "".join(["env LD_LIBRARY_PATH=", os.path.dirname(ctestexec[0].path), " ", ctestexec[0].path]))
AlwaysBuild(ctest)
testruns.append(ctest)
if GetOption('with_tests'):
testenv = env.Clone()
testenv.ParseConfig('pkg-config --cflags --libs glib-2.0')
if libhammer_shared is not None:
testenv.Append(LIBS=['hammer'])
else:
testenv.Append(LIBS=[static_library_name])
testenv.Prepend(LIBPATH=['.'])
ctestexec = testenv.Program('test_suite', ctests + ['test_suite.c'], LINKFLAGS='--coverage' if testenv.GetOption('coverage') else None)
ctest = Alias('testc', [ctestexec], ''.join(['env LD_LIBRARY_PATH=', os.path.dirname(ctestexec[0].path), ' ', ctestexec[0].path]))
AlwaysBuild(ctest)
testruns.append(ctest)
Export("libhammer_static libhammer_shared")
if libhammer_shared is not None:
Export('libhammer_static libhammer_shared')
else:
Export('libhammer_static')
for b in env['bindings']:
env.SConscript(["bindings/%s/SConscript" % b])
env.SConscript(['bindings/%s/SConscript' % b])
......@@ -18,6 +18,7 @@
#include <string.h>
#include <stdint.h>
#include <sys/types.h>
#include <setjmp.h>
#include "hammer.h"
#include "internal.h"
......@@ -28,28 +29,56 @@ struct arena_link {
// For efficiency, we should probably allocate the arena links in
// their own slice, and link to a block directly. That can be
// implemented later, though, with no change in interface.
struct arena_link *next; // It is crucial that this be the first item; so that
// any arena link can be casted to struct arena_link**.
struct arena_link *next;
size_t free;
size_t used;
uint8_t rest[];
} ;
};
/*
 * Arena allocator state (the hidden implementation behind HArena).
 */
struct HArena_ {
/* first block in the list; ordinary allocations are carved from it */
struct arena_link *head;
/* underlying allocator that blocks are obtained from */
struct HAllocator_ *mm__;
/* does mm__ zero blocks for us? */
bool malloc_zeros;
/* payload size of a standard block; larger requests get a dedicated block */
size_t block_size;
/* running totals reported through h_allocator_stats() */
size_t used;
size_t wasted;
#ifdef DETAILED_ARENA_STATS
/* calls/bytes requested from mm__, and calls/bytes zeroed with memset */
size_t mm_malloc_count, mm_malloc_bytes;
size_t memset_count, memset_bytes;
/* all arena allocations, then broken down by s(mall)/l(arge) and
 * u(ninitialised)/i(nitialised) */
size_t arena_malloc_count, arena_malloc_bytes;
size_t arena_su_malloc_count, arena_su_malloc_bytes;
size_t arena_si_malloc_count, arena_si_malloc_bytes;
size_t arena_lu_malloc_count, arena_lu_malloc_bytes;
size_t arena_li_malloc_count, arena_li_malloc_bytes;
#endif
/* if non-NULL, longjmp target used when a block allocation fails */
jmp_buf *except;
};
static void * h_arena_malloc_raw(HArena *arena, size_t size, bool need_zero);
/*
 * Allocate `size` bytes through allocator `mm__`.
 *
 * If the allocator returns NULL the failure is reported via
 * h_platform_errx() with exit status 1 (which is expected not to return), so
 * callers do not need to check the result.
 */
void* h_alloc(HAllocator* mm__, size_t size) {
  void *block = mm__->alloc(mm__, size);

  if (block == NULL) {
    h_platform_errx(1, "memory allocation failed (%zuB requested)\n", size);
  }
  return block;
}
/*
 * Resize the allocation `ptr` to `size` bytes through allocator `mm__`.
 *
 * If the allocator returns NULL the failure is reported via
 * h_platform_errx() with exit status 1 (which is expected not to return), so
 * callers do not need to check the result.
 */
void* h_realloc(HAllocator* mm__, void* ptr, size_t size) {
  void *block = mm__->realloc(mm__, ptr, size);

  if (block == NULL) {
    h_platform_errx(1, "memory reallocation failed (%zuB requested)\n", size);
  }
  return block;
}
HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
if (block_size == 0)
block_size = 4096;
struct HArena_ *ret = h_new(struct HArena_, 1);
struct arena_link *link = (struct arena_link*)mm__->alloc(mm__, sizeof(struct arena_link) + block_size);
memset(link, 0, sizeof(struct arena_link) + block_size);
struct arena_link *link = (struct arena_link*)h_alloc(mm__, sizeof(struct arena_link) + block_size);
assert(ret != NULL);
assert(link != NULL);
link->free = block_size;
link->used = 0;
link->next = NULL;
......@@ -57,41 +86,154 @@ HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
ret->block_size = block_size;
ret->used = 0;
ret->mm__ = mm__;
#ifdef DETAILED_ARENA_STATS
ret->mm_malloc_count = 2;
ret->mm_malloc_bytes = sizeof(*ret) + sizeof(struct arena_link) + block_size;
ret->memset_count = 0;
ret->memset_bytes = 0;
ret->arena_malloc_count = ret->arena_malloc_bytes = 0;
ret->arena_su_malloc_count = ret->arena_su_malloc_bytes = 0;
ret->arena_si_malloc_count = ret->arena_si_malloc_bytes = 0;
ret->arena_lu_malloc_count = ret->arena_lu_malloc_bytes = 0;
ret->arena_li_malloc_count = ret->arena_li_malloc_bytes = 0;
#endif
/* XXX provide a mechanism to indicate mm__ returns zeroed blocks */
ret->malloc_zeros = false;
ret->wasted = sizeof(struct arena_link) + sizeof(struct HArena_) + block_size;
ret->except = NULL;
return ret;
}
void* h_arena_malloc(HArena *arena, size_t size) {
/*
 * Install (or, with NULL, remove) the jump buffer that the arena jumps to
 * when it cannot obtain a new block from its underlying allocator.  The
 * arena only stores the pointer; the jmp_buf itself stays owned by the
 * caller.
 */
void h_arena_set_except(HArena *arena, jmp_buf *except)
{
arena->except = except;
}
/*
 * Obtain `size` bytes from the arena's underlying allocator.
 *
 * On failure, control is transferred to the handler installed with
 * h_arena_set_except() (longjmp with value 1) if there is one; otherwise the
 * failure is reported through h_platform_errx() with exit status 1.
 *
 * The requested size is printed with %zu, matching h_alloc(), so that it is
 * not truncated on platforms where size_t is wider than unsigned int.
 */
static void *alloc_block(HArena *arena, size_t size)
{
  void *block = arena->mm__->alloc(arena->mm__, size);

  if (!block) {
    if (arena->except)
      longjmp(*arena->except, 1);
    h_platform_errx(1, "memory allocation failed (%zuB requested)\n", size);
  }
  return block;
}
/*
 * Allocate `size` bytes from the arena without requesting that the memory
 * be zeroed (need_zero = false).
 */
void * h_arena_malloc_noinit(HArena *arena, size_t size) {
return h_arena_malloc_raw(arena, size, false);
}
/*
 * Allocate `size` bytes from the arena, requesting zero-filled memory
 * (need_zero = true).
 */
void * h_arena_malloc(HArena *arena, size_t size) {
return h_arena_malloc_raw(arena, size, true);
}
static void * h_arena_malloc_raw(HArena *arena, size_t size,
bool need_zero) {
struct arena_link *link = NULL;
void *ret = NULL;
if (size <= arena->head->free) {
// fast path..
void* ret = arena->head->rest + arena->head->used;
/* fast path.. */
ret = arena->head->rest + arena->head->used;
arena->used += size;
arena->wasted -= size;
arena->head->used += size;
arena->head->free -= size;
return ret;
#ifdef DETAILED_ARENA_STATS
++(arena->arena_malloc_count);
arena->arena_malloc_bytes += size;
if (need_zero) {
++(arena->arena_si_malloc_count);
arena->arena_si_malloc_bytes += size;
} else {
++(arena->arena_su_malloc_count);
arena->arena_su_malloc_bytes += size;
}
#endif
} else if (size > arena->block_size) {
// We need a new, dedicated block for it, because it won't fit in a standard sized one.
// This involves some annoying casting...
/*
* We need a new, dedicated block for it, because it won't fit in a
* standard sized one.
*
* NOTE:
*
* We used to do a silly casting dance to treat blocks like this
* as special cases and make the used/free fields part of the allocated
* block, but the old code was not really proper portable C and depended
* on a bunch of implementation-specific behavior. We could have done it
* better with a union in struct arena_link, but the memory savings is
* only 0.39% for a 64-bit machine, a 4096-byte block size and all
* large allocations *only just one byte* over the block size, so I
* question the utility of it. We do still slip the large block in
* one position behind the list head so it doesn't cut off a partially
* filled list head.
*
* -- andrea
*/
link = alloc_block(arena, size + sizeof(struct arena_link));
assert(link != NULL);
arena->used += size;
arena->wasted += sizeof(struct arena_link*);
void* link = arena->mm__->alloc(arena->mm__, size + sizeof(struct arena_link*));
memset(link, 0, size + sizeof(struct arena_link*));
*(struct arena_link**)link = arena->head->next;
arena->head->next = (struct arena_link*)link;
return (void*)(((uint8_t*)link) + sizeof(struct arena_link*));
arena->wasted += sizeof(struct arena_link);
link->used = size;
link->free = 0;
link->next = arena->head->next;
arena->head->next = link;
ret = link->rest;
#ifdef DETAILED_ARENA_STATS
++(arena->arena_malloc_count);
arena->arena_malloc_bytes += size;
if (need_zero) {
++(arena->arena_li_malloc_count);
arena->arena_li_malloc_bytes += size;
} else {
++(arena->arena_lu_malloc_count);
arena->arena_lu_malloc_bytes += size;
}
#endif
} else {
// we just need to allocate an ordinary new block.
struct arena_link *link = (struct arena_link*)arena->mm__->alloc(arena->mm__, sizeof(struct arena_link) + arena->block_size);
memset(link, 0, sizeof(struct arena_link) + arena->block_size);
/* we just need to allocate an ordinary new block. */
link = alloc_block(arena, sizeof(struct arena_link) + arena->block_size);
assert(link != NULL);
#ifdef DETAILED_ARENA_STATS
++(arena->mm_malloc_count);
arena->mm_malloc_bytes += sizeof(struct arena_link) + arena->block_size;
#endif
link->free = arena->block_size - size;
link->used = size;
link->next = arena->head;
arena->head = link;
arena->used += size;
arena->wasted += sizeof(struct arena_link) + arena->block_size - size;
return link->rest;
ret = link->rest;
#ifdef DETAILED_ARENA_STATS
++(arena->arena_malloc_count);
arena->arena_malloc_bytes += size;
if (need_zero) {
++(arena->arena_si_malloc_count);
arena->arena_si_malloc_bytes += size;
} else {
++(arena->arena_su_malloc_count);
arena->arena_su_malloc_bytes += size;
}
#endif
}
/*
* Zeroize if necessary
*/
if (need_zero && !(arena->malloc_zeros)) {
memset(ret, 0, size);
#ifdef DETAILED_ARENA_STATS
++(arena->memset_count);
arena->memset_bytes += size;
#endif
}
return ret;
}
void h_arena_free(HArena *arena, void* ptr) {
......@@ -115,4 +257,49 @@ void h_delete_arena(HArena *arena) {
/*
 * Copy the arena's current usage counters into *stats.
 *
 * `used` and `wasted` are always filled in; the per-category counters are
 * only copied when the library is built with DETAILED_ARENA_STATS.
 */
void h_allocator_stats(HArena *arena, HArenaStats *stats) {
stats->used = arena->used;
stats->wasted = arena->wasted;
#ifdef DETAILED_ARENA_STATS
stats->mm_malloc_count = arena->mm_malloc_count;
stats->mm_malloc_bytes = arena->mm_malloc_bytes;
stats->memset_count = arena->memset_count;
stats->memset_bytes = arena->memset_bytes;
stats->arena_malloc_count = arena->arena_malloc_count;
stats->arena_malloc_bytes = arena->arena_malloc_bytes;
stats->arena_su_malloc_count = arena->arena_su_malloc_count;
stats->arena_su_malloc_bytes = arena->arena_su_malloc_bytes;
stats->arena_si_malloc_count = arena->arena_si_malloc_count;
stats->arena_si_malloc_bytes = arena->arena_si_malloc_bytes;
stats->arena_lu_malloc_count = arena->arena_lu_malloc_count;
stats->arena_lu_malloc_bytes = arena->arena_lu_malloc_bytes;
stats->arena_li_malloc_count = arena->arena_li_malloc_count;
stats->arena_li_malloc_bytes = arena->arena_li_malloc_bytes;
#endif
}
/*
 * Resize an arena allocation.
 *
 * arena: arena that `ptr` was allocated from.
 * ptr:   existing allocation, or NULL to simply allocate `n` fresh bytes.
 * n:     requested new size in bytes.
 *
 * Returns a new block of `n` bytes (not zero-initialised beyond the copied
 * data).  The arena does not record individual allocation sizes, so a new
 * block is always allocated and as many bytes are copied as the old
 * allocation could possibly have held: everything from `ptr` to the end of
 * the used part of its block, capped at `n`.
 *
 * Asserts that a non-NULL `ptr` lies inside one of the arena's blocks.
 */
void* h_arena_realloc(HArena *arena, void* ptr, size_t n) {
  struct arena_link *link;
  const uint8_t *old = ptr;
  void *ret;
  size_t ncopy;

  /* realloc(NULL, n) semantics: nothing to locate or copy */
  if (ptr == NULL)
    return h_arena_malloc_noinit(arena, n);

  /*
   * XXX this is really wasteful, but maybe better than nothing?
   *
   * Walk the blocks to find the one containing ptr.  Byte pointers are used
   * for the address arithmetic; arithmetic on void * is a compiler extension.
   * The upper bound is inclusive so that a zero-sized allocation at the very
   * end of a block's used region is still found.
   */
  for (link = arena->head; link; link = link->next) {
    if (old >= link->rest && old <= link->rest + link->used)
      break; /* found it */
  }
  assert(link != NULL);

  /* upper bound on the old allocation's size; computed before allocating */
  ncopy = (size_t)((link->rest + link->used) - old);
  if (n < ncopy)
    ncopy = n;

  ret = h_arena_malloc_noinit(arena, n);
  assert(ret != NULL);
  memcpy(ret, ptr, ncopy);
  h_arena_free(arena, ptr);

  return ret;
}
......@@ -18,6 +18,27 @@
#ifndef HAMMER_ALLOCATOR__H__
#define HAMMER_ALLOCATOR__H__
#include <sys/types.h>
#include <setjmp.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * ATTR_MALLOC(n): annotate an allocation function whose n-th parameter
 * (1-based) is the requested size.
 *
 *   - LLVM-based compilers: the `malloc` attribute only, when available.
 *   - SWIG:                 nothing (attributes are not understood).
 *   - other GNU compilers:  `malloc` plus `alloc_size(n)`.
 *   - anything else:        nothing.
 *
 * The size-parameter index is taken from the macro argument rather than
 * being hard-coded, so the macro is also correct for functions whose size
 * is not the second parameter.
 */
#if defined __llvm__
# if __has_attribute(malloc)
#   define ATTR_MALLOC(n) __attribute__((malloc))
# else
#   define ATTR_MALLOC(n)
# endif
#elif defined SWIG
# define ATTR_MALLOC(n)
#elif defined __GNUC__
# define ATTR_MALLOC(n) __attribute__((malloc, alloc_size(n)))
#else
# define ATTR_MALLOC(n)
#endif
/* #define DETAILED_ARENA_STATS */
// TODO(thequux): Turn this into an "HAllocatorVtable", and add a wrapper that also takes an environment pointer.
typedef struct HAllocator_ {
......@@ -26,23 +47,49 @@ typedef struct HAllocator_ {
void (*free)(struct HAllocator_* allocator, void* ptr);
} HAllocator;
void* h_alloc(HAllocator* allocator, size_t size) ATTR_MALLOC(2);
void* h_realloc(HAllocator* allocator, void* ptr, size_t size);
typedef struct HArena_ HArena ; // hidden implementation
HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default...
#ifndef SWIG
void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) ));
#else
void* h_arena_malloc(HArena *arena, size_t count);
#endif
void* h_arena_malloc_noinit(HArena *arena, size_t count) ATTR_MALLOC(2);
void* h_arena_malloc(HArena *arena, size_t count) ATTR_MALLOC(2);
void* h_arena_realloc(HArena *arena, void* ptr, size_t count);
void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers.
void h_delete_arena(HArena *arena);
void h_arena_set_except(HArena *arena, jmp_buf *except);
/*
 * Snapshot of an arena's usage counters, filled in by h_allocator_stats().
 * The detailed counters exist only when DETAILED_ARENA_STATS is defined.
 */
typedef struct {
/* running totals maintained by the arena */
size_t used;
size_t wasted;
#ifdef DETAILED_ARENA_STATS
/* requests made to the underlying allocator */
size_t mm_malloc_count;
size_t mm_malloc_bytes;
/* zero-fill operations performed by the arena */
size_t memset_count;
size_t memset_bytes;
/* all arena allocations */
size_t arena_malloc_count;
size_t arena_malloc_bytes;
/* small, uninited */
size_t arena_su_malloc_count;
size_t arena_su_malloc_bytes;
/* small, inited */
size_t arena_si_malloc_count;
size_t arena_si_malloc_bytes;
/* large, uninited */
size_t arena_lu_malloc_count;
size_t arena_lu_malloc_bytes;
/* large, inited */
size_t arena_li_malloc_count;
size_t arena_li_malloc_bytes;
#endif
} HArenaStats;
void h_allocator_stats(HArena *arena, HArenaStats *stats);
#ifdef __cplusplus
}
#endif
#endif // #ifndef HAMMER_ALLOCATOR__H__