Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.


Select target project
No results found


Select target project
  • hammer/hammer
  • mlp/hammer
  • xentrac/hammer
  • pesco/hammer
  • letitiali/hammer
  • nobody/hammer
  • kia/hammer-sandbox
  • vyrus001/hammer
  • denleylam/hammer
9 results
Show changes
Commits on Source (356)
# generated files
# coverage and profiling stuff
# editor leftovers
# misc
......@@ -10,35 +10,43 @@ matrix:
- compiler: gcc
language: ruby
rvm: ruby-1.9.3-p484
rvm: ruby-1.9.3-p551
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-1.9.3-p484
rvm: ruby-1.9.3-p551
env: BINDINGS=ruby CC=clang
- compiler: gcc
language: ruby
rvm: ruby-2.0.0-p353
rvm: ruby-2.0.0-p647
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-2.0.0-p353
rvm: ruby-2.0.0-p647
env: BINDINGS=ruby CC=clang
- compiler: gcc
language: ruby
rvm: ruby-2.1.0
rvm: ruby-2.1.7
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-2.1.0
rvm: ruby-2.1.7
env: BINDINGS=ruby CC=clang
- compiler: gcc
language: ruby
rvm: ruby-2.2.3
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-2.2.3
env: BINDINGS=ruby CC=clang
- compiler: gcc
language: python
python: "2.7"
python: "2.7.10"
env: BINDINGS=python
- compiler: clang
language: python
python: "2.7"
python: "2.7.10"
env: BINDINGS=python CC=clang
- compiler: gcc
language: perl
......@@ -94,12 +102,11 @@ matrix:
env: BINDINGS=cpp CC=clang
- sudo apt-get update -qq
- sudo apt-get install lcov
- sudo apt-get install -y lcov
- gem install coveralls-lcov
- if [ "$BINDINGS" != "none" ]; then sudo apt-get install -qq swig; fi
- if [ "$BINDINGS" == "perl" ]; then sudo add-apt-repository ppa:dns/irc -y; sudo apt-get update -qq; sudo apt-get install -qq swig=2.0.8-1irc1~12.04; fi
- if [ "$BINDINGS" == "python" ]; then sudo apt-get install -qq python-dev; fi
- if [ "$BINDINGS" == "dotnet" ]; then sudo add-apt-repository ppa:directhex/monoxide -y; sudo apt-get update -qq; sudo apt-get install -qq mono-devel mono-mcs nunit nunit-console; mozroots --import --sync; fi
- if [ "$BINDINGS" != "none" ]; then sudo sh -c 'echo "deb trusty-backports main restricted universe multiverse" >> /etc/apt/sources.list'; sudo apt-get update -qq; sudo apt-get install -yqq swig3.0/trusty-backports; fi
- if [ "$BINDINGS" == "python" ]; then sudo apt-get install -yqq python-dev; fi
- if [ "$BINDINGS" == "dotnet" ]; then sudo add-apt-repository ppa:directhex/monoxide -y; sudo apt-get update -qq; sudo apt-get install -yqq mono-devel mono-mcs nunit nunit-console; mozroots --import --sync; fi
install: true
- if [ "$BINDINGS" == "php" ]; then phpenv config-add src/bindings/php/hammer.ini; fi
......@@ -6,12 +6,13 @@ internal anaphoric macros use. Chances are that if you use these names
for other things, you're gonna have a bad time.
In particular, these names, and the macros that use them, are:
- state:
Used by a_new and company. Should be an HParseState*
- mm__:
Used by h_new and h_free. Should be an HAllocator*
- stk__:
Used in desugaring. Should be an HCFStack*
- `state`:
Used by `a_new` and company. Should be an `HParseState*`.
- `mm__`:
Used by `h_new` and `h_free`. Should be an `HAllocator*`.
- `stk__`:
Used in desugaring. Should be an `HCFStack*`.
Function suffixes
......@@ -21,9 +22,9 @@ parameters or parameters in multiple different forms. For example,
often, you have a global memory manager that is used for an entire
program. In this case, you can leave the memory manager arguments
off, letting them be implicit instead. Further, it is often convenient
to pass an array or va_list to a function instead of listing the
arguments inline (eg, for wrapping a function, generating the
arguments programattically, or writing bindings for another language.
to pass an array or `va_list` to a function instead of listing the
arguments inline (e.g., for wrapping a function, generating the
arguments programmatically, or writing bindings for another language.)
Because we have found that most variants fall into a fairly small set
of forms, and to minimize the amount of API calls that users need to
......@@ -32,21 +33,22 @@ variants: the function name is followed by two underscores and a set
of single-character "flags" indicating what optional features that
particular variant has (in alphabetical order, of course):
__a: takes variadic arguments as a void*[] (not implemented yet, but will be soon.
__m: takes a memory manager as the first argument, to override the system memory manager.
__v: Takes the variadic argument list as a va_list
- `__a`: takes variadic arguments as a `void*[]` (not implemented yet,
but will be soon.)
- `__m`: takes a memory manager as the first argument, to override the
system memory manager.
- `__v`: Takes the variadic argument list as a `va_list`.
Memory managers
If the __m function variants are used or system_allocator is
If the `__m` function variants are used or `system_allocator` is
overridden, there come some difficult questions to answer,
particularly regarding the behavior when multiple memory managers are
combined. As a general rule of thumb (exceptions will be explicitly
documented), assume that
If you have a function f, which is passed a memory manager m and
> If you have a function f, which is passed a memory manager m and
returns a value r, any function that uses r as a parameter must
also be told to use m as a memory manager.
......@@ -57,7 +59,7 @@ Language-independent test suite
There is a language-independent representation of the Hammer test
suite in `lib/test-suite`. This is intended to be used with the Prolog library, along with a language-specific frontend.
Only the C# frontend exists so far; to regenerate the test suites using it, run
......@@ -3,7 +3,7 @@
# and kick off a recursive make
# Also, "make src/all" turns into "make -C src all"
SUBDIRS = src examples jni
SUBDIRS = src examples src/bindings/jni
Hammer is a parsing library. Like many modern parsing libraries, it provides a parser combinator interface for writing grammars as inline domain-specific languages, but Hammer also provides a variety of parsing backends. It's also bit-oriented rather than character-oriented, making it ideal for parsing binary data such as images, network packets, audio, and executables.
Hammer is written in C, but will provide bindings for other languages. If you don't see a language you're interested in on the list, just ask.
Hammer is written in C, but provides bindings for other languages. If you don't see a language you're interested in on the list, just ask.
Hammer currently builds under Linux and OS X. (Windows is coming.)
Hammer currently builds under Linux, OS X, and Windows.
[![Build Status](](
* Bit-oriented -- grammars can include single-bit flags or multi-bit constructs that span character boundaries, with no hassle
* Thread-safe, reentrant
* Thread-safe, reentrant (for most purposes; see Known Issues for details)
* Benchmarking for parsing backends -- determine empirically which backend will be most time-efficient for your grammar
* Parsing backends:
* Packrat parsing
......@@ -18,35 +17,42 @@ Features
* Regular expressions
* Language bindings:
* C++
* Java (not currently building; give us a few days)
* Java (incomplete)
* Python
* Ruby
* Perl
* [Go](
* .NET
* .NET
* Lua (landing soon!)
### Prerequisites
* SCons
* [SCons](
### Optional Dependencies
* pkg-config (for `scons test`)
* glib-2.0 (>= 2.29) (for `scons test`)
* glib-2.0-dev (for `scons test`)
* swig (for Python/Perl/PHP bindings; Perl requires >= 2.0.8)
* python2.7-dev (for Python bindings)
* [swig]( (for Python/Perl/PHP bindings; Perl requires >= 2.0.8; Python 3.x requires >= 3.0.0)
* python2.7-dev (for Python 2 bindings)
* python3-dev (>= 3.5) (for Python 3 bindings)
* a JDK (for Java bindings)
* a working [phpenv]( configuration (for PHP bindings)
* Ruby >= 1.9.3 and bundler, for the Ruby bindings
* [Ruby]( >= 1.9.3 and bundler, for the Ruby bindings
* mono-devel and mono-mcs (>= 3.0.6) (for .NET bindings)
* nunit (for testing .NET bindings)
* [nunit]( (for testing .NET bindings)
To build, type `scons`. To run the built-in test suite, type `scons test`. For a debug build, add `--variant=debug`.
To build, type `scons`.
To run the built-in test suite, type `scons test`.
To avoid the test dependencies, add `--no-tests`.
For a debug build, add `--variant=debug`.
To build bindings, pass a "bindings" argument to scons, e.g. `scons bindings=python`. `scons bindings=python test` will build Python bindings and run tests for both C and Python. `--variant=debug` is valid here too. You can build more than one set of bindings at a time; just separate them with commas, e.g. `scons bindings=python,perl`.
For Python, pass `python=python<X>.<Y>`, e.g. `scons bindings=python python=python2.7` or `scons bindings=python python=python3.5`.
For Java, if jni.h and jni_md.h aren't already somewhere on your include path, prepend
`C_INCLUDE_PATH=/path/to/jdk/include` to that.
......@@ -60,26 +66,31 @@ Just `#include <hammer/hammer.h>` (also `#include <hammer/glue.h>` if you plan t
If you've installed Hammer system-wide, you can use `pkg-config` in the usual way.
For documentation, see the [user guide](
To learn about Hammer, check:
* the [user guide](
* [Hammer Primer]( (outdated in terms of code, but good to get the general thinking)
* [Try Hammer](
The `examples/` directory contains some simple examples, currently including:
* base64
* [base64](
* [DNS](
Known Issues
The Python bindings only work with Python 2.7. SCons doesn't work with Python 3, and PyCapsule isn't available in 2.6 and below, so 2.7 is all you get. Sorry about that.
The Python bindings work with Python 2.7, and Python 3.5+.
The requirement for SWIG >= 2.0.8 for Perl bindings is due to a [known bug]( in SWIG. [ppa:dns/irc]( has backports of SWIG 2.0.8 for Ubuntu versions 10.04-12.10; you can also [build SWIG from source](
The .NET bindings are for Mono 3.0.6 and greater. If you're on a Debian-based distro that only provides Mono 2 (e.g., Ubuntu 12.04), there are backports for [3.0.x](, and a [3.2.x PPA]( maintained by the Mono team.
The regular expression backend is potentially not thread-safe (thanks to Martin Murray for pointing this out). A full rewrite of this backend is on the roadmap already due to some unexpected nondeterminism in the current implementation; we plan to fix this problem in that rewrite.
Please join us at `#hammer` on `` if you have any questions or just want to talk about parsing.
Mailing list, IRC, and potentially other channels to come.
You can also email us at <>.
Also to be updated soon.
# -*- python -*-
from __future__ import absolute_import, division, print_function
import os
import os.path
import platform
import subprocess
import sys
if platform.system() == 'Windows':
default_install_dir = 'build' # no obvious place for installation on Windows
vars = Variables(None, ARGUMENTS)
vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packaging scripts)", None, PathVariable.PathIsDirCreate))
vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept))
vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['cpp', 'dotnet', 'perl', 'php', 'python', 'ruby']))
vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['cpp', 'dotnet', 'jni', 'perl', 'php', 'python', 'ruby']))
vars.Add('python', 'Python interpreter', 'python')
tools = ['default', 'scanreplace']
if 'dotnet' in ARGUMENTS.get('bindings', []):
# add the clang tool if necessary
if os.getenv('CC') == 'clang' or platform.system() == 'Darwin':
# try to detect if cc happens to be clang by inspecting --version
cc = os.getenv('CC') or 'cc'
ver =[cc, '--version'], capture_output=True).stdout
if b'clang' in ver.split():
os.environ['CC'] = cc # make sure we call it as we saw it
envvars = {'PATH' : os.environ['PATH']}
if 'PKG_CONFIG_PATH' in os.environ:
envvars['PKG_CONFIG_PATH'] = os.environ['PKG_CONFIG_PATH']
if platform.system() == 'Windows':
# from the scons FAQ (keywords: LNK1104 TEMPFILE), needed by link.exe
envvars['TMP'] = os.environ['TMP']
env = Environment(ENV = envvars,
variables = vars,
......@@ -29,7 +51,7 @@ if not 'bindings' in env:
def calcInstallPath(*elements):
path = os.path.abspath(os.path.join(*map(env.subst, elements)))
if 'DESTDIR' in env:
path = os.path.join(env['DESTDIR'], os.path.relpath(path, start="/"))
path = os.path.join(env['DESTDIR'], os.path.relpath(path, start='/'))
return path
rel_prefix = not os.path.isabs(env['prefix'])
......@@ -37,102 +59,165 @@ env['prefix'] = os.path.abspath(env['prefix'])
if 'DESTDIR' in env:
env['DESTDIR'] = os.path.abspath(env['DESTDIR'])
if rel_prefix:
print >>sys.stderr, "--!!-- You used a relative prefix with a DESTDIR. This is probably not what you"
print >>sys.stderr, "--!!-- you want; files will be installed in"
print >>sys.stderr, "--!!-- %s" % (calcInstallPath("$prefix"),)
print('--!!-- You used a relative prefix with a DESTDIR. This is probably not what you', file=sys.stderr)
print('--!!-- you want; files will be installed in', file=sys.stderr)
print('--!!-- %s' % (calcInstallPath('$prefix'),), file=sys.stderr)
env['libpath'] = calcInstallPath("$prefix", "lib")
env['incpath'] = calcInstallPath("$prefix", "include", "hammer")
env['parsersincpath'] = calcInstallPath("$prefix", "include", "hammer", "parsers")
env['backendsincpath'] = calcInstallPath("$prefix", "include", "hammer", "backends")
env['pkgconfigpath'] = calcInstallPath("$prefix", "lib", "pkgconfig")
env['libpath'] = calcInstallPath('$prefix', 'lib')
env['incpath'] = calcInstallPath('$prefix', 'include', 'hammer')
env['parsersincpath'] = calcInstallPath('$prefix', 'include', 'hammer', 'parsers')
env['backendsincpath'] = calcInstallPath('$prefix', 'include', 'hammer', 'backends')
env['pkgconfigpath'] = calcInstallPath('$prefix', 'lib', 'pkgconfig')
env.MergeFlags("-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable")
nargs=1, type='choice',
choices=['debug', 'opt'],
help='Build variant (debug or opt)')
if env['PLATFORM'] == 'darwin':
env.Append(SHLINKFLAGS = '-install_name ' + env["libpath"] + '/${TARGET.file}')
elif os.uname()[0] == "OpenBSD":
nargs=1, type="choice",
choices=["debug", "opt"],
help="Build variant (debug or opt)")
help="Build with coverage instrumentation")
help='Build with coverage instrumentation')
help='Build with debug symbols, even in the opt variant')
help="Build in-place, rather than in the build/<variant> tree")
help='Build with profiling instrumentation for gprof')
help='Build in-place, rather than in the build/<variant> tree')
default=env['PLATFORM'] != 'win32',
help='Do not build tests')
env['CC'] = os.getenv('CC') or env['CC']
env['CXX'] = os.getenv('CXX') or env['CXX']
env['CFLAGS'] = os.getenv('CFLAGS') or env['CFLAGS']
# Language standard and warnings
if env['CC'] == 'cl':
env.MergeFlags('-W3 -WX')
'_CRT_SECURE_NO_WARNINGS' # allow uses of sprintf
'-wd4018', # 'expression' : signed/unsigned mismatch
'-wd4244', # 'argument' : conversion from 'type1' to 'type2', possible loss of data
'-wd4267', # 'var' : conversion from 'size_t' to 'type', possible loss of data
if env['PLATFORM'] == 'darwin':
# It's reported -D_POSIX_C_SOURCE breaks the Mac OS build; I think we
# may need _DARWIN_C_SOURCE instead/in addition to, but let's wait to
# have access to a Mac to test/repo
env.MergeFlags('-std=c99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable')
# Using -D_POSIX_C_SOURCE=200809L here, not on an ad-hoc basis when,
# #including, is important
env.MergeFlags('-std=c99 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable')
# Linker options
if env['PLATFORM'] == 'darwin':
env.Append(SHLINKFLAGS = '-install_name ' + env['libpath'] + '/${TARGET.file}')
elif platform.system() == 'OpenBSD':
elif env['PLATFORM'] == 'win32':
# no extra lib needed
if GetOption('coverage'):
if env['CC'] == 'gcc':
env.ParseConfig('llvm-config --ldflags')
if GetOption('force_debug'):
if env['CC'] == 'cl':
if GetOption('gprof'):
if env['CC'] == 'gcc' and env['CXX'] == 'g++':
env['GPROF'] = 1
print("Can only use gprof with gcc")
dbg = env.Clone(VARIANT='debug')
if env['CC'] == 'cl':
opt = env.Clone(VARIANT='opt')
if env['CC'] == 'cl':
if GetOption("variant") == 'debug':
if GetOption('variant') == 'debug':
env = dbg
env = opt
env["CC"] = os.getenv("CC") or env["CC"]
env["CXX"] = os.getenv("CXX") or env["CXX"]
if GetOption("coverage"):
if env["CC"] == "gcc":
env.ParseConfig('llvm-config --ldflags')
if os.getenv("CC") == "clang" or env['PLATFORM'] == 'darwin':
env["ENV"].update(x for x in os.environ.items() if x[0].startswith("CCC_"))
env['ENV'].update(x for x in os.environ.items() if x[0].startswith('CCC_'))
#rootpath = env['ROOTPATH'] = os.path.abspath('.')
#env.Append(CPPPATH=os.path.join('#', "hammer"))
#env.Append(CPPPATH=os.path.join('#', 'hammer'))
testruns = []
targets = ["$libpath",
targets = ['$libpath',
if not GetOption("in_place"):
if not GetOption('in_place'):
env['BUILD_BASE'] = 'build/$VARIANT'
lib = env.SConscript(["src/SConscript"], variant_dir='$BUILD_BASE/src')
env.Alias("examples", env.SConscript(["examples/SConscript"], variant_dir='$BUILD_BASE/examples'))
lib = env.SConscript(['src/SConscript'], variant_dir='$BUILD_BASE/src')
env.Alias('examples', env.SConscript(['examples/SConscript'], variant_dir='$BUILD_BASE/examples'))
env['BUILD_BASE'] = '.'
lib = env.SConscript(["src/SConscript"])
lib = env.SConscript(['src/SConscript'])
for testrun in testruns:
env.Alias("test", testrun)
env.Alias('test', testrun)
env.Alias("install", targets)
env.Alias('install', targets)
- x86
- x64
PYTHON: "C:\\Python27"
version: 1.0.{build}
os: Visual Studio 2015
- easy_install scons
- scons --version
- scons install
- '@echo off'
- setlocal
- ps: >-
......@@ -14,6 +21,5 @@ build_script:
- call "%VS140COMNTOOLS%\..\..\VC\vcvarsall.bat" %VCVARS_PLATFORM%
- call tools\windows\build.bat
# FIXME(windows) TODO(uucidl): reactivate examples
# - call tools\windows\build_examples.bat
- call tools\windows\build_examples.bat
- exit /b 0
......@@ -77,11 +77,13 @@ Benchmarking for parsing backends -- determine empirically which backend will be
12 HParseResult *result = h_parse(hello_parser, input, inputsize);
13 if(result) {
14 printf("yay!\n");
14 printf("yay!\\n");
15 } else {
16 printf("boo!\n");
16 printf("boo!\\n");
17 }
18 }
18 h_parse_result_free(result);
19 return 0 == result;
20 }
from __future__ import absolute_import, division, print_function
example = env.Clone()
example.Append(LIBS="hammer", LIBPATH="../src")
if 'GPROF' in env and env['GPROF'] == 1:
example.Append(LIBS=hammer_lib_name, LIBPATH="../src")
dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c'])
ttuser = example.Program('ttuser', 'ttuser.c')
base64 = example.Program('base64', 'base64.c')
base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c')
base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c')
ties = example.Program('ties', ['ties.c', 'grammar.c'])
env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, ties])
\ No newline at end of file
env.Alias("examples", [dns, ttuser, base64, base64_sem1, base64_sem2, ties])
......@@ -45,7 +45,7 @@ int main(int argc, char **argv)
uint8_t input[102400];
size_t inputsize;
const HParseResult *result;
HParseResult *result;
......@@ -57,6 +57,7 @@ int main(int argc, char **argv)
if(result) {
fprintf(stderr, "parsed=%" PRId64 " bytes\n", result->bit_length/8);
h_pprint(stdout, result->ast, 0, 0);
return 0;
} else {
return 1;
#!/usr/bin/env python2
# Example parser: Base64, syntax only.
# Demonstrates how to construct a Hammer parser that recognizes valid Base64
# sequences.
# Note that no semantic evaluation of the sequence is performed, i.e. the
# byte sequence being represented is not returned, or determined. See
# base64_sem1.py and base64_sem2.py for examples how to attach appropriate
# semantic actions to the grammar.
from __future__ import absolute_import, division, print_function
import sys
import hammer as h
def init_parser():
digit = h.ch_range(0x30, 0x39)
alpha = h.choice(h.ch_range(0x41, 0x5a), h.ch_range(0x61, 0x7a))
# AUX.
plus ='+')
slash ='/')
equals ='=')
bsfdig = h.choice(alpha, digit, plus, slash)
bsfdig_4bit = h.in_(b'AEIMQUYcgkosw048')
bsfdig_2bit = h.in_(b'AQgw')
base64_3 = h.repeat_n(bsfdig, 4)
base64_2 = h.sequence(bsfdig, bsfdig, bsfdig_4bit, equals)
base64_1 = h.sequence(bsfdig, bsfdig_2bit, equals, equals)
base64 = h.sequence(h.many(base64_3),
h.optional(h.choice(base64_2, base64_1)))
return h.sequence(h.whitespace(base64), h.whitespace(h.end_p()))
def main():
document = init_parser()
s =
inputsize = len(s)
print('inputsize=%i' % inputsize, file=sys.stderr)
print('input=%s' % s, file=sys.stderr, end='')
result = document.parse(s)
if result:
#print('parsed=%i bytes', result.bit_length/8, file=sys.stderr)
if __name__ == '__main__':
import sys
......@@ -149,12 +149,13 @@ HParser *init_parser(void)
#include <stdio.h>
const HParser *parser; // Allocated statically to suppress leak warnings
int main(int argc, char **argv)
uint8_t input[102400];
size_t inputsize;
const HParser *parser;
const HParseResult *result;
HParseResult *result;
parser = init_parser();
......@@ -166,6 +167,7 @@ int main(int argc, char **argv)
if(result) {
fprintf(stderr, "parsed=%" PRId64 " bytes\n", result->bit_length/8);
h_pprint(stdout, result->ast, 0, 0);
return 0;
} else {
return 1;
#!/usr/bin/env python2
# Example parser: Base64, with fine-grained semantic actions
# Demonstrates how to attach semantic actions to grammar rules and piece by
# piece transform the parse tree into the desired semantic representation,
# in this case a sequence of 8-bit values.
# Those rules using h.action get an attached action, which must be declared
# (as a function).
# This variant of the example uses fine-grained semantic actions that
# transform the parse tree in small steps in a bottom-up fashion. Compare
# base64_sem2.py for an alternative approach using a single top-level action.
from __future__ import absolute_import, division, print_function
import functools
import sys
import hammer as h
# Semantic actions for the grammar below, each corresponds to an "ARULE".
# They must be named act_<rulename>.
def act_bsfdig(p, user_data=None):
    """Map one parsed base64 digit to its numeric value (0-63).

    p is either an integer character code or a single character that is
    converted with ord(). user_data is accepted but not used.

    Returns 0-25 for A-Z, 26-51 for a-z, 52-61 for 0-9, 62 for '+' and
    63 for '/'. Raises ValueError for any other character.
    """
    # FIXME See the note in init_parser()
    c = p if isinstance(p, h.INTEGER_TYPES) else ord(p)
    # c is always an integer at this point, so every branch compares against
    # character codes. Comparing it with b'+' or b'/' can never match and
    # would make valid '+' and '/' digits raise ValueError.
    if 0x41 <= c <= 0x5A: # A-Z
        return c - 0x41
    elif 0x61 <= c <= 0x7A: # a-z
        return c - 0x61 + 26
    elif 0x30 <= c <= 0x39: # 0-9
        return c - 0x30 + 52
    elif c == 0x2B: # +
        return 62
    elif c == 0x2F: # /
        return 63
    raise ValueError
# Hammer's Python bindings don't currently expose h_act_index or hact_ignore
# Semantic action: return the first element of p; user_data is not used.
def act_index0(p, user_data=None):
return p[0]
# Semantic action: always return None, whatever p and user_data are.
def act_ignore(p, user_data=None):
return None
act_bsfdig_4bit = act_bsfdig
act_bsfdig_2bit = act_bsfdig
act_equals = act_ignore
act_ws = act_ignore
act_document = act_index0
def act_base64_n(n, p, user_data=None):
"""General-form action to turn a block of base64 digits into bytes.
res = [0]*n
x = 0
bits = 0
for i in range(0, n+1):
x <<= 6
x |= p[i] or 0
bits += 6
x >>= bits % 8 # align, i.e. cut off extra bits
for i in range(n):
item = x & 0xFF
res[n-1-i] = item # output the last byte and
x >>= 8 # discard it
return tuple(res)
act_base64_3 = functools.partial(act_base64_n, 3)
act_base64_2 = functools.partial(act_base64_n, 2)
act_base64_1 = functools.partial(act_base64_n, 1)
def act_base64(p, user_data=None):
assert isinstance(p, tuple)
assert len(p) == 2
assert isinstance(p[0], tuple)
res = []
# concatenate base64_3 blocks
for elem in p[0]:
# append one trailing base64_2 or _1 block
tok = p[1]
if isinstance(tok, tuple):
return tuple(res)
def init_parser():
"""Return a parser with the grammar to be recognized.
# This is a direct translation of the C example. In C the literal 0x30
# is interchangable with the char literal '0' (note the single quotes).
# This is not the case in Python.
# TODO In the interests of being more Pythonic settle on either string
# literals, or integers
digit = h.ch_range(0x30, 0x39)
alpha = h.choice(h.ch_range(0x41, 0x5a), h.ch_range(0x61, 0x7a))
space = h.in_(b" \t\n\r\f\v")
# AUX.
plus ='+')
slash ='/')
equals = h.action('='), act_equals)
bsfdig = h.action(h.choice(alpha, digit, plus, slash), act_bsfdig)
bsfdig_4bit = h.action(h.in_(b"AEIMQUYcgkosw048"), act_bsfdig_4bit)
bsfdig_2bit = h.action(h.in_(b"AQgw"), act_bsfdig_2bit)
base64_3 = h.action(h.repeat_n(bsfdig, 4), act_base64_3)
base64_2 = h.action(h.sequence(bsfdig, bsfdig, bsfdig_4bit, equals),
base64_1 = h.action(h.sequence(bsfdig, bsfdig_2bit, equals, equals),
base64 = h.action(h.sequence(h.many(base64_3),
# TODO This is not quite the same as the C example, with uses act_ignore.
# But I can't get hammer to filter any value returned by act_ignore.
ws = h.ignore(h.many(space))
document = h.action(h.sequence(ws, base64, ws, h.end_p()),
# BUG sometimes inputs that should just don't parse.
# It *seemed* to happen mostly with things like "bbbbaaaaBA==".
# Using less actions seemed to make it less likely.
return document
def main():
parser = init_parser()
s =
inputsize = len(s)
print('inputsize=%i' % inputsize, file=sys.stderr)
print('input=%s' % s, file=sys.stderr, end='')
result = parser.parse(s)
if result:
#print('parsed=%i bytes', result.bit_length/8, file=sys.stderr)
if __name__ == '__main__':
......@@ -153,12 +153,13 @@ const HParser *init_parser(void)
#include <stdio.h>
const HParser *parser; // Allocated statically to suppress leak warnings
int main(int argc, char **argv)
uint8_t input[102400];
size_t inputsize;
const HParser *parser;
const HParseResult *result;
HParseResult *result;
parser = init_parser();
......@@ -170,6 +171,7 @@ int main(int argc, char **argv)
if(result) {
fprintf(stderr, "parsed=%" PRId64 " bytes\n", result->bit_length/8);
h_pprint(stdout, result->ast, 0, 0);
return 0;
} else {
return 1;
#!/usr/bin/env python2
# Example parser: Base64, with coarse-grained semantic actions
# Demonstrates how to attach semantic actions to a grammar and transform the
# parse tree into the desired semantic representation, in this case a sequence
# of 8-bit values.
# Those rules using h.action get an attached action, which must be declared
# (as a function).
# This variant of the example uses coarse-grained semantic actions,
# transforming the entire parse tree in one big step. Compare
# base64_sem1.py for an alternative approach using a fine-grained piece-by-piece
# transformation.
from __future__ import absolute_import, division, print_function
import functools
import sys
import hammer as h
# Semantic actions for the grammar below, each corresponds to an "ARULE".
# They must be named act_<rulename>.
def bsfdig_value(p):
"""Return the numeric value of a parsed base64 digit.
c = p if isinstance(p, h.INTEGER_TYPES) else ord(p)
if c:
if 0x41 <= c <= 0x5A: # A-Z
return c - 0x41
elif 0x61 <= c <= 0x7A: # a-z
return c - 0x61 + 26
elif 0x30 <= c <= 0x39: # 0-9
return c - 0x30 + 52
elif c == b'+':
return 62
elif c == b'/':
return 63
return 0
def act_base64(p, user_data=None):
assert isinstance(p, tuple)
assert len(p) == 2
assert isinstance(p[0], tuple)
# grab b64_3 block sequence
# grab and analyze b64 end block (_2 or _1)
b64_3 = p[0]
b64_2 = p[1]
b64_1 = p[1]
if not isinstance(b64_2, tuple):
b64_1 = b64_2 = None
elif b64_2[2] == '=':
b64_2 = None
b64_1 = None
# allocate result sequence
res = []
# concatenate base64_3 blocks
for digits in b64_3:
assert isinstance(digits, tuple)
x = bsfdig_value(digits[0])
x <<= 6; x |= bsfdig_value(digits[1])
x <<= 6; x |= bsfdig_value(digits[2])
x <<= 6; x |= bsfdig_value(digits[3])
res.append((x >> 16) & 0xFF)
res.append((x >> 8) & 0xFF)
res.append(x & 0xFF)
# append one trailing base64_2 or _1 block
if b64_2:
digits = b64_2
x = bsfdig_value(digits[0])
x <<= 6; x |= bsfdig_value(digits[1])
x <<= 6; x |= bsfdig_value(digits[2])
res.append((x >> 10) & 0xFF)
res.append((x >> 2) & 0xFF)
elif b64_1:
digits = b64_1
x = bsfdig_value(digits[0])
x <<= 6; x |= bsfdig_value(digits[1])
res.append((x >> 4) & 0xFF)
return tuple(res)
# Hammer's Python bindings don't currently expose h_act_index or hact_ignore
# Semantic action: return the first element of p; user_data is not used.
def act_index0(p, user_data=None):
return p[0]
# Semantic action: always return None, whatever p and user_data are.
def act_ignore(p, user_data=None):
return None
act_ws = act_ignore
act_document = act_index0
def init_parser():
"""Set up the parser with the grammar to be recognized.
digit = h.ch_range(0x30, 0x39)
alpha = h.choice(h.ch_range(0x41, 0x5a), h.ch_range(0x61, 0x7a))
space = h.in_(b" \t\n\r\f\v")
# AUX.
plus ='+')
slash ='/')
equals ='=')
bsfdig = h.choice(alpha, digit, plus, slash)
bsfdig_4bit = h.in_(b"AEIMQUYcgkosw048")
bsfdig_2bit = h.in_(b"AQgw")
base64_3 = h.repeat_n(bsfdig, 4)
base64_2 = h.sequence(bsfdig, bsfdig, bsfdig_4bit, equals)
base64_1 = h.sequence(bsfdig, bsfdig_2bit, equals, equals)
base64 = h.action(h.sequence(h.many(base64_3),
# TODO This is not quite the same as the C example, with uses act_ignore.
# But I can't get hammer to filter any value returned by act_ignore.
ws = h.ignore(h.many(space))
document = h.action(h.sequence(ws, base64, ws, h.end_p()),
# BUG sometimes inputs that should just don't parse.
# It *seemed* to happen mostly with things like "bbbbaaaaBA==".
# Using less actions seemed to make it less likely.
return document
def main():
parser = init_parser()
s =
inputsize = len(s)
print('inputsize=%i' % inputsize, file=sys.stderr)
print('input=%s' % s, file=sys.stderr, end='')
result = parser.parse(s)
if result:
#print('parsed=%i bytes', result.bit_length/8, file=sys.stderr)
if __name__ == '__main__':
......@@ -22,11 +22,12 @@
const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) {
// if user_data exists and is printable:
if(nt->user_data != NULL && *(char*)(nt->user_data) > ' ' && *(char*)(nt->user_data) < 127) {
if(*(char*)(nt->user_data) != '0') {
char* user_str = (char*)(nt->user_data);
if(*user_str != '\0') {
// user_data is a non-empty string
return nt->user_data;
return user_str;
} else {
return nt->user_data+1;
return user_str+1;
* Example parser that demonstrates the use of user-defined token types.
* Note the custom printer function that hooks into h_pprint().
#include "../src/hammer.h"
#include "../src/glue.h"
* custom tokens
pprint(FILE *stream, const HParsedToken *tok, int indent, int delta)
* Pretty-printer rules:
* - Output 'indent' spaces after every newline you produce.
* - Do not add indent on the first line of output.
* - Do not add a trailing newline.
* - Indent sub-objects by adding 'delta' to 'indent'.
if (((HParsedToken *)tok->user)->token_type == TT_SEQUENCE)
fprintf(stream, "\n%*s", indent, "");
h_pprint(stream, tok->user, indent, delta);
/* XXX define umamb_sub as well */
TT_SUBJ = h_allocate_token_new("subject", NULL, pprint);
TT_PRED = h_allocate_token_new("predicate", NULL, pprint);
TT_OBJ = h_allocate_token_new("object", NULL, pprint);
TT_ADJ = h_allocate_token_new("adjective", NULL, pprint);
TT_ADVC = h_allocate_token_new("adverbial clause", NULL, pprint);
* semantic actions
* Normally these would be more interesting, but for this example, we just wrap
* our tokens in their intended types.
HParsedToken *act_subj(const HParseResult *p, void *u) {
return H_MAKE(SUBJ, (void *)p->ast);
HParsedToken *act_pred(const HParseResult *p, void *u) {
return H_MAKE(PRED, (void *)p->ast);
HParsedToken *act_obj(const HParseResult *p, void *u) {
return H_MAKE(OBJ, (void *)p->ast);
HParsedToken *act_adj(const HParseResult *p, void *u) {
return H_MAKE(ADJ, (void *)p->ast);
HParsedToken *act_advc(const HParseResult *p, void *u) {
return H_MAKE(ADVC, (void *)p->ast);
* grammar
HParser *
/* words */
#define W(X) h_whitespace(h_literal((const uint8_t *)(#X)))
H_RULE(art, h_choice(W(a), W(the), NULL));
H_RULE(noun, h_choice(W(cat), W(dog), W(fox), W(tiger), W(lion),
W(bear), W(fence), W(tree), W(car), W(cow), NULL));
H_RULE(verb, h_choice(W(eats), W(jumps), W(falls), NULL));
H_ARULE(adj, h_choice(W(quick), W(slow), W(happy), W(lazy), W(cyan),
W(magenta), W(yellow), W(black), W(brown), NULL));
H_RULE(adverb, h_choice(W(with), W(over), W(after), NULL));
#undef W
/* phrases */
H_RULE(nphrase, h_sequence(art, h_many(adj), noun, NULL));
/* sentence structure */
H_ARULE(subj, nphrase);
H_ARULE(pred, verb);
H_ARULE(obj, nphrase);
H_ARULE(advc, h_sequence(adverb, nphrase, NULL));
H_RULE(sentnc, h_sequence(subj, pred,
h_optional(obj), h_optional(advc), NULL));
return sentnc;
* main routine: read, parse, print
* input e.g.:
* "the quick brown fox jumps the fence with a cyan lion"
#include <stdio.h>
#include <inttypes.h>
main(int argc, char **argv)
uint8_t input[1024];
size_t sz;
const HParser *parser;
const HParseResult *result;
parser = build_parser();
sz = fread(input, 1, sizeof(input), stdin);
if (!feof(stdin)) {
fprintf(stderr, "too much input\n");
return 1;
result = h_parse(parser, input, sz);
if (!result) {
fprintf(stderr, "no parse\n");
return 1;
h_pprintln(stdout, result->ast);
fprintf(stderr, "consumed %" PRId64 "/%zu bytes.\n",
result->bit_length / 8, sz);
return 0;
# -*- python -*-
from __future__ import absolute_import, division, print_function
import os.path
Import('env testruns')
# Bump this if you break binary compatibility (e.g. renumber backends)
hammer_shlib_version = "1.0.0"
dist_headers = [
parsers_headers = [
backends_headers = [
parsers = ['parsers/%s.c'%s for s in
......@@ -26,6 +35,7 @@ parsers = ['parsers/%s.c'%s for s in
......@@ -48,10 +58,11 @@ parsers = ['parsers/%s.c'%s for s in
backends = ['backends/%s.c' % s for s in
['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']]
['missing', 'packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0', 'params']]
misc_hammer_parts = [
......@@ -63,10 +74,18 @@ misc_hammer_parts = [
if env['PLATFORM'] == 'win32':
misc_hammer_parts += [
misc_hammer_parts += ['platform_bsdlike.c']
ctests = ['t_benchmark.c',
......@@ -74,27 +93,62 @@ ctests = ['t_benchmark.c',
static_library_name = 'hammer'
if env['PLATFORM'] == 'win32':
# FIXME(windows): symbols in hammer are not exported yet, a shared lib would be useless
# prevent collision between .lib from dll and .lib for static lib
static_library_name = 'hammer_s'
if 'GPROF' in env and env['GPROF'] == 1:
# Disable the shared library (it won't work with gprof) and rename the static one
static_library_name = 'hammer_pg'
# Markers for later
libhammer_static = None
libhammer_shared = None
if build_shared_library:
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts, \
libhammer_static = env.StaticLibrary(static_library_name, parsers + backends + misc_hammer_parts)
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts)
libhammer_static = env.StaticLibrary('hammer', parsers + backends + misc_hammer_parts)
Default(libhammer_shared, libhammer_static)
if libhammer_shared is not None:
Default(libhammer_shared, libhammer_static)
env.Install('$libpath', [libhammer_static, libhammer_shared])
env.Install('$libpath', [libhammer_static])
env.Install("$libpath", [libhammer_static, libhammer_shared])
env.Install("$incpath", dist_headers)
env.Install("$parsersincpath", parsers_headers)
env.Install("$backendsincpath", backends_headers)
env.Install("$pkgconfigpath", "../../../libhammer.pc")
env.Install('$incpath', dist_headers)
env.Install('$parsersincpath', parsers_headers)
env.Install('$backendsincpath', backends_headers)
env.Install('$pkgconfigpath', '../../../libhammer.pc')
testenv = env.Clone()
testenv.ParseConfig('pkg-config --cflags --libs glib-2.0')
testenv.Append(LIBS=['hammer'], LIBPATH=['.'])
ctestexec = testenv.Program('test_suite', ctests + ['test_suite.c'], LINKFLAGS="--coverage" if testenv.GetOption("coverage") else None)
ctest = Alias('testc', [ctestexec], "".join(["env LD_LIBRARY_PATH=", os.path.dirname(ctestexec[0].path), " ", ctestexec[0].path]))
if GetOption('with_tests'):
testenv = env.Clone()
testenv.ParseConfig('pkg-config --cflags --libs glib-2.0')
if libhammer_shared is not None:
ctestexec = testenv.Program('test_suite', ctests + ['test_suite.c'], LINKFLAGS='--coverage' if testenv.GetOption('coverage') else None)
ctest = Alias('testc', [ctestexec], ''.join(['env LD_LIBRARY_PATH=', os.path.dirname(ctestexec[0].path), ' ', ctestexec[0].path]))
Export("libhammer_static libhammer_shared")
if libhammer_shared is not None:
Export('libhammer_static libhammer_shared')
for b in env['bindings']:
env.SConscript(["bindings/%s/SConscript" % b])
env.SConscript(['bindings/%s/SConscript' % b])
......@@ -18,6 +18,7 @@
#include <string.h>
#include <stdint.h>
#include <sys/types.h>
#include <setjmp.h>
#include "hammer.h"
#include "internal.h"
......@@ -28,32 +29,56 @@ struct arena_link {
// For efficiency, we should probably allocate the arena links in
// their own slice, and link to a block directly. That can be
// implemented later, though, with no change in interface.
struct arena_link *next; // It is crucial that this be the first item; so that
// any arena link can be casted to struct arena_link**.
struct arena_link *next;
size_t free;
size_t used;
uint8_t rest[];
} ;
/*
 * Arena state: a linked list of blocks obtained from mm__, plus bookkeeping
 * counters that h_allocator_stats() copies out to the caller.
 */
struct HArena_ {
/* current block; allocations that fit are carved from its free space */
struct arena_link *head;
/* backing allocator that provides the blocks */
struct HAllocator_ *mm__;
/* does mm__ zero blocks for us? (if so, the arena skips its own memset) */
bool malloc_zeros;
/* payload size of a standard block; larger requests get a dedicated block */
size_t block_size;
/* total bytes handed out to arena callers */
size_t used;
/* overhead and not-yet-used bytes; adjusted on every allocation */
size_t wasted;
/* calls to / bytes requested from the backing allocator */
size_t mm_malloc_count, mm_malloc_bytes;
/* zero-fill operations performed by the arena itself */
size_t memset_count, memset_bytes;
/* all arena allocations */
size_t arena_malloc_count, arena_malloc_bytes;
/* small, uninitialised allocations */
size_t arena_su_malloc_count, arena_su_malloc_bytes;
/* small, zero-initialised allocations */
size_t arena_si_malloc_count, arena_si_malloc_bytes;
/* large (dedicated block), uninitialised allocations */
size_t arena_lu_malloc_count, arena_lu_malloc_bytes;
/* large (dedicated block), zero-initialised allocations */
size_t arena_li_malloc_count, arena_li_malloc_bytes;
/* optional longjmp target used when a block allocation fails; may be NULL */
jmp_buf *except;
static void * h_arena_malloc_raw(HArena *arena, size_t size, bool need_zero);
/*
 * Allocates 'size' bytes through the allocator 'mm__'.
 *
 * If the allocator's alloc callback returns NULL, the failure is reported
 * via h_platform_errx() with exit status 1 (expected to terminate the
 * process -- confirm against the platform layer). The error path must only be
 * taken on failure; calling it unconditionally would abort every allocation.
 */
void* h_alloc(HAllocator* mm__, size_t size) {
  void *p = mm__->alloc(mm__, size);
  if (!p)
    h_platform_errx(1, "memory allocation failed (%zuB requested)\n", size);
  return p;
}
/*
 * Resizes the allocation 'ptr' to 'size' bytes through the allocator 'mm__'.
 *
 * If the allocator's realloc callback returns NULL, the failure is reported
 * via h_platform_errx() with exit status 1 (expected to terminate the
 * process -- confirm against the platform layer). The error path must only be
 * taken on failure; calling it unconditionally would abort every call.
 */
void* h_realloc(HAllocator* mm__, void* ptr, size_t size) {
  void *p = mm__->realloc(mm__, ptr, size);
  if (!p)
    h_platform_errx(1, "memory reallocation failed (%zuB requested)\n", size);
  return p;
}
HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
if (block_size == 0)
block_size = 4096;
struct HArena_ *ret = h_new(struct HArena_, 1);
struct arena_link *link = (struct arena_link*)mm__->alloc(mm__, sizeof(struct arena_link) + block_size);
if (!link) {
// TODO: error-reporting -- let user know that arena link couldn't be allocated
return NULL;
memset(link, 0, sizeof(struct arena_link) + block_size);
struct arena_link *link = (struct arena_link*)h_alloc(mm__, sizeof(struct arena_link) + block_size);
assert(ret != NULL);
assert(link != NULL);
link->free = block_size;
link->used = 0;
link->next = NULL;
......@@ -61,49 +86,154 @@ HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
ret->block_size = block_size;
ret->used = 0;
ret->mm__ = mm__;
ret->mm_malloc_count = 2;
ret->mm_malloc_bytes = sizeof(*ret) + sizeof(struct arena_link) + block_size;
ret->memset_count = 0;
ret->memset_bytes = 0;
ret->arena_malloc_count = ret->arena_malloc_bytes = 0;
ret->arena_su_malloc_count = ret->arena_su_malloc_bytes = 0;
ret->arena_si_malloc_count = ret->arena_si_malloc_bytes = 0;
ret->arena_lu_malloc_count = ret->arena_lu_malloc_bytes = 0;
ret->arena_li_malloc_count = ret->arena_li_malloc_bytes = 0;
/* XXX provide a mechanism to indicate mm__ returns zeroed blocks */
ret->malloc_zeros = false;
ret->wasted = sizeof(struct arena_link) + sizeof(struct HArena_) + block_size;
ret->except = NULL;
return ret;
void* h_arena_malloc(HArena *arena, size_t size) {
/*
 * Stores 'except' as the arena's out-of-memory jump target. When it is
 * non-NULL, alloc_block() longjmp()s to it with value 1 on allocation
 * failure instead of reporting the error through h_platform_errx().
 */
void h_arena_set_except(HArena *arena, jmp_buf *except)
arena->except = except;
/*
 * Obtains 'size' bytes from the arena's backing allocator.
 *
 * On failure, control is transferred with longjmp(…, 1) to the jump buffer
 * stored in arena->except if one is set; otherwise the failure is reported
 * through h_platform_errx() with exit status 1.
 */
static void *alloc_block(HArena *arena, size_t size)
{
  void *block = arena->mm__->alloc(arena->mm__, size);
  if (!block) {
    if (arena->except)
      longjmp(*arena->except, 1);
    /* %zu prints the whole size_t; an (unsigned int) cast would misreport
     * requests that do not fit in 32 bits. Matches the message in h_alloc(). */
    h_platform_errx(1, "memory allocation failed (%zuB requested)\n", size);
  }
  return block;
}
/*
 * Allocates 'size' bytes from 'arena' without asking for them to be cleared:
 * forwards to h_arena_malloc_raw() with need_zero set to false.
 */
void * h_arena_malloc_noinit(HArena *arena, size_t size) {
  const bool zero_fill = false;

  return h_arena_malloc_raw(arena, size, zero_fill);
}
/*
 * Allocates 'size' zero-filled bytes from 'arena': forwards to
 * h_arena_malloc_raw() with need_zero set to true.
 */
void * h_arena_malloc(HArena *arena, size_t size) {
  const bool zero_fill = true;

  return h_arena_malloc_raw(arena, size, zero_fill);
}
static void * h_arena_malloc_raw(HArena *arena, size_t size,
bool need_zero) {
struct arena_link *link = NULL;
void *ret = NULL;
if (size <= arena->head->free) {
// fast path..
void* ret = arena->head->rest + arena->head->used;
/* fast path.. */
ret = arena->head->rest + arena->head->used;
arena->used += size;
arena->wasted -= size;
arena->head->used += size;
arena->head->free -= size;
return ret;
arena->arena_malloc_bytes += size;
if (need_zero) {
arena->arena_si_malloc_bytes += size;
} else {
arena->arena_su_malloc_bytes += size;
} else if (size > arena->block_size) {
// We need a new, dedicated block for it, because it won't fit in a standard sized one.
// This involves some annoying casting...
* We need a new, dedicated block for it, because it won't fit in a
* standard sized one.
* We used to do a silly casting dance to treat blocks like this
* as special cases and make the used/free fields part of the allocated
* block, but the old code was not really proper portable C and depended
* on a bunch of implementation-specific behavior. We could have done it
* better with a union in struct arena_link, but the memory savings is
* only 0.39% for a 64-bit machine, a 4096-byte block size and all
* large allocations *only just one byte* over the block size, so I
* question the utility of it. We do still slip the large block in
* one position behind the list head so it doesn't cut off a partially
* filled list head.
* -- andrea
link = alloc_block(arena, size + sizeof(struct arena_link));
assert(link != NULL);
arena->used += size;
arena->wasted += sizeof(struct arena_link*);
void* link = arena->mm__->alloc(arena->mm__, size + sizeof(struct arena_link*));
if (!link) {
// TODO: error-reporting -- let user know that arena link couldn't be allocated
return NULL;
arena->wasted += sizeof(struct arena_link);
link->used = size;
link->free = 0;
link->next = arena->head->next;
arena->head->next = link;
ret = link->rest;
arena->arena_malloc_bytes += size;
if (need_zero) {
arena->arena_li_malloc_bytes += size;
} else {
arena->arena_lu_malloc_bytes += size;
memset(link, 0, size + sizeof(struct arena_link*));
*(struct arena_link**)link = arena->head->next;
arena->head->next = (struct arena_link*)link;
return (void*)(((uint8_t*)link) + sizeof(struct arena_link*));
} else {
// we just need to allocate an ordinary new block.
struct arena_link *link = (struct arena_link*)arena->mm__->alloc(arena->mm__, sizeof(struct arena_link) + arena->block_size);
if (!link) {
// TODO: error-reporting -- let user know that arena link couldn't be allocated
return NULL;
memset(link, 0, sizeof(struct arena_link) + arena->block_size);
/* we just need to allocate an ordinary new block. */
link = alloc_block(arena, sizeof(struct arena_link) + arena->block_size);
assert(link != NULL);
arena->mm_malloc_bytes += sizeof(struct arena_link) + arena->block_size;
link->free = arena->block_size - size;
link->used = size;
link->next = arena->head;
arena->head = link;
arena->used += size;
arena->wasted += sizeof(struct arena_link) + arena->block_size - size;
return link->rest;
ret = link->rest;
arena->arena_malloc_bytes += size;
if (need_zero) {
arena->arena_si_malloc_bytes += size;
} else {
arena->arena_su_malloc_bytes += size;
* Zeroize if necessary
if (need_zero && !(arena->malloc_zeros)) {
memset(ret, 0, size);
arena->memset_bytes += size;
return ret;
void h_arena_free(HArena *arena, void* ptr) {
......@@ -127,4 +257,49 @@ void h_delete_arena(HArena *arena) {
/*
 * Copies the arena's bookkeeping counters into '*stats'. Every field of
 * HArenaStats is overwritten with the arena field of the same name.
 */
void h_allocator_stats(HArena *arena, HArenaStats *stats) {
  const HArena *src = arena;
  HArenaStats *dst = stats;

  /* overall usage */
  dst->used   = src->used;
  dst->wasted = src->wasted;

  /* traffic to the backing allocator and zero-fill work */
  dst->mm_malloc_count = src->mm_malloc_count;
  dst->mm_malloc_bytes = src->mm_malloc_bytes;
  dst->memset_count    = src->memset_count;
  dst->memset_bytes    = src->memset_bytes;

  /* arena allocations: total, then small/large x uninited/inited */
  dst->arena_malloc_count    = src->arena_malloc_count;
  dst->arena_malloc_bytes    = src->arena_malloc_bytes;
  dst->arena_su_malloc_count = src->arena_su_malloc_count;
  dst->arena_su_malloc_bytes = src->arena_su_malloc_bytes;
  dst->arena_si_malloc_count = src->arena_si_malloc_count;
  dst->arena_si_malloc_bytes = src->arena_si_malloc_bytes;
  dst->arena_lu_malloc_count = src->arena_lu_malloc_count;
  dst->arena_lu_malloc_bytes = src->arena_lu_malloc_bytes;
  dst->arena_li_malloc_count = src->arena_li_malloc_count;
  dst->arena_li_malloc_bytes = src->arena_li_malloc_bytes;
}
/*
 * Moves the arena allocation 'ptr' into a fresh allocation of 'n' bytes and
 * returns the new pointer.
 *
 * The arena does not record individual allocation sizes, so the block that
 * contains 'ptr' is looked up and everything from 'ptr' to the end of that
 * block's used region (capped at 'n') is copied. This is wasteful, but it is
 * the most that can be known about the old allocation. 'ptr' must point into
 * one of the arena's blocks (checked with assert). The new memory beyond the
 * copied bytes is not initialised.
 */
void* h_arena_realloc(HArena *arena, void* ptr, size_t n) {
  struct arena_link *link;
  void* ret;
  size_t ncopy;
  const uintptr_t addr = (uintptr_t)ptr;

  /* Walk the blocks to find the one holding ptr. The range test is done on
   * integer addresses: arithmetic on void * is a GNU extension, and ISO C
   * does not define relational comparison of pointers into different
   * objects. */
  for (link = arena->head; link; link = link->next) {
    const uintptr_t lo = (uintptr_t)link->rest;
    const uintptr_t hi = lo + link->used;

    if (addr >= lo && addr <= hi)
      break; /* found it */
  }
  assert(link != NULL);

  /* upper bound on how much data the old allocation could have held */
  ncopy = (size_t)((link->rest + link->used) - (const uint8_t *)ptr);
  if (n < ncopy)
    ncopy = n;

  ret = h_arena_malloc_noinit(arena, n);
  assert(ret != NULL);
  memcpy(ret, ptr, ncopy);
  h_arena_free(arena, ptr);

  return ret;
}
......@@ -18,22 +18,12 @@
#include <sys/types.h>
#include <setjmp.h>
#ifdef __cplusplus
extern "C" {
// TODO(thequux): Turn this into an "HAllocatorVtable", and add a wrapper that also takes an environment pointer.
typedef struct HAllocator_ {
void* (*alloc)(struct HAllocator_* allocator, size_t size);
void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size);
void (*free)(struct HAllocator_* allocator, void* ptr);
} HAllocator;
typedef struct HArena_ HArena ; // hidden implementation
HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default...
#if defined __llvm__
# if __has_attribute(malloc)
# define ATTR_MALLOC(n) __attribute__((malloc))
......@@ -48,13 +38,52 @@ HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for def
# define ATTR_MALLOC(n)
// TODO(thequux): Turn this into an "HAllocatorVtable", and add a wrapper that also takes an environment pointer.
typedef struct HAllocator_ {
void* (*alloc)(struct HAllocator_* allocator, size_t size);
void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size);
void (*free)(struct HAllocator_* allocator, void* ptr);
} HAllocator;
void* h_alloc(HAllocator* allocator, size_t size) ATTR_MALLOC(2);
void* h_realloc(HAllocator* allocator, void* ptr, size_t size);
typedef struct HArena_ HArena ; // hidden implementation
HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default...
void* h_arena_malloc_noinit(HArena *arena, size_t count) ATTR_MALLOC(2);
void* h_arena_malloc(HArena *arena, size_t count) ATTR_MALLOC(2);
void* h_arena_realloc(HArena *arena, void* ptr, size_t count);
void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers.
void h_delete_arena(HArena *arena);
void h_arena_set_except(HArena *arena, jmp_buf *except);
/*
 * Snapshot of an arena's bookkeeping counters, filled in by
 * h_allocator_stats(). Each field mirrors the arena field of the same name.
 */
typedef struct {
/* total bytes handed out to arena callers */
size_t used;
/* overhead and not-yet-used bytes held by the arena */
size_t wasted;
/* calls to / bytes requested from the backing allocator */
size_t mm_malloc_count;
size_t mm_malloc_bytes;
/* zero-fill operations performed by the arena / bytes cleared */
size_t memset_count;
size_t memset_bytes;
/* all arena allocations */
size_t arena_malloc_count;
size_t arena_malloc_bytes;
/* small, uninited */
size_t arena_su_malloc_count;
size_t arena_su_malloc_bytes;
/* small, inited */
size_t arena_si_malloc_count;
size_t arena_si_malloc_bytes;
/* large, uninited */
size_t arena_lu_malloc_count;
size_t arena_lu_malloc_bytes;
/* large, inited */
size_t arena_li_malloc_count;
size_t arena_li_malloc_bytes;
} HArenaStats;
void h_allocator_stats(HArena *arena, HArenaStats *stats);