Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • hammer/hammer
  • mlp/hammer
  • xentrac/hammer
  • pesco/hammer
  • letitiali/hammer
  • nobody/hammer
  • kia/hammer-sandbox
  • vyrus001/hammer
  • denleylam/hammer
9 results
Show changes
Commits on Source (283)
# generated files
*.o
*~
*.os
*.so
*.a
*.gem
*.pyc
*.class
*.so
jni/com*.h
src/test_suite
lib/hush
libhammer.pc
build/
examples/dns
examples/base64
examples/base64_sem1
examples/base64_sem2
TAGS
*.swp
*.swo
jni/com*.h
src/test_suite
# coverage and profiling stuff
*.gcov
*.gcda
*.gcno
gmon.out
# editor leftovers
*~
*.sw?
\#*
# misc
lib/hush
TAGS
docs/milestone2.dot.pdf
*.dot.pdf
Session.vim
*.gcov
cscope.out
build/
libhammer.pc
.sconsign.dblite
*.os
*.pyc
*.gem
sudo: required
dist: trusty
language: c
compiler:
- gcc
......@@ -8,35 +10,43 @@ matrix:
include:
- compiler: gcc
language: ruby
rvm: ruby-1.9.3-p484
rvm: ruby-1.9.3-p551
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-1.9.3-p484
rvm: ruby-1.9.3-p551
env: BINDINGS=ruby CC=clang
- compiler: gcc
language: ruby
rvm: ruby-2.0.0-p353
rvm: ruby-2.0.0-p647
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-2.0.0-p353
rvm: ruby-2.0.0-p647
env: BINDINGS=ruby CC=clang
- compiler: gcc
language: ruby
rvm: ruby-2.1.0
rvm: ruby-2.1.7
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-2.1.0
rvm: ruby-2.1.7
env: BINDINGS=ruby CC=clang
- compiler: gcc
language: ruby
rvm: ruby-2.2.3
env: BINDINGS=ruby
- compiler: clang
language: ruby
rvm: ruby-2.2.3
env: BINDINGS=ruby CC=clang
- compiler: gcc
language: python
python: "2.7"
python: "2.7.10"
env: BINDINGS=python
- compiler: clang
language: python
python: "2.7"
python: "2.7.10"
env: BINDINGS=python CC=clang
- compiler: gcc
language: perl
......@@ -87,17 +97,16 @@ matrix:
- compiler: gcc
language: cpp
env: BINDINGS=cpp
- compiler: gcc
- compiler: clang
language: cpp
env: BINDINGS=cpp CC=clang
before_install:
- sudo apt-get update -qq
- sudo apt-get install -y lcov
- gem install coveralls-lcov
- if [ "$BINDINGS" != "none" ]; then sudo apt-get install -qq swig; fi
- if [ "$BINDINGS" == "perl" ]; then sudo add-apt-repository ppa:dns/irc -y; sudo apt-get update -qq; sudo apt-get install -qq swig=2.0.8-1irc1~12.04; fi
- if [ "$BINDINGS" == "python" ]; then sudo apt-get install -qq python-dev; fi
- if [ "$BINDINGS" == "dotnet" ]; then sudo add-apt-repository ppa:directhex/monoxide -y; sudo apt-get update -qq; sudo apt-get install -y -qq mono-devel mono-mcs nunit nunit-console; fi
- if [ "$BINDINGS" != "none" ]; then sudo sh -c 'echo "deb http://archive.ubuntu.com/ubuntu trusty-backports main restricted universe multiverse" >> /etc/apt/sources.list'; sudo apt-get update -qq; sudo apt-get install -yqq swig3.0/trusty-backports; fi
- if [ "$BINDINGS" == "python" ]; then sudo apt-get install -yqq python-dev; fi
- if [ "$BINDINGS" == "dotnet" ]; then sudo add-apt-repository ppa:directhex/monoxide -y; sudo apt-get update -qq; sudo apt-get install -yqq mono-devel mono-mcs nunit nunit-console; mozroots --import --sync; fi
install: true
before_script:
- if [ "$BINDINGS" == "php" ]; then phpenv config-add src/bindings/php/hammer.ini; fi
......
......@@ -6,12 +6,13 @@ internal anaphoric macros use. Chances are that if you use these names
for other things, you're gonna have a bad time.
In particular, these names, and the macros that use them, are:
- state:
Used by a_new and company. Should be an HParseState*
- mm__:
Used by h_new and h_free. Should be an HAllocator*
- stk__:
Used in desugaring. Should be an HCFStack*
- `state`:
Used by `a_new` and company. Should be an `HParseState*`.
- `mm__`:
Used by `h_new` and `h_free`. Should be an `HAllocator*`.
- `stk__`:
Used in desugaring. Should be an `HCFStack*`.
Function suffixes
=================
......@@ -21,9 +22,9 @@ parameters or parameters in multiple different forms. For example,
often, you have a global memory manager that is used for an entire
program. In this case, you can leave off the memory manager arguments
off, letting them be implicit instead. Further, it is often convenient
to pass an array or va_list to a function instead of listing the
arguments inline (eg, for wrapping a function, generating the
arguments programattically, or writing bindings for another language.
to pass an array or `va_list` to a function instead of listing the
arguments inline (e.g., for wrapping a function, generating the
arguments programmatically, or writing bindings for another language.)
Because we have found that most variants fall into a fairly small set
of forms, and to minimize the amount of API calls that users need to
......@@ -32,21 +33,22 @@ variants: the function name is followed by two underscores and a set
of single-character "flags" indicating what optional features that
particular variant has (in alphabetical order, of course):
__a: takes variadic arguments as a void*[] (not implemented yet, but will be soon.
__m: takes a memory manager as the first argument, to override the system memory manager.
__v: Takes the variadic argument list as a va_list
- `__a`: takes variadic arguments as a `void*[]` (not implemented yet,
but will be soon.)
- `__m`: takes a memory manager as the first argument, to override the
system memory manager.
- `__v`: takes the variadic argument list as a `va_list`.
Memory managers
===============
If the __m function variants are used or system_allocator is
If the `__m` function variants are used or `system_allocator` is
overridden, there come some difficult questions to answer,
particularly regarding the behavior when multiple memory managers are
combined. As a general rule of thumb (exceptions will be explicitly
documented), assume that
If you have a function f, which is passed a memory manager m and
> If you have a function f, which is passed a memory manager m and
returns a value r, any function that uses r as a parameter must
also be told to use m as a memory manager.
......@@ -57,7 +59,7 @@ Language-independent test suite
There is a language-independent representation of the Hammer test
suite in `lib/test-suite`. This is intended to be used with the
tsparser.pl prolog library, along with a language-specific frontend.
tsparser.pl Prolog library, along with a language-specific frontend.
Only the C# frontend exists so far; to regenerate the test suites using it, run
......
......@@ -3,7 +3,7 @@
# and kick off a recursive make
# Also, "make src/all" turns into "make -C src all"
SUBDIRS = src examples jni
SUBDIRS = src examples src/bindings/jni
include config.mk
TOPLEVEL=.
......
Hammer is a parsing library. Like many modern parsing libraries, it provides a parser combinator interface for writing grammars as inline domain-specific languages, but Hammer also provides a variety of parsing backends. It's also bit-oriented rather than character-oriented, making it ideal for parsing binary data such as images, network packets, audio, and executables.
Hammer is written in C, but will provide bindings for other languages. If you don't see a language you're interested in on the list, just ask.
Hammer is written in C, but provides bindings for other languages. If you don't see a language you're interested in on the list, just ask.
Hammer currently builds under Linux and OS X. (Windows is coming.)
Hammer currently builds under Linux, OS X, and Windows.
[![Build Status](https://travis-ci.org/UpstandingHackers/hammer.png)](https://travis-ci.org/UpstandingHackers/hammer)
Features
========
* Bit-oriented -- grammars can include single-bit flags or multi-bit constructs that span character boundaries, with no hassle
* Thread-safe, reentrant
* Thread-safe, reentrant (for most purposes; see Known Issues for details)
* Benchmarking for parsing backends -- determine empirically which backend will be most time-efficient for your grammar
* Parsing backends:
* Packrat parsing
......@@ -18,13 +17,14 @@ Features
* Regular expressions
* Language bindings:
* C++
* Java (not currently building; give us a few days)
* Java (incomplete)
* Python
* Ruby
* Perl
* [Go](https://github.com/prevoty/hammer)
* PHP
* .NET
* .NET
* Lua (landing soon!)
Installing
==========
......@@ -35,18 +35,24 @@ Installing
* pkg-config (for `scons test`)
* glib-2.0 (>= 2.29) (for `scons test`)
* glib-2.0-dev (for `scons test`)
* [swig](http://swig.org/) (for Python/Perl/PHP bindings; Perl requires >= 2.0.8)
* python2.7-dev (for Python bindings)
* [swig](http://swig.org/) (for Python/Perl/PHP bindings; Perl requires >= 2.0.8; Python 3.x requires >= 3.0.0)
* python2.7-dev (for Python 2 bindings)
* python3-dev (>= 3.5) (for Python 3 bindings)
* a JDK (for Java bindings)
* a working [phpenv](https://github.com/CHH/phpenv) configuration (for PHP bindings)
* [Ruby](https://www.ruby-lang.org/) >= 1.9.3 and bundler, for the Ruby bindings
* mono-devel and mono-mcs (>= 3.0.6) (for .NET bindings)
* [nunit](http://www.nunit.org/) (for testing .NET bindings)
To build, type `scons`. To run the built-in test suite, type `scons test`. For a debug build, add `--variant=debug`.
To build, type `scons`.
To run the built-in test suite, type `scons test`.
To avoid the test dependencies, add `--no-tests`.
For a debug build, add `--variant=debug`.
To build bindings, pass a "bindings" argument to scons, e.g. `scons bindings=python`. `scons bindings=python test` will build Python bindings and run tests for both C and Python. `--variant=debug` is valid here too. You can build more than one set of bindings at a time; just separate them with commas, e.g. `scons bindings=python,perl`.
For Python, pass `python=python<X>.<Y>`, e.g. `scons bindings=python python=python2.7` or `scons bindings=python python=python3.5`.
For Java, if jni.h and jni_md.h aren't already somewhere on your include path, prepend
`C_INCLUDE_PATH=/path/to/jdk/include` to that.
......@@ -73,16 +79,18 @@ The `examples/` directory contains some simple examples, currently including:
Known Issues
============
The Python bindings only work with Python 2.7. SCons doesn't work with Python 3, and PyCapsule isn't available in 2.6 and below, so 2.7 is all you get. Sorry about that.
The Python bindings work with Python 2.7 and Python 3.5+.
The requirement for SWIG >= 2.0.8 for Perl bindings is due to a [known bug](http://sourceforge.net/p/swig/patches/324/) in SWIG. [ppa:dns/irc](https://launchpad.net/~dns/+archive/irc) has backports of SWIG 2.0.8 for Ubuntu versions 10.04-12.10; you can also [build SWIG from source](http://www.swig.org/download.html).
The .NET bindings are for Mono 3.0.6 and greater. If you're on a Debian-based distro that only provides Mono 2 (e.g., Ubuntu 12.04), there are backports for [3.0.x](http://www.meebey.net/posts/mono_3.0_preview_debian_ubuntu_packages/), and a [3.2.x PPA](https://launchpad.net/~directhex/+archive/monoxide) maintained by the Mono team.
The regular expression backend is potentially not thread-safe (thanks to Martin Murray for pointing this out). A full rewrite of this backend is on the roadmap already due to some unexpected nondeterminism in the current implementation; we plan to fix this problem in that rewrite.
Community
=========
Please join us at `#hammer` on `irc.upstandinghackers.com` if you have any questions or just want to talk about parsing.
Mailing list, IRC, and potentially other channels to come.
Contact
=======
You can also email us at <hammer@upstandinghackers.com>.
Also to be updated soon.
# -*- python -*-
from __future__ import absolute_import, division, print_function
import os
import os.path
import platform
import subprocess
import sys
default_install_dir='/usr/local'
......@@ -9,14 +13,26 @@ if platform.system() == 'Windows':
default_install_dir = 'build' # no obvious place for installation on Windows
vars = Variables(None, ARGUMENTS)
vars.Add(PathVariable('DESTDIR', 'Root directory to install in (useful for packaging scripts)', None, PathVariable.PathIsDirCreate))
vars.Add(PathVariable('prefix', 'Where to install in the FHS', default_install_dir, PathVariable.PathAccept))
vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['cpp', 'dotnet', 'perl', 'php', 'python', 'ruby']))
vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packaging scripts)", None, PathVariable.PathIsDirCreate))
vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept))
vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['cpp', 'dotnet', 'jni', 'perl', 'php', 'python', 'ruby']))
vars.Add('python', 'Python interpreter', 'python')
tools = ['default', 'scanreplace']
if 'dotnet' in ARGUMENTS.get('bindings', []):
tools.append('csharp/mono')
# add the clang tool if necessary
if os.getenv('CC') == 'clang' or platform.system() == 'Darwin':
tools.append('clang')
else:
# try to detect if cc happens to be clang by inspecting --version
cc = os.getenv('CC') or 'cc'
ver = subprocess.run([cc, '--version'], capture_output=True).stdout
if b'clang' in ver.split():
tools.append('clang')
os.environ['CC'] = cc # make sure we call it as we saw it
envvars = {'PATH' : os.environ['PATH']}
if 'PKG_CONFIG_PATH' in os.environ:
envvars['PKG_CONFIG_PATH'] = os.environ['PKG_CONFIG_PATH']
......@@ -43,9 +59,9 @@ env['prefix'] = os.path.abspath(env['prefix'])
if 'DESTDIR' in env:
env['DESTDIR'] = os.path.abspath(env['DESTDIR'])
if rel_prefix:
print >>sys.stderr, '--!!-- You used a relative prefix with a DESTDIR. This is probably not what you'
print >>sys.stderr, '--!!-- you want; files will be installed in'
print >>sys.stderr, '--!!-- %s' % (calcInstallPath('$prefix'),)
print('--!!-- You used a relative prefix with a DESTDIR. This is probably not what you', file=sys.stderr)
print('--!!-- you want; files will be installed in', file=sys.stderr)
print('--!!-- %s' % (calcInstallPath('$prefix'),), file=sys.stderr)
env['libpath'] = calcInstallPath('$prefix', 'lib')
......@@ -69,24 +85,33 @@ AddOption('--coverage',
action='store_true',
help='Build with coverage instrumentation')
AddOption('--force-debug',
dest='force_debug',
default=False,
action='store_true',
help='Build with debug symbols, even in the opt variant')
AddOption('--gprof',
dest='gprof',
default=False,
action="store_true",
help='Build with profiling instrumentation for gprof')
AddOption('--in-place',
dest='in_place',
default=False,
action='store_true',
help='Build in-place, rather than in the build/<variant> tree')
AddOption('--tests',
AddOption('--no-tests',
dest='with_tests',
default=env['PLATFORM'] != 'win32',
action='store_true',
help='Build tests')
action='store_false',
help='Do not build tests')
env['CC'] = os.getenv('CC') or env['CC']
env['CXX'] = os.getenv('CXX') or env['CXX']
if os.getenv('CC') == 'clang' or env['PLATFORM'] == 'darwin':
env.Replace(CC='clang',
CXX='clang++')
env['CFLAGS'] = os.getenv('CFLAGS') or env['CFLAGS']
# Language standard and warnings
if env['CC'] == 'cl':
......@@ -102,7 +127,15 @@ if env['CC'] == 'cl':
]
)
else:
env.MergeFlags('-std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable')
if env['PLATFORM'] == 'darwin':
# It's reported -D_POSIX_C_SOURCE breaks the Mac OS build; I think we
# may need _DARWIN_C_SOURCE instead/in addition to, but let's wait to
# have access to a Mac to test/repo
env.MergeFlags('-std=c99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable')
else:
# It is important to set -D_POSIX_C_SOURCE=200809L here, rather than on an
# ad-hoc basis when #including
env.MergeFlags('-std=c99 -D_POSIX_C_SOURCE=200809L -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -Wno-unused-variable')
# Linker options
if env['PLATFORM'] == 'darwin':
......@@ -116,14 +149,31 @@ else:
env.MergeFlags('-lrt')
if GetOption('coverage'):
env.Append(CFLAGS=['--coverage'],
CXXFLAGS=['--coverage'],
LDFLAGS=['--coverage'])
env.Append(CCFLAGS=['--coverage'],
LDFLAGS=['--coverage'],
LINKFLAGS=['--coverage'])
if env['CC'] == 'gcc':
env.Append(LIBS=['gcov'])
else:
env.ParseConfig('llvm-config --ldflags')
if GetOption('force_debug'):
if env['CC'] == 'cl':
env.Append(CCFLAGS=['/Z7'])
else:
env.Append(CCFLAGS=['-g'])
if GetOption('gprof'):
if env['CC'] == 'gcc' and env['CXX'] == 'g++':
env.Append(CCFLAGS=['-pg'],
LDFLAGS=['-pg'],
LINKFLAGS=['-pg'])
env['GPROF'] = 1
else:
print("Can only use gprof with gcc")
Exit(1)
dbg = env.Clone(VARIANT='debug')
if env['CC'] == 'cl':
dbg.Append(CCFLAGS=['/Z7'])
......
......@@ -77,11 +77,13 @@ Benchmarking for parsing backends -- determine empirically which backend will be
11
12 HParseResult *result = h_parse(hello_parser, input, inputsize);
13 if(result) {
14 printf("yay!\n");
14 printf("yay!\\n");
15 } else {
16 printf("boo!\n");
16 printf("boo!\\n");
17 }
18 }
18 h_parse_result_free(result);
19 return 0 == result;
20 }
.fi
.SH "AUTHOR"
.sp
......
from __future__ import absolute_import, division, print_function
Import('env')
example = env.Clone()
example.Append(LIBS="hammer", LIBPATH="../src")
if 'GPROF' in env and env['GPROF'] == 1:
hammer_lib_name="hammer_pg"
else:
hammer_lib_name="hammer"
example.Append(LIBS=hammer_lib_name, LIBPATH="../src")
dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c'])
ttuser = example.Program('ttuser', 'ttuser.c')
base64 = example.Program('base64', 'base64.c')
base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c')
base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c')
ties = example.Program('ties', ['ties.c', 'grammar.c'])
env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, ties])
\ No newline at end of file
env.Alias("examples", [dns, ttuser, base64, base64_sem1, base64_sem2, ties])
......@@ -45,7 +45,7 @@ int main(int argc, char **argv)
{
uint8_t input[102400];
size_t inputsize;
const HParseResult *result;
HParseResult *result;
init_parser();
......@@ -57,6 +57,7 @@ int main(int argc, char **argv)
if(result) {
fprintf(stderr, "parsed=%" PRId64 " bytes\n", result->bit_length/8);
h_pprint(stdout, result->ast, 0, 0);
h_parse_result_free(result);
return 0;
} else {
return 1;
......
......@@ -10,7 +10,7 @@
# base64_sem1.py and base64_sem2.py for examples how to attach appropriate
# semantic actions to the grammar.
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import sys
......@@ -23,13 +23,13 @@ def init_parser():
alpha = h.choice(h.ch_range(0x41, 0x5a), h.ch_range(0x61, 0x7a))
# AUX.
plus = h.ch('+')
slash = h.ch('/')
equals = h.ch('=')
plus = h.ch(b'+')
slash = h.ch(b'/')
equals = h.ch(b'=')
bsfdig = h.choice(alpha, digit, plus, slash)
bsfdig_4bit = h.in_('AEIMQUYcgkosw048')
bsfdig_2bit = h.in_('AQgw')
bsfdig_4bit = h.in_(b'AEIMQUYcgkosw048')
bsfdig_2bit = h.in_(b'AQgw')
base64_3 = h.repeat_n(bsfdig, 4)
base64_2 = h.sequence(bsfdig, bsfdig, bsfdig_4bit, equals)
base64_1 = h.sequence(bsfdig, bsfdig_2bit, equals, equals)
......
......@@ -149,12 +149,13 @@ HParser *init_parser(void)
#include <stdio.h>
const HParser *parser; // Allocated statically to suppress leak warnings
int main(int argc, char **argv)
{
uint8_t input[102400];
size_t inputsize;
const HParser *parser;
const HParseResult *result;
HParseResult *result;
parser = init_parser();
......@@ -166,6 +167,7 @@ int main(int argc, char **argv)
if(result) {
fprintf(stderr, "parsed=%" PRId64 " bytes\n", result->bit_length/8);
h_pprint(stdout, result->ast, 0, 0);
h_parse_result_free(result);
return 0;
} else {
return 1;
......
......@@ -13,7 +13,7 @@
# transform the parse tree in small steps in a bottom-up fashion. Compare
# base64_sem2.py for an alternative approach using a single top-level action.
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import functools
import sys
......@@ -26,7 +26,7 @@ import hammer as h
def act_bsfdig(p, user_data=None):
# FIXME See the note in init_parser()
c = p if isinstance(p, (int, long)) else ord(p)
c = p if isinstance(p, h.INTEGER_TYPES) else ord(p)
if 0x41 <= c <= 0x5A: # A-Z
return c - 0x41
......@@ -34,9 +34,9 @@ def act_bsfdig(p, user_data=None):
return c - 0x61 + 26
elif 0x30 <= c <= 0x39: # 0-9
return c - 0x30 + 52
elif c == '+':
elif c == b'+':
return 62
elif c == '/':
elif c == b'/':
return 63
else:
raise ValueError
......@@ -65,14 +65,14 @@ def act_base64_n(n, p, user_data=None):
x = 0
bits = 0
for i in xrange(0, n+1):
for i in range(0, n+1):
x <<= 6
x |= p[i] or 0
bits += 6
x >>= bits % 8 # align, i.e. cut off extra bits
for i in xrange(n):
for i in range(n):
item = x & 0xFF
res[n-1-i] = item # output the last byte and
......@@ -118,16 +118,16 @@ def init_parser():
# literals, or integers
digit = h.ch_range(0x30, 0x39)
alpha = h.choice(h.ch_range(0x41, 0x5a), h.ch_range(0x61, 0x7a))
space = h.in_(" \t\n\r\f\v")
space = h.in_(b" \t\n\r\f\v")
# AUX.
plus = h.ch('+')
slash = h.ch('/')
equals = h.action(h.ch('='), act_equals)
plus = h.ch(b'+')
slash = h.ch(b'/')
equals = h.action(h.ch(b'='), act_equals)
bsfdig = h.action(h.choice(alpha, digit, plus, slash), act_bsfdig)
bsfdig_4bit = h.action(h.in_("AEIMQUYcgkosw048"), act_bsfdig_4bit)
bsfdig_2bit = h.action(h.in_("AQgw"), act_bsfdig_2bit)
bsfdig_4bit = h.action(h.in_(b"AEIMQUYcgkosw048"), act_bsfdig_4bit)
bsfdig_2bit = h.action(h.in_(b"AQgw"), act_bsfdig_2bit)
base64_3 = h.action(h.repeat_n(bsfdig, 4), act_base64_3)
base64_2 = h.action(h.sequence(bsfdig, bsfdig, bsfdig_4bit, equals),
act_base64_2)
......
......@@ -153,12 +153,13 @@ const HParser *init_parser(void)
#include <stdio.h>
const HParser *parser; // Allocated statically to suppress leak warnings
int main(int argc, char **argv)
{
uint8_t input[102400];
size_t inputsize;
const HParser *parser;
const HParseResult *result;
HParseResult *result;
parser = init_parser();
......@@ -170,6 +171,7 @@ int main(int argc, char **argv)
if(result) {
fprintf(stderr, "parsed=%" PRId64 " bytes\n", result->bit_length/8);
h_pprint(stdout, result->ast, 0, 0);
h_parse_result_free(result);
return 0;
} else {
return 1;
......
......@@ -14,7 +14,7 @@
# for an alternative approach using a fine-grained piece-by-piece
# transformation.
from __future__ import print_function
from __future__ import absolute_import, division, print_function
import functools
import sys
......@@ -28,7 +28,7 @@ import hammer as h
def bsfdig_value(p):
"""Return the numeric value of a parsed base64 digit.
"""
c = p if isinstance(p, (int, long)) else ord(p)
c = p if isinstance(p, h.INTEGER_TYPES) else ord(p)
if c:
if 0x41 <= c <= 0x5A: # A-Z
return c - 0x41
......@@ -36,9 +36,9 @@ def bsfdig_value(p):
return c - 0x61 + 26
elif 0x30 <= c <= 0x39: # 0-9
return c - 0x30 + 52
elif c == '+':
elif c == b'+':
return 62
elif c == '/':
elif c == b'/':
return 63
return 0
......@@ -109,16 +109,16 @@ def init_parser():
# CORE
digit = h.ch_range(0x30, 0x39)
alpha = h.choice(h.ch_range(0x41, 0x5a), h.ch_range(0x61, 0x7a))
space = h.in_(" \t\n\r\f\v")
space = h.in_(b" \t\n\r\f\v")
# AUX.
plus = h.ch('+')
slash = h.ch('/')
equals = h.ch('=')
plus = h.ch(b'+')
slash = h.ch(b'/')
equals = h.ch(b'=')
bsfdig = h.choice(alpha, digit, plus, slash)
bsfdig_4bit = h.in_("AEIMQUYcgkosw048")
bsfdig_2bit = h.in_("AQgw")
bsfdig_4bit = h.in_(b"AEIMQUYcgkosw048")
bsfdig_2bit = h.in_(b"AQgw")
base64_3 = h.repeat_n(bsfdig, 4)
base64_2 = h.sequence(bsfdig, bsfdig, bsfdig_4bit, equals)
base64_1 = h.sequence(bsfdig, bsfdig_2bit, equals, equals)
......
/*
* Example parser that demonstrates the use of user-defined token types.
*
* Note the custom printer function that hooks into h_pprint().
*/
#include "../src/hammer.h"
#include "../src/glue.h"
/*
* custom tokens
*/
HTokenType TT_SUBJ, TT_PRED, TT_OBJ, TT_ADJ, TT_ADVC;
void
pprint(FILE *stream, const HParsedToken *tok, int indent, int delta)
{
/*
* Pretty-printer rules:
*
* - Output 'indent' spaces after every newline you produce.
* - Do not add indent on the first line of output.
* - Do not add a trailing newline.
* - Indent sub-objects by adding 'delta' to 'indent'.
*/
if (((HParsedToken *)tok->user)->token_type == TT_SEQUENCE)
fprintf(stream, "\n%*s", indent, "");
h_pprint(stream, tok->user, indent, delta);
}
/* XXX define unamb_sub as well */
void
init(void)
{
TT_SUBJ = h_allocate_token_new("subject", NULL, pprint);
TT_PRED = h_allocate_token_new("predicate", NULL, pprint);
TT_OBJ = h_allocate_token_new("object", NULL, pprint);
TT_ADJ = h_allocate_token_new("adjective", NULL, pprint);
TT_ADVC = h_allocate_token_new("adverbial clause", NULL, pprint);
}
/*
* semantic actions
*
* Normally these would be more interesting, but for this example, we just wrap
* our tokens in their intended types.
*/
/*
 * Each action below has the same shape: it takes the parse result 'p' of the
 * rule it is attached to and hands p->ast to H_MAKE together with the name of
 * one of the custom token types (TT_SUBJ, TT_PRED, ...). The user-data
 * argument 'u' is unused.
 *
 * NOTE(review): H_MAKE comes from glue.h and is not visible here; presumably
 * it builds a new token of type TT_<NAME> whose 'user' pointer is the given
 * value and relies on the parameter being named 'p' -- confirm before
 * renaming anything.
 */

/* Wrap the matched AST as a "subject" token. */
HParsedToken *act_subj(const HParseResult *p, void *u) {
return H_MAKE(SUBJ, (void *)p->ast);
}
/* Wrap the matched AST as a "predicate" token. */
HParsedToken *act_pred(const HParseResult *p, void *u) {
return H_MAKE(PRED, (void *)p->ast);
}
/* Wrap the matched AST as an "object" token. */
HParsedToken *act_obj(const HParseResult *p, void *u) {
return H_MAKE(OBJ, (void *)p->ast);
}
/* Wrap the matched AST as an "adjective" token. */
HParsedToken *act_adj(const HParseResult *p, void *u) {
return H_MAKE(ADJ, (void *)p->ast);
}
/* Wrap the matched AST as an "adverbial clause" token. */
HParsedToken *act_advc(const HParseResult *p, void *u) {
return H_MAKE(ADVC, (void *)p->ast);
}
/*
* grammar
*/
/*
 * Build and return the parser for the toy sentence grammar:
 *
 *   sentnc := subj pred [obj] [advc]
 *
 * NOTE(review): H_RULE and H_ARULE are macros from glue.h and are not visible
 * here. Presumably H_RULE(name, parser) declares a local parser variable
 * 'name', and H_ARULE(name, parser) additionally attaches the semantic action
 * act_<name> defined earlier in this file -- confirm against glue.h.
 */
HParser *
build_parser(void)
{
/* words */
/* W(X) stringifies the bare word X, turns it into a literal parser and wraps
 * that in h_whitespace(); it is only needed for the word lists below. */
#define W(X) h_whitespace(h_literal((const uint8_t *)(#X)))
H_RULE(art, h_choice(W(a), W(the), NULL));
H_RULE(noun, h_choice(W(cat), W(dog), W(fox), W(tiger), W(lion),
W(bear), W(fence), W(tree), W(car), W(cow), NULL));
H_RULE(verb, h_choice(W(eats), W(jumps), W(falls), NULL));
/* adj is the only word class declared with H_ARULE rather than H_RULE */
H_ARULE(adj, h_choice(W(quick), W(slow), W(happy), W(lazy), W(cyan),
W(magenta), W(yellow), W(black), W(brown), NULL));
H_RULE(adverb, h_choice(W(with), W(over), W(after), NULL));
#undef W
/* phrases */
/* noun phrase: article, any number of adjectives, then a noun */
H_RULE(nphrase, h_sequence(art, h_many(adj), noun, NULL));
/* sentence structure */
/* subj and obj share the same underlying noun phrase but are declared as
 * separate rules */
H_ARULE(subj, nphrase);
H_ARULE(pred, verb);
H_ARULE(obj, nphrase);
H_ARULE(advc, h_sequence(adverb, nphrase, NULL));
/* object and adverbial clause are both optional */
H_RULE(sentnc, h_sequence(subj, pred,
h_optional(obj), h_optional(advc), NULL));
return sentnc;
}
/*
* main routine: read, parse, print
*
* input e.g.:
* "the quick brown fox jumps the fence with a cyan lion"
*/
#include <stdio.h>
#include <inttypes.h>
/*
 * Read one sentence from stdin, parse it, and pretty-print the result.
 *
 * Returns 0 on a successful parse; 1 if reading fails, the input does not
 * fit into the fixed-size buffer, or the input does not parse.
 */
int
main(int argc, char **argv)
{
    uint8_t input[1024];
    size_t sz;
    const HParser *parser;
    /* Non-const so the result can be handed to h_parse_result_free(),
     * matching the other example programs. */
    HParseResult *result;

    init();
    parser = build_parser();

    sz = fread(input, 1, sizeof(input), stdin);
    if (ferror(stdin)) {
        /* Report a read failure as such instead of as oversized input. */
        fprintf(stderr, "read error\n");
        return 1;
    }
    if (!feof(stdin)) {
        /* The buffer filled up before end-of-file was seen. */
        fprintf(stderr, "too much input\n");
        return 1;
    }

    result = h_parse(parser, input, sz);
    if (!result) {
        fprintf(stderr, "no parse\n");
        return 1;
    }

    h_pprintln(stdout, result->ast);
    /* bit_length is in bits; report whole bytes consumed. */
    fprintf(stderr, "consumed %" PRId64 "/%zu bytes.\n",
        result->bit_length / 8, sz);

    /* Release the parse result instead of leaking it at exit. */
    h_parse_result_free(result);
    return 0;
}
# -*- python -*-
from __future__ import absolute_import, division, print_function
import os.path
Import('env testruns')
# Bump this if you break binary compatibility (e.g. renumber backends)
hammer_shlib_version = "1.0.0"
dist_headers = [
'hammer.h',
'allocator.h',
......@@ -18,7 +24,9 @@ parsers_headers = [
backends_headers = [
'backends/regex.h',
'backends/contextfree.h'
'backends/contextfree.h',
'backends/missing.h',
'backends/params.h'
]
parsers = ['parsers/%s.c'%s for s in
......@@ -27,6 +35,7 @@ parsers = ['parsers/%s.c'%s for s in
'attr_bool',
'bind',
'bits',
'bytes',
'butnot',
'ch',
'charset',
......@@ -49,10 +58,11 @@ parsers = ['parsers/%s.c'%s for s in
'unimplemented',
'whitespace',
'xor',
'value']]
'value',
'seek']]
backends = ['backends/%s.c' % s for s in
['packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0']]
['missing', 'packrat', 'llk', 'regex', 'glr', 'lalr', 'lr', 'lr0', 'params']]
misc_hammer_parts = [
'allocator.c',
......@@ -66,7 +76,8 @@ misc_hammer_parts = [
'hammer.c',
'pprint.c',
'registry.c',
'system_allocator.c']
'system_allocator.c',
'sloballoc.c']
if env['PLATFORM'] == 'win32':
misc_hammer_parts += [
......@@ -82,7 +93,9 @@ ctests = ['t_benchmark.c',
't_parser.c',
't_grammar.c',
't_misc.c',
't_regression.c']
't_mm.c',
't_names.c',
't_regression.c']
static_library_name = 'hammer'
......@@ -93,9 +106,21 @@ if env['PLATFORM'] == 'win32':
# prevent collision between .lib from dll and .lib for static lib
static_library_name = 'hammer_s'
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts)
libhammer_static = env.StaticLibrary(static_library_name, parsers + backends + misc_hammer_parts)
if 'GPROF' in env and env['GPROF'] == 1:
# Disable the shared library (it won't work with gprof) and rename the static one
build_shared_library=False
static_library_name = 'hammer_pg'
# Markers for later
libhammer_static = None
libhammer_shared = None
if build_shared_library:
libhammer_shared = env.SharedLibrary('hammer', parsers + backends + misc_hammer_parts, \
SHLIBVERSION=hammer_shlib_version)
libhammer_static = env.StaticLibrary(static_library_name, parsers + backends + misc_hammer_parts)
if libhammer_shared is not None:
Default(libhammer_shared, libhammer_static)
env.Install('$libpath', [libhammer_static, libhammer_shared])
else:
......@@ -110,14 +135,20 @@ env.Install('$pkgconfigpath', '../../../libhammer.pc')
if GetOption('with_tests'):
testenv = env.Clone()
testenv.ParseConfig('pkg-config --cflags --libs glib-2.0')
testenv.Append(LIBS=['hammer'])
if libhammer_shared is not None:
testenv.Append(LIBS=['hammer'])
else:
testenv.Append(LIBS=[static_library_name])
testenv.Prepend(LIBPATH=['.'])
ctestexec = testenv.Program('test_suite', ctests + ['test_suite.c'], LINKFLAGS='--coverage' if testenv.GetOption('coverage') else None)
ctest = Alias('testc', [ctestexec], ''.join(['env LD_LIBRARY_PATH=', os.path.dirname(ctestexec[0].path), ' ', ctestexec[0].path]))
AlwaysBuild(ctest)
testruns.append(ctest)
Export('libhammer_static libhammer_shared')
if libhammer_shared is not None:
Export('libhammer_static libhammer_shared')
else:
Export('libhammer_static')
for b in env['bindings']:
env.SConscript(['bindings/%s/SConscript' % b])
......@@ -29,28 +29,46 @@ struct arena_link {
// For efficiency, we should probably allocate the arena links in
// their own slice, and link to a block directly. That can be
// implemented later, though, with no change in interface.
struct arena_link *next; // It is crucial that this be the first item; so that
// any arena link can be casted to struct arena_link**.
struct arena_link *next;
size_t free;
size_t used;
uint8_t rest[];
} ;
};
struct HArena_ {
struct arena_link *head;
struct HAllocator_ *mm__;
/* does mm__ zero blocks for us? */
bool malloc_zeros;
size_t block_size;
size_t used;
size_t wasted;
#ifdef DETAILED_ARENA_STATS
size_t mm_malloc_count, mm_malloc_bytes;
size_t memset_count, memset_bytes;
size_t arena_malloc_count, arena_malloc_bytes;
size_t arena_su_malloc_count, arena_su_malloc_bytes;
size_t arena_si_malloc_count, arena_si_malloc_bytes;
size_t arena_lu_malloc_count, arena_lu_malloc_bytes;
size_t arena_li_malloc_count, arena_li_malloc_bytes;
#endif
jmp_buf *except;
};
static void * h_arena_malloc_raw(HArena *arena, size_t size, bool need_zero);
void* h_alloc(HAllocator* mm__, size_t size) {
void *p = mm__->alloc(mm__, size);
if(!p)
h_platform_errx(1, "memory allocation failed (%uB requested)\n", (unsigned int)size);
h_platform_errx(1, "memory allocation failed (%zuB requested)\n", size);
return p;
}
/*
 * Resize 'ptr' to 'size' bytes through the realloc hook of allocator 'mm__'.
 * If the hook returns NULL, the failure is reported via h_platform_errx()
 * with the requested size; otherwise the hook's result is returned.
 */
void* h_realloc(HAllocator* mm__, void* ptr, size_t size) {
  void *resized = mm__->realloc(mm__, ptr, size);

  if (resized != NULL) {
    return resized;
  }

  h_platform_errx(1, "memory reallocation failed (%zuB requested)\n", size);
  return resized;
}
......@@ -61,7 +79,6 @@ HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
struct arena_link *link = (struct arena_link*)h_alloc(mm__, sizeof(struct arena_link) + block_size);
assert(ret != NULL);
assert(link != NULL);
memset(link, 0, sizeof(struct arena_link) + block_size);
link->free = block_size;
link->used = 0;
link->next = NULL;
......@@ -69,6 +86,19 @@ HArena *h_new_arena(HAllocator* mm__, size_t block_size) {
ret->block_size = block_size;
ret->used = 0;
ret->mm__ = mm__;
#ifdef DETAILED_ARENA_STATS
ret->mm_malloc_count = 2;
ret->mm_malloc_bytes = sizeof(*ret) + sizeof(struct arena_link) + block_size;
ret->memset_count = 0;
ret->memset_bytes = 0;
ret->arena_malloc_count = ret->arena_malloc_bytes = 0;
ret->arena_su_malloc_count = ret->arena_su_malloc_bytes = 0;
ret->arena_si_malloc_count = ret->arena_si_malloc_bytes = 0;
ret->arena_lu_malloc_count = ret->arena_lu_malloc_bytes = 0;
ret->arena_li_malloc_count = ret->arena_li_malloc_bytes = 0;
#endif
/* XXX provide a mechanism to indicate mm__ returns zeroed blocks */
ret->malloc_zeros = false;
ret->wasted = sizeof(struct arena_link) + sizeof(struct HArena_) + block_size;
ret->except = NULL;
return ret;
......@@ -90,39 +120,120 @@ static void *alloc_block(HArena *arena, size_t size)
return block;
}
void* h_arena_malloc(HArena *arena, size_t size) {
/* Allocate `size` bytes from `arena` without asking for them to be zeroed. */
void * h_arena_malloc_noinit(HArena *arena, size_t size) {
  const bool zero_fill = false;

  return h_arena_malloc_raw(arena, size, zero_fill);
}
/* Allocate `size` bytes from `arena` and request that they be zeroed. */
void * h_arena_malloc(HArena *arena, size_t size) {
  const bool zero_fill = true;

  return h_arena_malloc_raw(arena, size, zero_fill);
}
static void * h_arena_malloc_raw(HArena *arena, size_t size,
bool need_zero) {
struct arena_link *link = NULL;
void *ret = NULL;
if (size <= arena->head->free) {
// fast path..
void* ret = arena->head->rest + arena->head->used;
/* fast path.. */
ret = arena->head->rest + arena->head->used;
arena->used += size;
arena->wasted -= size;
arena->head->used += size;
arena->head->free -= size;
return ret;
#ifdef DETAILED_ARENA_STATS
++(arena->arena_malloc_count);
arena->arena_malloc_bytes += size;
if (need_zero) {
++(arena->arena_si_malloc_count);
arena->arena_si_malloc_bytes += size;
} else {
++(arena->arena_su_malloc_count);
arena->arena_su_malloc_bytes += size;
}
#endif
} else if (size > arena->block_size) {
// We need a new, dedicated block for it, because it won't fit in a standard sized one.
// This involves some annoying casting...
arena->used += size;
arena->wasted += sizeof(struct arena_link*);
void* link = alloc_block(arena, size + sizeof(struct arena_link*));
/*
* We need a new, dedicated block for it, because it won't fit in a
* standard sized one.
*
* NOTE:
*
* We used to do a silly casting dance to treat blocks like this
* as special cases and make the used/free fields part of the allocated
* block, but the old code was not really proper portable C and depended
* on a bunch of implementation-specific behavior. We could have done it
* better with a union in struct arena_link, but the memory savings is
* only 0.39% for a 64-bit machine, a 4096-byte block size and all
* large allocations *only just one byte* over the block size, so I
* question the utility of it. We do still slip the large block in
* one position behind the list head so it doesn't cut off a partially
* filled list head.
*
* -- andrea
*/
link = alloc_block(arena, size + sizeof(struct arena_link));
assert(link != NULL);
memset(link, 0, size + sizeof(struct arena_link*));
*(struct arena_link**)link = arena->head->next;
arena->head->next = (struct arena_link*)link;
return (void*)(((uint8_t*)link) + sizeof(struct arena_link*));
arena->used += size;
arena->wasted += sizeof(struct arena_link);
link->used = size;
link->free = 0;
link->next = arena->head->next;
arena->head->next = link;
ret = link->rest;
#ifdef DETAILED_ARENA_STATS
++(arena->arena_malloc_count);
arena->arena_malloc_bytes += size;
if (need_zero) {
++(arena->arena_li_malloc_count);
arena->arena_li_malloc_bytes += size;
} else {
++(arena->arena_lu_malloc_count);
arena->arena_lu_malloc_bytes += size;
}
#endif
} else {
// we just need to allocate an ordinary new block.
struct arena_link *link = alloc_block(arena, sizeof(struct arena_link) + arena->block_size);
/* we just need to allocate an ordinary new block. */
link = alloc_block(arena, sizeof(struct arena_link) + arena->block_size);
assert(link != NULL);
memset(link, 0, sizeof(struct arena_link) + arena->block_size);
#ifdef DETAILED_ARENA_STATS
++(arena->mm_malloc_count);
arena->mm_malloc_bytes += sizeof(struct arena_link) + arena->block_size;
#endif
link->free = arena->block_size - size;
link->used = size;
link->next = arena->head;
arena->head = link;
arena->used += size;
arena->wasted += sizeof(struct arena_link) + arena->block_size - size;
return link->rest;
ret = link->rest;
#ifdef DETAILED_ARENA_STATS
++(arena->arena_malloc_count);
arena->arena_malloc_bytes += size;
if (need_zero) {
++(arena->arena_si_malloc_count);
arena->arena_si_malloc_bytes += size;
} else {
++(arena->arena_su_malloc_count);
arena->arena_su_malloc_bytes += size;
}
#endif
}
/*
* Zeroize if necessary
*/
if (need_zero && !(arena->malloc_zeros)) {
memset(ret, 0, size);
#ifdef DETAILED_ARENA_STATS
++(arena->memset_count);
arena->memset_bytes += size;
#endif
}
return ret;
}
void h_arena_free(HArena *arena, void* ptr) {
......@@ -146,4 +257,49 @@ void h_delete_arena(HArena *arena) {
/*
 * Copy the arena's accounting counters into `stats`.
 *
 * `used` and `wasted` are always copied; the detailed counters are copied
 * only when DETAILED_ARENA_STATS is defined.
 */
void h_allocator_stats(HArena *arena, HArenaStats *stats) {
/* Every counter has the same name in HArena_ and HArenaStats. */
#define H_COPY_ARENA_STAT(field) (stats->field = arena->field)
  H_COPY_ARENA_STAT(used);
  H_COPY_ARENA_STAT(wasted);
#ifdef DETAILED_ARENA_STATS
  H_COPY_ARENA_STAT(mm_malloc_count);
  H_COPY_ARENA_STAT(mm_malloc_bytes);
  H_COPY_ARENA_STAT(memset_count);
  H_COPY_ARENA_STAT(memset_bytes);
  H_COPY_ARENA_STAT(arena_malloc_count);
  H_COPY_ARENA_STAT(arena_malloc_bytes);
  H_COPY_ARENA_STAT(arena_su_malloc_count);
  H_COPY_ARENA_STAT(arena_su_malloc_bytes);
  H_COPY_ARENA_STAT(arena_si_malloc_count);
  H_COPY_ARENA_STAT(arena_si_malloc_bytes);
  H_COPY_ARENA_STAT(arena_lu_malloc_count);
  H_COPY_ARENA_STAT(arena_lu_malloc_bytes);
  H_COPY_ARENA_STAT(arena_li_malloc_count);
  H_COPY_ARENA_STAT(arena_li_malloc_bytes);
#endif
#undef H_COPY_ARENA_STAT
}
/*
 * Move an arena allocation into a fresh region of `n` bytes.
 *
 * The arena does not record the size of individual allocations, so the
 * block containing `ptr` is located by walking the block list and everything
 * from `ptr` up to the end of that block's used area is treated as the old
 * contents (capped at `n`). This may copy bytes that belong to later
 * allocations in the same block; that is wasteful but harmless.
 *
 * `ptr` must lie inside one of the arena's blocks; this is asserted.
 * Returns the new region, obtained from h_arena_malloc_noinit(), so bytes
 * beyond the copied prefix are not initialised.
 */
void* h_arena_realloc(HArena *arena, void* ptr, size_t n) {
  struct arena_link *link;
  /* Byte-typed view of ptr: arithmetic on void* is a GNU extension, not ISO C. */
  const uint8_t *old = ptr;
  void* ret;
  size_t ncopy;

  /* Find the block whose used area contains ptr. */
  for (link = arena->head; link; link = link->next) {
    if (old >= link->rest && old <= link->rest + link->used)
      break;  /* found it */
  }
  assert(link != NULL);

  /* Upper bound on the old allocation's size, limited to the new size. */
  ncopy = (size_t)((link->rest + link->used) - old);
  if (n < ncopy)
    ncopy = n;

  ret = h_arena_malloc_noinit(arena, n);
  assert(ret != NULL);
  memcpy(ret, ptr, ncopy);
  h_arena_free(arena, ptr);

  return ret;
}
......@@ -38,6 +38,8 @@ extern "C" {
# define ATTR_MALLOC(n)
#endif
/* #define DETAILED_ARENA_STATS */
// TODO(thequux): Turn this into an "HAllocatorVtable", and add a wrapper that also takes an environment pointer.
typedef struct HAllocator_ {
void* (*alloc)(struct HAllocator_* allocator, size_t size);
......@@ -46,12 +48,15 @@ typedef struct HAllocator_ {
} HAllocator;
void* h_alloc(HAllocator* allocator, size_t size) ATTR_MALLOC(2);
void* h_realloc(HAllocator* allocator, void* ptr, size_t size);
typedef struct HArena_ HArena ; // hidden implementation
HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default...
void* h_arena_malloc_noinit(HArena *arena, size_t count) ATTR_MALLOC(2);
void* h_arena_malloc(HArena *arena, size_t count) ATTR_MALLOC(2);
void* h_arena_realloc(HArena *arena, void* ptr, size_t count);
void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers.
void h_delete_arena(HArena *arena);
void h_arena_set_except(HArena *arena, jmp_buf *except);
......@@ -59,6 +64,26 @@ void h_arena_set_except(HArena *arena, jmp_buf *except);
/*
 * Snapshot of an arena's accounting counters, filled in by
 * h_allocator_stats(). The detailed counters exist only when
 * DETAILED_ARENA_STATS is defined.
 */
typedef struct {
/* bytes handed out to callers */
size_t used;
/* bytes obtained but not handed out (headers and unused space) */
size_t wasted;
#ifdef DETAILED_ARENA_STATS
/* blocks requested from the underlying allocator: count and bytes */
size_t mm_malloc_count;
size_t mm_malloc_bytes;
/* zero-fill operations performed by the arena: count and bytes */
size_t memset_count;
size_t memset_bytes;
/* all arena allocations: count and bytes */
size_t arena_malloc_count;
size_t arena_malloc_bytes;
/* small, uninited */
size_t arena_su_malloc_count;
size_t arena_su_malloc_bytes;
/* small, inited */
size_t arena_si_malloc_count;
size_t arena_si_malloc_bytes;
/* large, uninited */
size_t arena_lu_malloc_count;
size_t arena_lu_malloc_bytes;
/* large, inited */
size_t arena_li_malloc_count;
size_t arena_li_malloc_bytes;
#endif
} HArenaStats;
void h_allocator_stats(HArena *arena, HArenaStats *stats);
......
#include <assert.h>
#include "lr.h"
#include "params.h"
static bool glr_step(HParseResult **result, HSlist *engines,
HLREngine *engine, const HLRAction *action);
......@@ -14,7 +15,7 @@ int h_glr_compile(HAllocator* mm__, HParser* parser, const void* params)
}
int result = h_lalr_compile(mm__, parser, params);
if(result == -1 && parser->backend_data) {
if(result == -2 && parser->backend_data) {
// table is there, just has conflicts? nevermind, that's okay.
result = 0;
}
......@@ -174,9 +175,9 @@ static bool glr_step(HParseResult **result, HSlist *engines,
HSlistNode *x;
for(x=engines->head; x; x=x->next) {
HLREngine *eng = x->elem;
if(eng->state == engine->state) {
x->elem = lrengine_merge(eng, engine);
break;
if(eng->state == engine->state && eng->input.index == engine->input.index) {
x->elem = lrengine_merge(eng, engine);
break;
}
}
if(!x) // no merge happened
......@@ -225,6 +226,8 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
HLREngine *engine = h_slist_pop(engines);
const HLRAction *action = h_lrengine_action(engine);
glr_step(&result, engback, engine, action);
// XXX detect ambiguous results - two engines terminating at the same pos
// -> kill both engines, i.e. ignore if there is a later unamb. success
}
// swap the lists
......@@ -239,12 +242,54 @@ HParseResult *h_glr_parse(HAllocator* mm__, const HParser* parser, HInputStream*
return result;
}
/*
 * Produce the description string for the GLR backend.
 *
 * The parameter k is read from `param` with h_get_param_k() and formatted
 * together with the name "GLR" by h_format_description_with_param_k().
 * `be` is not used. NOTE(review): the result is presumably allocated from
 * mm__ and owned by the caller -- confirm against the formatter.
 */
char * h_glr_get_description(HAllocator *mm__,
                             HParserBackend be, void *param) {
  const size_t lookahead = h_get_param_k(param);

  return h_format_description_with_param_k(mm__, "GLR", lookahead);
}
/*
 * Produce the short name string for the GLR backend.
 *
 * The parameter k is read from `param` with h_get_param_k() and formatted
 * together with the name "GLR" by h_format_name_with_param_k().
 * `be` is not used. NOTE(review): the result is presumably allocated from
 * mm__ and owned by the caller -- confirm against the formatter.
 */
char * h_glr_get_short_name(HAllocator *mm__,
                            HParserBackend be, void *param) {
  const size_t lookahead = h_get_param_k(param);

  return h_format_name_with_param_k(mm__, "GLR", lookahead);
}
/*
 * Parameter extraction hook for the GLR backend: forwards both arguments to
 * h_extract_param_k() and returns its result unchanged.
 */
int h_glr_extract_params(HParserBackendWithParams * be_with_params, backend_with_params_t * be_with_params_t) {
  const int status = h_extract_param_k(be_with_params, be_with_params_t);

  return status;
}
HParserBackendVTable h__glr_backend_vtable = {
.compile = h_glr_compile,
.parse = h_glr_parse,
.free = h_glr_free
.free = h_glr_free,
.copy_params = h_copy_numeric_param,
/* No free_param needed, since it's not actually allocated */
/* Name/param resolution functions */
.backend_short_name = "glr",
.backend_description = "GLR(k) parser backend",
.get_description_with_params = h_glr_get_description,
.get_short_name_with_params = h_glr_get_short_name,
.extract_params = h_glr_extract_params
};
......
#include <assert.h>
#include "contextfree.h"
#include "lr.h"
#include "params.h"
/* LALR-via-SLR grammar transformation */
......@@ -31,18 +31,24 @@ static size_t follow_transition(const HLRTable *table, size_t x, HCFChoice *A)
{
HLRAction *action = lrtable_lookup(table, x, A);
assert(action != NULL);
// we are interested in a transition out of state x, i.e. a shift action.
// while there could also be reduce actions associated with A in state x,
// those are not what we are here for. so if action is a conflict, search it
// for the shift. there will only be one and it will be the bottom element.
if(action->type == HLR_CONFLICT) {
HSlistNode *x;
for(x=action->branches->head; x; x=x->next) {
action = x->elem;
assert(action->type != HLR_CONFLICT); // no nesting of conflicts
if(action->type == HLR_SHIFT)
break;
}
assert(x != NULL && x->next == NULL); // shift found at the bottom
}
assert(action->type == HLR_SHIFT);
return action->nextstate;
}
static inline HLRTransition *transition(HArena *arena,
size_t x, const HCFChoice *A, size_t y)
{
HLRTransition *t = h_arena_malloc(arena, sizeof(HLRTransition));
t->from = x;
t->symbol = A;
t->to = y;
return t;
return action->nextstate;
}
// no-op on terminal symbols
......@@ -69,8 +75,8 @@ static void transform_productions(const HLRTable *table, HLREnhGrammar *eg,
HCFChoice **iBj = items;
for(; *B; B++, iBj++) {
size_t j = follow_transition(table, i, *B);
HLRTransition *i_B_j = transition(arena, i, *B, j);
*iBj = h_hashtable_get(eg->tmap, i_B_j);
HLRTransition i_B_j = {i, *B, j};
*iBj = h_hashtable_get(eg->tmap, &i_B_j);
assert(*iBj != NULL);
i = j;
}
......@@ -269,6 +275,7 @@ HCFChoice *h_desugar_augmented(HAllocator *mm__, HParser *parser)
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
{
size_t k = params? (uintptr_t)params : DEFAULT_KMAX;
// generate (augmented) CFG from parser
// construct LR(0) DFA
// build LR(0) table
......@@ -279,18 +286,18 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
}
HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser));
if(g == NULL) // backend not suitable (language not context-free)
return -1;
return 2;
HLRDFA *dfa = h_lr0_dfa(g);
if (dfa == NULL) { // this should normally not happen
h_cfgrammar_free(g);
return -1;
return 3;
}
HLRTable *table = h_lr0_table(g, dfa);
if (table == NULL) { // this should normally not happen
h_cfgrammar_free(g);
return -1;
return 4;
}
if(has_conflicts(table)) {
......@@ -300,7 +307,7 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
if(eg == NULL) { // this should normally not happen
h_cfgrammar_free(g);
h_lrtable_free(table);
return -1;
return 5;
}
// go through the inadequate states; replace inadeq with a new list
......@@ -329,10 +336,14 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
if(match_any_production(table, eg, lhs, item->rhs, state)) {
// the left-hand symbol's follow set is this production's
// contribution to the lookahead
const HStringMap *fs = h_follow(1, eg->grammar, lhs);
const HStringMap *fs = h_follow(k, eg->grammar, lhs);
assert(fs != NULL);
assert(fs->epsilon_branch == NULL);
assert(!h_stringmap_empty(fs));
// NB: there is a case where fs can be empty: when reducing by lhs
// would lead to certain parse failure, by means of h_nothing_p()
// for instance. in that case, the below code correctly adds no
// reduce action.
assert(!h_stringmap_empty(fs)); // XXX
// for each lookahead symbol, put action into table cell
if(terminals_put(table->tmap[state], fs, action) < 0)
......@@ -345,11 +356,13 @@ int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
h_slist_push(table->inadeq, (void *)(uintptr_t)state);
}
}
h_cfgrammar_free(eg->grammar);
}
h_cfgrammar_free(g);
parser->backend_data = table;
return has_conflicts(table)? -1 : 0;
return has_conflicts(table)? -2 : 0;
}
void h_lalr_free(HParser *parser)
......@@ -357,22 +370,62 @@ void h_lalr_free(HParser *parser)
HLRTable *table = parser->backend_data;
h_lrtable_free(table);
parser->backend_data = NULL;
parser->backend = PB_PACKRAT;
parser->backend_vtable = h_get_default_backend_vtable();
parser->backend = h_get_default_backend();
}
/*
 * Produce the description string for the LALR backend.
 *
 * The parameter k is read from `param` with h_get_param_k() and formatted
 * together with the name "LALR" by h_format_description_with_param_k().
 * `be` is not used. NOTE(review): the result is presumably allocated from
 * mm__ and owned by the caller -- confirm against the formatter.
 */
char * h_lalr_get_description(HAllocator *mm__,
                              HParserBackend be, void *param) {
  const size_t lookahead = h_get_param_k(param);

  return h_format_description_with_param_k(mm__, "LALR", lookahead);
}
/*
 * Produce the short name string for the LALR backend.
 *
 * The parameter k is read from `param` with h_get_param_k() and formatted
 * together with the name "LALR" by h_format_name_with_param_k().
 * `be` is not used. NOTE(review): the result is presumably allocated from
 * mm__ and owned by the caller -- confirm against the formatter.
 */
char * h_lalr_get_short_name(HAllocator *mm__,
                             HParserBackend be, void *param) {
  const size_t lookahead = h_get_param_k(param);

  return h_format_name_with_param_k(mm__, "LALR", lookahead);
}
/*
 * Parameter extraction hook for the LALR backend: forwards both arguments to
 * h_extract_param_k() and returns its result unchanged.
 */
int h_lalr_extract_params(HParserBackendWithParams * be_with_params, backend_with_params_t * be_with_params_t) {
  const int status = h_extract_param_k(be_with_params, be_with_params_t);

  return status;
}
HParserBackendVTable h__lalr_backend_vtable = {
.compile = h_lalr_compile,
.parse = h_lr_parse,
.free = h_lalr_free,
.parse_start = h_lr_parse_start,
.parse_chunk = h_lr_parse_chunk,
.parse_finish = h_lr_parse_finish
};
.parse_finish = h_lr_parse_finish,
.copy_params = h_copy_numeric_param,
/* No free_param needed, since it's not actually allocated */
/* Name/param resolution functions */
.backend_short_name = "lalr",
.backend_description = "LALR(k) parser backend",
.get_description_with_params = h_lalr_get_description,
.get_short_name_with_params = h_lalr_get_short_name,
.extract_params = h_lalr_extract_params
};
// dummy!
int test_lalr(void)
......