Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • hammer/hammer
  • mlp/hammer
  • xentrac/hammer
  • pesco/hammer
  • letitiali/hammer
  • nobody/hammer
  • kia/hammer-sandbox
  • vyrus001/hammer
  • denleylam/hammer
9 results
Show changes
Showing
with 615 additions and 86 deletions
......@@ -46,7 +46,7 @@ HBenchmarkResults *h_benchmark__m(HAllocator* mm__, HParser* parser, HParserTest
for (backend = PB_MIN; backend <= PB_MAX; backend++) {
ret->results[backend].backend = backend;
// Step 1: Compile grammar for given parser...
if (h_compile(parser, backend, NULL) == -1) {
if (h_compile(parser, backend, NULL)) {
// backend inappropriate for grammar...
fprintf(stderr, "Compiling for %s failed\n", HParserBackendNames[backend]);
ret->results[backend].compile_success = false;
......
#!python
from __future__ import absolute_import, division, print_function
import os
import sys
def walkDirs(path):
    """Return a list containing `path` followed by every directory below it.

    The list always starts with `path` itself.  Sub-directories are appended
    top-down, in the order os.walk() reports them, each one joined onto the
    directory it was found in.

    The previous implementation passed a visitor callback to os.walk(), which
    is the signature of the Python 2 only os.path.walk().  os.walk() is a
    generator and was never iterated, so no sub-directory was ever collected.
    """
    pathlist = [path]
    for dirname, subdirs, _files in os.walk(path):
        for n in subdirs:
            pathlist.append(os.path.join(dirname, n))
    print(pathlist)
    return pathlist
def ConfigureJNI(env):
    """Configure the given environment for compiling Java Native Interface
    c or c++ language files.

    `env` is an SCons construction environment; it is modified in place
    (CPPPATH, LIBPATH, platform link flags, JAVA_HOME, JNI_CPPPATH and
    JNI_LIBPATH).

    Returns 1 on success and 0 when no Java compiler, no Java home directory
    or no jni.h could be located.
    """
    print("Configuring JNI includes")

    if not env.get('JAVAC'):
        print("The Java compiler must be installed and in the current path.")
        return 0

    # first look for a shell variable called JAVA_HOME
    java_base = os.environ.get('JAVA_HOME')
    if not java_base:
        if sys.platform == 'darwin':
            # Apple's OS X has its own special java base directory
            java_base = '/System/Library/Frameworks/JavaVM.framework'
        else:
            # Search for the java compiler
            print("JAVA_HOME environment variable is not set. Searching for java... ")
            javac_path = env.WhereIs('javac')
            # WhereIs() yields None when javac is not on the path; calling
            # os.path.dirname(None) would raise instead of reporting failure.
            jcdir = os.path.dirname(javac_path) if javac_path else None
            if not jcdir:
                print("not found.")
                return 0
            # assuming the compiler found is in some directory like
            # /usr/jdkX.X/bin/javac, java's home directory is /usr/jdkX.X
            java_base = os.path.join(jcdir, "..")
            print("found.")

    if sys.platform == 'cygwin':
        # Cygwin and Sun Java have different ideas of how path names
        # are defined. Use cygpath to convert the windows path to
        # a cygwin path. i.e. C:\jdkX.X to /cygdrive/c/jdkX.X
        java_base = os.popen("cygpath -up '" + java_base + "'").read().replace('\n', '')

    if sys.platform == 'darwin':
        # Apple does not use Sun's naming convention
        java_headers = [os.path.join(java_base, 'Headers')]
        java_libs = [os.path.join(java_base, 'Libraries')]
    else:
        # windows and linux
        java_headers = [os.path.join(java_base, 'include')]
        java_libs = [os.path.join(java_base, 'lib')]
        # Sun's windows and linux JDKs keep system-specific header
        # files in a sub-directory of include
        if java_base == '/usr' or java_base == '/usr/local':
            # too many possible subdirectories. Just use defaults
            java_headers.append(os.path.join(java_headers[0], 'win32'))
            java_headers.append(os.path.join(java_headers[0], 'linux'))
            java_headers.append(os.path.join(java_headers[0], 'solaris'))
        else:
            # add all subdirs of 'include'. The system specific headers
            # should be in there somewhere
            java_headers = walkDirs(java_headers[0])

    # give up early if none of the candidate directories holds jni.h
    if not any(os.path.exists(os.path.join(path, 'jni.h'))
               for path in java_headers):
        print("Can't find jni.h in %s" % java_headers)
        return 0

    # add Java's include and lib directory to the environment
    # NOTE(review): the 'linux' sub-directory is appended unconditionally,
    # on every platform -- confirm this is intended.
    java_headers.append(os.path.join(java_headers[0], 'linux'))
    env.Append(CPPPATH = java_headers)
    env.Append(LIBPATH = java_libs)

    # add any special platform-specific compilation or linking flags
    if sys.platform == 'darwin':
        env.Append(SHLINKFLAGS = '-dynamiclib -framework JavaVM')
        env['SHLIBSUFFIX'] = '.jnilib'
    elif sys.platform == 'cygwin':
        env.Append(CCFLAGS = '-mno-cygwin')
        env.Append(SHLINKFLAGS = '-mno-cygwin -Wl,--kill-at')

    # Add extra potentially useful environment variables
    env['JAVA_HOME'] = java_base
    env['JNI_CPPPATH'] = java_headers
    env['JNI_LIBPATH'] = java_libs
    return 1
\ No newline at end of file
......@@ -84,6 +84,20 @@ public static void main(String args[])
Parser i3parsers[] = {Hammer.ch((byte)'i'), Hammer.uInt8(), Hammer.int8()};
handle(Hammer.parse(Hammer.sequence(Hammer.ch((byte)'i'), Hammer.uInt8(), Hammer.int8()), i3, i3.length));
out("permutation");
byte ch3[] = {(byte) 'a', (byte) 'b', (byte) 'c'};
handle(Hammer.parse(Hammer.permutation(Hammer.ch((byte)'a'), Hammer.ch((byte)'b'), Hammer.ch((byte)'c')), ch3, ch3.length));
handle(Hammer.parse(Hammer.permutation(Hammer.ch((byte)'b'), Hammer.ch((byte)'a'), Hammer.ch((byte)'c')), ch3, ch3.length));
out("skip");
byte ch6[] = {(byte) 'a', (byte) 'b', (byte) 'c', (byte) 'd', (byte) 'e', (byte) 'f'};
handle(Hammer.parse(Hammer.sequence(Hammer.ch((byte)'a'), Hammer.skip((int)32), Hammer.ch((byte)'f')), ch6, ch6.length));
out("seek");
final int SEEK_SET = 0; /* Seek from beginning of file. */
//final int SEEK_CUR = 1; /* Seek from current position. */
//final int SEEK_END = 2; /* Seek from end of file. */
handle(Hammer.parse(Hammer.sequence(Hammer.ch((byte)'a'), Hammer.seek((int)40, (int)SEEK_SET), Hammer.ch((byte)'f')), ch6, ch6.length));
}
......
......@@ -5,17 +5,18 @@ CSOURCES := com_upstandinghackers_hammer_Hammer.c com_upstandinghackers_hammer_P
# ls *.h *.o *.so com/upstandinghackers/hammer/*.class | grep -v jhammer.h | tr '\n' ' '; replace single $ with $$
OUTPUTS := com/upstandinghackers/hammer/Action.class com/upstandinghackers/hammer/Hammer.class com_upstandinghackers_hammer_Hammer.h com_upstandinghackers_hammer_Hammer.o com/upstandinghackers/hammer/Hammer\$TokenType.class com_upstandinghackers_hammer_Hammer_TokenType.h com/upstandinghackers/hammer/ParsedToken.class com_upstandinghackers_hammer_ParsedToken.h com_upstandinghackers_hammer_ParsedToken.o com/upstandinghackers/hammer/Parser.class com/upstandinghackers/hammer/ParseResult.class com_upstandinghackers_hammer_ParseResult.h com_upstandinghackers_hammer_ParseResult.o com_upstandinghackers_hammer_Parser.h com_upstandinghackers_hammer_Parser.o com/upstandinghackers/hammer/Predicate.class libjhammer.so
TOPLEVEL := ../
TOPLEVEL := ../../../
JC=javac
JH=javah
CP=com/upstandinghackers/hammer
PACKAGE=com.upstandinghackers.hammer
include ../common.mk
include ../../../common.mk
JNI_INCLUDE := /usr/lib/jvm/java-6-openjdk/include/
CFLAGS += -fPIC -I. -I $(TOPLEVEL)/src/ -I jni -I $(JNI_INCLUDE)
JNI_INCLUDE := /usr/lib/jvm/java-8-oracle/include/
JNI_INCLUDE_LINUX := /usr/lib/jvm/java-8-oracle/include/linux
CFLAGS += -fPIC -I. -I $(TOPLEVEL)/src/ -I jni -I $(JNI_INCLUDE) -I $(JNI_INCLUDE_LINUX)
%.java: $(call ifsilent,| $(HUSH))
$(call hush, "Compiling Java source $@") $(JC) $(CP)/$@
......@@ -23,7 +24,7 @@ CFLAGS += -fPIC -I. -I $(TOPLEVEL)/src/ -I jni -I $(JNI_INCLUDE)
all: javacc prepare compile link
link: compile
$(call hush, "Generating libjhammer.so") $(CC) -shared $(CFLAGS) -o libjhammer.so *.o ../src/*.o ../src/backends/*.o ../src/parsers/*.o
$(call hush, "Generating libjhammer.so") $(CC) -shared $(CFLAGS) -o libjhammer.so *.o ../../../src/*.o ../../../src/backends/*.o ../../../src/parsers/*.o
$(CSOURCES): prepare
$(call hush, "Compiling $@") $(CC) -c $(CFLAGS) $@
......
File moved
# -*- python -*-
# SCons script for the JNI bindings: compiles the Java classes, generates
# their JNI headers and builds the libjhammer shared library.
from __future__ import absolute_import, division, print_function
import os, os.path
import sys
# variables exported by the calling SCons script
Import('env libhammer_shared testruns targets')
from src.bindings.jni.ConfigureJNI import ConfigureJNI
# configure a clone rather than env itself
javaenv = env.Clone()
# ConfigureJNI returns 0 when it cannot set up the JNI include/lib paths.
# NOTE(review): Exit(0) signals success although the message describes a
# hard requirement -- confirm a zero exit status is intended here.
if not ConfigureJNI(javaenv):
print("Java Native Interface is required... Exiting")
Exit(0)
javaenv.Append(CPPPATH=[".", "../.."],
LIBS=['hammer'],
LIBPATH=["../.."])
# compile java classes
jni_classes = javaenv.Java(".", "#src/bindings/jni/com")
print(jni_classes)
# generate the JNI headers from the compiled classes
jni_headers = javaenv.JavaH(".", jni_classes)
print(jni_headers)
Default(jni_classes)
Default(jni_headers)
#print(javaenv.Dump())
# environment for the native library: the JNI include paths recorded by
# ConfigureJNI plus ../.., linked against 'hammer'
shlib_env = env.Clone(CPPPATH=javaenv['JNI_CPPPATH'] + ['../..'],
LIBS=['hammer'],
LIBPATH=["../.."])
# C sources implementing the native methods
csources = ['com_upstandinghackers_hammer_Hammer.c',
'com_upstandinghackers_hammer_ParsedToken.c',
'com_upstandinghackers_hammer_Parser.c',
'com_upstandinghackers_hammer_ParseResult.c']
libjhammer_shared = shlib_env.SharedLibrary('libjhammer', csources)
Default(libjhammer_shared)
......@@ -53,10 +53,12 @@ public class Hammer
public static native Parser middle(Parser p, Parser x, Parser q);
// public static native Parser action(Parser p, Action a);
public static native Parser in(byte[] charset, int length);
public static native Parser notIn(byte[] charset, int length);
public static native Parser endP();
public static native Parser nothingP();
public static native Parser sequence(Parser... parsers);
public static native Parser choice(Parser... parsers);
public static native Parser permutation(Parser... parsers);
public static native Parser butNot(Parser p1, Parser p2);
public static native Parser difference(Parser p1, Parser p2);
public static native Parser xor(Parser p1, Parser p2);
......@@ -73,4 +75,11 @@ public class Hammer
public static native Parser and(Parser p);
public static native Parser not(Parser p);
public static native Parser indirect();
// public static native Parser bindIndirect(Parser indirect, Parser inner);
public static native Parser withEndianess(byte endianess, Parser p);
// public static native Parser bind(Parser p, HContinuation k, void *env);
public static native Parser skip(int n);
public static native Parser seek(int n, int whence);
public static native Parser tell();
}
......@@ -157,6 +157,13 @@ JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_in
}
/*
 * Native implementation of Hammer.notIn(byte[] charset, int length).
 *
 * Passes the first `length` bytes of `charset` to h_not_in() and returns the
 * resulting parser wrapped in a Java object.  Returns NULL if the array
 * elements cannot be obtained.
 *
 * The element buffer obtained from the JVM is released again before
 * returning; previously it was never released.
 * NOTE(review): this assumes h_not_in() does not keep the pointer it is
 * given -- confirm against its implementation.
 */
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_notIn
  (JNIEnv *env, jclass class, jbyteArray charset, jint length)
{
    jbyte *elems;
    const HParser *result;

    elems = (*env)->GetByteArrayElements(env, charset, NULL);
    if(NULL==elems)
    {
        return NULL;
    }

    result = h_not_in((uint8_t*) elems, (size_t)length);

    /* the buffer was only read, so JNI_ABORT frees it without copying back */
    (*env)->ReleaseByteArrayElements(env, charset, elems, JNI_ABORT);

    RETURNWRAP(env, result);
}
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_endP
(JNIEnv *env, jclass class)
{
......@@ -227,6 +234,34 @@ JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_choice
}
/*
 * Native implementation of Hammer.permutation(Parser... parsers).
 *
 * Unwraps every element of the Java array into a NULL-terminated C array,
 * hands that array to h_permutation__a() and returns the resulting parser
 * wrapped in a Java object.  Returns NULL if the temporary array cannot be
 * allocated.
 *
 * The temporary array is freed before returning; previously it leaked on
 * every call.
 * NOTE(review): this assumes h_permutation__a() copies the pointers it needs
 * out of the argument array -- confirm against its implementation.
 */
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_permutation
  (JNIEnv *env, jclass class, jobjectArray permutation)
{
    jsize length;
    void **parsers;
    jsize i;
    jobject current;
    const HParser *result;

    length = (*env)->GetArrayLength(env, permutation);
    /* one extra slot for the terminating NULL */
    parsers = malloc(sizeof(HParser *)*((size_t)length+1));
    if(NULL==parsers)
    {
        return NULL;
    }

    for(i=0; i<length; i++)
    {
        current = (*env)->GetObjectArrayElement(env, permutation, i);
        parsers[i] = UNWRAP(env, current);
    }
    parsers[length] = NULL;

    result = h_permutation__a(parsers);
    free(parsers);

    RETURNWRAP(env, result);
}
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_butNot
(JNIEnv *env, jclass class, jobject p, jobject q)
{
......@@ -332,4 +367,29 @@ JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_indirect
}
/*
 * Native implementation of Hammer.withEndianess(byte endianess, Parser p).
 *
 * The JVM looks native methods up by a symbol derived from the Java method
 * name, and the Java class declares the method as `withEndianess`.  The
 * symbol must therefore end in `_withEndianess`; an underscore inside a
 * method name would be encoded as `_1`, so `_with_endianness` is never
 * matched to that method.
 */
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_withEndianess
  (JNIEnv *env, jclass class, jbyte endianess, jobject p)
{
    RETURNWRAP(env, h_with_endianness((char) endianess, UNWRAP(env, p)));
}

/*
 * Previous (mis-spelled) symbol, kept so anything referring to it by name
 * still links.  Forwards to the correctly named entry point.
 */
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_with_endianness
  (JNIEnv *env, jclass class, jbyte endianess, jobject p)
{
    return Java_com_upstandinghackers_hammer_Hammer_withEndianess(env, class, endianess, p);
}
/*
 * Native implementation of Hammer.skip(int n): builds a parser with
 * h_skip(), passing n converted to size_t, and returns it wrapped in a
 * Java object.
 */
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_skip
  (JNIEnv *env, jclass class, jint n)
{
    const HParser *result = h_skip((size_t) n);

    RETURNWRAP(env, result);
}
/*
 * Native implementation of Hammer.seek(int n, int whence): builds a parser
 * with h_seek() from the given offset and whence value and returns it
 * wrapped in a Java object.
 */
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_seek
  (JNIEnv *env, jclass class, jint offset, jint whence)
{
    const HParser *result = h_seek((ssize_t) offset, (int) whence);

    RETURNWRAP(env, result);
}
/*
 * Native implementation of Hammer.tell(): builds a parser with h_tell()
 * and returns it wrapped in a Java object.
 */
JNIEXPORT jobject JNICALL Java_com_upstandinghackers_hammer_Hammer_tell
  (JNIEnv *env, jclass class)
{
    const HParser *result = h_tell();

    RETURNWRAP(env, result);
}
File moved
......@@ -108,3 +108,77 @@ int64_t h_read_bits(HInputStream* state, int count, char signed_p) {
out <<= final_shift;
return (out ^ msb) - msb; // perform sign extension
}
// Advance the read position of `stream` by `count` bits without producing
// a value. Does nothing when count is 0 or the stream is already overrun;
// sets stream->overrun when the skip cannot be satisfied by the remaining
// input.
void h_skip_bits(HInputStream* stream, size_t count) {
  if (count == 0 || stream->overrun)
    return;

  // already at the end of input: nothing left to skip over
  if (stream->index == stream->length) {
    stream->overrun = true;
    return;
  }

  // bits still available in the byte we are currently positioned in
  size_t in_byte = 8 - stream->bit_offset - stream->margin;
  if (count < in_byte) {
    // the skip ends inside the current byte
    stream->bit_offset += count;
    return;
  }
  if (in_byte < 8) {
    // use up the rest of a partially consumed byte
    stream->index += 1;
    stream->bit_offset = 0;
    stream->margin = 0;
    count -= in_byte;
  }
  assert(stream->bit_offset == 0);
  assert(stream->margin == 0);

  // whole bytes
  size_t whole = count / 8;
  if (whole > stream->length - stream->index) {
    stream->index = stream->length;
    stream->overrun = true;
    return;
  }
  stream->index += whole;
  count %= 8;
  assert(count < 8);

  // leftover bits land in the next byte, which must exist if any are left
  if (count == 0 || stream->index != stream->length)
    stream->bit_offset = count;
  else
    stream->overrun = true;
}
// Move the read position of `stream` to the absolute bit position `pos`.
// A target beyond the end of input clamps the position to the end and sets
// stream->overrun.
// NOTE(review): when the target lies in the byte the stream is already on,
// only bit_offset is updated -- margin is left alone and no end-of-input
// check is made. Confirm that is intended.
void h_seek_bits(HInputStream* stream, size_t pos) {
  const size_t target_byte = pos / 8;
  const size_t target_bit = pos % 8;

  if (target_byte != stream->index) {
    stream->margin = 0;

    const bool past_end = target_byte > stream->length ||
                          (target_byte == stream->length && target_bit > 0);
    if (past_end) {
      stream->index = stream->length;
      stream->bit_offset = 0;
      stream->overrun = true;
    } else {
      stream->index = target_byte;
      stream->bit_offset = target_bit;
    }
  } else {
    /* seek within the current byte */
    stream->bit_offset = target_bit;
  }
}
......@@ -6,10 +6,25 @@
#include <ctype.h>
// type of pairs used as memoization keys by h_follow and h_first
struct k_nt {size_t k; const HCFChoice *nt;};
// a special map value for use when the map is used to represent a set
static void * const INSET = (void *)(uintptr_t)1;
// equality on memoization keys: both the lookahead depth k and the symbol
// pointer must match
static bool eq_k_nt(const void *p, const void *q)
{
  const struct k_nt *lhs = p;
  const struct k_nt *rhs = q;

  if (lhs->k != rhs->k)
    return false;
  return lhs->nt == rhs->nt;
}
// hash of a memoization key: the pointer hash of the symbol multiplied by
// the lookahead depth k
static HHashValue hash_k_nt(const void *p)
{
  const struct k_nt *key = p;
  HHashValue nt_hash = h_hash_ptr(key->nt);

  return nt_hash * key->k;
}
HCFGrammar *h_cfgrammar_new(HAllocator *mm__)
{
HCFGrammar *g = h_new(HCFGrammar, 1);
......@@ -20,14 +35,17 @@ HCFGrammar *h_cfgrammar_new(HAllocator *mm__)
g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
g->start = NULL;
g->geneps = NULL;
g->first = NULL;
g->follow = NULL;
g->kmax = 0; // will be increased as needed by ensure_k
g->first = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt);
g->follow = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt);
HStringMap *eps = h_stringmap_new(g->arena);
h_stringmap_put_epsilon(eps, INSET);
g->singleton_epsilon = eps;
HStringMap *end = h_stringmap_new(g->arena);
h_stringmap_put_end(end, INSET);
g->singleton_end = end;
return g;
}
......@@ -42,6 +60,7 @@ void h_cfgrammar_free(HCFGrammar *g)
// helpers
static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol);
static void collect_geneps(HCFGrammar *grammar);
static void eliminate_dead_rules(HCFGrammar *g);
HCFGrammar *h_cfgrammar(HAllocator* mm__, const HParser *parser)
......@@ -83,6 +102,9 @@ HCFGrammar *h_cfgrammar_(HAllocator* mm__, HCFChoice *desugared)
g->start = desugared;
}
// simplifications
eliminate_dead_rules(g);
// determine which nonterminals generate epsilon
collect_geneps(g);
......@@ -128,42 +150,6 @@ static void collect_nts(HCFGrammar *grammar, HCFChoice *symbol)
}
}
/* Grow the per-k arrays g->first and g->follow so that index k is valid,
 * and raise g->kmax to k. Does nothing if k <= g->kmax. */
static void ensure_k(HCFGrammar *g, size_t k)
{
  if (g->kmax >= k) {
    return;
  }

  // slot 0 is never used for lookups; it exists so that array indices
  // coincide with values of k
  const size_t nslots = k + 1;
  HHashTable **first = h_arena_malloc(g->arena, nslots*sizeof(HHashTable *));
  HHashTable **follow = h_arena_malloc(g->arena, nslots*sizeof(HHashTable *));

  size_t i;
  if (g->kmax == 0) {
    // first call: create the (dummy) tables for index 0
    first[0] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr);
    follow[0] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr);
  } else {
    // growing: carry the existing tables over
    for(i=0; i<=g->kmax; i++) {
      first[i] = g->first[i];
      follow[i] = g->follow[i];
    }
  }

  // fresh tables for the newly added depths
  for(i=g->kmax+1; i<=k; i++) {
    first[i] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr);
    follow[i] = h_hashtable_new(g->arena, h_eq_ptr, h_hash_ptr);
  }

  g->first = first;
  g->follow = follow;
  g->kmax = k;
}
bool h_derives_epsilon(HCFGrammar *g, const HCFChoice *symbol)
{
// XXX this can now also be implemented in terms of h_first:
......@@ -232,6 +218,76 @@ static void collect_geneps(HCFGrammar *g)
} while(g->geneps->used != prevused);
}
// true iff x occurs in the NULL-terminated symbol array s
static bool mentions_symbol(HCFChoice **s, const HCFChoice *x)
{
  HCFChoice **it = s;

  while (*it != NULL) {
    if (*it == x)
      return true;
    it++;
  }
  return false;
}
// For every nonterminal in g->nts, delete each production whose right-hand
// side mentions x. Production order within a nonterminal is not preserved.
static void remove_productions_with(HCFGrammar *g, const HCFChoice *x)
{
  for(size_t bucket=0; bucket < g->nts->capacity; bucket++) {
    HHashTableEntry *entry = &g->nts->contents[bucket];

    for(; entry != NULL; entry = entry->next) {
      if (entry->key == NULL)
        continue;

      const HCFChoice *nt = entry->key;
      assert(nt->type == HCF_CHOICE);

      HCFSequence **cur = nt->seq;
      while (*cur != NULL) {
        if (!mentions_symbol((*cur)->items, x)) {
          cur++;
          continue;
        }

        // overwrite the doomed production with the last one and shorten
        // the array by one; cur is re-examined on the next pass
        HCFSequence **last = cur;
        while (last[1] != NULL)
          last++;
        *cur = *last;
        *last = NULL;
      }
    }
  }
}
// Repeatedly find a nonterminal with no productions, drop it from g->nts
// and delete every production that mentions it, until none is left. Then
// rebuild g->nts from the start symbol.
static void eliminate_dead_rules(HCFGrammar *g)
{
  for(;;) {
    const HCFChoice *dead = NULL;

    // scan for the first nonterminal with an empty production list
    for(size_t bucket=0; dead == NULL && bucket < g->nts->capacity; bucket++) {
      HHashTableEntry *entry = &g->nts->contents[bucket];

      for(; dead == NULL && entry != NULL; entry = entry->next) {
        if (entry->key == NULL)
          continue;

        const HCFChoice *nt = entry->key;
        assert(nt->type == HCF_CHOICE);

        if (*nt->seq == NULL)
          dead = nt;
      }
    }

    if (dead == NULL)
      break;            // nothing left to remove

    h_hashtable_del(g->nts, dead);
    remove_productions_with(g, dead);
  }

  // rebuild g->nts. there may now be symbols that no longer appear in any
  // productions. we also might have removed g->start.
  g->nts = h_hashset_new(g->arena, h_eq_ptr, h_hash_ptr);
  collect_nts(g, g->start);
}
HStringMap *h_stringmap_new(HArena *a)
{
......@@ -240,6 +296,7 @@ HStringMap *h_stringmap_new(HArena *a)
m->end_branch = NULL;
m->char_branches = h_hashtable_new(a, h_eq_ptr, h_hash_ptr);
m->arena = a;
m->taint = false;
return m;
}
......@@ -396,30 +453,65 @@ bool h_stringmap_empty(const HStringMap *m)
&& h_hashtable_empty(m->char_branches));
}
const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
static bool eq_stringmap(const void *a, const void *b)
{
return h_stringmap_equal(a, b);
}
// Two string maps are equal when their epsilon and end branches hold the
// same pointers and their char_branches tables compare equal, with nested
// maps compared through eq_stringmap.
bool h_stringmap_equal(const HStringMap *a, const HStringMap *b)
{
  const bool leaves_match = a->epsilon_branch == b->epsilon_branch
                         && a->end_branch == b->end_branch;

  return leaves_match
      && h_hashtable_equal(a->char_branches, b->char_branches, eq_stringmap);
}
// helper for h_follow and h_first: compare two worksets, either of which
// may be NULL. NULL only equals NULL.
bool workset_equal(HHashTable *a, HHashTable *b)
{
  if (a != NULL && b != NULL)
    return h_hashtable_equal(a, b, eq_stringmap);

  return a == b;
}
static const HStringMap *
h_first_seq_work(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s);
static const HStringMap *
h_first_work(size_t k, HCFGrammar *g, HHashTable **pws, const HCFChoice *x)
{
HHashTable *ws = *pws;
HStringMap *ret;
HCFSequence **p;
uint8_t c;
struct k_nt kx = {k,x};
struct k_nt *pkx = NULL;
bool taint = false;
// shortcut: first_0(X) is always {""}
if (k==0) {
return g->singleton_epsilon;
}
// memoize via g->first
ensure_k(g, k);
ret = h_hashtable_get(g->first[k], x);
// shortcut: first_k($) is always {$}
if (x->type == HCF_END) {
return g->singleton_end;
}
// check memoization and workset
ret = h_hashtable_get(g->first, &kx);
if (ret == NULL && ws != NULL)
ret = h_hashtable_get(ws, &kx);
if (ret != NULL) {
return ret;
}
// not found, create result
ret = h_stringmap_new(g->arena);
assert(ret != NULL);
h_hashtable_put(g->first[k], x, ret);
switch(x->type) {
case HCF_END:
h_stringmap_put_end(ret, INSET);
break;
case HCF_CHAR:
h_stringmap_put_char(ret, x->chr, INSET);
break;
......@@ -433,30 +525,75 @@ const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
break;
case HCF_CHOICE:
// this is a nonterminal
// to avoid recursive loops, taint ret and place it in workset
ret->taint = true;
if (ws == NULL)
ws = *pws = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt);
pkx = h_arena_malloc(g->arena, sizeof kx);
*pkx = kx;
h_hashtable_put(ws, pkx, ret);
// return the union of the first sets of all productions
for(p=x->seq; *p; ++p)
h_stringmap_update(ret, h_first_seq(k, g, (*p)->items));
for(p=x->seq; *p; ++p) {
const HStringMap *first_rhs = h_first_seq_work(k, g, pws, (*p)->items);
assert(ws == *pws); // call above did not change the workset pointer
taint |= first_rhs->taint;
h_stringmap_update(ret, first_rhs);
}
break;
default: // should not be reached
assert_message(0, "unknown HCFChoice type");
assert_message(0, "unexpected HCFChoice type");
}
// immediately memoize ret and remove it from ws if untainted by recursion
if (!taint) {
if (pkx == NULL) {
pkx = h_arena_malloc(g->arena, sizeof kx);
*pkx = kx;
} else if (ws != NULL) {
// we already had a key, so ret might (will) be in ws; remove it.
h_hashtable_del(ws, pkx);
}
ret->taint = false;
h_hashtable_put(g->first, pkx, ret);
}
return ret;
}
// Compute first_k(x). h_first_work is re-run with a fresh workset until the
// workset it produces equals the one from the previous run.
const HStringMap *h_first(size_t k, HCFGrammar *g, const HCFChoice *x)
{
  HHashTable *prev = NULL;
  const HStringMap *ret;

  // fixpoint iteration on workset
  for(;;) {
    HHashTable *cur = NULL;
    ret = h_first_work(k, g, &cur, x);
    if (workset_equal(cur, prev))
      break;
    prev = cur;
  }

  assert(ret != NULL);
  return ret;
}
// helpers for h_first_seq, definitions below
static bool is_singleton_epsilon(const HStringMap *m);
static bool any_string_shorter(size_t k, const HStringMap *m);
// pointer to functions like h_first_seq
typedef const HStringMap *(*StringSetFun)(size_t, HCFGrammar *, HCFChoice **);
typedef const HStringMap *
(*StringSetFun)(size_t, HCFGrammar *, HHashTable **, HCFChoice **);
// helper for h_first_seq and h_follow
static void stringset_extend(HCFGrammar *g, HStringMap *ret,
static bool stringset_extend(HCFGrammar *g, HHashTable **pws, HStringMap *ret,
size_t k, const HStringMap *as,
StringSetFun f, HCFChoice **tail);
const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
static const HStringMap *
h_first_seq_work(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s)
{
// shortcut: the first set of the empty sequence, for any k, is {""}
if (*s == NULL) {
......@@ -467,11 +604,11 @@ const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
HCFChoice *x = s[0];
HCFChoice **tail = s+1;
const HStringMap *first_x = h_first(k, g, x);
const HStringMap *first_x = h_first_work(k, g, pws, x);
// shortcut: if first_k(X) = {""}, just return first_k(tail)
if (is_singleton_epsilon(first_x)) {
return h_first_seq(k, g, tail);
return h_first_seq_work(k, g, pws, tail);
}
// shortcut: if no elements of first_k(X) have length <k, just return first_k(X)
......@@ -483,8 +620,25 @@ const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
HStringMap *ret = h_stringmap_new(g->arena);
// extend the elements of first_k(X) up to length k from tail
stringset_extend(g, ret, k, first_x, h_first_seq, tail);
ret->taint = stringset_extend(g, pws, ret, k, first_x, h_first_seq_work, tail);
return ret;
}
// Compute first_k of the symbol sequence s. h_first_seq_work is re-run with
// a fresh workset until the workset it produces equals the one from the
// previous run.
const HStringMap *h_first_seq(size_t k, HCFGrammar *g, HCFChoice **s)
{
  HHashTable *prev = NULL;
  const HStringMap *ret;

  // fixpoint iteration on workset
  for(;;) {
    HHashTable *cur = NULL;
    ret = h_first_seq_work(k, g, &cur, s);
    if (workset_equal(cur, prev))
      break;
    prev = cur;
  }

  assert(ret != NULL);
  return ret;
}
......@@ -546,13 +700,25 @@ static void remove_all_shorter(size_t k, HStringMap *m)
}
// h_follow adapted to the signature of StringSetFun
static inline
const HStringMap *h_follow_(size_t k, HCFGrammar *g, HCFChoice **s)
static const HStringMap *
h_follow_(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s)
{
  // this variant runs h_follow's own fixpoint iteration, so it must not be
  // handed a workset
  assert(pws == NULL);

  const HCFChoice *nt = *s;
  return h_follow(k, g, nt);
}
const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
static const HStringMap *
h_follow_work(size_t k, HCFGrammar *g, HHashTable **pws, const HCFChoice *x);
// StringSetFun-shaped wrapper around h_follow_work: s points at the single
// symbol whose follow set is wanted
static const HStringMap *
h_follow_work_(size_t k, HCFGrammar *g, HHashTable **pws, HCFChoice **s)
{
  const HCFChoice *nt = *s;

  return h_follow_work(k, g, pws, nt);
}
static const HStringMap *
h_follow_work(size_t k, HCFGrammar *g, HHashTable **pws, const HCFChoice *x)
{
// consider all occurrences of X in g
// the follow set of X is the union of:
......@@ -564,28 +730,45 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
// { a b | a <- first_k(tail), b <- follow_l(A), l=k-|a| }
HStringMap *ret;
HHashTable *ws = *pws;
struct k_nt kx = {k,x};
struct k_nt *pkx;
bool taint = false;
// shortcut: follow_0(X) is always {""}
if (k==0) {
return g->singleton_epsilon;
}
// memoize via g->follow
ensure_k(g, k);
ret = h_hashtable_get(g->follow[k], x);
// check memoization and workset
ret = h_hashtable_get(g->follow, &kx);
if (ret == NULL && ws != NULL)
ret = h_hashtable_get(ws, &kx);
if (ret != NULL) {
return ret;
}
// not found, create result
ret = h_stringmap_new(g->arena);
assert(ret != NULL);
h_hashtable_put(g->follow[k], x, ret);
// to avoid recursive loops, taint ret and place it in workset
ret->taint = true;
if (ws == NULL)
ws = *pws = h_hashtable_new(g->arena, eq_k_nt, hash_k_nt);
pkx = h_arena_malloc(g->arena, sizeof kx);
*pkx = kx;
h_hashtable_put(ws, pkx, ret);
// if X is the start symbol, the end token is in its follow set
if (x == g->start) {
h_stringmap_put_end(ret, INSET);
}
// iterate over g->nts
// iterate over g->nts, looking for X
size_t i;
HHashTableEntry *hte;
int x_found=0;
for (i=0; i < g->nts->capacity; i++) {
for (hte = &g->nts->contents[i]; hte; hte = hte->next) {
if (hte->key == NULL) {
......@@ -600,19 +783,46 @@ const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
HCFChoice **s = (*p)->items; // production's right-hand side
for (; *s; s++) {
if (*s == x) { // occurance found
if (*s == x) { // occurrence found
x_found=1;
HCFChoice **tail = s+1;
const HStringMap *first_tail = h_first_seq(k, g, tail);
// extend the elems of first_k(tail) up to length k from follow(A)
stringset_extend(g, ret, k, first_tail, h_follow_, &a);
taint |= stringset_extend(g, pws, ret, k,
first_tail, h_follow_work_, &a);
}
}
}
}
}
assert(x_found || x == g->start); // no orphan non-terminals
// immediately memoize ret and remove it from ws if untainted by recursion
if (!taint) {
ret->taint = false;
h_hashtable_del(ws, pkx);
h_hashtable_put(g->follow, pkx, ret);
}
return ret;
}
// Compute follow_k(x). h_follow_work is re-run with a fresh workset until
// the workset it produces equals the one from the previous run.
const HStringMap *h_follow(size_t k, HCFGrammar *g, const HCFChoice *x)
{
  HHashTable *prev = NULL;
  const HStringMap *ret;

  // fixpoint iteration on workset
  for(;;) {
    HHashTable *cur = NULL;
    ret = h_follow_work(k, g, &cur, x);
    if (workset_equal(cur, prev))
      break;
    prev = cur;
  }

  assert(ret != NULL);
  return ret;
}
......@@ -629,7 +839,7 @@ HStringMap *h_predict(size_t k, HCFGrammar *g,
// casting the const off of A below. note: stringset_extend does
// not touch this argument, only passes it through to h_follow
// in this case, which accepts it, once again, as const.
stringset_extend(g, ret, k, first_rhs, h_follow_, (HCFChoice **)&A);
stringset_extend(g, NULL, ret, k, first_rhs, h_follow_, (HCFChoice **)&A);
// make sure there are only strings of length _exactly_ k
remove_all_shorter(k, ret);
......@@ -638,13 +848,17 @@ HStringMap *h_predict(size_t k, HCFGrammar *g,
}
// add the set { a b | a <- as, b <- f_l(S), l=k-|a| } to ret
static void stringset_extend(HCFGrammar *g, HStringMap *ret,
static bool stringset_extend(HCFGrammar *g, HHashTable **pws, HStringMap *ret,
size_t k, const HStringMap *as,
StringSetFun f, HCFChoice **tail)
{
bool taint = false;
if (as->epsilon_branch) {
// for a="", add f_k(tail) to ret
h_stringmap_update(ret, f(k, g, tail));
const HStringMap *f_tail = f(k, g, pws, tail);
taint |= f_tail->taint;
h_stringmap_update(ret, f_tail);
}
if (as->end_branch) {
......@@ -671,9 +885,11 @@ static void stringset_extend(HCFGrammar *g, HStringMap *ret,
HStringMap *ret_ = h_stringmap_new(g->arena);
h_stringmap_put_after(ret, c, ret_);
stringset_extend(g, ret_, k-1, as_, f, tail);
taint |= stringset_extend(g, pws, ret_, k-1, as_, f, tail);
}
}
return taint;
}
......@@ -818,13 +1034,15 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
fputs(name, f);
i += strlen(name);
for(; i<column; i++) fputc(' ', f);
fputs(" ->", f);
assert(nt->type == HCF_CHOICE);
HCFSequence **p = nt->seq;
if (*p == NULL) {
return; // shouldn't happen
fputs(" -x\n", f); // empty choice, e.g. h_nothing_p()
return;
}
fputs(" ->", f);
pprint_sequence(f, g, *p++); // print first production on the same line
for(; *p; p++) { // print the rest below with "or" bars
for(i=0; i<column; i++) fputc(' ', f); // indent
......@@ -835,6 +1053,8 @@ static void pprint_ntrules(FILE *f, const HCFGrammar *g, const HCFChoice *nt,
void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
{
HAllocator *mm__ = g->mm__;
if (g->nts->used < 1) {
return;
}
......@@ -842,11 +1062,12 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
// determine maximum string length of symbol names
int len;
size_t s;
for(len=1, s=26; s < g->nts->used; len++, s*=26);
for(len=1, s=26; s < g->nts->used; len++, s*=26);
// iterate over g->nts
// iterate over g->nts and collect its entries in an ordered array
size_t i;
HHashTableEntry *hte;
const HCFChoice **arr = h_new(const HCFChoice *, g->nts->used);
for(i=0; i < g->nts->capacity; i++) {
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
if (hte->key == NULL) {
......@@ -855,9 +1076,16 @@ void h_pprint_grammar(FILE *file, const HCFGrammar *g, int indent)
const HCFChoice *a = hte->key; // production's left-hand symbol
assert(a->type == HCF_CHOICE);
pprint_ntrules(file, g, a, indent, len);
size_t id = (uintptr_t)hte->value; // nonterminal id
assert(id < g->nts->used);
arr[id] = a;
}
}
// print rules in alphabetical order
for(i=0; i < g->nts->used; i++)
pprint_ntrules(file, g, arr[i], indent, len);
h_free(arr);
}
void h_pprint_symbolset(FILE *file, const HCFGrammar *g, const HHashSet *set, int indent)
......
......@@ -8,15 +8,15 @@ typedef struct HCFGrammar_ {
HHashSet *nts; // HCFChoices, each representing the alternative
// productions for one nonterminal
HHashSet *geneps; // set of NTs that can generate the empty string
HHashTable **first; // memoized first sets of the grammar's symbols
HHashTable **follow; // memoized follow sets of the grammar's NTs
size_t kmax; // maximum lookahead depth allocated
HHashTable *first; // memoized first sets of the grammar's symbols
HHashTable *follow; // memoized follow sets of the grammar's NTs
HArena *arena;
HAllocator *mm__;
// constant set containing only the empty string.
// this is only a member of HCFGrammar because it needs a pointer to arena.
// constant sets containing only the empty string or end symbol.
// these are only members of HCFGrammar because they need a pointer to arena.
const struct HStringMap_ *singleton_epsilon;
const struct HStringMap_ *singleton_end;
} HCFGrammar;
......@@ -37,6 +37,7 @@ typedef struct HStringMap_ {
void *end_branch; // points to leaf value
HHashTable *char_branches; // maps to inner nodes (HStringMaps)
HArena *arena;
bool taint; // for use by h_follow() and h_first()
} HStringMap;
HStringMap *h_stringmap_new(HArena *a);
......@@ -52,6 +53,7 @@ void *h_stringmap_get_lookahead(const HStringMap *m, HInputStream lookahead);
bool h_stringmap_present(const HStringMap *m, const uint8_t *str, size_t n, bool end);
bool h_stringmap_present_epsilon(const HStringMap *m);
bool h_stringmap_empty(const HStringMap *m);
bool h_stringmap_equal(const HStringMap *a, const HStringMap *b);
static inline HStringMap *h_stringmap_get_char(const HStringMap *m, const uint8_t c)
{ return h_hashtable_get(m->char_branches, (void *)char_key(c)); }
......