diff --git a/HACKING b/HACKING index d923217e8007694f45de5f99802321ac6f9b37c0..970a2491e9bcc03b39cc7e3fd18b74c2da136d0a 100644 --- a/HACKING +++ b/HACKING @@ -52,3 +52,11 @@ documented), assume that In other words, don't let the (memory manager) streams cross. +Language-independent test suite +=============================== + +There is a language-independent representation of the Hammer test +suite in `lib/test-suite`. This is intended to be used with the +tsparser.pl prolog library, along with a language-specific frontend. + +No language-specific frontends have been written yet. diff --git a/lib/test-suite b/lib/test-suite new file mode 100644 index 0000000000000000000000000000000000000000..207ece4054fabb0c31041e8c394cbe28b60c33ef --- /dev/null +++ b/lib/test-suite @@ -0,0 +1,312 @@ +# This file is to be processed with testgen.pl to produce test suites +# for bindings. The only escape code that is valid is \x + +# The only type that may change between bindings is "char", which gets +# converted into the most convenient representation of a character in +# a given language. In C, this is a TT_UINT; in P*, it's a +# single-character string. In this file, a character is wrapped with +# single quotes. + +# Strings may be given in C syntax, with no escapes other than \xHH or +# in hex syntax, like <31.32.33> for "123". + +# indirect/bind_indirect can be used via subparsers. A subparser is +# introduced with a "subparser $<name> = <definition()>;" command. +# First, all subparsers are declared with h_indirect(). Then, they +# are all bound. Inside a definition, all subparsers can be +# referenced by $<name>. + +token { + parser token("95\xa2"); + test "95\xa2" --> "95\xa2"; + test "95\xa2" --> fail; +} + +ch { + parser ch(0xA2); + test "\xa2" --> '\xa2'; + test "\xa3" --> fail; +} + +ch_range { + parser ch_range(0x61,0x63); + test "b" --> 'b'; + test "d" --> fail; +} + +int64 { + parser int64(); + test <ff.ff.ff.fe.00.00.00.00> --> s-0x200000000; + test <ff.ff.ff.fe.00.00.00> --> fail; +} + +int32 { + parser int32(); + test <ff.fe.00.00> --> s-0x20000; + test <ff.fe.00> --> fail; + test <00.02.00.00> --> s0x20000; + test <00.02.00> --> fail; +} + +int16 { + parser int16(); + test <fe.00> --> s-0x200; + test <fe> --> fail; + test <02.00> --> s0x200; + test <02> --> fail; +} + +int8 { + parser int8(); + test <88> --> s-0x78; + test <> --> fail; +} + +uint64 { + parser uint64(); + test <00.00.00.02.00.00.00.00> --> u0x200000000; + test <00.00.00.02.00.00.00> --> fail; +} + +uint32 { + parser uint32(); + test <00.02.00.00> --> u0x20000; + test <00.02.00> --> fail; +} + +uint16 { + parser uint16(); + test <02.00> --> u0x200; + test <02> --> fail; +} + +uint8 { + parser uint8(); + test <78> --> u0x78; + test <> --> fail; +} + +int_range { + parser int_range(uint8(), 0x3, 0x10); + test <05> --> u0x05; + test <0b> --> fail; +} + +whitespace { + parser whitespace(ch(0x61)); + test "a" --> 'a'; + test " a" --> 'a'; + test " a" --> 'a'; + test "\x09a" --> 'a'; + test "_a" --> fail; + + parser whitespace(end_p()); + test "" --> none; + test " " --> none; + test " x" --> fail; +} + +left { + parser left(ch(0x61), ch(0x20)); + test "a " --> 'a'; + test "a" --> fail; + test " " --> fail; + test "ba" --> fail; +} + +middle { + parser middle(ch(' '), ch('a'), ch(' ')); + test " a " --> 'a'; + test "a" --> fail; + test " a" --> fail; + test "a " --> fail; + test " b " --> fail; + test "ba " --> fail; + test " ab" --> fail; +} + +# TODO: action + +in { + parser in("abc"); + test "b" --> 'b'; + test "d" --> fail; +} + +not_in { + parser not_in("abc"); + test "d" --> 'd'; + test "a" --> fail; +} + +end_p { + parser sequence(ch('a'), end_p()); + test "a" --> ['a']; + test "aa" --> fail; +} + +nothing_p { + parser nothing_p(); + test "a" --> fail; +} + +sequence { + parser sequence(ch('a'), ch('b')); + test "ab" --> ['a', 'b']; + test "a" --> fail; + test "b" --> fail; + + parser sequence(ch('a'), whitespace(ch('b'))); + test "ab" --> ['a','b']; + test "a b" --> ['a','b']; + test "a b" --> ['a','b']; +} + +choice { + parser choice(ch('a'), ch('b')); + test "a" --> 'a'; + test "b" --> 'b'; + test "ab" --> 'a'; + test "c" --> fail; +} + +butnot { + parser butnot(ch('a'), token("ab")); + test "a" --> 'a'; + test "ab" --> fail; + test "aa" --> 'a'; + + parser butnot(ch_range('0','9'), ch('6')); + test "5" --> '5'; + test "6" --> fail; +} + +difference { + # Not sure this actually tests anything + parser difference(token("ab"), ch('a')); + test "ab" --> "ab"; + test "a" --> fail; +} + +xor { + parser xor(ch_range('0','6'), ch_range('5','9')); + test "0" --> '0'; + test "9" --> '9'; + test "5" --> fail; + test "a" --> fail; +} + +many { + # TODO: Some of the parsers in the original were commented out. + parser many(choice(ch('a'), ch('b'))); + test "" --> []; + test "a" --> ['a']; + test "b" --> ['b']; + test "aabbaba" --> ['a','a','b','b','a','b','a']; +} + +many1 { + # TODO: Some of the checks in the original were commented out. + parser many1(choice(ch('a'),ch('b'))); + test "" --> fail; + test "a" --> ['a']; + test "b" --> ['b']; + test "aabbaba" --> ['a','a','b','b','a','b','a']; + test "daabbabadef" --> fail; +} + +repeat-n { + parser repeat_n(choice(ch('a'),ch('b')),0x2); + test "adef" --> fail; + test "abdef" --> ['a','b']; + test "dabdef" --> fail; +} + +optional { + parser sequence(ch('a'), optional(choice(ch('b'),ch('c'))), ch('d')); + test "abd" --> ['a','b','d']; + test "acd" --> ['a','c','d']; + test "ad" --> ['a',none,'d']; + test "aed" --> fail; + test "ab" --> fail; + test "ac" --> fail; +} + +ignore { + parser sequence(ch('a'), ignore(ch('b')), ch('c')); + test "abc" --> ['a','c']; + test "ac" --> fail; +} + +sepBy { + parser sepBy(choice(ch('1'), ch('2'), ch('3')), ch(',')); + test "1,2,3" --> ['1','2','3']; + test "1,3,2" --> ['1','3','2']; + test "1,3" --> ['1','3']; + test "3" --> ['3']; + test "" --> []; +} + +sepBy1 { + parser sepBy1(choice(ch('1'), ch('2'), ch('3')), ch(',')); + test "1,2,3" --> ['1','2','3']; + test "1,3,2" --> ['1','3','2']; + test "1,3" --> ['1','3']; + test "3" --> ['3']; + test "" --> fail; +} + +# TODO: attr_bool; +and { + parser sequence(and(ch('0')), ch('0')); + test "0" --> ['0']; + test "1" --> fail; + parser sequence(and(ch('0')), ch('1')); + test "0" --> fail; + test "1" --> fail; + parser sequence(ch('1'), and(ch('2'))); + test "12" --> ['1']; + test "13" --> fail; +} + +not { + parser sequence(ch('a'), choice(token('+'), token("++")), ch('b')); + test "a+b" --> ['a',"+",'b']; + test "a++b" --> fail; + + parser sequence(ch('a'), choice(sequence(token('+'), not(ch('+'))), + token("++")), + ch('b')); + test "a+b" --> ['a', ["+"], 'b']; + test "a++b" --> ['a', "++", 'b']; +} + +leftrec { + subparser $lr = choice(sequence($lr, ch('a')), epsilon_p()); + parser $lr; + test "a" --> ['a']; + test "aa" --> [['a'],'a']; + test "aaa" --> [[['a'],'a'],'a']; +} + +rightrec { + subparser $rr = choice(sequence(ch('a'), $rr), epsilon_p()); + parser $rr; + test "a" --> ['a']; + test "aa" --> ['a',['a']]; + test "aaa" --> ['a',['a',['a']]]; +} + +ambiguous { + subparser $d = ch('d'); + subparser $p = ch('+'); + subparser $e = choice(sequence($e, $p, $e), $d); + # TODO: implement action/h_act_flatten + parser $e; + + test "d" --> 'd'; + test "d+d" --> ['d','+','d']; + test "d+d+d" --> [['d','+','d'],'+','d']; +} + + diff --git a/lib/testgen.pl b/lib/testgen.pl new file mode 100644 index 0000000000000000000000000000000000000000..c8ff7e0888f08ab0be5af405e70b934ab1b825f9 --- /dev/null +++ b/lib/testgen.pl @@ -0,0 +1,197 @@ +% -*- prolog -*- + +% test suite grammar +:- module(tsparser, + [read_tc/1]). +:- expects_dialect(swi). +:- use_module(library(pure_input)). + + +nl --> "\r", !. +nl --> "\n", !. +ws --> " ", !. +ws --> "\t", !. +ws --> nl, !. +nonws --> ws, !, {fail}. +nonws --> [_]. % any character other than whitespace +tilEOL --> nl, !. +tilEOL --> [_], tilEOL. +comment --> "#", tilEOL. +layout1 --> ws, !; comment. +layout --> layout1, !, layout. +layout --> []. +eof([],[]). + +idChar(Chr) --> + [Chr], + {memberchk(Chr, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")}. +idTailChar(Chr) --> idChar(Chr), !. +idTailChar(Chr) --> + [Chr], + {memberchk(Chr, "1234567890-_")}. + +id(Name) --> % toplevel lexical token + idChar(Chr), + idTail(Rest), + {atom_codes(Name, [Chr|Rest])}. +idTail([Chr|Rest]) --> + idTailChar(Chr), !, + idTail(Rest). +idTail([]) --> []. + +string(Str) --> "\"", qstring(Str), "\"", !. +string(Str) --> "<", hexstring(Str), ">". + +qchar(C) --> "\\x", !, hexbyte(C). +qchar(C) --> [C], {C >= 0x20, C < 0x7F}. +qstring([], [34|R], [34|R]) :- !. +qstring([C|Rest]) --> + qchar(C), qstring(Rest). +qstring([]). +hexstring([C|Rest]) --> + hexbyte(C), !, + hexstring_post(Rest). +hexstring([]) --> + []. +hexstring_post([C|Rest]) --> + ".", !, + hexbyte(C), + hexstring_post(Rest). +hexstring_post([]) --> []. + +hexchr(C) --> [C0], {memberchk(C0, "0123456789"), C is C0 - 0x30}, !. +hexchr(C) --> [C0], {memberchk(C0, "abcdef"), C is C0 - 0x61 + 10}, !. +hexchr(C) --> [C0], {memberchk(C0, "ABCDEF"), C is C0 - 0x41 + 10}, !. +hexbyte(C) --> + hexchr(C0), + hexchr(C1), + {C is C0 * 16 + C1}. +hexnum(A) --> + hexchr(C0), + hexnum(A, C0). +hexnum(A, Acc) --> + hexchr(C0), !, + {AccP is Acc * 16 + C0}, + hexnum(A, AccP). +hexnum(Acc, Acc) --> []. + + +parse_result_seq([A|Ar]) --> + parse_result(A), layout, + parse_result_seq_tail(Ar). +parse_result_seq([]) --> + layout. +parse_result_seq_tail([]) --> []. +parse_result_seq_tail([A|Ar]) --> + ",", layout, + parse_result(A), layout, + parse_result_seq_tail(Ar). + +parse_result(seq(A)) --> + "[", layout, + parse_result_seq(A), % eats trailing ws + "]", !. +parse_result(char(A)) --> + "'", qchar(A), "'", !. +parse_result(uint(A)) --> + "u0x", hexnum(A), !. +parse_result(sint(A)) --> + "s0x", hexnum(A), !. +parse_result(sint(A)) --> + "s-0x", hexnum(A0), {A is -A0}, !. +parse_result(string(A)) --> + string(A), !. +parse_result(none) --> "none". + +parser_arg(A) --> parser(A), !. +parser_arg(string(A)) --> string(A), !. +parser_arg(char(A)) --> "'", qchar(A), "'", !. +parser_arg(num(A)) --> "0x", hexnum(A). + +parser_arglist_tail([A0|As]) --> + % eats leading whitespace + ",", !, layout, + parser_arg(A0), layout, + parser_arglist_tail(As). +parser_arglist_tail([]) --> []. + +parser_arglist([]) --> []. +parser_arglist([A0|As]) --> + parser_arg(A0), + layout, + parser_arglist_tail(As). % eats trailing whitespace + + +parser(ref(Name)) --> + "$", !, id(Name). +parser(parser(Name, Args)) --> + id(Name), layout, + "(", layout, + parser_arglist(Args), % eats trailing whitespace + ")". + +testcase_elem(subparser(Name,Value)) --> + "subparser", layout1, layout, !, + "$", id(Name), layout, + "=", layout, + parser(Value), layout, + ";". +testcase_elem(parser(Value)) --> + "parser", layout1, layout, !, + parser(Value), layout, + ";". +testcase_elem(testFail(Input)) --> + "test", layout1, layout, + string(Input), layout, + "-->", layout, + "fail", layout, + ";", !. +testcase_elem(test(Input, Expected)) --> + "test", layout1, layout, + string(Input), layout, + "-->", layout, + parse_result(Expected), layout, + ";", !. + +testcase_body([Elem|Rest]) --> + testcase_elem(Elem), layout, !, + testcase_body(Rest). +testcase_body([]) --> []. + +testcase(testcase(Name,Content)) --> + id(Name), + layout, + "{", + layout, + testcase_body(Content), + layout, + "}". + +testcases([]) --> + layout, eof, !. +testcases([TC|Rest]) --> + layout, + testcase(TC), + testcases(Rest). +testsuite(A) --> + testcases(A). + + +%slurp(File, Lst) :- +% get_code(File, C), +% (C == -1 -> +% Lst = []; +% slurp(File, Rest), +% Lst = [C|Rest]), !. + +read_tc(A) :- + phrase_from_file(testsuite(A), 'test-suite'). + +%read_tc(A) :- +% read_tc('test-suite', A). +%read_tc(Name, A) :- +% open(Name, read, File), +% slurp(File, Contents), +% close(File), !, +% phrase(testsuite(A), Contents). +