diff --git a/.travis.yml b/.travis.yml index 2328d03275b8109b1756831e06a756df2b409ea4..e483b5ff3ab597448facff290a5ea595d65856b3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,6 +6,30 @@ env: - BINDINGS=none matrix: include: + - compiler: gcc + language: ruby + rvm: ruby-1.9.3-p484 + env: BINDINGS=ruby + - compiler: clang + language: ruby + rvm: ruby-1.9.3-p484 + env: BINDINGS=ruby CC=clang + - compiler: gcc + language: ruby + rvm: ruby-2.0.0-p353 + env: BINDINGS=ruby + - compiler: clang + language: ruby + rvm: ruby-2.0.0-p353 + env: BINDINGS=ruby CC=clang + - compiler: gcc + language: ruby + rvm: ruby-2.1.0 + env: BINDINGS=ruby + - compiler: clang + language: ruby + rvm: ruby-2.1.0 + env: BINDINGS=ruby CC=clang - compiler: gcc language: python python: "2.7" @@ -66,7 +90,6 @@ before_install: - if [ "$BINDINGS" == "perl" ]; then sudo add-apt-repository ppa:dns/irc -y; sudo apt-get update -qq; sudo apt-get install -qq swig=2.0.8-1irc1~12.04; fi - if [ "$BINDINGS" == "python" ]; then sudo apt-get install -qq python-dev; fi - if [ "$BINDINGS" == "dotnet" ]; then sudo add-apt-repository ppa:directhex/monoxide -y; sudo apt-get update -qq; sudo apt-get install -qq mono-devel mono-mcs nunit nunit-console; mozroots --import --sync; fi - install: true before_script: - if [ "$BINDINGS" == "php" ]; then phpenv config-add src/bindings/php/hammer.ini; fi diff --git a/README.md b/README.md index 1e1dee9a10e599ca18f4b03f1adfb7ec386ba99c..4334e68a8d543b886794e3542688877f7f2da803 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Features * C++ (not yet implemented) * Java (not currently building; give us a few days) * Python - * Ruby (not yet implemented) + * Ruby * Perl * [Go](https://github.com/prevoty/hammer) * PHP @@ -39,6 +39,7 @@ Installing * python2.7-dev (for Python bindings) * a JDK (for Java bindings) * a working [phpenv](https://github.com/CHH/phpenv) configuration (for PHP bindings) +* Ruby >= 1.9.3 and bundler, for the Ruby bindings * mono-devel and mono-mcs (>= 3.0.6) (for .NET bindings) * nunit (for testing .NET bindings) diff --git a/SConstruct b/SConstruct index 17b1009e0f2121a07de9bc94da8d889e17d91e6f..fe1c78ef381cbe1b92d9c382299fe8fc65f7071b 100644 --- a/SConstruct +++ b/SConstruct @@ -7,7 +7,7 @@ import sys vars = Variables(None, ARGUMENTS) vars.Add(PathVariable('DESTDIR', "Root directory to install in (useful for packaging scripts)", None, PathVariable.PathIsDirCreate)) vars.Add(PathVariable('prefix', "Where to install in the FHS", "/usr/local", PathVariable.PathAccept)) -vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['dotnet', 'perl', 'php', 'python'])) +vars.Add(ListVariable('bindings', 'Language bindings to build', 'none', ['dotnet', 'perl', 'php', 'python', 'ruby'])) env = Environment(ENV = {'PATH' : os.environ['PATH']}, variables = vars, diff --git a/lib/test-suite b/lib/test-suite index 6c15b3d60fe77e81ce0594e3f993726bb0f5f53f..7f00b8e36ad0b9d3d3eff2342fd72941f42f46e3 100644 --- a/lib/test-suite +++ b/lib/test-suite @@ -19,7 +19,7 @@ token { parser token("95\xa2"); test "95\xa2" --> "95\xa2"; - test "95\xa2" --> fail; + test "95\xa3" --> fail; } ch { @@ -87,7 +87,7 @@ uint8 { } int_range { - parser int_range(uint8(), 0x3, 0x10); + parser int_range(uint8(), 0x3, 0xa); test <05> --> u0x05; test <0b> --> fail; } @@ -299,17 +299,17 @@ rightrec { test "aa" --> ['a',['a']]; test "aaa" --> ['a',['a',['a']]]; } - -ambiguous { - subparser $d = ch('d'); - subparser $p = ch('+'); - subparser $e = choice(sequence($e, $p, $e), $d); - # TODO: implement action/h_act_flatten - parser $e; - - test "d" --> 'd'; - test "d+d" --> ['d','+','d']; - test "d+d+d" --> [['d','+','d'],'+','d']; -} +## Only for GLR +#ambiguous { +# subparser $d = ch('d'); +# subparser $p = ch('+'); +# subparser $e = choice(sequence($e, $p, $e), $d); +# # TODO: implement action/h_act_flatten +# parser $e; +# +# test "d" --> 'd'; +# test "d+d" --> ['d','+','d']; +# test "d+d+d" --> [['d','+','d'],'+','d']; +#} diff --git a/lib/tsgenruby.pl b/lib/tsgenruby.pl new file mode 100644 index 0000000000000000000000000000000000000000..d866eeed3c3b583e65365573e545441277db84b7 --- /dev/null +++ b/lib/tsgenruby.pl @@ -0,0 +1,259 @@ +% -*- prolog -*- +% Run with: +% $ swipl -q -t halt -g tsgenruby:prolog tsgenruby.pl >output-file +% Note: this needs to be run from the lib/ directory. + +% So, from the ruby directory +% (cd ../../../lib && swipl -q -t halt -g tsgenruby:prolog tsgenruby.pl ) >test/autogen_test.rb + + + +:- module(tsgenruby, + [gen_ts/2]). + +:- expects_dialect(swi). +:- use_module(tsparser). +:- use_module(library(record)). + +:- record testsuite_state(parser_no:integer = 0, test_no:integer=0). +% TODO: build a Box-like pretty-printer + +to_title_case([], []) :- !. +to_title_case([WSep,S0|Ss], [R0|Rs]) :- + memberchk(WSep, "_-"), !, + code_type(R0, to_upper(S0)), + to_title_case(Ss,Rs). +to_title_case([S0|Ss], [S0|Rs]) :- + \+ memberchk(S0, "_-"), + !, to_title_case(Ss,Rs). + +format_parser_name(Name, Result) :- + atom_codes(Name, CName), + append("h.", CName, Result), !. + +format_test_name(Name, Result) :- + atom_codes(Name, CName), + to_title_case([0x5f|CName], RName), + append("Test", RName, Result), !. + +indent(0) --> "", !. +indent(N) --> + {N > 0}, + " ", + {Np is N - 1}, + indent(Np). + +pp_char_guts(0x22) --> + "\\\"", !. +pp_char_guts(0x27) --> + "\\'", !. +pp_char_guts(A) --> + { A >= 0x20, A < 0x7F } -> + [A]; + "\\x", + { H is A >> 4, L is A /\ 0xF, + code_type(Hc, xdigit(H)), + code_type(Lc, xdigit(L)) }, + [Hc,Lc]. + +pp_hexnum_guts(0) --> !. +pp_hexnum_guts(A) --> + { L is A /\ 0xF, + H is A >> 4, + code_type(Lc, xdigit(L)) }, + pp_hexnum_guts(H), + [Lc], !. +pp_string_guts([]) --> !. +pp_string_guts([X|Xs]) --> + pp_char_guts(X), + pp_string_guts(Xs), !. + +pp_parser_args([]) --> !. +pp_parser_args([X|Rest]) --> + pp_parser(X), + pp_parser_args_rest(Rest). +pp_parser_args_rest([]) --> !. +pp_parser_args_rest([X|Xs]) --> + ", ", + pp_parser(X), + pp_parser_args_rest(Xs). + +pp_parser(parser(Name, Args)) --> + !, + {format_parser_name(Name,Fname)}, + Fname, + ({Args \= []} -> + + "(", pp_parser_args(Args), ")" + ; "") . +pp_parser(string(Str)) --> !, + "\"", + pp_string_guts(Str), + "\"", !. +pp_parser(num(0)) --> "0", !. +pp_parser(num(Num)) --> !, + ( {Num < 0} -> + "-0x", {RNum is -Num}; "0x", {RNum = Num} ), + pp_hexnum_guts(RNum). +pp_parser(char(C)) --> !, + pp_parser(num(C)), ".chr". % Ruby is encoding-aware; this is a + % more reasonable implementation + +pp_parser(ref(Name)) --> + {atom_codes(Name,CName)}, + "@sp_", CName, !. + + +pp_parser(A) --> + { writef("WTF is a %w?\n", [A]), + !, fail + }. + +upd_state_test_elem(parser(_), OldSt, NewSt) :- !, + testsuite_state_parser_no(OldSt, OldRNo), + NewRNo is OldRNo + 1, + set_parser_no_of_testsuite_state(NewRNo, OldSt, NewSt). +upd_state_test_elem(test(_, _), OldSt, NewSt) :- !, + testsuite_state_test_no(OldSt, OldTNo), + NewTNo is OldTNo + 1, + set_test_no_of_testsuite_state(NewTNo, OldSt, NewSt). +upd_state_test_elem(testFail(_), OldSt, NewSt) :- !, + testsuite_state_test_no(OldSt, OldTNo), + NewTNo is OldTNo + 1, + set_test_no_of_testsuite_state(NewTNo, OldSt, NewSt). +upd_state_test_elem(_, St, St). + +curparser_name(St) --> !, + { testsuite_state_parser_no(St, RNo), + format(string(X), "@parser_~w", RNo) }, + X. +curtest_name(St) --> !, + { testsuite_state_test_no(St, RNo), + format(string(X), "test_~w", RNo) }, + X. + +pp_test_elem(decl, parser(_), _) --> !. +pp_test_elem(init, parser(P), St) --> + !, indent(2), + curparser_name(St), " = ", + pp_parser(P), + "\n". +pp_test_elem(exec, parser(_), _) --> !. +pp_test_elem(decl, subparser(Name,_), _) --> + !, indent(2), + pp_parser(ref(Name)), + " = ", + pp_parser(parser(indirect,[])), + "\n". +pp_test_elem(init, subparser(Name, Parser), _) --> + !, indent(2), + pp_parser(ref(Name)), ".bind ", + pp_parser(Parser), + "\n". +pp_test_elem(exec, subparser(_,_), _) --> !. +pp_test_elem(decl, test(_,_), _) --> !. +pp_test_elem(init, test(_,_), _) --> !. +pp_test_elem(decl, testFail(_), _) --> !. +pp_test_elem(init, testFail(_), _) --> !. +pp_test_elem(exec, test(Str, Result), St) --> + !, + "\n", + indent(1), "def ", curtest_name(St), "\n", + indent(2), "assert_parse_ok ", curparser_name(St), ", ", pp_parser(string(Str)), + ", ", + pp_parse_result(Result), + "\n", + indent(1), "end\n". +pp_test_elem(exec, testFail(Str), St) --> + !, + "\n", + indent(1), "def ", curtest_name(St), "\n", + indent(2), "refute_parse_ok ", curparser_name(St), ", ", pp_parser(string(Str)), "\n", + indent(1), "end\n". + +% pp_test_elem(_, _) --> !. + +pp_result_seq([]) --> !. +pp_result_seq([X|Xs]) --> !, + pp_parse_result(X), + pp_result_seq_r(Xs). +pp_result_seq_r([]) --> !. +pp_result_seq_r([X|Xs]) --> !, + ", ", + pp_parse_result(X), + pp_result_seq_r(Xs). + +pp_byte_seq([]) --> !. +pp_byte_seq([X|Xs]) --> !, + pp_parser(num(X)), + pp_byte_seq_r(Xs). +pp_byte_seq_r([]) --> !. +pp_byte_seq_r([X|Xs]) --> !, + ", ", + pp_parser(num(X)), + pp_byte_seq_r(Xs). + +pp_parse_result(char(C)) --> !, + %"(System.UInt64)", + pp_parser(char(C)). +pp_parse_result(seq(Args)) --> !, + "[", pp_result_seq(Args), "]". +pp_parse_result(none) --> !, + "nil". +pp_parse_result(uint(V)) --> !, + pp_parser(num(V)). +pp_parse_result(sint(V)) --> !, + pp_parser(num(V)). +pp_parse_result(string(A)) --> !, + pp_parser(string(A)). + +%pp_parse_result(A) --> +% "\x1b[1;31m", +% {with_output_to(codes(C), write(A))}, +% C, +% "\x1b[0m". + + +pp_test_elems(Phase, Elems) --> + { default_testsuite_state(State) }, + pp_test_elems(Phase, Elems, State). +pp_test_elems(_, [], _) --> !. +pp_test_elems(Phase, [X|Xs], St) --> + !, + { upd_state_test_elem(X, St, NewSt) }, + %{NewSt = St}, + pp_test_elem(Phase,X, NewSt), + pp_test_elems(Phase,Xs, NewSt). + +pp_test_case(testcase(Name, Elems)) --> + !, + { format_test_name(Name, TName) }, + indent(0), "class ", TName, " < Minitest::Test\n", + indent(1), "def setup\n", + indent(2), "super\n", + indent(2), "h = Hammer::Parser\n", + pp_test_elems(decl, Elems), + pp_test_elems(init, Elems), + indent(1), "end\n", + pp_test_elems(exec, Elems), + indent(0), "end\n\n". + + +pp_test_cases([]) --> !. +pp_test_cases([A|As]) --> + pp_test_case(A), + pp_test_cases(As). + +pp_test_suite(Suite) --> + "require 'bundler/setup'\n", + "require 'minitest/autorun'\n", + "require 'hammer'\n", + pp_test_cases(Suite). + +gen_ts(Foo,Str) :- + phrase(pp_test_suite(Foo),Str). + +prolog :- + read_tc(A), + gen_ts(A, Res), + writef("%s", [Res]). diff --git a/lib/testgen.pl b/lib/tsparser.pl similarity index 100% rename from lib/testgen.pl rename to lib/tsparser.pl diff --git a/src/bindings/ruby/.gitignore b/src/bindings/ruby/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..4ceda1bade5fb0612afed8357cd41af0e2b07f2b --- /dev/null +++ b/src/bindings/ruby/.gitignore @@ -0,0 +1,2 @@ +/Gemfile.lock +.bundle diff --git a/src/bindings/ruby/Gemfile b/src/bindings/ruby/Gemfile new file mode 100644 index 0000000000000000000000000000000000000000..c5029fc93d47c9d9ba5ecf8022cd547e7e99fb35 --- /dev/null +++ b/src/bindings/ruby/Gemfile @@ -0,0 +1,8 @@ +source 'https://rubygems.org' + +gemspec + +group :test do + gem 'minitest', '~> 5.2' + gem 'rake', '>10' +end diff --git a/src/bindings/ruby/README.md b/src/bindings/ruby/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ae29459f309a7e8121a199200e50a6572b10c5d6 --- /dev/null +++ b/src/bindings/ruby/README.md @@ -0,0 +1,76 @@ +# hammer-parser + +Ruby bindings for [hammer](https://github.com/UpstandingHackers/hammer), a parsing library. + + +## Notes + +* I called the gem `hammer-parser`, since there already is a [gem named `hammer`](https://rubygems.org/gems/hammer). + + +## Development + +1. `cd src/bindings/ruby`. + +2. Run `bundle install` to install dependencies. + +3. Run `bundle console` to open `irb` with hammer loaded. + +4. To run tests, just run `bundle exec rake test`. + + +## Installation + +TODO + + + +## Examples + +### Building a parser + +```ruby +parser = Hammer::Parser.build { + token 'Hello ' + choice { + token 'Mom' + token 'Dad' + } + token '!' +} +``` + +Also possible: + +```ruby +parser = Hammer::ParserBuilder.new + .token('Hello ') + .choice(Hammer::Parser.token('Mom'), Hammer::Parser.token('Dad')) + .token('!') + .build +``` + +More like hammer in C: + +```ruby +h = Hammer::Parser +parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), h.token('!')) +``` + +### Parsing + +```ruby +result = parser.parse 'Hello Mom!' +=> #<HParseResult> +result = parser.parse 'Hello Someone!' +=> nil +``` + +The `parse` method returns an `HParseResult` object, which needs to be +kept around until you're entirely done with the parse tree, which can +be accessed with `result.ast`. + +While the AST can be accessed using the same interface as the C +HParsedToken type, we recommend using `result.ast.unmarshal` instead. +This converts the entire parse tree into a standalone Ruby-native +datastructure which will likely be much easier to work with. diff --git a/src/bindings/ruby/Rakefile b/src/bindings/ruby/Rakefile new file mode 100644 index 0000000000000000000000000000000000000000..c73847038dbb1c07cdaec20738a4acb3008bdafd --- /dev/null +++ b/src/bindings/ruby/Rakefile @@ -0,0 +1,8 @@ +require 'rake/testtask' + +Rake::TestTask.new do |t| + #t.pattern = "test/*_test.rb" + t.test_files = FileList['test/*_test.rb'] +end + +task :default => [:test] diff --git a/src/bindings/ruby/SConscript b/src/bindings/ruby/SConscript new file mode 100644 index 0000000000000000000000000000000000000000..6d85a9329d033cf1d247163f6dd3d078fc08746c --- /dev/null +++ b/src/bindings/ruby/SConscript @@ -0,0 +1,33 @@ +# -*- python -*- +import os.path +Import("env libhammer_shared testruns targets") + +rubysources = [ + Glob("test/*.rb"), + Glob("lib/hammer.rb"), + Glob("lib/*/*.rb"), + "hammer-parser.gemspec", + "Rakefile", + "Gemfile", + "README.md", +] + +rubyenv = env.Clone() + +for k,v in os.environ.items(): + if "RUBY" in k or "GEM" in k or "rvm" in k: + rubyenv['ENV'][k] = v + +rubyenv['ENV']['LD_LIBRARY_PATH'] = os.path.dirname(str(libhammer_shared[0])) +rubyenv['RBDIR'] = os.path.dirname(str(rubyenv.File("Gemfile").path)) + +setup = rubyenv.Command(Dir(".bundle"), rubysources, "cd $RBDIR && bundle install") +AlwaysBuild(setup) + +rubytestexec = rubyenv.Command(None, [setup] + rubysources, "cd $RBDIR && bundle exec rake test") + +rubytest = Alias("testruby", [rubytestexec], rubytestexec) +AlwaysBuild(rubytestexec) +testruns.append(rubytest) + +# No need for an install target; everybody just uses gems for that. diff --git a/src/bindings/ruby/hammer-parser.gemspec b/src/bindings/ruby/hammer-parser.gemspec new file mode 100644 index 0000000000000000000000000000000000000000..18b4db738ad9325526dbdca381d0acf93f112d3c --- /dev/null +++ b/src/bindings/ruby/hammer-parser.gemspec @@ -0,0 +1,31 @@ +#encoding: UTF-8 +Gem::Specification.new do |s| + s.name = 'hammer-parser' + s.version = '0.1.0' + s.summary = 'Ruby bindings to the hammer parsing library.' + s.description = s.summary # TODO: longer description? + s.authors = ['Meredith L. Patterson', 'TQ Hirsch', 'Jakob Rath'] + # TODO: + # s.email = ... + # s.homepage = ... + + files = [] + files << 'README.md' + files << [ + "lib/hammer/internal.rb", + "lib/hammer/parser.rb", + "lib/hammer/parser_builder.rb", + "lib/hammer.rb", + "lib/minitest/hamer-parser_plugin.rb", + "test/autogen_test.rb", + "test/parser_test.rb" + ] + s.files = files + s.test_files = s.files.select { |path| path =~ /^test\/.*_test.rb/ } + + s.require_paths = %w[lib] + + s.add_dependency 'ffi', '~> 1.9' + s.add_dependency 'docile', '~> 1.1' # TODO: Find a way to make this optional +end + diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb new file mode 100644 index 0000000000000000000000000000000000000000..916a0a505cad5cb148ccb20f86546f05bdb9b119 --- /dev/null +++ b/src/bindings/ruby/lib/hammer.rb @@ -0,0 +1,52 @@ +require 'hammer/internal' +require 'hammer/parser' +require 'hammer/parser_builder' + +# TODO: +# Probably need to rename this file to 'hammer-parser.rb', so +# people can use "require 'hammer-parser'" in their code. + + +# Leave this in for now to be able to play around with HParseResult in irb. +x = nil +parser = Hammer::Parser.build { + token 'abc' + x = indirect + end_p +} +x.bind(Hammer::Parser.token('abd')) + +#$p = parser +$r = parser.parse 'abcabd' + +#p $r[:ast][:data][:seq].elements.map {|e| e[:data][:bytes].token } + + +h = Hammer::Parser +parser = + h.many( + h.action(h.uint8) { |r| + #p "TT=#{r[:ast][:token_type]}, value=#{r[:ast][:data][:uint]}" + r.data * 2 + }) + +#parser = Hammer::Parser.build { +# many { +# uint8 +# action { |r| +# p r +# r[:ast] +# } +# } +#} + +$r = parser.parse 'abcdefgh' + +#p $r[:ast][:data][:seq].elements.map {|e| e[:data][:uint]} +# or: +#p $r.ast.data.map(&:data) + + +h = Hammer::Parser +parser = h.many(h.attr_bool(h.uint8) { |r| r.data <= 100 }) +#p parser.parse('abcdefgh').ast.data.map(&:data) diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb new file mode 100644 index 0000000000000000000000000000000000000000..bac3b3edb8cdbd44732ec0f69caafebf1258e1b0 --- /dev/null +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -0,0 +1,346 @@ +require 'ffi' + +module Hammer + module Internal + extend FFI::Library + + ffi_lib 'hammer' + + class DynamicVariable + SYMBOL_PREFIX = "Hammer::Internal::DynamicVariable gensym " + @@current_symbol = 0 + + def initialize(default=nil, name=nil, &block) + # This can take either a default value or a block. If a + # default value is given, all threads' dynvars are initialized + # to that object. If a block is given, the block is lazilly + # called on each thread to generate the initial value. If + # both a block and a default value are passed, the block is + # called with the literal value. + @default = default + @block = block || Proc.new{|x| x} + @@current_symbol += 1 + @sym = (SYMBOL_PREFIX + @@current_symbol.to_s).to_sym + end + + def value + if Thread.current.key? @sym + return Thread.current[@sym] + else + return Thread.current[@sym] = @block.call(@default) + end + end + + def value=(new_value) + Thread.current[@sym] = new_value + end + + def with(new_value, &block) + old_value = value + begin + self.value = new_value + return block.call + ensure + self.value = old_value + end + end + end + + # Maybe we can implement Hammer::Parser with FFI::DataConverter. + # That way, most hammer functions won't need to be wrapped. + # (Probably need to wrap token, sequence and choice only). + # See http://www.elabs.se/blog/61-advanced-topics-in-ruby-ffi + typedef :pointer, :h_parser + + class HTokenType + extend FFI::DataConverter + + @@known_type_map = { + :none => 1, + :bytes => 2, + :sint => 4, + :uint => 8, + :sequence => 16, + } + + @@inverse_type_map = @@known_type_map.invert + + @@from_hpt = { + :none => Proc.new { nil }, + :bytes => Proc.new {|hpt| hpt[:data][:bytes].token}, + :sint => Proc.new {|hpt| hpt[:data][:sint]}, + :uint => Proc.new {|hpt| hpt[:data][:uint]}, + :sequence => Proc.new {|hpt| hpt[:data][:seq].map {|x| x.unmarshal}}, + } + + def self.new(name, &block) + if name.is_a?(Symbol) + name_sym = name + name_str = name.to_s + else + name_str = name.to_s + name_sym = name.to_sym + end + num = Hammer::Internal.h_allocate_token_type(name_str) + @@known_type_map[name_sym] = num + @@inverse_type_map[num] = name_sym + @@from_hpt[name_sym] = block + end + + def self.from_name(name) + unless @@known_type_map.key? name + num = Hammer::Internal.h_get_token_type_number(name.to_s) + if num <= 0 + raise ArgumentError, "Unknown token type #{name}" + end + @@known_type_map[name] = num + @@inverse_type_map[num] = name + end + return @@known_type_map[name] + end + + def self.from_num(num) + unless @@inverse_type_map.key? num + name = Hammer::Internal.h_get_token_type_name(num) + if name.nil? + return nil + end + name = name.to_sym + @@known_type_map[name] = num + @@inverse_type_map[num] = name + end + return @@inverse_type_map[num] + end + + def self.native_type + FFI::Type::INT + end + + def self.to_native(val, ctx) + return val if val.is_a?(Integer) + return from_name(val) + end + + def self.from_native(val, ctx) + return from_num(val) || val + end + end + + # Define these as soon as possible, so that they can be used + # without fear elsewhere + attach_function :h_allocate_token_type, [:string], :int + attach_function :h_get_token_type_number, [:string], :int + attach_function :h_get_token_type_name, [:int], :string + + class HCountedArray < FFI::Struct + layout :capacity, :size_t, + :used, :size_t, + :arena, :pointer, + :elements, :pointer # HParsedToken** + + def length + self[:used] + end + + def elements + elem_array = FFI::Pointer.new(:pointer, self[:elements]) + return (0...self[:used]).map { |i| HParsedToken.new(elem_array[i].read_pointer) } + end + + #def [](idx) + # raise ArgumentError, "Index out of range" unless idx >= 0 and idx < length + # elem_array = FFI::Pointer.new(:pointer, self[:elements]) + # return HParsedToken.new(elem_array[i].read_pointer) + #end + + def map(&code) + elements.map {|x| code.call x} + end + def each(&code) + elements.each {|x| code.call x} + end + end + + class HBytes < FFI::Struct + layout :token, :pointer, # uint8_t* + :len, :size_t + + def token + # TODO: Encoding? + # Should be the same encoding as the string the token was created with. + # But how do we get to this knowledge at this point? + # Cheap solution: Just ask the user (additional parameter with default value of UTF-8). + self[:token].read_string(self[:len]) + end + + # TODO: Probably should rename this to match ruby conventions: length, count, size + def len + self[:len] + end + end + + class HString < FFI::Struct + layout :content, HBytes.by_ref, + :encoding, :uint64 + def token + return self[:content].token.force_encoding( + ObjectSpace._id2ref(self[:encoding])) + end + end + + HTokenType.new(:"com.upstandinghackers.hammer.ruby.encodedStr") {|hpt| + hpt.user(HString).token + } + HTokenType.new(:"com.upstandinghackers.hammer.ruby.object") {|hpt| + ObjectSpace._id2ref(hpt[:data][:uint]) + } + + class HParsedTokenDataUnion < FFI::Union + layout :bytes, HBytes.by_value, + :sint, :int64, + :uint, :uint64, + :dbl, :double, + :flt, :float, + :seq, HCountedArray.by_ref, + :user, :pointer + end + + class HParsedToken < FFI::Struct + layout :token_type, HTokenType, + :data, HParsedTokenDataUnion.by_value, + :index, :size_t, + :bit_offset, :char + + def normalize + # If I'm null, return nil. + return nil if null? + return self + end + + def token_type + self[:token_type] + end + + # TODO: Is this name ok? + def data + return self[:data][:bytes].token if token_type == :bytes + return self[:data][:sint] if token_type == :sint + return self[:data][:uint] if token_type == :uint + return self[:data][:seq].elements if token_type == :sequence + return self[:data][:user] if token_type == :user + end + + def bytes + raise ArgumentError, 'wrong token type' unless token_type == :bytes + self[:data][:bytes] + end + + def seq + raise ArgumentError, 'wrong token type' unless token_type == :sequence + self[:data][:seq] + end + + def index + self[:index] + end + + def bit_offset + self[:bit_offset] + end + + def user(struct) + struct.by_ref.from_native(self[:data][:user], nil) + end + + def unmarshal + Hammer::Internal::HTokenType.class_variable_get(:@@from_hpt)[token_type].call self + end + end + + class HParseResult < FFI::Struct + layout :ast, HParsedToken.by_ref, + :bit_length, :long_long, + :arena, :pointer + + def ast + self[:ast].normalize + end + + def bit_length + self[:bit_length] + end + + def self.release(ptr) + Hammer::Internal.h_parse_result_free(ptr) unless ptr.null? + end + + def arena_alloc(type) + Hammer::Internal.arena_alloc(self[:arena], type) + end + end + + def self.arena_alloc(arena, type) + ptr = h_arena_malloc(arena, type.size) + return type.by_ref.from_native(ptr, nil) + end + + # run a parser + attach_function :h_parse, [:h_parser, :pointer, :size_t], HParseResult.auto_ptr # TODO: Use :buffer_in instead of :string? + + # build a parser + attach_function :h_token, [:buffer_in, :size_t], :h_parser + attach_function :h_ch, [:uint8], :h_parser + attach_function :h_ch_range, [:uint8, :uint8], :h_parser + attach_function :h_int_range, [:h_parser, :int64, :int64], :h_parser + attach_function :h_bits, [:size_t, :bool], :h_parser + attach_function :h_int64, [], :h_parser + attach_function :h_int32, [], :h_parser + attach_function :h_int16, [], :h_parser + attach_function :h_int8, [], :h_parser + attach_function :h_uint64, [], :h_parser + attach_function :h_uint32, [], :h_parser + attach_function :h_uint16, [], :h_parser + attach_function :h_uint8, [], :h_parser + attach_function :h_whitespace, [:h_parser], :h_parser + attach_function :h_left, [:h_parser, :h_parser], :h_parser + attach_function :h_right, [:h_parser, :h_parser], :h_parser + attach_function :h_middle, [:h_parser, :h_parser, :h_parser], :h_parser + attach_function :h_in, [:pointer, :size_t], :h_parser + attach_function :h_not_in, [:pointer, :size_t], :h_parser + attach_function :h_end_p, [], :h_parser + attach_function :h_nothing_p, [], :h_parser + attach_function :h_sequence, [:varargs], :h_parser + attach_function :h_choice, [:varargs], :h_parser + attach_function :h_butnot, [:h_parser, :h_parser], :h_parser + attach_function :h_difference, [:h_parser, :h_parser], :h_parser + attach_function :h_xor, [:h_parser, :h_parser], :h_parser + attach_function :h_many, [:h_parser], :h_parser + attach_function :h_many1, [:h_parser], :h_parser + attach_function :h_repeat_n, [:h_parser, :size_t], :h_parser + attach_function :h_optional, [:h_parser], :h_parser + attach_function :h_ignore, [:h_parser], :h_parser + attach_function :h_sepBy, [:h_parser, :h_parser], :h_parser + attach_function :h_sepBy1, [:h_parser, :h_parser], :h_parser + attach_function :h_epsilon_p, [], :h_parser + attach_function :h_length_value, [:h_parser, :h_parser], :h_parser + attach_function :h_and, [:h_parser], :h_parser + attach_function :h_not, [:h_parser], :h_parser + + attach_function :h_indirect, [], :h_parser + attach_function :h_bind_indirect, [:h_parser, :h_parser], :void + + callback :HAction, [HParseResult.by_ref], HParsedToken.by_ref + attach_function :h_action, [:h_parser, :HAction], :h_parser + + callback :HPredicate, [HParseResult.by_ref], :bool + attach_function :h_attr_bool, [:h_parser, :HPredicate], :h_parser + + # free the parse result + attach_function :h_parse_result_free, [HParseResult.by_ref], :void + + # TODO: Does the HParser* need to be freed? + + # Add the arena + attach_function :h_arena_malloc, [:pointer, :size_t], :pointer + end +end diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb new file mode 100644 index 0000000000000000000000000000000000000000..d1177c576e7ba811f1bc75d7001747b1fcb23481 --- /dev/null +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -0,0 +1,222 @@ +require 'hammer/internal' + +module Hammer + class Parser + + @@saved_objects = Hammer::Internal::DynamicVariable.new nil, "Hammer parse-time pins" + + # Don't create new instances with Hammer::Parser.new, + # use the constructor methods instead (i.e. Hammer::Parser.int64 etc.) + # + # name: Name of the parser. Should be a symbol. + # h_parser: The pointer to the parser as returned by hammer. + # dont_gc: Pass additional data that's used by the parser and needs to be saved from the garbage collector (at least as long this object lives). + def initialize(name, h_parser, dont_gc=[]) + @name = name + @h_parser = h_parser + # Always store as array, so we can easily add stuff later on + dont_gc = [dont_gc] unless dont_gc.is_a? Array + @dont_gc = dont_gc.dup + end + + attr_reader :name + attr_reader :h_parser + + # Parse the given data. Returns the parse result if successful, nil otherwise. + # + # data: A string containing the data to parse. + def parse(data) + raise RuntimeError, '@h_parser is nil' if @h_parser.nil? + raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that. + + ibuf = FFI::MemoryPointer.from_string(data) + @@saved_objects.with([]) do + result = Hammer::Internal.h_parse(@h_parser, ibuf, data.bytesize) # Don't include the trailing null + if result.null? + return nil + else + # NOTE: + # The parse result *must* hold a reference to the parser that created it! + # Otherwise, the parser might get garbage-collected while the result is still valid. + # Any pointers to token strings will then be invalid. + result.instance_variable_set :@parser, self + result.instance_variable_set :@pins, @@saved_objects.value + return result + end + end + end + + # Binds an indirect parser. + def bind(other_parser) + raise RuntimeError, 'can only bind indirect parsers' unless self.name == :indirect + Hammer::Internal.h_bind_indirect(self.h_parser, other_parser.h_parser) + @dont_gc << other_parser + end + + # Can pass the action either as a Proc in second parameter, or as block. + def self.action(parser, action=nil, &block) + action = block if action.nil? + raise ArgumentError, 'no action' if action.nil? + + real_action = Proc.new {|hpr| + ret = action.call(hpr.ast) + # Pin the result + @@saved_objects.value << ret + hpt = hpr.arena_alloc(Hammer::Internal::HParsedToken) + unless hpr.ast.nil? + hpt[:index] = hpr[:ast][:index] + hpt[:bit_offset] = hpr[:ast][:bit_offset] + end + hpt[:token_type] = :"com.upstandinghackers.hammer.ruby.object" + hpt[:data][:uint] = ret.object_id + hpt + } + + h_parser = Hammer::Internal.h_action(parser.h_parser, real_action) + return Hammer::Parser.new(:action, h_parser, [parser, action, real_action]) + end + + # Can pass the predicate either as a Proc in second parameter, or as block. + def self.attr_bool(parser, predicate=nil, &block) + predicate = block if predicate.nil? + raise ArgumentError, 'no predicate' if predicate.nil? + + real_pred = Proc.new {|hpr| predicate.call hpr.ast} + + h_parser = Hammer::Internal.h_attr_bool(parser.h_parser, real_pred) + return Hammer::Parser.new(:attr_bool, h_parser, [parser, predicate, real_pred]) + end + + def self.token(string) + # Need to copy string to a memory buffer (not just string.dup) + # * Original string might be modified, this must not affect existing tokens + # * We need a constant memory address (Ruby string might be moved around by the Ruby VM) + buffer = FFI::MemoryPointer.from_string(string) + h_parser = Hammer::Internal.h_token(buffer, buffer.size-1) # buffer.size includes the null byte at the end + encoding = string.encoding + + wrapping_action = Proc.new {|hpr| + hstr = hpr.arena_alloc(Hammer::Internal::HString) + hstr[:content] = hpr[:ast][:data][:bytes] + hstr[:encoding] = encoding.object_id + + hpt = hpr.arena_alloc(Hammer::Internal::HParsedToken) + hpt[:token_type] = :"com.upstandinghackers.hammer.ruby.encodedStr" + hpt[:data][:user] = hstr.to_ptr + hpt[:bit_offset] = hpr[:ast][:bit_offset] + hpt[:index] = hpr[:ast][:index] + hpt + } + wrapped_parser = Hammer::Internal.h_action(h_parser, wrapping_action) + return Hammer::Parser.new(:token, wrapped_parser, [buffer, string, encoding, wrapping_action, h_parser]) + end + + def self.marshal_ch_arg(num) + if num.is_a?(String) + raise ArgumentError, "Expecting either a fixnum in 0..255 or a single-byte String" unless num.bytesize == 1 + num = num.bytes.first + end + raise ArgumentError, 'Expecting a Fixnum in 0..255 or a single-byte String' unless num.is_a?(Fixnum) and num.between?(0, 255) + return num + end + private_class_method :marshal_ch_arg + + def self.ch_parser_wrapper(parser) + return Hammer::Parser.action(parser) {|x| x.data.chr} + end + + def self.ch(ch) + num = marshal_ch_arg(ch) + h_parser = Hammer::Internal.h_ch(num) + + return ch_parser_wrapper(Hammer::Parser.new(:ch, h_parser, nil)) + end + + def self.ch_range(ch1, ch2) + ch1 = marshal_ch_arg(ch1) + ch2 = marshal_ch_arg(ch2) + h_parser = Hammer::Internal.h_ch_range(ch1, ch2) + return ch_parser_wrapper(Hammer::Parser.new(:ch_range, h_parser, nil)) + end + + def self.int_range(parser, i1, i2) + h_parser = Hammer::Internal.h_int_range(parser.h_parser, i1, i2) + return Hammer::Parser.new(:int_range, h_parser, [parser]) + end + + def self.in(charset) + raise ArgumentError, "Expected a String" unless charset.is_a?(String) + ibuf = FFI::MemoryPointer.from_string(charset) + h_parser = Hammer::Internal.h_in(ibuf, charset.bytesize) + return ch_parser_wrapper(Hammer::Parser.new(:in, h_parser, nil)) + end + + def self.repeat_n(parser, count) + h_parser = Hammer::Internal.h_repeat_n(parser.h_parser, count) + return Hammer::Parser.new(:repeat_n, h_parser, nil) + end + + def self.not_in(charset) + raise ArgumentError, "Expected a String" unless charset.is_a?(String) + ibuf = FFI::MemoryPointer.from_string(charset) + h_parser = Hammer::Internal.h_not_in(ibuf, charset.bytesize) + return ch_parser_wrapper(Hammer::Parser.new(:not_in, h_parser, nil)) + end + + # Defines a parser constructor with the given name. + # Options: + # hammer_function: name of the hammer function to call (default: 'h_'+name) + # varargs: Whether the function is taking a variable number of arguments (default: false) + def self.define_parser(name, options = {}) + hammer_function = options[:hammer_function] || ('h_' + name.to_s).to_sym + varargs = options[:varargs] || false + + # Define a new class method + define_singleton_method name do |*parsers| + if varargs + args = parsers.flat_map { |p| [:pointer, p.h_parser] } + args += [:pointer, nil] + else + args = parsers.map(&:h_parser) + end + h_parser = Hammer::Internal.send hammer_function, *args + + return Hammer::Parser.new(name, h_parser, parsers) + end + end + private_class_method :define_parser + + define_parser :sequence, varargs: true + define_parser :choice, varargs: true + + define_parser :int64 + define_parser :int32 + define_parser :int16 + define_parser :int8 + define_parser :uint64 + define_parser :uint32 + define_parser :uint16 + define_parser :uint8 + define_parser :whitespace + define_parser :left + define_parser :right + define_parser :middle + define_parser :end_p + define_parser :nothing_p + define_parser :butnot + define_parser :difference + define_parser :xor + define_parser :many + define_parser :many1 + define_parser :optional + define_parser :ignore + define_parser :sepBy + define_parser :sepBy1 + define_parser :epsilon_p + define_parser :length_value + define_parser :and + define_parser :not + define_parser :indirect + + end +end diff --git a/src/bindings/ruby/lib/hammer/parser_builder.rb b/src/bindings/ruby/lib/hammer/parser_builder.rb new file mode 100644 index 0000000000000000000000000000000000000000..6756314aacaa7d72b35056c5f8ec2a820ccc12c8 --- /dev/null +++ b/src/bindings/ruby/lib/hammer/parser_builder.rb @@ -0,0 +1,124 @@ +# TODO: Find a way to make docile an optional dependency +# (autoload for this file? and throw some informative error when docile isn't available. +# should also check gem version with a 'gem' call and appropriate version specifier.) +require 'docile' + +module Hammer + + class Parser + def self.build(&block) + ParserBuilder.new.sequence(&block).build + end + + def self.build_choice(&block) + ParserBuilder.new.choice(&block).build + end + end # class Parser + + class ParserBuilder + attr_reader :parsers + + def initialize + @parsers = [] + end + + def build + if @parsers.length > 1 + Hammer::Parser.sequence(*@parsers) + else + @parsers.first + end + end + + + # can call it either as ParserBuiler.new.sequence(parser1, parser2, parser3) + # or as Parser.build { sequence { call parser1; call parser2; call parser3 } } + def sequence(*parsers, &block) + @parsers += parsers + @parsers << Docile.dsl_eval(ParserBuilder.new, &block).build if block_given? + return self + end + + def choice(*parsers, &block) + if block_given? + parsers += Docile.dsl_eval(ParserBuilder.new, &block).parsers + end + @parsers << Hammer::Parser.choice(*parsers) + return self + end + + def call(parser) + @parsers << parser + return self + end + + # modifies previous parser + def action(&block) + parser = @parsers.last + raise RuntimeError, 'need a parser before action' if parser.nil? + @parsers << Hammer::Parser.action(parser, &block) + return self + end + + # Defines a parser constructor with the given name. + def self.define_parser(name, options = {}) + define_method name do |*args| + # TODO: This is wrong!! Needs to accept a block for nested parsers! + @parsers << Hammer::Parser.send(name, *args) + return self + end + end + private_class_method :define_parser + + define_parser :token + define_parser :ch + define_parser :int64 + define_parser :int32 + define_parser :int16 + define_parser :int8 + define_parser :uint64 + define_parser :uint32 + define_parser :uint16 + define_parser :uint8 + define_parser :whitespace + define_parser :left + define_parser :right + define_parser :middle + define_parser :end_p + define_parser :nothing_p + define_parser :butnot + define_parser :difference + define_parser :xor + define_parser :many + define_parser :many1 + define_parser :optional + define_parser :ignore + define_parser :sepBy + define_parser :sepBy1 + define_parser :epsilon_p + define_parser :length_value + define_parser :and + define_parser :not + + # At least indirect must return the parser instead of the builder, so it can be stored in a variable. + # Other possible solution: + # Make indirect take a name parameter, and use the name to bind it later. + # Example: + # p = Hammer::Parser.build { indirect(:the_name) } + # p.bind(:the_name, inner_parser) + # (store names and parsers in hash in the builder, + # when building merge hashes from sub builders and store everything in the resulting sequence or choice. + # make Parser#bind take and optional symbol. if it is given, the name is looked up in the table.) + # TODO: + # Think about this more. + # Do we need to be able to build parsers by chaining function calls? DSL should be sufficient. + # If yes, the parser methods in this class should not return "self", but the Hammer::Parser object they create. + def indirect + parser = Hammer::Parser.indirect + @parsers << parser + return parser + end + + end # class ParserBuilder + +end # module Hammer diff --git a/src/bindings/ruby/lib/minitest/hamer-parser_plugin.rb b/src/bindings/ruby/lib/minitest/hamer-parser_plugin.rb new file mode 100644 index 0000000000000000000000000000000000000000..a23d8b9a639ef7efbd43a8ce277aeb36492d2a9e --- /dev/null +++ b/src/bindings/ruby/lib/minitest/hamer-parser_plugin.rb @@ -0,0 +1,31 @@ +module Minitest + + module Assertions + HAMMER_JUST_PARSE = Object.new + def assert_parse_ok(parser, probe, expected=HAMMER_JUST_PARSE) + refute_nil parser, "Parser must not be nil (this is a problem with your test)" + parse_result = parser.parse(probe) + refute_nil parse_result, "Parse failed" + if HAMMER_JUST_PARSE != expected + if parse_result.ast == nil + assert_nil expected, "Parser returned nil AST; expected #{expected}" + else + assert_equal parse_result.ast.unmarshal, expected + end + end + end + + def refute_parse_ok(parser, probe) + refute_nil parser, "Parser must not be nil (this is a problem with your test)" + parse_result = parser.parse(probe) + + if not parse_result.nil? + assert_nil parse_result, "Parse succeeded unexpectedly with " + parse_result.ast.inspect + end + end + end + + + #def self.plugin_hammer-parser_init(options) +end + diff --git a/src/bindings/ruby/test/autogen_test.rb b/src/bindings/ruby/test/autogen_test.rb new file mode 100644 index 0000000000000000000000000000000000000000..0600c0f66c139259f771fc2518b5af47128b08cd --- /dev/null +++ b/src/bindings/ruby/test/autogen_test.rb @@ -0,0 +1,755 @@ +require 'bundler/setup' +require 'minitest/autorun' +require 'hammer' +class TestToken < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.token("95\xa2") + end + + def test_1 + assert_parse_ok @parser_1, "95\xa2", "95\xa2" + end + + def test_2 + refute_parse_ok @parser_1, "95\xa3" + end +end + +class TestCh < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.ch(0xa2) + end + + def test_1 + assert_parse_ok @parser_1, "\xa2", 0xa2.chr + end + + def test_2 + refute_parse_ok @parser_1, "\xa3" + end +end + +class TestChRange < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.ch_range(0x61, 0x63) + end + + def test_1 + assert_parse_ok @parser_1, "b", 0x62.chr + end + + def test_2 + refute_parse_ok @parser_1, "d" + end +end + +class TestInt64 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.int64 + end + + def test_1 + assert_parse_ok @parser_1, "\xff\xff\xff\xfe\x00\x00\x00\x00", -0x200000000 + end + + def test_2 + refute_parse_ok @parser_1, "\xff\xff\xff\xfe\x00\x00\x00" + end +end + +class TestInt32 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.int32 + end + + def test_1 + assert_parse_ok @parser_1, "\xff\xfe\x00\x00", -0x20000 + end + + def test_2 + refute_parse_ok @parser_1, "\xff\xfe\x00" + end + + def test_3 + assert_parse_ok @parser_1, "\x00\x02\x00\x00", 0x20000 + end + + def test_4 + refute_parse_ok @parser_1, "\x00\x02\x00" + end +end + +class TestInt16 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.int16 + end + + def test_1 + assert_parse_ok @parser_1, "\xfe\x00", -0x200 + end + + def test_2 + refute_parse_ok @parser_1, "\xfe" + end + + def test_3 + assert_parse_ok @parser_1, "\x02\x00", 0x200 + end + + def test_4 + refute_parse_ok @parser_1, "\x02" + end +end + +class TestInt8 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.int8 + end + + def test_1 + assert_parse_ok @parser_1, "\x88", -0x78 + end + + def test_2 + refute_parse_ok @parser_1, "" + end +end + +class TestUint64 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.uint64 + end + + def test_1 + assert_parse_ok @parser_1, "\x00\x00\x00\x02\x00\x00\x00\x00", 0x200000000 + end + + def test_2 + refute_parse_ok @parser_1, "\x00\x00\x00\x02\x00\x00\x00" + end +end + +class TestUint32 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.uint32 + end + + def test_1 + assert_parse_ok @parser_1, "\x00\x02\x00\x00", 0x20000 + end + + def test_2 + refute_parse_ok @parser_1, "\x00\x02\x00" + end +end + +class TestUint16 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.uint16 + end + + def test_1 + assert_parse_ok @parser_1, "\x02\x00", 0x200 + end + + def test_2 + refute_parse_ok @parser_1, "\x02" + end +end + +class TestUint8 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.uint8 + end + + def test_1 + assert_parse_ok @parser_1, "x", 0x78 + end + + def test_2 + refute_parse_ok @parser_1, "" + end +end + +class TestIntRange < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.int_range(h.uint8, 0x3, 0xa) + end + + def test_1 + assert_parse_ok @parser_1, "\x05", 0x5 + end + + def test_2 + refute_parse_ok @parser_1, "\x0b" + end +end + +class TestWhitespace < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.whitespace(h.ch(0x61)) + @parser_2 = h.whitespace(h.end_p) + end + + def test_1 + assert_parse_ok @parser_1, "a", 0x61.chr + end + + def test_2 + assert_parse_ok @parser_1, " a", 0x61.chr + end + + def test_3 + assert_parse_ok @parser_1, " a", 0x61.chr + end + + def test_4 + assert_parse_ok @parser_1, "\x09a", 0x61.chr + end + + def test_5 + refute_parse_ok @parser_1, "_a" + end + + def test_6 + assert_parse_ok @parser_2, "", nil + end + + def test_7 + assert_parse_ok @parser_2, " ", nil + end + + def test_8 + refute_parse_ok @parser_2, " x" + end +end + +class TestLeft < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.left(h.ch(0x61), h.ch(0x20)) + end + + def test_1 + assert_parse_ok @parser_1, "a ", 0x61.chr + end + + def test_2 + refute_parse_ok @parser_1, "a" + end + + def test_3 + refute_parse_ok @parser_1, " " + end + + def test_4 + refute_parse_ok @parser_1, "ba" + end +end + +class TestMiddle < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.middle(h.ch(0x20.chr), h.ch(0x61.chr), h.ch(0x20.chr)) + end + + def test_1 + assert_parse_ok @parser_1, " a ", 0x61.chr + end + + def test_2 + refute_parse_ok @parser_1, "a" + end + + def test_3 + refute_parse_ok @parser_1, " a" + end + + def test_4 + refute_parse_ok @parser_1, "a " + end + + def test_5 + refute_parse_ok @parser_1, " b " + end + + def test_6 + refute_parse_ok @parser_1, "ba " + end + + def test_7 + refute_parse_ok @parser_1, " ab" + end +end + +class TestIn < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.in("abc") + end + + def test_1 + assert_parse_ok @parser_1, "b", 0x62.chr + end + + def test_2 + refute_parse_ok @parser_1, "d" + end +end + +class TestNotIn < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.not_in("abc") + end + + def test_1 + assert_parse_ok @parser_1, "d", 0x64.chr + end + + def test_2 + refute_parse_ok @parser_1, "a" + end +end + +class TestEndP < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sequence(h.ch(0x61.chr), h.end_p) + end + + def test_1 + assert_parse_ok @parser_1, "a", [0x61.chr] + end + + def test_2 + refute_parse_ok @parser_1, "aa" + end +end + +class TestNothingP < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.nothing_p + end + + def test_1 + refute_parse_ok @parser_1, "a" + end +end + +class TestSequence < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sequence(h.ch(0x61.chr), h.ch(0x62.chr)) + @parser_2 = h.sequence(h.ch(0x61.chr), h.whitespace(h.ch(0x62.chr))) + end + + def test_1 + assert_parse_ok @parser_1, "ab", [0x61.chr, 0x62.chr] + end + + def test_2 + refute_parse_ok @parser_1, "a" + end + + def test_3 + refute_parse_ok @parser_1, "b" + end + + def test_4 + assert_parse_ok @parser_2, "ab", [0x61.chr, 0x62.chr] + end + + def test_5 + assert_parse_ok @parser_2, "a b", [0x61.chr, 0x62.chr] + end + + def test_6 + assert_parse_ok @parser_2, "a b", [0x61.chr, 0x62.chr] + end +end + +class TestChoice < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.choice(h.ch(0x61.chr), h.ch(0x62.chr)) + end + + def test_1 + assert_parse_ok @parser_1, "a", 0x61.chr + end + + def test_2 + assert_parse_ok @parser_1, "b", 0x62.chr + end + + def test_3 + assert_parse_ok @parser_1, "ab", 0x61.chr + end + + def test_4 + refute_parse_ok @parser_1, "c" + end +end + +class TestButnot < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.butnot(h.ch(0x61.chr), h.token("ab")) + @parser_2 = h.butnot(h.ch_range(0x30.chr, 0x39.chr), h.ch(0x36.chr)) + end + + def test_1 + assert_parse_ok @parser_1, "a", 0x61.chr + end + + def test_2 + refute_parse_ok @parser_1, "ab" + end + + def test_3 + assert_parse_ok @parser_1, "aa", 0x61.chr + end + + def test_4 + assert_parse_ok @parser_2, "5", 0x35.chr + end + + def test_5 + refute_parse_ok @parser_2, "6" + end +end + +class TestDifference < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.difference(h.token("ab"), h.ch(0x61.chr)) + end + + def test_1 + assert_parse_ok @parser_1, "ab", "ab" + end + + def test_2 + refute_parse_ok @parser_1, "a" + end +end + +class TestXor < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.xor(h.ch_range(0x30.chr, 0x36.chr), h.ch_range(0x35.chr, 0x39.chr)) + end + + def test_1 + assert_parse_ok @parser_1, "0", 0x30.chr + end + + def test_2 + assert_parse_ok @parser_1, "9", 0x39.chr + end + + def test_3 + refute_parse_ok @parser_1, "5" + end + + def test_4 + refute_parse_ok @parser_1, "a" + end +end + +class TestMany < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.many(h.choice(h.ch(0x61.chr), h.ch(0x62.chr))) + end + + def test_1 + assert_parse_ok @parser_1, "", [] + end + + def test_2 + assert_parse_ok @parser_1, "a", [0x61.chr] + end + + def test_3 + assert_parse_ok @parser_1, "b", [0x62.chr] + end + + def test_4 + assert_parse_ok @parser_1, "aabbaba", [0x61.chr, 0x61.chr, 0x62.chr, 0x62.chr, 0x61.chr, 0x62.chr, 0x61.chr] + end +end + +class TestMany1 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.many1(h.choice(h.ch(0x61.chr), h.ch(0x62.chr))) + end + + def test_1 + refute_parse_ok @parser_1, "" + end + + def test_2 + assert_parse_ok @parser_1, "a", [0x61.chr] + end + + def test_3 + assert_parse_ok @parser_1, "b", [0x62.chr] + end + + def test_4 + assert_parse_ok @parser_1, "aabbaba", [0x61.chr, 0x61.chr, 0x62.chr, 0x62.chr, 0x61.chr, 0x62.chr, 0x61.chr] + end + + def test_5 + refute_parse_ok @parser_1, "daabbabadef" + end +end + +class TestRepeatN < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.repeat_n(h.choice(h.ch(0x61.chr), h.ch(0x62.chr)), 0x2) + end + + def test_1 + refute_parse_ok @parser_1, "adef" + end + + def test_2 + assert_parse_ok @parser_1, "abdef", [0x61.chr, 0x62.chr] + end + + def test_3 + refute_parse_ok @parser_1, "dabdef" + end +end + +class TestOptional < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sequence(h.ch(0x61.chr), h.optional(h.choice(h.ch(0x62.chr), h.ch(0x63.chr))), h.ch(0x64.chr)) + end + + def test_1 + assert_parse_ok @parser_1, "abd", [0x61.chr, 0x62.chr, 0x64.chr] + end + + def test_2 + assert_parse_ok @parser_1, "acd", [0x61.chr, 0x63.chr, 0x64.chr] + end + + def test_3 + assert_parse_ok @parser_1, "ad", [0x61.chr, nil, 0x64.chr] + end + + def test_4 + refute_parse_ok @parser_1, "aed" + end + + def test_5 + refute_parse_ok @parser_1, "ab" + end + + def test_6 + refute_parse_ok @parser_1, "ac" + end +end + +class TestIgnore < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sequence(h.ch(0x61.chr), h.ignore(h.ch(0x62.chr)), h.ch(0x63.chr)) + end + + def test_1 + assert_parse_ok @parser_1, "abc", [0x61.chr, 0x63.chr] + end + + def test_2 + refute_parse_ok @parser_1, "ac" + end +end + +class TestSepBy < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sepBy(h.choice(h.ch(0x31.chr), h.ch(0x32.chr), h.ch(0x33.chr)), h.ch(0x2c.chr)) + end + + def test_1 + assert_parse_ok @parser_1, "1,2,3", [0x31.chr, 0x32.chr, 0x33.chr] + end + + def test_2 + assert_parse_ok @parser_1, "1,3,2", [0x31.chr, 0x33.chr, 0x32.chr] + end + + def test_3 + assert_parse_ok @parser_1, "1,3", [0x31.chr, 0x33.chr] + end + + def test_4 + assert_parse_ok @parser_1, "3", [0x33.chr] + end + + def test_5 + assert_parse_ok @parser_1, "", [] + end +end + +class TestSepBy1 < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sepBy1(h.choice(h.ch(0x31.chr), h.ch(0x32.chr), h.ch(0x33.chr)), h.ch(0x2c.chr)) + end + + def test_1 + assert_parse_ok @parser_1, "1,2,3", [0x31.chr, 0x32.chr, 0x33.chr] + end + + def test_2 + assert_parse_ok @parser_1, "1,3,2", [0x31.chr, 0x33.chr, 0x32.chr] + end + + def test_3 + assert_parse_ok @parser_1, "1,3", [0x31.chr, 0x33.chr] + end + + def test_4 + assert_parse_ok @parser_1, "3", [0x33.chr] + end + + def test_5 + refute_parse_ok @parser_1, "" + end +end + +class TestAnd < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sequence(h.and(h.ch(0x30.chr)), h.ch(0x30.chr)) + @parser_2 = h.sequence(h.and(h.ch(0x30.chr)), h.ch(0x31.chr)) + @parser_3 = h.sequence(h.ch(0x31.chr), h.and(h.ch(0x32.chr))) + end + + def test_1 + assert_parse_ok @parser_1, "0", [0x30.chr] + end + + def test_2 + refute_parse_ok @parser_1, "1" + end + + def test_3 + refute_parse_ok @parser_2, "0" + end + + def test_4 + refute_parse_ok @parser_2, "1" + end + + def test_5 + assert_parse_ok @parser_3, "12", [0x31.chr] + end + + def test_6 + refute_parse_ok @parser_3, "13" + end +end + +class TestNot < Minitest::Test + def setup + super + h = Hammer::Parser + @parser_1 = h.sequence(h.ch(0x61.chr), h.choice(h.token("+"), h.token("++")), h.ch(0x62.chr)) + @parser_2 = h.sequence(h.ch(0x61.chr), h.choice(h.sequence(h.token("+"), h.not(h.ch(0x2b.chr))), h.token("++")), h.ch(0x62.chr)) + end + + def test_1 + assert_parse_ok @parser_1, "a+b", [0x61.chr, "+", 0x62.chr] + end + + def test_2 + refute_parse_ok @parser_1, "a++b" + end + + def test_3 + assert_parse_ok @parser_2, "a+b", [0x61.chr, ["+"], 0x62.chr] + end + + def test_4 + assert_parse_ok @parser_2, "a++b", [0x61.chr, "++", 0x62.chr] + end +end + +class TestRightrec < Minitest::Test + def setup + super + h = Hammer::Parser + @sp_rr = h.indirect + @sp_rr.bind h.choice(h.sequence(h.ch(0x61.chr), @sp_rr), h.epsilon_p) + @parser_1 = @sp_rr + end + + def test_1 + assert_parse_ok @parser_1, "a", [0x61.chr] + end + + def test_2 + assert_parse_ok @parser_1, "aa", [0x61.chr, [0x61.chr]] + end + + def test_3 + assert_parse_ok @parser_1, "aaa", [0x61.chr, [0x61.chr, [0x61.chr]]] + end +end + diff --git a/src/bindings/ruby/test/parser_test.rb b/src/bindings/ruby/test/parser_test.rb new file mode 100644 index 0000000000000000000000000000000000000000..6bbdc3662553f1cf23dadc6e4953b39e28b3c182 --- /dev/null +++ b/src/bindings/ruby/test/parser_test.rb @@ -0,0 +1,132 @@ +# -*- coding: utf-8 -*- +require 'bundler/setup' +require 'hammer' +require 'minitest/autorun' + +class ParserTest < Minitest::Test + def test_builder_1 + parser = Hammer::Parser.build { + token 'blah' + ch 'a'.ord + choice { + sequence { + token 'abc' + } + token 'def' + } + } + + refute_nil parser + + refute_nil parser.parse('blahaabcd') + refute_nil parser.parse('blahadefd') + assert_nil parser.parse('blahablad') + assert_nil parser.parse('blaha') + assert_nil parser.parse('blah') + end + + def test_builder_2 + parser = Hammer::ParserBuilder.new + .token('Hello ') + .choice(Hammer::Parser.token('Mom'), Hammer::Parser.token('Dad')) + .token('!') + .build + + refute_nil parser + refute_nil parser.parse('Hello Mom!') + end + + def test_builder_3 + h = Hammer::Parser + parser = h.sequence(h.token('Hello '), h.choice(h.token('Mom'), h.token('Dad')), h.token('!')) + + refute_nil parser + refute_nil parser.parse('Hello Mom!') + end + + def test_string_copied + s = 'blah' + parser = Hammer::Parser.token(s) + + refute_equal s, 'BLAH' + assert_nil parser.parse('BLAH') + + # parser still shouldn't match, even if we modify the string in-place + s.upcase! + assert_equal s, 'BLAH' + assert_nil parser.parse('BLAH') + end + + def test_indirect + x = nil + parser = Hammer::Parser.build { + token 'abc' + x = indirect + end_p + } + x.bind(Hammer::Parser.token('abd')) + + assert_nil parser.parse('abcabdabd') + refute_nil parser.parse('abcabd') + assert_nil parser.parse('abdabd') + assert_nil parser.parse('abc') + end + + def test_multibyte_token + parser = Hammer::Parser.build { + token '今日' + token 'a' + end_p + } + + refute_nil parser.parse('今日a') + end + + def test_token_encoding(encoding='UTF-8') + string = '今日'.encode(encoding) + parser = Hammer::Parser.token(string) + assert_equal string, parser.parse(string).ast.unmarshal + end + + def test_token_encoding_2 + test_token_encoding('EUC-JP') + end +end + +class AttrBoolTest < Minitest::Test + def setup + h = Hammer::Parser + @parser = h.attr_bool(h.many1(h.choice(h.ch('a'), h.ch('b')))) {|x| + data = x.unmarshal + data.length > 1 && data[0] == data[1] + } + end + + def test_1 + assert_parse_ok @parser, "aa", ['a','a'] + end + def test_2 + assert_parse_ok @parser, "bb", ['b','b'] + end + def test_3 + refute_parse_ok @parser, "ab" + end +end + +class ActionTest < Minitest::Test + def setup + h = Hammer::Parser + @parser = h.action(h.sequence(h.choice(h.ch('a'), h.ch('A')), + h.choice(h.ch('b'), h.ch('B')))) {|x| + x.unmarshal.join(",")} + end + def test_1 + assert_parse_ok @parser, "ab", "a,b" + end + def test_2 + assert_parse_ok @parser, "AB", "A,B" + end + def test_3 + refute_parse_ok @parser, "XX" + end +end