diff --git a/src/bindings/ruby/.gitignore b/src/bindings/ruby/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..0f57d62539dd624381fa38ab40fb63dc08bd9fa8
--- /dev/null
+++ b/src/bindings/ruby/.gitignore
@@ -0,0 +1,4 @@
+/tmp/
+/lib/hammer/hammer_ext.bundle
+/lib/hammer/hammer_ext.so
+/Gemfile.lock
diff --git a/src/bindings/ruby/Gemfile b/src/bindings/ruby/Gemfile
new file mode 100644
index 0000000000000000000000000000000000000000..6f6178dcff53168c8722dc4f5c18f30f26d05f4d
--- /dev/null
+++ b/src/bindings/ruby/Gemfile
@@ -0,0 +1,11 @@
+source 'https://rubygems.org'
+
+gemspec
+
+gem 'rake'
+# The Rakefile requires 'rake/extensiontask', which is provided by rake-compiler.
+gem 'rake-compiler'
+
+group :test do
+  # ...
+end
diff --git a/src/bindings/ruby/README.md b/src/bindings/ruby/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5ed26aebf351da13b7687480d784ce2e7d6843fe
--- /dev/null
+++ b/src/bindings/ruby/README.md
@@ -0,0 +1,72 @@
+# hammer-parser
+
+Ruby bindings for [hammer](https://github.com/UpstandingHackers/hammer), a parsing library.
+
+
+## Notes
+
+* I called the gem `hammer-parser`, since there already is a [gem named `hammer`](https://rubygems.org/gems/hammer).
+
+* C extension not really needed at the moment, if we don't mind hardcoding the token types in the ruby code.
+
+
+## Development
+
+1. `cd src/bindings/ruby`.
+
+2. Run `bundle install` to install dependencies.
+
+3. Run `rake compile` to compile the C extension.
+
+4. Run `irb -I ./lib -r hammer` to open `irb` with hammer loaded.
+
+
+## Installation
+
+TODO
+
+
+
+## Examples
+
+### Building a parser
+
+```ruby
+parser = Hammer::Parser.build {
+  token 'Hello '
+  choice {
+    token 'Mom'
+    token 'Dad'
+  }
+  token '!'
+} +``` + +Also possible: + +```ruby +parser = Hammer::ParserBuilder.new + .token('Hello ') + .choice(Hammer::Parser::Token.new('Mom'), Hammer::Parser::Token.new('Dad')) + .token('!') + .build +``` + +More like hammer in C: + +```ruby +h = Hammer::Parser +parser = h.sequence(h.token('Hello'), h.choice(h.token('Mom'), h.token('Dad')), h.token('!')) +``` + +### Parsing + +```ruby +parser.parse 'Hello Mom!' +=> true +parser.parse 'Hello Someone!' +=> false +``` + +Currently you only get `true` or `false` depending on whether the parse succeeded or failed. +There's no way to access the parsed data yet. diff --git a/src/bindings/ruby/Rakefile b/src/bindings/ruby/Rakefile new file mode 100644 index 0000000000000000000000000000000000000000..646654da332297cb1a8db844ee513da41f85ebb2 --- /dev/null +++ b/src/bindings/ruby/Rakefile @@ -0,0 +1,8 @@ +require 'rake/extensiontask' + +#spec = Gem::Specification.load('hammer-parser-ruby.gemspec') +#Rake::ExtensionTask.new('hammer_ext', spec) + +Rake::ExtensionTask.new 'hammer_ext' do |ext| + ext.lib_dir = 'lib/hammer' +end diff --git a/src/bindings/ruby/ext/hammer_ext/extconf.rb b/src/bindings/ruby/ext/hammer_ext/extconf.rb new file mode 100644 index 0000000000000000000000000000000000000000..d5158a70c8c41ed748bde99f9d9f74555e1625e8 --- /dev/null +++ b/src/bindings/ruby/ext/hammer_ext/extconf.rb @@ -0,0 +1,9 @@ +require 'mkmf' + +extension_name = 'hammer_ext' +dir_config extension_name + +abort 'ERROR: missing hammer library' unless have_library 'hammer' +abort 'ERROR: missing hammer.h' unless have_header 'hammer.h' + +create_makefile extension_name diff --git a/src/bindings/ruby/ext/hammer_ext/hammer_ext.c b/src/bindings/ruby/ext/hammer_ext/hammer_ext.c new file mode 100644 index 0000000000000000000000000000000000000000..6b461c6eda08b397c6a3082be856dc4fb6f9e1eb --- /dev/null +++ b/src/bindings/ruby/ext/hammer_ext/hammer_ext.c @@ -0,0 +1,6 @@ +#include "token_type.h" + +void Init_hammer_ext(void) +{ + Init_token_type(); +} diff --git 
a/src/bindings/ruby/ext/hammer_ext/hammer_ext.h b/src/bindings/ruby/ext/hammer_ext/hammer_ext.h new file mode 100644 index 0000000000000000000000000000000000000000..98fc2dad30c6c8e8e204fef159fe1bc5285a3068 --- /dev/null +++ b/src/bindings/ruby/ext/hammer_ext/hammer_ext.h @@ -0,0 +1,6 @@ +#ifndef HAMMER_EXT__H +#define HAMMER_EXT__H + +// ... + +#endif diff --git a/src/bindings/ruby/ext/hammer_ext/token_type.c b/src/bindings/ruby/ext/hammer_ext/token_type.c new file mode 100644 index 0000000000000000000000000000000000000000..a154d7ed8313b5b0a7aed876b319277de25438d8 --- /dev/null +++ b/src/bindings/ruby/ext/hammer_ext/token_type.c @@ -0,0 +1,20 @@ +#include <ruby.h> +#include <hammer.h> + +#include "token_type.h" + +#define DefineHammerInternalConst(name) rb_define_const(mHammerInternal, #name, INT2FIX(name)); + +void Init_token_type(void) +{ + VALUE mHammer = rb_define_module("Hammer"); + VALUE mHammerInternal = rb_define_module_under(mHammer, "Internal"); + + DefineHammerInternalConst(TT_NONE); + DefineHammerInternalConst(TT_BYTES); + DefineHammerInternalConst(TT_SINT); + DefineHammerInternalConst(TT_UINT); + DefineHammerInternalConst(TT_SEQUENCE); + DefineHammerInternalConst(TT_ERR); + DefineHammerInternalConst(TT_USER); +} diff --git a/src/bindings/ruby/ext/hammer_ext/token_type.h b/src/bindings/ruby/ext/hammer_ext/token_type.h new file mode 100644 index 0000000000000000000000000000000000000000..5652ce6b5fd2a5e295c6da137013a649d6ac59fd --- /dev/null +++ b/src/bindings/ruby/ext/hammer_ext/token_type.h @@ -0,0 +1,6 @@ +#ifndef HAMMER_EXT_TOKEN_TYPE__H +#define HAMMER_EXT_TOKEN_TYPE__H + +void Init_token_type(void); + +#endif diff --git a/src/bindings/ruby/hammer-parser.gemspec b/src/bindings/ruby/hammer-parser.gemspec new file mode 100644 index 0000000000000000000000000000000000000000..80b7529065fa99fc4b8e192c055c05d29662583c --- /dev/null +++ b/src/bindings/ruby/hammer-parser.gemspec @@ -0,0 +1,23 @@ +#encoding: UTF-8 +Gem::Specification.new do |s| + s.name = 
'hammer-parser' + s.version = '0.1.0' + s.summary = 'Ruby bindings to the hammer parsing library.' + s.description = s.summary # TODO: longer description? + s.authors = ['Meredith L. Patterson', 'TQ Hirsch', 'Jakob Rath'] + # TODO: + # s.email = ... + # s.homepage = ... + + files = [] + files << 'README.md' + files << Dir['{lib,test}/**/*.rb'] + s.files = files + s.test_files = s.files.select { |path| path =~ /^test\/.*_test.rb/ } + + s.require_paths = %w[lib] + + s.add_dependency 'ffi', '~> 1.9' + s.add_dependency 'docile', '~> 1.1' # TODO: Find a way to make this optional +end + diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb new file mode 100644 index 0000000000000000000000000000000000000000..2699d96f982a5f1e3dccf078d0567963f3e24ce7 --- /dev/null +++ b/src/bindings/ruby/lib/hammer.rb @@ -0,0 +1,65 @@ +require 'hammer/hammer_ext' +require 'hammer/internal' +require 'hammer/parser' +require 'hammer/parser_builder' + +# TODO: +# Probably need to rename this file to 'hammer-parser.rb', so +# people can use "require 'hammer-parser'" in their code. + + + +# TODO: Put tests in test/ directory. + +parser = Hammer::Parser.build do + token 'blah' + ch 'a' + choice { + sequence { + token 'abc' + } + token 'def' + } +end + +p parser + +if parser + p parser.parse 'blahaabcd' + p parser.parse 'blahadefd' + p parser.parse 'blahablad' + p parser.parse 'blaha' + p parser.parse 'blah' +end + +parser = Hammer::Parser::Sequence.new( + Hammer::Parser::Token.new('Hello '), + Hammer::Parser::Choice.new( + Hammer::Parser::Token.new('Mom'), + Hammer::Parser::Token.new('Dad') + ), + Hammer::Parser::Token.new('!') +) +p parser.parse 'Hello Mom!' + +parser = Hammer::Parser.build { + token 'Hello ' + choice { + token 'Mom' + token 'Dad' + } + token '!' +} +p parser.parse 'Hello Mom!' 
+
+parser = Hammer::ParserBuilder.new
+  .token('Hello ')
+  .choice(Hammer::Parser::Token.new('Mom'), Hammer::Parser::Token.new('Dad'))
+  .token('!')
+  .build
+p parser.parse 'Hello Mom!'
+
+# not yet working
+#h = Hammer::Parser
+#parser = h.sequence(h.token('Hello'), h.choice(h.token('Mom'), h.token('Dad')), h.token('!'))
+#p parser.parse 'Hello Mom!'
diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb
new file mode 100644
index 0000000000000000000000000000000000000000..0083ebd916a6012125286b3221369665589abea8
--- /dev/null
+++ b/src/bindings/ruby/lib/hammer/internal.rb
@@ -0,0 +1,64 @@
+require 'ffi'
+
+module Hammer
+  # Raw FFI bindings to the hammer C library. Parsers and parse results are
+  # handled as opaque :pointer values.
+  module Internal
+    extend FFI::Library
+
+    # A bare library name lets FFI pick the platform's file name (libhammer.so,
+    # libhammer.dylib, ...) instead of only ever finding the macOS one.
+    ffi_lib 'hammer'
+
+    # run a parser
+    attach_function :h_parse, [:pointer, :string, :size_t], :pointer
+
+    # build a parser
+    attach_function :h_token, [:string, :size_t], :pointer
+    attach_function :h_ch, [:uint8], :pointer
+    attach_function :h_ch_range, [:uint8, :uint8], :pointer
+    attach_function :h_int_range, [:int64, :int64], :pointer
+    attach_function :h_bits, [:size_t, :bool], :pointer
+    attach_function :h_int64, [], :pointer
+    attach_function :h_int32, [], :pointer
+    attach_function :h_int16, [], :pointer
+    attach_function :h_int8, [], :pointer
+    attach_function :h_uint64, [], :pointer
+    attach_function :h_uint32, [], :pointer
+    attach_function :h_uint16, [], :pointer
+    attach_function :h_uint8, [], :pointer
+    attach_function :h_whitespace, [:pointer], :pointer
+    attach_function :h_left, [:pointer, :pointer], :pointer
+    attach_function :h_right, [:pointer, :pointer], :pointer
+    attach_function :h_middle, [:pointer, :pointer, :pointer], :pointer
+    # h_action
+    # h_in
+    # h_not_in
+    attach_function :h_end_p, [], :pointer
+    attach_function :h_nothing_p, [], :pointer
+    attach_function :h_sequence, [:varargs], :pointer
+    attach_function :h_choice, [:varargs], :pointer
+    attach_function :h_butnot, [:pointer, :pointer], :pointer
+    attach_function :h_difference, [:pointer, :pointer], :pointer
+    attach_function :h_xor, [:pointer, :pointer], :pointer
+    attach_function :h_many, [:pointer], :pointer
+    attach_function :h_many1, [:pointer], :pointer
+    # h_repeat_n
+    # h_optional
+    # h_ignore
+    # h_sepBy
+    # h_sepBy1
+    # h_epsilon_p
+    # h_length_value
+    # h_attr_bool
+    # h_and
+    # h_not
+    # h_indirect
+    # h_bind_indirect
+
+    # free the parse result
+    attach_function :h_parse_result_free, [:pointer], :void
+
+    # TODO: Does the HParser* need to be freed?
+  end
+end
diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb
new file mode 100644
index 0000000000000000000000000000000000000000..a7b75e273a820963ce0132270b8ad049f67f2e4d
--- /dev/null
+++ b/src/bindings/ruby/lib/hammer/parser.rb
@@ -0,0 +1,163 @@
+module Hammer
+  # Ruby wrapper around a hammer parser; the raw pointer lives in @h_parser.
+  class Parser
+
+    # Don't create new instances with Hammer::Parser.new,
+    # use the constructor methods instead (i.e. Hammer::Parser.int64 etc.)
+    def initialize
+    end
+
+    # Runs this parser on the String +data+.
+    # Returns true if the parse succeeded and false if it failed.
+    # Raises RuntimeError when no hammer parser is set and ArgumentError when
+    # +data+ is not a String.
+    def parse(data)
+      raise RuntimeError, '@h_parser is nil' if @h_parser.nil?
+      raise ArgumentError, 'expecting a String' unless data.is_a? String # TODO: Not needed, FFI checks that.
+      # The length is passed in bytes: String#length counts characters and would
+      # cut the input short as soon as it contains a multi-byte character.
+      result = Hammer::Internal.h_parse(@h_parser, data, data.bytesize)
+      return false if result.null?
+
+      # TODO: Do something with the data
+      # Only success/failure is reported so far, so free the result right away.
+      Hammer::Internal.h_parse_result_free(result)
+      true
+    end
+
+    # Matches the exact bytes of a String.
+    class Token < Parser
+      def initialize(string)
+        # Length in bytes, not in characters (h_token receives a size_t byte count).
+        @h_parser = Hammer::Internal.h_token(string, string.bytesize)
+      end
+    end
+
+    # Matches one byte, given as a one-byte String or as an Integer in 0..255.
+    class Ch < Parser
+      def initialize(char)
+        # h_ch takes a uint8. A multi-byte character such as '今' (ord 20170) would be
+        # truncated to its low byte (20170 & 255 == 202) and silently match 202.chr,
+        # so anything that is not exactly one byte is rejected instead.
+        byte = char.is_a?(String) && char.bytesize == 1 ? char.getbyte(0) : char
+        unless byte.is_a?(Integer) && (0..255).cover?(byte)
+          raise ArgumentError, 'expecting a one-byte String or an Integer in 0..255'
+        end
+        @h_parser = Hammer::Internal.h_ch(byte)
+      end
+    end
+
+    # Matches the given parsers one after another.
+    class Sequence < Parser
+      def initialize(*parsers)
+        # Variadic FFI calls take a type before each argument; the trailing
+        # (:pointer, nil) pair passes the NULL that ends the argument list.
+        args = parsers.flat_map { |p| [:pointer, p.h_parser] }
+        @h_parser = Hammer::Internal.h_sequence(*args, :pointer, nil)
+        @sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though)
+        # TODO: Use (managed?) FFI struct instead of void pointers
+      end
+    end
+
+    # Matches one of the given alternative parsers.
+    class Choice < Parser
+      def initialize(*parsers)
+        # Same calling convention as Sequence: typed arguments plus a NULL terminator.
+        args = parsers.flat_map { |p| [:pointer, p.h_parser] }
+        @h_parser = Hammer::Internal.h_choice(*args, :pointer, nil)
+        @sub_parsers = parsers # store them so they don't get garbage-collected (probably not needed, though)
+        # TODO: Use (managed?) FFI struct instead of void pointers
+      end
+    end
+
+    # Define parsers that take some number of other parsers
+    # TODO: Maybe use -1 for variable number, and use this for Sequence and Choice too
+    # TODO: Refactor this code as a method? And call it like: define_parser :Int64, :h_int64, 0
+    [
+      [:Int64, :h_int64, 0],
+      [:Int32, :h_int32, 0],
+      [:Int16, :h_int16, 0],
+      [:Int8, :h_int8, 0],
+      [:UInt64, :h_uint64, 0],
+      [:UInt32, :h_uint32, 0],
+      [:UInt16, :h_uint16, 0],
+      [:UInt8, :h_uint8, 0],
+      [:Whitespace, :h_whitespace, 1],
+      [:Left, :h_left, 2],
+      [:Right, :h_right, 2],
+      [:Middle, :h_middle, 3],
+      [:End, :h_end_p, 0],
+      [:Nothing, :h_nothing_p, 0],
+      [:ButNot, :h_butnot, 2],
+      [:Difference, :h_difference, 2],
+      [:Xor, :h_xor, 2],
+      [:Many, :h_many, 1],
+      [:Many1, :h_many1, 1]
+    ].each do |class_name, h_function_name, parameter_count|
+      # Create new subclass of Hammer::Parser
+      klass = Class.new(Hammer::Parser) do
+        # Need to use define_method instead of def to be able to access h_function_name in the method's body
+        define_method :initialize do |*parsers|
+          # Checking parameter_count is not really needed, since the h_* methods will complain anyways
+          @h_parser = Hammer::Internal.send(h_function_name, *parsers.map(&:h_parser))
+          # TODO: Do we need to store sub-parsers to prevent them from getting garbage-collected?
+        end
+      end
+      # Register class with name Hammer::Parser::ClassName
+      Hammer::Parser.const_set class_name, klass
+    end
+
+    # TODO:
+    # Hammer::Parser::Token.new('...') is a bit too long. Find a shorter way to use the parsers.
+    # Maybe:
+    #   class Hammer::Parser
+    #     def self.token(*args)
+    #       Hammer::Parser::Token.new(*args)
+    #     end
+    #   end
+    # Can create functions like that automatically. Usage:
+    #   h = Hammer::Parser
+    #   parser = h.sequence(h.token('blah'), h.token('other_token'))
+    # Looks almost like hammer in C!
+
+    # Defines a parser constructor with the given name.
+    # Options:
+    #   hammer_function: name of the hammer function to call (default: 'h_'+name)
+    def self.define_parser(name, options = {})
+      hammer_function = options[:hammer_function] || ('h_' + name.to_s)
+
+      # Define a new class method
+      define_singleton_method name do |*parsers|
+        # hammer works on the wrapped parser pointers, not on the Ruby objects.
+        h_parser = Hammer::Internal.send hammer_function, *parsers.map(&:h_parser)
+
+        parser = Hammer::Parser.new
+        parser.instance_variable_set :@h_parser, h_parser
+        parser
+      end
+    end
+    private_class_method :define_parser
+
+    define_parser :int64
+    define_parser :int32
+    define_parser :int16
+    define_parser :int8
+    define_parser :uint64
+    define_parser :uint32
+    define_parser :uint16
+    define_parser :uint8
+    define_parser :whitespace
+    define_parser :left
+    define_parser :right
+    define_parser :middle
+    define_parser :end, hammer_function: :h_end_p # attached as h_end_p; the default h_end does not exist
+    define_parser :nothing, hammer_function: :h_nothing_p # attached as h_nothing_p; h_nothing does not exist
+    define_parser :butnot
+    define_parser :difference
+    define_parser :xor
+    define_parser :many
+    define_parser :many1
+
+    attr_reader :h_parser
+  end
+end
diff --git a/src/bindings/ruby/lib/hammer/parser_builder.rb b/src/bindings/ruby/lib/hammer/parser_builder.rb
new file mode 100644
index 0000000000000000000000000000000000000000..2f36c844dea5ed1ebc36a89d9cee06ff56a598af
--- /dev/null
+++ b/src/bindings/ruby/lib/hammer/parser_builder.rb
@@ -0,0 +1,75 @@
+# TODO: Find a way to make docile an optional dependency
+# (autoload for this file? and throw some informative error when docile isn't available.
+# should also check gem version with a 'gem' call and appropriate version specifier.)
+require 'docile'
+
+module Hammer
+
+  class Parser
+    def self.build(&block)
+      ParserBuilder.new.sequence(&block).build
+    end
+  end
+
+  # TODO: Is this even useful for "real" usage?
+  # Collects parsers through chainable calls or a Docile block; #build combines them.
+  class ParserBuilder
+    # Parsers added so far, in order.
+    attr_reader :parsers
+
+    def initialize
+      @parsers = []
+      # TODO: Store an aggregator (Hammer::Parser::Sequence by default, Hammer::Parser::Choice
+      # for choice() calls) and use @aggregator.new(*@parsers) in the build method.
+    end
+
+    # Returns a Sequence of the collected parsers, the parser itself if there is
+    # exactly one, or nil if nothing has been added.
+    def build
+      return @parsers.first if @parsers.length <= 1
+
+      Hammer::Parser::Sequence.new(*@parsers)
+    end
+
+    # Appends an already constructed parser. Returns self.
+    # TODO: Need to check if that's really needed
+    def call(parser)
+      @parsers << parser
+      self
+    end
+
+    # Appends a Token parser for +str+. Returns self.
+    def token(str)
+      @parsers << Hammer::Parser::Token.new(str)
+      self
+    end
+
+    # Appends a Ch parser for +char+. Returns self.
+    def ch(char)
+      @parsers << Hammer::Parser::Ch.new(char)
+      self
+    end
+
+    # Appends the given parsers and/or the parsers declared in the block as ONE
+    # nested element, so that choice { sequence(a, b) } means "a, then b" and not
+    # "a or b". An empty sequence appends nothing. Returns self.
+    # Call it either as ParserBuilder.new.sequence(parser1, parser2, parser3)
+    # or as Parser.build { sequence { call parser1; call parser2; call parser3 } }
+    def sequence(*parsers, &block)
+      nested = ParserBuilder.new
+      parsers.each { |parser| nested.call(parser) }
+      Docile.dsl_eval(nested, &block) if block_given?
+      built = nested.build # nil when the sequence is empty
+      @parsers << built if built
+      self
+    end
+
+    # Appends a Choice between the given parsers and/or those declared in the block.
+    def choice(*parsers, &block)
+      parsers += Docile.dsl_eval(ParserBuilder.new, &block).parsers if block_given?
+      @parsers << Hammer::Parser::Choice.new(*parsers)
+      self
+    end
+  end
+
+end