diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index cec33fc00be11ce94d1efa5fd3d645fe051f43f0..79fb52dcb21fef7919794b0ca15ff35ab67fe930 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -27,8 +27,7 @@ parser = h.many( h.action(h.uint8) { |r| #p "TT=#{r[:ast][:token_type]}, value=#{r[:ast][:data][:uint]}" - r[:ast][:data][:uint] *= 2 - r[:ast] if r[:ast][:data][:uint] % 3 == 0 + r.data * 2 }) #parser = Hammer::Parser.build { diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index 62a4bc6df31445dfe335cee1fa79d006a96927ba..bac3b3edb8cdbd44732ec0f69caafebf1258e1b0 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -65,7 +65,15 @@ module Hammer @@inverse_type_map = @@known_type_map.invert - def self.new(name) + @@from_hpt = { + :none => Proc.new { nil }, + :bytes => Proc.new {|hpt| hpt[:data][:bytes].token}, + :sint => Proc.new {|hpt| hpt[:data][:sint]}, + :uint => Proc.new {|hpt| hpt[:data][:uint]}, + :sequence => Proc.new {|hpt| hpt[:data][:seq].map {|x| x.unmarshal}}, + } + + def self.new(name, &block) if name.is_a?(Symbol) name_sym = name name_str = name.to_s @@ -73,14 +81,15 @@ module Hammer name_str = name.to_s name_sym = name.to_sym end - num = h_allocate_token_type(name_str) + num = Hammer::Internal.h_allocate_token_type(name_str) @@known_type_map[name_sym] = num - @@inverse_type_map[num] = name + @@inverse_type_map[num] = name_sym + @@from_hpt[name_sym] = block end def self.from_name(name) unless @@known_type_map.key? name - num = h_get_token_type_number(name.to_s) + num = Hammer::Internal.h_get_token_type_number(name.to_s) if num <= 0 raise ArgumentError, "Unknown token type #{name}" end @@ -92,13 +101,13 @@ module Hammer def self.from_num(num) unless @@inverse_type_map.key? num - name = h_get_token_type_name(num) + name = Hammer::Internal.h_get_token_type_name(num) if name.nil? return nil end name = name.to_sym @@known_type_map[name] = num - @@inverse_type_map_type_map[num] = name + @@inverse_type_map[num] = name end return @@inverse_type_map[num] end @@ -119,10 +128,10 @@ module Hammer # Define these as soon as possible, so that they can be used # without fear elsewhere - attach_function :h_allocate_token_type, [:string], HTokenType - attach_function :h_get_token_type_number, [:string], HTokenType - attach_function :h_get_token_type_name, [HTokenType], :string - + attach_function :h_allocate_token_type, [:string], :int + attach_function :h_get_token_type_number, [:string], :int + attach_function :h_get_token_type_name, [:int], :string + class HCountedArray < FFI::Struct layout :capacity, :size_t, :used, :size_t, @@ -161,7 +170,7 @@ module Hammer # Should be the same encoding as the string the token was created with. # But how do we get to this knowledge at this point? # Cheap solution: Just ask the user (additional parameter with default value of UTF-8). - self[:token].read_string(self[:len]).force_encoding('UTF-8') + self[:token].read_string(self[:len]) end # TODO: Probably should rename this to match ruby conventions: length, count, size @@ -170,6 +179,22 @@ module Hammer end end + class HString < FFI::Struct + layout :content, HBytes.by_ref, + :encoding, :uint64 + def token + return self[:content].token.force_encoding( + ObjectSpace._id2ref(self[:encoding])) + end + end + + HTokenType.new(:"com.upstandinghackers.hammer.ruby.encodedStr") {|hpt| + hpt.user(HString).token + } + HTokenType.new(:"com.upstandinghackers.hammer.ruby.object") {|hpt| + ObjectSpace._id2ref(hpt[:data][:uint]) + } + class HParsedTokenDataUnion < FFI::Union layout :bytes, HBytes.by_value, :sint, :int64, @@ -223,22 +248,13 @@ module Hammer self[:bit_offset] end - def unmarshal - case token_type - when :sequence - self[:data][:seq].map {|x| x.unmarshal} - when :bytes - self[:data][:bytes].token - when :uint - self[:data][:uint] - when :sint - self[:data][:sint] - when :none - nil - end + def user(struct) + struct.by_ref.from_native(self[:data][:user], nil) end - + def unmarshal + Hammer::Internal::HTokenType.class_variable_get(:@@from_hpt)[token_type].call self + end end class HParseResult < FFI::Struct @@ -257,6 +273,15 @@ module Hammer def self.release(ptr) Hammer::Internal.h_parse_result_free(ptr) unless ptr.null? end + + def arena_alloc(type) + Hammer::Internal.arena_alloc(self[:arena], type) + end + end + + def self.arena_alloc(arena, type) + ptr = h_arena_malloc(arena, type.size) + return type.by_ref.from_native(ptr, nil) end # run a parser @@ -315,6 +340,7 @@ module Hammer # TODO: Does the HParser* need to be freed? - # Token type registry + # Add the arena + attach_function :h_arena_malloc, [:pointer, :size_t], :pointer end end diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index 09f2ff4157127549b987236a49fbf8934d0c3784..cdd2c3498c38dbd26cbf42f0dd3ced3d39ba09c9 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -58,8 +58,22 @@ module Hammer action = block if action.nil? raise ArgumentError, 'no action' if action.nil? - h_parser = Hammer::Internal.h_action(parser.h_parser, action) - return Hammer::Parser.new(:action, h_parser, [parser, action]) + real_action = Proc.new {|hpr| + ret = action.call(hpr.ast) + # Pin the result + @@saved_objects.value << ret + hpt = hpr.arena_alloc(Hammer::Internal::HParsedToken) + unless hpr.ast.nil? + hpt[:index] = hpr[:ast][:index] + hpt[:bit_offset] = hpr[:ast][:bit_offset] + end + hpt[:token_type] = :"com.upstandinghackers.hammer.ruby.object" + hpt[:data][:uint] = ret.object_id + hpt + } + + h_parser = Hammer::Internal.h_action(parser.h_parser, real_action) + return Hammer::Parser.new(:action, h_parser, [parser, action, real_action]) end # Can pass the predicate either as a Proc in second parameter, or as block. @@ -77,8 +91,22 @@ module Hammer # * We need a constant memory address (Ruby string might be moved around by the Ruby VM) buffer = FFI::MemoryPointer.from_string(string) h_parser = Hammer::Internal.h_token(buffer, buffer.size-1) # buffer.size includes the null byte at the end - - return Hammer::Parser.new(:token, h_parser, [buffer, string]) + encoding = string.encoding + + wrapping_action = Proc.new {|hpr| + hstr = hpr.arena_alloc(Hammer::Internal::HString) + hstr[:content] = hpr[:ast][:data][:bytes] + hstr[:encoding] = encoding.object_id + + hpt = hpr.arena_alloc(Hammer::Internal::HParsedToken) + hpt[:token_type] = :"com.upstandinghackers.hammer.ruby.encodedStr" + hpt[:data][:user] = hstr.to_ptr + hpt[:bit_offset] = hpr[:ast][:bit_offset] + hpt[:index] = hpr[:ast][:index] + hpt + } + wrapped_parser = Hammer::Internal.h_action(h_parser, wrapping_action) + return Hammer::Parser.new(:token, wrapped_parser, [buffer, string, encoding, wrapping_action, h_parser]) end def self.marshal_ch_arg(num) diff --git a/src/bindings/ruby/test/parser_test.rb b/src/bindings/ruby/test/parser_test.rb index abbd1c1e460acab22975abfabfd4f33311789a25..b9fb37f2766724d625268c17ad47095cc99a4f7c 100644 --- a/src/bindings/ruby/test/parser_test.rb +++ b/src/bindings/ruby/test/parser_test.rb @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- require 'bundler/setup' require 'hammer' require 'minitest/autorun' @@ -84,7 +85,7 @@ class ParserTest < Minitest::Test def test_token_encoding(encoding='UTF-8') string = '今日'.encode(encoding) parser = Hammer::Parser.token(string) - assert_equal string, parser.parse(string)[:ast][:data][:bytes].token + assert_equal string, parser.parse(string).ast.unmarshal end def test_token_encoding_2