diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index 0ae353c185ce9e60d5dce76f0fc7ac0fe31a4a5d..db987b4521a0c930a0c297ca5b39db42058c2b7c 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -72,4 +72,14 @@ p parser.parse 'abcabd' p parser.parse 'abdabd' p parser.parse 'abd' -$r = parser.parse 'abcabd' +#$r = parser.parse 'abcabd' + + +# Test multibyte characters +parser = Hammer::Parser.build { + token '今日' + end_p +} + +p ($r = parser.parse('今日')) # should succeed + diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index badac10d8528ebbd9cf5af78be29722d058db6a9..8f673d4e8e5727e14ec624d3d2cff1d206062f9a 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -70,7 +70,7 @@ module Hammer attach_function :h_parse, [:h_parser, :string, :size_t], HParseResult.auto_ptr # TODO: Use :buffer_in instead of :string? # build a parser - attach_function :h_token, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? + attach_function :h_token, [:buffer_in, :size_t], :h_parser attach_function :h_ch, [:uint8], :h_parser attach_function :h_ch_range, [:uint8, :uint8], :h_parser attach_function :h_int_range, [:int64, :int64], :h_parser @@ -87,8 +87,8 @@ module Hammer attach_function :h_left, [:h_parser, :h_parser], :h_parser attach_function :h_right, [:h_parser, :h_parser], :h_parser attach_function :h_middle, [:h_parser, :h_parser, :h_parser], :h_parser - #attach_function :h_in, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? - #attach_function :h_not_in, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? + #attach_function :h_in, [:buffer_in, :size_t], :h_parser + #attach_function :h_not_in, [:buffer_in, :size_t], :h_parser attach_function :h_end_p, [], :h_parser attach_function :h_nothing_p, [], :h_parser attach_function :h_sequence, [:varargs], :h_parser diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index a7e175dbccf5e2b6e26da74adf6599c48e99a2e9..8722cc6639d94420b7053e043d58ad0d06ec2fc3 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -6,7 +6,7 @@ module Hammer # # name: Name of the parser. Should be a symbol. # h_parser: The pointer to the parser as returned by hammer. - # dont_gc: Pass additional data that's used by the parser and needs to be saved from the garbage collector. + # dont_gc: Pass additional data that's used by the parser and needs to be saved from the garbage collector (at least as long this object lives). def initialize(name, h_parser, dont_gc) @name = name @h_parser = h_parser @@ -34,13 +34,14 @@ module Hammer end def self.token(string) - # TODO: - # This might fail in JRuby. - # See "String Memory Allocation" at https://github.com/ffi/ffi/wiki/Core-Concepts - h_string = string.dup - h_parser = Hammer::Internal.h_token(h_string, h_string.length) + # Need to copy string to a memory buffer (not just string.dup) + # * Original string might be modified, this must not affect existing tokens + # * We need a constant memory address (Ruby string might be moved around by the Ruby VM) + # * Use string.length instead of h_string.size to handle multibyte characters correctly. + buffer = FFI::MemoryPointer.from_string(string) + h_parser = Hammer::Internal.h_token(buffer, string.length) - return Hammer::Parser.new(:token, h_parser, h_string) + return Hammer::Parser.new(:token, h_parser, buffer) end def self.ch(num)