From 96dfad01789d1c780c496b66c79115e3f25adb33 Mon Sep 17 00:00:00 2001 From: Jakob Rath <git@jakobrath.eu> Date: Mon, 16 Dec 2013 17:07:50 +0100 Subject: [PATCH] h_token: Copy string correctly. See https://github.com/ffi/ffi/wiki/Core-Concepts#string-memory-allocation for additional information. --- src/bindings/ruby/lib/hammer.rb | 12 +++++++++++- src/bindings/ruby/lib/hammer/internal.rb | 6 +++--- src/bindings/ruby/lib/hammer/parser.rb | 15 ++++++++------- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/bindings/ruby/lib/hammer.rb b/src/bindings/ruby/lib/hammer.rb index 0ae353c..db987b4 100644 --- a/src/bindings/ruby/lib/hammer.rb +++ b/src/bindings/ruby/lib/hammer.rb @@ -72,4 +72,14 @@ p parser.parse 'abcabd' p parser.parse 'abdabd' p parser.parse 'abd' -$r = parser.parse 'abcabd' +#$r = parser.parse 'abcabd' + + +# Test multibyte characters +parser = Hammer::Parser.build { + token '今日' + end_p +} + +p ($r = parser.parse('今日')) # should succeed + diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb index badac10..8f673d4 100644 --- a/src/bindings/ruby/lib/hammer/internal.rb +++ b/src/bindings/ruby/lib/hammer/internal.rb @@ -70,7 +70,7 @@ module Hammer attach_function :h_parse, [:h_parser, :string, :size_t], HParseResult.auto_ptr # TODO: Use :buffer_in instead of :string? # build a parser - attach_function :h_token, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? + attach_function :h_token, [:buffer_in, :size_t], :h_parser attach_function :h_ch, [:uint8], :h_parser attach_function :h_ch_range, [:uint8, :uint8], :h_parser attach_function :h_int_range, [:int64, :int64], :h_parser @@ -87,8 +87,8 @@ module Hammer attach_function :h_left, [:h_parser, :h_parser], :h_parser attach_function :h_right, [:h_parser, :h_parser], :h_parser attach_function :h_middle, [:h_parser, :h_parser, :h_parser], :h_parser - #attach_function :h_in, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? - #attach_function :h_not_in, [:string, :size_t], :h_parser # TODO: Use :buffer_in instead of :string? + #attach_function :h_in, [:buffer_in, :size_t], :h_parser + #attach_function :h_not_in, [:buffer_in, :size_t], :h_parser attach_function :h_end_p, [], :h_parser attach_function :h_nothing_p, [], :h_parser attach_function :h_sequence, [:varargs], :h_parser diff --git a/src/bindings/ruby/lib/hammer/parser.rb b/src/bindings/ruby/lib/hammer/parser.rb index a7e175d..8722cc6 100644 --- a/src/bindings/ruby/lib/hammer/parser.rb +++ b/src/bindings/ruby/lib/hammer/parser.rb @@ -6,7 +6,7 @@ module Hammer # # name: Name of the parser. Should be a symbol. # h_parser: The pointer to the parser as returned by hammer. - # dont_gc: Pass additional data that's used by the parser and needs to be saved from the garbage collector. + # dont_gc: Pass additional data that's used by the parser and needs to be saved from the garbage collector (at least as long this object lives). def initialize(name, h_parser, dont_gc) @name = name @h_parser = h_parser @@ -34,13 +34,14 @@ module Hammer end def self.token(string) - # TODO: - # This might fail in JRuby. - # See "String Memory Allocation" at https://github.com/ffi/ffi/wiki/Core-Concepts - h_string = string.dup - h_parser = Hammer::Internal.h_token(h_string, h_string.length) + # Need to copy string to a memory buffer (not just string.dup) + # * Original string might be modified, this must not affect existing tokens + # * We need a constant memory address (Ruby string might be moved around by the Ruby VM) + # * Use string.length instead of h_string.size to handle multibyte characters correctly. + buffer = FFI::MemoryPointer.from_string(string) + h_parser = Hammer::Internal.h_token(buffer, string.length) - return Hammer::Parser.new(:token, h_parser, h_string) + return Hammer::Parser.new(:token, h_parser, buffer) end def self.ch(num) -- GitLab