From 8c653b519e4874e3b7868f91dbaf4c4ef3eb362a Mon Sep 17 00:00:00 2001
From: Jakob Rath <git@jakobrath.eu>
Date: Mon, 16 Dec 2013 21:28:23 +0100
Subject: [PATCH] Add tests about token encoding (failing for now).

---
 src/bindings/ruby/lib/hammer/internal.rb |  7 +++++--
 src/bindings/ruby/test/parser_test.rb    | 10 ++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/bindings/ruby/lib/hammer/internal.rb b/src/bindings/ruby/lib/hammer/internal.rb
index 0c462fe2..12d797f5 100644
--- a/src/bindings/ruby/lib/hammer/internal.rb
+++ b/src/bindings/ruby/lib/hammer/internal.rb
@@ -39,8 +39,11 @@ module Hammer
              :len, :size_t
 
       def token
-        # TODO: Encoding? Should probably be the same encoding as the string the token was created with.
-        return self[:token].read_string(self[:len]) #.force_encoding('UTF-8')
+        # TODO: Encoding? The result should carry the same encoding as the string
+        # the token was created with, but it is unclear how to learn that here.
+        # Cheap solution (not implemented yet): let the caller pass the encoding
+        # via an extra parameter that defaults to UTF-8. For now, always tag UTF-8.
+        return self[:token].read_string(self[:len]).force_encoding('UTF-8')
       end
     end
 
diff --git a/src/bindings/ruby/test/parser_test.rb b/src/bindings/ruby/test/parser_test.rb
index f5f12f2d..abbd1c1e 100644
--- a/src/bindings/ruby/test/parser_test.rb
+++ b/src/bindings/ruby/test/parser_test.rb
@@ -80,4 +80,14 @@ class ParserTest < Minitest::Test
 
     refute_nil parser.parse('今日a')
   end
+
+  def test_token_encoding(encoding='UTF-8')
+    string = '今日'.encode(encoding)
+    parser = Hammer::Parser.token(string)
+    assert_equal string, parser.parse(string)[:ast][:data][:bytes].token
+  end
+
+  def test_token_encoding_2
+    test_token_encoding('EUC-JP')
+  end
 end
-- 
GitLab