psych
psych copied to clipboard
Psych.dump fails to write UTF-8 to file
# coding: utf-8
require 'yaml'
h = {name: '李'}
YAML.dump(h, File.new('out.yml', 'w:utf-8'))
On Ruby 2.2.1, fails with: Encoding::UndefinedConversionError: "\xE6" from ASCII-8BIT to UTF-8
The script you gave me works for me. Can you give me more information?
I can reproduce with Ruby 2.1.9:
irb(main):001:0> RUBY_VERSION
=> "2.1.9"
irb(main):002:0> require 'yaml'; h = {name: '李'}; YAML.dump(h, File.new('out.yml', 'w:utf-8'))
Encoding::UndefinedConversionError: "\xE6" from ASCII-8BIT to UTF-8
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:27:in `write'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:27:in `end_document'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:27:in `visit_Psych_Nodes_Document'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/visitor.rb:15:in `visit'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/visitor.rb:5:in `accept'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:20:in `block in visit_Psych_Nodes_Stream'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:20:in `each'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:20:in `visit_Psych_Nodes_Stream'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/visitor.rb:15:in `visit'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/visitor.rb:5:in `accept'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/nodes/node.rb:48:in `yaml'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych.rb:410:in `dump'
from (irb):2
from /Users/Juan/.rubies/ruby-2.1.9/bin/irb:11:in `<main>'
And Ruby 2.2.5:
irb(main):001:0> RUBY_VERSION
=> "2.2.5"
irb(main):002:0> require 'yaml'; h = {name: '李'}; YAML.dump(h, File.new('out.yml', 'w:utf-8'))
Encoding::UndefinedConversionError: "\xE6" from ASCII-8BIT to UTF-8
from /Users/Juan/.rubies/ruby-2.2.5/lib/ruby/2.2.0/psych/visitors/emitter.rb:27:in `write'
from /Users/Juan/.rubies/ruby-2.2.5/lib/ruby/2.2.0/psych/visitors/emitter.rb:27:in `end_document'
from /Users/Juan/.rubies/ruby-2.2.5/lib/ruby/2.2.0/psych/visitors/emitter.rb:27:in `visit_Psych_Nodes_Document'
from /Users/Juan/.rubies/ruby-2.2.5/lib/ruby/2.2.0/psych/visitors/visitor.rb:15:in `visit'
from /Users/Juan/.rubies/ruby-2.2.5/lib/ruby/2.2.0/psych/visitors/visitor.rb:5:in `accept'
from /Users/Juan/.rubies/ruby-2.2.5/lib/ruby/2.2.0/psych/visitors/emitter.rb:20:in `block in visit_Psych_Nodes_Stream'
from /Users/Juan/.rubies/ruby-2.2.5/lib/ruby/2.2.0/psych/visitors/emitter.rb:20:in `each'
from /Users/Juan/.rubies/ruby-2.2.5/lib/ruby/2.2.0/psych/visitors/emitter.rb:20:in `visit_Psych_Nodes_Stream'
from /Users/Juan/.rubies/ruby-2.2.5/lib/ruby/2.2.0/psych/visitors/visitor.rb:15:in `visit'
from /Users/Juan/.rubies/ruby-2.2.5/lib/ruby/2.2.0/psych/visitors/visitor.rb:5:in `accept'
from /Users/Juan/.rubies/ruby-2.2.5/lib/ruby/2.2.0/psych/nodes/node.rb:48:in `yaml'
from /Users/Juan/.rubies/ruby-2.2.5/lib/ruby/2.2.0/psych.rb:410:in `dump'
from (irb):2
from /Users/Juan/.rubies/ruby-2.2.5/bin/irb:11:in `<main>'
🙇
@JuanitoFatas what is your "default_external" set to?
[aaron@TC ~]$ irb
irb(main):001:0> require 'yaml'; h = {name: '李'}; YAML.dump(h, File.new('out.yml', 'w:utf-8'))
=> #<File:out.yml>
irb(main):002:0> Encoding.default_external
=> #<Encoding:UTF-8>
irb(main):003:0> Encoding.default_internal
=> nil
irb(main):004:0> RUBY_VERSION
=> "2.2.4"
irb(main):005:0>
Also #<Encoding:UTF-8>:
irb(main):001:0> Encoding.default_external
=> #<Encoding:UTF-8>
irb(main):002:0> Encoding.default_internal
=> nil
irb(main):003:0> RUBY_VERSION
=> "2.2.5"
for 2.1.9:
irb(main):001:0> Encoding.default_external
=> #<Encoding:UTF-8>
irb(main):002:0> Encoding.default_internal
=> nil
irb(main):003:0> RUBY_VERSION
=> "2.1.9"
And my ENVs:
> ENV["LANG"]
"en_US.UTF-8"
> ENV["LC_ALL"]
"en_US.UTF-8"
Some info before the failure:
def visit_Psych_Nodes_Document o
require "byebug"; debugger;
@handler.start_document o.version, o.tag_directives, o.implicit
o.children.each { |c| accept c }
@handler.end_document o.implicit_end
end
# Then I did the following:
> @handler.start_document o.version, o.tag_directives, o.implicit
#<Psych::Emitter:0x007f8a04136e48>
> o.children.each { |c| accept c }
[
#<Psych::Nodes::Mapping:0x007f8a060c81f8
@children=[
#<Psych::Nodes::Scalar:0x007f8a04137d48
@value=":name", @anchor=nil, @tag=nil,
@plain=true, @quoted=false, @style=0>,
#<Psych::Nodes::Scalar:0x007f8a04136f60
@value="李", @anchor=nil, @tag=nil,
@plain=true, @quoted=true, @style=3>
],
@anchor=nil, @tag=nil, @implicit=true, @style=1>
]
> @handler.end_document o.implicit_end
*** Encoding::UndefinedConversionError Exception: "\xE6" from ASCII-8BIT to UTF-8
nil
Possibly related issue: YAML.dump fails to write a file with UTF-8 string #246
Thanks a lot, Aaron!
I tried to write a test:
def test_dump_chinese_string_to_file
Tempfile.create(['utf8', 'yml'], :encoding => 'UTF-8') do |t|
h = {'one' => '李'}
Psych.dump(h, t)
t.close
assert_equal h, Psych.load_file(t.path)
end
end
but when I ran the test suite with 2.1.9, all tests passed:
$ ruby -v
ruby 2.1.9p490 (2016-03-30 revision 54437) [x86_64-darwin15.0]
$ bundle exec rake
install -c tmp/x86_64-darwin15.0/psych/2.1.9/psych.bundle lib/psych.bundle
cp tmp/x86_64-darwin15.0/psych/2.1.9/psych.bundle tmp/x86_64-darwin15.0/stage/lib/psych.bundle
/Users/Juan/dev/psych/test/psych/test_array.rb:19: warning: shadowing outer local variable - y
/Users/Juan/dev/psych/test/psych/test_array.rb:39: warning: shadowing outer local variable - y
/Users/Juan/dev/psych/test/psych/test_date_time.rb:8: warning: ambiguous first argument; put parentheses or even spaces
/Users/Juan/dev/psych/test/psych/test_string.rb:36: warning: ambiguous first argument; put parentheses or even spaces
/Users/Juan/dev/psych/test/psych/test_string.rb:43: warning: ambiguous first argument; put parentheses or even spaces
/Users/Juan/dev/psych/test/psych/test_string.rb:50: warning: ambiguous first argument; put parentheses or even spaces
/Users/Juan/dev/psych/test/psych/test_string.rb:57: warning: ambiguous first argument; put parentheses or even spaces
/Users/Juan/dev/psych/test/psych/test_string.rb:65: warning: ambiguous first argument; put parentheses or even spaces
/Users/Juan/dev/psych/test/psych/test_string.rb:75: warning: ambiguous first argument; put parentheses or even spaces
/Users/Juan/dev/psych/test/psych/test_string.rb:87: warning: ambiguous first argument; put parentheses or even spaces
/Users/Juan/dev/psych/test/psych/test_string.rb:132: warning: shadowing outer local variable - y
/Users/Juan/dev/psych/test/psych/test_string.rb:157: warning: shadowing outer local variable - y
Run options: --seed 29669
# Running:
..................................................................../Users/Juan/dev/psych/test/psych/test_exception.rb: to_yaml_properties is deprecated, please implement "encode_with(coder)"
................................................................................................................................................................................................................................/Users/Juan/dev/psych/test/psych/test_to_yaml_properties.rb: to_yaml_properties is deprecated, please implement "encode_with(coder)"
/Users/Juan/dev/psych/test/psych/test_to_yaml_properties.rb: to_yaml_properties is deprecated, please implement "encode_with(coder)"
./Users/Juan/dev/psych/test/psych/test_to_yaml_properties.rb: to_yaml_properties is deprecated, please implement "encode_with(coder)"
../Users/Juan/dev/psych/test/psych/test_to_yaml_properties.rb: to_yaml_properties is deprecated, please implement "encode_with(coder)"
../Users/Juan/dev/psych/test/psych/test_psych.rb:102:in `test_load_documents': load_documents is deprecated, use load_stream
...................................................................................................................................../Users/Juan/dev/psych/test/psych/test_yaml.rb:515:in `test_spec_log_file': load_documents is deprecated, use load_stream
................................../Users/Juan/dev/psych/test/psych/test_yaml.rb:588:in `test_spec_oneline_docs': load_documents is deprecated, use load_stream
.................................................................................................
Finished in 0.241404s, 2323.9080 runs/s, 6279.9367 assertions/s.
561 runs, 1516 assertions, 0 failures, 0 errors, 0 skips
However, trying the same code at the console still fails:
$ irb
irb(main):001:0> require "tempfile"; require "psych";
irb(main):002:0* Tempfile.create(['utf8', 'yml'], :encoding => 'UTF-8') do |t|
irb(main):003:1* h = {'one' => '李'}
irb(main):004:1> Psych.dump(h, t)
irb(main):005:1> t.close
irb(main):006:1> Psych.load_file(t.path)
irb(main):007:1> end
Encoding::UndefinedConversionError: "\xE6" from ASCII-8BIT to UTF-8
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:27:in `write'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:27:in `end_document'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:27:in `visit_Psych_Nodes_Document'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/visitor.rb:15:in `visit'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/visitor.rb:5:in `accept'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:20:in `block in visit_Psych_Nodes_Stream'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:20:in `each'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:20:in `visit_Psych_Nodes_Stream'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/visitor.rb:15:in `visit'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/visitor.rb:5:in `accept'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/nodes/node.rb:48:in `yaml'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych.rb:410:in `dump'
from (irb):4:in `block in irb_binding'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/tempfile.rb:371:in `create'
from (irb):2
from /Users/Juan/.rubies/ruby-2.1.9/bin/irb:11:in `<main>'
The test from #246 also fails when tried at the console:
irb(main):001:0> require "tempfile"; require "psych";
irb(main):002:0* Tempfile.create(['utf8', 'yml'], :encoding => 'UTF-8') do |t|
irb(main):003:1* h = {'one' => 'いち'}
irb(main):004:1> Psych.dump(h, t)
irb(main):005:1> t.close
irb(main):006:1> Psych.load_file(t.path)
irb(main):007:1> end
Encoding::UndefinedConversionError: "\xE3" from ASCII-8BIT to UTF-8
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:27:in `write'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:27:in `end_document'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:27:in `visit_Psych_Nodes_Document'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/visitor.rb:15:in `visit'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/visitor.rb:5:in `accept'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:20:in `block in visit_Psych_Nodes_Stream'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:20:in `each'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/emitter.rb:20:in `visit_Psych_Nodes_Stream'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/visitor.rb:15:in `visit'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/visitors/visitor.rb:5:in `accept'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych/nodes/node.rb:48:in `yaml'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/psych.rb:410:in `dump'
from (irb):4:in `block in irb_binding'
from /Users/Juan/.rubies/ruby-2.1.9/lib/ruby/2.1.0/tempfile.rb:371:in `create'
from (irb):2
from /Users/Juan/.rubies/ruby-2.1.9/bin/irb:11:in `<main>'
I found out that removing `:utf-8` from the mode string makes it work on both 2.1.9 and 2.2.5:
--- a 2016-08-24 04:11:42.000000000 +0800
+++ b 2016-08-24 04:11:35.000000000 +0800
@@ -1 +1 @@
-require 'yaml'; h = {name: '李'}; YAML.dump(h, File.new('out.yml', 'w:utf-8'))
+require 'yaml'; h = {name: '李'}; YAML.dump(h, File.new('out.yml', 'w'))
> require 'yaml'; h = {name: '李'}; YAML.dump(h, File.new('out.yml', 'w'))
=> #<File:out.yml>