From 4949be00ae0b63c2cc1578063e51a0285e7b8c4d Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Wed, 19 Nov 2014 22:51:11 -0600 Subject: [PATCH 01/14] Test EncodedString for more undefined conversion / invalid byte sequence - Add tests for EncodedString#to_s, #split, #<< - For each Encoding failure - assert the expected failure is raised o a String, but not on an 'EncodedString' - assert invalid bytes or unconvertale characters are replaced Currently one test is failing (pending) - EncodedString#split when the string has an invalid byte sequence incorrectly raises an ArgumentError And I am unable to reproduce an Encoding::Compatibility error on the EncodedString object in Ruby > 1.9.2 --- lib/rspec/support/differ.rb | 1 + lib/rspec/support/encoded_string.rb | 19 ++- spec/rspec/support/encoded_string_spec.rb | 178 +++++++++++++++++++--- 3 files changed, 173 insertions(+), 25 deletions(-) diff --git a/lib/rspec/support/differ.rb b/lib/rspec/support/differ.rb index d59890a15..2f99dd601 100644 --- a/lib/rspec/support/differ.rb +++ b/lib/rspec/support/differ.rb @@ -190,6 +190,7 @@ def object_to_string(object) end if String.method_defined?(:encoding) + # see https://github.com/ruby/ruby/blob/ca24e581ba/encoding.c#L1191 def pick_encoding(source_a, source_b) Encoding.compatible?(source_a, source_b) || Encoding.default_external end diff --git a/lib/rspec/support/encoded_string.rb b/lib/rspec/support/encoded_string.rb index 4fa90fdd4..3783fc0b2 100644 --- a/lib/rspec/support/encoded_string.rb +++ b/lib/rspec/support/encoded_string.rb @@ -2,7 +2,10 @@ module RSpec module Support # @private class EncodedString + # Ruby's default replacement string for is U+FFFD ("\xEF\xBF\xBD") for Unicode encoding forms + # else is '?' ("\x3F") MRI_UNICODE_UNKOWN_CHARACTER = "\xEF\xBF\xBD" + REPLACE = "\x3F" def initialize(string, encoding=nil) @encoding = encoding @@ -33,6 +36,20 @@ def to_s private + # Raised by Encoding and String methods: + # Encoding::UndefinedConversionError: + # when a transcoding operation fails + # e.g. "\x80".encode('utf-8','ASCII-8BIT') + # Encoding::InvalidByteSequenceError: + # when the string being transcoded contains a byte invalid for the either + # the source or target encoding + # e.g. "\x80".encode('utf-8','US-ASCII') + # Raised by transcoding methods: + # Encoding::ConverterNotFoundError: + # when a named encoding does not correspond with a known converter + # e.g. 'abc'.force_encoding('utf-8').encode('foo') + # Encoding::CompatibilityError + # def matching_encoding(string) string.encode(@encoding) rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError @@ -43,7 +60,7 @@ def matching_encoding(string) def normalize_missing(string) if @encoding.to_s == "UTF-8" - string.gsub(MRI_UNICODE_UNKOWN_CHARACTER.force_encoding(@encoding), "?") + string.gsub(MRI_UNICODE_UNKOWN_CHARACTER.force_encoding(@encoding), REPLACE) else string end diff --git a/spec/rspec/support/encoded_string_spec.rb b/spec/rspec/support/encoded_string_spec.rb index 4df40a408..2ea256d4d 100644 --- a/spec/rspec/support/encoded_string_spec.rb +++ b/spec/rspec/support/encoded_string_spec.rb @@ -1,14 +1,15 @@ +# encoding: utf-8 require 'spec_helper' require 'rspec/support/encoded_string' module RSpec::Support describe EncodedString do - let(:target_encoding) { 'UTF-8' } + let(:utf8_encoding) { 'UTF-8' } delegated_methods = String.instance_methods.map(&:to_s) & %w[eql? lines == encoding empty?] delegated_methods.each do |delegated_method| it "responds to #{delegated_method}" do - encoded_string = EncodedString.new("abc", target_encoding) + encoded_string = EncodedString.new("abc", utf8_encoding) expect(encoded_string).to respond_to(delegated_method) end end @@ -17,35 +18,118 @@ module RSpec::Support describe '#source_encoding' do it 'knows the original encoding of the string' do str = EncodedString.new("abc".encode('ASCII-8BIT'), "UTF-8") - expect( str.source_encoding.to_s ).to eq('ASCII-8BIT') + expect(str.source_encoding.to_s).to eq('ASCII-8BIT') end end - let(:ascii_arrow_symbol) { "\xAE" } + describe '#to_s' do + context 'when encoding a string with invalid bytes in the target encoding' do + # see https://github.com/jruby/jruby/blob/c1be61a501/test/mri/ruby/test_transcode.rb#L13 + let(:source_encoding) { Encoding.find('US-ASCII') } + let(:target_encoding) { Encoding.find('UTF-8') } + let(:string) { "I have a bad byté\x80".force_encoding(source_encoding) } + + it 'replaces invalid byte sequences with the REPLACE string' do + resulting_string = build_encoded_string(string, target_encoding).to_s + expected_string = "I have a bad byt\x3F\x3F\x3F" + expect(resulting_string).to eq(expected_string) + end + + it 'normally raises an EncodedString::InvalidByteSequenceError' do + expect { + string.encode(target_encoding) + }.to raise_error(Encoding::InvalidByteSequenceError) + end + end + + context 'when no converter is known for an encoding' do + let(:source_encoding) { Encoding.find('UTF-16LE') } + if RUBY_VERSION == '1.9.2' && Ruby.mri? + let(:no_converter_encoding) { Encoding.find('IBM737') } + else + let(:no_converter_encoding) { 'not_a_known_encoding' } + end + let(:string) { "I am not going to changé\xEF".force_encoding(source_encoding) } + + it 'normally raises an Encoding::ConverterNotFoundError' do + expect { + string.encode(no_converter_encoding) + }.to raise_error(Encoding::ConverterNotFoundError) + end + + it 'forces the new encoding' do + pending 'cannot reproduce' unless RUBY_VERSION == '1.9.2' && Ruby.mri? + resulting_string = build_encoded_string(string, no_converter_encoding).to_s + expected_string = "I am not going to changé\xEF".force_encoding(no_converter_encoding) + expect(resulting_string).to eq(expected_string) + end + end + + # see https://github.com/ruby/ruby/blob/34fbf57aaa/transcode.c#L4289 + # ISO-8859-1 -> UTF-8 -> EUC-JP + # "\xa0" NO-BREAK SPACE, which is available in UTF-8 but not in EUC-JP + context 'when there is an undefined conversion to the target encoding' do + let(:source_encoding) { Encoding.find('ISO-8859-1') } + let(:incompatible_encoding) { Encoding.find('EUC-JP') } + let(:string) { "\xa0 hi I am not going to work".force_encoding(source_encoding) } + + it 'normally raises an Encoding::UndefinedConversionError' do + expect { + string.encode(incompatible_encoding) + }.to raise_error(Encoding::UndefinedConversionError) + end + + it 'replaces all undefines conversions with the REPLACE string' do + resulting_string = build_encoded_string(string, incompatible_encoding).to_s + expected_string = "? hi I am not going to work" + expect(resulting_string).to eq(expected_string) + end + end + end + let(:ascii_arrow_symbol) { "\xAE" } let(:utf_8_euro_symbol) { "\xE2\x82\xAC" } describe '#<<' do context 'with strings that can be converted to the target encoding' do it 'encodes and appends the string' do - valid_ascii_string = "abc".force_encoding("ASCII-8BIT") + valid_ascii_string = "abcdé".force_encoding("ASCII-8BIT") valid_unicode_string = utf_8_euro_symbol.force_encoding('UTF-8') - resulting_string = build_encoded_string(valid_unicode_string, target_encoding) << valid_ascii_string - expect(resulting_string).to eq "#{utf_8_euro_symbol}abc".force_encoding('UTF-8') + resulting_string = build_encoded_string(valid_unicode_string, utf8_encoding) << valid_ascii_string + expected_string = "#{utf_8_euro_symbol}abcd??".force_encoding('UTF-8') + expect(resulting_string).to eq(expected_string) + end + + it 'copes with encoded strings' do + source_encoding = Encoding.find('UTF-16LE') + accentless = build_encoded_string("Tu avec carte {count} item has\n", source_encoding) + accented = "Tu avec carté {count} itém has\n".encode(source_encoding) + resulting_string = accentless << accented + expected_string = <<-EOS.encode('UTF-16LE') +Tu avec carte {count} item has +Tu avec cart\u00E9 {count} it\u00E9m has + EOS + expect(resulting_string).to eq(expected_string) end end context 'with a string that cannot be converted to the target encoding' do - it 'replaces undefined characters with either a ? or a unicode ?' do - ascii_string = ascii_arrow_symbol.force_encoding("ASCII-8BIT") - valid_unicode_string = utf_8_euro_symbol.force_encoding('UTF-8') - - resulting_string = build_encoded_string(valid_unicode_string, target_encoding) << ascii_string - expected_bytes = utf_8_euro_symbol.each_byte.to_a + ["?".unpack("c").first] - actual_bytes = resulting_string.each_byte.to_a - - expect(actual_bytes).to eq(expected_bytes) + context 'when appending a string with an incompatible character encoding' do + let(:ascii_string) { ascii_arrow_symbol.force_encoding("ASCII-8BIT") } + let(:valid_unicode_string) { utf_8_euro_symbol.force_encoding('UTF-8') } + + it "normally raises an Encoding::CompatibilityError" do + expect { + valid_unicode_string.encode(utf8_encoding) << ascii_string + }.to raise_error(Encoding::CompatibilityError) + end + + it 'replaces unconvertable characters with a string representation of their hex value' do + resulting_string = build_encoded_string(valid_unicode_string, utf8_encoding) << ascii_string + expected_string = "#{utf_8_euro_symbol}?" + expect(resulting_string).to eq(expected_string) + end end end @@ -54,19 +138,65 @@ module RSpec::Support ascii_string = 'abc'.force_encoding("ASCII-8BIT") other_ascii_string = '123'.force_encoding("ASCII-8BIT") - resulting_string = build_encoded_string(ascii_string, target_encoding) << other_ascii_string - expect(resulting_string.encoding.to_s).to eq 'UTF-8' + resulting_string = build_encoded_string(ascii_string, utf8_encoding) << other_ascii_string + expect(resulting_string.encoding.to_s).to eq('UTF-8') end end end describe '#split' do - it 'splits the string based on the delimiter accounting for encoding' do - wrapped_string = "aaaaaaaaaaa#{ascii_arrow_symbol}aaaaa".force_encoding("ASCII-8BIT") + context 'when the string has an invalid byte sequence' do + let(:message_with_invalid_byte_sequence) { "\xEF \255 \xAD I have bad bytes".force_encoding(utf8_encoding) } + + it 'normally raises an ArgumentError' do + expect { + message_with_invalid_byte_sequence.split("\n") + }.to raise_error(ArgumentError) + end + + it 'replaces invalid bytes with the REPLACE string' do + pending 'but is currently failing' + resulting_array = build_encoded_string(message_with_invalid_byte_sequence, utf8_encoding).split("\n") + expected_array = ["? ? ? I have bad bytes"] + expect(resulting_array).to eq(expected_array) + end + + end - expect { - build_encoded_string(wrapped_string, target_encoding).split(utf_8_euro_symbol.force_encoding("UTF-8")) - }.not_to raise_error + context 'when there is an undefined conversion to the target encoding' do + let(:wrapped_string) { "aaaaaaaaaaa#{ascii_arrow_symbol}aaaaa".force_encoding("ASCII-8BIT") } + + it 'normally raises an Encoding::UndefinedConversionError' do + expect { + wrapped_string.encode(utf8_encoding).split(utf_8_euro_symbol) + }.to raise_error(Encoding::UndefinedConversionError) + end + + it 'splits the string based on the delimiter accounting for encoding' do + expect { + build_encoded_string(wrapped_string, utf8_encoding).split(utf_8_euro_symbol.force_encoding("UTF-8")) + }.not_to raise_error + end + end + + # see https://github.com/rspec/rspec-expectations/blob/f8a1232/spec/rspec/expectations/fail_with_spec.rb#L50 + # https://github.com/rspec/rspec-expectations/issues/201 + # https://github.com/rspec/rspec-expectations/pull/220 + context 'when splitting a string that is not US-ASCII compatible' do + let(:non_ascii_compatible_string) { "This is a pile of poo: 💩".encode("UTF-16LE") } + + it 'normally raises an Encoding::CompatibilityError' do + expect { + non_ascii_compatible_string.split("\n") + }.to raise_error(Encoding::CompatibilityError) + end + + it 'falls back to the Encoding.default_external' do + resulting_array = build_encoded_string(non_ascii_compatible_string, utf8_encoding).split("\n") + expected_array = ["This is a pile of poo: \u{1F4A9}"] + expect(resulting_array).to eq(expected_array) + expect(resulting_array.first.encoding.to_s).to eq(Encoding.default_external.to_s) + end end end @@ -78,7 +208,7 @@ def build_encoded_string(string, target_encoding) describe '#source_encoding' do it 'defaults to US-ASCII' do str = EncodedString.new("abc", "UTF-8") - expect( str.source_encoding ).to eq('US-ASCII') + expect(str.source_encoding).to eq('US-ASCII') end end end From 2615f3d1be63e128a3a2f962cd18966a101fd5df Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Sat, 3 Jan 2015 03:54:27 -0600 Subject: [PATCH 02/14] Differ tests no longer use Differ to report diff expectation For example, previously we would write our differ_spec expectations as actual = differ.diff(str1, str2) expect(actual).to eq(expected) Which had the suprising (but expected) result of failure diffing changing the string actual = differ.diff(str1, str2) actual != expected && RSpec::Expectations.fail_with(message, actual, expected) RSpec::Expectations.fail_with calls differ.diff(actual, expected) where differ is **RSpec::Support::Differ.new**( :object_preparer => lambda { |object| RSpec::Matchers::Composable.surface_descriptions_in(object) }, :color => RSpec::Matchers.configuration.color? ) So, all these differ_spec tests where we say expect(str1).to eq(str2) will run through the differ twice on failure. We should be testing them as expect(str1 == str2).to eq(true) --- lib/rspec/support/differ.rb | 12 ++- spec/rspec/support/differ_spec.rb | 171 +++++++++++++++++++++++------- 2 files changed, 140 insertions(+), 43 deletions(-) diff --git a/lib/rspec/support/differ.rb b/lib/rspec/support/differ.rb index 2f99dd601..c6f5cadaf 100644 --- a/lib/rspec/support/differ.rb +++ b/lib/rspec/support/differ.rb @@ -7,8 +7,14 @@ module RSpec module Support # rubocop:disable ClassLength class Differ + if String.method_defined?(:encoding) + EMPTY_DIFF = EncodedString.new("", Encoding.default_external) + else + EMPTY_DIFF = EncodedString.new("") + end + def diff(actual, expected) - diff = "" + diff = EMPTY_DIFF.dup if actual && expected if all_strings?(actual, expected) @@ -26,11 +32,9 @@ def diff(actual, expected) # rubocop:disable MethodLength def diff_as_string(actual, expected) @encoding = pick_encoding actual, expected - @actual = EncodedString.new(actual, @encoding) @expected = EncodedString.new(expected, @encoding) - - output = EncodedString.new("\n", @encoding) + output = EncodedString.new("\n", @encoding) hunks.each_cons(2) do |prev_hunk, current_hunk| begin diff --git a/spec/rspec/support/differ_spec.rb b/spec/rspec/support/differ_spec.rb index abd927a3c..68a876632 100644 --- a/spec/rspec/support/differ_spec.rb +++ b/spec/rspec/support/differ_spec.rb @@ -7,6 +7,49 @@ module RSpec module Support describe Differ do + def safe_chr + @safe_chr ||= Hash.new { |h, x| h[x] = x.chr rescue ("U+%.4X" % [x]) } + end + + def safe_codepoints(str) + str.each_codepoint.map {|codepoint| safe_chr[codepoint] } + rescue ArgumentError + str.each_byte.map {|byte| safe_chr[byte] } + end + + def expect_identical_string(str1, str2, expected_encoding = str1.encoding) + expect(str1.encoding).to eq(expected_encoding) + str1_bytes = safe_codepoints(str1) + str2_bytes = safe_codepoints(str2) + return unless str1_bytes != str2_bytes + str1_differences = [] + str2_differences = [] + str2_bytes.each_with_index do |str2_byte, index| + str1_byte = str1_bytes.fetch(index) do + str2_differences.concat str2_bytes[index..-1] + return + end + if str1_byte != str2_byte + str1_differences << str1_byte + str2_differences << str2_byte + end + end + expect(str1_differences.join).to eq(str2_differences.join) + end + + describe '#pick_encoding' do + let(:differ) { RSpec::Support::Differ.new } + if String.method_defined?(:encoding) + it "picks and encoding" do + + str1 = "\xa1".force_encoding("iso-8859-1") + str2 = "\xa1\xa1".force_encoding("euc-jp") + expect(Encoding.compatible?(str1, str2)).to be_nil + expect(differ.send(:pick_encoding, str1, str2)).to eq(Encoding.default_external) + end + end + end + describe '#diff' do let(:differ) { RSpec::Support::Differ.new } @@ -34,50 +77,87 @@ module Support EOD diff = differ.diff(actual, expected) - expect(diff).to eql(expected_diff) + expect_identical_string(diff, expected_diff) end if String.method_defined?(:encoding) it "returns an empty string if strings are not multiline" do - expected = "Tu avec carte {count} item has".encode('UTF-16LE') - actual = "Tu avec carté {count} itém has".encode('UTF-16LE') + expected = "It has trouble when {string} has an e".encode('UTF-16LE') + actual = "It has trouble when {string} has an é".encode('UTF-16LE') expect(differ.diff(actual, expected)).to be_empty end it 'copes with encoded strings' do - expected = "Tu avec carte {count} item has\n".encode('UTF-16LE') - actual = "Tu avec carté {count} itém has\n".encode('UTF-16LE') - expect(differ.diff(actual, expected)).to eql(<<-EOD.encode('UTF-16LE')) + expected = "It has trouble when {string} has an e".encode('UTF-16LE') + actual = "It has trouble when {string} has an é".encode('UTF-16LE') + + diff = differ.diff(actual, expected) + expected_diff = <<-EOD.encode('UTF-16LE') @@ -1,2 +1,2 @@ --Tu avec carte {count} item has -+Tu avec carté {count} itém has -EOD +-It has trouble when {string} has an e ++It has trouble when {string} has an é + EOD + expect_identical_string(diff, expected_diff) end it 'handles differently encoded strings that are compatible' do expected = "abc\n".encode('us-ascii') actual = "강인철\n".encode('UTF-8') - expect(differ.diff(actual, expected)).to eql "\n@@ -1,2 +1,2 @@\n-abc\n+강인철\n" + + diff = differ.diff(actual, expected) + expected_diff = "\n@@ -1,2 +1,2 @@\n-abc\n+강인철\n" + expect_identical_string(diff, expected_diff) end - it 'uses the default external encoding when the two strings have incompatible encodings', :failing_on_appveyor do - expected = "Tu avec carte {count} item has\n" - actual = "Tu avec carté {count} itém has\n".encode('UTF-16LE') - expect(differ.diff(actual, expected)).to eq("\n@@ -1,2 +1,2 @@\n-Tu avec carte {count} item has\n+Tu avec carté {count} itém has\n") - expect(differ.diff(actual, expected).encoding).to eq(Encoding.default_external) + it 'uses the default external encoding when the two strings have incompatible encodings' do + source_encoding = Encoding.find('iso-8859-1') + target_encoding = Encoding.find('euc-jp') + expected = "This is #{source_encoding.name}".force_encoding(source_encoding) + actual = "This is #{target_encoding.name}".force_encoding(target_encoding) + expected_diff = <<-EOD + +@@ -1,2 +1,2 @@ +-This is iso-8859-1 ++This is euc-jp + EOD + diff = differ.diff(actual, expected) + expect(Encoding.compatible?(actual.encoding, expected.encoding)).to be_nil + expect(diff.encoding).to eq(Encoding.default_external) + expect_identical_string(diff, expected_diff) end - it 'handles any encoding error that occurs with a helpful error message' do - expect(RSpec::Support::HunkGenerator).to receive(:new). - and_raise(Encoding::CompatibilityError) - expected = "Tu avec carte {count} item has\n".encode('us-ascii') - actual = "Tu avec carté {count} itém has\n" + it 'handles an Encoding::ConverterNotFoundError' do + expected = "Tu avec carte {count} item has\n".encode('UTF-16LE') + actual = "Tu avec carté {count} itém has\n".force_encoding('IBM737') + if RUBY_VERSION === '1.9.2' && Ruby.mri? + e = '\xC3\xA9\xC3\xA9' + else + e = "é".force_encoding('IBM737').encode('UTF-16LE').unpack('C*').pack('c*') + end diff = differ.diff(actual, expected) - expect(diff).to match(/Could not produce a diff/) - expect(diff).to match(/actual string \(UTF-8\)/) - expect(diff).to match(/expected string \(US-ASCII\)/) + expected_diff = <<-EOD + +@@ -1,2 +1,2 @@ +-Tu avec carte {count} item has ++Tu avec cart#{e} {count} it#{e}m has + EOD + expect_identical_string(diff, expected_diff) + end + + it 'handles an Encoding::CompatibilityError' do + expected = "\xAETu avec carte {count} item has\n".force_encoding("ASCII-8BIT") + actual = "\xE2\x82\xACTu avec carte {count} item has\n".force_encoding('UTF-8') + + diff = differ.diff(actual, expected) + expected_diff = <<-EOD + +@@ -1,2 +1,2 @@ +-?Tu avec carte {count} item has ++\xE2\x82\xACTu avec carte {count} item has + EOD + expect_identical_string(diff, expected_diff) end end @@ -111,7 +191,7 @@ def inspect EOD diff = differ.diff(expected,actual) - expect(diff).to eq expected_diff + expect_identical_string(diff, expected_diff) end it "outputs unified diff message of two arrays" do @@ -133,7 +213,7 @@ def inspect EOD diff = differ.diff(expected,actual) - expect(diff).to eq expected_diff + expect_identical_string(diff, expected_diff) end it 'outputs a unified diff message for an array which flatten recurses' do @@ -148,12 +228,13 @@ def inspect; ""; end diff = differ.diff [obj], [] end - expect(diff).to eq <<-EOD + expected_diff = <<-EOD @@ -1,2 +1,2 @@ -[] +[] -EOD + EOD + expect_identical_string(diff, expected_diff) end it "outputs unified diff message of two hashes" do @@ -172,29 +253,41 @@ def inspect; ""; end EOD diff = differ.diff(expected,actual) - expect(diff).to eq expected_diff + expect_identical_string(diff, expected_diff) end it 'outputs unified diff message of two hashes with differing encoding', :failing_on_appveyor do + replacement = if RUBY_VERSION > '1.9.3' + %{+"ö" => "ö"} + else + '+"\303\266" => "\303\266"' + end expected_diff = %Q{ @@ -1,2 +1,2 @@ -"a" => "a", -#{ (RUBY_VERSION.to_f > 1.8) ? %Q{+"ö" => "ö"} : '+"\303\266" => "\303\266"' }, +#{ replacement }, } diff = differ.diff({'ö' => 'ö'}, {'a' => 'a'}) - expect(diff).to eq expected_diff + expect_identical_string(diff, expected_diff) end it 'outputs unified diff message of two hashes with encoding different to key encoding', :failing_on_appveyor do + actual = { "한글" => "한글2"} + expected = { :a => "a"} + replacement = if RUBY_VERSION > '1.9.3' + %{+\"한글\" => \"한글2\"} + else + '+"\355\225\234\352\270\200" => "\355\225\234\352\270\2002"' + end expected_diff = %Q{ @@ -1,2 +1,2 @@ -:a => "a", -#{ (RUBY_VERSION.to_f > 1.8) ? %Q{+\"한글\" => \"한글2\"} : '+"\355\225\234\352\270\200" => "\355\225\234\352\270\2002"' }, +#{ replacement }, } - diff = differ.diff({ "한글" => "한글2"}, { :a => "a"}) - expect(diff).to eq expected_diff + diff = differ.diff(actual, expected) + expect_identical_string(diff, expected_diff) end it "outputs unified diff message of two hashes with object keys" do @@ -205,7 +298,7 @@ def inspect; ""; end } diff = differ.diff({ ['d','c'] => 'b'}, { ['a','c'] => 'b' }) - expect(diff).to eq expected_diff + expect_identical_string(diff, expected_diff) end it "outputs unified diff of multi line strings" do @@ -221,7 +314,7 @@ def inspect; ""; end EOD diff = differ.diff(expected,actual) - expect(diff).to eq expected_diff + expect_identical_string(diff, expected_diff) end it "splits items with newlines" do @@ -233,7 +326,7 @@ def inspect; ""; end EOD diff = differ.diff [], ["a\nb", "c\nd"] - expect(diff).to eql expected_diff + expect_identical_string(diff, expected_diff) end it "shows inner arrays on a single line" do @@ -245,7 +338,7 @@ def inspect; ""; end EOD diff = differ.diff [], ["a\nb", ["c\nd"]] - expect(diff).to eql expected_diff + expect_identical_string(diff, expected_diff) end it "returns an empty string if no expected or actual" do @@ -300,7 +393,7 @@ def inspect; ""; end EOS diff = differ.diff(expected, actual) - expect(diff).to eq expected_diff + expect_identical_string(diff, expected_diff) end end @@ -313,7 +406,7 @@ def inspect; ""; end expected_diff = "\e[0m\n\e[0m\e[34m@@ -1,2 +1,2 @@\n\e[0m\e[31m-foo bang baz\n\e[0m\e[32m+foo bar baz\n\e[0m" diff = differ.diff(expected,actual) - expect(diff).to eq expected_diff + expect_identical_string(diff, expected_diff) end end end From 5c54a6e9a88f7d62d5759b41c4eba62ea439a4f0 Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Sat, 3 Jan 2015 05:53:38 -0600 Subject: [PATCH 03/14] Fix invalid byte sequence on EncodedString#split --- lib/rspec/support/encoded_string.rb | 40 ++++++++++++++++------- spec/rspec/support/encoded_string_spec.rb | 1 - 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/lib/rspec/support/encoded_string.rb b/lib/rspec/support/encoded_string.rb index 3783fc0b2..4b5a98ead 100644 --- a/lib/rspec/support/encoded_string.rb +++ b/lib/rspec/support/encoded_string.rb @@ -4,7 +4,6 @@ module Support class EncodedString # Ruby's default replacement string for is U+FFFD ("\xEF\xBF\xBD") for Unicode encoding forms # else is '?' ("\x3F") - MRI_UNICODE_UNKOWN_CHARACTER = "\xEF\xBF\xBD" REPLACE = "\x3F" def initialize(string, encoding=nil) @@ -36,6 +35,24 @@ def to_s private + ENCODING_STRATEGY = { + :bad_bytes => { + :invalid => :replace, + # :undef => :nil, + :replace => REPLACE + }, + :cannot_convert => { + # :invalid => :nil, + :undef => :replace, + :replace => REPLACE + }, + :no_converter => { + :invalid => :replace, + # :undef => :nil, + :replace => REPLACE + } + } + # Raised by Encoding and String methods: # Encoding::UndefinedConversionError: # when a transcoding operation fails @@ -51,20 +68,19 @@ def to_s # Encoding::CompatibilityError # def matching_encoding(string) - string.encode(@encoding) - rescue Encoding::UndefinedConversionError, Encoding::InvalidByteSequenceError - normalize_missing(string.encode(@encoding, :invalid => :replace, :undef => :replace)) + # Converting it to a higher character set (UTF-16) and then back (to UTF-8) + # ensures that we strip away invalid or undefined byte sequences + # => no need to rescue Encoding::InvalidByteSequenceError, ArgumentError + string.encode(::Encoding::UTF_16LE, ENCODING_STRATEGY[:bad_bytes]). + encode(@encoding) + rescue Encoding::UndefinedConversionError, Encoding::CompatibilityError + string.encode(@encoding, ENCODING_STRATEGY[:cannot_convert]) + # Begin: Needed for 1.9.2 rescue Encoding::ConverterNotFoundError - normalize_missing(string.force_encoding(@encoding).encode(:invalid => :replace)) + string.force_encoding(@encoding).encode(ENCODING_STRATEGY[:no_converter]) end + # End: Needed for 1.9.2 - def normalize_missing(string) - if @encoding.to_s == "UTF-8" - string.gsub(MRI_UNICODE_UNKOWN_CHARACTER.force_encoding(@encoding), REPLACE) - else - string - end - end def detect_source_encoding(string) string.encoding diff --git a/spec/rspec/support/encoded_string_spec.rb b/spec/rspec/support/encoded_string_spec.rb index 2ea256d4d..78d8f64f4 100644 --- a/spec/rspec/support/encoded_string_spec.rb +++ b/spec/rspec/support/encoded_string_spec.rb @@ -155,7 +155,6 @@ module RSpec::Support end it 'replaces invalid bytes with the REPLACE string' do - pending 'but is currently failing' resulting_array = build_encoded_string(message_with_invalid_byte_sequence, utf8_encoding).split("\n") expected_array = ["? ? ? I have bad bytes"] expect(resulting_array).to eq(expected_array) From 09ec191ff48daa5bcae409005a34b6c26047bbf9 Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Sat, 3 Jan 2015 06:12:53 -0600 Subject: [PATCH 04/14] Move encoding helper methods into RSpec::Support::EncodingHelpers --- lib/rspec/support/encoded_string.rb | 1 - lib/rspec/support/spec.rb | 2 + lib/rspec/support/spec/encoding_helpers.rb | 58 ++++++++++++++++++++++ spec/rspec/support/differ_spec.rb | 29 ----------- 4 files changed, 60 insertions(+), 30 deletions(-) create mode 100644 lib/rspec/support/spec/encoding_helpers.rb diff --git a/lib/rspec/support/encoded_string.rb b/lib/rspec/support/encoded_string.rb index 4b5a98ead..1c0187a25 100644 --- a/lib/rspec/support/encoded_string.rb +++ b/lib/rspec/support/encoded_string.rb @@ -81,7 +81,6 @@ def matching_encoding(string) end # End: Needed for 1.9.2 - def detect_source_encoding(string) string.encoding end diff --git a/lib/rspec/support/spec.rb b/lib/rspec/support/spec.rb index 3f90aa5fe..c5fde9780 100644 --- a/lib/rspec/support/spec.rb +++ b/lib/rspec/support/spec.rb @@ -1,5 +1,6 @@ require 'rspec/support' RSpec::Support.require_rspec_support "spec/deprecation_helpers" +RSpec::Support.require_rspec_support "spec/encoding_helpers" RSpec::Support.require_rspec_support "spec/with_isolated_stderr" RSpec::Support.require_rspec_support "spec/stderr_splitter" RSpec::Support.require_rspec_support "spec/formatting_support" @@ -12,6 +13,7 @@ c.include RSpecHelpers c.include RSpec::Support::WithIsolatedStdErr c.include RSpec::Support::FormattingSupport + c.include RSpec::Support::EncodingHelpers unless defined?(Debugger) # debugger causes warnings when used c.before do diff --git a/lib/rspec/support/spec/encoding_helpers.rb b/lib/rspec/support/spec/encoding_helpers.rb new file mode 100644 index 000000000..5f36b8685 --- /dev/null +++ b/lib/rspec/support/spec/encoding_helpers.rb @@ -0,0 +1,58 @@ +module RSpec + module Support + module EncodingHelpers + module_function + + def safe_chr + # rubocop:disable Style/RescueModifier + @safe_chr ||= Hash.new { |h, x| h[x] = x.chr rescue ("U+%.4X" % [x]) } + # rubocop:enable Style/RescueModifier + end + + if String.method_defined?(:encoding) + + def safe_codepoints(str) + str.each_codepoint.map { |codepoint| safe_chr[codepoint] } + rescue ArgumentError + str.each_byte.map { |byte| safe_chr[byte] } + end + + # rubocop:disable MethodLength + def expect_identical_string(str1, str2, expected_encoding=str1.encoding) + expect(str1.encoding).to eq(expected_encoding) + str1_bytes = safe_codepoints(str1) + str2_bytes = safe_codepoints(str2) + return unless str1_bytes != str2_bytes + str1_differences = [] + str2_differences = [] + # rubocop:disable Style/Next + str2_bytes.each_with_index do |str2_byte, index| + str1_byte = str1_bytes.fetch(index) do + str2_differences.concat str2_bytes[index..-1] + return + end + if str1_byte != str2_byte + str1_differences << str1_byte + str2_differences << str2_byte + end + end + # rubocop:enable Style/Next + expect(str1_differences.join).to eq(str2_differences.join) + end + # rubocop:enable Style/MethodLength + + else + + def safe_codepoints(str) + str.split(//) + end + + def expect_identical_string(str1, str2) + str1_bytes = safe_codepoints(str1) + str2_bytes = safe_codepoints(str2) + expect(str1_bytes).to eq(str2_bytes) + end + end + end + end +end diff --git a/spec/rspec/support/differ_spec.rb b/spec/rspec/support/differ_spec.rb index 68a876632..36eec2a1c 100644 --- a/spec/rspec/support/differ_spec.rb +++ b/spec/rspec/support/differ_spec.rb @@ -7,35 +7,6 @@ module RSpec module Support describe Differ do - def safe_chr - @safe_chr ||= Hash.new { |h, x| h[x] = x.chr rescue ("U+%.4X" % [x]) } - end - - def safe_codepoints(str) - str.each_codepoint.map {|codepoint| safe_chr[codepoint] } - rescue ArgumentError - str.each_byte.map {|byte| safe_chr[byte] } - end - - def expect_identical_string(str1, str2, expected_encoding = str1.encoding) - expect(str1.encoding).to eq(expected_encoding) - str1_bytes = safe_codepoints(str1) - str2_bytes = safe_codepoints(str2) - return unless str1_bytes != str2_bytes - str1_differences = [] - str2_differences = [] - str2_bytes.each_with_index do |str2_byte, index| - str1_byte = str1_bytes.fetch(index) do - str2_differences.concat str2_bytes[index..-1] - return - end - if str1_byte != str2_byte - str1_differences << str1_byte - str2_differences << str2_byte - end - end - expect(str1_differences.join).to eq(str2_differences.join) - end describe '#pick_encoding' do let(:differ) { RSpec::Support::Differ.new } From 78b032eb395da3faaeabe3cafd910365d001e063 Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Sat, 3 Jan 2015 06:19:12 -0600 Subject: [PATCH 05/14] Update EncodedString specs to use EncodingHelpers#expect_identical_string --- spec/rspec/support/encoded_string_spec.rb | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/spec/rspec/support/encoded_string_spec.rb b/spec/rspec/support/encoded_string_spec.rb index 78d8f64f4..5b1e2bdb9 100644 --- a/spec/rspec/support/encoded_string_spec.rb +++ b/spec/rspec/support/encoded_string_spec.rb @@ -32,7 +32,7 @@ module RSpec::Support it 'replaces invalid byte sequences with the REPLACE string' do resulting_string = build_encoded_string(string, target_encoding).to_s expected_string = "I have a bad byt\x3F\x3F\x3F" - expect(resulting_string).to eq(expected_string) + expect_identical_string(resulting_string, expected_string) end it 'normally raises an EncodedString::InvalidByteSequenceError' do @@ -61,7 +61,7 @@ module RSpec::Support pending 'cannot reproduce' unless RUBY_VERSION == '1.9.2' && Ruby.mri? resulting_string = build_encoded_string(string, no_converter_encoding).to_s expected_string = "I am not going to changé\xEF".force_encoding(no_converter_encoding) - expect(resulting_string).to eq(expected_string) + expect_identical_string(resulting_string, expected_string) end end @@ -82,7 +82,7 @@ module RSpec::Support it 'replaces all undefines conversions with the REPLACE string' do resulting_string = build_encoded_string(string, incompatible_encoding).to_s expected_string = "? hi I am not going to work" - expect(resulting_string).to eq(expected_string) + expect_identical_string(resulting_string, expected_string) end end end @@ -98,7 +98,7 @@ module RSpec::Support resulting_string = build_encoded_string(valid_unicode_string, utf8_encoding) << valid_ascii_string expected_string = "#{utf_8_euro_symbol}abcd??".force_encoding('UTF-8') - expect(resulting_string).to eq(expected_string) + expect_identical_string(resulting_string, expected_string) end it 'copes with encoded strings' do @@ -110,7 +110,7 @@ module RSpec::Support Tu avec carte {count} item has Tu avec cart\u00E9 {count} it\u00E9m has EOS - expect(resulting_string).to eq(expected_string) + expect_identical_string(resulting_string, expected_string) end end @@ -128,7 +128,7 @@ module RSpec::Support it 'replaces unconvertable characters with a string representation of their hex value' do resulting_string = build_encoded_string(valid_unicode_string, utf8_encoding) << ascii_string expected_string = "#{utf_8_euro_symbol}?" - expect(resulting_string).to eq(expected_string) + expect_identical_string(resulting_string, expected_string) end end end @@ -156,8 +156,9 @@ module RSpec::Support it 'replaces invalid bytes with the REPLACE string' do resulting_array = build_encoded_string(message_with_invalid_byte_sequence, utf8_encoding).split("\n") - expected_array = ["? ? ? I have bad bytes"] - expect(resulting_array).to eq(expected_array) + expect(resulting_array.size).to eq(1) # sanity check + expected_string = "? ? ? I have bad bytes" + expect_identical_string(resulting_array.first, expected_string) end end @@ -192,9 +193,9 @@ module RSpec::Support it 'falls back to the Encoding.default_external' do resulting_array = build_encoded_string(non_ascii_compatible_string, utf8_encoding).split("\n") - expected_array = ["This is a pile of poo: \u{1F4A9}"] - expect(resulting_array).to eq(expected_array) - expect(resulting_array.first.encoding.to_s).to eq(Encoding.default_external.to_s) + expect(resulting_array.size).to eq(1) # sanity check + expected_string = "This is a pile of poo: \u{1F4A9}" + expect_identical_string(resulting_array.first, expected_string, Encoding.default_external) end end end From d431a4d7a6981cd80fdbf72b4ce6d73b26a1bdf3 Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Fri, 26 Dec 2014 13:45:22 -0600 Subject: [PATCH 06/14] The Differ no longer catches CompatibilityErrors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was introduced in https://github.com/rspec/rspec-expectations/pull/220/files#diff-9533f5f156a38a3307ecfc610d2282d7R47 but is no longer raised either by the the current test code, the original test code @expected="Tu avec carté {count} itém has".encode('UTF-16LE') @actual="Tu avec carte {count} item has".encode('UTF-16LE') or any other variations I've tried Since I'm moving all the encoding logic into EncodedString, and this doesn't appear to do anything any more, I am proposing to remove it. Next commits will also move over #pick_encoding Conflicts: spec/rspec/support/differ_spec.rb --- lib/rspec/support/differ.rb | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/lib/rspec/support/differ.rb b/lib/rspec/support/differ.rb index c6f5cadaf..56dc7e562 100644 --- a/lib/rspec/support/differ.rb +++ b/lib/rspec/support/differ.rb @@ -51,8 +51,6 @@ def diff_as_string(actual, expected) finalize_output(output, hunks.last.diff(format_type).to_s) if hunks.last color_diff output - rescue Encoding::CompatibilityError - handle_encoding_errors end # rubocop:enable MethodLength @@ -202,17 +200,6 @@ def pick_encoding(source_a, source_b) def pick_encoding(_source_a, _source_b) end end - - def handle_encoding_errors - if @actual.source_encoding != @expected.source_encoding - "Could not produce a diff because the encoding of the actual string " \ - "(#{@actual.source_encoding}) differs from the encoding of the expected " \ - "string (#{@expected.source_encoding})" - else - "Could not produce a diff because of the encoding of the string " \ - "(#{@expected.source_encoding})" - end - end end # rubocop:enable ClassLength end From d85222de35b8aa4b210cc347ab7057b1c0111938 Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Sat, 3 Jan 2015 06:28:16 -0600 Subject: [PATCH 07/14] Move Differ#pick_encoding into EncodedString#pick_encoding --- lib/rspec/support/differ.rb | 12 +----------- lib/rspec/support/encoded_string.rb | 10 ++++++++++ lib/rspec/support/spec/encoding_helpers.rb | 3 +++ spec/rspec/support/differ_spec.rb | 13 ------------- spec/rspec/support/encoded_string_spec.rb | 13 +++++++++++++ 5 files changed, 27 insertions(+), 24 deletions(-) diff --git a/lib/rspec/support/differ.rb b/lib/rspec/support/differ.rb index 56dc7e562..56bbd0a35 100644 --- a/lib/rspec/support/differ.rb +++ b/lib/rspec/support/differ.rb @@ -31,7 +31,7 @@ def diff(actual, expected) # rubocop:disable MethodLength def diff_as_string(actual, expected) - @encoding = pick_encoding actual, expected + @encoding = EncodedString.pick_encoding(actual, expected) @actual = EncodedString.new(actual, @encoding) @expected = EncodedString.new(expected, @encoding) output = EncodedString.new("\n", @encoding) @@ -190,16 +190,6 @@ def object_to_string(object) PP.pp(object, "") end end - - if String.method_defined?(:encoding) - # see https://github.com/ruby/ruby/blob/ca24e581ba/encoding.c#L1191 - def pick_encoding(source_a, source_b) - Encoding.compatible?(source_a, source_b) || Encoding.default_external - end - else - def pick_encoding(_source_a, _source_b) - end - end end # rubocop:enable ClassLength end diff --git a/lib/rspec/support/encoded_string.rb b/lib/rspec/support/encoded_string.rb index 1c0187a25..f8a5dbb90 100644 --- a/lib/rspec/support/encoded_string.rb +++ b/lib/rspec/support/encoded_string.rb @@ -2,6 +2,16 @@ module RSpec module Support # @private class EncodedString + if String.method_defined?(:encoding) + # see https://github.com/ruby/ruby/blob/ca24e581ba/encoding.c#L1191 + def self.pick_encoding(source_a, source_b) + Encoding.compatible?(source_a, source_b) || Encoding.default_external + end + else + def self.pick_encoding(_source_a, _source_b) + end + end + # Ruby's default replacement string for is U+FFFD ("\xEF\xBF\xBD") for Unicode encoding forms # else is '?' ("\x3F") REPLACE = "\x3F" diff --git a/lib/rspec/support/spec/encoding_helpers.rb b/lib/rspec/support/spec/encoding_helpers.rb index 5f36b8685..8bb70e79e 100644 --- a/lib/rspec/support/spec/encoding_helpers.rb +++ b/lib/rspec/support/spec/encoding_helpers.rb @@ -3,6 +3,9 @@ module Support module EncodingHelpers module_function + # For undefined conversions, replace as "U+" + # e.g. '\xa0' becomes 'U+00A0' + # see https://github.com/ruby/ruby/blob/34fbf57aaa/test/ruby/test_transcode.rb#L2050 def safe_chr # rubocop:disable Style/RescueModifier @safe_chr ||= Hash.new { |h, x| h[x] = x.chr rescue ("U+%.4X" % [x]) } diff --git a/spec/rspec/support/differ_spec.rb b/spec/rspec/support/differ_spec.rb index 36eec2a1c..da0ab7b4b 100644 --- a/spec/rspec/support/differ_spec.rb +++ b/spec/rspec/support/differ_spec.rb @@ -8,19 +8,6 @@ module RSpec module Support describe Differ do - describe '#pick_encoding' do - let(:differ) { RSpec::Support::Differ.new } - if String.method_defined?(:encoding) - it "picks and encoding" do - - str1 = "\xa1".force_encoding("iso-8859-1") - str2 = "\xa1\xa1".force_encoding("euc-jp") - expect(Encoding.compatible?(str1, str2)).to be_nil - expect(differ.send(:pick_encoding, str1, str2)).to eq(Encoding.default_external) - end - end - end - describe '#diff' do let(:differ) { RSpec::Support::Differ.new } diff --git a/spec/rspec/support/encoded_string_spec.rb b/spec/rspec/support/encoded_string_spec.rb index 5b1e2bdb9..dae03f9a5 100644 --- a/spec/rspec/support/encoded_string_spec.rb +++ b/spec/rspec/support/encoded_string_spec.rb @@ -15,6 +15,19 @@ module RSpec::Support end if String.method_defined?(:encoding) + + describe '#pick_encoding' do + if String.method_defined?(:encoding) + it "picks a compatible encoding, falling back to default_external" do + str1 = "\xa1".force_encoding("iso-8859-1") + str2 = "\xa1\xa1".force_encoding("euc-jp") + expect(Encoding.compatible?(str1, str2)).to be_nil + + expect(EncodedString.pick_encoding(str1, str2)).to eq(Encoding.default_external) + end + end + end + describe '#source_encoding' do it 'knows the original encoding of the string' do str = EncodedString.new("abc".encode('ASCII-8BIT'), "UTF-8") From 93776b9ccc6e613be82d32dec82a3f07fae7794f Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Sat, 3 Jan 2015 06:32:13 -0600 Subject: [PATCH 08/14] Always try a compatible encoding --- lib/rspec/support/encoded_string.rb | 7 ++++--- spec/rspec/support/encoded_string_spec.rb | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/rspec/support/encoded_string.rb b/lib/rspec/support/encoded_string.rb index f8a5dbb90..8976131e5 100644 --- a/lib/rspec/support/encoded_string.rb +++ b/lib/rspec/support/encoded_string.rb @@ -78,16 +78,17 @@ def to_s # Encoding::CompatibilityError # def matching_encoding(string) + encoding = EncodedString.pick_encoding(source_encoding, @encoding) # Converting it to a higher character set (UTF-16) and then back (to UTF-8) # ensures that we strip away invalid or undefined byte sequences # => no need to rescue Encoding::InvalidByteSequenceError, ArgumentError string.encode(::Encoding::UTF_16LE, ENCODING_STRATEGY[:bad_bytes]). - encode(@encoding) + encode(encoding) rescue Encoding::UndefinedConversionError, Encoding::CompatibilityError - string.encode(@encoding, ENCODING_STRATEGY[:cannot_convert]) + string.encode(encoding, ENCODING_STRATEGY[:cannot_convert]) # Begin: Needed for 1.9.2 rescue Encoding::ConverterNotFoundError - string.force_encoding(@encoding).encode(ENCODING_STRATEGY[:no_converter]) + string.force_encoding(encoding).encode(ENCODING_STRATEGY[:no_converter]) end # End: Needed for 1.9.2 diff --git a/spec/rspec/support/encoded_string_spec.rb b/spec/rspec/support/encoded_string_spec.rb index dae03f9a5..0415030a5 100644 --- a/spec/rspec/support/encoded_string_spec.rb +++ b/spec/rspec/support/encoded_string_spec.rb @@ -94,7 +94,7 @@ module RSpec::Support it 'replaces all undefines conversions with the REPLACE string' do resulting_string = build_encoded_string(string, incompatible_encoding).to_s - expected_string = "? hi I am not going to work" + expected_string = "\xA0 hi I am not going to work" expect_identical_string(resulting_string, expected_string) end end From a51874cbb13912b9c7653fe1db62b1868aca43ab Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Sat, 3 Jan 2015 14:50:37 -0600 Subject: [PATCH 09/14] Fix Ruby 2.2 warning: possible reference to past scope /home/travis/build/rspec/rspec-support/spec/rspec/support/encoded_string_spec.rb:137: warning: possible reference to past scope - valid_unicode_string /home/travis/build/rspec/rspec-support/spec/rspec/support/encoded_string_spec.rb:142: warning: possible reference to past scope - valid_unicode_string https://travis-ci.org/rspec/rspec-support/jobs/45770204 --- spec/rspec/support/encoded_string_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/rspec/support/encoded_string_spec.rb b/spec/rspec/support/encoded_string_spec.rb index 0415030a5..05a2a6a7c 100644 --- a/spec/rspec/support/encoded_string_spec.rb +++ b/spec/rspec/support/encoded_string_spec.rb @@ -105,9 +105,9 @@ module RSpec::Support describe '#<<' do context 'with strings that can be converted to the target encoding' do + let(:valid_ascii_string) { "abcdé".force_encoding("ASCII-8BIT") } + let(:valid_unicode_string) { utf_8_euro_symbol.force_encoding('UTF-8') } it 'encodes and appends the string' do - valid_ascii_string = "abcdé".force_encoding("ASCII-8BIT") - valid_unicode_string = utf_8_euro_symbol.force_encoding('UTF-8') resulting_string = build_encoded_string(valid_unicode_string, utf8_encoding) << valid_ascii_string expected_string = "#{utf_8_euro_symbol}abcd??".force_encoding('UTF-8') From d3a95dfe7f2c91f3a52b2bf94026f219de57cda2 Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Sat, 3 Jan 2015 16:06:56 -0600 Subject: [PATCH 10/14] Misc fixes for JRuby and Windows specs --- lib/rspec/support/spec/in_sub_process.rb | 4 +- spec/rspec/support/differ_spec.rb | 16 +++--- spec/rspec/support/encoded_string_spec.rb | 64 +++++++++++++++-------- 3 files changed, 51 insertions(+), 33 deletions(-) diff --git a/lib/rspec/support/spec/in_sub_process.rb b/lib/rspec/support/spec/in_sub_process.rb index b641e3935..638797e6b 100644 --- a/lib/rspec/support/spec/in_sub_process.rb +++ b/lib/rspec/support/spec/in_sub_process.rb @@ -1,7 +1,7 @@ module RSpec module Support module InSubProcess - if Process.respond_to?(:fork) && !(RUBY_PLATFORM == 'java' && RUBY_VERSION == '1.8.7') + if Process.respond_to?(:fork) && !(Ruby.jruby? && RUBY_VERSION == '1.8.7') # Useful as a way to isolate a global change to a subprocess. # rubocop:disable MethodLength @@ -35,7 +35,7 @@ def in_sub_process(prevent_warnings=true) raise exception if exception end else - def in_sub_process + def in_sub_process(*) skip "This spec requires forking to work properly, " \ "and your platform does not support forking" end diff --git a/spec/rspec/support/differ_spec.rb b/spec/rspec/support/differ_spec.rb index da0ab7b4b..6a99e20c7 100644 --- a/spec/rspec/support/differ_spec.rb +++ b/spec/rspec/support/differ_spec.rb @@ -214,11 +214,11 @@ def inspect; ""; end expect_identical_string(diff, expected_diff) end - it 'outputs unified diff message of two hashes with differing encoding', :failing_on_appveyor do - replacement = if RUBY_VERSION > '1.9.3' - %{+"ö" => "ö"} - else + it 'outputs unified diff message of two hashes with differing encoding' do + replacement = if OS.windows? || RUBY_VERSION < '1.9.3' '+"\303\266" => "\303\266"' + else + %{+"ö" => "ö"} end expected_diff = %Q{ @@ -1,2 +1,2 @@ @@ -230,13 +230,13 @@ def inspect; ""; end expect_identical_string(diff, expected_diff) end - it 'outputs unified diff message of two hashes with encoding different to key encoding', :failing_on_appveyor do + it 'outputs unified diff message of two hashes with encoding different to key encoding' do actual = { "한글" => "한글2"} expected = { :a => "a"} - replacement = if RUBY_VERSION > '1.9.3' - %{+\"한글\" => \"한글2\"} - else + replacement = if OS.windows? || RUBY_VERSION < '1.9.3' '+"\355\225\234\352\270\200" => "\355\225\234\352\270\2002"' + else + %{+\"한글\" => \"한글2\"} end expected_diff = %Q{ @@ -1,2 +1,2 @@ diff --git a/spec/rspec/support/encoded_string_spec.rb b/spec/rspec/support/encoded_string_spec.rb index 05a2a6a7c..f402c1e42 100644 --- a/spec/rspec/support/encoded_string_spec.rb +++ b/spec/rspec/support/encoded_string_spec.rb @@ -56,13 +56,10 @@ module RSpec::Support end context 'when no converter is known for an encoding' do - let(:source_encoding) { Encoding.find('UTF-16LE') } - if RUBY_VERSION == '1.9.2' && Ruby.mri? - let(:no_converter_encoding) { Encoding.find('IBM737') } - else - let(:no_converter_encoding) { 'not_a_known_encoding' } - end - let(:string) { "I am not going to changé\xEF".force_encoding(source_encoding) } + # see https://github.com/rubyspec/rubyspec/blob/91ce9f6549/core/string/shared/encode.rb#L12 + let(:source_encoding) { Encoding.find('ASCII-8BIT') } + let(:no_converter_encoding) { Encoding::Emacs_Mule } + let(:string) { "\x80".force_encoding(source_encoding) } it 'normally raises an Encoding::ConverterNotFoundError' do expect { @@ -70,11 +67,10 @@ module RSpec::Support }.to raise_error(Encoding::ConverterNotFoundError) end - it 'forces the new encoding' do - pending 'cannot reproduce' unless RUBY_VERSION == '1.9.2' && Ruby.mri? + it 'forces the encoding to Encoding.default_external' do resulting_string = build_encoded_string(string, no_converter_encoding).to_s - expected_string = "I am not going to changé\xEF".force_encoding(no_converter_encoding) - expect_identical_string(resulting_string, expected_string) + expected_string = "I am not going to changé\xEF".force_encoding(Encoding.default_external) + expect_identical_string(resulting_string, expected_string, Encoding.default_external) end end @@ -94,7 +90,12 @@ module RSpec::Support it 'replaces all undefines conversions with the REPLACE string' do resulting_string = build_encoded_string(string, incompatible_encoding).to_s - expected_string = "\xA0 hi I am not going to work" + if OS.windows? + replacement = "\xFF" + else + replacement = "\xA0" + end + expected_string = "#{replacement} hi I am not going to work" expect_identical_string(resulting_string, expected_string) end end @@ -110,7 +111,12 @@ module RSpec::Support it 'encodes and appends the string' do resulting_string = build_encoded_string(valid_unicode_string, utf8_encoding) << valid_ascii_string - expected_string = "#{utf_8_euro_symbol}abcd??".force_encoding('UTF-8') + if OS.windows? + replacement = "\x82\x82" + else + replacement = "\xE9\xE9" + end + expected_string = "#{utf_8_euro_symbol}abcd#{replacement}".force_encoding('UTF-8') expect_identical_string(resulting_string, expected_string) end @@ -119,9 +125,14 @@ module RSpec::Support accentless = build_encoded_string("Tu avec carte {count} item has\n", source_encoding) accented = "Tu avec carté {count} itém has\n".encode(source_encoding) resulting_string = accentless << accented + if OS.windows? + replacement = "\x82\x82" + else + replacement = "\u00E9" + end expected_string = <<-EOS.encode('UTF-16LE') Tu avec carte {count} item has -Tu avec cart\u00E9 {count} it\u00E9m has +Tu avec cart#{replacement} {count} it#{replacement}m has EOS expect_identical_string(resulting_string, expected_string) end @@ -152,7 +163,8 @@ module RSpec::Support other_ascii_string = '123'.force_encoding("ASCII-8BIT") resulting_string = build_encoded_string(ascii_string, utf8_encoding) << other_ascii_string - expect(resulting_string.encoding.to_s).to eq('UTF-8') + expected_string = 'abc123'.force_encoding('ASCII-8BIT') + expect_identical_string(resulting_string, expected_string) end end end @@ -181,7 +193,7 @@ module RSpec::Support it 'normally raises an Encoding::UndefinedConversionError' do expect { - wrapped_string.encode(utf8_encoding).split(utf_8_euro_symbol) + wrapped_string.encode(utf8_encoding) }.to raise_error(Encoding::UndefinedConversionError) end @@ -195,8 +207,9 @@ module RSpec::Support # see https://github.com/rspec/rspec-expectations/blob/f8a1232/spec/rspec/expectations/fail_with_spec.rb#L50 # https://github.com/rspec/rspec-expectations/issues/201 # https://github.com/rspec/rspec-expectations/pull/220 - context 'when splitting a string that is not US-ASCII compatible' do - let(:non_ascii_compatible_string) { "This is a pile of poo: 💩".encode("UTF-16LE") } + context 'with a string that cannot be converted to the target encoding' do + let(:binary_poop) {'💩' } # [128169] "\u{1F4A9}" + let(:non_ascii_compatible_string) { "This is a pile of poo: #{binary_poop}, yuck".encode("UTF-16LE") } it 'normally raises an Encoding::CompatibilityError' do expect { @@ -204,16 +217,21 @@ module RSpec::Support }.to raise_error(Encoding::CompatibilityError) end - it 'falls back to the Encoding.default_external' do - resulting_array = build_encoded_string(non_ascii_compatible_string, utf8_encoding).split("\n") + it 'corrects for the encoding if possible, else replaces the incompatible character' do + resulting_array = build_encoded_string(non_ascii_compatible_string).split("\n") expect(resulting_array.size).to eq(1) # sanity check - expected_string = "This is a pile of poo: \u{1F4A9}" - expect_identical_string(resulting_array.first, expected_string, Encoding.default_external) + if OS.windows? + replacement = EncodedString::REPLACE + else + replacement = binary_poop + end + expected_string = "This is a pile of poo: #{replacement}, yuck" + expect_identical_string(resulting_array.first, expected_string) end end end - def build_encoded_string(string, target_encoding) + def build_encoded_string(string, target_encoding = string.encoding) EncodedString.new(string, target_encoding) end else From 8aa73ae65d65e535d4d16df63fb9bdd51a94a37e Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Sat, 3 Jan 2015 15:02:46 -0600 Subject: [PATCH 11/14] Setting JRUBY_OPTS without cext.enabled or --2.0 https://travis-ci.org/rspec/rspec-support/jobs/45770207#L544 See https://github.com/travis-ci/travis-ci/issues/3067 Per http://docs.travis-ci.com/user/ci-environment/ Use -Xcompat.version=2.0 per `jruby --properties` as `--2.0` wasn't being respected Note that script/functions.sh declares # idea taken from: http://blog.headius.com/2010/03/jruby-startup-time-tips.html export JRUBY_OPTS="${JRUBY_OPTS} -X-C" # disable JIT since these processes are so short lived --- .travis.yml | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index 94ebd55c4..5dc616276 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,19 +22,24 @@ rvm: - 2.2 - ruby-head - ree - - jruby-18mode - - jruby - - jruby-head - rbx matrix: include: - rvm: jruby - env: JRUBY_OPTS='--2.0' - allow_failures: + env: JRUBY_OPTS='--server -Xcompile.invokedynamic=false -Xcompat.version=2.0' - rvm: jruby-head + env: JRUBY_OPTS='--server -Xcompile.invokedynamic=false' + # These two are temporary until https://github.com/travis-ci/travis-ci/issues/3067 is solved. + - rvm: jruby-18mode + env: JRUBY_OPTS='--server -Xcompile.invokedynamic=false' + - rvm: jruby + env: JRUBY_OPTS='--server -Xcompile.invokedynamic=false' + allow_failures: - rvm: ruby-head - rvm: rbx # These two are temporary until https://github.com/travis-ci/travis-ci/issues/3067 is solved. - rvm: jruby-18mode + env: JRUBY_OPTS='--server -Xcompile.invokedynamic=false' - rvm: jruby + env: JRUBY_OPTS='--server -Xcompile.invokedynamic=false' fast_finish: true From db2c3a43e1cdb0fc1491394328f78c712aa9ed19 Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Sun, 4 Jan 2015 15:15:52 -0600 Subject: [PATCH 12/14] Ensure and assert tests run with UTF-8 external encoding Some tests fail if the external/locale/filesystem encoding is ISO-8859-1 Rather than debugging environmental issues, we can control them per Ruby docs by running our CI specs via `ruby -E UTF-8 -S $spec_command`. See https://github.com/ruby/ruby/blob/ca24e581ba/encoding.c#L1674 Alternatively, we could control them via the environmental variables LANG, LC_ALL, LC_CTYPE in the .travis.yml or appveyor.yml with e.g. env: - LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 (which is the default on Travis) ( http://docs.travis-ci.com/user/ci-environment/#Environment-variables ) but I'm not sure how windows-compatible that is and it appears more reliable to be explicit in the runner, so that running script/run_build uses the correct encoding, and one doesn't need to, for example, gem install wwtd && wwtd --local to get the CI behavior Also, see see https://github.com/rubyspec/rubyspec/blob/91ce9f6549/core/encoding/find_spec.rb#L57 --- script/functions.sh | 8 +++--- spec/rspec/support/encoded_string_spec.rb | 34 +++++++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/script/functions.sh b/script/functions.sh index 75fe0465d..2aedea3d9 100644 --- a/script/functions.sh +++ b/script/functions.sh @@ -26,7 +26,7 @@ function run_specs_and_record_done { fi; echo "${PWD}/bin/rspec" - $rspec_bin spec --backtrace --format progress --profile --format progress --out $SPECS_HAVE_RUN_FILE + ruby -E UTF-8:UTF-8 -S $rspec_bin spec --backtrace --format progress --profile --format progress --out $SPECS_HAVE_RUN_FILE } function run_cukes { @@ -50,7 +50,7 @@ function run_cukes { # and PATH for those that are using `rspec` or `rake`. RUBYOPT="-I${PWD}/../bundle -rbundler/setup" \ PATH="${PWD}/bin:$PATH" \ - bin/cucumber --strict + ruby -E UTF-8:UTF-8 -S bin/cucumber --strict fi fi } @@ -59,7 +59,7 @@ function run_specs_one_by_one { echo "Running each spec file, one-by-one..." for file in `find spec -iname '*_spec.rb'`; do - bin/rspec $file -b --format progress + ruby -E UTF-8:UTF-8 -S bin/rspec $file -b --format progress done } @@ -112,7 +112,7 @@ function check_documentation_coverage { } function check_style_and_lint { - echo "bin/rubucop lib" + echo "bin/rubocop lib" bin/rubocop lib } diff --git a/spec/rspec/support/encoded_string_spec.rb b/spec/rspec/support/encoded_string_spec.rb index f402c1e42..0bfc8178e 100644 --- a/spec/rspec/support/encoded_string_spec.rb +++ b/spec/rspec/support/encoded_string_spec.rb @@ -16,6 +16,40 @@ module RSpec::Support if String.method_defined?(:encoding) + # see https://github.com/rubyspec/rubyspec/blob/91ce9f6549/core/encoding/find_spec.rb#L57 + describe 'Ensure tests are running with utf-8 encoding' do + + it 'default_internal' do + if Encoding.default_external == Encoding.find('locale') + expected_encoding = '' + else + expected_encoding = utf8_encoding + end + expect(Encoding.default_internal.to_s).to eq(expected_encoding) + end + + it 'default_external' do + expect(Encoding.default_external.to_s).to eq(utf8_encoding) + end + + it 'locale' do + skip "Not sure how to determine locale (#{Encoding.find('locale')})"\ + "from LC_ALL or on windows" + end + + it 'filesystem' do + encoding = Encoding.find('filesystem').to_s + if OS.windows? + skip "Not sure how to tell filesystem encoding is #{encoding}" + expect(encoding).to eq(utf8_encoding) + end + end + + it 'current script (file)' do + expect(__ENCODING__.to_s).to eq(utf8_encoding) + end + end + describe '#pick_encoding' do if String.method_defined?(:encoding) it "picks a compatible encoding, falling back to default_external" do From 591c3fcb6b398621f4782c50052fe0f0a14e93de Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Tue, 6 Jan 2015 00:05:10 -0600 Subject: [PATCH 13/14] Set the external encoding for specs in a 1.8.7-compatible way --- script/functions.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/script/functions.sh b/script/functions.sh index 2aedea3d9..83b4362d8 100644 --- a/script/functions.sh +++ b/script/functions.sh @@ -7,6 +7,10 @@ source $SCRIPT_DIR/predicate_functions.sh # idea taken from: http://blog.headius.com/2010/03/jruby-startup-time-tips.html export JRUBY_OPTS="${JRUBY_OPTS} -X-C" # disable JIT since these processes are so short lived +# Set the external encoding to UTF-8 in a 1.8.7-compatible way +export LANG=en_US.UTF-8 +export LC_ALL=en_US.UTF-8 + SPECS_HAVE_RUN_FILE=specs.out MAINTENANCE_BRANCH=`cat maintenance-branch` @@ -26,7 +30,7 @@ function run_specs_and_record_done { fi; echo "${PWD}/bin/rspec" - ruby -E UTF-8:UTF-8 -S $rspec_bin spec --backtrace --format progress --profile --format progress --out $SPECS_HAVE_RUN_FILE + $rspec_bin spec --backtrace --format progress --profile --format progress --out $SPECS_HAVE_RUN_FILE } function run_cukes { @@ -50,7 +54,7 @@ function run_cukes { # and PATH for those that are using `rspec` or `rake`. RUBYOPT="-I${PWD}/../bundle -rbundler/setup" \ PATH="${PWD}/bin:$PATH" \ - ruby -E UTF-8:UTF-8 -S bin/cucumber --strict + bin/cucumber --strict fi fi } @@ -59,7 +63,7 @@ function run_specs_one_by_one { echo "Running each spec file, one-by-one..." for file in `find spec -iname '*_spec.rb'`; do - ruby -E UTF-8:UTF-8 -S bin/rspec $file -b --format progress + bin/rspec $file -b --format progress done } From b3a02570d11156dda3974b67de50bf224f1a7d65 Mon Sep 17 00:00:00 2001 From: Benjamin Fleischer Date: Tue, 6 Jan 2015 00:07:57 -0600 Subject: [PATCH 14/14] Try simplecov-html master for writing out in binary --- Gemfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Gemfile b/Gemfile index 4cae93c45..2c9a5ef3e 100644 --- a/Gemfile +++ b/Gemfile @@ -14,7 +14,8 @@ branch = File.read(File.expand_path("../maintenance-branch", __FILE__)).chomp end ### dep for ci/coverage -gem 'simplecov', '~> 0.8' +gem 'simplecov', '~> 0.9' +gem 'simplecov-html', :github => 'colszowka/simplecov-html' gem 'rubocop', "~> 0.23.0", :platform => [:ruby_19, :ruby_20, :ruby_21]