From 9f55273d5cacb6c1fa26a6d218eb40b5ecc8e12b Mon Sep 17 00:00:00 2001 From: Tietew Date: Fri, 14 Feb 2020 13:14:25 +0900 Subject: [PATCH] Convert source file encoding to UTF-8 --- .rubocop.yml | 3 +++ lib/simplecov/source_file.rb | 28 +++++++++++++++++++++++++- spec/fixtures/euc-jp.rb | 2 ++ spec/fixtures/utf-8-magic.rb | 2 ++ spec/source_file_spec.rb | 38 ++++++++++++++++++++++++++++++++++++ 5 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 spec/fixtures/euc-jp.rb create mode 100644 spec/fixtures/utf-8-magic.rb diff --git a/.rubocop.yml b/.rubocop.yml index ce1bd2aa..9a1b76f9 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -1,6 +1,9 @@ AllCops: Exclude: - "spec/fixtures/iso-8859.rb" + - "spec/fixtures/utf-8.rb" + - "spec/fixtures/utf-8-magic.rb" + - "spec/fixtures/euc-jp.rb" - "tmp/**/*" - "vendor/bundle/**/*" - "vendor/bundle/**/.*" diff --git a/lib/simplecov/source_file.rb b/lib/simplecov/source_file.rb index af0922d9..65897361 100644 --- a/lib/simplecov/source_file.rb +++ b/lib/simplecov/source_file.rb @@ -25,7 +25,7 @@ def project_filename def src # We intentionally read source code lazily to # suppress reading unused source code. 
- @src ||= File.open(filename, "rb", &:readlines) + @src ||= load_source end alias source src @@ -175,6 +175,32 @@ def build_no_cov_chunks end end + def load_source + lines = [] + # The default encoding is UTF-8 + File.open(filename, "rb:UTF-8") do |file| + line = file.gets + + # Check for shebang + if /\A#!/.match?(line) + lines << line + line = file.gets + end + return lines unless line + + check_magic_comment(file, line) + lines.concat([line], file.readlines) + end + end + + def check_magic_comment(file, line) + # Check for encoding magic comment + # Encoding magic comment must be placed on the first line (or right after a shebang line) + if (match = /\A#\s*(?:-\*-)?\s*(?:en)?coding:\s*(\S+)\s*(?:-\*-)?\s*\z/.match(line)) + file.set_encoding(match[1], "UTF-8") + end + end + def build_lines coverage_exceeding_source_warn if coverage_data["lines"].size > src.size lines = src.map.with_index(1) do |src, i| diff --git a/spec/fixtures/euc-jp.rb b/spec/fixtures/euc-jp.rb new file mode 100644 index 00000000..0eecd26e --- /dev/null +++ b/spec/fixtures/euc-jp.rb @@ -0,0 +1,2 @@ +# encoding: EUC-JP +puts "135¡ëC" diff --git a/spec/fixtures/utf-8-magic.rb b/spec/fixtures/utf-8-magic.rb new file mode 100644 index 00000000..64996ba2 --- /dev/null +++ b/spec/fixtures/utf-8-magic.rb @@ -0,0 +1,2 @@ +# encoding: UTF-8 +puts "135°C" diff --git a/spec/source_file_spec.rb b/spec/source_file_spec.rb index 48ad3cc2..b7ea394e 100644 --- a/spec/source_file_spec.rb +++ b/spec/source_file_spec.rb @@ -695,4 +695,42 @@ end end end + + context "a file contains non-ASCII characters" do + COVERAGE_FOR_SINGLE_LINE = {"lines" => [nil]}.freeze + COVERAGE_FOR_DOUBLE_LINES = {"lines" => [nil]}.freeze + + shared_examples_for "converting to UTF-8" do + it "has all source lines of encoding UTF-8" do + subject.lines.each do |line| + expect(line.source.encoding).to eq(Encoding::UTF_8) + expect(line.source).to be_valid_encoding + end + end + end + + describe "UTF-8 without magic comment" do + subject do + 
SimpleCov::SourceFile.new(source_fixture("utf-8.rb"), COVERAGE_FOR_SINGLE_LINE) + end + + it_behaves_like "converting to UTF-8" + end + + describe "UTF-8 with magic comment" do + subject do + SimpleCov::SourceFile.new(source_fixture("utf-8-magic.rb"), COVERAGE_FOR_DOUBLE_LINES) + end + + it_behaves_like "converting to UTF-8" + end + + describe "EUC-JP with magic comment" do + subject do + SimpleCov::SourceFile.new(source_fixture("euc-jp.rb"), COVERAGE_FOR_DOUBLE_LINES) + end + + it_behaves_like "converting to UTF-8" + end + end end