Skip to content

Commit

Permalink
Convert source file encoding to UTF-8
Browse files Browse the repository at this point in the history
  • Loading branch information
Tietew committed Feb 14, 2020
1 parent 0d0f692 commit 9f55273
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Settings that apply to every cop.
AllCops:
# Paths RuboCop skips entirely.
Exclude:
# Encoding fixtures; presumably excluded because they intentionally contain
# non-ASCII bytes / non-default encodings - confirm.
- "spec/fixtures/iso-8859.rb"
- "spec/fixtures/utf-8.rb"
- "spec/fixtures/utf-8-magic.rb"
- "spec/fixtures/euc-jp.rb"
# Scratch output and vendored dependencies.
- "tmp/**/*"
- "vendor/bundle/**/*"
- "vendor/bundle/**/.*"
Expand Down
28 changes: 27 additions & 1 deletion lib/simplecov/source_file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def project_filename
# Returns the lines of the source file, reading them on first use and
# caching the result in @src.
def src
# We intentionally read source code lazily to
# suppress reading unused source code.
# NOTE(review): this reads like a flattened diff hunk in which the next line
# is the removed version and the one after it is the added version. If both
# lines are really present, the raw binary read memoizes @src first and
# load_source never runs whenever that read returns a truthy value -
# confirm which line is meant to stay.
@src ||= File.open(filename, "rb", &:readlines)
@src ||= load_source
end
alias source src

Expand Down Expand Up @@ -175,6 +175,32 @@ def build_no_cov_chunks
end
end

# Reads the file at +filename+ and returns its lines as valid UTF-8 strings.
#
# The file is opened as UTF-8. The first line (or the second one when the
# file starts with a shebang) is handed to check_magic_comment, which may
# switch the file's external encoding so that the remaining lines are
# transcoded to UTF-8 while being read.
#
# Lines read under the default encoding are scrubbed: a file in a legacy
# encoding without a magic comment would otherwise yield strings with invalid
# byte sequences, and matching a regexp against such a string raises
# ArgumentError. Invalid bytes are replaced with U+FFFD instead.
#
# @return [Array<String>] the source lines, including line terminators;
#   empty for an empty file
def load_source
  lines = []
  # The default encoding is UTF-8
  File.open(filename, "rb:UTF-8") do |file|
    line = file.gets&.scrub

    # Check for shebang
    if line&.start_with?("#!")
      lines << line
      line = file.gets&.scrub
    end
    # Empty file, or nothing after the shebang line
    return lines unless line

    check_magic_comment(file, line)
    # Already-valid (e.g. transcoded) lines pass through scrub unchanged.
    lines.concat([line], file.readlines.map(&:scrub))
  end
end

# Switches +file+ to the encoding named by an encoding magic comment, if
# +line+ is one, so that subsequent reads are transcoded to UTF-8.
#
# @param file [IO] the open source file; only #set_encoding is called on it
# @param line [String] the first line of the file, or the second one when
#   the first is a shebang
# @return the result of +file.set_encoding+ when a magic comment was found,
#   otherwise nil
def check_magic_comment(file, line)
  # Matching a regexp against a string with invalid byte sequences raises
  # ArgumentError; such a line is treated as not being a magic comment.
  return unless line.valid_encoding?

  # Check for encoding magic comment
  # Encoding magic comment must be placed at first line except for shebang
  match = /\A#\s*(?:-\*-)?\s*(?:en)?coding:\s*(\S+)\s*(?:-\*-)?\s*\z/.match(line)
  file.set_encoding(match[1], "UTF-8") if match
end

def build_lines
coverage_exceeding_source_warn if coverage_data["lines"].size > src.size
lines = src.map.with_index(1) do |src, i|
Expand Down
2 changes: 2 additions & 0 deletions spec/fixtures/euc-jp.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# encoding: EUC-JP
puts "135¡ëC"
2 changes: 2 additions & 0 deletions spec/fixtures/utf-8-magic.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# encoding: UTF-8
puts "135°C"
38 changes: 38 additions & 0 deletions spec/source_file_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -695,4 +695,42 @@
end
end
end

# Checks that source lines come back as valid UTF-8 regardless of the
# encoding the fixture file is stored in.
context "a file contains non-ASCII characters" do
  # Coverage data holding a single line entry.
  COVERAGE_FOR_SINGLE_LINE = {"lines" => [nil]}.freeze
  # Coverage data for the two-line fixtures (magic comment plus one
  # statement): one entry per source line.
  COVERAGE_FOR_DOUBLE_LINES = {"lines" => [nil, 1]}.freeze

  shared_examples_for "converting to UTF-8" do
    it "has all source lines of encoding UTF-8" do
      subject.lines.each do |line|
        expect(line.source.encoding).to eq(Encoding::UTF_8)
        expect(line.source).to be_valid_encoding
      end
    end
  end

  describe "UTF-8 without magic comment" do
    subject do
      SimpleCov::SourceFile.new(source_fixture("utf-8.rb"), COVERAGE_FOR_SINGLE_LINE)
    end

    it_behaves_like "converting to UTF-8"
  end

  describe "UTF-8 with magic comment" do
    subject do
      SimpleCov::SourceFile.new(source_fixture("utf-8-magic.rb"), COVERAGE_FOR_DOUBLE_LINES)
    end

    it_behaves_like "converting to UTF-8"
  end

  describe "EUC-JP with magic comment" do
    subject do
      SimpleCov::SourceFile.new(source_fixture("euc-jp.rb"), COVERAGE_FOR_DOUBLE_LINES)
    end

    it_behaves_like "converting to UTF-8"
  end
end
end

0 comments on commit 9f55273

Please sign in to comment.