diff --git a/CHANGELOG.md b/CHANGELOG.md index ea6dc4f7eb..463e890b79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### New features * [#173](https://github.com/rubocop-hq/rubocop-performance/pull/173): Add new `Performance/BlockGivenWithExplicitBlock` cop. ([@fatkodima][]) +* [#151](https://github.com/rubocop-hq/rubocop-performance/issues/151): Add new `Performance/ConstantRegexp` cop. ([@fatkodima][]) ### Changes diff --git a/config/default.yml b/config/default.yml index db14ebe4c0..a5eb034721 100644 --- a/config/default.yml +++ b/config/default.yml @@ -66,6 +66,11 @@ Performance/CompareWithBlock: Enabled: true VersionAdded: '0.46' +Performance/ConstantRegexp: + Description: 'Finds regular expressions with dynamic components that are all constants.' + Enabled: pending + VersionAdded: '1.9' + Performance/Count: Description: >- Use `count` instead of `{select,find_all,filter,reject}...{size,count,length}`. diff --git a/docs/modules/ROOT/pages/cops.adoc b/docs/modules/ROOT/pages/cops.adoc index 32046a5af3..04c50ffd46 100644 --- a/docs/modules/ROOT/pages/cops.adoc +++ b/docs/modules/ROOT/pages/cops.adoc @@ -22,6 +22,7 @@ Performance cops optimization analysis for your projects. * xref:cops_performance.adoc#performancechainarrayallocation[Performance/ChainArrayAllocation] * xref:cops_performance.adoc#performancecollectionliteralinloop[Performance/CollectionLiteralInLoop] * xref:cops_performance.adoc#performancecomparewithblock[Performance/CompareWithBlock] +* xref:cops_performance.adoc#performanceconstantregexp[Performance/ConstantRegexp] * xref:cops_performance.adoc#performancecount[Performance/Count] * xref:cops_performance.adoc#performancedeleteprefix[Performance/DeletePrefix] * xref:cops_performance.adoc#performancedeletesuffix[Performance/DeleteSuffix] diff --git a/docs/modules/ROOT/pages/cops_performance.adoc b/docs/modules/ROOT/pages/cops_performance.adoc index 3c25d9cf1b..1bf7b25b53 100644 --- a/docs/modules/ROOT/pages/cops_performance.adoc +++ b/docs/modules/ROOT/pages/cops_performance.adoc @@ -405,6 +405,46 @@ array.min_by(&:foo) array.sort_by { |a| a[:foo] } ---- +== Performance/ConstantRegexp + +|=== +| Enabled by default | Safe | Supports autocorrection | VersionAdded | VersionChanged + +| Pending +| Yes +| Yes +| 1.9 +| - +|=== + +This cop finds regular expressions with dynamic components that are all constants. + +Ruby allocates a new Regexp object every time it executes a code containing such +a regular expression. It is more efficient to extract it into a constant +or add an `/o` option to perform `#{}` interpolation only once and reuse that +Regexp object. + +=== Examples + +[source,ruby] +---- +# bad +def tokens(pattern) + pattern.scan(TOKEN).reject { |token| token.match?(/\A#{SEPARATORS}\Z/) } +end + +# good +ALL_SEPARATORS = /\A#{SEPARATORS}\Z/ +def tokens(pattern) + pattern.scan(TOKEN).reject { |token| token.match?(ALL_SEPARATORS) } +end + +# good +def tokens(pattern) + pattern.scan(TOKEN).reject { |token| token.match?(/\A#{SEPARATORS}\Z/o) } +end +---- + == Performance/Count |=== diff --git a/lib/rubocop/cop/mixin/regexp_metacharacter.rb b/lib/rubocop/cop/mixin/regexp_metacharacter.rb index 9d3ab3cc43..db41b0af87 100644 --- a/lib/rubocop/cop/mixin/regexp_metacharacter.rb +++ b/lib/rubocop/cop/mixin/regexp_metacharacter.rb @@ -25,7 +25,7 @@ def literal_at_start_with_backslash_a?(regex_str) # (tricky: \s, \d, and so on are metacharacters, but other characters # escaped with a slash are just literals. LITERAL_REGEX takes all # that into account.) - /\A\\A(?:#{Util::LITERAL_REGEX})+\z/.match?(regex_str) + /\A\\A(?:#{Util::LITERAL_REGEX})+\z/o.match?(regex_str) end def literal_at_start_with_caret?(regex_str) @@ -35,21 +35,21 @@ def literal_at_start_with_caret?(regex_str) # (tricky: \s, \d, and so on are metacharacters, but other characters # escaped with a slash are just literals. LITERAL_REGEX takes all # that into account.) - /\A\^(?:#{Util::LITERAL_REGEX})+\z/.match?(regex_str) + /\A\^(?:#{Util::LITERAL_REGEX})+\z/o.match?(regex_str) end def literal_at_end_with_backslash_z?(regex_str) # is this regexp 'literal' in the sense of only matching literal # chars, rather than using metachars like . and * and so on? # also, is it anchored at the end of the string? - /\A(?:#{Util::LITERAL_REGEX})+\\z\z/.match?(regex_str) + /\A(?:#{Util::LITERAL_REGEX})+\\z\z/o.match?(regex_str) end def literal_at_end_with_dollar?(regex_str) # is this regexp 'literal' in the sense of only matching literal # chars, rather than using metachars like . and * and so on? # also, is it anchored at the end of the string? - /\A(?:#{Util::LITERAL_REGEX})+\$\z/.match?(regex_str) + /\A(?:#{Util::LITERAL_REGEX})+\$\z/o.match?(regex_str) end def drop_start_metacharacter(regexp_string) diff --git a/lib/rubocop/cop/performance/constant_regexp.rb b/lib/rubocop/cop/performance/constant_regexp.rb new file mode 100644 index 0000000000..09e5c0d81f --- /dev/null +++ b/lib/rubocop/cop/performance/constant_regexp.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +module RuboCop + module Cop + module Performance + # This cop finds regular expressions with dynamic components that are all constants. + # + # Ruby allocates a new Regexp object every time it executes a code containing such + # a regular expression. It is more efficient to extract it into a constant + # or add an `/o` option to perform `#{}` interpolation only once and reuse that + # Regexp object. + # + # @example + # + # # bad + # def tokens(pattern) + # pattern.scan(TOKEN).reject { |token| token.match?(/\A#{SEPARATORS}\Z/) } + # end + # + # # good + # ALL_SEPARATORS = /\A#{SEPARATORS}\Z/ + # def tokens(pattern) + # pattern.scan(TOKEN).reject { |token| token.match?(ALL_SEPARATORS) } + # end + # + # # good + # def tokens(pattern) + # pattern.scan(TOKEN).reject { |token| token.match?(/\A#{SEPARATORS}\Z/o) } + # end + # + class ConstantRegexp < Base + extend AutoCorrector + + MSG = 'Extract this regexp into a constant or append an `/o` option to its options.' + + def on_regexp(node) + return if within_const_assignment?(node) || + !include_interpolated_const?(node) || + node.single_interpolation? + + add_offense(node) do |corrector| + corrector.insert_after(node, 'o') + end + end + + private + + def within_const_assignment?(node) + node.each_ancestor(:casgn).any? + end + + def_node_matcher :regexp_escape?, <<~PATTERN + (send + (const nil? :Regexp) :escape const_type?) + PATTERN + + def include_interpolated_const?(node) + return false unless node.interpolation? + + node.each_child_node(:begin).all? do |begin_node| + inner_node = begin_node.children.first + inner_node && (inner_node.const_type? || regexp_escape?(inner_node)) + end + end + end + end + end +end diff --git a/lib/rubocop/cop/performance/squeeze.rb b/lib/rubocop/cop/performance/squeeze.rb index 2c07f184de..315e5e3a23 100644 --- a/lib/rubocop/cop/performance/squeeze.rb +++ b/lib/rubocop/cop/performance/squeeze.rb @@ -59,7 +59,7 @@ def on_send(node) private def repeating_literal?(regex_str) - regex_str.match?(/\A(?:#{Util::LITERAL_REGEX})\+\z/) + regex_str.match?(/\A(?:#{Util::LITERAL_REGEX})\+\z/o) end end end diff --git a/lib/rubocop/cop/performance/string_include.rb b/lib/rubocop/cop/performance/string_include.rb index c7fc6cd1b2..c549300aff 100644 --- a/lib/rubocop/cop/performance/string_include.rb +++ b/lib/rubocop/cop/performance/string_include.rb @@ -48,7 +48,7 @@ def on_send(node) private def literal?(regex_str) - regex_str.match?(/\A#{Util::LITERAL_REGEX}+\z/) + regex_str.match?(/\A#{Util::LITERAL_REGEX}+\z/o) end end end diff --git a/lib/rubocop/cop/performance_cops.rb b/lib/rubocop/cop/performance_cops.rb index e1e55c7d64..09ed676a69 100644 --- a/lib/rubocop/cop/performance_cops.rb +++ b/lib/rubocop/cop/performance_cops.rb @@ -12,6 +12,7 @@ require_relative 'performance/casecmp' require_relative 'performance/collection_literal_in_loop' require_relative 'performance/compare_with_block' +require_relative 'performance/constant_regexp' require_relative 'performance/count' require_relative 'performance/delete_prefix' require_relative 'performance/delete_suffix' diff --git a/spec/rubocop/cop/performance/constant_regexp_spec.rb b/spec/rubocop/cop/performance/constant_regexp_spec.rb new file mode 100644 index 0000000000..75314e65d1 --- /dev/null +++ b/spec/rubocop/cop/performance/constant_regexp_spec.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +RSpec.describe RuboCop::Cop::Performance::ConstantRegexp do + subject(:cop) { described_class.new } + + it 'registers an offense and corrects when regexp contains interpolated constant' do + expect_offense(<<~RUBY) + str.match?(/\A\#{CONST}/) + ^^^^^^^^^^^ Extract this regexp into a constant or append an `/o` option to its options. + RUBY + + expect_correction(<<~RUBY) + str.match?(/\A\#{CONST}/o) + RUBY + end + + it 'registers an offense and corrects when regexp contains multiple interpolated constants' do + expect_offense(<<~RUBY) + str.match?(/\A\#{CONST1}something\#{CONST2}\z/) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Extract this regexp into a constant or append an `/o` option to its options. + RUBY + + expect_correction(<<~RUBY) + str.match?(/\A\#{CONST1}something\#{CONST2}\z/o) + RUBY + end + + it 'registers an offense and corrects when regexp contains `Regexp.escape` on constant' do + expect_offense(<<~RUBY) + str.match?(/\A\#{CONST1}something\#{Regexp.escape(CONST2)}\z/) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Extract this regexp into a constant or append an `/o` option to its options. + RUBY + + expect_correction(<<~RUBY) + str.match?(/\A\#{CONST1}something\#{Regexp.escape(CONST2)}\z/o) + RUBY + end + + it 'does not register an offense when regexp does not contain interpolated constant' do + expect_no_offenses(<<~RUBY) + str.match?(/foo/) + RUBY + end + + it 'does not register an offense when regexp is within assignment to a constant' do + expect_no_offenses(<<~RUBY) + CONST = str.match?(/\#{ANOTHER_CONST}/) + RUBY + end + + it 'does not register an offense when regexp has `/o` option' do + expect_no_offenses(<<~RUBY) + str.match?(/\#{CONST}/o) + RUBY + end + + it 'does not register an offense when regexp contains interpolated constant and '\ + 'and interpolated non constant' do + expect_no_offenses(<<~RUBY) + str.match?(/\#{CONST}\#{do_something(1)}/) + RUBY + end +end