Skip to content

Commit

Permalink
Change language detection to include hashtags as words (#11341)
Browse files (browse the repository at this point in the history)
  • Loading branch information
Gargron authored Jul 18, 2019
1 parent 3a6fe65 commit 5bfe1e1
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion app/lib/language_detector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def simplify_text(text)
new_text = remove_html(text)
new_text.gsub!(FetchLinkCardService::URL_PATTERN, '')
new_text.gsub!(Account::MENTION_RE, '')
- new_text.gsub!(Tag::HASHTAG_RE, '')
+ new_text.gsub!(Tag::HASHTAG_RE) { |string| string.gsub(/[#_]/, '#' => '', '_' => ' ').gsub(/[a-z][A-Z]|[a-zA-Z][\d]/) { |s| s.insert(1, ' ') }.downcase }
new_text.gsub!(/:#{CustomEmoji::SHORTCODE_RE_FRAGMENT}:/, '')
new_text.gsub!(/\s+/, ' ')
new_text
Expand Down
6 changes: 3 additions & 3 deletions spec/lib/language_detector_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@
expect(result).to eq 'Our website is and also'
end

- it 'strips #hashtags from strings before detection' do
- string = 'Hey look at all the #animals and #fish'
+ it 'converts #hashtags back to normal text before detection' do
+ string = 'Hey look at all the #animals and #FishAndChips'

result = described_class.instance.send(:prepare_text, string)
- expect(result).to eq 'Hey look at all the and'
+ expect(result).to eq 'Hey look at all the animals and fish and chips'
end
end

Expand Down

0 comments on commit 5bfe1e1

Please sign in to comment.