Skip to content

Commit

Permalink
Split on all non-word characters
Browse files Browse the repository at this point in the history
Previously, only a few special-case separators (like "<") were handled when
splitting text into words. This change splits on all non-word characters
instead of just those special cases. This resolves item 3 of issue lyda#16 in an alternate way.
  • Loading branch information
myint committed Mar 31, 2013
1 parent 5d8f144 commit d2bc0b2
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
2 changes: 1 addition & 1 deletion misspellings_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import string

_NORM_REGEX = re.compile('([a-z])([A-Z][a-z])')
_WORD_REGEX = re.compile('[\s_0-9<>/,\.]+')
_WORD_REGEX = re.compile('[\s_0-9\W]+', flags=re.UNICODE)


def normalize(word):
Expand Down
9 changes: 7 additions & 2 deletions tests/test_class.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# For Python 2.5
from __future__ import with_statement
Expand Down Expand Up @@ -135,8 +136,12 @@ def testSplitWordsWithCamelCase(self):
self.assertEqual(['one', 'Two', 'Three', 'four', 'five'],
misspellings.split_words('oneTwoThree_four five'))

def testNormalize(self):
self.assertEqual('alpha', misspellings.normalize('"alpha".'))
def testSplitWordsWithOtherCharacters(self):
self.assertEqual(['the', 'big', 'cat'],
misspellings.split_words('the%big$cat'))

def testNormalize(self):
self.assertEqual('alpha', misspellings.normalize('"alpha".'))


if __name__ == '__main__':
Expand Down

0 comments on commit d2bc0b2

Please sign in to comment.