From d2bc0b2b9ede7b700acb93ec539b7f5fe19419fb Mon Sep 17 00:00:00 2001
From: Steven Myint <git@stevenmyint.com>
Date: Sun, 31 Mar 2013 07:27:31 -0700
Subject: [PATCH] Split on all non-words

Previously, only a hard-coded set of special-case characters (like "<")
was treated as a word separator. This change splits on every non-word
character instead of just those special cases. This resolves item 3 of
issue #16 in an alternate way.
---
 misspellings_lib.py | 2 +-
 tests/test_class.py | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/misspellings_lib.py b/misspellings_lib.py
index 5fc2c79..5cf9311 100644
--- a/misspellings_lib.py
+++ b/misspellings_lib.py
@@ -15,7 +15,7 @@
 import string
 
 _NORM_REGEX = re.compile('([a-z])([A-Z][a-z])')
-_WORD_REGEX = re.compile('[\s_0-9<>/,\.]+')
+_WORD_REGEX = re.compile('[\s_0-9\W]+', flags=re.UNICODE)
 
 
 def normalize(word):
diff --git a/tests/test_class.py b/tests/test_class.py
index 918a2a0..ee3aeb4 100755
--- a/tests/test_class.py
+++ b/tests/test_class.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # For Python 2.5
 from __future__ import with_statement
@@ -135,8 +136,12 @@ def testSplitWordsWithCamelCase(self):
     self.assertEqual(['one', 'Two', 'Three', 'four', 'five'],
                      misspellings.split_words('oneTwoThree_four five'))
 
-    def testNormalize(self):
-      self.assertEqual('alpha', misspellings.normalize('"alpha".'))
+  def testSplitWordsWithOtherCharacters(self):
+    self.assertEqual(['the', 'big', 'cat'],
+                     misspellings.split_words('the%big$cat'))
+
+  def testNormalize(self):
+    self.assertEqual('alpha', misspellings.normalize('"alpha".'))
 
 
 if __name__ == '__main__':