diff --git a/.gitignore b/.gitignore
index af4be75..a699f2b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@
 /MANIFEST
 __pycache__/
 *.pyc
+test.cram.err
diff --git a/scspell/__init__.py b/scspell/__init__.py
index 4300f4e..2c9c951 100644
--- a/scspell/__init__.py
+++ b/scspell/__init__.py
@@ -25,10 +25,11 @@
 
 import argparse
 import os
-import re
+import regex
 import sys
 import shutil
 import uuid
+import unicodedata
 
 try:
     import ConfigParser
@@ -78,22 +79,22 @@
 # Treat anything alphanumeric as a token of interest, as long as it is not
 # immediately preceded by a single backslash.  (The string "\ntext" should
 # match on "text" rather than "ntext".)
-C_ESCAPE_TOKEN_REGEX = re.compile(r'(?<![^\\]\\)\w+')
+C_ESCAPE_TOKEN_REGEX = regex.compile(r'(?<![^\\]\\)\w+')
 
 # \ is not a character escape in e.g. LaTeX
-TOKEN_REGEX = re.compile(r'\w+')
+TOKEN_REGEX = regex.compile(r'\w+')
 
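-# Hex digits will be treated as a special case, because they can look like
-# word-like even though they are actually numeric
+# Hex digits will be treated as a special case, because they can look
+# word-like even though they are actually numeric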
-HEX_REGEX = re.compile(r'0x[0-9a-fA-F]+')
+HEX_REGEX = regex.compile(r'0x[0-9a-fA-F]+')
 
 # We assume that tokens will be split using either underscores,
 # digits, or camelCase conventions (or both)
-US_REGEX = re.compile(r'[_\d]+')
-CAMEL_WORD_REGEX = re.compile(r'([A-Z][a-z]*)')
+US_REGEX = regex.compile(r'[_\d]+')
+CAMEL_WORD_REGEX = regex.compile(r'([[:upper:]][[:lower:]]*)')
 
 # File-id specifiers take this form
-FILE_ID_REGEX = re.compile(r'scspell-id:[ \t]*([a-zA-Z0-9_\-]+)')
+FILE_ID_REGEX = regex.compile(r'scspell-id:[ \t]*([a-zA-Z0-9_\-]+)')
 
 
 class MatchDescriptor(object):
@@ -104,7 +105,7 @@ class MatchDescriptor(object):
     def __init__(self, text, match_obj):
         self._data = text
         self._pos = match_obj.start()
-        self._token = match_obj.group()
+        self._token = unicodedata.normalize('NFC', match_obj.group())
         self._context = None
         self._line_num = None
 
@@ -384,7 +385,7 @@ def handle_failed_check_interactively(
     print("%s:%u: Unmatched '%s' --> {%s}" %
           (filename, match_desc.get_line_num(), token,
            ', '.join([st for st in unmatched_subtokens])))
-    MATCH_REGEX = re.compile(re.escape(match_desc.get_token()))
+    MATCH_REGEX = regex.compile(regex.escape(match_desc.get_token()))
     while True:
         print("""\
    (i)gnore, (I)gnore all, (r)eplace, (R)eplace all, (a)dd to dictionary, or
@@ -405,7 +406,7 @@ def handle_failed_check_interactively(
       (Canceled.)\n""")
             else:
                 ignores.add(replacement.lower())
-                tail = re.sub(
+                tail = regex.sub(
                     MATCH_REGEX, replacement, match_desc.get_remainder(),
                     1 if ch == 'r' else 0)
                 print()
@@ -771,7 +772,7 @@ def add_to_dict(dictionary_type, word, files=[],
             dicts.add_by_file_id(word, file_id)
 
         elif dictionary_type[0] == 'p':
-            ext = re.sub(r'.*\.', '.', '.{}'.format(files[0].lower()))
+            ext = regex.sub(r'.*\.', '.', '.{}'.format(files[0].lower()))
             if not dicts.add_by_extension(word, ext):
                 print("Dictionary for file extension '{}' not found."
                       .format(ext), file=sys.stderr)
diff --git a/scspell/_corpus.py b/scspell/_corpus.py
index 6a3e4d8..1c9946c 100644
--- a/scspell/_corpus.py
+++ b/scspell/_corpus.py
@@ -29,7 +29,7 @@
 import io
 import json
 import os
-import re
+import regex
 import sys
 from bisect import bisect_left
 from . import _util
@@ -41,7 +41,7 @@
 
 
 # Valid file ID strings take this form
-FILE_ID_REGEX = re.compile(r'[a-zA-Z0-9_\-]+')
+FILE_ID_REGEX = regex.compile(r'[a-zA-Z0-9_\-]+')
 
 
 MATCH_NATURAL = 0x1
diff --git a/setup.py b/setup.py
index e8b77fa..7a8c474 100755
--- a/setup.py
+++ b/setup.py
@@ -43,5 +43,6 @@ def get_version():
         'Topic :: Software Development',
         'Topic :: Text Processing :: Linguistic',
         'Topic :: Utilities'],
-    platforms=['any']
+    platforms=['any'],
+    install_requires=['regex']
 )
diff --git a/test.cram b/test.cram
index 854fe3a..90ff02a 100755
--- a/test.cram
+++ b/test.cram
@@ -16,7 +16,6 @@ Test okay file.
     $ echo 'This is okay.' > good.txt
     $ $SCSPELL good.txt
 
-
 Test file with --override-dictionary and a fileid mapping entry
 
     $ cp -a "$TESTDIR/tests" .
@@ -27,6 +26,14 @@ Test file with --override-dictionary and a fileid mapping entry
     tests/fileidmap/inputfile2.txt:4: 'soem' not found in dictionary (from token 'soem')
     [1]
 
+Test spelling mistake with diacritics.
+
+    $ $SCSPELL 'tests/basedicts/unicode-testfile'
+    tests/basedicts/unicode-testfile:1: 'b\xc3\xa4dly' not found in dictionary (from token 'B\xc3\xa4dly') (esc)
+    tests/basedicts/unicode-testfile:1: '\xc3\xa1lmost' not found in dictionary (from token '\xc3\x81lmost') (esc)
+    tests/basedicts/unicode-testfile:1: '\xc3\xa7\xc3\xa5m\xc3\xa9l', '\xc3\xa7\xc3\xa4se' were not found in the dictionary (from token '\xc3\x87\xc3\xa5m\xc3\xa9l\xc3\x87\xc3\xa4se') (esc)
+    [1]
+
 Test file ID manipulations
 
     $ $SCSPELL --override-dictionary tests/fileidmap/dictionary \
diff --git a/tests/basedicts/unicode-testfile b/tests/basedicts/unicode-testfile
new file mode 100644
index 0000000..30e7b38
--- /dev/null
+++ b/tests/basedicts/unicode-testfile
@@ -0,0 +1 @@
+Bädly Álmost ÇåmélÇäse