From 3525e4a4cd3ec93a9e14685c9354b0a643e8db38 Mon Sep 17 00:00:00 2001
From: Manu <manu@snapdragon.cc>
Date: Wed, 21 Aug 2019 21:13:01 +0800
Subject: [PATCH] Format with Black.

---
 src/invoice2data/extract/invoice_template.py | 120 ++++++++++---------
 src/invoice2data/extract/loader.py           |  26 ++--
 src/invoice2data/extract/plugins/lines.py    |  64 +++++-----
 src/invoice2data/extract/plugins/tables.py   |  30 ++---
 src/invoice2data/input/gvision.py            |  24 ++--
 src/invoice2data/input/pdfminer_wrapper.py   |   8 +-
 src/invoice2data/input/pdftotext.py          |   4 +-
 src/invoice2data/input/tesseract.py          |  22 +++-
 src/invoice2data/input/tesseract4.py         |  64 +++++-----
 src/invoice2data/main.py                     | 105 ++++++++--------
 src/invoice2data/output/to_csv.py            |  10 +-
 src/invoice2data/output/to_json.py           |  15 ++-
 src/invoice2data/output/to_xml.py            |  30 ++---
 13 files changed, 286 insertions(+), 236 deletions(-)

diff --git a/src/invoice2data/extract/invoice_template.py b/src/invoice2data/extract/invoice_template.py
index b12c2cc2..8ea203dd 100644
--- a/src/invoice2data/extract/invoice_template.py
+++ b/src/invoice2data/extract/invoice_template.py
@@ -14,17 +14,17 @@
 logger = logging.getLogger(__name__)
 
 OPTIONS_DEFAULT = {
-    'remove_whitespace': False,
-    'remove_accents': False,
-    'lowercase': False,
-    'currency': 'EUR',
-    'date_formats': [],
-    'languages': [],
-    'decimal_separator': '.',
-    'replace': [],  # example: see templates/fr/fr.free.mobile.yml
+    "remove_whitespace": False,
+    "remove_accents": False,
+    "lowercase": False,
+    "currency": "EUR",
+    "date_formats": [],
+    "languages": [],
+    "decimal_separator": ".",
+    "replace": [],  # example: see templates/fr/fr.free.mobile.yml
 }
 
-PLUGIN_MAPPING = {'lines': lines, 'tables': tables}
+PLUGIN_MAPPING = {"lines": lines, "tables": tables}
 
 
 class InvoiceTemplate(OrderedDict):
@@ -53,15 +53,15 @@ def __init__(self, *args, **kwargs):
         # Merge template-specific options with defaults
         self.options = OPTIONS_DEFAULT.copy()
 
-        for lang in self.options['languages']:
-            assert len(lang) == 2, 'lang code must have 2 letters'
+        for lang in self.options["languages"]:
+            assert len(lang) == 2, "lang code must have 2 letters"
 
-        if 'options' in self:
-            self.options.update(self['options'])
+        if "options" in self:
+            self.options.update(self["options"])
 
         # Set issuer, if it doesn't exist.
-        if 'issuer' not in self.keys():
-            self['issuer'] = self['keywords'][0]
+        if "issuer" not in self.keys():
+            self["issuer"] = self["keywords"][0]
 
     def prepare_input(self, extracted_str):
         """
@@ -69,22 +69,22 @@ def prepare_input(self, extracted_str):
         """
 
         # Remove withspace
-        if self.options['remove_whitespace']:
-            optimized_str = re.sub(' +', '', extracted_str)
+        if self.options["remove_whitespace"]:
+            optimized_str = re.sub(" +", "", extracted_str)
         else:
             optimized_str = extracted_str
 
         # Remove accents
-        if self.options['remove_accents']:
+        if self.options["remove_accents"]:
             optimized_str = unidecode(optimized_str)
 
         # convert to lower case
-        if self.options['lowercase']:
+        if self.options["lowercase"]:
             optimized_str = optimized_str.lower()
 
         # specific replace
-        for replace in self.options['replace']:
-            assert len(replace) == 2, 'A replace should be a list of 2 items'
+        for replace in self.options["replace"]:
+            assert len(replace) == 2, "A replace should be a list of 2 items"
             optimized_str = optimized_str.replace(replace[0], replace[1])
 
         return optimized_str
@@ -92,72 +92,76 @@ def prepare_input(self, extracted_str):
     def matches_input(self, optimized_str):
         """See if string matches keywords set in template file"""
 
-        if all([keyword in optimized_str for keyword in self['keywords']]):
-            logger.debug('Matched template %s', self['template_name'])
+        if all([keyword in optimized_str for keyword in self["keywords"]]):
+            logger.debug("Matched template %s", self["template_name"])
             return True
 
     def parse_number(self, value):
         assert (
-            value.count(self.options['decimal_separator']) < 2
-        ), 'Decimal separator cannot be present several times'
+            value.count(self.options["decimal_separator"]) < 2
+        ), "Decimal separator cannot be present several times"
         # replace decimal separator by a |
-        amount_pipe = value.replace(self.options['decimal_separator'], '|')
+        amount_pipe = value.replace(self.options["decimal_separator"], "|")
         # remove all possible thousands separators
-        amount_pipe_no_thousand_sep = re.sub(r'[.,\s]', '', amount_pipe)
+        amount_pipe_no_thousand_sep = re.sub(r"[.,\s]", "", amount_pipe)
         # put dot as decimal sep
-        return float(amount_pipe_no_thousand_sep.replace('|', '.'))
+        return float(amount_pipe_no_thousand_sep.replace("|", "."))
 
     def parse_date(self, value):
         """Parses date and returns date after parsing"""
         res = dateparser.parse(
-            value, date_formats=self.options['date_formats'], languages=self.options['languages']
+            value,
+            date_formats=self.options["date_formats"],
+            languages=self.options["languages"],
         )
         logger.debug("result of date parsing=%s", res)
         return res
 
     def coerce_type(self, value, target_type):
-        if target_type == 'int':
+        if target_type == "int":
             if not value.strip():
                 return 0
             return int(self.parse_number(value))
-        elif target_type == 'float':
+        elif target_type == "float":
             if not value.strip():
                 return 0.0
             return float(self.parse_number(value))
-        elif target_type == 'date':
+        elif target_type == "date":
             return self.parse_date(value)
-        assert False, 'Unknown type'
+        assert False, "Unknown type"
 
     def extract(self, optimized_str):
         """
         Given a template file and a string, extract matching data fields.
         """
 
-        logger.debug('START optimized_str ========================')
+        logger.debug("START optimized_str ========================")
         logger.debug(optimized_str)
-        logger.debug('END optimized_str ==========================')
+        logger.debug("END optimized_str ==========================")
         logger.debug(
-            'Date parsing: languages=%s date_formats=%s',
-            self.options['languages'],
-            self.options['date_formats'],
+            "Date parsing: languages=%s date_formats=%s",
+            self.options["languages"],
+            self.options["date_formats"],
         )
-        logger.debug('Float parsing: decimal separator=%s', self.options['decimal_separator'])
-        logger.debug("keywords=%s", self['keywords'])
+        logger.debug(
+            "Float parsing: decimal separator=%s", self.options["decimal_separator"]
+        )
+        logger.debug("keywords=%s", self["keywords"])
         logger.debug(self.options)
 
         # Try to find data for each field.
         output = {}
-        output['issuer'] = self['issuer']
+        output["issuer"] = self["issuer"]
 
-        for k, v in self['fields'].items():
-            if k.startswith('static_'):
+        for k, v in self["fields"].items():
+            if k.startswith("static_"):
                 logger.debug("field=%s | static value=%s", k, v)
-                output[k.replace('static_', '')] = v
+                output[k.replace("static_", "")] = v
             else:
                 logger.debug("field=%s | regexp=%s", k, v)
 
                 sum_field = False
-                if k.startswith('sum_amount') and type(v) is list:
+                if k.startswith("sum_amount") and type(v) is list:
                     k = k[4:]  # remove 'sum_' prefix
                     sum_field = True
                 # Fields can have multiple expressions
@@ -174,12 +178,14 @@ def extract(self, optimized_str):
                     res_find = re.findall(v, optimized_str)
                 if res_find:
                     logger.debug("res_find=%s", res_find)
-                    if k.startswith('date') or k.endswith('date'):
+                    if k.startswith("date") or k.endswith("date"):
                         output[k] = self.parse_date(res_find[0])
                         if not output[k]:
-                            logger.error("Date parsing failed on date '%s'", res_find[0])
+                            logger.error(
+                                "Date parsing failed on date '%s'", res_find[0]
+                            )
                             return None
-                    elif k.startswith('amount'):
+                    elif k.startswith("amount"):
                         if sum_field:
                             output[k] = 0
                             for amount_to_parse in res_find:
@@ -195,7 +201,7 @@ def extract(self, optimized_str):
                 else:
                     logger.warning("regexp for field %s didn't match", k)
 
-        output['currency'] = self.options['currency']
+        output["currency"] = self.options["currency"]
 
         # Run plugins:
         for plugin_keyword, plugin_func in PLUGIN_MAPPING.items():
@@ -203,22 +209,24 @@ def extract(self, optimized_str):
                 plugin_func.extract(self, optimized_str, output)
 
         # If required fields were found, return output, else log error.
-        if 'required_fields' not in self.keys():
-            required_fields = ['date', 'amount', 'invoice_number', 'issuer']
+        if "required_fields" not in self.keys():
+            required_fields = ["date", "amount", "invoice_number", "issuer"]
         else:
             required_fields = []
-            for v in self['required_fields']:
+            for v in self["required_fields"]:
                 required_fields.append(v)
 
         if set(required_fields).issubset(output.keys()):
-            output['desc'] = 'Invoice from %s' % (self['issuer'])
+            output["desc"] = "Invoice from %s" % (self["issuer"])
             logger.debug(output)
             return output
         else:
             fields = list(set(output.keys()))
             logger.error(
-                'Unable to match all required fields. '
-                'The required fields are: {0}. '
-                'Output contains the following fields: {1}.'.format(required_fields, fields)
+                "Unable to match all required fields. "
+                "The required fields are: {0}. "
+                "Output contains the following fields: {1}.".format(
+                    required_fields, fields
+                )
             )
             return None
diff --git a/src/invoice2data/extract/loader.py b/src/invoice2data/extract/loader.py
index 4ae31f3c..48de256a 100644
--- a/src/invoice2data/extract/loader.py
+++ b/src/invoice2data/extract/loader.py
@@ -13,7 +13,7 @@
 import codecs
 import chardet
 
-logging.getLogger('chardet').setLevel(logging.WARNING)
+logging.getLogger("chardet").setLevel(logging.WARNING)
 
 
 # borrowed from http://stackoverflow.com/a/21912744
@@ -31,7 +31,9 @@ def construct_mapping(loader, node):
         loader.flatten_mapping(node)
         return object_pairs_hook(loader.construct_pairs(node))
 
-    OrderedLoader.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping)
+    OrderedLoader.add_constructor(
+        yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, construct_mapping
+    )
 
     return yaml.load(stream, OrderedLoader)
 
@@ -77,23 +79,25 @@ def read_templates(folder=None):
     output = []
 
     if folder is None:
-        folder = pkg_resources.resource_filename(__name__, 'templates')
+        folder = pkg_resources.resource_filename(__name__, "templates")
 
     for path, subdirs, files in os.walk(folder):
         for name in sorted(files):
-            if name.endswith('.yml'):
-                with open(os.path.join(path, name), 'rb') as f:
-                    encoding = chardet.detect(f.read())['encoding']
-                with codecs.open(os.path.join(path, name), encoding=encoding) as template_file:
+            if name.endswith(".yml"):
+                with open(os.path.join(path, name), "rb") as f:
+                    encoding = chardet.detect(f.read())["encoding"]
+                with codecs.open(
+                    os.path.join(path, name), encoding=encoding
+                ) as template_file:
                     tpl = ordered_load(template_file.read())
-                tpl['template_name'] = name
+                tpl["template_name"] = name
 
                 # Test if all required fields are in template:
-                assert 'keywords' in tpl.keys(), 'Missing keywords field.'
+                assert "keywords" in tpl.keys(), "Missing keywords field."
 
                 # Keywords as list, if only one.
-                if type(tpl['keywords']) is not list:
-                    tpl['keywords'] = [tpl['keywords']]
+                if type(tpl["keywords"]) is not list:
+                    tpl["keywords"] = [tpl["keywords"]]
 
                 output.append(InvoiceTemplate(tpl))
     return output
diff --git a/src/invoice2data/extract/plugins/lines.py b/src/invoice2data/extract/plugins/lines.py
index 34727dcf..171e893a 100644
--- a/src/invoice2data/extract/plugins/lines.py
+++ b/src/invoice2data/extract/plugins/lines.py
@@ -9,7 +9,7 @@
 
 logger = logging.getLogger(__name__)
 
-DEFAULT_OPTIONS = {'field_separator': r'\s+', 'line_separator': r'\n'}
+DEFAULT_OPTIONS = {"field_separator": r"\s+", "line_separator": r"\n"}
 
 
 def extract(self, content, output):
@@ -17,73 +17,73 @@ def extract(self, content, output):
 
     # First apply default options.
     plugin_settings = DEFAULT_OPTIONS.copy()
-    plugin_settings.update(self['lines'])
-    self['lines'] = plugin_settings
+    plugin_settings.update(self["lines"])
+    self["lines"] = plugin_settings
 
     # Validate settings
-    assert 'start' in self['lines'], 'Lines start regex missing'
-    assert 'end' in self['lines'], 'Lines end regex missing'
-    assert 'line' in self['lines'], 'Line regex missing'
+    assert "start" in self["lines"], "Lines start regex missing"
+    assert "end" in self["lines"], "Lines end regex missing"
+    assert "line" in self["lines"], "Line regex missing"
 
-    start = re.search(self['lines']['start'], content)
-    end = re.search(self['lines']['end'], content)
+    start = re.search(self["lines"]["start"], content)
+    end = re.search(self["lines"]["end"], content)
     if not start or not end:
-        logger.warning('no lines found - start %s, end %s', start, end)
+        logger.warning("no lines found - start %s, end %s", start, end)
         return
-    content = content[start.end(): end.start()]
+    content = content[start.end() : end.start()]
     lines = []
     current_row = {}
-    if 'first_line' not in self['lines'] and 'last_line' not in self['lines']:
-        self['lines']['first_line'] = self['lines']['line']
-    for line in re.split(self['lines']['line_separator'], content):
+    if "first_line" not in self["lines"] and "last_line" not in self["lines"]:
+        self["lines"]["first_line"] = self["lines"]["line"]
+    for line in re.split(self["lines"]["line_separator"], content):
         # if the line has empty lines in it , skip them
-        if not line.strip('').strip('\n') or not line:
+        if not line.strip("").strip("\n") or not line:
             continue
-        if 'first_line' in self['lines']:
-            match = re.search(self['lines']['first_line'], line)
+        if "first_line" in self["lines"]:
+            match = re.search(self["lines"]["first_line"], line)
             if match:
-                if 'last_line' not in self['lines']:
+                if "last_line" not in self["lines"]:
                     if current_row:
                         lines.append(current_row)
                     current_row = {}
                 if current_row:
                     lines.append(current_row)
                 current_row = {
-                    field: value.strip() if value else ''
+                    field: value.strip() if value else ""
                     for field, value in match.groupdict().items()
                 }
                 continue
-        if 'last_line' in self['lines']:
-            match = re.search(self['lines']['last_line'], line)
+        if "last_line" in self["lines"]:
+            match = re.search(self["lines"]["last_line"], line)
             if match:
                 for field, value in match.groupdict().items():
-                    current_row[field] = '%s%s%s' % (
-                        current_row.get(field, ''),
-                        current_row.get(field, '') and '\n' or '',
-                        value.strip() if value else '',
+                    current_row[field] = "%s%s%s" % (
+                        current_row.get(field, ""),
+                        current_row.get(field, "") and "\n" or "",
+                        value.strip() if value else "",
                     )
                 if current_row:
                     lines.append(current_row)
                 current_row = {}
                 continue
-        match = re.search(self['lines']['line'], line)
+        match = re.search(self["lines"]["line"], line)
         if match:
             for field, value in match.groupdict().items():
-                current_row[field] = '%s%s%s' % (
-                    current_row.get(field, ''),
-                    current_row.get(field, '') and '\n' or '',
-                    value.strip() if value else '',
+                current_row[field] = "%s%s%s" % (
+                    current_row.get(field, ""),
+                    current_row.get(field, "") and "\n" or "",
+                    value.strip() if value else "",
                 )
             continue
-        logger.debug('ignoring *%s* because it doesn\'t match anything', line)
+        logger.debug("ignoring *%s* because it doesn't match anything", line)
     if current_row:
         lines.append(current_row)
 
-    types = self['lines'].get('types', [])
+    types = self["lines"].get("types", [])
     for row in lines:
         for name in row.keys():
             if name in types:
                 row[name] = self.coerce_type(row[name], types[name])
 
     if lines:
-        output['lines'] = lines
+        output["lines"] = lines
diff --git a/src/invoice2data/extract/plugins/tables.py b/src/invoice2data/extract/plugins/tables.py
index 1a537a6e..ea979135 100644
--- a/src/invoice2data/extract/plugins/tables.py
+++ b/src/invoice2data/extract/plugins/tables.py
@@ -7,13 +7,13 @@
 
 logger = logging.getLogger(__name__)
 
-DEFAULT_OPTIONS = {'field_separator': r'\s+', 'line_separator': r'\n'}
+DEFAULT_OPTIONS = {"field_separator": r"\s+", "line_separator": r"\n"}
 
 
 def extract(self, content, output):
     """Try to extract tables from an invoice"""
 
-    for table in self['tables']:
+    for table in self["tables"]:
 
         # First apply default options.
         plugin_settings = DEFAULT_OPTIONS.copy()
@@ -21,38 +21,38 @@ def extract(self, content, output):
         table = plugin_settings
 
         # Validate settings
-        assert 'start' in table, 'Table start regex missing'
-        assert 'end' in table, 'Table end regex missing'
-        assert 'body' in table, 'Table body regex missing'
+        assert "start" in table, "Table start regex missing"
+        assert "end" in table, "Table end regex missing"
+        assert "body" in table, "Table body regex missing"
 
-        start = re.search(table['start'], content)
-        end = re.search(table['end'], content)
+        start = re.search(table["start"], content)
+        end = re.search(table["end"], content)
 
         if not start or not end:
-            logger.warning('no table body found - start %s, end %s', start, end)
+            logger.warning("no table body found - start %s, end %s", start, end)
             continue
 
-        table_body = content[start.end(): end.start()]
+        table_body = content[start.end() : end.start()]
 
-        for line in re.split(table['line_separator'], table_body):
+        for line in re.split(table["line_separator"], table_body):
             # if the line has empty lines in it , skip them
-            if not line.strip('').strip('\n') or not line:
+            if not line.strip("").strip("\n") or not line:
                 continue
 
-            match = re.search(table['body'], line)
+            match = re.search(table["body"], line)
             if match:
                 for field, value in match.groupdict().items():
                     # If a field name already exists, do not overwrite it
                     if field in output:
                         continue
 
-                    if field.startswith('date') or field.endswith('date'):
+                    if field.startswith("date") or field.endswith("date"):
                         output[field] = self.parse_date(value)
                         if not output[field]:
                             logger.error("Date parsing failed on date '%s'", value)
                             return None
-                    elif field.startswith('amount'):
+                    elif field.startswith("amount"):
                         output[field] = self.parse_number(value)
                     else:
                         output[field] = value
-            logger.debug('ignoring *%s* because it doesn\'t match anything', line)
+            logger.debug("ignoring *%s* because it doesn't match anything", line)
diff --git a/src/invoice2data/input/gvision.py b/src/invoice2data/input/gvision.py
index cfb48b49..6e7ea63c 100644
--- a/src/invoice2data/input/gvision.py
+++ b/src/invoice2data/input/gvision.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-def to_text(path, bucket_name='cloud-vision-84893', language='fr'):
+def to_text(path, bucket_name="cloud-vision-84893", language="fr"):
     """Sends PDF files to Google Cloud Vision for OCR.
 
     Before using invoice2data, make sure you have the auth json path set as
@@ -26,13 +26,13 @@ def to_text(path, bucket_name='cloud-vision-84893', language='fr'):
     from google.protobuf import json_format
 
     # Supported mime_types are: 'application/pdf' and 'image/tiff'
-    mime_type = 'application/pdf'
+    mime_type = "application/pdf"
 
     path_dir, filename = os.path.split(path)
-    result_blob_basename = filename.replace('.pdf', '').replace('.PDF', '')
-    result_blob_name = result_blob_basename + '/output-1-to-1.json'
-    result_blob_uri = 'gs://{}/{}/'.format(bucket_name, result_blob_basename)
-    input_blob_uri = 'gs://{}/{}'.format(bucket_name, filename)
+    result_blob_basename = filename.replace(".pdf", "").replace(".PDF", "")
+    result_blob_name = result_blob_basename + "/output-1-to-1.json"
+    result_blob_uri = "gs://{}/{}/".format(bucket_name, result_blob_basename)
+    input_blob_uri = "gs://{}/{}".format(bucket_name, filename)
 
     # Upload file to gcloud if it doesn't exist yet
     storage_client = storage.Client()
@@ -51,10 +51,14 @@ def to_text(path, bucket_name='cloud-vision-84893', language='fr'):
 
         client = vision.ImageAnnotatorClient()
 
-        feature = vision.types.Feature(type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)
+        feature = vision.types.Feature(
+            type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION
+        )
 
         gcs_source = vision.types.GcsSource(uri=input_blob_uri)
-        input_config = vision.types.InputConfig(gcs_source=gcs_source, mime_type=mime_type)
+        input_config = vision.types.InputConfig(
+            gcs_source=gcs_source, mime_type=mime_type
+        )
 
         gcs_destination = vision.types.GcsDestination(uri=result_blob_uri)
         output_config = vision.types.OutputConfig(
@@ -67,7 +71,7 @@ def to_text(path, bucket_name='cloud-vision-84893', language='fr'):
 
         operation = client.async_batch_annotate_files(requests=[async_request])
 
-        print('Waiting for the operation to finish.')
+        print("Waiting for the operation to finish.")
         operation.result(timeout=180)
 
     # Get result after OCR is completed
@@ -80,4 +84,4 @@ def to_text(path, bucket_name='cloud-vision-84893', language='fr'):
     first_page_response = response.responses[0]
     annotation = first_page_response.full_text_annotation
 
-    return annotation.text.encode('utf-8')
+    return annotation.text.encode("utf-8")
diff --git a/src/invoice2data/input/pdfminer_wrapper.py b/src/invoice2data/input/pdfminer_wrapper.py
index 6852ecd0..c0c14b71 100644
--- a/src/invoice2data/input/pdfminer_wrapper.py
+++ b/src/invoice2data/input/pdfminer_wrapper.py
@@ -20,7 +20,7 @@ def to_text(path):
         import sys
 
         reload(sys)  # noqa: F821
-        sys.setdefaultencoding('utf8')
+        sys.setdefaultencoding("utf8")
     except ImportError:
         from io import StringIO
 
@@ -31,11 +31,11 @@ def to_text(path):
 
     rsrcmgr = PDFResourceManager()
     retstr = StringIO()
-    codec = 'utf-8'
+    codec = "utf-8"
     laparams = LAParams()
     laparams.all_texts = True
     device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)
-    with open(path, 'rb') as fp:
+    with open(path, "rb") as fp:
         interpreter = PDFPageInterpreter(rsrcmgr, device)
         password = ""
         maxpages = 0
@@ -54,4 +54,4 @@ def to_text(path):
     device.close()
     str = retstr.getvalue()
     retstr.close()
-    return str.encode('utf-8')
+    return str.encode("utf-8")
diff --git a/src/invoice2data/input/pdftotext.py b/src/invoice2data/input/pdftotext.py
index 6afb3039..4ba37992 100644
--- a/src/invoice2data/input/pdftotext.py
+++ b/src/invoice2data/input/pdftotext.py
@@ -22,10 +22,10 @@ def to_text(path):
 
     if spawn.find_executable("pdftotext"):  # shutil.which('pdftotext'):
         out, err = subprocess.Popen(
-            ["pdftotext", '-layout', '-enc', 'UTF-8', path, '-'], stdout=subprocess.PIPE
+            ["pdftotext", "-layout", "-enc", "UTF-8", path, "-"], stdout=subprocess.PIPE
         ).communicate()
         return out
     else:
         raise EnvironmentError(
-            'pdftotext not installed. Can be downloaded from https://poppler.freedesktop.org/'
+            "pdftotext not installed. Can be downloaded from https://poppler.freedesktop.org/"
         )
diff --git a/src/invoice2data/input/tesseract.py b/src/invoice2data/input/tesseract.py
index ab2bd7ea..0a7a3c31 100644
--- a/src/invoice2data/input/tesseract.py
+++ b/src/invoice2data/input/tesseract.py
@@ -19,16 +19,26 @@ def to_text(path):
     from distutils import spawn
 
     # Check for dependencies. Needs Tesseract and Imagemagick installed.
-    if not spawn.find_executable('tesseract'):
-        raise EnvironmentError('tesseract not installed.')
-    if not spawn.find_executable('convert'):
-        raise EnvironmentError('imagemagick not installed.')
+    if not spawn.find_executable("tesseract"):
+        raise EnvironmentError("tesseract not installed.")
+    if not spawn.find_executable("convert"):
+        raise EnvironmentError("imagemagick not installed.")
 
     # convert = "convert -density 350 %s -depth 8 tiff:-" % (path)
-    convert = ['convert', '-density', '350', path, '-depth', '8', '-alpha', 'off', 'png:-']
+    convert = [
+        "convert",
+        "-density",
+        "350",
+        path,
+        "-depth",
+        "8",
+        "-alpha",
+        "off",
+        "png:-",
+    ]
     p1 = subprocess.Popen(convert, stdout=subprocess.PIPE)
 
-    tess = ['tesseract', 'stdin', 'stdout']
+    tess = ["tesseract", "stdin", "stdout"]
     p2 = subprocess.Popen(tess, stdin=p1.stdout, stdout=subprocess.PIPE)
 
     out, err = p2.communicate()
diff --git a/src/invoice2data/input/tesseract4.py b/src/invoice2data/input/tesseract4.py
index b406a585..83b41570 100644
--- a/src/invoice2data/input/tesseract4.py
+++ b/src/invoice2data/input/tesseract4.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-def to_text(path, language='fra'):
+def to_text(path, language="fra"):
     """Wraps Tesseract 4 OCR with custom language model.
 
     Parameters
@@ -19,47 +19,57 @@ def to_text(path, language='fra'):
     import time
 
     # Check for dependencies. Needs Tesseract and Imagemagick installed.
-    if not spawn.find_executable('tesseract'):
-        raise EnvironmentError('tesseract not installed.')
-    if not spawn.find_executable('convert'):
-        raise EnvironmentError('imagemagick not installed.')
-    if not spawn.find_executable('gs'):
-        raise EnvironmentError('ghostscript not installed.')
+    if not spawn.find_executable("tesseract"):
+        raise EnvironmentError("tesseract not installed.")
+    if not spawn.find_executable("convert"):
+        raise EnvironmentError("imagemagick not installed.")
+    if not spawn.find_executable("gs"):
+        raise EnvironmentError("ghostscript not installed.")
 
-    with tempfile.NamedTemporaryFile(suffix='.tiff') as tf:
+    with tempfile.NamedTemporaryFile(suffix=".tiff") as tf:
         # Step 1: Convert to TIFF
         gs_cmd = [
-            'gs',
-            '-q',
-            '-dNOPAUSE',
-            '-r600x600',
-            '-sDEVICE=tiff24nc',
-            '-sOutputFile=' + tf.name,
+            "gs",
+            "-q",
+            "-dNOPAUSE",
+            "-r600x600",
+            "-sDEVICE=tiff24nc",
+            "-sOutputFile=" + tf.name,
             path,
-            '-c',
-            'quit',
+            "-c",
+            "quit",
         ]
         subprocess.Popen(gs_cmd)
         time.sleep(3)
 
         # Step 2: Enhance TIFF
         magick_cmd = [
-            'convert',
+            "convert",
             tf.name,
-            '-colorspace',
-            'gray',
-            '-type',
-            'grayscale',
-            '-contrast-stretch',
-            '0',
-            '-sharpen',
-            '0x1',
-            'tiff:-',
+            "-colorspace",
+            "gray",
+            "-type",
+            "grayscale",
+            "-contrast-stretch",
+            "0",
+            "-sharpen",
+            "0x1",
+            "tiff:-",
         ]
 
         p1 = subprocess.Popen(magick_cmd, stdout=subprocess.PIPE)
 
-        tess_cmd = ['tesseract', '-l', language, '--oem', '1', '--psm', '3', 'stdin', 'stdout']
+        tess_cmd = [
+            "tesseract",
+            "-l",
+            language,
+            "--oem",
+            "1",
+            "--psm",
+            "3",
+            "stdin",
+            "stdout",
+        ]
         p2 = subprocess.Popen(tess_cmd, stdin=p1.stdout, stdout=subprocess.PIPE)
 
         out, err = p2.communicate()
diff --git a/src/invoice2data/main.py b/src/invoice2data/main.py
index a06063e0..d4b1074d 100644
--- a/src/invoice2data/main.py
+++ b/src/invoice2data/main.py
@@ -23,14 +23,14 @@
 logger = logging.getLogger(__name__)
 
 input_mapping = {
-    'pdftotext': pdftotext,
-    'tesseract': tesseract,
-    'tesseract4': tesseract4,
-    'pdfminer': pdfminer_wrapper,
-    'gvision': gvision,
+    "pdftotext": pdftotext,
+    "tesseract": tesseract,
+    "tesseract4": tesseract4,
+    "pdfminer": pdfminer_wrapper,
+    "gvision": gvision,
 }
 
-output_mapping = {'csv': to_csv, 'json': to_json, 'xml': to_xml, 'none': None}
+output_mapping = {"csv": to_csv, "json": to_json, "xml": to_xml, "none": None}
 
 
 def extract_data(invoicefile, templates=None, input_module=pdftotext):
@@ -79,20 +79,20 @@ def extract_data(invoicefile, templates=None, input_module=pdftotext):
         templates = read_templates()
 
     # print(templates[0])
-    extracted_str = input_module.to_text(invoicefile).decode('utf-8')
+    extracted_str = input_module.to_text(invoicefile).decode("utf-8")
 
-    logger.debug('START pdftotext result ===========================')
+    logger.debug("START pdftotext result ===========================")
     logger.debug(extracted_str)
-    logger.debug('END pdftotext result =============================')
+    logger.debug("END pdftotext result =============================")
 
-    logger.debug('Testing {} template files'.format(len(templates)))
+    logger.debug("Testing {} template files".format(len(templates)))
     for t in templates:
         optimized_str = t.prepare_input(extracted_str)
 
         if t.matches_input(optimized_str):
             return t.extract(optimized_str)
 
-    logger.error('No template for %s', invoicefile)
+    logger.error("No template for %s", invoicefile)
     return False
 
 
@@ -100,75 +100,84 @@ def create_parser():
     """Returns argument parser """
 
     parser = argparse.ArgumentParser(
-        description='Extract structured data from PDF files and save to CSV or JSON.'
+        description="Extract structured data from PDF files and save to CSV or JSON."
     )
 
     parser.add_argument(
-        '--input-reader',
+        "--input-reader",
         choices=input_mapping.keys(),
-        default='pdftotext',
-        help='Choose text extraction function. Default: pdftotext',
+        default="pdftotext",
+        help="Choose text extraction function. Default: pdftotext",
     )
 
     parser.add_argument(
-        '--output-format',
+        "--output-format",
         choices=output_mapping.keys(),
-        default='none',
-        help='Choose output format. Default: none',
+        default="none",
+        help="Choose output format. Default: none",
     )
 
     parser.add_argument(
-        '--output-date-format',
-        dest='output_date_format',
+        "--output-date-format",
+        dest="output_date_format",
         default="%Y-%m-%d",
-        help='Choose output date format. Default: %%Y-%%m-%%d (ISO 8601 Date)',
+        help="Choose output date format. Default: %%Y-%%m-%%d (ISO 8601 Date)",
     )
 
     parser.add_argument(
-        '--output-name',
-        '-o',
-        dest='output_name',
-        default='invoices-output',
-        help='Custom name for output file. Extension is added based on chosen format.',
+        "--output-name",
+        "-o",
+        dest="output_name",
+        default="invoices-output",
+        help="Custom name for output file. Extension is added based on chosen format.",
     )
 
     parser.add_argument(
-        '--debug', dest='debug', action='store_true', help='Enable debug information.'
+        "--debug", dest="debug", action="store_true", help="Enable debug information."
     )
 
     parser.add_argument(
-        '--copy', '-c', dest='copy', help='Copy and rename processed PDFs to specified folder.'
+        "--copy",
+        "-c",
+        dest="copy",
+        help="Copy and rename processed PDFs to specified folder.",
     )
 
     parser.add_argument(
-        '--move', '-m', dest='move', help='Move and rename processed PDFs to specified folder.'
+        "--move",
+        "-m",
+        dest="move",
+        help="Move and rename processed PDFs to specified folder.",
     )
 
     parser.add_argument(
-        '--filename-format',
-        dest='filename',
+        "--filename-format",
+        dest="filename",
         default="{date} {invoice_number} {desc}.pdf",
-        help='Filename format to use when moving or copying processed PDFs.'
-             'Default: "{date} {invoice_number} {desc}.pdf"',
+        help="Filename format to use when moving or copying processed PDFs."
+        'Default: "{date} {invoice_number} {desc}.pdf"',
     )
 
     parser.add_argument(
-        '--template-folder',
-        '-t',
-        dest='template_folder',
-        help='Folder containing invoice templates in yml file. Always adds built-in templates.',
+        "--template-folder",
+        "-t",
+        dest="template_folder",
+        help="Folder containing invoice templates in yml file. Always adds built-in templates.",
     )
 
     parser.add_argument(
-        '--exclude-built-in-templates',
-        dest='exclude_built_in_templates',
+        "--exclude-built-in-templates",
+        dest="exclude_built_in_templates",
         default=False,
-        help='Ignore built-in templates.',
+        help="Ignore built-in templates.",
         action="store_true",
     )
 
     parser.add_argument(
-        'input_files', type=argparse.FileType('r'), nargs='+', help='File or directory to analyze.'
+        "input_files",
+        type=argparse.FileType("r"),
+        nargs="+",
+        help="File or directory to analyze.",
     )
 
     return parser
@@ -204,16 +213,16 @@ def main(args=None):
             output.append(res)
             if args.copy:
                 filename = args.filename.format(
-                    date=res['date'].strftime('%Y-%m-%d'),
-                    invoice_number=res['invoice_number'],
-                    desc=res['desc'],
+                    date=res["date"].strftime("%Y-%m-%d"),
+                    invoice_number=res["invoice_number"],
+                    desc=res["desc"],
                 )
                 shutil.copyfile(f.name, join(args.copy, filename))
             if args.move:
                 filename = args.filename.format(
-                    date=res['date'].strftime('%Y-%m-%d'),
-                    invoice_number=res['invoice_number'],
-                    desc=res['desc'],
+                    date=res["date"].strftime("%Y-%m-%d"),
+                    invoice_number=res["invoice_number"],
+                    desc=res["desc"],
                 )
                 shutil.move(f.name, join(args.move, filename))
         f.close()
@@ -222,5 +231,5 @@ def main(args=None):
         output_module.write_to_file(output, args.output_name, args.output_date_format)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/src/invoice2data/output/to_csv.py b/src/invoice2data/output/to_csv.py
index df37e1ec..68d13d21 100644
--- a/src/invoice2data/output/to_csv.py
+++ b/src/invoice2data/output/to_csv.py
@@ -27,18 +27,18 @@ def write_to_file(data, path, date_format="%Y-%m-%d"):
         >>> to_csv.write_to_file(data, "invoice.csv")
 
     """
-    if path.endswith('.csv'):
+    if path.endswith(".csv"):
         filename = path
     else:
-        filename = path + '.csv'
+        filename = path + ".csv"
 
     if sys.version_info[0] < 3:
         openfile = open(filename, "wb")
     else:
-        openfile = open(filename, "w", newline='')
+        openfile = open(filename, "w", newline="")
 
     with openfile as csv_file:
-        writer = csv.writer(csv_file, delimiter=',')
+        writer = csv.writer(csv_file, delimiter=",")
 
         for line in data:
             first_row = []
@@ -50,7 +50,7 @@ def write_to_file(data, path, date_format="%Y-%m-%d"):
             csv_items = []
             for k, v in line.items():
                 # first_row.append(k)
-                if k.startswith('date') or k.endswith('date'):
+                if k.startswith("date") or k.endswith("date"):
                     v = v.strftime(date_format)
                 csv_items.append(v)
             writer.writerow(csv_items)
diff --git a/src/invoice2data/output/to_json.py b/src/invoice2data/output/to_json.py
index 1e184d91..4c15aef5 100644
--- a/src/invoice2data/output/to_json.py
+++ b/src/invoice2data/output/to_json.py
@@ -34,18 +34,23 @@ def write_to_file(data, path, date_format="%Y-%m-%d"):
         >>> to_json.write_to_file(data, "invoice.json")
 
     """
-    if path.endswith('.json'):
+    if path.endswith(".json"):
         filename = path
     else:
-        filename = path + '.json'
+        filename = path + ".json"
 
-    with codecs.open(filename, "w", encoding='utf-8') as json_file:
+    with codecs.open(filename, "w", encoding="utf-8") as json_file:
         for line in data:
             for k, v in line.items():
-                if k.startswith('date') or k.endswith('date'):
+                if k.startswith("date") or k.endswith("date"):
                     line[k] = v.strftime(date_format)
         print(type(json))
         print(json)
         json.dump(
-            data, json_file, indent=4, sort_keys=True, default=myconverter, ensure_ascii=False
+            data,
+            json_file,
+            indent=4,
+            sort_keys=True,
+            default=myconverter,
+            ensure_ascii=False,
         )
diff --git a/src/invoice2data/output/to_xml.py b/src/invoice2data/output/to_xml.py
index 06dd00e2..30e4ebc4 100644
--- a/src/invoice2data/output/to_xml.py
+++ b/src/invoice2data/output/to_xml.py
@@ -4,12 +4,12 @@
 
 def prettify(elem):
     """Return a pretty-printed XML string for the Element."""
-    rough_string = ET.tostring(elem, 'utf-8')
+    rough_string = ET.tostring(elem, "utf-8")
     reparsed = minidom.parseString(rough_string)
     return reparsed.toprettyxml(indent="  ")
 
 
-def write_to_file(data, path, date_format='%Y-%m-%d'):
+def write_to_file(data, path, date_format="%Y-%m-%d"):
     """Export extracted fields to xml
 
     Appends .xml to path if missing and generates xml file in specified directory, if not then in root
@@ -36,26 +36,26 @@ def write_to_file(data, path, date_format='%Y-%m-%d'):
 
     """
 
-    if path.endswith('.xml'):
+    if path.endswith(".xml"):
         filename = path
     else:
-        filename = path + '.xml'
+        filename = path + ".xml"
 
-    tag_data = ET.Element('data')
+    tag_data = ET.Element("data")
     xml_file = open(filename, "w")
     i = 0
     for line in data:
         i += 1
-        tag_item = ET.SubElement(tag_data, 'item')
-        tag_date = ET.SubElement(tag_item, 'date')
-        tag_desc = ET.SubElement(tag_item, 'desc')
-        tag_currency = ET.SubElement(tag_item, 'currency')
-        tag_amount = ET.SubElement(tag_item, 'amount')
-        tag_item.set('id', str(i))
-        tag_date.text = line['date'].strftime(date_format)
-        tag_desc.text = line['desc']
-        tag_currency.text = line['currency']
-        tag_amount.text = str(line['amount'])
+        tag_item = ET.SubElement(tag_data, "item")
+        tag_date = ET.SubElement(tag_item, "date")
+        tag_desc = ET.SubElement(tag_item, "desc")
+        tag_currency = ET.SubElement(tag_item, "currency")
+        tag_amount = ET.SubElement(tag_item, "amount")
+        tag_item.set("id", str(i))
+        tag_date.text = line["date"].strftime(date_format)
+        tag_desc.text = line["desc"]
+        tag_currency.text = line["currency"]
+        tag_amount.text = str(line["amount"])
 
     xml_file.write(prettify(tag_data))
     xml_file.close()