From a3ee22327a2dec8c561e03910536013b47d0aca8 Mon Sep 17 00:00:00 2001
From: Ryan Liu <ryan@ryanliu6.xyz>
Date: Tue, 11 Jun 2024 21:02:09 -0700
Subject: [PATCH] Refactored code to properly work with Preferences

---
 Config.py    |  19 ++++------
 Ena.py       |   5 +--
 src/api.py   |  51 ++++++++++++++++----------
 src/model.py | 102 ++++++++++++++++++++++++++++++++++++++++++++-------
 4 files changed, 130 insertions(+), 47 deletions(-)

diff --git a/Config.py b/Config.py
index b954268..3007536 100755
--- a/Config.py
+++ b/Config.py
@@ -7,8 +7,7 @@
 from typing import Dict
 from configparser import ConfigParser
 
-from src.model import Orders, Config
-
+from src.model import Orders, Preferences
 
 ROOT_PATH = os.path.dirname(os.path.abspath(__name__))
 CONFIG_FILE = os.path.join(ROOT_PATH, "preferences.ini")
@@ -28,9 +27,8 @@ def get_preferences() -> Dict:
 
     csv_order = Orders[parser.get(CONFIG_SECTION, "csv_order")]
     use_ollama = parser.getboolean(CONFIG_SECTION, "use_ollama")
-    keep_payments = parser.getboolean(CONFIG_SECTION, "keep_payments")
-    negative_expenses = parser.getboolean(CONFIG_SECTION, "negative_expenses")
-    preferences = Config(csv_order, use_ollama, keep_payments, negative_expenses)
+    positive_expenses = parser.getboolean(CONFIG_SECTION, "positive_expenses")
+    preferences = Preferences(csv_order, use_ollama, positive_expenses)
 
     return preferences
 
@@ -40,11 +38,9 @@ def get_preferences() -> Dict:
               help="CSV Order. Defaults to the simple order, which is Date, Amount, and Note (Description)")
 @click.option("-u", "--use-ollama", is_flag=True, default=False,
               help="Use Ollama and local LLM to categorize expenses. NOTE: Experimental Feature. Defaults to False.")
-@click.option("-k", "--keep-payments", is_flag=True, default=False,
-              help="Keep payments to Credit Card Bill in transactions. Defaults to False.")
-@click.option("-n", "--negative_expenses", is_flag=True, default=False,
-              help="Have expenses represented as negative values and incomes represented as positive values. Defaults to False.")
-def cli(order: Orders, use_ollama: bool, keep_payments: bool, negative_expenses: bool):
+@click.option("-n", "--positive_expenses", is_flag=True, default=False,
+              help="Have expenses represented as positive floats and incomes represented as negative floats. Defaults to False.")
+def cli(order: Orders, use_ollama: bool, positive_expenses: bool):
     """
     Writes an .ini file dictating Ena's behavioural preferences.
     """
@@ -54,8 +50,7 @@ def cli(order: Orders, use_ollama: bool, keep_payments: bool, negative_expenses:
     config[CONFIG_SECTION] = {
         "csv_order": order,
         "use_ollama": bool_to_str(use_ollama),
-        "keep_payments": bool_to_str(keep_payments),
-        "negative_expenses": bool_to_str(negative_expenses),
+        "positive_expenses": bool_to_str(positive_expenses),
     }
 
     with open(CONFIG_FILE, "w+") as config_file:
diff --git a/Ena.py b/Ena.py
index e7c8c12..f110c60 100755
--- a/Ena.py
+++ b/Ena.py
@@ -5,12 +5,11 @@
 import logging
 
 from src.api import Ena
-from Config import STATEMENTS_PATH
+from Config import ROOT_PATH
 
-
-ROOT_PATH = os.path.dirname(os.path.abspath(__name__))
 STATEMENTS_PATH = os.path.join(ROOT_PATH, "statements")
 
+
 @click.command()
 @click.option("-d", "--directory", "statements_dir", type=click.Path(exists=True),
               default=STATEMENTS_PATH, help="Directory where statements are. Defaults to Ena/statements")
diff --git a/src/api.py b/src/api.py
index 203a12d..1cc8e5e 100644
--- a/src/api.py
+++ b/src/api.py
@@ -10,7 +10,7 @@
 from datetime import datetime
 from dataclasses import asdict
 from collections import defaultdict
-from src.model import Category, Transaction, FIFactory, CSV_ORDERS
+from src.model import Category, Orders, Transaction, FIFactory, CSV_ORDERS
 
 
 class Ena:
@@ -49,10 +49,14 @@ def parse_statements(self):
             csv_data.sort(key=lambda x: x.date)
             file_path = os.path.join(ROOT_PATH, "output", fi_name, f"{int(datetime.today().timestamp())}.csv")
             with open(file_path, "w+", newline="") as csv_file:
-                writer = csv.DictWriter(csv_file, CSV_ORDERS[self.preferences.csv_order])
+                csv_order = CSV_ORDERS[self.preferences.csv_order]
+                writer = csv.DictWriter(csv_file, csv_order)
                 writer.writeheader()
                 for txn in csv_data:
-                    writer.writerow(asdict(txn))
+                    # Compare the Orders enum, not the header list it maps to.
+                    is_simple = self.preferences.csv_order == Orders.SIMPLE
+                    row = txn.simple_repr() if is_simple else asdict(txn)
+                    writer.writerow(row)
 
     def _parse_statement(self, processor: FIFactory.type_FI, statement_path: str) -> List[Transaction]:
         """
@@ -63,12 +67,7 @@ def _parse_statement(self, processor: FIFactory.type_FI, statement_path: str) ->
         1. Category is added via Ollama based on available categories and confidence %.
             a. This behaviour can be disabled.
 
-        2. Payments to Credit Cards (Ena's main use-case) will be removed from the list of
-            Transactions. Per my personal use-case, payments will always be equal to opening
-            balance.
-            a. This behaviour can be disabled.
-
-        3. Transactions will all have "positive" value, ie, > 0, as Ena is designed to be an
+        2. Transactions will all have "positive" value, ie, > 0, as Ena is designed to be an
             expense tracker for Credit Cards. In the rare case that a transaction is "negative",
             for income of some sort (Cashback rewards, refunds, etc), it'll be categorized under
             Category.INCOME with a negative value.
@@ -96,11 +95,15 @@ def _parse_statement(self, processor: FIFactory.type_FI, statement_path: str) ->
             opening_balance = processor.get_opening_balance(text)
             closing_balance = processor.get_closing_balance(text)
 
+            logging.debug("Statement text:\n%s", text)
+
             # debugging transaction mapping - all 3 regex in transaction have to find a result in order for it to be considered a "match"
             year_end = False
             transaction_regex = processor.get_transaction_regex()
             for match in re.finditer(transaction_regex, text, re.MULTILINE):
                 match_dict = match.groupdict()
+                logging.debug("Transaction match: %s", match_dict)
+
                 date = match_dict["dates"].replace("/", " ") # change format to standard: 03/13 -> 03 13
                 date = date.split(" ")[0:2]  # Aug. 10 Aug. 13 -> ["Aug.", "10"]
                 date[0] = date[0].strip(".") # Aug. -> Aug
@@ -119,28 +122,38 @@ def _parse_statement(self, processor: FIFactory.type_FI, statement_path: str) ->
                 if month == "01" and year_end:
                     date = date.replace(year=date.year + 1)
 
-                if (match_dict["cr"]):
-                    logging.info(f"Credit balance found in transaction: {match_dict['amount']}")
-                    amount = -float("-" + match_dict["amount"].replace("$", "").replace(",", ""))
-                else:
-                    amount = -float(match_dict["amount"].replace("$", "").replace(",", ""))
+                amount = -float(match_dict["amount"].replace("$", "").replace(",", ""))
 
                 # checks description regex
                 if ("$" in match_dict["description"]):
                     logging.info(f"$ found in description: {match_dict['description']}")
-                    newAmount = re.search(r"(?P<amount>-?\$[\d,]+\.\d{2}-?)(?P<cr>(\-|\s?CR))?", match_dict["description"])
+                    newAmount = re.search(r"(?P<amount>-?\$[\d,]+\.\d{2}-?)", match_dict["description"])
                     amount = -float(newAmount["amount"].replace("$", "").replace(",", ""))
                     match_dict["description"] = match_dict["description"].split("$", 1)[0]
 
+                # Set amount based on preferences
+                if self.preferences.positive_expenses:
+                    amount *= -1
+
                 transaction = Transaction(date=str(date.date().isoformat()),
                                           amount=amount,
                                           note=match_dict["description"].strip())
 
-                if transaction in transactions:
-                    # Assumes all duplicate transactions are valid
-                    transaction.description = transaction.description + " 2"
+                # Check if transaction should be directly categorized as income transaction
+                if processor.is_transaction_income(transaction, self.preferences.positive_expenses):
+                    transaction.category = Category.INCOME
                 else:
-                    transactions.append(transaction)
+                    if self.preferences.use_ollama:
+                        # Get category via inference
+                        ...
+
+                """
+                Transactions are kept in a List rather than a Set because duplicate transactions
+                whose properties are all the same (Transaction.__eq__) are valid.
+
+                It's entirely possible that you make the same purchase at the same spot regularly.
+                """
+                transactions.append(transaction)
 
         processor.validate(opening_balance, closing_balance, transactions)
         return transactions
diff --git a/src/model.py b/src/model.py
index d701f54..0fba64b 100644
--- a/src/model.py
+++ b/src/model.py
@@ -3,6 +3,7 @@
 
 from enum import Enum
 from dataclasses import dataclass
+from abc import ABC, abstractmethod
 from typing import Dict, List, TypeVar
 
 
@@ -14,19 +15,18 @@ class Orders(Enum):
 
 
 CSV_ORDERS = {
-    Orders.DEFAULT: ["Date", "Amount", "Note", "Category"],
-    Orders.DIME: ["Category", "Note", "Date", "Amount"],
-    Orders.SIMPLE: ["Date", "Amount", "Note"],
+    Orders.DEFAULT: ["date", "amount", "note", "category"],
+    Orders.DIME: ["category", "note", "date", "amount"],
+    Orders.SIMPLE: ["date", "amount", "note"],
 }
 
 
 # User config
 @dataclass
-class Config:
+class Preferences:
     csv_order: Orders
     use_ollama: bool
-    keep_payments: bool
-    negative_expenses: bool
+    positive_expenses: bool
 
 
 class Category(Enum):
@@ -55,8 +55,15 @@ def __eq__(self, other):
         return isinstance(other, Transaction) and self.date == other.date and self.amount == other.amount \
             and self.note == other.note and self.category == other.category
 
+    def simple_repr(self) -> Dict:
+        """
+        Return only the date, amount and note of this transaction, keyed by field name.
+        """
+        simple_fields = ("date", "amount", "note")
+        return {name: getattr(self, name) for name in simple_fields}
+
 
-class BaseFI:
+class BaseFI(ABC):
     """
     Code for Regex Expressions and validate are directly from Bizzaro:Teller
     """
@@ -73,6 +80,14 @@ def get_transaction_regex(self) -> str:
         """
         return self.regex["transaction"]
 
+    @abstractmethod
+    def is_transaction_income(self, transaction: Transaction, positive_expenses: bool) -> bool:
+        """
+        Return True if `transaction` is income under the `positive_expenses` sign convention.
+        Must be implemented by each FI class, as statements differ between FIs.
+        """
+        pass
+
     def get_start_year(self, statement: str) -> int:
         """
         Get starting year for a given statement.
@@ -172,17 +187,41 @@ def __init__(self):
         regex = {
             "transaction": (r"^(?P<dates>(?:\w{3} \d{2} ){2})"
                 r"(?P<description>.+)\s"
-                r"(?P<amount>-?\$[\d,]+\.\d{2}-?)(?P<cr>(\-|\s?CR))?"),
+                r"(?P<amount>-?\$[\d,]+\.\d{2}-?)"),
             "start_year": r"STATEMENT FROM .+(?P<year>-?\,.[0-9][0-9][0-9][0-9])",
-            "open_balance": r"(PREVIOUS|Previous) (STATEMENT|ACCOUNT|Account) (BALANCE|Balance) (?P<balance>-?\$[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?",
-            "closing_balance": r"(?:NEW|CREDIT) BALANCE (?P<balance>-?\$[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?"
+            "open_balance": r"(PREVIOUS|Previous) (STATEMENT|ACCOUNT|Account) (BALANCE|Balance) (?P<balance>-?\$[\d,]+\.\d{2})",
+            "closing_balance": r"(?:NEW|CREDIT) BALANCE (?P<balance>-?\$[\d,]+\.\d{2})"
         }
 
         super().__init__(name="RBC", regex=regex)
 
+    def is_transaction_income(self, transaction: Transaction, positive_expenses: bool) -> bool:
+        """
+        Decide, purely from the sign of its amount, whether a transaction is income.
+
+        Income here means cash back rewards, payments towards the bill, and
+        refunds / chargebacks that show up on the CC bill.
+
+        Args:
+            transaction (Transaction): Transaction to classify
+            positive_expenses (bool): True when expenses are stored as positive
+                floats, False when they are stored as negative floats.
+
+        Returns:
+            bool: True for an income transaction, False for an expense
+        """
+        # Income always carries the opposite sign to expenses.
+        if positive_expenses:
+            return transaction.amount < 0
+
+        return transaction.amount > 0
+
 
 class TD(BaseFI):
     def __init__(self):
+        """
+        NOTE: Currently unimplemented as I do not have access to a TD CC Statement.
+        """
         regex = {
             "transaction": (r"(?P<dates>(?:\w{3} \d{1,2} ){2})"
                 r"(?P<description>.+)\s"
@@ -194,20 +233,57 @@ def __init__(self):
 
         super().__init__(name="TD", regex=regex)
 
+    def is_transaction_income(self, transaction: Transaction, positive_expenses: bool) -> bool:
+        """
+        Checks if a given transaction is considered an income transaction. This
+        is explicitly any transactions that show up on the CC bill that are:
+        1. Cash Back Rewards
+        2. Payment towards the bill
+        3. Refunds / Chargebacks
+
+        NOTE: Currently unimplemented as I do not have access to a TD CC Statement.
+
+        Args:
+            transaction (Transaction): Transaction to be checked
+            positive_expenses (bool): True if expenses are positive floats, else False
+
+        Returns:
+            bool: True if the transaction is income, False if it is an expense
+        """
+        ...
+
 
 class BNS(BaseFI):
     def __init__(self):
         regex = {
             "transaction": (r"^(?P<dates>(?:\w{3} \d{2} ){2})"
                 r"(?P<description>.+)\s"
-                r"(?P<amount>-?\$[\d,]+\.\d{2}-?)(?P<cr>(\-|\s?CR))?"),
+                r"(?P<amount>-?\$[\d,]+\.\d{2}-?)"),
             "start_year": r"STATEMENT FROM .+(?P<year>-?\,.[0-9][0-9][0-9][0-9])",
-            "open_balance": r"(PREVIOUS|Previous) (STATEMENT|ACCOUNT|Account) (BALANCE|Balance) (?P<balance>-?\$[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?",
-            "closing_balance": r"(?:NEW|CREDIT) BALANCE (?P<balance>-?\$[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?"
+            "open_balance": r"(PREVIOUS|Previous) (STATEMENT|ACCOUNT|Account) (BALANCE|Balance) (?P<balance>-?\$[\d,]+\.\d{2})",
+            "closing_balance": r"(?:NEW|CREDIT) BALANCE (?P<balance>-?\$[\d,]+\.\d{2})"
         }
 
         super().__init__(name="BNS", regex=regex)
 
+    def is_transaction_income(self, transaction: Transaction, positive_expenses: bool) -> bool:
+        """
+        Checks if a given transaction is considered an income transaction. This
+        is explicitly any transactions that show up on the CC bill that are:
+        1. Cash Back Rewards
+        2. Payment towards the bill
+        3. Refunds / Chargebacks
+
+        Args:
+            transaction (Transaction): Transaction to be checked
+            positive_expenses (bool): True if expenses are positive floats, else False
+
+        Returns:
+            bool: True if the transaction is income, False if it is an expense
+        """
+        ...  # NOTE: placeholder body; returns None (falsy) until implemented
+
+
 
 class FIFactory:
     type_FI = TypeVar("type_FI", RBC, TD, BNS)