From a3ee22327a2dec8c561e03910536013b47d0aca8 Mon Sep 17 00:00:00 2001 From: Ryan Liu Date: Tue, 11 Jun 2024 21:02:09 -0700 Subject: [PATCH] Refactored code to properly work with Preferences --- Config.py | 19 ++++------ Ena.py | 5 +-- src/api.py | 51 ++++++++++++++++---------- src/model.py | 102 ++++++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 130 insertions(+), 47 deletions(-) diff --git a/Config.py b/Config.py index b954268..3007536 100755 --- a/Config.py +++ b/Config.py @@ -7,8 +7,7 @@ from typing import Dict from configparser import ConfigParser -from src.model import Orders, Config - +from src.model import Orders, Preferences ROOT_PATH = os.path.dirname(os.path.abspath(__name__)) CONFIG_FILE = os.path.join(ROOT_PATH, "preferences.ini") @@ -28,9 +27,8 @@ def get_preferences() -> Dict: csv_order = Orders[parser.get(CONFIG_SECTION, "csv_order")] use_ollama = parser.getboolean(CONFIG_SECTION, "use_ollama") - keep_payments = parser.getboolean(CONFIG_SECTION, "keep_payments") - negative_expenses = parser.getboolean(CONFIG_SECTION, "negative_expenses") - preferences = Config(csv_order, use_ollama, keep_payments, negative_expenses) + positive_expenses = parser.getboolean(CONFIG_SECTION, "positive_expenses") + preferences = Preferences(csv_order, use_ollama, positive_expenses) return preferences @@ -40,11 +38,9 @@ def get_preferences() -> Dict: help="CSV Order. Defaults to the simple order, which is Date, Amount, and Note (Description)") @click.option("-u", "--use-ollama", is_flag=True, default=False, help="Use Ollama and local LLM to categorize expenses. NOTE: Experimental Feature. Defaults to False.") -@click.option("-k", "--keep-payments", is_flag=True, default=False, - help="Keep payments to Credit Card Bill in transactions. Defaults to False.") -@click.option("-n", "--negative_expenses", is_flag=True, default=False, - help="Have expenses represented as negative values and incomes represented as positive values. Defaults to False.") -def cli(order: Orders, use_ollama: bool, keep_payments: bool, negative_expenses: bool): +@click.option("-n", "--positive_expenses", is_flag=True, default=False, + help="Have expenses represented as positive floats and incomes represented as negative floats. Defaults to False.") +def cli(order: Orders, use_ollama: bool, positive_expenses: bool): """ Writes an .ini file dictating Ena's behavioural preferences. """ @@ -54,8 +50,7 @@ def cli(order: Orders, use_ollama: bool, keep_payments: bool, negative_expenses: config[CONFIG_SECTION] = { "csv_order": order, "use_ollama": bool_to_str(use_ollama), - "keep_payments": bool_to_str(keep_payments), - "negative_expenses": bool_to_str(negative_expenses), + "positive_expenses": bool_to_str(positive_expenses), } with open(CONFIG_FILE, "w+") as config_file: diff --git a/Ena.py b/Ena.py index e7c8c12..f110c60 100755 --- a/Ena.py +++ b/Ena.py @@ -5,12 +5,11 @@ import logging from src.api import Ena -from Config import STATEMENTS_PATH +from Config import ROOT_PATH - -ROOT_PATH = os.path.dirname(os.path.abspath(__name__)) STATEMENTS_PATH = os.path.join(ROOT_PATH, "statements") + @click.command() @click.option("-d", "--directory", "statements_dir", type=click.Path(exists=True), default=STATEMENTS_PATH, help="Directory where statements are. Defaults to Ena/statements") diff --git a/src/api.py b/src/api.py index 203a12d..1cc8e5e 100644 --- a/src/api.py +++ b/src/api.py @@ -10,7 +10,7 @@ from datetime import datetime from dataclasses import asdict from collections import defaultdict -from src.model import Category, Transaction, FIFactory, CSV_ORDERS +from src.model import Category, Orders, Transaction, FIFactory, CSV_ORDERS class Ena: @@ -49,10 +49,14 @@ def parse_statements(self): csv_data.sort(key=lambda x: x.date) file_path = os.path.join(ROOT_PATH, "output", fi_name, f"{int(datetime.today().timestamp())}.csv") with open(file_path, "w+", newline="") as csv_file: - writer = csv.DictWriter(csv_file, CSV_ORDERS[self.preferences.csv_order]) + csv_order = CSV_ORDERS[self.preferences.csv_order] + writer = csv.DictWriter(csv_file, csv_order) writer.writeheader() for txn in csv_data: - writer.writerow(asdict(txn)) + if csv_order == Orders.SIMPLE: + writer.writerow(txn.simple_repr()) + else: + writer.writerow(asdict(txn)) def _parse_statement(self, processor: FIFactory.type_FI, statement_path: str) -> List[Transaction]: """ @@ -63,12 +67,7 @@ def _parse_statement(self, processor: FIFactory.type_FI, statement_path: str) -> 1. Category is added via Ollama based on available categories and confidence %. a. This behaviour can be disabled. - 2. Payments to Credit Cards (Ena's main use-case) will be removed from the list of - Transactions. Per my personal use-case, payments will always be equal to opening - balance. - a. This behaviour can be disabled. - - 3. Transactions will all have "positive" value, ie, > 0, as Ena is designed to be an + 2. Transactions will all have "positive" value, ie, > 0, as Ena is designed to be an expense tracker for Credit Cards. In the rare case that a transaction is "negative", for income of some sort (Cashback rewards, refunds, etc), it'll be categorized under Category.INCOME with a negative value. @@ -96,11 +95,15 @@ def _parse_statement(self, processor: FIFactory.type_FI, statement_path: str) -> opening_balance = processor.get_opening_balance(text) closing_balance = processor.get_closing_balance(text) + print(text) + # debugging transaction mapping - all 3 regex in transaction have to find a result in order for it to be considered a "match" year_end = False transaction_regex = processor.get_transaction_regex() for match in re.finditer(transaction_regex, text, re.MULTILINE): match_dict = match.groupdict() + print(match_dict) + date = match_dict["dates"].replace("/", " ") # change format to standard: 03/13 -> 03 13 date = date.split(" ")[0:2] # Aug. 10 Aug. 13 -> ["Aug.", "10"] date[0] = date[0].strip(".") # Aug. -> Aug @@ -119,28 +122,38 @@ def _parse_statement(self, processor: FIFactory.type_FI, statement_path: str) -> if month == "01" and year_end: date = date.replace(year=date.year + 1) - if (match_dict["cr"]): - logging.info(f"Credit balance found in transaction: {match_dict['amount']}") - amount = -float("-" + match_dict["amount"].replace("$", "").replace(",", "")) - else: - amount = -float(match_dict["amount"].replace("$", "").replace(",", "")) + amount = -float(match_dict["amount"].replace("$", "").replace(",", "")) # checks description regex if ("$" in match_dict["description"]): logging.info(f"$ found in description: {match_dict['description']}") - newAmount = re.search(r"(?P-?\$[\d,]+\.\d{2}-?)(?P(\-|\s?CR))?", match_dict["description"]) + newAmount = re.search(r"(?P-?\$[\d,]+\.\d{2}-?)", match_dict["description"]) amount = -float(newAmount["amount"].replace("$", "").replace(",", "")) match_dict["description"] = match_dict["description"].split("$", 1)[0] + # Set amount based on preferences + if self.preferences.positive_expenses: + amount *= -1 + transaction = Transaction(date=str(date.date().isoformat()), amount=amount, note=match_dict["description"].strip()) - if transaction in transactions: - # Assumes all duplicate transactions are valid - transaction.description = transaction.description + " 2" + # Check if transaction should be directly categorized as income transaction + if processor.is_transaction_income(transaction, self.preferences.positive_expenses): + transaction.category = Category.INCOME else: - transactions.append(transaction) + if self.preferences.use_ollama: + # Get category via inference + ... + + """ + Transactions is represented as a List instead of Set because duplicate transactions + where properties are the same (Transaction.__eq__) are valid. + + It's entirely possible that you make the same purchase at the same spot regularly. + """ + transactions.append(transaction) processor.validate(opening_balance, closing_balance, transactions) return transactions diff --git a/src/model.py b/src/model.py index d701f54..0fba64b 100644 --- a/src/model.py +++ b/src/model.py @@ -3,6 +3,7 @@ from enum import Enum from dataclasses import dataclass +from abc import ABC, abstractmethod from typing import Dict, List, TypeVar @@ -14,19 +15,18 @@ class Orders(Enum): CSV_ORDERS = { - Orders.DEFAULT: ["Date", "Amount", "Note", "Category"], - Orders.DIME: ["Category", "Note", "Date", "Amount"], - Orders.SIMPLE: ["Date", "Amount", "Note"], + Orders.DEFAULT: ["date", "amount", "note", "category"], + Orders.DIME: ["category", "note", "date", "amount"], + Orders.SIMPLE: ["date", "amount", "note"], } # User config @dataclass -class Config: +class Preferences: csv_order: Orders use_ollama: bool - keep_payments: bool - negative_expenses: bool + positive_expenses: bool class Category(Enum): @@ -55,8 +55,15 @@ def __eq__(self, other): return isinstance(other, Transaction) and self.date == other.date and self.amount == other.amount \ and self.note == other.note and self.category == other.category + def simple_repr(self) -> Dict: + return { + "date": self.date, + "amount": self.amount, + "note": self.note, + } + -class BaseFI: +class BaseFI(ABC): """ Code for Regex Expressions and validate are directly from Bizzaro:Teller """ @@ -73,6 +80,14 @@ def get_transaction_regex(self) -> str: """ return self.regex["transaction"] + @abstractmethod + def is_transaction_income(self, transaction: Transaction) -> bool: + """ + Must be implemented by individual FI Classes due to statements being different + between different FIs. + """ + pass + def get_start_year(self, statement: str) -> int: """ Get starting year for a given statement. @@ -172,17 +187,41 @@ def __init__(self): regex = { "transaction": (r"^(?P(?:\w{3} \d{2} ){2})" r"(?P.+)\s" - r"(?P-?\$[\d,]+\.\d{2}-?)(?P(\-|\s?CR))?"), + r"(?P-?\$[\d,]+\.\d{2}-?)"), "start_year": r"STATEMENT FROM .+(?P-?\,.[0-9][0-9][0-9][0-9])", - "open_balance": r"(PREVIOUS|Previous) (STATEMENT|ACCOUNT|Account) (BALANCE|Balance) (?P-?\$[\d,]+\.\d{2})(?P(\-|\s?CR))?", - "closing_balance": r"(?:NEW|CREDIT) BALANCE (?P-?\$[\d,]+\.\d{2})(?P(\-|\s?CR))?" + "open_balance": r"(PREVIOUS|Previous) (STATEMENT|ACCOUNT|Account) (BALANCE|Balance) (?P-?\$[\d,]+\.\d{2})", + "closing_balance": r"(?:NEW|CREDIT) BALANCE (?P-?\$[\d,]+\.\d{2})" } super().__init__(name="RBC", regex=regex) + def is_transaction_income(self, transaction: Transaction, positive_expenses: bool) -> bool: + """ + Checks if a given transaction is considered an income transaction. This + is explicitly any transactions that show up on the CC bill that are: + 1. Cash Back Rewards + 2. Payment towards the bill + 3. Refunds / Chargebacks + + Args: + transaction (Transaction): Transaction to be checked + positive_expenses (bool): True if expenses are represented as positive floats, + False if they are represented as negative floats instead. + + Returns: + bool: True if given transaction is considered income, False if its + considered expense + """ + # If positive_expenses is True, then expenses are > 0 and income < 0 + amount = transaction.amount if positive_expenses else transaction.amount * -1 + return amount < 0 + class TD(BaseFI): def __init__(self): + """ + NOTE: Currently unimplemented as I do not have access to a TD CC Statement. + """ regex = { "transaction": (r"(?P(?:\w{3} \d{1,2} ){2})" r"(?P.+)\s" @@ -194,20 +233,57 @@ def __init__(self): super().__init__(name="TD", regex=regex) + def is_transaction_income(self, transaction: Transaction) -> bool: + """ + Checks if a given transaction is considered an income transaction. This + is explicitly any transactions that show up on the CC bill that are: + 1. Cash Back Rewards + 2. Payment towards the bill + 3. Refunds / Chargebacks + + NOTE: Currently unimplemented as I do not have access to a TD CC Statement. + + Args: + transaction (Transaction): Transaction to be checked + + Returns: + bool: True if given transaction is considered income, False if its + considered expense + """ + ... + class BNS(BaseFI): def __init__(self): regex = { "transaction": (r"^(?P(?:\w{3} \d{2} ){2})" r"(?P.+)\s" - r"(?P-?\$[\d,]+\.\d{2}-?)(?P(\-|\s?CR))?"), + r"(?P-?\$[\d,]+\.\d{2}-?)"), "start_year": r"STATEMENT FROM .+(?P-?\,.[0-9][0-9][0-9][0-9])", - "open_balance": r"(PREVIOUS|Previous) (STATEMENT|ACCOUNT|Account) (BALANCE|Balance) (?P-?\$[\d,]+\.\d{2})(?P(\-|\s?CR))?", - "closing_balance": r"(?:NEW|CREDIT) BALANCE (?P-?\$[\d,]+\.\d{2})(?P(\-|\s?CR))?" + "open_balance": r"(PREVIOUS|Previous) (STATEMENT|ACCOUNT|Account) (BALANCE|Balance) (?P-?\$[\d,]+\.\d{2})", + "closing_balance": r"(?:NEW|CREDIT) BALANCE (?P-?\$[\d,]+\.\d{2})" } super().__init__(name="BNS", regex=regex) + def is_transaction_income(self, transaction: Transaction) -> bool: + """ + Checks if a given transaction is considered an income transaction. This + is explicitly any transactions that show up on the CC bill that are: + 1. Cash Back Rewards + 2. Payment towards the bill + 3. Refunds / Chargebacks + + Args: + transaction (Transaction): Transaction to be checked + + Returns: + bool: True if given transaction is considered income, False if its + considered expense + """ + ... + + class FIFactory: type_FI = TypeVar("type_FI", RBC, TD, BNS)