Skip to content

Commit

Permalink
Refactored code to properly work with Preferences
Browse files Browse the repository at this point in the history
  • Loading branch information
RyanLiu6 committed Jun 12, 2024
1 parent 1751d7f commit a3ee223
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 47 deletions.
19 changes: 7 additions & 12 deletions Config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
from typing import Dict
from configparser import ConfigParser

from src.model import Orders, Config

from src.model import Orders, Preferences

ROOT_PATH = os.path.dirname(os.path.abspath(__name__))
CONFIG_FILE = os.path.join(ROOT_PATH, "preferences.ini")
Expand All @@ -28,9 +27,8 @@ def get_preferences() -> Dict:

csv_order = Orders[parser.get(CONFIG_SECTION, "csv_order")]
use_ollama = parser.getboolean(CONFIG_SECTION, "use_ollama")
keep_payments = parser.getboolean(CONFIG_SECTION, "keep_payments")
negative_expenses = parser.getboolean(CONFIG_SECTION, "negative_expenses")
preferences = Config(csv_order, use_ollama, keep_payments, negative_expenses)
positive_expenses = parser.getboolean(CONFIG_SECTION, "positive_expenses")
preferences = Preferences(csv_order, use_ollama, positive_expenses)

return preferences

Expand All @@ -40,11 +38,9 @@ def get_preferences() -> Dict:
help="CSV Order. Defaults to the simple order, which is Date, Amount, and Note (Description)")
@click.option("-u", "--use-ollama", is_flag=True, default=False,
help="Use Ollama and local LLM to categorize expenses. NOTE: Experimental Feature. Defaults to False.")
@click.option("-k", "--keep-payments", is_flag=True, default=False,
help="Keep payments to Credit Card Bill in transactions. Defaults to False.")
@click.option("-n", "--negative_expenses", is_flag=True, default=False,
help="Have expenses represented as negative values and incomes represented as positive values. Defaults to False.")
def cli(order: Orders, use_ollama: bool, keep_payments: bool, negative_expenses: bool):
@click.option("-n", "--positive_expenses", is_flag=True, default=False,
help="Have expenses represented as positive floats and incomes represented as negative floats. Defaults to False.")
def cli(order: Orders, use_ollama: bool, positive_expenses: bool):
"""
Writes an .ini file dictating Ena's behavioural preferences.
"""
Expand All @@ -54,8 +50,7 @@ def cli(order: Orders, use_ollama: bool, keep_payments: bool, negative_expenses:
config[CONFIG_SECTION] = {
"csv_order": order,
"use_ollama": bool_to_str(use_ollama),
"keep_payments": bool_to_str(keep_payments),
"negative_expenses": bool_to_str(negative_expenses),
"positive_expenses": bool_to_str(positive_expenses),
}

with open(CONFIG_FILE, "w+") as config_file:
Expand Down
5 changes: 2 additions & 3 deletions Ena.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@
import logging

from src.api import Ena
from Config import STATEMENTS_PATH
from Config import ROOT_PATH


ROOT_PATH = os.path.dirname(os.path.abspath(__name__))
STATEMENTS_PATH = os.path.join(ROOT_PATH, "statements")


@click.command()
@click.option("-d", "--directory", "statements_dir", type=click.Path(exists=True),
default=STATEMENTS_PATH, help="Directory where statements are. Defaults to Ena/statements")
Expand Down
51 changes: 32 additions & 19 deletions src/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from datetime import datetime
from dataclasses import asdict
from collections import defaultdict
from src.model import Category, Transaction, FIFactory, CSV_ORDERS
from src.model import Category, Orders, Transaction, FIFactory, CSV_ORDERS


class Ena:
Expand Down Expand Up @@ -49,10 +49,14 @@ def parse_statements(self):
csv_data.sort(key=lambda x: x.date)
file_path = os.path.join(ROOT_PATH, "output", fi_name, f"{int(datetime.today().timestamp())}.csv")
with open(file_path, "w+", newline="") as csv_file:
writer = csv.DictWriter(csv_file, CSV_ORDERS[self.preferences.csv_order])
csv_order = CSV_ORDERS[self.preferences.csv_order]
writer = csv.DictWriter(csv_file, csv_order)
writer.writeheader()
for txn in csv_data:
writer.writerow(asdict(txn))
if csv_order == Orders.SIMPLE:
writer.writerow(txn.simple_repr())
else:
writer.writerow(asdict(txn))

def _parse_statement(self, processor: FIFactory.type_FI, statement_path: str) -> List[Transaction]:
"""
Expand All @@ -63,12 +67,7 @@ def _parse_statement(self, processor: FIFactory.type_FI, statement_path: str) ->
1. Category is added via Ollama based on available categories and confidence %.
a. This behaviour can be disabled.
2. Payments to Credit Cards (Ena's main use-case) will be removed from the list of
Transactions. Per my personal use-case, payments will always be equal to opening
balance.
a. This behaviour can be disabled.
3. Transactions will all have "positive" value, ie, > 0, as Ena is designed to be an
2. Transactions will all have "positive" value, ie, > 0, as Ena is designed to be an
expense tracker for Credit Cards. In the rare case that a transaction is "negative",
for income of some sort (Cashback rewards, refunds, etc), it'll be categorized under
Category.INCOME with a negative value.
Expand Down Expand Up @@ -96,11 +95,15 @@ def _parse_statement(self, processor: FIFactory.type_FI, statement_path: str) ->
opening_balance = processor.get_opening_balance(text)
closing_balance = processor.get_closing_balance(text)

print(text)

# debugging transaction mapping - all 3 regex in transaction have to find a result in order for it to be considered a "match"
year_end = False
transaction_regex = processor.get_transaction_regex()
for match in re.finditer(transaction_regex, text, re.MULTILINE):
match_dict = match.groupdict()
print(match_dict)

date = match_dict["dates"].replace("/", " ") # change format to standard: 03/13 -> 03 13
date = date.split(" ")[0:2] # Aug. 10 Aug. 13 -> ["Aug.", "10"]
date[0] = date[0].strip(".") # Aug. -> Aug
Expand All @@ -119,28 +122,38 @@ def _parse_statement(self, processor: FIFactory.type_FI, statement_path: str) ->
if month == "01" and year_end:
date = date.replace(year=date.year + 1)

if (match_dict["cr"]):
logging.info(f"Credit balance found in transaction: {match_dict['amount']}")
amount = -float("-" + match_dict["amount"].replace("$", "").replace(",", ""))
else:
amount = -float(match_dict["amount"].replace("$", "").replace(",", ""))
amount = -float(match_dict["amount"].replace("$", "").replace(",", ""))

# checks description regex
if ("$" in match_dict["description"]):
logging.info(f"$ found in description: {match_dict['description']}")
newAmount = re.search(r"(?P<amount>-?\$[\d,]+\.\d{2}-?)(?P<cr>(\-|\s?CR))?", match_dict["description"])
newAmount = re.search(r"(?P<amount>-?\$[\d,]+\.\d{2}-?)", match_dict["description"])
amount = -float(newAmount["amount"].replace("$", "").replace(",", ""))
match_dict["description"] = match_dict["description"].split("$", 1)[0]

# Set amount based on preferences
if self.preferences.positive_expenses:
amount *= -1

transaction = Transaction(date=str(date.date().isoformat()),
amount=amount,
note=match_dict["description"].strip())

if transaction in transactions:
# Assumes all duplicate transactions are valid
transaction.description = transaction.description + " 2"
# Check if transaction should be directly categorized as income transaction
if processor.is_transaction_income(transaction, self.preferences.positive_expenses):
transaction.category = Category.INCOME
else:
transactions.append(transaction)
if self.preferences.use_ollama:
# Get category via inference
...

"""
Transactions is represented as a List instead of Set because duplicate transactions
where properties are the same (Transaction.__eq__) are valid.
It's entirely possible that you make the same purchase at the same spot regularly.
"""
transactions.append(transaction)

processor.validate(opening_balance, closing_balance, transactions)
return transactions
102 changes: 89 additions & 13 deletions src/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from enum import Enum
from dataclasses import dataclass
from abc import ABC, abstractmethod
from typing import Dict, List, TypeVar


Expand All @@ -14,19 +15,18 @@ class Orders(Enum):


CSV_ORDERS = {
Orders.DEFAULT: ["Date", "Amount", "Note", "Category"],
Orders.DIME: ["Category", "Note", "Date", "Amount"],
Orders.SIMPLE: ["Date", "Amount", "Note"],
Orders.DEFAULT: ["date", "amount", "note", "category"],
Orders.DIME: ["category", "note", "date", "amount"],
Orders.SIMPLE: ["date", "amount", "note"],
}


# User config
@dataclass
class Config:
class Preferences:
csv_order: Orders
use_ollama: bool
keep_payments: bool
negative_expenses: bool
positive_expenses: bool


class Category(Enum):
Expand Down Expand Up @@ -55,8 +55,15 @@ def __eq__(self, other):
return isinstance(other, Transaction) and self.date == other.date and self.amount == other.amount \
and self.note == other.note and self.category == other.category

def simple_repr(self) -> Dict:
    """
    Build the reduced dictionary form of this transaction.

    Returns:
        Dict: Mapping with exactly the "date", "amount" and "note" keys (in that
            order), each holding the value of the attribute of the same name.
    """
    simple_fields = ("date", "amount", "note")
    return {field: getattr(self, field) for field in simple_fields}


class BaseFI:
class BaseFI(ABC):
"""
Code for Regex Expressions and validate are directly from Bizzaro:Teller
"""
Expand All @@ -73,6 +80,14 @@ def get_transaction_regex(self) -> str:
"""
return self.regex["transaction"]

@abstractmethod
def is_transaction_income(self, transaction: Transaction, positive_expenses: bool = False) -> bool:
    """
    Decide whether a transaction counts as income rather than an expense.

    Must be implemented by individual FI Classes due to statements being different
    between different FIs.

    Args:
        transaction (Transaction): Transaction to be checked
        positive_expenses (bool): True if expenses are represented as positive floats,
            False if they are represented as negative floats instead. Optional so
            that single-argument calls keep working; defaults to False.

    Returns:
        bool: True if given transaction is considered income, False if it's
            considered expense
    """
    pass

def get_start_year(self, statement: str) -> int:
"""
Get starting year for a given statement.
Expand Down Expand Up @@ -172,17 +187,41 @@ def __init__(self):
regex = {
"transaction": (r"^(?P<dates>(?:\w{3} \d{2} ){2})"
r"(?P<description>.+)\s"
r"(?P<amount>-?\$[\d,]+\.\d{2}-?)(?P<cr>(\-|\s?CR))?"),
r"(?P<amount>-?\$[\d,]+\.\d{2}-?)"),
"start_year": r"STATEMENT FROM .+(?P<year>-?\,.[0-9][0-9][0-9][0-9])",
"open_balance": r"(PREVIOUS|Previous) (STATEMENT|ACCOUNT|Account) (BALANCE|Balance) (?P<balance>-?\$[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?",
"closing_balance": r"(?:NEW|CREDIT) BALANCE (?P<balance>-?\$[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?"
"open_balance": r"(PREVIOUS|Previous) (STATEMENT|ACCOUNT|Account) (BALANCE|Balance) (?P<balance>-?\$[\d,]+\.\d{2})",
"closing_balance": r"(?:NEW|CREDIT) BALANCE (?P<balance>-?\$[\d,]+\.\d{2})"
}

super().__init__(name="RBC", regex=regex)

def is_transaction_income(self, transaction: Transaction, positive_expenses: bool = False) -> bool:
    """
    Checks if a given transaction is considered an income transaction. This
    is explicitly any transactions that show up on the CC bill that are:
    1. Cash Back Rewards
    2. Payment towards the bill
    3. Refunds / Chargebacks

    The decision is made purely from the sign of transaction.amount, interpreted
    according to the sign convention selected by positive_expenses.

    Args:
        transaction (Transaction): Transaction to be checked
        positive_expenses (bool): True if expenses are represented as positive floats,
            False if they are represented as negative floats instead. Optional so
            that single-argument calls keep working; defaults to False.

    Returns:
        bool: True if given transaction is considered income, False if it's
            considered expense. A zero amount is never considered income.
    """
    if positive_expenses:
        # Expenses are > 0, so income is any strictly negative amount.
        return transaction.amount < 0
    # Expenses are < 0, so income is any strictly positive amount.
    return transaction.amount > 0


class TD(BaseFI):
def __init__(self):
"""
NOTE: Currently unimplemented as I do not have access to a TD CC Statement.
"""
regex = {
"transaction": (r"(?P<dates>(?:\w{3} \d{1,2} ){2})"
r"(?P<description>.+)\s"
Expand All @@ -194,20 +233,57 @@ def __init__(self):

super().__init__(name="TD", regex=regex)

def is_transaction_income(self, transaction: Transaction, positive_expenses: bool = False) -> bool:
    """
    Checks if a given transaction is considered an income transaction. This
    is explicitly any transactions that show up on the CC bill that are:
    1. Cash Back Rewards
    2. Payment towards the bill
    3. Refunds / Chargebacks

    NOTE: Currently unimplemented as I do not have access to a TD CC Statement.
    The body is a placeholder, so this returns None (falsy) for every transaction.

    Args:
        transaction (Transaction): Transaction to be checked
        positive_expenses (bool): True if expenses are represented as positive floats,
            False if they are represented as negative floats instead. Accepted so
            that two-argument calls do not raise TypeError; currently unused.
            Defaults to False.

    Returns:
        bool: True if given transaction is considered income, False if it's
            considered expense (once implemented).
    """
    ...


class BNS(BaseFI):
def __init__(self):
regex = {
"transaction": (r"^(?P<dates>(?:\w{3} \d{2} ){2})"
r"(?P<description>.+)\s"
r"(?P<amount>-?\$[\d,]+\.\d{2}-?)(?P<cr>(\-|\s?CR))?"),
r"(?P<amount>-?\$[\d,]+\.\d{2}-?)"),
"start_year": r"STATEMENT FROM .+(?P<year>-?\,.[0-9][0-9][0-9][0-9])",
"open_balance": r"(PREVIOUS|Previous) (STATEMENT|ACCOUNT|Account) (BALANCE|Balance) (?P<balance>-?\$[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?",
"closing_balance": r"(?:NEW|CREDIT) BALANCE (?P<balance>-?\$[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?"
"open_balance": r"(PREVIOUS|Previous) (STATEMENT|ACCOUNT|Account) (BALANCE|Balance) (?P<balance>-?\$[\d,]+\.\d{2})",
"closing_balance": r"(?:NEW|CREDIT) BALANCE (?P<balance>-?\$[\d,]+\.\d{2})"
}

super().__init__(name="BNS", regex=regex)

def is_transaction_income(self, transaction: Transaction, positive_expenses: bool = False) -> bool:
    """
    Checks if a given transaction is considered an income transaction. This
    is explicitly any transactions that show up on the CC bill that are:
    1. Cash Back Rewards
    2. Payment towards the bill
    3. Refunds / Chargebacks

    NOTE: The body is still a placeholder, so this currently returns None (falsy)
    for every transaction.

    Args:
        transaction (Transaction): Transaction to be checked
        positive_expenses (bool): True if expenses are represented as positive floats,
            False if they are represented as negative floats instead. Accepted so
            that two-argument calls do not raise TypeError; currently unused.
            Defaults to False.

    Returns:
        bool: True if given transaction is considered income, False if it's
            considered expense (once implemented).
    """
    ...



class FIFactory:
type_FI = TypeVar("type_FI", RBC, TD, BNS)
Expand Down

0 comments on commit a3ee223

Please sign in to comment.