From d487caf3046bba81e73c09f103831a4e6de75824 Mon Sep 17 00:00:00 2001 From: Daniel Hauser <36034483+daniel-hauser@users.noreply.github.com> Date: Fri, 3 May 2024 14:08:09 +0300 Subject: [PATCH] Create a better identifier (to replace `hash`) (#269) * Add `transactionUniqueId` * Add uniqueId field to TransactionRow interface * Remove unused const FileHeaders * Use the uniqueId as the identity * Add deprecation warning for old transaction hash field * Move date to the start of the string to make it sortable * Fix double counting of existing transactions in GoogleSheetsStorage --- README.md | 13 +- src/config.ts | 16 +- src/messages.test.ts | 2 + src/notifier.ts | 16 ++ src/storage/__snapshots__/utils.test.ts.snap | 3 + src/storage/index.ts | 3 +- src/storage/sheets.ts | 29 +++- src/storage/utils.test.ts | 154 ++++++++++++++++--- src/storage/utils.ts | 32 +++- src/storage/ynab.ts | 16 +- src/types.ts | 1 + 11 files changed, 233 insertions(+), 52 deletions(-) create mode 100644 src/storage/__snapshots__/utils.test.ts.snap diff --git a/README.md b/README.md index 24a109ad..bc0570df 100644 --- a/README.md +++ b/README.md @@ -88,12 +88,13 @@ Example: #### Other configurations -| env variable name | default | description | -| -------------------- | ------------------ | ------------------------------------------------------------------------------------------------------------ | -| `ACCOUNTS_TO_SCRAPE` | `""` | A comma separated list of providers to take from `ACCOUNTS_JSON`. if empty, all accounts will be used | -| `DAYS_BACK` | `10` | The amount of days back to scrape | -| `TZ` | `'Asia/Jerusalem'` | A timezone for the process - used for the formatting of the timestamp | -| `FUTURE_MONTHS` | `1` | The amount of months that will be scrapped in the future, starting from the day calculated using `DAYS_BACK` | +| env variable name | default | description | +| ----------------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------- | +| `ACCOUNTS_TO_SCRAPE` | `""` | A comma separated list of providers to take from `ACCOUNTS_JSON`. if empty, all accounts will be used | +| `DAYS_BACK` | `10` | The amount of days back to scrape | +| `TZ` | `'Asia/Jerusalem'` | A timezone for the process - used for the formatting of the timestamp | +| `FUTURE_MONTHS` | `1` | The amount of months that will be scrapped in the future, starting from the day calculated using `DAYS_BACK` | +| `TRANSACTION_HASH_TYPE` | `` | The hash type to use for the transaction hash. Can be `moneyman` or empty. The default will be changed to `moneyman` in the upcoming versions | ### Get notified in telegram diff --git a/src/config.ts b/src/config.ts index 3ccd21e9..b874d1fc 100644 --- a/src/config.ts +++ b/src/config.ts @@ -23,6 +23,7 @@ const { BUXFER_USER_NAME = "", BUXFER_PASSWORD = "", BUXFER_ACCOUNTS = "", + TRANSACTION_HASH_TYPE = "", } = process.env; /** @@ -47,6 +48,7 @@ export { BUXFER_USER_NAME, BUXFER_PASSWORD, BUXFER_ACCOUNTS, + TRANSACTION_HASH_TYPE, }; export const systemName = "moneyman"; export const currentDate = format(Date.now(), "yyyy-MM-dd"); @@ -58,20 +60,6 @@ export const accounts = parseAccounts(ACCOUNTS_JSON).filter( accountsToScrape.includes(account.companyId), ); -export const FileHeaders = [ - "date", - "amount", - "description", - "memo", - "category", - "account", - "hash", - "comment", - "scraped at", - "scraped by", - "identifier", -]; - function parseAccounts(accountsJson?: string): Array { try { const parsed = JSON.parse(accountsJson!); diff --git a/src/messages.test.ts b/src/messages.test.ts index 8d0a0c0e..e4467edc 100644 --- a/src/messages.test.ts +++ b/src/messages.test.ts @@ -97,6 +97,7 @@ describe("messages", () => { processedDate: new Date().toISOString(), description: "description1", originalAmount: 10, + uniqueId: "uniqueId1", originalCurrency: "ILS", chargedAmount: 10, status: TransactionStatuses.Completed, @@ -204,6 +205,7 @@ describe("messages", () => { account: "account1", companyId: CompanyTypes.max, hash: "hash1", + uniqueId: "uniqueId1", ...t, })), }, diff --git a/src/notifier.ts b/src/notifier.ts index 40a12fc2..325be9b0 100644 --- a/src/notifier.ts +++ b/src/notifier.ts @@ -61,3 +61,19 @@ export function sendError(message: any, caller: string = "") { )}`.trim(), ); } + +const deprecationMessages = { + ["hashFiledChange"]: `This run is using the old transaction hash field, please update to the new one (it might require manual de-duping of some transactions). See https://github.com/daniel-hauser/moneyman/issues/268 for more details.`, +} as const; +const sentDeprecationMessages = new Set(); +export function sendDeprecationMessage( + messageId: keyof typeof deprecationMessages, +) { + if (sentDeprecationMessages.has(messageId)) { + return; + } + // Avoid sending the same message multiple times + sentDeprecationMessages.add(messageId); + return send(`⚠️ Deprecation warning: +${deprecationMessages[messageId]}`); +} diff --git a/src/storage/__snapshots__/utils.test.ts.snap b/src/storage/__snapshots__/utils.test.ts.snap new file mode 100644 index 00000000..427610a9 --- /dev/null +++ b/src/storage/__snapshots__/utils.test.ts.snap @@ -0,0 +1,3 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`transactionUniqueId should return the same unique id for the same transaction 1`] = `"2021-01-01_hapoalim_123_1000_identifier1"`; diff --git a/src/storage/index.ts b/src/storage/index.ts index 9136a292..ab68a63f 100644 --- a/src/storage/index.ts +++ b/src/storage/index.ts @@ -3,7 +3,7 @@ import type { AccountScrapeResult, TransactionRow } from "../types.js"; import { LocalJsonStorage } from "./json.js"; import { GoogleSheetsStorage } from "./sheets.js"; import { AzureDataExplorerStorage } from "./azure-data-explorer.js"; -import { transactionHash } from "./utils.js"; +import { transactionHash, transactionUniqueId } from "./utils.js"; import { YNABStorage } from "./ynab.js"; import { BuxferStorage } from "./buxfer.js"; @@ -56,6 +56,7 @@ function resultsToTransactions( account: account.accountNumber, companyId, hash: transactionHash(tx, companyId, account.accountNumber), + uniqueId: transactionUniqueId(tx, companyId, account.accountNumber), }); } } diff --git a/src/storage/sheets.ts b/src/storage/sheets.ts index 074211ab..ac52ee88 100644 --- a/src/storage/sheets.ts +++ b/src/storage/sheets.ts @@ -10,6 +10,7 @@ import { worksheetName, currentDate, systemName, + TRANSACTION_HASH_TYPE, } from "./../config.js"; import type { TransactionRow, @@ -17,6 +18,7 @@ import type { SaveStats, } from "../types.js"; import { TransactionStatuses } from "israeli-bank-scrapers/lib/transactions.js"; +import { sendDeprecationMessage } from "../notifier.js"; const logger = createLogger("GoogleSheetsStorage"); @@ -93,9 +95,26 @@ export class GoogleSheetsStorage implements TransactionStorage { } satisfies SaveStats; for (let tx of txns) { + if (TRANSACTION_HASH_TYPE === "moneyman") { + // Use the new uniqueId as the unique identifier for the transactions if the hash type is moneyman + if (this.existingTransactionsHashes.has(tx.uniqueId)) { + stats.existing++; + stats.skipped++; + continue; + } + } + if (this.existingTransactionsHashes.has(tx.hash)) { - stats.existing++; - stats.skipped++; + if (TRANSACTION_HASH_TYPE === "moneyman") { + logger(`Skipping, old hash ${tx.hash} is already in the sheet`); + } + + // To avoid double counting, skip if the new hash is already in the sheet + if (!this.existingTransactionsHashes.has(tx.uniqueId)) { + stats.existing++; + stats.skipped++; + } + continue; } @@ -112,6 +131,10 @@ export class GoogleSheetsStorage implements TransactionStorage { if (rows.length) { stats.added = rows.length; await this.sheet?.addRows(rows); + + if (TRANSACTION_HASH_TYPE !== "moneyman") { + sendDeprecationMessage("hashFiledChange"); + } } return stats; @@ -168,7 +191,7 @@ export class GoogleSheetsStorage implements TransactionStorage { memo: tx.memo ?? "", category: tx.category ?? "", account: tx.account, - hash: tx.hash, + hash: TRANSACTION_HASH_TYPE === "moneyman" ? tx.uniqueId : tx.hash, comment: "", "scraped at": currentDate, "scraped by": systemName, diff --git a/src/storage/utils.test.ts b/src/storage/utils.test.ts index e73e294a..2e3611dc 100644 --- a/src/storage/utils.test.ts +++ b/src/storage/utils.test.ts @@ -3,34 +3,36 @@ import { TransactionStatuses, TransactionTypes, } from "israeli-bank-scrapers/lib/transactions"; -import { transactionHash } from "./utils"; +import { transactionUniqueId, transactionHash } from "./utils"; import { CompanyTypes } from "israeli-bank-scrapers"; -describe("transactionHash", () => { - const transaction1: Transaction = { - date: "2021-01-01", - processedDate: "2021-01-01", - chargedAmount: 1, - description: "desc 1", - memo: "memo 1", - originalAmount: 1, - originalCurrency: "ILS", - status: TransactionStatuses.Completed, - type: TransactionTypes.Normal, - }; - - const transaction2: Transaction = { - date: "2021-01-01", - processedDate: "2021-01-01", - chargedAmount: 1, - description: "desc 2", - memo: "memo 2", - originalAmount: 1, - originalCurrency: "ILS", - status: TransactionStatuses.Completed, - type: TransactionTypes.Normal, - }; +const transaction1: Transaction = { + date: "2021-01-01", + processedDate: "2021-01-01", + chargedAmount: 1000, + description: "desc 1", + memo: "memo 1", + originalAmount: 1000, + originalCurrency: "ILS", + identifier: "identifier1", + status: TransactionStatuses.Completed, + type: TransactionTypes.Normal, +}; + +const transaction2: Transaction = { + date: "2021-01-01", + processedDate: "2021-01-01", + chargedAmount: 999, + description: "desc 2", + memo: "memo 2", + originalAmount: 999, + originalCurrency: "ILS", + identifier: "identifier2", + status: TransactionStatuses.Completed, + type: TransactionTypes.Normal, +}; +describe("transactionHash", () => { it("should return the same hash for the same transaction", () => { const companyId = CompanyTypes.hapoalim; const accountNumber = "123"; @@ -101,3 +103,105 @@ describe("transactionHash", () => { } }); }); + +describe("transactionUniqueId", () => { + it("should return the same unique id for the same transaction", () => { + const companyId = CompanyTypes.hapoalim; + const accountNumber = "123"; + const id1 = transactionUniqueId(transaction1, companyId, accountNumber); + const id2 = transactionUniqueId(transaction1, companyId, accountNumber); + + expect(id1).toBe(id2); + expect(id1).toMatchSnapshot(); + }); + + it("should return different unique ids for different transactions", () => { + const companyId = CompanyTypes.hapoalim; + const accountNumber = "123"; + const id1 = transactionUniqueId(transaction1, companyId, accountNumber); + const id2 = transactionUniqueId(transaction2, companyId, accountNumber); + + expect(id1).not.toBe(id2); + }); + + it("should return different unique ids for different company ids", () => { + const accountNumber = "123"; + const id1 = transactionUniqueId( + transaction1, + CompanyTypes.leumi, + accountNumber, + ); + + const id2 = transactionUniqueId( + transaction1, + CompanyTypes.isracard, + accountNumber, + ); + + expect(id1).not.toBe(id2); + }); + + it("should return different unique ids for different account numbers", () => { + const companyId = CompanyTypes.hapoalim; + const id1 = transactionUniqueId(transaction1, companyId, "123"); + const id2 = transactionUniqueId(transaction1, companyId, "456"); + + expect(id1).not.toBe(id2); + }); + + it("should convert nullish values to empty strings", () => { + const transactionWithUndefined = { + ...transaction1, + chargedAmount: undefined, + description: undefined, + memo: undefined, + }; + + const transactionWithNull = { + ...transaction1, + chargedAmount: null, + description: null, + memo: null, + }; + + for (const transaction of [transactionWithNull, transactionWithUndefined]) { + const hash = transactionHash( + transaction as any, + CompanyTypes.leumi, + "123", + ); + + expect(hash).not.toContain("null"); + expect(hash).not.toContain("undefined"); + } + }); + + it('should return different unique ids for transactions with different "identifier" values', () => { + const companyId = CompanyTypes.hapoalim; + const accountNumber = "123"; + const tx1 = { ...transaction1, identifier: "1" }; + const tx2 = { ...transaction1, identifier: "2" }; + + // The old hash will be the same for both transactions because it doesn't include the identifier + expect(transactionHash(tx1, companyId, accountNumber)).toBe( + transactionHash(tx2, companyId, accountNumber), + ); + + // The unique id will be different because it includes the identifier + expect(transactionUniqueId(tx1, companyId, accountNumber)).not.toBe( + transactionUniqueId(tx2, companyId, accountNumber), + ); + }); + + it.each([undefined, null, ""])( + 'should fallback to "description" + "memo" if "identifier" is %o', + (identifier) => { + const tx = { + ...transaction1, + identifier, + }; + const id = transactionUniqueId(tx as any, CompanyTypes.hapoalim, "123"); + expect(id).toContain(`${tx.description}_${tx.memo}`); + }, + ); +}); diff --git a/src/storage/utils.ts b/src/storage/utils.ts index 85f3743a..9feeddc8 100644 --- a/src/storage/utils.ts +++ b/src/storage/utils.ts @@ -1,7 +1,11 @@ -import { parseISO, roundToNearestMinutes } from "date-fns"; +import { formatISO, parseISO, roundToNearestMinutes } from "date-fns"; import type { CompanyTypes } from "israeli-bank-scrapers"; import type { Transaction } from "israeli-bank-scrapers/lib/transactions"; +/** + * Generates a hash for a transaction that can be used to ~uniquely identify it. + * The hash is backwards compatible with the caspion hash. + */ export function transactionHash( tx: Transaction, companyId: CompanyTypes, @@ -19,3 +23,29 @@ export function transactionHash( return parts.map((p) => String(p ?? "")).join("_"); } + +/** + * + * @param tx + * @param companyId + * @param accountNumber + * @returns A unique id for a transaction + */ +export function transactionUniqueId( + tx: Transaction, + companyId: CompanyTypes, + accountNumber: string, +) { + const date = formatISO(tx.date, { + representation: "date", + }); + + const parts = [ + date, + companyId, + accountNumber, + tx.chargedAmount, + tx.identifier || `${tx.description}_${tx.memo}`, + ]; + return parts.map((p) => String(p ?? "").trim()).join("_"); +} diff --git a/src/storage/ynab.ts b/src/storage/ynab.ts index 68ecd9b8..dfab073c 100644 --- a/src/storage/ynab.ts +++ b/src/storage/ynab.ts @@ -1,10 +1,16 @@ -import { YNAB_TOKEN, YNAB_BUDGET_ID, YNAB_ACCOUNTS } from "../config.js"; +import { + YNAB_TOKEN, + YNAB_BUDGET_ID, + YNAB_ACCOUNTS, + TRANSACTION_HASH_TYPE, +} from "../config.js"; import { SaveStats, TransactionRow, TransactionStorage } from "../types.js"; import { createLogger } from "./../utils/logger.js"; import { parseISO, format } from "date-fns"; import * as ynab from "ynab"; import hash from "hash-it"; import { TransactionStatuses } from "israeli-bank-scrapers/lib/transactions.js"; +import { sendDeprecationMessage } from "../notifier.js"; const YNAB_DATE_FORMAT = "yyyy-MM-dd"; const logger = createLogger("YNABStorage"); @@ -85,6 +91,10 @@ export class YNABStorage implements TransactionStorage { stats.added = resp.data.transactions?.length ?? 0; stats.existing = resp.data.duplicate_import_ids?.length ?? 0; stats.skipped += stats.existing; + + if (TRANSACTION_HASH_TYPE !== "moneyman") { + sendDeprecationMessage("hashFiledChange"); + } } if (missingAccounts.size > 0) { @@ -131,7 +141,9 @@ export class YNABStorage implements TransactionStorage { ? ynab.TransactionClearedStatus.Cleared : undefined, approved: false, - import_id: hash(tx.hash).toString(), + import_id: hash( + TRANSACTION_HASH_TYPE === "moneyman" ? tx.uniqueId : tx.hash, + ).toString(), memo: tx.memo, }; } diff --git a/src/types.ts b/src/types.ts index dc64c3fb..cb414c08 100644 --- a/src/types.ts +++ b/src/types.ts @@ -14,6 +14,7 @@ export interface TransactionRow extends Transaction { account: string; companyId: CompanyTypes; hash: string; + uniqueId: string; } export interface AccountScrapeResult {