Skip to content

Commit

Permalink
sheets: refactor (#369)
Browse files Browse the repository at this point in the history
* Call `loadInfo` again after creating the sheet

* Simpler flow

* Move row schema to own file

* Retry getting the sheet

* Better auth

* Fix service account setup link

* Even nicer

* Smaller payload in hashes

* Remove retry + sheet creation
  • Loading branch information
daniel-hauser authored Nov 1, 2024
1 parent 4704322 commit 54ffc57
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 105 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,12 @@ WIP

### Export to google sheets

1. Follow the instructions [here](https://theoephraim.github.io/node-google-spreadsheet/#/getting-started/authentication?id=service-account) to create a google service account.
1. Follow the instructions [here](https://theoephraim.github.io/node-google-spreadsheet/#/guides/authentication?id=setting-up-your-quotapplicationquot) to create a google service account.
2. Create a [new sheet](https://sheets.new/) and share it with your service account using the `GOOGLE_SERVICE_ACCOUNT_EMAIL`.
3. Create a worksheet (tab) inside that spreadsheet and give it a name (e.g. `_moneyman`)
4. Add the following headers to the sheet:
| date | amount | description | memo | category | account | hash | comment | scraped at | scraped by | identifier | chargedCurrency |
| ---- | ------ | ----------- | ---- | -------- | ------- | ---- | ------- | ---------- | ---------- | ---------- | --------------- |

Use the following env vars to setup:

Expand Down
154 changes: 52 additions & 102 deletions src/bot/storage/sheets.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import { createLogger } from "../../utils/logger.js";
import { columnToLetter } from "google-spreadsheet/src/lib/utils.js";
import {
GoogleSpreadsheet,
GoogleSpreadsheetWorksheet,
} from "google-spreadsheet";
import { GoogleAuth, JWT } from "google-auth-library";
import { format, parseISO } from "date-fns";
import { systemName } from "../../config.js";
import { GoogleAuth } from "google-auth-library";
import type { TransactionRow, TransactionStorage } from "../../types.js";
import { TransactionStatuses } from "israeli-bank-scrapers/lib/transactions.js";
import { sendDeprecationMessage } from "../notifier.js";
import { normalizeCurrency } from "../../utils/currency.js";
import { createSaveStats } from "../saveStats.js";
import { TableRow, tableRow, TableHeaders } from "../transactionTableRow.js";

const logger = createLogger("GoogleSheetsStorage");

Expand All @@ -22,63 +21,7 @@ const {
TRANSACTION_HASH_TYPE,
} = process.env;

const currentDate = format(Date.now(), "yyyy-MM-dd");

export type SheetRow = {
date: string;
amount: number;
description: string;
memo: string;
category: string;
account: string;
hash: string;
comment: string;
"scraped at": string;
"scraped by": string;
identifier: string;
chargedCurrency: string;
};

export function transactionRow(tx: TransactionRow): SheetRow {
return {
date: format(parseISO(tx.date), "dd/MM/yyyy", {}),
amount: tx.chargedAmount,
description: tx.description,
memo: tx.memo ?? "",
category: tx.category ?? "",
account: tx.account,
hash: TRANSACTION_HASH_TYPE === "moneyman" ? tx.uniqueId : tx.hash,
comment: "",
"scraped at": currentDate,
"scraped by": systemName,
identifier: `${tx.identifier ?? ""}`,
// Assuming the transaction is not pending, so we can use the original currency as the charged currency
chargedCurrency:
normalizeCurrency(tx.chargedCurrency) ||
normalizeCurrency(tx.originalCurrency),
};
}

export class GoogleSheetsStorage implements TransactionStorage {
static FileHeaders: Array<keyof SheetRow> = [
"date",
"amount",
"description",
"memo",
"category",
"account",
"hash",
"comment",
"scraped at",
"scraped by",
"identifier",
"chargedCurrency",
];

existingTransactionsHashes = new Set<string>();

private sheet: null | GoogleSpreadsheetWorksheet = null;

canSave() {
return Boolean(
GOOGLE_SERVICE_ACCOUNT_EMAIL && GOOGLE_SERVICE_ACCOUNT_PRIVATE_KEY,
Expand All @@ -89,33 +32,46 @@ export class GoogleSheetsStorage implements TransactionStorage {
txns: Array<TransactionRow>,
onProgress: (status: string) => Promise<void>,
) {
const rows: SheetRow[] = [];
await Promise.all([onProgress("Initializing"), this.initDocAndSheet()]);
await Promise.all([onProgress("Loading hashes"), this.loadHashes()]);
const [doc] = await Promise.all([this.getDoc(), onProgress("Getting doc")]);

await onProgress("Getting sheet");
const sheet = doc.sheetsByTitle[WORKSHEET_NAME];
if (!sheet) {
await onProgress("Sheet not found");
throw new Error(
`sheet not found. sheets: ${Object.keys(doc.sheetsByTitle)}`,
);
}

const [existingHashes] = await Promise.all([
this.loadHashes(sheet),
onProgress("Loading hashes"),
]);

const stats = createSaveStats("Google Sheets", WORKSHEET_NAME, txns, {
highlightedTransactions: {
Added: [] as Array<TransactionRow>,
},
});

const rows: TableRow[] = [];
for (let tx of txns) {
if (TRANSACTION_HASH_TYPE === "moneyman") {
// Use the new uniqueId as the unique identifier for the transactions if the hash type is moneyman
if (this.existingTransactionsHashes.has(tx.uniqueId)) {
if (existingHashes.has(tx.uniqueId)) {
stats.existing++;
stats.skipped++;
continue;
}
}

if (this.existingTransactionsHashes.has(tx.hash)) {
if (existingHashes.has(tx.hash)) {
if (TRANSACTION_HASH_TYPE === "moneyman") {
logger(`Skipping, old hash ${tx.hash} is already in the sheet`);
}

// To avoid double counting, skip if the new hash is already in the sheet
if (!this.existingTransactionsHashes.has(tx.uniqueId)) {
if (!existingHashes.has(tx.uniqueId)) {
stats.existing++;
stats.skipped++;
}
Expand All @@ -128,13 +84,13 @@ export class GoogleSheetsStorage implements TransactionStorage {
continue;
}

rows.push(transactionRow(tx));
rows.push(tableRow(tx));
stats.highlightedTransactions.Added.push(tx);
}

if (rows.length) {
stats.added = rows.length;
await Promise.all([onProgress("Saving"), this.sheet?.addRows(rows)]);
await Promise.all([onProgress("Saving"), sheet.addRows(rows)]);
if (TRANSACTION_HASH_TYPE !== "moneyman") {
sendDeprecationMessage("hashFiledChange");
}
Expand All @@ -143,46 +99,40 @@ export class GoogleSheetsStorage implements TransactionStorage {
return stats;
}

private async loadHashes() {
const rows = await this.sheet?.getRows<SheetRow>();
for (let row of rows!) {
this.existingTransactionsHashes.add(row.get("hash"));
}
logger(`${this.existingTransactionsHashes.size} hashes loaded`);
/**
 * Opens the configured spreadsheet with service-account credentials and
 * loads its metadata before handing it back.
 *
 * The credentials and the spreadsheet id come from the module-level
 * GOOGLE_* values (presumably destructured from `process.env` — confirm
 * against the top of the file).
 *
 * @returns the `GoogleSpreadsheet` instance after `loadInfo()` has resolved
 */
private async getDoc() {
  const credentials = {
    client_email: GOOGLE_SERVICE_ACCOUNT_EMAIL,
    private_key: GOOGLE_SERVICE_ACCOUNT_PRIVATE_KEY,
  };
  const scopes = ["https://www.googleapis.com/auth/spreadsheets"];

  const spreadsheet = new GoogleSpreadsheet(
    GOOGLE_SHEET_ID,
    new GoogleAuth({ scopes, credentials }),
  );

  // The caller looks worksheets up via `sheetsByTitle`, so the document info is loaded here first.
  await spreadsheet.loadInfo();
  return spreadsheet;
}

private async initDocAndSheet() {
const {
GOOGLE_SERVICE_ACCOUNT_EMAIL: client_email,
GOOGLE_SERVICE_ACCOUNT_PRIVATE_KEY: private_key,
} = process.env;

// By default, try to automatically get credentials
// (maybe we're running in Google Cloud, who knows)
let authToken: JWT | GoogleAuth<any> = new GoogleAuth({
scopes: [
"https://www.googleapis.com/auth/spreadsheets",
"https://www.googleapis.com/auth/drive.file",
],
});
if (client_email && private_key) {
logger("Using ServiceAccountAuth");
authToken = new JWT({
email: client_email,
key: private_key,
scopes: ["https://www.googleapis.com/auth/spreadsheets"],
});
/**
* Load hashes from the "hash" column, assuming the first row is a header row
*/
private async loadHashes(sheet: GoogleSpreadsheetWorksheet) {
const hashColumnNumber = sheet.headerValues.indexOf("hash");
if (hashColumnNumber === -1) {
throw new Error("Hash column not found");
}
const doc = new GoogleSpreadsheet(GOOGLE_SHEET_ID, authToken);

await doc.loadInfo();
const hashColumnLetter = columnToLetter(hashColumnNumber + 1);
const range = `${hashColumnLetter}2:${hashColumnLetter}`;

const columns = await sheet.getCellsInRange(range, {
majorDimension: "COLUMNS",
});

if (!(WORKSHEET_NAME in doc.sheetsByTitle)) {
logger("Creating new sheet");
const sheet = await doc.addSheet({ title: WORKSHEET_NAME });
await sheet.setHeaderRow(GoogleSheetsStorage.FileHeaders);
if (Array.isArray(columns)) {
return new Set(columns[0] as string[]);
}

this.sheet = doc.sheetsByTitle[WORKSHEET_NAME];
throw new Error("loadHashesBetter: getCellsInRange returned non-array");
}
}
4 changes: 2 additions & 2 deletions src/bot/storage/web-post.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { createLogger } from "../../utils/logger.js";
import type { TransactionRow, TransactionStorage } from "../../types.js";
import { TransactionStatuses } from "israeli-bank-scrapers/lib/transactions.js";
import { transactionRow } from "./sheets.js";
import { tableRow } from "../transactionTableRow.js";
import { createSaveStats } from "../saveStats.js";

const logger = createLogger("WebPostStorage");
Expand Down Expand Up @@ -31,7 +31,7 @@ export class WebPostStorage implements TransactionStorage {
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify(nonPendingTxns.map((tx) => transactionRow(tx))),
body: JSON.stringify(nonPendingTxns.map((tx) => tableRow(tx))),
}),
onProgress("Sending"),
]);
Expand Down
49 changes: 49 additions & 0 deletions src/bot/transactionTableRow.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import { format, parseISO } from "date-fns";
import { systemName } from "../config.js";
import type { TransactionRow } from "../types.js";
import { normalizeCurrency } from "../utils/currency.js";

// Date stamp (yyyy-MM-dd) computed once, when this module is first evaluated;
// every row built afterwards reuses it as its "scraped at" value.
const currentDate = format(Date.now(), "yyyy-MM-dd");
// Chooses the value written to the "hash" column: "moneyman" selects
// tx.uniqueId, any other value (including unset) selects tx.hash.
const { TRANSACTION_HASH_TYPE } = process.env;

/**
 * Column names of the transaction table, in column order.
 *
 * Declared `as const` so the entries keep their literal types; the
 * `TableRow` type derives its keys from this tuple.
 */
export const TableHeaders = [
"date",
"amount",
"description",
"memo",
"category",
"account",
"hash",
"comment",
"scraped at",
"scraped by",
"identifier",
"chargedCurrency",
] as const;

/**
 * One row of the transaction table, keyed by the names in `TableHeaders`.
 * Every column is a string except `amount`, which is a number.
 */
export type TableRow = { amount: number } & Record<
  Exclude<(typeof TableHeaders)[number], "amount">,
  string
>;

/**
 * Flattens a scraped transaction into the row shape described by `TableRow`.
 *
 * @param tx - the transaction to convert
 * @returns a row whose keys follow the `TableHeaders` column order
 */
export function tableRow(tx: TransactionRow): TableRow {
  // ISO date string from the scraper, re-rendered as day/month/year.
  const date = format(parseISO(tx.date), "dd/MM/yyyy", {});

  // The "hash" column holds the uniqueId only when the moneyman hash type is selected.
  const hash = TRANSACTION_HASH_TYPE === "moneyman" ? tx.uniqueId : tx.hash;

  const identifier = `${tx.identifier ?? ""}`;

  // Assuming the transaction is not pending, the original currency can stand in
  // for the charged currency when the latter normalizes to a falsy value.
  const chargedCurrency =
    normalizeCurrency(tx.chargedCurrency) ||
    normalizeCurrency(tx.originalCurrency);

  const row: TableRow = {
    date,
    amount: tx.chargedAmount,
    description: tx.description,
    memo: tx.memo ?? "",
    category: tx.category ?? "",
    account: tx.account,
    hash,
    comment: "",
    "scraped at": currentDate,
    "scraped by": systemName,
    identifier,
    chargedCurrency,
  };
  return row;
}

0 comments on commit 54ffc57

Please sign in to comment.