Skip to content

Commit

Permalink
blob erasure is not synced, need to clean them up before each content…
Browse files Browse the repository at this point in the history
… hash check
  • Loading branch information
zadam committed Jul 27, 2023
1 parent 8edb542 commit ce3834e
Show file tree
Hide file tree
Showing 8 changed files with 201 additions and 183 deletions.
4 changes: 2 additions & 2 deletions db/migrations/0210__consistency_checks.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ module.exports = async () => {
const beccaLoader = require("../../src/becca/becca_loader");
const log = require("../../src/services/log");
const consistencyChecks = require("../../src/services/consistency_checks");
const noteService = require("../../src/services/notes");
const eraseService = require("../../src/services/erase");

await cls.init(async () => {
// precaution for the 0211 migration
noteService.eraseDeletedNotesNow();
eraseService.eraseDeletedNotesNow();

beccaLoader.load();

Expand Down
2 changes: 2 additions & 0 deletions src/becca/entities/abstract_becca_entity.js
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ class AbstractBeccaEntity {
}

sql.execute("DELETE FROM blobs WHERE blobId = ?", [oldBlobId]);
// blobs are not marked as erased in entity_changes, they are just purged completely
// this is because technically every keystroke can create a new blob and there would be just too many
sql.execute("DELETE FROM entity_changes WHERE entityName = 'blobs' AND entityId = ?", [oldBlobId]);
}

Expand Down
4 changes: 2 additions & 2 deletions src/routes/api/branches.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const sql = require('../../services/sql');
const utils = require('../../services/utils');
const entityChangesService = require('../../services/entity_changes');
const treeService = require('../../services/tree');
const noteService = require('../../services/notes');
const eraseService = require('../../services/erase');
const becca = require('../../becca/becca');
const TaskContext = require('../../services/task_context');
const branchService = require("../../services/branches");
Expand Down Expand Up @@ -193,7 +193,7 @@ function deleteBranch(req) {
if (eraseNotes) {
// erase automatically means deleting all clones + note itself
branch.getNote().deleteNote(deleteId, taskContext);
noteService.eraseNotesWithDeleteId(deleteId);
eraseService.eraseNotesWithDeleteId(deleteId);
noteDeleted = true;
} else {
noteDeleted = branch.deleteBranch(deleteId, taskContext);
Expand Down
7 changes: 4 additions & 3 deletions src/routes/api/notes.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"use strict";

const noteService = require('../../services/notes');
const eraseService = require('../../services/erase');
const treeService = require('../../services/tree');
const sql = require('../../services/sql');
const utils = require('../../services/utils');
Expand Down Expand Up @@ -65,7 +66,7 @@ function deleteNote(req) {
note.deleteNote(deleteId, taskContext);

if (eraseNotes) {
noteService.eraseNotesWithDeleteId(deleteId);
eraseService.eraseNotesWithDeleteId(deleteId);
}

if (last) {
Expand Down Expand Up @@ -150,11 +151,11 @@ function duplicateSubtree(req) {
}

function eraseDeletedNotesNow() {
noteService.eraseDeletedNotesNow();
eraseService.eraseDeletedNotesNow();
}

function eraseUnusedAttachmentsNow() {
noteService.eraseUnusedAttachmentsNow();
eraseService.eraseUnusedAttachmentsNow();
}

function getDeleteNotesPreview(req) {
Expand Down
4 changes: 4 additions & 0 deletions src/services/content_hash.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@
const sql = require('./sql');
const utils = require('./utils');
const log = require('./log');
const eraseService = require("./erase");

function getEntityHashes() {
// blob erasure is not synced, we should check before each sync if there's some blob to erase
eraseService.eraseUnusedBlobs();

const startTime = new Date();

const hashRows = sql.getRawRows(`
Expand Down
2 changes: 1 addition & 1 deletion src/services/entity_changes.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ function addEntityChangesForSector(entityName, sector) {
}
});

log.info(`Added sector ${sector} of '${entityName}' to sync queue in ${Date.now() - startTime}ms.`);
log.info(`Added sector ${sector} of '${entityName}' (${entityChanges.length} entities) to sync queue in ${Date.now() - startTime}ms.`);
}

function cleanupEntityChangesForMissingEntities(entityName, entityPrimaryKey) {
Expand Down
186 changes: 186 additions & 0 deletions src/services/erase.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
const sql = require("./sql.js");
const revisionService = require("./revisions.js");
const log = require("./log.js");
const entityChangesService = require("./entity_changes.js");
const optionService = require("./options.js");
const dateUtils = require("./date_utils.js");
const sqlInit = require("./sql_init.js");
const cls = require("./cls.js");

/**
 * Physically removes the given notes and everything that hangs off them.
 *
 * The note rows are deleted, their entity_changes rows are flagged as erased,
 * and then the branches, attributes and revisions referencing those notes are
 * erased as well. Does nothing for an empty list.
 *
 * NOTE(review): the `(???)` marker is presumably expanded by the sql helper into
 * the list of supplied ids - confirm in sql.js.
 *
 * @param {string[]} noteIdsToErase - IDs of the notes to erase
 */
function eraseNotes(noteIdsToErase) {
    if (noteIdsToErase.length === 0) {
        return;
    }

    // runs a query scoped to the erased notes and plucks a single id column out of the result rows
    const selectDependentIds = (query, idColumn) => sql.getManyRows(query, noteIdsToErase).map(row => row[idColumn]);

    sql.executeMany(`DELETE FROM notes WHERE noteId IN (???)`, noteIdsToErase);

    const noteEntityChanges = sql.getManyRows(`SELECT * FROM entity_changes WHERE entityName = 'notes' AND entityId IN (???)`, noteIdsToErase);
    setEntityChangesAsErased(noteEntityChanges);

    // the "dependent" entities of the erased notes have to go as well
    eraseBranches(selectDependentIds(`SELECT branchId FROM branches WHERE noteId IN (???)`, 'branchId'));
    eraseAttributes(selectDependentIds(`SELECT attributeId FROM attributes WHERE noteId IN (???)`, 'attributeId'));
    revisionService.eraseRevisions(selectDependentIds(`SELECT revisionId FROM revisions WHERE noteId IN (???)`, 'revisionId'));

    log.info(`Erased notes: ${JSON.stringify(noteIdsToErase)}`);
}

/**
 * Flags every supplied entity change as erased and hands it to the
 * entity changes service.
 *
 * The passed objects are modified in place (their `isErased` becomes `true`).
 *
 * @param {object[]} entityChanges - entity change rows to mark as erased
 */
function setEntityChangesAsErased(entityChanges) {
    for (const entityChange of entityChanges) {
        // Object.assign returns the very same (now mutated) object
        entityChangesService.addEntityChange(Object.assign(entityChange, { isErased: true }));
    }
}

/**
 * Deletes the given branch rows and flags their entity_changes rows as erased.
 * Does nothing for an empty list.
 *
 * @param {string[]} branchIdsToErase - IDs of the branches to erase
 */
function eraseBranches(branchIdsToErase) {
    if (branchIdsToErase.length === 0) {
        return;
    }

    sql.executeMany(`DELETE FROM branches WHERE branchId IN (???)`, branchIdsToErase);

    const branchEntityChanges = sql.getManyRows(
        `SELECT * FROM entity_changes WHERE entityName = 'branches' AND entityId IN (???)`,
        branchIdsToErase
    );
    setEntityChangesAsErased(branchEntityChanges);

    const erasedIdsJson = JSON.stringify(branchIdsToErase);
    log.info(`Erased branches: ${erasedIdsJson}`);
}

/**
 * Deletes the given attribute rows and flags their entity_changes rows as erased.
 * Does nothing for an empty list.
 *
 * @param {string[]} attributeIdsToErase - IDs of the attributes to erase
 */
function eraseAttributes(attributeIdsToErase) {
    if (attributeIdsToErase.length === 0) {
        return;
    }

    sql.executeMany(`DELETE FROM attributes WHERE attributeId IN (???)`, attributeIdsToErase);

    const attributeEntityChanges = sql.getManyRows(
        `SELECT * FROM entity_changes WHERE entityName = 'attributes' AND entityId IN (???)`,
        attributeIdsToErase
    );
    setEntityChangesAsErased(attributeEntityChanges);

    const erasedIdsJson = JSON.stringify(attributeIdsToErase);
    log.info(`Erased attributes: ${erasedIdsJson}`);
}

/**
 * Deletes the given attachment rows and flags their entity_changes rows as erased.
 * Does nothing for an empty list.
 *
 * @param {string[]} attachmentIdsToErase - IDs of the attachments to erase
 */
function eraseAttachments(attachmentIdsToErase) {
    if (attachmentIdsToErase.length === 0) {
        return;
    }

    sql.executeMany(`DELETE FROM attachments WHERE attachmentId IN (???)`, attachmentIdsToErase);

    const attachmentEntityChanges = sql.getManyRows(
        `SELECT * FROM entity_changes WHERE entityName = 'attachments' AND entityId IN (???)`,
        attachmentIdsToErase
    );
    setEntityChangesAsErased(attachmentEntityChanges);

    const erasedIdsJson = JSON.stringify(attachmentIdsToErase);
    log.info(`Erased attachments: ${erasedIdsJson}`);
}

/**
 * Purges blobs that are not referenced by any note, attachment or revision.
 *
 * Both the blob rows and their entity_changes rows are deleted outright.
 * Does nothing when no unused blob is found.
 */
function eraseUnusedBlobs() {
    // a blob counts as unused when none of the three LEFT JOINs finds a row pointing at it
    const unusedBlobsQuery = `
SELECT blobs.blobId
FROM blobs
LEFT JOIN notes ON notes.blobId = blobs.blobId
LEFT JOIN attachments ON attachments.blobId = blobs.blobId
LEFT JOIN revisions ON revisions.blobId = blobs.blobId
WHERE notes.noteId IS NULL
AND attachments.attachmentId IS NULL
AND revisions.revisionId IS NULL`;

    const unusedBlobIds = sql.getColumn(unusedBlobsQuery);

    if (unusedBlobIds.length === 0) {
        return;
    }

    // blobs are not marked as erased in entity_changes, they are just purged completely
    // this is because technically every keystroke can create a new blob and there would be just too many
    const purgeStatements = [
        `DELETE FROM blobs WHERE blobId IN (???)`,
        `DELETE FROM entity_changes WHERE entityName = 'blobs' AND entityId IN (???)`
    ];

    for (const statement of purgeStatements) {
        sql.executeMany(statement, unusedBlobIds);
    }

    log.info(`Erased unused blobs: ${JSON.stringify(unusedBlobIds)}`);
}

/**
 * Erases all soft-deleted notes, branches, attributes and attachments whose
 * `utcDateModified` is at or before the cutoff (now minus the given number of
 * seconds), and finally purges blobs which became unused.
 *
 * @param {number|null} [eraseEntitiesAfterTimeInSeconds=null] - minimum age (in seconds) of a deleted entity
 *        before it gets erased; when null, the 'eraseEntitiesAfterTimeInSeconds' option is read instead
 */
function eraseDeletedEntities(eraseEntitiesAfterTimeInSeconds = null) {
    // running inside a transaction is important also so that the erased entity changes are sent to the connected clients
    sql.transactional(() => {
        const eraseAfterSeconds = eraseEntitiesAfterTimeInSeconds === null
            ? optionService.getOptionInt('eraseEntitiesAfterTimeInSeconds')
            : eraseEntitiesAfterTimeInSeconds;

        const cutoffDate = new Date(Date.now() - eraseAfterSeconds * 1000);

        // each step pairs the query selecting the expired IDs with the function erasing them; order matters
        const erasureSteps = [
            ["SELECT noteId FROM notes WHERE isDeleted = 1 AND utcDateModified <= ?", eraseNotes],
            ["SELECT branchId FROM branches WHERE isDeleted = 1 AND utcDateModified <= ?", eraseBranches],
            ["SELECT attributeId FROM attributes WHERE isDeleted = 1 AND utcDateModified <= ?", eraseAttributes],
            ["SELECT attachmentId FROM attachments WHERE isDeleted = 1 AND utcDateModified <= ?", eraseAttachments]
        ];

        for (const [expiredIdsQuery, eraseEntities] of erasureSteps) {
            const expiredIds = sql.getColumn(expiredIdsQuery, [dateUtils.utcDateTimeStr(cutoffDate)]);

            eraseEntities(expiredIds);
        }

        eraseUnusedBlobs();
    });
}

/**
 * Erases every soft-deleted note, branch, attribute and attachment carrying
 * the given deleteId, then purges blobs which became unused.
 *
 * @param {string} deleteId - identifier shared by the entities deleted together
 */
function eraseNotesWithDeleteId(deleteId) {
    // each step pairs the query selecting the matching IDs with the function erasing them; order matters
    const erasureSteps = [
        ["SELECT noteId FROM notes WHERE isDeleted = 1 AND deleteId = ?", eraseNotes],
        ["SELECT branchId FROM branches WHERE isDeleted = 1 AND deleteId = ?", eraseBranches],
        ["SELECT attributeId FROM attributes WHERE isDeleted = 1 AND deleteId = ?", eraseAttributes],
        ["SELECT attachmentId FROM attachments WHERE isDeleted = 1 AND deleteId = ?", eraseAttachments]
    ];

    for (const [matchingIdsQuery, eraseEntities] of erasureSteps) {
        const matchingIds = sql.getColumn(matchingIdsQuery, [deleteId]);

        eraseEntities(matchingIds);
    }

    eraseUnusedBlobs();
}

/**
 * Runs the deleted-entities erasure with a zero-second age threshold,
 * i.e. without waiting for the configured time to pass.
 */
function eraseDeletedNotesNow() {
    const noWaitSeconds = 0;

    eraseDeletedEntities(noWaitSeconds);
}

/**
 * Runs the scheduled-attachments erasure with a zero-second threshold,
 * i.e. without waiting for the configured time to pass.
 */
function eraseUnusedAttachmentsNow() {
    const noWaitSeconds = 0;

    eraseScheduledAttachments(noWaitSeconds);
}

/**
 * Erases attachments whose `utcDateScheduledForErasureSince` is older than
 * the cutoff (now minus the given number of seconds).
 *
 * @param {number|null} [eraseUnusedAttachmentsAfterSeconds=null] - how long (in seconds) an attachment must have
 *        been scheduled for erasure; when null, the 'eraseUnusedAttachmentsAfterSeconds' option is read instead
 */
function eraseScheduledAttachments(eraseUnusedAttachmentsAfterSeconds = null) {
    const eraseAfterSeconds = eraseUnusedAttachmentsAfterSeconds === null
        ? optionService.getOptionInt('eraseUnusedAttachmentsAfterSeconds')
        : eraseUnusedAttachmentsAfterSeconds;

    const cutOffTimestamp = Date.now() - (eraseAfterSeconds * 1000);
    const cutOffDate = dateUtils.utcDateTimeStr(new Date(cutOffTimestamp));

    eraseAttachments(
        sql.getColumn('SELECT attachmentId FROM attachments WHERE utcDateScheduledForErasureSince < ?', [cutOffDate])
    );
}

// Once the DB is ready, schedule the periodic erasure jobs. Every callback is wrapped by cls.wrap().
sqlInit.dbReady.then(() => {
    const MINUTE_MS = 60 * 1000;
    const HOUR_MS = 3600 * 1000;

    const runEraseDeletedEntities = () => eraseDeletedEntities();
    const runEraseScheduledAttachments = () => eraseScheduledAttachments();

    // first cleanup kickoff 5 minutes after startup, attachments follow a minute later
    setTimeout(cls.wrap(runEraseDeletedEntities), 5 * MINUTE_MS);
    setTimeout(cls.wrap(runEraseScheduledAttachments), 6 * MINUTE_MS);

    // afterwards deleted entities are processed every 4 hours, scheduled attachments every hour
    setInterval(cls.wrap(runEraseDeletedEntities), 4 * HOUR_MS);
    setInterval(cls.wrap(runEraseScheduledAttachments), HOUR_MS);
});

// Public API of the erase service; the other functions defined in this file are not exported.
module.exports = {
eraseDeletedNotesNow,
eraseUnusedAttachmentsNow,
eraseNotesWithDeleteId,
eraseUnusedBlobs
};
Loading

0 comments on commit ce3834e

Please sign in to comment.