Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refact: optimize stelae database by hashing composite keys; small ergonomic improvements #44

Merged
merged 7 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "stelae"
description = "A collection of tools in Rust and Python for preserving, authenticating, and accessing laws in perpetuity."
version = "0.3.0-alpha.3"
version = "0.3.0-alpha.4"
edition = "2021"
readme = "README.md"
license = "AGPL-3.0"
Expand All @@ -16,6 +16,7 @@ actix-web = "4"
actix-service = "2.0"
actix-http = "3.2"
async-trait = "0.1.77"
md-5 = "0.10.6"
mime = "0.3.17"
mime_guess = "2.0.4"
anyhow = "1.0"
Expand Down
2 changes: 1 addition & 1 deletion migrations/sqlite/20240115152953_initial_db.down.sql
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ DROP TABLE IF EXISTS publication_has_publication_versions;
DROP TABLE IF EXISTS publication_version;
DROP TABLE IF EXISTS publication;
DROP TABLE IF EXISTS version;
DROP TABLE IF EXISTS library_document;
DROP TABLE IF EXISTS library;
DROP TABLE IF EXISTS document_element;
DROP TABLE IF EXISTS document;
DROP TABLE IF EXISTS stele;

Expand Down
103 changes: 44 additions & 59 deletions migrations/sqlite/20240115152953_initial_db.up.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,113 +7,98 @@ CREATE TABLE stele (
CREATE TABLE document (
doc_id TEXT PRIMARY KEY
);
CREATE TABLE library (
mpath TEXT PRIMARY KEY
);
CREATE TABLE library_document (
collection_mpath TEXT,
CREATE TABLE document_element (
doc_mpath TEXT,
url TEXT,
doc_id TEXT,
start DATE,
end DATE,
CONSTRAINT fk_coll_mpath
FOREIGN KEY (collection_mpath)
REFERENCES library(mpath),
CONSTRAINT fk_doc_id
FOREIGN KEY (doc_id)
REFERENCES document(doc_id),
PRIMARY KEY (collection_mpath, doc_id)
PRIMARY KEY (doc_mpath)
);
CREATE TABLE library (
mpath TEXT PRIMARY KEY,
url TEXT
);
CREATE TABLE publication (
id TEXT,
name TEXT,
date INTEGER,
stele TEXT,
revoked INTEGER,
last_valid_publication_name TEXT,
last_valid_publication_id TEXT,
last_valid_version TEXT,
CONSTRAINT fk_last_valid_version
FOREIGN KEY (last_valid_version)
REFERENCES version(codified_date),
CONSTRAINT fk_last_valid_publication
FOREIGN KEY (last_valid_publication_name, stele)
REFERENCES publication(name, stele),
FOREIGN KEY (last_valid_publication_id)
REFERENCES publication(id),
CONSTRAINT fk_stele
FOREIGN KEY (stele)
REFERENCES stele(name)
ON DELETE CASCADE,
PRIMARY KEY (name, stele)
PRIMARY KEY (id)
);
CREATE TABLE version(
codified_date TEXT PRIMARY KEY
);
CREATE TABLE publication_version (
id TEXT,
version TEXT,
publication TEXT,
stele TEXT,
publication_id TEXT,
build_reason TEXT,
CONSTRAINT fk_publication
FOREIGN KEY (publication, stele)
REFERENCES publication(name, stele)
FOREIGN KEY (publication_id)
REFERENCES publication(id)
ON DELETE CASCADE,
CONSTRAINT fk_version
FOREIGN KEY (version)
REFERENCES version(codified_date),
PRIMARY KEY (publication, version, stele)
PRIMARY KEY (id)
);
CREATE TABLE publication_has_publication_versions (
publication TEXT,
referenced_publication TEXT,
referenced_version TEXT,
stele TEXT,
CONSTRAINT fk_publication FOREIGN KEY (publication, stele) REFERENCES publication(name, stele) ON DELETE CASCADE,
CONSTRAINT fk_referenced_publication FOREIGN KEY (referenced_publication, referenced_version, stele) REFERENCES publication_version(publication, version, stele) ON DELETE CASCADE,
PRIMARY KEY (publication, referenced_publication, referenced_version, stele)
);
CREATE TABLE version(
codified_date TEXT PRIMARY KEY
publication_id TEXT,
publication_version_id TEXT,
CONSTRAINT fk_publication FOREIGN KEY (publication_id) REFERENCES publication(id) ON DELETE CASCADE,
CONSTRAINT fk_referenced_publication_version FOREIGN KEY (publication_version_id) REFERENCES publication_version(id) ON DELETE CASCADE,
PRIMARY KEY (publication_id, publication_version_id)
);
CREATE TABLE document_change (
doc_mpath TEXT,
status TEXT,
url TEXT,
id TEXT,
status INTEGER,
change_reason TEXT,
publication TEXT,
version TEXT,
stele TEXT,
doc_id TEXT,
CONSTRAINT fk_doc_id
FOREIGN KEY (doc_id)
REFERENCES document(doc_id)
publication_version_id TEXT,
doc_mpath TEXT,
CONSTRAINT fk_doc_el
FOREIGN KEY (doc_mpath)
REFERENCES document_element(doc_mpath)
ON DELETE CASCADE,
CONSTRAINT fk_publication_version
FOREIGN KEY (publication, version, stele)
REFERENCES publication_version(publication, version, stele)
FOREIGN KEY (publication_version_id)
REFERENCES publication_version(id)
ON DELETE CASCADE,
PRIMARY KEY (doc_mpath, status, publication, version, stele)
PRIMARY KEY (id)
);
CREATE INDEX document_change_doc_mpath_idx ON document_change(doc_mpath COLLATE NOCASE);
CREATE TABLE library_change (
publication TEXT,
version TEXT,
stele TEXT,
publication_version_id TEXT,
status TEXT,
library_mpath TEXT,
url TEXT,
CONSTRAINT fk_publication_version
FOREIGN KEY (publication, version, stele)
REFERENCES publication_version(publication, version, stele)
FOREIGN KEY (publication_version_id)
REFERENCES publication_version(id)
ON DELETE CASCADE,
PRIMARY KEY (publication, version, stele, library_mpath, status)
PRIMARY KEY (publication_version_id, library_mpath, status)
);
CREATE TABLE changed_library_document (
publication TEXT,
version TEXT,
stele TEXT,
doc_mpath TEXT,
status TEXT,
library_mpath TEXT,
url TEXT,
document_change_id TEXT,
CONSTRAINT fk_document_change
FOREIGN KEY (publication, version, stele, doc_mpath, status)
REFERENCES document_change(publication, version, stele, doc_mpath, status)
FOREIGN KEY (document_change_id)
REFERENCES document_change(id)
ON DELETE CASCADE,
PRIMARY KEY (publication, version, stele, library_mpath, doc_mpath, status)
PRIMARY KEY (document_change_id, library_mpath)
);
CREATE INDEX library_change_library_mpath_idx ON library_change(library_mpath COLLATE NOCASE);
CREATE INDEX changed_library_document_library_mpath_idx ON changed_library_document(library_mpath COLLATE NOCASE);
Expand Down
40 changes: 38 additions & 2 deletions src/db/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Database related module.
#![allow(clippy::unreachable)]
use async_trait::async_trait;
use sqlx::Transaction;
use std::str::FromStr;

use sqlx::any::{self, AnyPoolOptions};
Expand All @@ -12,8 +13,6 @@ use tracing::instrument;
pub mod init;
/// Models for the database.
pub mod models;
/// Statements for the database.
pub mod statements;

#[async_trait]
/// Generic Database
Expand All @@ -25,6 +24,17 @@ pub trait Db {
async fn connect(url: &str) -> anyhow::Result<DatabaseConnection>;
}

#[async_trait]
/// Generic database transaction.
///
/// Describes the life cycle of a transaction: it is started from a
/// connection pool and finished by either committing or rolling back.
pub trait Tx {
/// Begin a transaction on the given connection pool.
///
/// # Errors
/// Errors if the transaction cannot be started.
async fn begin(pool: AnyPool) -> anyhow::Result<DatabaseTransaction>;
/// Commit the transaction, consuming it.
///
/// # Errors
/// Errors if the commit fails.
async fn commit(self) -> anyhow::Result<()>;
/// Roll back the transaction, consuming it.
///
/// # Errors
/// Errors if the rollback fails.
async fn rollback(self) -> anyhow::Result<()>;
}

/// Type of database connection.
#[derive(Debug, Clone)]
pub enum DatabaseKind {
Expand All @@ -43,6 +53,12 @@ pub struct DatabaseConnection {
pub kind: DatabaseKind,
}

/// Wrapper around an open sqlx transaction on the `Any` driver.
pub struct DatabaseTransaction {
/// The underlying sqlx transaction handle that queries are executed against.
pub tx: Transaction<'static, sqlx::Any>,
}

#[async_trait]
impl Db for DatabaseConnection {
/// Connects to a database.
Expand Down Expand Up @@ -72,3 +88,23 @@ impl Db for DatabaseConnection {
Ok(connection)
}
}

#[async_trait]
impl Tx for DatabaseTransaction {
/// Begin a transaction.
async fn begin(pool: AnyPool) -> anyhow::Result<Self> {
let tx = pool.begin().await?;
Ok(Self { tx })
}
/// Commit a transaction.
async fn commit(self) -> anyhow::Result<()> {
self.tx.commit().await?;
Ok(())
}

/// Rollback a transaction.
async fn rollback(self) -> anyhow::Result<()> {
self.tx.rollback().await?;
Ok(())
}
}
21 changes: 0 additions & 21 deletions src/db/models/changed_library_document.rs

This file was deleted.

31 changes: 31 additions & 0 deletions src/db/models/changed_library_document/manager.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//! Manager for the changed library document model.
use super::ChangedLibraryDocument;
use crate::db::{models::BATCH_SIZE, DatabaseTransaction};
use async_trait::async_trait;
use sqlx::QueryBuilder;

#[async_trait]
impl super::TxManager for DatabaseTransaction {
    /// Insert changed library documents in bulk.
    ///
    /// Rows are written in batches of at most `BATCH_SIZE`, one
    /// `INSERT OR IGNORE` statement per batch, so rows that conflict with
    /// existing ones are ignored rather than updated. An empty input executes
    /// no statement.
    ///
    /// # Errors
    /// Errors if the changed library documents cannot be inserted into the database.
    async fn insert_bulk(
        &mut self,
        changed_library_document: Vec<ChangedLibraryDocument>,
    ) -> anyhow::Result<()> {
        let mut query_builder = QueryBuilder::new(
            "INSERT OR IGNORE INTO changed_library_document ( library_mpath, document_change_id ) ",
        );
        for chunk in changed_library_document.chunks(BATCH_SIZE) {
            query_builder.push_values(chunk, |mut bindings, cl| {
                bindings
                    .push_bind(&cl.library_mpath)
                    .push_bind(&cl.document_change_id);
            });
            let query = query_builder.build();
            query.execute(&mut *self.tx).await?;
            // `build()` hands the bound arguments to the query; the builder must be
            // reset back to its initial `INSERT ...` prefix before the next batch,
            // otherwise the following `push_values` call panics.
            query_builder.reset();
        }
        Ok(())
    }
}
34 changes: 34 additions & 0 deletions src/db/models/changed_library_document/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
use async_trait::async_trait;
use serde::{Deserialize, Serialize};

pub mod manager;

/// Trait for managing changed library documents inside a transaction.
#[async_trait]
pub trait TxManager {
/// Insert a collection of changed library documents in bulk.
///
/// # Errors
/// Errors if the changed library documents cannot be inserted.
async fn insert_bulk(
&mut self,
changed_library_document: Vec<ChangedLibraryDocument>,
) -> anyhow::Result<()>;
}

#[derive(sqlx::FromRow, Deserialize, Serialize)]
/// Model linking a document change to a library (collection) it belongs to.
pub struct ChangedLibraryDocument {
/// Foreign key reference to `document_change` id.
pub document_change_id: String,
/// Materialized path to the library.
pub library_mpath: String,
}

impl ChangedLibraryDocument {
    /// Build a changed library document from a `document_change` id and the
    /// materialized path of the library.
    #[must_use]
    pub const fn new(document_change_id: String, library_mpath: String) -> Self {
        Self { library_mpath, document_change_id }
    }
}
8 changes: 0 additions & 8 deletions src/db/models/document.rs

This file was deleted.

23 changes: 23 additions & 0 deletions src/db/models/document/manager.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//! Manager for the document model.
use crate::db::DatabaseTransaction;
use async_trait::async_trait;

#[async_trait]
impl super::TxManager for DatabaseTransaction {
    /// Insert a document id; the statement is `INSERT OR IGNORE`, so an id
    /// that is already present is left untouched.
    ///
    /// Returns the last insert id reported for the executed statement, if any.
    ///
    /// # Errors
    /// Errors if the document cannot be inserted into the database.
    async fn create(&mut self, doc_id: &str) -> anyhow::Result<Option<i64>> {
        let outcome = sqlx::query(
            "
INSERT OR IGNORE INTO document ( doc_id )
VALUES ( $1 )
",
        )
        .bind(doc_id)
        .execute(&mut *self.tx)
        .await?;
        Ok(outcome.last_insert_id())
    }
}
Loading
Loading