From 851267caebc718c0816ef0b5ab1fd017a9f9d60b Mon Sep 17 00:00:00 2001 From: n-dusan Date: Thu, 20 Jun 2024 13:48:39 +0200 Subject: [PATCH 1/6] fix(rdf): reset `QueryBuilder` and log rdf triples in errors --- src/db/models/changed_library_document/manager.rs | 1 + src/db/models/library/manager.rs | 1 + src/db/models/library_change/manager.rs | 1 + src/history/rdf/graph.rs | 4 +++- 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/db/models/changed_library_document/manager.rs b/src/db/models/changed_library_document/manager.rs index 0ab29f2..68ec188 100644 --- a/src/db/models/changed_library_document/manager.rs +++ b/src/db/models/changed_library_document/manager.rs @@ -25,6 +25,7 @@ impl super::TxManager for DatabaseTransaction { }); let query = query_builder.build(); query.execute(&mut *self.tx).await?; + query_builder.reset(); } Ok(()) } diff --git a/src/db/models/library/manager.rs b/src/db/models/library/manager.rs index 3197beb..c343256 100644 --- a/src/db/models/library/manager.rs +++ b/src/db/models/library/manager.rs @@ -44,6 +44,7 @@ impl super::TxManager for DatabaseTransaction { }); let query = query_builder.build(); query.execute(&mut *self.tx).await?; + query_builder.reset(); } Ok(()) } diff --git a/src/db/models/library_change/manager.rs b/src/db/models/library_change/manager.rs index 03a859c..1f83e29 100644 --- a/src/db/models/library_change/manager.rs +++ b/src/db/models/library_change/manager.rs @@ -85,6 +85,7 @@ impl super::TxManager for DatabaseTransaction { }); let query = query_builder.build(); query.execute(&mut *self.tx).await?; + query_builder.reset(); } Ok(()) } diff --git a/src/history/rdf/graph.rs b/src/history/rdf/graph.rs index ba838ed..6f9e53e 100644 --- a/src/history/rdf/graph.rs +++ b/src/history/rdf/graph.rs @@ -115,7 +115,9 @@ impl StelaeGraph { let triple = self .triples_matching_inner(subject, predicate, object) .next() - .context("Expected to find triple matching")?; + .context(format!( + "Expected to find triple matching s={subject:?}, p={predicate:?}, o={object:?}" + ))?; Ok(triple?) } From 252f80b70381590821792b843bc3e21deaa0bd10 Mon Sep 17 00:00:00 2001 From: n-dusan Date: Thu, 27 Jun 2024 15:47:34 +0200 Subject: [PATCH 2/6] fix: add `stele` attribute to `library` model The query on the `library` model that resolves `url` to `mpath` needs to also include the stele that we're querying for. --- migrations/sqlite/20240115152953_initial_db.up.sql | 4 +++- src/db/models/library/manager.rs | 13 +++++++++---- src/db/models/library/mod.rs | 8 +++++--- src/history/changes.rs | 6 +++++- src/server/api/versions/mod.rs | 2 +- 5 files changed, 23 insertions(+), 10 deletions(-) diff --git a/migrations/sqlite/20240115152953_initial_db.up.sql b/migrations/sqlite/20240115152953_initial_db.up.sql index a681473..8d10d78 100644 --- a/migrations/sqlite/20240115152953_initial_db.up.sql +++ b/migrations/sqlite/20240115152953_initial_db.up.sql @@ -18,7 +18,9 @@ CREATE TABLE document_element ( ); CREATE TABLE library ( mpath TEXT PRIMARY KEY, - url TEXT + url TEXT, + stele TEXT, + CONSTRAINT fk_stele FOREIGN KEY (stele) REFERENCES stele(name) ON DELETE CASCADE ); CREATE TABLE publication ( id TEXT, diff --git a/src/db/models/library/manager.rs b/src/db/models/library/manager.rs index c343256..bba67e9 100644 --- a/src/db/models/library/manager.rs +++ b/src/db/models/library/manager.rs @@ -10,11 +10,11 @@ impl super::Manager for DatabaseConnection { /// /// # Errors /// Errors if can't establish a connection to the database. - async fn find_lib_mpath_by_url(&self, url: &str) -> anyhow::Result { + async fn find_lib_mpath_by_url(&self, url: &str, stele: &str) -> anyhow::Result { let statement = " SELECT l.mpath FROM library l - WHERE l.url = $1 + WHERE l.url = $1 AND l.stele = $2 LIMIT 1 "; let row = match self.kind { @@ -22,6 +22,7 @@ impl super::Manager for DatabaseConnection { let mut connection = self.pool.acquire().await?; sqlx::query_as::<_, (String,)>(statement) .bind(url) + .bind(stele) .fetch_one(&mut *connection) .await? } @@ -37,10 +38,14 @@ impl super::TxManager for DatabaseTransaction { /// # Errors /// Errors if the libraries cannot be inserted into the database. async fn insert_bulk(&mut self, libraries: Vec) -> anyhow::Result<()> { - let mut query_builder = QueryBuilder::new("INSERT OR IGNORE INTO library ( mpath, url ) "); + let mut query_builder = + QueryBuilder::new("INSERT OR IGNORE INTO library ( mpath, url, stele ) "); for chunk in libraries.chunks(BATCH_SIZE) { query_builder.push_values(chunk, |mut bindings, lb| { - bindings.push_bind(&lb.mpath).push_bind(&lb.url); + bindings + .push_bind(&lb.mpath) + .push_bind(&lb.url) + .push_bind(&lb.stele); }); let query = query_builder.build(); query.execute(&mut *self.tx).await?; diff --git a/src/db/models/library/mod.rs b/src/db/models/library/mod.rs index e1cf49f..ebab47e 100644 --- a/src/db/models/library/mod.rs +++ b/src/db/models/library/mod.rs @@ -7,7 +7,7 @@ pub mod manager; #[async_trait] pub trait Manager { /// Find one library materialized path by url. - async fn find_lib_mpath_by_url(&self, url: &str) -> anyhow::Result; + async fn find_lib_mpath_by_url(&self, url: &str, stele: &str) -> anyhow::Result; } /// Trait for managing transactions on publication versions. @@ -24,12 +24,14 @@ pub struct Library { pub mpath: String, /// Url to the collection. pub url: String, + /// Reference to the stele. + pub stele: String, } impl Library { /// Create a new library. #[must_use] - pub const fn new(mpath: String, url: String) -> Self { - Self { mpath, url } + pub const fn new(mpath: String, url: String, stele: String) -> Self { + Self { mpath, url, stele } } } diff --git a/src/history/changes.rs b/src/history/changes.rs index a8d1950..f9ac3dc 100644 --- a/src/history/changes.rs +++ b/src/history/changes.rs @@ -425,7 +425,11 @@ async fn insert_library_changes( let el_status = pub_graph.literal_from_triple_matching(Some(version), Some(oll::status), None)?; let library_status = Status::from_string(&el_status)?; - library_bulk.push(Library::new(library_mpath.clone(), url.clone())); + library_bulk.push(Library::new( + library_mpath.clone(), + url.clone(), + publication.stele.clone(), + )); let pub_version_hash = md5::compute(publication.name.clone() + &codified_date + &publication.stele); library_changes_bulk.push(LibraryChange::new( diff --git a/src/server/api/versions/mod.rs b/src/server/api/versions/mod.rs index 05e4c15..8433cf3 100644 --- a/src/server/api/versions/mod.rs +++ b/src/server/api/versions/mod.rs @@ -169,7 +169,7 @@ async fn publication_versions( .unwrap_or_default(); versions = doc_versions.into_iter().map(Into::into).collect(); } else { - let lib_mpath = library::Manager::find_lib_mpath_by_url(db, &url).await; + let lib_mpath = library::Manager::find_lib_mpath_by_url(db, &url, &publication.stele).await; if let Ok(mpath) = lib_mpath { let coll_versions = library_change::Manager::find_all_collection_versions_by_mpath_and_publication( From 8fadcb755c0d9e54b76eca7ff4d4e20e35148103 Mon Sep 17 00:00:00 2001 From: n-dusan Date: Thu, 27 Jun 2024 15:48:39 +0200 Subject: [PATCH 3/6] fix(rdf): skip versions which don't have a `hasChanges` attribute --- src/history/changes.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/history/changes.rs b/src/history/changes.rs index f9ac3dc..a1eb522 100644 --- a/src/history/changes.rs +++ b/src/history/changes.rs @@ -351,8 +351,11 @@ async fn insert_document_changes( let doc_id = pub_graph.literal_from_triple_matching(Some(version), Some(oll::docId), None)?; document::TxManager::create(tx, &doc_id).await?; - let changes_uri = - pub_graph.iri_from_triple_matching(Some(version), Some(oll::hasChanges), None)?; + let Ok(changes_uri) = + pub_graph.iri_from_triple_matching(Some(version), Some(oll::hasChanges), None) + else { + continue; + }; let changes = Bag::new(pub_graph, changes_uri); for change in changes.items()? { let doc_mpath = pub_graph.literal_from_triple_matching( From 4aa463efeb0be094d31b524ef2b3cffde761a842 Mon Sep 17 00:00:00 2001 From: n-dusan Date: Sat, 29 Jun 2024 20:03:50 +0200 Subject: [PATCH 4/6] fix(db): add more indexes to optimize versions endpoint performance For document or collection versions we want the query times to be really fast. Since for both types of queries we do LEFT JOINS to retrieve relevant versions, index the `publication_version` and `publication_has_publication_versions` ids. Add a couple of additional indexes for the `document_change` and `library_change` tables. --- migrations/sqlite/20240115152953_initial_db.down.sql | 10 ++++++++++ migrations/sqlite/20240115152953_initial_db.up.sql | 12 ++++++++++++ 2 files changed, 22 insertions(+) diff --git a/migrations/sqlite/20240115152953_initial_db.down.sql b/migrations/sqlite/20240115152953_initial_db.down.sql index f6141c9..9f8cf7a 100644 --- a/migrations/sqlite/20240115152953_initial_db.down.sql +++ b/migrations/sqlite/20240115152953_initial_db.down.sql @@ -2,7 +2,17 @@ PRAGMA foreign_keys = OFF; DROP INDEX IF EXISTS changed_library_document_library_mpath_idx; +DROP INDEX IF EXISTS library_change_status_idx; +DROP INDEX IF EXISTS library_change_publication_version_idx; DROP INDEX IF EXISTS library_change_library_mpath_idx; + +DROP INDEX IF EXISTS publication_version_id_idx; + +DROP INDEX IF EXISTS publication_has_publication_versions_publication_version_id_idx; +DROP INDEX IF EXISTS publication_has_publication_versions_publication_id_idx; + +DROP INDEX IF EXISTS document_change_doc_mpath_pub_id_status_idx; +DROP INDEX IF EXISTS document_change_publication_version_idx; DROP INDEX IF EXISTS document_change_doc_mpath_idx; DROP TABLE IF EXISTS changed_library_document; diff --git a/migrations/sqlite/20240115152953_initial_db.up.sql b/migrations/sqlite/20240115152953_initial_db.up.sql index 8d10d78..e70b0f8 100644 --- a/migrations/sqlite/20240115152953_initial_db.up.sql +++ b/migrations/sqlite/20240115152953_initial_db.up.sql @@ -82,7 +82,16 @@ CREATE TABLE document_change ( ON DELETE CASCADE, PRIMARY KEY (id) ); + CREATE INDEX document_change_doc_mpath_idx ON document_change(doc_mpath COLLATE NOCASE); +CREATE INDEX document_change_publication_version_idx ON document_change(publication_version_id); +CREATE INDEX document_change_doc_mpath_pub_id_status_idx ON document_change(doc_mpath COLLATE NOCASE, status); + +CREATE INDEX publication_has_publication_versions_publication_id_idx ON publication_has_publication_versions(publication_id); +CREATE INDEX publication_has_publication_versions_publication_version_id_idx ON publication_has_publication_versions(publication_version_id); + +CREATE INDEX publication_version_id_idx ON publication_version(id); + CREATE TABLE library_change ( publication_version_id TEXT, status TEXT, @@ -103,6 +112,9 @@ CREATE TABLE changed_library_document ( PRIMARY KEY (document_change_id, library_mpath) ); CREATE INDEX library_change_library_mpath_idx ON library_change(library_mpath COLLATE NOCASE); +CREATE INDEX library_change_publication_version_idx ON library_change(publication_version_id); +CREATE INDEX library_change_status_idx ON library_change(library_mpath COLLATE NOCASE, status); + CREATE INDEX changed_library_document_library_mpath_idx ON changed_library_document(library_mpath COLLATE NOCASE); PRAGMA optimize; \ No newline at end of file From ac6b0afc8d53f4746b8b387ad4c72b0c53f50294 Mon Sep 17 00:00:00 2001 From: n-dusan Date: Mon, 1 Jul 2024 18:52:49 +0200 Subject: [PATCH 5/6] fix(db): map document change urls to stele It can happen (although highly unlikely) that urls aren't unique. In those cases, resolving the document_mpath will not resolve to the correct stele. To resolve, add a `stele` column to `document_element`, the same as we did for `library` model. --- migrations/sqlite/20240115152953_initial_db.up.sql | 4 +++- src/db/models/document_element/manager.rs | 13 ++++++++----- src/db/models/document_element/mod.rs | 7 +++++-- src/history/changes.rs | 1 + src/server/api/versions/mod.rs | 3 ++- 5 files changed, 19 insertions(+), 9 deletions(-) diff --git a/migrations/sqlite/20240115152953_initial_db.up.sql b/migrations/sqlite/20240115152953_initial_db.up.sql index e70b0f8..854e6a7 100644 --- a/migrations/sqlite/20240115152953_initial_db.up.sql +++ b/migrations/sqlite/20240115152953_initial_db.up.sql @@ -11,10 +11,12 @@ CREATE TABLE document_element ( doc_mpath TEXT, url TEXT, doc_id TEXT, + stele TEXT, CONSTRAINT fk_doc_id FOREIGN KEY (doc_id) REFERENCES document(doc_id), - PRIMARY KEY (doc_mpath) + PRIMARY KEY (doc_mpath), + CONSTRAINT fk_stele FOREIGN KEY (stele) REFERENCES stele(name) ON DELETE CASCADE ); CREATE TABLE library ( mpath TEXT PRIMARY KEY, diff --git a/src/db/models/document_element/manager.rs b/src/db/models/document_element/manager.rs index c4e213c..3b7e991 100644 --- a/src/db/models/document_element/manager.rs +++ b/src/db/models/document_element/manager.rs @@ -12,11 +12,11 @@ impl super::Manager for DatabaseConnection { /// /// # Errors /// Errors if can't establish a connection to the database. - async fn find_doc_mpath_by_url(&self, url: &str) -> anyhow::Result { + async fn find_doc_mpath_by_url(&self, url: &str, stele: &str) -> anyhow::Result { let statement = " SELECT de.doc_mpath FROM document_element de - WHERE de.url = $1 + WHERE de.url = $1 AND de.stele = $2 LIMIT 1 "; let row = match self.kind { @@ -24,6 +24,7 @@ impl super::Manager for DatabaseConnection { let mut connection = self.pool.acquire().await?; sqlx::query_as::<_, (String,)>(statement) .bind(url) + .bind(stele) .fetch_one(&mut *connection) .await? } @@ -39,14 +40,16 @@ impl super::TxManager for DatabaseTransaction { /// # Errors /// Errors if the document elements cannot be inserted into the database. async fn insert_bulk(&mut self, document_elements: Vec) -> anyhow::Result<()> { - let mut query_builder = - QueryBuilder::new("INSERT OR IGNORE INTO document_element ( doc_mpath, url, doc_id ) "); + let mut query_builder = QueryBuilder::new( + "INSERT OR IGNORE INTO document_element ( doc_mpath, url, doc_id, stele ) ", + ); for chunk in document_elements.chunks(BATCH_SIZE) { query_builder.push_values(chunk, |mut bindings, de| { bindings .push_bind(&de.doc_mpath) .push_bind(&de.url) - .push_bind(&de.doc_id); + .push_bind(&de.doc_id) + .push_bind(&de.stele); }); let query = query_builder.build(); query.execute(&mut *self.tx).await?; diff --git a/src/db/models/document_element/mod.rs b/src/db/models/document_element/mod.rs index fa03832..2d989f3 100644 --- a/src/db/models/document_element/mod.rs +++ b/src/db/models/document_element/mod.rs @@ -7,7 +7,7 @@ pub mod manager; #[async_trait] pub trait Manager { /// Find one document materialized path by url. - async fn find_doc_mpath_by_url(&self, url: &str) -> anyhow::Result; + async fn find_doc_mpath_by_url(&self, url: &str, stele: &str) -> anyhow::Result; } /// Trait for managing transactional document elements. @@ -26,16 +26,19 @@ pub struct DocumentElement { pub url: String, /// Unique document identifier. pub doc_id: String, + /// Reference to the stele. + pub stele: String, } impl DocumentElement { /// Create a new document element. #[must_use] - pub const fn new(doc_mpath: String, url: String, doc_id: String) -> Self { + pub const fn new(doc_mpath: String, url: String, doc_id: String, stele: String) -> Self { Self { doc_mpath, url, doc_id, + stele, } } } diff --git a/src/history/changes.rs b/src/history/changes.rs index a1eb522..355931e 100644 --- a/src/history/changes.rs +++ b/src/history/changes.rs @@ -369,6 +369,7 @@ async fn insert_document_changes( doc_mpath.clone(), url.clone(), doc_id.clone(), + publication.stele.clone(), )); let reason = pub_graph .literal_from_triple_matching(Some(&change), Some(oll::reason), None) diff --git a/src/server/api/versions/mod.rs b/src/server/api/versions/mod.rs index 8433cf3..a26f148 100644 --- a/src/server/api/versions/mod.rs +++ b/src/server/api/versions/mod.rs @@ -157,7 +157,8 @@ async fn publication_versions( ) -> Vec { tracing::debug!("Fetching publication versions for '{url}'"); let mut versions = vec![]; - let doc_mpath = document_element::Manager::find_doc_mpath_by_url(db, &url).await; + let doc_mpath = + document_element::Manager::find_doc_mpath_by_url(db, &url, &publication.stele).await; if let Ok(mpath) = doc_mpath { let doc_versions = document_change::Manager::find_all_document_versions_by_mpath_and_publication( From ad7c12ab2ab2692069120271d095472bbdaa076e Mon Sep 17 00:00:00 2001 From: n-dusan Date: Mon, 1 Jul 2024 18:57:21 +0200 Subject: [PATCH 6/6] chore: bump pre-release version --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d036e9d..12d1ead 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3110,7 +3110,7 @@ dependencies = [ [[package]] name = "stelae" -version = "0.3.0-alpha.4" +version = "0.3.0-alpha.5" dependencies = [ "actix-http", "actix-service", diff --git a/Cargo.toml b/Cargo.toml index c7956d4..eb13f82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "stelae" description = "A collection of tools in Rust and Python for preserving, authenticating, and accessing laws in perpetuity." -version = "0.3.0-alpha.4" +version = "0.3.0-alpha.5" edition = "2021" readme = "README.md" license = "AGPL-3.0"