diff --git a/CHANGELOG.md b/CHANGELOG.md index 0dc239da9..bee3a970a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add method to store for pruning document views [#491](https://github.com/p2panda/aquadoggo/pull/491) - Introduce `BlobStore` [#484](https://github.com/p2panda/aquadoggo/pull/484) - Task for automatic garbage collection of unused documents and views [#500](https://github.com/p2panda/aquadoggo/pull/500) +- Blobs directory configuration [#549](https://github.com/p2panda/aquadoggo/pull/549) ### Changed diff --git a/README.md b/README.md index d65e45fcb..9ae74aac3 100644 --- a/README.md +++ b/README.md @@ -213,7 +213,7 @@ If you are not working with Rust you can create FFI bindings from the `aquadoggo As an application developer the interface to `aquadoggo` you are likely to use the most is the GraphQL query API. For whichever schema your node supports a custom query api is generated, you use this to fetch data into your app. Results from a collection query can be paginated, filtered. -Fetch one "mushroom" by it's id, returning values for only the selected fields: +Fetch one "mushroom" by its id, returning values for only the selected fields: ```graphql { mushroom: mushroom_0020c3accb0b0c8822ecc0309190e23de5f7f6c82f660ce08023a1d74e055a3d7c4d( diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index 9b1f72b13..0cf5acaec 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -68,7 +68,6 @@ sqlx = { version = "0.6.1", features = [ "sqlite", "runtime-tokio-rustls", ] } -tempfile = "3.7.0" thiserror = "1.0.39" tokio = { version = "1.28.2", features = [ "macros", @@ -112,5 +111,6 @@ rstest = "0.15.0" rstest_reuse = "0.3.0" serde_bytes = "0.11.12" serde_json = "1.0.85" +tempfile = "3.7.0" tower = "0.4.13" tower-service = "0.3.2" diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index 4614ccecf..eba85da0a 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -6,9 +6,6 @@ use p2panda_rs::schema::SchemaId; use crate::network::NetworkConfiguration; -/// Blobs directory name. -pub const BLOBS_DIR_NAME: &str = "blobs"; - /// Configuration object holding all important variables throughout the application. #[derive(Debug, Clone)] pub struct Configuration { @@ -26,9 +23,6 @@ pub struct Configuration { /// _not_ recommended for production settings. pub allow_schema_ids: AllowList, - /// Path to blobs directory. - pub blob_dir: Option, - /// URL / connection string to PostgreSQL or SQLite database. pub database_url: String, @@ -44,6 +38,12 @@ pub struct Configuration { /// 2020. pub http_port: u16, + /// Path to folder where blobs (binary files) are kept and served from. + /// + /// **Warning**: When set to a temporary directory, make sure that also the database itself is + /// not persisted, otherwise you will run into data inconsistencies. + pub blobs_base_path: PathBuf, + /// Number of concurrent workers which defines the maximum of materialization tasks which can /// be worked on simultaneously. 
/// @@ -59,10 +59,10 @@ impl Default for Configuration { fn default() -> Self { Self { allow_schema_ids: AllowList::Wildcard, - blob_dir: None, database_url: "sqlite::memory:".into(), database_max_connections: 32, http_port: 2020, + blobs_base_path: PathBuf::new(), worker_pool_size: 16, network: NetworkConfiguration::default(), } diff --git a/aquadoggo/src/db/stores/blob.rs b/aquadoggo/src/db/stores/blob.rs index 73f1153a3..a33b33081 100644 --- a/aquadoggo/src/db/stores/blob.rs +++ b/aquadoggo/src/db/stores/blob.rs @@ -21,7 +21,7 @@ const BLOB_QUERY_PAGE_SIZE: u64 = 10; pub type BlobData = Vec<u8>; impl SqlStore { - /// Get the data for one blob from the store, identified by it's document id. + /// Get the data for one blob from the store, identified by its document id. pub async fn get_blob(&self, id: &DocumentId) -> Result<Option<BlobData>, BlobStoreError> { // Get the root blob document let blob_document = match self.get_document(id).await? { @@ -36,7 +36,7 @@ impl SqlStore { document_to_blob_data(self, blob_document).await } - /// Get the data for one blob from the store, identified by it's document view id. + /// Get the data for one blob from the store, identified by its document view id. pub async fn get_blob_by_view_id( &self, view_id: &DocumentViewId, @@ -62,7 +62,7 @@ impl SqlStore { // If there are no documents referring to the blob then we continue with the purge. if blob_reverse_relations.is_empty() { - // Collect the document view ids of all pieces this blob has ever referred to in it's + // Collect the document view ids of all pieces this blob has ever referred to in its // `pieces` let blob_piece_ids: Vec<String> = query_scalar( " diff --git a/aquadoggo/src/db/stores/document.rs b/aquadoggo/src/db/stores/document.rs index 6aa16d3e0..6df98d61d 100644 --- a/aquadoggo/src/db/stores/document.rs +++ b/aquadoggo/src/db/stores/document.rs @@ -9,7 +9,7 @@ //! themselves. On completion, the resultant documents are stored and can be retrieved using the //! methods defined here. //! -//! The whole document store can be seen as a live cache. All it's content is derived from +//! The whole document store can be seen as a live cache. All its content is derived from //! operations already stored on the node. It allows easy and quick access to current or pinned //! values. //! @@ -23,12 +23,11 @@ //! state, we call these states document views. When a document is updated it gets a new state, or //! view, which can be referred to by a globally unique document view id. //! -//! The getter methods allow retrieving a document by it's `DocumentId` or it's -//! `DocumentViewId`. The former always returns the most current document state, the latter -//! returns the specific document view if it has already been materialised and stored. Although it -//! is possible to construct a document at any point in it's history if all operations are -//! retained, we use a system of "pinned relations" to identify and materialise only views we -//! explicitly wish to keep. +//! The getter methods allow retrieving a document by its `DocumentId` or its `DocumentViewId`. The +//! former always returns the most current document state, the latter returns the specific document +//! view if it has already been materialised and stored. Although it is possible to construct a +//! document at any point in its history if all operations are retained, we use a system of "pinned +//! relations" to identify and materialise only views we explicitly wish to keep.
use async_trait::async_trait; use log::debug; use p2panda_rs::document::traits::AsDocument; @@ -49,9 +48,9 @@ use crate::db::SqlStore; impl DocumentStore for SqlStore { type Document = StorageDocument; - /// Get a document from the store by it's `DocumentId`. + /// Get a document from the store by its `DocumentId`. /// - /// Retrieves a document in it's most current state from the store. Ignores documents which + /// Retrieves a document in its most current state from the store. Ignores documents which /// contain a DELETE operation. /// /// An error is returned only if a fatal database error occurs. @@ -113,7 +112,7 @@ impl DocumentStore for SqlStore { /// Get a document from the database by `DocumentViewId`. /// - /// Get's a document at a specific point in it's history. Only returns views that have already + /// Gets a document at a specific point in its history. Only returns views that have already /// been materialised and persisted in the store. These are likely to be "pinned views" which /// are relations from other documents, in which case the materialiser service will have /// identified and materialised them ready for querying. @@ -276,7 +275,7 @@ impl SqlStore { /// current view and field values into the `document_views` and `document_view_fields` tables /// respectively. /// - /// If the document already existed in the store then it's current view and view id will be + /// If the document already existed in the store then its current view and view id will be /// updated with those contained on the passed document. /// /// If any of the operations fail all insertions are rolled back. @@ -374,11 +373,11 @@ ) -> Result<Vec<DocumentViewId>, DocumentStorageError> { let document_view_ids: Vec<String> = query_scalar( " - SELECT + SELECT document_views.document_view_id - FROM + FROM document_views - WHERE + WHERE document_views.document_id = $1 ", ) @@ -404,18 +403,18 @@ ) -> Result<Vec<DocumentId>, DocumentStorageError> { let document_view_ids: Vec<String> = query_scalar( " - SELECT DISTINCT + SELECT DISTINCT document_views.document_id FROM document_views - WHERE - document_views.document_view_id + WHERE + document_views.document_view_id IN ( SELECT operation_fields_v1.value - FROM + FROM document_view_fields - LEFT JOIN + LEFT JOIN operation_fields_v1 ON document_view_fields.operation_id = operation_fields_v1.operation_id @@ -423,7 +422,7 @@ document_view_fields.name = operation_fields_v1.name WHERE operation_fields_v1.field_type IN ('pinned_relation', 'pinned_relation_list') - AND + AND document_view_fields.document_view_id = $1 ) ", @@ -456,14 +455,14 @@ impl SqlStore { // view of a document, the deletion will not go ahead. let result = query( " - DELETE FROM + DELETE FROM document_views WHERE document_views.document_view_id = $1 AND NOT EXISTS ( - SELECT - document_view_fields.document_view_id - FROM + SELECT + document_view_fields.document_view_id + FROM document_view_fields LEFT JOIN operation_fields_v1 @@ -473,12 +472,12 @@ document_view_fields.name = operation_fields_v1.name WHERE operation_fields_v1.field_type IN ('pinned_relation', 'pinned_relation_list') - AND + AND operation_fields_v1.value = $1 ) AND NOT EXISTS ( SELECT documents.document_id FROM documents - WHERE documents.document_view_id = $1 + WHERE documents.document_view_id = $1 ) " ) @@ -497,7 +496,7 @@ } } - /// Check if this view is the current view of it's document. + /// Check if this view is the current view of its document.
pub async fn is_current_view( &self, document_view_id: &DocumentViewId, @@ -516,7 +515,7 @@ Ok(document_view_id.is_some()) } - /// Purge a document from the store by it's id. + /// Purge a document from the store by its id. /// /// This removes entries, operations and any materialized documents which exist. /// @@ -550,7 +549,7 @@ // Delete rows from `entries` table. query( " - DELETE FROM entries + DELETE FROM entries WHERE entries.entry_hash IN ( SELECT operations_v1.operation_id FROM operations_v1 WHERE operations_v1.document_id = $1 @@ -593,7 +592,7 @@ async fn get_document_view_field_rows( // // This query performs a join against the `operation_fields_v1` table as this is where the // actual field values live. The `document_view_fields` table defines relations between a - // document view and the operation values which hold it's field values. + // document view and the operation values which hold its field values. // // Each field has one row, or in the case of list values (pinned relations, or relation lists) // then one row exists for every item in the list. The `list_index` column is used for @@ -608,7 +607,7 @@ operation_fields_v1.list_index, operation_fields_v1.field_type, operation_fields_v1.value - FROM + FROM document_view_fields LEFT JOIN operation_fields_v1 @@ -616,7 +615,7 @@ document_view_fields.operation_id = operation_fields_v1.operation_id AND document_view_fields.name = operation_fields_v1.name - LEFT JOIN + LEFT JOIN document_views ON document_view_fields.document_view_id = document_views.document_view_id @@ -724,7 +723,7 @@ .await .map_err(|err| DocumentStorageError::FatalStorageError(err.to_string()))?; - // If the document is not deleted, then we also want to insert it's view and fields. + // If the document is not deleted, then we also want to insert its view and fields. if !document.is_deleted() && document.view().is_some() { // Construct the view, unwrapping the document view fields as we checked they exist above. let document_view = @@ -800,7 +799,7 @@ mod tests { let result = node.context.store.insert_document(&document).await; assert!(result.is_ok()); - // Find the "CREATE" operation and get it's id. + // Find the "CREATE" operation and get its id. let create_operation = WithId::<OperationId>::id( operations .iter() @@ -826,9 +825,9 @@ .await; assert!(result.is_ok()); - // We should be able to retrieve the document at either of it's views now. + // We should be able to retrieve the document at either of its views now. - // Here we request the document with it's initial state. + // Here we request the document with its initial state. let retrieved_document = node .context .store @@ -842,7 +841,7 @@ assert_eq!(retrieved_document.view_id(), document_at_view_1.view_id()); assert_eq!(retrieved_document.fields(), document_at_view_1.fields()); - // Here we request it at it's current state. + // Here we request it at its current state. let retrieved_document = node .context .store @@ -856,7 +855,7 @@ assert_eq!(retrieved_document.view_id(), document.view_id()); assert_eq!(retrieved_document.fields(), document.fields()); - // If we retrieve the document by it's id, we expect the current state. + // If we retrieve the document by its id, we expect the current state.
let retrieved_document = node .context .store @@ -875,8 +874,7 @@ mod tests { #[rstest] fn document_view_does_not_exist(random_document_view_id: DocumentViewId) { test_runner(|node: TestNode| async move { - // We try to retrieve a document view by it's id but no view - // with that id exists. + // We try to retrieve a document view by its id but no view with that id exists. let view_does_not_exist = node .context .store @@ -925,20 +923,20 @@ mod tests { config: PopulateStoreConfig, ) { test_runner(|node: TestNode| async move { - // Populate the store with some entries and operations but DON'T materialise any resulting documents. + // Populate the store with some entries and operations but DON'T materialise any + // resulting documents. let (_, document_ids) = populate_store(&node.context.store, &config).await; let document_id = document_ids.get(0).expect("At least one document id"); // Build the document. let document = build_document(&node.context.store, &document_id).await; - // The document is successfully inserted into the database, this - // relies on the operations already being present and would fail - // if they were not. + // The document is successfully inserted into the database, this relies on the + // operations already being present and would fail if they were not. let result = node.context.store.insert_document(&document).await; assert!(result.is_ok()); - // We can retrieve the most recent document view for this document by it's id. + // We can retrieve the most recent document view for this document by its id. let retrieved_document = node .context .store @@ -947,8 +945,8 @@ mod tests { .unwrap() .unwrap(); - // We can retrieve a specific document view for this document by it's view_id. - // In this case, that should be the same as the view retrieved above. + // We can retrieve a specific document view for this document by its view_id. In this + // case, that should be the same as the view retrieved above. let specific_document = node .context .store @@ -985,7 +983,8 @@ mod tests { config: PopulateStoreConfig, ) { test_runner(|node: TestNode| async move { - // Populate the store with some entries and operations but DON'T materialise any resulting documents. + // Populate the store with some entries and operations but DON'T materialise any + // resulting documents. let (_, document_ids) = populate_store(&node.context.store, &config).await; let document_id = document_ids.get(0).expect("At least one document id"); @@ -997,12 +996,12 @@ mod tests { // As it has been deleted, there should be no view. assert!(document.view().is_none()); - // Here we insert the document. This action also sets it's most recent view. + // Here we insert the document. This action also sets its most recent view. let result = node.context.store.insert_document(&document).await; assert!(result.is_ok()); - // We retrieve the most recent view for this document by it's document id, - // but as the document is deleted, we should get a none value back. + // We retrieve the most recent view for this document by its document id, but as the + // document is deleted, we should get a none value back. let document = node .context .store @@ -1011,8 +1010,8 @@ mod tests { .unwrap(); assert!(document.is_none()); - // We also try to retrieve the specific document view by it's view id. - // This should also return none as it is deleted. + // We also try to retrieve the specific document view by its view id. This should also + // return none as it is deleted. 
let document = node .context .store @@ -1090,14 +1089,14 @@ mod tests { .build() .expect("Build document"); - // Insert it to the database, this should also update it's view. + // Insert it to the database, this should also update its view. node.context .store .insert_document(&document) .await .expect("Insert document"); - // We can retrieve the document by it's document id. + // We can retrieve the document by its document id. let retrieved_document = node .context .store @@ -1106,7 +1105,7 @@ .expect("Get document") .expect("Unwrap document"); - // And also directly by it's document view id. + // And also directly by its document view id. let specific_document = node .context .store diff --git a/aquadoggo/src/db/stores/entry.rs b/aquadoggo/src/db/stores/entry.rs index e9c3d7978..ebdf47fd3 100644 --- a/aquadoggo/src/db/stores/entry.rs +++ b/aquadoggo/src/db/stores/entry.rs @@ -74,7 +74,7 @@ impl EntryStore for SqlStore { Ok(()) } - /// Get an entry from storage by it's hash id. + /// Get an entry from storage by its hash id. /// /// Returns a result containing the entry wrapped in an option if it was found successfully. /// Returns `None` if the entry was not found in storage. Errors when a fatal storage error @@ -186,7 +186,7 @@ impl EntryStore for SqlStore { /// Get all entries of a given schema /// /// Returns a result containing a vector of all entries which follow the passed schema - /// (identified by it's `SchemaId`). If no entries exist, or the schema is not known by this + /// (identified by its `SchemaId`). If no entries exist, or the schema is not known by this /// node, then an empty vector is returned. async fn get_entries_by_schema( &self, @@ -222,7 +222,7 @@ impl EntryStore for SqlStore { /// Get all entries of a given schema. /// /// Returns a result containing a vector of all entries which follow the passed schema - /// (identified by it's `SchemaId`). If no entries exist, or the schema is not known by this + /// (identified by its `SchemaId`). If no entries exist, or the schema is not known by this /// node, then an empty vector is returned. async fn get_paginated_log_entries( &self, @@ -444,7 +444,7 @@ mod tests { .await; assert!(result.is_ok()); - // Retrieve the entry again by it's hash. + // Retrieve the entry again by its hash. let retrieved_entry = node .context .store @@ -622,7 +622,7 @@ .expect("At least one key pair") .public_key(); - // We should be able to get each entry by it's public_key, log_id and seq_num. + // We should be able to get each entry by its public_key, log_id and seq_num. for seq_num in 1..10 { let seq_num = SeqNum::new(seq_num).unwrap(); diff --git a/aquadoggo/src/db/stores/operation.rs b/aquadoggo/src/db/stores/operation.rs index d70e438b1..73550759f 100644 --- a/aquadoggo/src/db/stores/operation.rs +++ b/aquadoggo/src/db/stores/operation.rs @@ -25,8 +25,8 @@ use crate::db::SqlStore; /// the required `AsVerifiedOperation` trait. /// /// There are several intermediary structs defined in `db/models/` which represent rows from tables -/// in the database where this entry, it's fields and opreation relations are stored. These are -/// used in conjunction with the `sqlx` library to coerce raw values into structs when querying the +/// in the database where this entry, its fields and operation relations are stored. These are used +/// in conjunction with the `sqlx` library to coerce raw values into structs when querying the /// database.
#[async_trait] impl OperationStore for SqlStore { @@ -77,7 +77,7 @@ impl OperationStore for SqlStore { .await } - /// Get an operation identified by it's `OperationId`. + /// Get an operation identified by its `OperationId`. /// /// Returns a result containing an `VerifiedOperation` wrapped in an option, if no operation /// with this id was found, returns none. Errors if a fatal storage error occured. @@ -269,7 +269,7 @@ impl SqlStore { Ok(()) } - /// Insert an operation as well as the index for it's position in the document after + /// Insert an operation as well as the index for its position in the document after /// materialization has occurred. async fn insert_operation_with_index( &self, @@ -467,7 +467,7 @@ mod tests { .await; assert!(result.is_ok()); - // Request the previously inserted operation by it's id. + // Request the previously inserted operation by its id. let returned_operation = node .context .store diff --git a/aquadoggo/src/db/stores/schema.rs b/aquadoggo/src/db/stores/schema.rs index 59a17077c..e00f99f67 100644 --- a/aquadoggo/src/db/stores/schema.rs +++ b/aquadoggo/src/db/stores/schema.rs @@ -14,7 +14,7 @@ use crate::db::errors::SchemaStoreError; use crate::db::SqlStore; impl SqlStore { - /// Get a Schema from the database by it's document view id. + /// Get a Schema from the database by its document view id. /// /// Internally, this method performs three steps: /// - fetch the document view for the schema definition @@ -52,7 +52,7 @@ impl SqlStore { // We silently ignore errors as we are assuming views we retrieve from the database // themselves are valid, meaning any error in constructing the schema must be because some - // of it's fields are simply missing from our database. + // of its fields are simply missing from our database. let schema = Schema::from_views(schema_view, schema_fields).ok(); Ok(schema) @@ -205,7 +205,7 @@ mod tests { ) .await; - // Retrieve the schema by it's document view id. We unwrap here as we expect an `Ok` + // Retrieve the schema by its document view id. We unwrap here as we expect an `Ok` // result for the succeeding db query, even though the schema could not be built. let schema = node .context diff --git a/aquadoggo/src/db/types/entry.rs b/aquadoggo/src/db/types/entry.rs index bda3ad030..5d093c3f1 100644 --- a/aquadoggo/src/db/types/entry.rs +++ b/aquadoggo/src/db/types/entry.rs @@ -9,9 +9,9 @@ use p2panda_rs::operation::EncodedOperation; use crate::db::models::EntryRow; -/// A signed entry and it's encoded operation. Entries are the lowest level data type on the -/// p2panda network, they are signed by authors and form bamboo append only logs. The operation is -/// an entries' payload, it contains the data mutations which authors publish. +/// A signed entry and its encoded operation. Entries are the lowest level data type on the p2panda +/// network, they are signed by authors and form bamboo append only logs. The operation is an +/// entries' payload, it contains the data mutations which authors publish. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct StorageEntry { /// PublicKey of this entry. diff --git a/aquadoggo/src/db/types/operation.rs b/aquadoggo/src/db/types/operation.rs index 2af007136..6142cb2a0 100644 --- a/aquadoggo/src/db/types/operation.rs +++ b/aquadoggo/src/db/types/operation.rs @@ -36,7 +36,7 @@ pub struct StorageOperation { /// Index for the position of this operation once topological sorting of the operation graph /// has been performed. 
/// - /// Is `None` when the operation has not been materialized into it's document yet. + /// Is `None` when the operation has not been materialized into its document yet. pub(crate) sorted_index: Option, } diff --git a/aquadoggo/src/graphql/constants.rs b/aquadoggo/src/graphql/constants.rs index 9fb0f4cf7..22250bad4 100644 --- a/aquadoggo/src/graphql/constants.rs +++ b/aquadoggo/src/graphql/constants.rs @@ -58,10 +58,10 @@ pub const ORDER_DIRECTION_ARG: &str = "orderDirection"; /// Name of field where a collection of documents can be accessed. pub const DOCUMENTS_FIELD: &str = "documents"; -/// Name of field on a document where it's fields can be accessed. +/// Name of field on a document where its fields can be accessed. pub const FIELDS_FIELD: &str = "fields"; -/// Name of field on a document where it's meta data can be accessed. +/// Name of field on a document where its meta data can be accessed. pub const META_FIELD: &str = "meta"; /// Name of field on a document where pagination cursor can be accessed. diff --git a/aquadoggo/src/graphql/mutations/publish.rs b/aquadoggo/src/graphql/mutations/publish.rs index 745603ed9..d2e47caa3 100644 --- a/aquadoggo/src/graphql/mutations/publish.rs +++ b/aquadoggo/src/graphql/mutations/publish.rs @@ -240,7 +240,7 @@ mod tests { let context = HttpServiceContext::new( node.context.store.clone(), manager, - node.context.config.blob_dir.as_ref().unwrap().to_path_buf(), + node.context.config.blobs_base_path.to_path_buf(), ); let response = context.schema.execute(publish_request).await; @@ -305,7 +305,7 @@ mod tests { let context = HttpServiceContext::new( node.context.store.clone(), manager, - node.context.config.blob_dir.as_ref().unwrap().to_path_buf(), + node.context.config.blobs_base_path.to_path_buf(), ); let response = context @@ -337,7 +337,7 @@ mod tests { let context = HttpServiceContext::new( node.context.store.clone(), manager, - node.context.config.blob_dir.as_ref().unwrap().to_path_buf(), + node.context.config.blobs_base_path.to_path_buf(), ); context.schema.execute(publish_request).await; diff --git a/aquadoggo/src/graphql/objects/document.rs b/aquadoggo/src/graphql/objects/document.rs index 0a86feb44..6f894daad 100644 --- a/aquadoggo/src/graphql/objects/document.rs +++ b/aquadoggo/src/graphql/objects/document.rs @@ -13,7 +13,7 @@ use crate::graphql::utils::{collection_item_name, fields_name}; /// schema. /// /// Constructs resolvers for both `fields` and `meta` fields. The former simply passes up the query -/// arguments to it's children query fields. The latter calls the `resolve` method defined on +/// arguments to its children query fields. The latter calls the `resolve` method defined on /// `DocumentMeta` type. pub fn build_document_object(schema: &Schema) -> Object { let fields = Object::new(schema.id().to_string()); @@ -24,7 +24,7 @@ pub fn build_document_object(schema: &Schema) -> Object { /// schema and are contained in a paginated collection. /// /// Contains resolvers for `cursor`, `fields` and `meta`. `fields` simply passes up the query -/// arguments to it's children query fields. `meta` calls the `resolve` method defined on +/// arguments to its children query fields. `meta` calls the `resolve` method defined on /// `DocumentMeta` type. pub fn build_paginated_document_object(schema: &Schema) -> Object { let fields = Object::new(collection_item_name(schema.id())); @@ -56,7 +56,7 @@ pub fn build_paginated_document_object(schema: &Schema) -> Object { /// Add application `fields` and `meta` fields to a GraphQL object. 
fn with_document_fields(fields: Object, schema: &Schema) -> Object { fields - // The `fields` field passes down the parent value to it's children + // The `fields` field passes down the parent value to its children .field( Field::new( constants::FIELDS_FIELD, diff --git a/aquadoggo/src/graphql/queries/collection.rs b/aquadoggo/src/graphql/queries/collection.rs index 44655b9d1..6d7d6ff73 100644 --- a/aquadoggo/src/graphql/queries/collection.rs +++ b/aquadoggo/src/graphql/queries/collection.rs @@ -921,7 +921,7 @@ mod tests { // That's more my style, so let's get the lyrics for this song. But there are a lot, // so I'll just get the first 2 lines. - // We can identify the song by it's id and then paginate the lyrics field which is a + // We can identify the song by its id and then paginate the lyrics field which is a // relation list of song lyric lines. let oh_bondage_up_yours_id = oh_bondage_up_yours["meta"]["documentId"].as_str().unwrap(); diff --git a/aquadoggo/src/graphql/scalars/document_view_id_scalar.rs b/aquadoggo/src/graphql/scalars/document_view_id_scalar.rs index ef522b908..4cdeb0a57 100644 --- a/aquadoggo/src/graphql/scalars/document_view_id_scalar.rs +++ b/aquadoggo/src/graphql/scalars/document_view_id_scalar.rs @@ -7,7 +7,7 @@ use dynamic_graphql::{Error, Result, Scalar, ScalarValue, Value}; use p2panda_rs::document::DocumentViewId; /// The document view id of a p2panda document. Refers to a specific point in a documents history -/// and can be used to deterministically reconstruct it's state at that time. +/// and can be used to deterministically reconstruct its state at that time. #[derive(Scalar, Clone, Debug, Eq, PartialEq)] #[graphql(name = "DocumentViewId", validator(validate))] pub struct DocumentViewIdScalar(DocumentViewId); diff --git a/aquadoggo/src/graphql/schema.rs b/aquadoggo/src/graphql/schema.rs index 50244d7e9..e16aee61d 100644 --- a/aquadoggo/src/graphql/schema.rs +++ b/aquadoggo/src/graphql/schema.rs @@ -111,7 +111,7 @@ pub async fn build_root_schema( .register(filter_input); // Add a query for each schema. It offers an interface to retrieve a single document of - // this schema by it's document id or view id. Its resolver parses and validates the passed + // this schema by its document id or view id. Its resolver parses and validates the passed // parameters, then forwards them up to the children query fields root_query = build_document_query(root_query, &schema); diff --git a/aquadoggo/src/http/api.rs b/aquadoggo/src/http/api.rs index cab488964..c29a47d6f 100644 --- a/aquadoggo/src/http/api.rs +++ b/aquadoggo/src/http/api.rs @@ -60,7 +60,7 @@ pub async fn handle_blob_document( return Err(BlobHttpError::NotFound); } - respond_with_blob(if_none_match, context.blob_dir_path, document).await + respond_with_blob(if_none_match, context.blobs_base_path, document).await } /// Handle requests for a blob document view served via HTTP. @@ -87,7 +87,7 @@ pub async fn handle_blob_view( return Err(BlobHttpError::NotFound); } - respond_with_blob(if_none_match, context.blob_dir_path, document).await + respond_with_blob(if_none_match, context.blobs_base_path, document).await } /// Returns HTTP response with the contents, ETag and given MIME type of a blob. @@ -95,7 +95,7 @@ pub async fn handle_blob_view( /// Supports basic caching by handling "IfNoneMatch" headers matching the latest ETag. 
async fn respond_with_blob( if_none_match: IfNoneMatch, - blob_dir_path: PathBuf, + blobs_base_path: PathBuf, document: impl AsDocument, ) -> Result { let view_id = document.view_id(); @@ -120,7 +120,7 @@ async fn respond_with_blob( }?; // Get body from read-stream of stored file on file system - let mut file_path = blob_dir_path; + let mut file_path = blobs_base_path; file_path.push(format!("{view_id}")); match File::open(&file_path).await { Ok(file) => { diff --git a/aquadoggo/src/http/context.rs b/aquadoggo/src/http/context.rs index f431ee7c9..01015bf5c 100644 --- a/aquadoggo/src/http/context.rs +++ b/aquadoggo/src/http/context.rs @@ -13,16 +13,16 @@ pub struct HttpServiceContext { /// Dynamic GraphQL schema manager. pub schema: GraphQLSchemaManager, - /// Path of the directory where blobs should be served from - pub blob_dir_path: PathBuf, + /// Path of the directory where blobs should be served from. + pub blobs_base_path: PathBuf, } impl HttpServiceContext { - pub fn new(store: SqlStore, schema: GraphQLSchemaManager, blob_dir_path: PathBuf) -> Self { + pub fn new(store: SqlStore, schema: GraphQLSchemaManager, blobs_base_path: PathBuf) -> Self { Self { store, schema, - blob_dir_path, + blobs_base_path, } } } diff --git a/aquadoggo/src/http/service.rs b/aquadoggo/src/http/service.rs index 7c045e50b..855d61193 100644 --- a/aquadoggo/src/http/service.rs +++ b/aquadoggo/src/http/service.rs @@ -61,13 +61,13 @@ pub async fn http_service( let graphql_schema_manager = GraphQLSchemaManager::new(context.store.clone(), tx, context.schema_provider.clone()).await; - let blob_dir_path = context.config.blob_dir.as_ref().expect("Base path not set"); + let blobs_base_path = &context.config.blobs_base_path; // Introduce a new context for all HTTP routes let http_context = HttpServiceContext::new( context.store.clone(), graphql_schema_manager, - blob_dir_path.to_owned(), + blobs_base_path.to_owned(), ); // Start HTTP server with given port and re-attempt with random port if it was taken already @@ -105,7 +105,6 @@ mod tests { use serde_json::json; use tokio::sync::broadcast; - use crate::config::BLOBS_DIR_NAME; use crate::graphql::GraphQLSchemaManager; use crate::http::context::HttpServiceContext; use crate::schema::SchemaProvider; @@ -124,7 +123,7 @@ mod tests { let context = HttpServiceContext::new( node.context.store.clone(), graphql_schema_manager, - BLOBS_DIR_NAME.into(), + node.context.config.blobs_base_path.clone(), ); let client = TestClient::new(build_server(context)); diff --git a/aquadoggo/src/materializer/tasks/blob.rs b/aquadoggo/src/materializer/tasks/blob.rs index 8366cbba6..133ea795c 100644 --- a/aquadoggo/src/materializer/tasks/blob.rs +++ b/aquadoggo/src/materializer/tasks/blob.rs @@ -63,7 +63,7 @@ pub async fn blob_task(context: Context, input: TaskInput) -> TaskResult TaskResult base_path, - None => return Err(TaskError::Critical("No base path configured".to_string())), - }; - let blob_view_path = base_path.join(blob_document.view_id().to_string()); + // Compose, and when needed create, the path for the blob file + let blob_view_path = context + .config + .blobs_base_path + .join(blob_document.view_id().to_string()); - // Write the blob to the filesystem. 
+ // Write the blob to the filesystem info!("Creating blob at path {}", blob_view_path.display()); let mut file = File::create(&blob_view_path).await.map_err(|err| { @@ -175,7 +174,7 @@ mod tests { assert!(result.unwrap().is_none()); // Construct the expected path to the blob view file - let base_path = node.context.config.blob_dir.as_ref().unwrap(); + let base_path = &node.context.config.blobs_base_path; let blob_path = base_path.join(blob_view_id.to_string()); // Read from this file diff --git a/aquadoggo/src/materializer/tasks/dependency.rs b/aquadoggo/src/materializer/tasks/dependency.rs index 83c3d58d4..e9d1de9c3 100644 --- a/aquadoggo/src/materializer/tasks/dependency.rs +++ b/aquadoggo/src/materializer/tasks/dependency.rs @@ -894,7 +894,7 @@ mod tests { assert_eq!(tasks[0].worker_name(), &String::from("dependency")); // 2. The "dependency" task will try to resolve the pinned document view pointing at - // the "post" document in it's version 2 + // the "post" document in its version 2 let tasks = dependency_task(node_b.context.clone(), tasks[0].input().clone()) .await .unwrap(); diff --git a/aquadoggo/src/materializer/tasks/garbage_collection.rs b/aquadoggo/src/materializer/tasks/garbage_collection.rs index 6138273cc..7e6901e8e 100644 --- a/aquadoggo/src/materializer/tasks/garbage_collection.rs +++ b/aquadoggo/src/materializer/tasks/garbage_collection.rs @@ -38,7 +38,7 @@ pub async fn garbage_collection_task(context: Context, input: TaskInput) -> Task let mut all_effected_child_relations = vec![]; let mut deleted_views_count = 0; for document_view_id in &all_document_view_ids { - // Check if this is the current view of it's document. This will still return true + // Check if this is the current view of its document. This will still return true // if the document in question is deleted. let is_current_view = context .store @@ -90,8 +90,8 @@ pub async fn garbage_collection_task(context: Context, input: TaskInput) -> Task .expect("Operation exists in store"); if let SchemaId::Blob(_) = operation.schema_id() { - // Purge the blob and all it's pieces. This only succeeds if no document - // refers to the blob document by either a relation or pinned relation. + // Purge the blob and all its pieces. This only succeeds if no document refers + // to the blob document by either a relation or pinned relation. context .store .purge_blob(&document_id) diff --git a/aquadoggo/src/materializer/tasks/reduce.rs b/aquadoggo/src/materializer/tasks/reduce.rs index f24aeb3c5..ac12d2a51 100644 --- a/aquadoggo/src/materializer/tasks/reduce.rs +++ b/aquadoggo/src/materializer/tasks/reduce.rs @@ -223,7 +223,7 @@ async fn reduce_document + WithPublicKey>( .map_err(|err| TaskError::Critical(err.to_string()))?; } - // Insert this document into storage. If it already existed, this will update it's + // Insert this document into storage. If it already existed, this will update its // current view context .store @@ -453,7 +453,7 @@ mod tests { .unwrap() .sorted(); - // Reduce document to it's current view and insert into database + // Reduce document to its current view and insert into database let input = TaskInput::DocumentId(document_id.clone()); assert!(reduce_task(node.context.clone(), input).await.is_ok()); @@ -585,11 +585,11 @@ mod tests { ) { // Prepare empty database. test_runner(move |node: TestNode| async move { - // Dispatch a reduce task for a document which doesn't exist by it's document id. 
+ // Dispatch a reduce task for a document which doesn't exist by its document id let input = TaskInput::DocumentId(document_id); assert!(reduce_task(node.context.clone(), input).await.is_ok()); - // Dispatch a reduce task for a document which doesn't exist by it's document view id. + // Dispatch a reduce task for a document which doesn't exist by its document view id let input = TaskInput::DocumentViewId(document_view_id); assert!(reduce_task(node.context.clone(), input).await.is_ok()); }); diff --git a/aquadoggo/src/network/service.rs b/aquadoggo/src/network/service.rs index 200523262..720d8e36f 100644 --- a/aquadoggo/src/network/service.rs +++ b/aquadoggo/src/network/service.rs @@ -200,7 +200,7 @@ pub async fn connect_to_relay( // our local public address and (b) enable a freshly started relay to learn its public address. swarm.dial(relay_address.clone())?; - // Wait to get confirmation that we told the relay node it's public address and that they told + // Wait to get confirmation that we told the relay node its public address and that they told // us ours. let mut learned_observed_addr = false; let mut told_relay_observed_addr = false; diff --git a/aquadoggo/src/node.rs b/aquadoggo/src/node.rs index c92fd32cc..3bd70c151 100644 --- a/aquadoggo/src/node.rs +++ b/aquadoggo/src/node.rs @@ -2,11 +2,9 @@ use anyhow::Result; use p2panda_rs::identity::KeyPair; -use tempfile::TempDir; -use tokio::fs; use crate::bus::ServiceMessage; -use crate::config::{Configuration, BLOBS_DIR_NAME}; +use crate::config::Configuration; use crate::context::Context; use crate::db::SqlStore; use crate::db::{connection_pool, create_database, run_pending_migrations, Pool}; @@ -47,7 +45,7 @@ pub struct Node { impl Node { /// Start p2panda node with your configuration. This method can be used to run the node within /// other applications. - pub async fn start(key_pair: KeyPair, mut config: Configuration) -> Self { + pub async fn start(key_pair: KeyPair, config: Configuration) -> Self { // Initialize database and get connection pool let pool = initialize_db(&config) .await @@ -64,15 +62,6 @@ impl Node { let schema_provider = SchemaProvider::new(application_schema, config.allow_schema_ids.clone()); - // Create temporary dirs for blob storage. - // - // @TODO: Implement configuring this path for persistent storage, see related issue: - // https://github.com/p2panda/aquadoggo/issues/542 - let tmp_dir = TempDir::new().unwrap(); - let blob_dir_path = tmp_dir.path().join(BLOBS_DIR_NAME); - fs::create_dir_all(&blob_dir_path).await.unwrap(); - config.blob_dir = Some(blob_dir_path); - // Create service manager with shared data between services let context = Context::new(store, key_pair, config, schema_provider); let mut manager = diff --git a/aquadoggo/src/proptests/document_strategies.rs b/aquadoggo/src/proptests/document_strategies.rs index eb15d455f..ef7c86550 100644 --- a/aquadoggo/src/proptests/document_strategies.rs +++ b/aquadoggo/src/proptests/document_strategies.rs @@ -15,7 +15,7 @@ const MAX_DOCUMENTS_PER_ROOT_SCHEMA: usize = 15; /// Same as above, this is a shrinking value. const MAX_DOCUMENTS_PER_RELATION_LIST: usize = 2; -/// AST representing a document and it's relations. +/// AST representing a document and its relations. 
#[derive(Debug, Clone)] pub struct DocumentAST { pub schema_id: SchemaId, diff --git a/aquadoggo/src/replication/manager.rs b/aquadoggo/src/replication/manager.rs index 964d6fdaa..a0f9092ef 100644 --- a/aquadoggo/src/replication/manager.rs +++ b/aquadoggo/src/replication/manager.rs @@ -743,7 +743,7 @@ mod tests { .unwrap(); // We expect Peer B to drop the incoming request from Peer A and simply wait for a - // response from it's original request. + // response from its original request. assert_eq!(result.messages.len(), 0); // Peer A has two sessions running: The one initiated by Peer B and the one it @@ -943,7 +943,7 @@ let response = result.unwrap(); // We expect Peer B to drop the incoming request from Peer A and simply wait - // for a response from it's original request. + // for a response from its original request. assert_eq!(response.messages.len(), 0); // Both peers have exactly one session running. diff --git a/aquadoggo/src/replication/strategies/diff.rs b/aquadoggo/src/replication/strategies/diff.rs index f3c7a39cc..7d46f0312 100644 --- a/aquadoggo/src/replication/strategies/diff.rs +++ b/aquadoggo/src/replication/strategies/diff.rs @@ -16,7 +16,7 @@ fn remote_requires_entries( remote_log_heights: &HashMap<LogId, SeqNum>, ) -> Option<(LogId, SeqNum)> { trace!("Local log height: {:?} {:?}", log_id, local_seq_num); - // Get height of the remote log by it's id. + // Get height of the remote log by its id. let remote_log_height = remote_log_heights.get(log_id); match remote_log_height { @@ -30,7 +30,7 @@ // We increment the seq num as we want it to represent an inclusive lower // bound. // - // We can unwrap as we are incrementing the lower remote seq num which means it's + // We can unwrap as we are incrementing the lower remote seq num which means it + // will not reach max seq number. let from_seq_num = remote_seq_num.clone().next().unwrap(); diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index 14262249c..d1edfd4fb 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -81,7 +81,7 @@ impl SchemaProvider { /// Inserts or updates the given schema in this provider. /// - /// Returns `true` if a schema was updated or it already existed in it's current state, and + /// Returns `true` if a schema was updated or it already existed in its current state, and /// `false` if it was inserted. pub async fn update(&self, schema: Schema) -> Result { if let AllowList::Set(allow_schema_ids) = &self.allow_schema_ids { @@ -94,7 +94,7 @@ let schema_exists = schemas.get(schema.id()).is_some(); if schema_exists { - // Return true here as the schema already exists in it's current state so we don't need + // Return true here as the schema already exists in its current state so we don't need // to mutate the schema store or announce any change.
return Ok(true); } diff --git a/aquadoggo/src/test_utils/client.rs b/aquadoggo/src/test_utils/client.rs index b8a4b29fb..005efcbbb 100644 --- a/aquadoggo/src/test_utils/client.rs +++ b/aquadoggo/src/test_utils/client.rs @@ -80,7 +80,7 @@ pub async fn http_test_client(node: &TestNode) -> TestClient { let http_context = HttpServiceContext::new( node.context.store.clone(), manager, - node.context.config.blob_dir.as_ref().unwrap().to_path_buf(), + node.context.config.blobs_base_path.to_path_buf(), ); TestClient::new(build_server(http_context)) diff --git a/aquadoggo/src/test_utils/runner.rs b/aquadoggo/src/test_utils/runner.rs index 7349a3287..9f1080e71 100644 --- a/aquadoggo/src/test_utils/runner.rs +++ b/aquadoggo/src/test_utils/runner.rs @@ -1,14 +1,12 @@ // SPDX-License-Identifier: AGPL-3.0-or-later use std::sync::Arc; -use std::{fs, panic}; use futures::Future; use p2panda_rs::identity::KeyPair; use tokio::runtime::Builder; use tokio::sync::Mutex; -use crate::config::BLOBS_DIR_NAME; use crate::context::Context; use crate::db::Pool; use crate::db::SqlStore; @@ -102,21 +100,20 @@ pub fn test_runner(test: F) { let (_config, pool) = initialize_db().await; let store = SqlStore::new(pool); - // Construct temporary directory for the test runner - let tmp_dir = tempfile::TempDir::new().unwrap(); - let blob_dir_path = tmp_dir.path().join(BLOBS_DIR_NAME); - fs::create_dir_all(&blob_dir_path).unwrap(); + // Construct temporary blobs directory for the test runner + let temp_dir = tempfile::TempDir::new() + .expect("Could not create temporary test directory for blobs storage"); // Construct node config supporting any schema - let mut cfg = Configuration::default(); - cfg.blob_dir = Some(blob_dir_path); + let mut config = Configuration::default(); + config.blobs_base_path = temp_dir.path().to_path_buf(); // Construct the actual test node let node = TestNode { context: Context::new( store.clone(), KeyPair::new(), - cfg, + config, SchemaProvider::default(), ), }; @@ -142,7 +139,7 @@ pub fn test_runner(test: F) { // there, we need to propagate it further to inform the test runtime about the result match result { Ok(_) => (), - Err(err) => panic::resume_unwind(err.into_panic()), + Err(err) => std::panic::resume_unwind(err.into_panic()), }; }); } @@ -163,7 +160,7 @@ pub fn test_runner_with_manager (), - Err(err) => panic::resume_unwind(err.into_panic()), + Err(err) => std::panic::resume_unwind(err.into_panic()), }; }); } diff --git a/aquadoggo/src/tests.rs b/aquadoggo/src/tests.rs index 2aea479f5..40a3b9b3d 100644 --- a/aquadoggo/src/tests.rs +++ b/aquadoggo/src/tests.rs @@ -156,7 +156,7 @@ async fn e2e() { // Query a document. // // Now that the cafe has been created and updated we can query it from the client. We do can do - // this using it's schema id and document or view id. + // this using its schema id and document or view id. let panda_cafe = query(&client, &panda_cafe_view_id, &cafe_schema_id).await; @@ -175,7 +175,7 @@ async fn e2e() { aquadoggo.shutdown().await; } -/// Publish an entry and it's operation to a node. +/// Publish an entry and its operation to a node. async fn publish(client: &Client, key_pair: &KeyPair, operation: &Operation) -> DocumentViewId { // Publishing operations. 
// diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml index c60207e19..327417209 100644 --- a/aquadoggo_cli/Cargo.toml +++ b/aquadoggo_cli/Cargo.toml @@ -29,15 +29,13 @@ figment = { version = "0.10.10", features = ["toml", "env"] } hex = "0.4.3" libp2p = "0.52.0" log = "0.4.20" +p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "8377056617b64e898e9980e8ad84d258ca0442a1" } path-clean = "1.0.1" serde = { version = "1.0.185", features = ["serde_derive"] } -p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "8377056617b64e898e9980e8ad84d258ca0442a1" } +tempfile = "3.7.0" tokio = { version = "1.28.2", features = ["full"] } toml = "0.7.6" [dependencies.aquadoggo] version = "~0.5.0" path = "../aquadoggo" - -[dev-dependencies] -tempfile = "3.4.0" diff --git a/aquadoggo_cli/README.md b/aquadoggo_cli/README.md index 8317da499..394fdf68f 100644 --- a/aquadoggo_cli/README.md +++ b/aquadoggo_cli/README.md @@ -85,7 +85,7 @@ depending on your needs. #### Support only certain schemas > "I want to run a node which only replicates and serves data from a limited > set of schemas. In this case it's schemas required by a mushroom sighting > app." ```toml @@ -156,8 +156,8 @@ direct_node_addresses = [ #### Persist node identity and database -> "I want my node to persist it's identity and database on the filesystem and -> retreive them whenever it runs again." +> "I want my node to persist its identity, uploaded files and database on the +> filesystem and retrieve them whenever it runs again." ```toml # Persist node private key at given location (using Linux XDG paths as an example) private_key = "$HOME/.local/share/aquadoggo/private-key.txt" # Persist SQLite database at given location database_url = "sqlite:$HOME/.local/share/aquadoggo/db.sqlite3" + +# Persist blobs (large binary files) at given location +blobs_base_path = "$HOME/.local/share/aquadoggo/blobs" ``` ### Configuration @@ -219,6 +222,13 @@ Options: QUIC port for node-node communication and data replication. Defaults to 2022 + -f, --blobs-base-path + Path to folder where blobs (large binary files) are persisted. + Defaults to a temporary directory. + + WARNING: By default your node will not persist any blobs after + shutdown. Set a path for production settings to not lose data. + -k, --private-key Path to persist your ed25519 private key file. Defaults to an ephemeral key only for this current session. diff --git a/aquadoggo_cli/config.toml b/aquadoggo_cli/config.toml index 20a9d0055..26ce05b03 100644 --- a/aquadoggo_cli/config.toml +++ b/aquadoggo_cli/config.toml @@ -88,6 +88,22 @@ http_port = 2020 # quic_port = 2022 +# ゚・。+☆ +# BLOBS +# ゚・。+☆ + +# Path to folder where blobs (large binary files) are persisted. Defaults to a +# temporary directory. +# +# WARNING: By default your node will not persist any blobs after shutdown. Set +# a path for production settings to not lose data. +# +# WARNING: This setting should reflect the `database_url` configuration. If the +# database is set to be stored somewhere permanently, you should do the same +# for blob files to not run into data inconsistencies.
+# +# blobs_base_path = "$HOME/.local/share/aquadoggo/blobs" # ゚・。+☆+。・ # IDENTITY # ゚・。+☆+。・ diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index 8b146e38b..9d1e32b52 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -121,6 +121,15 @@ struct Cli { #[serde(skip_serializing_if = "Option::is_none")] quic_port: Option<u16>, + /// Path to folder where blobs (large binary files) are persisted. Defaults to a temporary + /// directory. + /// + /// WARNING: By default your node will not persist any blobs after shutdown. Set a path for + /// production settings to not lose data. + #[arg(short = 'f', long, value_name = "PATH")] + #[serde(skip_serializing_if = "Option::is_none")] + blobs_base_path: Option<PathBuf>, + /// Path to persist your ed25519 private key file. Defaults to an ephemeral key only for this /// current session. /// @@ -267,6 +276,7 @@ pub struct Configuration { pub database_max_connections: u32, pub http_port: u16, pub quic_port: u16, + pub blobs_base_path: Option<PathBuf>, pub private_key: Option<PathBuf>, pub mdns: bool, pub direct_node_addresses: Vec<Multiaddr>, @@ -286,6 +296,7 @@ impl Default for Configuration { database_max_connections: 32, http_port: 2020, quic_port: 2022, + blobs_base_path: None, mdns: true, private_key: None, direct_node_addresses: vec![], @@ -338,12 +349,22 @@ impl TryFrom<Configuration> for NodeConfiguration { } }; + // Create a temporary blobs directory when none was given + let blobs_base_path = match value.blobs_base_path { + Some(path) => path, + None => { + let tmp_dir = tempfile::TempDir::new() + .map_err(|_| anyhow!("Could not create temporary directory to store blobs"))?; + // Take ownership of the path so the directory is not removed again + // when the `TempDir` handle drops at the end of this block + tmp_dir.into_path() + } + }; + Ok(NodeConfiguration { allow_schema_ids, - blob_dir: None, database_url: value.database_url, database_max_connections: value.database_max_connections, http_port: value.http_port, + blobs_base_path, worker_pool_size: value.worker_pool_size, network: NetworkConfiguration { quic_port: value.quic_port, diff --git a/aquadoggo_cli/src/key_pair.rs b/aquadoggo_cli/src/key_pair.rs index 4635d2b83..9585c5ca6 100644 --- a/aquadoggo_cli/src/key_pair.rs +++ b/aquadoggo_cli/src/key_pair.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -use std::fs::{self, File} +use std::fs::File; use std::io::{Read, Write}; #[cfg(target_os = "unix")] use std::os::unix::fs::PermissionsExt; @@ -39,8 +39,6 @@ pub fn generate_ephemeral_key_pair() -> KeyPair { fn save_key_pair_to_file(key_pair: &KeyPair, path: PathBuf) -> Result<()> { let private_key_hex = hex::encode(key_pair.private_key().as_bytes()); - // Make sure that directories exist and write file into it - fs::create_dir_all(path.parent().unwrap())?; let mut file = File::create(&path)?; file.write_all(private_key_hex.as_bytes())?; file.sync_all()?; @@ -57,9 +55,7 @@ fn save_key_pair_to_file(key_pair: &KeyPair, path: PathBuf) -> Result<()> { let private_key_hex = hex::encode(key_pair.private_key().as_bytes()); - // Make sure that directories exist and write file into it - fs::create_dir_all(path.parent().unwrap())?; - let mut file = File::create(&path)?; + let mut file = File::create(path)?; file.write_all(private_key_hex.as_bytes())?; file.sync_all()?; diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index d02d22735..41d1e9717 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -20,7 +20,11 @@ async fn main() -> anyhow::Result<()> { // Load configuration from command line arguments,
environment variables and .toml file let (config_file_path, config) = load_config().context("Could not load configuration")?; - // Set log verbosity based on config. By default scope it always to the "aquadoggo" module. + // Remember if the user did not set a blobs directory path, which means that it will default to a + // temporary one + let is_temporary_blobs_path = config.blobs_base_path.is_none(); + + // Set log verbosity based on config. By default scope it always to the "aquadoggo" module let mut builder = env_logger::Builder::new(); let builder = match LevelFilter::from_str(&config.log_level) { Ok(log_level) => builder.filter(Some("aquadoggo"), log_level), @@ -50,7 +54,7 @@ "{}", print_config(key_pair_path, config_file_path, &node_config) ); - show_warnings(&node_config); + show_warnings(&node_config, is_temporary_blobs_path); // Start p2panda node in async runtime let node = Node::start(key_pair, node_config).await; @@ -68,19 +72,34 @@ } /// Show some hopefully helpful warnings around common configuration issues. -fn show_warnings(config: &Configuration) { +fn show_warnings(config: &Configuration, is_temporary_blobs_path: bool) { match &config.allow_schema_ids { AllowList::Set(values) => { if values.is_empty() && !config.network.relay_mode { - warn!("Your node was set to not allow any schema ids which is only useful in combination with enabling relay mode. With this setting you will not be able to interact with any client or node."); + warn!( + "Your node was set to not allow any schema ids which is only useful in + combination with enabling relay mode. With this setting you will not be able to + interact with any client or node." + ); } } AllowList::Wildcard => { - warn!("Allowed schema ids is set to wildcard. Your node will support _any_ schemas it will encounter on the network. This is useful for experimentation and local development but _not_ recommended for production settings."); + warn!( + "Allowed schema ids is set to wildcard. Your node will support _any_ schemas it + will encounter on the network. This is useful for experimentation and local + development but _not_ recommended for production settings." + ); } } if !config.network.relay_addresses.is_empty() && config.network.relay_mode { - warn!("Will not connect to given relay addresses when relay mode is enabled"); + warn!("Will not connect to given relay addresses when relay mode is enabled."); } + + if config.database_url != "sqlite::memory:" && is_temporary_blobs_path { + warn!("Your database is persisted but blobs _are not_ which might result in unrecoverable + data inconsistency (blob operations are stored but the files themselves are _not_). It is + recommended to either set both values (`database_url` and `blobs_base_path`) to a + temporary value or set both to persist all data."); + } }
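
A note for library embedders: after this change `blobs_base_path` is a plain field on `aquadoggo::Configuration`, and `Node::start` no longer injects a temporary directory behind the scenes. Below is a minimal sketch of wiring this up from an application. The filesystem paths are placeholders; `shutdown` appears in the e2e test above, while `on_exit` is assumed from the CLI's run loop.

```rust
use aquadoggo::{Configuration, Node};
use p2panda_rs::identity::KeyPair;

#[tokio::main]
async fn main() {
    let mut config = Configuration::default();

    // Persist the database and blobs together: as the new config.toml warning
    // explains, persisting one without the other risks data inconsistencies,
    // since blob operations would survive a restart but the blob files would not.
    config.database_url = "sqlite:/var/lib/aquadoggo/db.sqlite3".into();
    config.blobs_base_path = "/var/lib/aquadoggo/blobs".into();

    // Start the node with an ephemeral identity and wait for it to finish.
    let node = Node::start(KeyPair::new(), config).await;
    node.on_exit().await;
    node.shutdown().await;
}
```

The same pairing is what the new `show_warnings` check guards on the CLI side: a persisted `database_url` combined with a temporary blobs path now logs the data-inconsistency warning.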