Reschedule pending tasks during startup #168
@@ -0,0 +1,8 @@
-- SPDX-License-Identifier: AGPL-3.0-or-later

CREATE TABLE IF NOT EXISTS tasks (
    name TEXT NOT NULL,
    document_id TEXT NULL,
    document_view_id TEXT NULL,
    PRIMARY KEY (name, document_id, document_view_id)
);
@@ -0,0 +1,20 @@
// SPDX-License-Identifier: AGPL-3.0-or-later

use serde::Serialize;
use sqlx::FromRow;

/// Representation of a row from the `tasks` table as stored in the database.
///
/// This table holds all "pending" tasks of the materialization service worker.
#[derive(FromRow, Debug, Serialize, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct TaskRow {
    /// Name of the task worker.
    pub name: String,

    /// `DocumentId` of the task input.
    pub document_id: Option<String>,

    /// `DocumentViewId` of the task input.
    pub document_view_id: Option<String>,
}
@@ -5,6 +5,7 @@ mod entry;
mod log;
mod operation;
mod schema;
mod task;
#[cfg(test)]
pub mod test_utils;
@@ -0,0 +1,210 @@
// SPDX-License-Identifier: AGPL-3.0-or-later

use anyhow::Result;
use p2panda_rs::document::{DocumentId, DocumentViewId};
use sqlx::{query, query_as};

use crate::db::errors::SqlStorageError;
use crate::db::models::TaskRow;
use crate::db::provider::SqlStorage;
use crate::materializer::{Task, TaskInput};

/// Methods to interact with the `tasks` table in the database.
impl SqlStorage {
    /// Inserts a "pending" task into the database.
    pub async fn insert_task(&self, task: &Task<TaskInput>) -> Result<(), SqlStorageError> {
        // Convert task input to correct database types
        let task_input = task.input();
        let document_id = task_input.document_id.as_ref().map(|id| id.as_str());
        let document_view_id = task_input
            .document_view_id
            .as_ref()
            .map(|view_id| view_id.as_str());
        // Check first if this task already exists, to avoid duplicate rows
        let task_row = query_as::<_, TaskRow>(
            "
            SELECT
                name,
                document_id,
                document_view_id
            FROM
                tasks
            WHERE
                name = $1
                AND document_id IS $2
                AND document_view_id IS $3
            ",
        )
        .bind(task.worker_name())
        .bind(document_id)
        .bind(document_view_id)
        .fetch_optional(&self.pool)
        .await
        .map_err(|err| SqlStorageError::Transaction(err.to_string()))?;

        // If yes, we are already done here
        if task_row.is_some() {
            return Ok(());
        }
Review comment: We could spare this query with a unique constraint on the `tasks` table, couldn't we?

Reply: Yes, no 😅 When the pending tasks are loaded into the queue during rescheduling, they are not being removed from the database (yet). The worker itself will fire
The worker itself makes sure that no duplicates exist in the queues, but that's not the case with the database, which is why I've added that check before insertion. I guess one could take the rows out of the database after we run
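For context, a minimal sketch of what the suggested constraint-based variant could look like (assuming SQLite as the backend and that silently skipping a duplicate insert is acceptable; this is not part of this PR and the inserted value is a placeholder):

-- Hypothetical variant (not in this PR): let a unique constraint reject
-- duplicates and skip them with INSERT OR IGNORE instead of querying first.
CREATE TABLE IF NOT EXISTS tasks (
    name TEXT NOT NULL,
    document_id TEXT NULL,
    document_view_id TEXT NULL,
    UNIQUE (name, document_id, document_view_id)
);

INSERT OR IGNORE INTO tasks (name, document_id, document_view_id)
VALUES ('reduce', NULL, '<document view id>');

One caveat, which may be part of the "Yes, no" above: SQLite treats every NULL as distinct in unique indexes, so rows whose document_id or document_view_id is NULL would not be rejected by such a constraint, whereas the explicit check in this patch compares those columns with IS.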
        // Insert task into database
        let result = query(
            "
            INSERT INTO
                tasks (
                    name,
                    document_id,
                    document_view_id
                )
            VALUES
                ($1, $2, $3)
            ",
        )
        .bind(task.worker_name())
        .bind(document_id)
        .bind(document_view_id)
        .execute(&self.pool)
        .await
        .map_err(|err| SqlStorageError::Transaction(err.to_string()))?;

        if result.rows_affected() != 1 {
            Err(SqlStorageError::Insertion("tasks".into()))
        } else {
            Ok(())
        }
    }
/// Removes a "pending" task from the database. | ||
pub async fn remove_task(&self, task: &Task<TaskInput>) -> Result<(), SqlStorageError> { | ||
// Convert task input to correct database types | ||
let task_input = task.input(); | ||
let document_id = task_input.document_id.as_ref().map(|id| id.as_str()); | ||
let document_view_id = task_input | ||
.document_view_id | ||
.as_ref() | ||
.map(|view_id| view_id.as_str()); | ||
|
||
// Remove task from database | ||
let result = query( | ||
" | ||
DELETE FROM | ||
tasks | ||
WHERE | ||
name = $1 | ||
AND document_id IS $2 | ||
AND document_view_id IS $3 | ||
", | ||
) | ||
.bind(task.worker_name()) | ||
.bind(document_id) | ||
.bind(document_view_id) | ||
.execute(&self.pool) | ||
.await | ||
.map_err(|err| SqlStorageError::Transaction(err.to_string()))?; | ||
|
||
if result.rows_affected() != 1 { | ||
Err(SqlStorageError::Deletion("tasks".into())) | ||
} else { | ||
Ok(()) | ||
} | ||
} | ||
|

    /// Returns "pending" tasks of the materialization service worker.
    pub async fn get_tasks(&self) -> Result<Vec<Task<TaskInput>>, SqlStorageError> {
        let task_rows = query_as::<_, TaskRow>(
            "
            SELECT
                name,
                document_id,
                document_view_id
            FROM
                tasks
            ",
        )
        .fetch_all(&self.pool)
        .await
        .map_err(|err| SqlStorageError::Transaction(err.to_string()))?;

        // Convert database rows into correct p2panda types
        let mut tasks: Vec<Task<TaskInput>> = Vec::new();
        for task in task_rows {
            let document_id: Option<DocumentId> = task.document_id.map(|id| {
                id.parse()
                    .unwrap_or_else(|_| panic!("Invalid document id stored in database {}", id))
            });

            let document_view_id: Option<DocumentViewId> = task.document_view_id.map(|view_id| {
                view_id.parse().unwrap_or_else(|_| {
                    panic!("Invalid document view id stored in database: {}", view_id)
                })
            });

            tasks.push(Task::new(
                &task.name,
                TaskInput::new(document_id, document_view_id),
            ));
        }

        Ok(tasks)
    }
}

#[cfg(test)]
mod tests {
    use p2panda_rs::document::{DocumentId, DocumentViewId};
    use p2panda_rs::test_utils::fixtures::{document_id, document_view_id};
    use rstest::rstest;

    use crate::db::stores::test_utils::{test_db, TestSqlStore};
    use crate::materializer::{Task, TaskInput};

    #[rstest]
    #[tokio::test]
    async fn insert_get_remove_tasks(
        document_view_id: DocumentViewId,
        #[from(test_db)]
        #[future]
        db: TestSqlStore,
    ) {
        let db = db.await;

        // Prepare test data
        let task = Task::new("reduce", TaskInput::new(None, Some(document_view_id)));

        // Insert task
        let result = db.store.insert_task(&task).await;
        assert!(result.is_ok(), "{:?}", result);

        // Check if task exists in database
        let result = db.store.get_tasks().await;
        assert_eq!(result.unwrap(), vec![task.clone()]);

        // Remove task
        let result = db.store.remove_task(&task).await;
        assert!(result.is_ok(), "{:?}", result);

        // Check if all tasks got removed
        let result = db.store.get_tasks().await;
        assert_eq!(result.unwrap(), vec![]);
    }

    #[rstest]
    #[tokio::test]
    async fn avoid_duplicates(
        document_id: DocumentId,
        #[from(test_db)]
        #[future]
        db: TestSqlStore,
    ) {
        let db = db.await;

        // Prepare test data
        let task = Task::new("reduce", TaskInput::new(Some(document_id), None));

        // Insert task
        let result = db.store.insert_task(&task).await;
        assert!(result.is_ok(), "{:?}", result);

        // Insert the same thing again, it should silently fail
        let result = db.store.insert_task(&task).await;
        assert!(result.is_ok(), "{:?}", result);

        // Check for duplicates
        let result = db.store.get_tasks().await;
        assert_eq!(result.unwrap().len(), 1);
    }
}
@@ -7,3 +7,4 @@ mod worker;

pub use input::TaskInput;
pub use service::materializer_service;
pub use worker::Task;
Review comment: I was just stumbling over this primary key with NULL columns. If I understand correctly, this is actually only possible in SQLite: https://www.sqlite.org/lang_createtable.html