From 48ba9fbe65fe14c1daadadece5498cabfbcf4778 Mon Sep 17 00:00:00 2001 From: Rosa Gutierrez Date: Wed, 10 Jan 2024 15:37:10 +0100 Subject: [PATCH 1/5] Add missing index for release to blocked_executions table I inadvertently lost this one on #57. --- ...20240110143450_add_missing_index_to_blocked_executions.rb | 5 +++++ test/dummy/db/schema.rb | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 db/migrate/20240110143450_add_missing_index_to_blocked_executions.rb diff --git a/db/migrate/20240110143450_add_missing_index_to_blocked_executions.rb b/db/migrate/20240110143450_add_missing_index_to_blocked_executions.rb new file mode 100644 index 00000000..c6417000 --- /dev/null +++ b/db/migrate/20240110143450_add_missing_index_to_blocked_executions.rb @@ -0,0 +1,5 @@ +class AddMissingIndexToBlockedExecutions < ActiveRecord::Migration[7.1] + def change + add_index :solid_queue_blocked_executions, [ :concurrency_key, :priority, :job_id ], name: "index_solid_queue_blocked_executions_for_release" + end +end diff --git a/test/dummy/db/schema.rb b/test/dummy/db/schema.rb index 04eb1ef9..c60d200c 100644 --- a/test/dummy/db/schema.rb +++ b/test/dummy/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.1].define(version: 2023_12_11_200639) do +ActiveRecord::Schema[7.1].define(version: 2024_01_10_143450) do create_table "job_results", charset: "utf8mb4", collation: "utf8mb4_0900_ai_ci", force: :cascade do |t| t.string "queue_name" t.string "status" @@ -26,6 +26,7 @@ t.string "concurrency_key", null: false t.datetime "expires_at", null: false t.datetime "created_at", null: false + t.index ["concurrency_key", "priority", "job_id"], name: "index_solid_queue_blocked_executions_for_release" t.index ["expires_at", "concurrency_key"], name: "index_solid_queue_blocked_executions_for_maintenance" t.index ["job_id"], name: "index_solid_queue_blocked_executions_on_job_id", unique: true end From be474c12e51bd001b13f3e3e5c5b77a8c812a3b2 Mon Sep 17 00:00:00 2001 From: Rosa Gutierrez Date: Wed, 10 Jan 2024 16:02:54 +0100 Subject: [PATCH 2/5] Only try to release next unblocked job if the job actually completed Either successfully or failing, but it needs to have completed. Otherwise, we might be still claimed but signal the semaphore regardless, so it'd be lying about how many jobs are in progress. A good example where this might happen is when the worker is sent a QUIT signal to exit right away, and the job gets an exit! midway. As the worker would try to release claimed executions after the shutdown, the claimed execution that holds the semaphore could be potentially blocked because the semaphore is held at least by itself. Then, depending on the order of the thread pool shutting down and the worker being deregistered, we could end up with a job trying to unblock itself and a stuck semaphore. --- app/models/solid_queue/claimed_execution.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/solid_queue/claimed_execution.rb b/app/models/solid_queue/claimed_execution.rb index 9af5fe38..f2da6634 100644 --- a/app/models/solid_queue/claimed_execution.rb +++ b/app/models/solid_queue/claimed_execution.rb @@ -33,7 +33,7 @@ def perform else failed_with(result.error) end - ensure + job.unblock_next_blocked_job end From 21a8ac075cead17ba7a13053b87ac3394f71625a Mon Sep 17 00:00:00 2001 From: Rosa Gutierrez Date: Wed, 10 Jan 2024 20:07:59 +0100 Subject: [PATCH 3/5] Don't go through the general dispatch flow when releasing claimed executions That's it, don't try to gain the concurrency lock, because claimed executions with concurrency limits that are released would most likely be holding the semaphore themselves, as it's released after completing. This means these claimed executions would go to blocked upon release, leaving the semaphore busy. Just assume that if a job has a claimed execution, it's because it already gained the lock when going to ready. --- app/models/solid_queue/claimed_execution.rb | 2 +- app/models/solid_queue/job/executable.rb | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/app/models/solid_queue/claimed_execution.rb b/app/models/solid_queue/claimed_execution.rb index f2da6634..94fd794d 100644 --- a/app/models/solid_queue/claimed_execution.rb +++ b/app/models/solid_queue/claimed_execution.rb @@ -39,7 +39,7 @@ def perform def release transaction do - job.prepare_for_execution + job.dispatch_bypassing_concurrency_limits destroy! end end diff --git a/app/models/solid_queue/job/executable.rb b/app/models/solid_queue/job/executable.rb index b9205486..4dac9d0b 100644 --- a/app/models/solid_queue/job/executable.rb +++ b/app/models/solid_queue/job/executable.rb @@ -73,6 +73,10 @@ def dispatch end end + def dispatch_bypassing_concurrency_limits + ready + end + def finished! if preserve_finished_jobs? touch(:finished_at) From ce1a0546937bb3c9f031fd81d58349197b9d6134 Mon Sep 17 00:00:00 2001 From: Rosa Gutierrez Date: Wed, 10 Jan 2024 20:10:25 +0100 Subject: [PATCH 4/5] Use `exit` instead `exit!` on immediate termination So that `at_exit` hooks are run if needed. Besides, remove logging for failing to deregister a process as it just adds noise, and we were re-raising the exception anyway. --- app/models/solid_queue/process.rb | 3 --- lib/solid_queue/processes/supervised.rb | 2 +- lib/solid_queue/supervisor.rb | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/app/models/solid_queue/process.rb b/app/models/solid_queue/process.rb index adb79dc6..e0f22758 100644 --- a/app/models/solid_queue/process.rb +++ b/app/models/solid_queue/process.rb @@ -21,8 +21,5 @@ def heartbeat def deregister destroy! - rescue Exception - SolidQueue.logger.error("[SolidQueue] Error deregistering process #{id} - #{metadata}") - raise end end diff --git a/lib/solid_queue/processes/supervised.rb b/lib/solid_queue/processes/supervised.rb index 0d37d5e5..1756a13f 100644 --- a/lib/solid_queue/processes/supervised.rb +++ b/lib/solid_queue/processes/supervised.rb @@ -31,7 +31,7 @@ def register_signal_handlers end trap(:QUIT) do - exit! + exit end end end diff --git a/lib/solid_queue/supervisor.rb b/lib/solid_queue/supervisor.rb index 35a8218f..5e19f331 100644 --- a/lib/solid_queue/supervisor.rb +++ b/lib/solid_queue/supervisor.rb @@ -163,7 +163,7 @@ def wait_until(timeout, condition, &block) if timeout > 0 deadline = monotonic_time_now + timeout - while monotonic_time_now < deadline && !condition.call + while monotonic_time_now <= deadline && !condition.call sleep 0.1 block.call end From 1d246da64fabe1d0845d2b24bb7ac02411469177 Mon Sep 17 00:00:00 2001 From: Rosa Gutierrez Date: Wed, 10 Jan 2024 21:37:22 +0100 Subject: [PATCH 5/5] Don't run shutdown in forks when exit is requested before we get there That's it, via an `exit` call that becomes SystemExit, only run it when we're shutting down and exiting the loop because of that. It that's not the case, rely on the supervisor to do the clean-up. --- lib/solid_queue/processes/runnable.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/solid_queue/processes/runnable.rb b/lib/solid_queue/processes/runnable.rb index 433fce72..ace4f53e 100644 --- a/lib/solid_queue/processes/runnable.rb +++ b/lib/solid_queue/processes/runnable.rb @@ -48,7 +48,7 @@ def do_start_loop run end end - ensure + run_callbacks(:shutdown) { shutdown } end