From a52c5dcc0b161195ddf2e31d8ac3c9a011df4880 Mon Sep 17 00:00:00 2001
From: Harry Lascelles <harry@harryl.com>
Date: Sun, 10 Mar 2024 18:40:51 +0000
Subject: [PATCH] Add primary key to audit table

In some circumstances it is useful or even necessary for tables to have a primary key, eg [Blue / Green deployment](https://docs.aws.amazon.com/whitepapers/latest/overview-deployment-options/bluegreen-deployments.html).

The `que_scheduler_audit_enqueued` does not have a primary key, so this PR adds one. This will require a migration so a new major version of que-scheduler is required. Note this may be a DB intensive operation that can take some time if you have been running que-scheduler for a while. Unless you are running the "cleanup job" (`QueSchedulerAuditClearDownJob`), you will have about 1 row per minute since the audit started.

The scheduler will pause until the migration to version 8 is completed. This may be fine if you have only tens or hundreds of thousands of rows. If you have millions, and don't wish to have any scheduler pause, it may be best to run a clear down with `QueSchedulerAuditClearDownJob` first (and maybe on an ongoing basis). See the README.md for more details.
---
 CHANGELOG.md                            |  4 +++
 README.md                               | 26 +++++++++---------
 lib/que/scheduler/migrations/8/down.sql |  1 +
 lib/que/scheduler/migrations/8/up.sql   |  1 +
 lib/que/scheduler/state_checks.rb       |  2 +-
 spec/que/scheduler/audit_spec.rb        |  3 +++
 spec/que/scheduler/migrations_spec.rb   | 35 +++++++++++++++++++++++++
 spec/support/db_support.rb              |  8 ++++++
 8 files changed, 67 insertions(+), 13 deletions(-)
 create mode 100644 lib/que/scheduler/migrations/8/down.sql
 create mode 100644 lib/que/scheduler/migrations/8/up.sql

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 04961862..a9c19a01 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## Unreleased
+
+- Add primary key to audit table [#456](https://github.com/hlascelles/que-scheduler/pull/456)
+
 ## 4.6.0 (2024-03-10)
 
 - Add tests for ruby 3.2 and 3.3 and remove ruby 2.7 and AR 5 [#453](https://github.com/hlascelles/que-scheduler/pull/453)
diff --git a/README.md b/README.md
index 1b824e0f..848350bf 100644
--- a/README.md
+++ b/README.md
@@ -25,9 +25,9 @@ resque-scheduler files, but with additional features.
    will fail if Que is set to execute jobs synchronously, i.e. `Que::Job.run_synchronously = true`.
 
     ```ruby
-    class CreateQueSchedulerSchema < ActiveRecord::Migration
+    class CreateQueSchedulerSchema < ActiveRecord::Migration[6.0]
       def change
-        Que::Scheduler::Migrations.migrate!(version: 7)
+        Que::Scheduler::Migrations.migrate!(version: 8)
       end
     end
     ```
@@ -210,23 +210,25 @@ ie, This will perform all migrations necessary up to the latest version, skippin
 performed.
 
 ```ruby
-class CreateQueSchedulerSchema < ActiveRecord::Migration
+class CreateQueSchedulerSchema < ActiveRecord::Migration[6.0]
   def change
-    Que::Scheduler::Migrations.migrate!(version: 7)
+    Que::Scheduler::Migrations.migrate!(version: 8)
   end
 end
 ```
 
 The changes in past migrations were: 
 
-| Version | Changes                                                                         |
-|:-------:|---------------------------------------------------------------------------------|
-|    1    | Enqueued the main Que::Scheduler. This is the job that performs the scheduling. |
-|    2    | Added the audit table `que_scheduler_audit`.                                    |
-|    3    | Added the audit table `que_scheduler_audit_enqueued`.                           |
-|    4    | Updated the the audit tables to use bigints                                     |
-|    5    | Dropped an unnecessary index                                                    |
-|    6    | Enforced single scheduler job at the trigger level                              |
+| Version | Changes                                                                             |
+|:-------:|-------------------------------------------------------------------------------------|
+|    1    | Enqueued the main Que::Scheduler. This is the job that performs the scheduling.     |
+|    2    | Added the audit table `que_scheduler_audit`.                                        |
+|    3    | Added the audit table `que_scheduler_audit_enqueued`.                               |
+|    4    | Updated the the audit tables to use bigints                                         |
+|    5    | Dropped an unnecessary index                                                        |
+|    6    | Enforced single scheduler job at the trigger level                                  |
+|    7    | Prevent accidental deletion of scheduler job                                        |
+|    8    | Add primary key to audit. Note, this can be a slow migration if you have many rows! |
 
 The changes to the DB ([DDL](https://en.wikipedia.org/wiki/Data_definition_language)) are all 
 captured in the structure.sql so will be re-run in correctly if squashed - except for the actual 
diff --git a/lib/que/scheduler/migrations/8/down.sql b/lib/que/scheduler/migrations/8/down.sql
new file mode 100644
index 00000000..22f8f226
--- /dev/null
+++ b/lib/que/scheduler/migrations/8/down.sql
@@ -0,0 +1 @@
+ALTER TABLE que_scheduler_audit_enqueued DROP COLUMN "id";
diff --git a/lib/que/scheduler/migrations/8/up.sql b/lib/que/scheduler/migrations/8/up.sql
new file mode 100644
index 00000000..58dd70af
--- /dev/null
+++ b/lib/que/scheduler/migrations/8/up.sql
@@ -0,0 +1 @@
+ALTER TABLE que_scheduler_audit_enqueued ADD COLUMN id BIGSERIAL PRIMARY KEY;
diff --git a/lib/que/scheduler/state_checks.rb b/lib/que/scheduler/state_checks.rb
index ca5128d7..0dc10c58 100644
--- a/lib/que/scheduler/state_checks.rb
+++ b/lib/que/scheduler/state_checks.rb
@@ -47,7 +47,7 @@ def check
             To bring the db version up to the current one required, add a migration like this. It
             is cumulative, so one line is sufficient to perform all necessary steps.
 
-            class UpdateQueSchedulerSchema < ActiveRecord::Migration
+            class UpdateQueSchedulerSchema < ActiveRecord::Migration[6.0]
               def change
                 Que::Scheduler::Migrations.migrate!(version: #{Que::Scheduler::Migrations::MAX_VERSION})
               end
diff --git a/spec/que/scheduler/audit_spec.rb b/spec/que/scheduler/audit_spec.rb
index e6b60d55..e22bf50a 100644
--- a/spec/que/scheduler/audit_spec.rb
+++ b/spec/que/scheduler/audit_spec.rb
@@ -35,6 +35,7 @@ def append_test_jobs(scheduler_job_id, executed_at, enqueued)
         expect(db_jobs).to eq(
           [
             {
+              id: 1,
               scheduler_job_id: scheduler_job_id,
               job_class: "HalfHourlyTestJob",
               queue: handles_queue_name ? "something1" : nil,
@@ -44,6 +45,7 @@ def append_test_jobs(scheduler_job_id, executed_at, enqueued)
               run_at: jobs_set_to_run_at,
             },
             {
+              id: 2,
               scheduler_job_id: scheduler_job_id,
               job_class: "HalfHourlyTestJob",
               queue: handles_queue_name ? Que::Scheduler.configuration.que_scheduler_queue : nil,
@@ -53,6 +55,7 @@ def append_test_jobs(scheduler_job_id, executed_at, enqueued)
               run_at: jobs_set_to_run_at,
             },
             {
+              id: 3,
               scheduler_job_id: scheduler_job_id,
               job_class: "DailyTestJob",
               queue: handles_queue_name ? "something3" : nil,
diff --git a/spec/que/scheduler/migrations_spec.rb b/spec/que/scheduler/migrations_spec.rb
index 3921c08f..49a8a4c2 100644
--- a/spec/que/scheduler/migrations_spec.rb
+++ b/spec/que/scheduler/migrations_spec.rb
@@ -16,6 +16,14 @@ def check_index_existence(index_name, expect)
       ::Que::Scheduler::SchedulerJob.enqueue
       ::Que::Scheduler::StateChecks.check
 
+      expect(described_class.db_version).to eq(8)
+
+      # Check 8 change down
+      # Drops the PRIMARY KEY constraint on que_scheduler_audit_enqueued
+      expect(DbSupport.primary_key_exists?("que_scheduler_audit_enqueued")).to be true
+      described_class.migrate!(version: 7)
+      expect(DbSupport.primary_key_exists?("que_scheduler_audit_enqueued")).to be false
+
       expect(described_class.db_version).to eq(7)
 
       # Check 7 change down
@@ -118,6 +126,12 @@ def check_index_existence(index_name, expect)
       described_class.migrate!(version: 7)
       expect(described_class.db_version).to eq(7)
 
+      # Check 8 change up
+      expect(DbSupport.primary_key_exists?("que_scheduler_audit_enqueued")).to be false
+      described_class.migrate!(version: 8)
+      expect(DbSupport.primary_key_exists?("que_scheduler_audit_enqueued")).to be true
+      expect(described_class.db_version).to eq(8)
+
       Que::Scheduler::StateChecks.check
     end
     # rubocop:enable RSpec/MultipleExpectations
@@ -179,5 +193,26 @@ def check_index_existence(index_name, expect)
       SQL
       expect(result.count).to eq(0)
     end
+
+    it "checks all tables have a primary key (needed for eg Blue / Green logical replication)" do
+      no_pk_tables = ActiveRecord::Base.connection.execute(<<~SQL).to_a
+        SELECT pgc.relname as "table", pgns.nspname as "namespace"
+        FROM pg_class pgc
+        JOIN pg_namespace pgns ON pgns.oid = pgc.relnamespace
+        WHERE pgc.relkind = 'r'
+        AND pgns.nspname NOT IN ('pg_catalog', 'information_schema')
+        AND pgc.oid NOT IN
+          (
+            SELECT pgc.oid
+            FROM pg_class pgc
+            JOIN pg_index pgi ON pgi.indrelid = pgc.oid
+            JOIN pg_namespace pgns ON pgns.oid = pgc.relnamespace
+            WHERE pgi.indisprimary = true
+            AND pgc.relkind = 'r'
+          );
+      SQL
+
+      expect(no_pk_tables).to be_empty, "Tables with no primary key: #{no_pk_tables}"
+    end
   end
 end
diff --git a/spec/support/db_support.rb b/spec/support/db_support.rb
index 2cc60ab5..fb12bb98 100644
--- a/spec/support/db_support.rb
+++ b/spec/support/db_support.rb
@@ -74,5 +74,13 @@ def convert_args_column(db_jobs)
         row
       end
     end
+
+    def primary_key_exists?(table_name)
+      result = Que::Scheduler::VersionSupport.execute(<<~SQL)
+        SELECT * FROM information_schema.table_constraints
+        WHERE table_name = '#{table_name}' AND constraint_type = 'PRIMARY KEY';
+      SQL
+      result.count.positive?
+    end
   end
 end