Update current elasticsearch schema rake task #2703

Merged · 2 commits · Sep 18, 2023
3 changes: 3 additions & 0 deletions docs/adding-new-fields.md
@@ -41,6 +41,9 @@ This job will block other rake tasks from being run for 15 minutes to an hour.

[Read more about re-indexing the elasticsearch indexes here](https://docs.publishing.service.gov.uk/manual/reindex-elasticsearch.html#how-to-reindex-an-elasticsearch-index).

If you consider the change low risk and are only adding new fields for which no content exists yet, you can run the `search:update_schema` task. This task attempts to update the Elasticsearch index schema in place, without requiring a re-index.
If you have made changes that affect existing fields, Elasticsearch will reject the update and a full re-index will be required.
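The additive-versus-destructive distinction above can be sketched as a small predicate. This is a hypothetical illustration, not Search API code: Elasticsearch accepts a mapping update that only adds new fields, but rejects one that redefines an existing field in place.

```ruby
# Hypothetical illustration (not Search API code): Elasticsearch accepts a
# put_mapping request that only adds fields, but rejects one that redefines
# an existing field.
def additive_change?(live_fields, new_fields)
  new_fields.all? do |field, definition|
    live_fields[field].nil? || live_fields[field] == definition
  end
end

live = { "title" => { "type" => "text" } }

# Adding a brand-new field leaves existing definitions untouched:
additive_change?(live, "tags" => { "type" => "keyword" })   # => true

# Changing the type of an existing field is not allowed in place:
additive_change?(live, "title" => { "type" => "keyword" })  # => false
```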

### Troubleshooting

#### The new field doesn't show up
12 changes: 12 additions & 0 deletions lib/index.rb
@@ -84,6 +84,18 @@ def with_lock
end
end

def sync_mappings
{}.tap do |errors|
mappings.each do |type, mapping|
@client.indices.put_mapping(index: index_name, type:, body: mapping)
logger.info "Updated mappings for #{type} type on #{index_name} index"
rescue Elasticsearch::Transport::Transport::Errors::BadRequest => e
errors[type] = e
logger.warn "Unable to update mappings for #{type} type on #{index_name} index: #{e.message}"
end
end
end

def add(documents, options = {})
logger.info "Adding #{documents.size} document(s) to #{index_name}"

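The `sync_mappings` method above collects per-type failures rather than aborting on the first one. A minimal sketch of that pattern, with lambdas standing in for the real `put_mapping` calls:

```ruby
# Minimal sketch of the error-collecting pattern sync_mappings uses: build a
# hash with Hash#tap, rescue inside each iteration, and record failures keyed
# by type name. The lambdas are stand-ins for the real put_mapping calls.
def apply_each(operations)
  {}.tap do |errors|
    operations.each do |name, operation|
      operation.call
    rescue StandardError => e
      errors[name] = e
    end
  end
end

errors = apply_each(
  "generic-document" => -> { :ok },
  "failing-document" => -> { raise ArgumentError, "invalid mapping" },
)
errors.keys # => ["failing-document"]
```

Collecting the exceptions this way lets the caller report every failing type in one pass instead of stopping at the first rejection.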
1 change: 1 addition & 0 deletions lib/rummager.rb
@@ -168,6 +168,7 @@
require "legacy_client/index_for_search"
require "legacy_client/multivalue_converter"
require "schema_migrator"
require "schema_synchroniser"
require "missing_metadata/fetcher"
require "missing_metadata/runner"
require "parameter_parser/base_parameter_parser"
15 changes: 15 additions & 0 deletions lib/schema_synchroniser.rb
@@ -0,0 +1,15 @@
class SchemaSynchroniser
attr_reader :errors

def initialize(index_group)
@index = index_group.current
end

def call
@errors = @index.sync_mappings
end

def synchronised_types
@index.mappings.keys.difference(@errors.keys)
end
end
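A hypothetical usage of `SchemaSynchroniser` with stand-in objects exposing the interface it expects (the class is restated so the sketch runs on its own; `FakeIndex` and `FakeGroup` are illustrative, not real Search API objects):

```ruby
# Restated from the file above so the sketch is self-contained.
class SchemaSynchroniser
  attr_reader :errors

  def initialize(index_group)
    @index = index_group.current
  end

  def call
    @errors = @index.sync_mappings
  end

  def synchronised_types
    @index.mappings.keys.difference(@errors.keys)
  end
end

# Hypothetical stand-ins: the synchroniser only needs #current on the group
# and #sync_mappings / #mappings on the index.
FakeIndex = Struct.new(:mappings, :failures) do
  def sync_mappings
    failures
  end
end
FakeGroup = Struct.new(:current)

index = FakeIndex.new(
  { "generic-document" => {}, "failing-document" => {} },
  { "failing-document" => StandardError.new("invalid mapping") },
)

synchroniser = SchemaSynchroniser.new(FakeGroup.new(index))
synchroniser.call
synchroniser.synchronised_types # => ["generic-document"]
```

`synchronised_types` is just the set difference between every mapped type and the types that errored, which is what the rake task below iterates over when reporting success.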
Contributor Author:

I considered adding a unit test for this, but it ended up just testing a mock of the index group, which felt very inflexible. Unfortunately I couldn't see a way to inject a schema into the actual index and index group objects, so I decided to stick with the integration test only, even though it can't cover the error handling.

22 changes: 22 additions & 0 deletions lib/tasks/indices.rake
@@ -142,6 +142,28 @@ this task will run against all active clusters.
end
end

desc "Update the schema in place to reflect the current Search API configuration. This task is idempotent.

If there are changes to configuration that cannot be made to the live schema because the change is not applicable to
the existing data, you will need to run the \"migrate_schema\" task instead, which requires locking the index."
task :update_schema, [:clusters] do |_, args|
clusters_from_args(args).each do |cluster|
puts "Updating schema on cluster #{cluster.key}"

index_names.each do |index_name|
index_group = SearchConfig.instance(cluster).search_server.index_group(index_name)
synchroniser = SchemaSynchroniser.new(index_group)
synchroniser.call
synchroniser.synchronised_types.each do |type|
puts "Successfully synchronised #{type} type on #{index_name} index"
end
synchroniser.errors.each do |type, exception|
puts "Unable to synchronise #{type} on #{index_name} due to #{exception.message}"
end
end
end
end

desc "Switches an index group to a new index WITHOUT transferring the data"
task :switch_to_empty_index, :clusters do |_, args|
# Note that this task will effectively clear out the index, so shouldn't be
16 changes: 16 additions & 0 deletions spec/integration/schema_synchroniser_spec.rb
@@ -0,0 +1,16 @@
require "spec_helper"

RSpec.describe SchemaSynchroniser do
before do
clean_index_content("govuk_test")
end

it "synchronises the current Elasticsearch index schema with the schema defined by the search API" do
index_group = search_server.index_group("govuk_test")
mappings = index_group.current.mappings
synchroniser = described_class.new(index_group)
synchroniser.call
expect(synchroniser.synchronised_types).not_to be_empty
expect(synchroniser.synchronised_types).to eq(mappings.keys)
end
end
48 changes: 48 additions & 0 deletions spec/unit/index_spec.rb
@@ -0,0 +1,48 @@
require "spec_helper"

RSpec.describe SearchIndices::Index do
let(:base_uri) { "http://example.com:9200" }

it "syncs mappings to elasticsearch and returns any failures" do
mappings = {
"generic-document" => {
"properties" => {
"new-field" => { "type": "text" },
},
},
"failing-document" => {
"properties" => {
"invalid-field" => { "type": "text" },
},
},
}
stub = stub_request(:put, %r{#{base_uri}/govuk-abc/_mapping/generic-document})
.with(body: mappings["generic-document"])
.to_return({
status: 200,
body: { "ok" => true, "acknowledged" => true }.to_json,
headers: { "Content-Type" => "application/json" },
})

error_body = { "error" => {
"type" => "illegal_argument_exception",
"reason" => "invalid mapping",
} }.to_json
failing_stub = stub_request(:put, %r{#{base_uri}/govuk-abc/_mapping/failing-document})
.with(body: mappings["failing-document"])
.to_return({
status: 400,
body: error_body,
headers: { "Content-Type" => "application/json" },
})

index = SearchIndices::Index.new(base_uri, "govuk-abc", "govuk", mappings, SearchConfig.default_instance)

errors = index.sync_mappings

assert_requested stub
assert_requested failing_stub
expect(errors).not_to be_empty
expect(Elasticsearch::Transport::Transport::Errors::BadRequest.new("[400] #{error_body}").message).to eq(errors["failing-document"].message)
end
end