Skip to content

Commit

Permalink
Provide a rake task for updating an Elasticsearch index schema in pla…
Browse files Browse the repository at this point in the history
…ce when Search API configuration has changed

This rake task has been added because, at the moment, even purely additive changes to the Elasticsearch schema configuration require a full reindex. Full reindexes require locking the index for significant periods of time, so they have to be run out of hours in production and can block integration for long periods. This is causing pain when developing applications that consume the Search API.

The task is idempotent, and Elasticsearch will not permit changes to the index that would alter the type of already-existing data. Any such change results in a “Bad Request” error from Elasticsearch, which is reported to the rake operator. In such cases a full reindex is required.
  • Loading branch information
ryanb-gds committed Aug 31, 2023
1 parent 73247d0 commit 769b575
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 0 deletions.
10 changes: 10 additions & 0 deletions lib/index.rb
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,16 @@ def with_lock
end
end

# Sends every configured mapping to the index with one put_mapping call per
# type.
#
# A BadRequest raised for one type is captured and the remaining types are
# still attempted; any other exception propagates to the caller.
#
# Returns a Hash of type => captured BadRequest exception (empty when every
# put_mapping call succeeded).
def sync_mappings
  mappings.each_with_object({}) do |(type, mapping), failures|
    @client.indices.put_mapping(index: index_name, type:, body: mapping)
  rescue Elasticsearch::Transport::Transport::Errors::BadRequest => e
    failures[type] = e
  end
end

def add(documents, options = {})
logger.info "Adding #{documents.size} document(s) to #{index_name}"

Expand Down
1 change: 1 addition & 0 deletions lib/rummager.rb
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@
require "legacy_client/index_for_search"
require "legacy_client/multivalue_converter"
require "schema_migrator"
require "schema_synchroniser"
require "missing_metadata/fetcher"
require "missing_metadata/runner"
require "parameter_parser/base_parameter_parser"
Expand Down
15 changes: 15 additions & 0 deletions lib/schema_synchroniser.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Synchronises the mappings of an index group's current index in place and
# keeps track of which types could not be updated.
class SchemaSynchroniser
  # Result of the most recent #sync (whatever the index's #sync_mappings
  # returned, keyed by type), or nil when #sync has not been called yet.
  attr_reader :errors

  # index_group - object whose #current returns the index to synchronise.
  def initialize(index_group)
    @index = index_group.current
    @errors = nil
  end

  # Runs the synchronisation against the index and stores the failures.
  #
  # Returns the failures reported by the index's #sync_mappings.
  def sync
    @errors = @index.sync_mappings
  end

  # Returns the mapping types that the last #sync updated without error.
  # Before #sync has been called nothing has been synchronised, so this
  # returns an empty Array rather than failing on the missing result.
  def synchronised_types
    return [] if @errors.nil?

    @index.mappings.keys.difference(@errors.keys)
  end
end
22 changes: 22 additions & 0 deletions lib/tasks/indices.rake
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,28 @@ this task will run against all active clusters.
end
end

desc "Update the schema in place to reflect the current Search API configuration. This task is idempotent.
If there are changes to configuration that cannot be made to the live schema because the change is not applicable to
the existing data, you will need to run the \"migrate_schema\" task instead, which requires locking the index."
task :update_schema, [:clusters] do |_, args|
  clusters_from_args(args).each do |cluster|
    puts "Updating schema on cluster #{cluster.key}"

    index_names.each do |index_name|
      # Synchronise the current index of each group on this cluster.
      schema_sync = SchemaSynchroniser.new(
        SearchConfig.instance(cluster).search_server.index_group(index_name),
      )
      schema_sync.sync

      # Report successes first, then every type that could not be updated.
      schema_sync.synchronised_types.each { |type| puts "Successfully synchronised #{type} type on #{index_name} index" }
      schema_sync.errors.each { |type, exception| puts "Unable to synchronise #{type} on #{index_name} due to #{exception.message}" }
    end
  end
end

desc "Switches an index group to a new index WITHOUT transferring the data"
task :switch_to_empty_index, :clusters do |_, args|
# Note that this task will effectively clear out the index, so shouldn't be
Expand Down
16 changes: 16 additions & 0 deletions spec/integration/schema_synchroniser_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
require "spec_helper"

RSpec.describe SchemaSynchroniser do
  # Clear the test index's content before each example.
  before { clean_index_content("govuk_test") }

  it "synchronises the current Elasticsearch index schema with the schema defined by the search API" do
    group = search_server.index_group("govuk_test")
    configured_mappings = group.current.mappings

    schema_sync = described_class.new(group)
    schema_sync.sync
    synced_types = schema_sync.synchronised_types

    # Every configured type is expected to synchronise, in configuration order.
    expect(synced_types).not_to be_empty
    expect(synced_types).to eq(configured_mappings.keys)
  end
end
43 changes: 43 additions & 0 deletions spec/unit/index_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
require "spec_helper"

RSpec.describe SearchIndices::Index do
  let(:base_uri) { "http://example.com:9200" }

  it "syncs mappings to elasticsearch and returns any failures" do
    # String keys throughout, matching the rest of the fixture.
    mappings = {
      "generic-document" => {
        "properties" => {
          "new-field" => { "type" => "text" },
        },
      },
      "failing-document" => {
        "properties" => {
          "invalid-field" => { "type" => "text" },
        },
      },
    }

    # The first type is accepted by the stubbed Elasticsearch endpoint...
    stub = stub_request(:put, %r{#{base_uri}/govuk-abc/_mapping/generic-document})
      .with(body: mappings["generic-document"])

    # ...while the second is rejected with a 400 response.
    error_body = { "error" => {
      "type" => "illegal_argument_exception",
      "reason" => "invalid mapping",
    } }.to_json
    failing_stub = stub_request(:put, %r{#{base_uri}/govuk-abc/_mapping/failing-document})
      .with(body: mappings["failing-document"])
      .to_return({
        status: 400,
        body: error_body,
        headers: { "Content-Type" => "application/json" },
      })

    index = described_class.new(base_uri, "govuk-abc", "govuk", mappings, SearchConfig.default_instance)

    errors = index.sync_mappings

    # Both types must have been attempted even though one of them failed.
    assert_requested stub
    assert_requested failing_stub

    # Only the rejected type is reported, carrying the BadRequest raised for it.
    expect(errors.keys).to eq(["failing-document"])
    expect(errors["failing-document"]).to be_a(Elasticsearch::Transport::Transport::Errors::BadRequest)
    expect(errors["failing-document"].message).to eq("[400] #{error_body}")
  end
end

0 comments on commit 769b575

Please sign in to comment.