Skip to content
This repository has been archived by the owner on Mar 21, 2018. It is now read-only.

Commit

Permalink
feat(lazy): Adding lazy update feature
Browse files Browse the repository at this point in the history
  • Loading branch information
pixelastic committed Jul 13, 2016
1 parent 769c3e4 commit 16e7b74
Show file tree
Hide file tree
Showing 5 changed files with 300 additions and 41 deletions.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,21 @@ algolia:
record_css_selector: 'p,ul'
```

#### `lazy_update`

`false`: The plugin will push all the records to a temporary index and, once
everything is pushed, will override the current index with it. This is the most
straightforward way and ensures that all the changes happen in one move: you
search either in the old data or in the new data. This is the default value.

`true`: With `lazy_update` enabled, the plugin will try to reduce the number of
calls made to the API, to consume fewer operations on your quota. It will get
a list of all the records in your index and all the records ready to be pushed.
It will compare both lists, push the new records and delete the old ones. In
most cases it should consume fewer operations, but the changes won't be atomic
(i.e. you might have your index in a hybrid state, with old records not yet
removed and/or new records not yet added, for a couple of minutes).

#### `settings`

Here you can pass any specific [index settings][5] to your Algolia index. All
Expand Down
66 changes: 60 additions & 6 deletions lib/push.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,21 @@ def init_options(args = [], options = {}, config = {})
@args = args
@options = options
@config = config
@checker = AlgoliaSearchCredentialChecker.new(@config)
@is_verbose = @config['verbose']
@is_dry_run = @config['dry_run']
@is_lazy_update = lazy_update?

self
end

# Tell whether the lazy update feature is turned on.
# Reads the `lazy_update` key under the `algolia` section of @config: any
# truthy value enables it, a missing section or key means disabled.
def lazy_update?
  algolia = @config['algolia']
  algolia && algolia['lazy_update'] ? true : false
end

# Check if the specified file should be indexed (we exclude static files,
# robots.txt and custom defined exclusions).
def indexable?(file)
Expand Down Expand Up @@ -187,19 +196,64 @@ def batch_add_items(items, index)
end
end

def push(items)
AlgoliaSearchCredentialChecker.new(@config).assert_valid

Jekyll.logger.info '=== DRY RUN ===' if @is_dry_run

# Greedy update will push all the records to a temporary index, then
# override the existing index with this temp one.
#
# items - Array of records to index.
#
# The index name is read from the credential checker (@checker) only; it is
# no longer read from @config first, since that value was immediately
# overwritten and made the method fail when @config had no `algolia` section.
# In dry-run mode (@is_dry_run) the final move is skipped.
def greedy_update(items)
  # Add items to a temp index, then rename it
  index_name = @checker.index_name
  index_name_tmp = "#{index_name}_tmp"
  batch_add_items(items, create_index(index_name_tmp))
  Algolia.move_index(index_name_tmp, index_name) unless @is_dry_run

  Jekyll.logger.info "Indexing of #{items.size} items " \
                     "in #{index_name} done."
end

# Lazy update keeps the number of operations low: remote records that no
# longer exist locally are deleted, and only the local records missing from
# the index are pushed.
def lazy_update(items)
  target_index = create_index(@checker.index_name)
  remote_object_ids = remote_ids(target_index)
  local_object_ids = items.map { |record| record[:objectID] }

  delete_remote_not_in_local(
    target_index, local_object_ids, remote_object_ids
  )
  add_local_not_in_remote(
    target_index, items, local_object_ids, remote_object_ids
  )
end

# Collect the objectID of every hit yielded while browsing the remote index.
# Only the `objectID` attribute is requested from the index.
def remote_ids(index)
  [].tap do |ids|
    index.browse(attributesToRetrieve: 'objectID') do |hit|
      ids << hit['objectID']
    end
  end
end

# Delete all remote items that are no longer in the local items
#
# index  - index object receiving the deletion (`delete_objects!` is called)
# local  - Array of objectIDs computed locally
# remote - Array of objectIDs currently in the index
#
# The number of items to delete is always logged. No API call is made when
# there is nothing to delete, nor in dry-run mode (@is_dry_run), so that a
# dry run never alters the index.
def delete_remote_not_in_local(index, local, remote)
  list = remote - local
  Jekyll.logger.info "Deleting #{list.size} items"
  return if list.empty? || @is_dry_run

  index.delete_objects!(list)
end

# Push all local items that are not yet in the index
#
# index  - index object handed over to `batch_add_items`
# items  - Array of records (each with an :objectID key)
# local  - Array of objectIDs of `items`
# remote - Array of objectIDs currently in the index
#
# The number of items to add is always logged (it used to be logged only
# when it was 0), mirroring what is done for deletions. Nothing is pushed
# when there is no new item.
def add_local_not_in_remote(index, items, local, remote)
  list = local - remote
  Jekyll.logger.info "Adding #{list.size} items"
  return if list.empty?

  # Hash lookup instead of Array#include? to avoid scanning `list` once per
  # item
  wanted = list.each_with_object({}) { |id, lookup| lookup[id] = true }
  items_to_push = items.select { |item| wanted.key?(item[:objectID]) }
  batch_add_items(items_to_push, index)
end

# Entry point of the indexing: assert that the credentials built from
# @config are valid, announce dry runs, then hand the items over to the
# lazy or greedy strategy depending on @is_lazy_update.
def push(items)
  AlgoliaSearchCredentialChecker.new(@config).assert_valid

  Jekyll.logger.info '=== DRY RUN ===' if @is_dry_run

  return lazy_update(items) if @is_lazy_update

  greedy_update(items)
end
end
end
2 changes: 2 additions & 0 deletions lib/record_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ def extract
nokogiri_node = raw_item[:node]
raw_item.delete(:node)
item = shared_attributes.merge(raw_item)
item[:objectID] = item[:uuid]
item.delete(:uuid)

item = custom_hook_each(item, nokogiri_node)
next if item.nil?
Expand Down
204 changes: 190 additions & 14 deletions spec/push_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
end

# Silence the Jekyll logger for every example, so nothing is printed on
# screen and examples can assert on the messages with `have_received`.
# The explicit stubs replace the former `mock_logger` call.
before(:each) do
  allow(Jekyll.logger).to receive(:info)
  allow(Jekyll.logger).to receive(:warn)
  allow(Jekyll.logger).to receive(:error)
end

describe 'init_options' do
Expand All @@ -44,6 +46,35 @@
end
end

# lazy_update? reflects the `lazy_update` key of the `algolia` config section
describe 'lazy_update?' do
  it 'should return false by default' do
    # Given an empty configuration
    push.init_options(nil, {}, {})

    # When / Then
    expect(push.lazy_update?).to eq false
  end

  it 'should return true if such an option is set in the config' do
    # Given a configuration enabling the feature
    algolia_config = { 'lazy_update' => true }
    push.init_options(nil, {}, { 'algolia' => algolia_config })

    # When / Then
    expect(push.lazy_update?).to eq true
  end
end

describe 'indexable?' do
it 'exclude StaticFiles' do
expect(push.indexable?(static_file)).to eq false
Expand Down Expand Up @@ -166,9 +197,6 @@ def push.custom_hook_excluded_file?(_file)
@error_handler_double = double('Error Handler double').as_null_object
push.init_options(nil, {}, {})
allow(@index_double).to receive(:set_settings).and_raise
# Do not really log the errors/warnings on screen
allow(Jekyll.logger).to receive(:error)
allow(Jekyll.logger).to receive(:warn)
end

it 'stops if API throw an error' do
Expand Down Expand Up @@ -226,7 +254,6 @@ def push.custom_hook_excluded_file?(_file)
describe 'jekyll_new' do
it 'should return a patched version of site with a custom write' do
# Given
allow(Jekyll.logger).to receive(:warn)
normal_site = Jekyll::Site.new(Jekyll.configuration)
normal_method = normal_site.method(:write).source_location

Expand Down Expand Up @@ -291,6 +318,53 @@ def push.custom_hook_excluded_file?(_file)
end

# push must delegate to the strategy selected by lazy_update?
describe 'push' do
  before(:each) do
    # Never run the real credential validation
    allow_any_instance_of(AlgoliaSearchCredentialChecker)
      .to receive(:assert_valid)
  end

  it 'should do a lazy update if such is configured' do
    # Given
    allow(push).to receive(:lazy_update?).and_return(true)
    allow(push).to receive(:lazy_update)
    push.init_options(nil, {}, {})
    items = ['foo']

    # When
    push.push(items)

    # Then
    expect(push).to have_received(:lazy_update).with(items)
  end

  it 'should do a greedy update if such is configured' do
    # Given
    # There is no `greedy_update?` predicate: the greedy path is taken when
    # `lazy_update?` is false, so that is the method to stub.
    allow(push).to receive(:lazy_update?).and_return(false)
    allow(push).to receive(:greedy_update)
    push.init_options(nil, {}, {})
    items = ['foo']

    # When
    push.push(items)

    # Then
    expect(push).to have_received(:greedy_update).with(items)
  end
end

# batch_add_items must abort the process when the API call fails
describe 'batch_add_items' do
  it 'should display an error if `add_objects!` failed' do
    # Given
    index = double('Algolia Index').as_null_object
    allow(index).to receive(:add_objects!).and_raise

    # When / Then
    # `raise_error` is a block matcher: give `expect` a block rather than a
    # lambda passed as a value.
    expect { push.batch_add_items(items, index) }
      .to raise_error SystemExit
  end
end

describe 'greedy_update' do
let(:index_double) { double('Algolia Index').as_null_object }
let(:config) do
{
Expand All @@ -302,21 +376,19 @@ def push.custom_hook_excluded_file?(_file)

before(:each) do
push.init_options(nil, {}, config)
# Mock all calls to not send anything
allow_any_instance_of(AlgoliaSearchCredentialChecker)
.to receive(:assert_valid)
allow(Algolia).to receive(:set_extra_header)
allow(Algolia).to receive(:init)
allow(Algolia).to receive(:move_index)
allow(Algolia::Index).to receive(:new).and_return(index_double)
allow(Jekyll.logger).to receive(:info)
end

it 'should create a temporary index' do
# Given

# When
push.push(items)
push.greedy_update(items)

# Then
expect(Algolia::Index).to have_received(:new).with('INDEXNAME_tmp')
Expand Down Expand Up @@ -352,17 +424,121 @@ def push.custom_hook_excluded_file?(_file)
# Then
expect(Jekyll.logger).to have_received(:info).with(/of 2 items/i)
end
end

it 'should display an error if `add_objects!` failed' do
describe 'lazy_update' do
let(:items) do
[
{ objectID: 'foo' },
{ objectID: 'baz' }
]
end
let(:remote) { %w(foo bar) }
let(:local) { %w(foo baz) }
let(:index) { double.as_null_object }

# remote_ids browses the index and collects the objectID of each hit
describe 'remote_ids' do
  it 'should call browse on the index with the attributesToRetrieve ' do
    # Given
    index = double.as_null_object

    # When
    push.remote_ids(index)

    # Then
    # Also check the argument, as announced by the example description
    expect(index).to have_received(:browse)
      .with(attributesToRetrieve: 'objectID')
  end

  it 'should return an array of all objectID returned by browse' do
    # Given
    index = double.as_null_object
    hit1 = { 'objectID' => 'foo' }
    hit2 = { 'objectID' => 'bar' }
    allow(index).to receive(:browse).and_yield(hit1).and_yield(hit2)

    # When
    actual = push.remote_ids(index)

    # Then
    expect(actual).to eq %w(foo bar)
  end
end

# delete_remote_not_in_local removes from the index the ids missing locally
describe 'delete_remote_not_in_local' do
  it 'calls delete_objects! with the array of items to delete' do
    # When
    push.delete_remote_not_in_local(index, local, remote)

    # Then only the id present remotely but not locally is deleted
    expected = ['bar']
    expect(index).to have_received(:delete_objects!).with(expected)
  end

  it 'displays the number of items deleted' do
    # When
    push.delete_remote_not_in_local(index, local, remote)

    # Then
    message = 'Deleting 1 items'
    expect(Jekyll.logger).to have_received(:info).with(message)
  end

  it 'should do not do an API call if there is nothing to delete' do
    # Given identical local and remote ids
    same_ids = %w(foo bar)

    # When
    push.delete_remote_not_in_local(index, same_ids, same_ids)

    # Then
    expect(index).not_to have_received(:delete_objects!)
  end
end

# add_local_not_in_remote pushes the records whose id is not yet in the index
describe 'add_local_not_in_remote' do
  it 'should push all local items not yet in remote' do
    # Given a stubbed batch push
    allow(push).to receive(:batch_add_items)

    # When
    push.add_local_not_in_remote(index, items, local, remote)

    # Then only the record unknown to the remote index is pushed
    new_records = [{ objectID: 'baz' }]
    expect(push).to have_received(:batch_add_items).with(new_records, index)
  end

  it 'should warn about pushing 0 records' do
    # Given identical local and remote ids
    same_ids = %w(foo bar)

    # When
    push.add_local_not_in_remote(index, items, same_ids, same_ids)

    # Then
    expect(Jekyll.logger)
      .to have_received(:info).with('Adding 0 items')
  end
end

# lazy_update must hand the index and both id lists to the two helpers.
# Stale expectations from the former `push` example (unknown `index_double`,
# SystemExit on `push.push`) have been dropped: they do not belong to this
# scenario, where every collaborator is stubbed.
it 'should delete items from remote and push new ones' do
  # Given
  allow(push).to receive(:create_index).and_return(index)
  allow(push).to receive(:remote_ids).and_return(remote)
  allow(push).to receive(:delete_remote_not_in_local)
  allow(push).to receive(:add_local_not_in_remote)
  push.init_options(nil, {}, {})

  # When
  push.lazy_update(items)

  # Then
  expect(push).to have_received(:delete_remote_not_in_local)
    .with(index, local, remote)
  expect(push).to have_received(:add_local_not_in_remote)
    .with(index, items, local, remote)
end
end
end
Loading

0 comments on commit 16e7b74

Please sign in to comment.