From 9c96cfdb7e05a2e785396c4e94856104d4f2aa6e Mon Sep 17 00:00:00 2001 From: Maciej Rzasa Date: Fri, 12 Feb 2021 14:28:46 +0100 Subject: [PATCH] Replace parent-child with a join field. Implement hierarchical structures with Elastic join field, in place of an obsolete (and removed) parent-child relationships. --- CHANGELOG.md | 3 +- README.md | 17 + lib/chewy/errors.rb | 6 + lib/chewy/fields/base.rb | 80 ++++- lib/chewy/fields/root.rb | 12 +- lib/chewy/index/adapter/active_record.rb | 5 + lib/chewy/index/adapter/orm.rb | 10 +- lib/chewy/index/import.rb | 3 +- lib/chewy/index/import/bulk_builder.rb | 250 ++++++++++++-- lib/chewy/search/request.rb | 4 +- spec/chewy/fields/base_spec.rb | 56 ++-- .../chewy/index/adapter/active_record_spec.rb | 26 ++ spec/chewy/index/import/bulk_builder_spec.rb | 304 ++++++++++++++++++ spec/chewy/index/import/routine_spec.rb | 6 +- spec/chewy/index/import_spec.rb | 40 +++ spec/support/active_record.rb | 7 + 16 files changed, 746 insertions(+), 83 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f31347bee..5d9a3ae21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### New Features + * [#760](https://github.com/toptal/chewy/pull/760): Replace parent-child mapping with a [join field](https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#parent-child-mapping-types) ([@mrzasa][]) + ### Changes ### Bugs Fixed @@ -723,4 +725,3 @@ [@Vitalina-Vakulchyk]: https://github.com/Vitalina-Vakulchyk [@webgago]: https://github.com/webgago [@yahooguntu]: https://github.com/yahooguntu - diff --git a/README.md b/README.md index f76ce18e2..dc670821f 100644 --- a/README.md +++ b/README.md @@ -446,6 +446,23 @@ end See the section on *Script fields* for details on calculating distance in a search. +### Join fields + +You can use a [join field](https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html) +to implement parent-child relationships between documents. +It [replaces the old `parent_id` based parent-child mapping](https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html#parent-child-mapping-types) + +To use it, you need to pass `relations` and `join` (with `type` and `id`) options: +```ruby +field :hierarchy_link, type: :join, relations: {question: %i[answer comment], answer: :vote, vote: :subvote}, join: {type: :comment_type, id: :commented_id} +``` +assuming you have `comment_type` and `commented_id` fields in your model. + +Note that when you reindex a parent, it's children and grandchildren will be reindexed as well. +This may require additional queries to the primary database and to elastisearch. + +Also note that the join field doesn't support crutches (it should be a field directly defined on the model). + ### Crutches™ technology Assume you are defining your index like this (product has_many categories through product_categories): diff --git a/lib/chewy/errors.rb b/lib/chewy/errors.rb index 870caf049..5b198ed26 100644 --- a/lib/chewy/errors.rb +++ b/lib/chewy/errors.rb @@ -30,4 +30,10 @@ def initialize(type, import_errors) super message end end + + class InvalidJoinFieldType < Error + def initialize(join_field_type, join_field_name, relations) + super("`#{join_field_type}` set for the join field `#{join_field_name}` is not on the :relations list (#{relations})") + end + end end diff --git a/lib/chewy/fields/base.rb b/lib/chewy/fields/base.rb index 18282bfa3..c94366feb 100644 --- a/lib/chewy/fields/base.rb +++ b/lib/chewy/fields/base.rb @@ -1,8 +1,8 @@ module Chewy module Fields class Base - attr_reader :name, :options, :value, :children - attr_accessor :parent + attr_reader :name, :join_options, :options, :children + attr_accessor :parent # used by Chewy::Index::Mapping to expand nested fields def initialize(name, value: nil, **options) @name = name.to_sym @@ -10,9 +10,11 @@ def initialize(name, value: nil, **options) update_options!(**options) @value = value @children = [] + @allowed_relations = find_allowed_relations(options[:relations]) # for join fields end def update_options!(**options) + @join_options = options.delete(:join) || {} @options = options end @@ -53,30 +55,70 @@ def compose(*objects) {name => result} end + def value + if join_field? + join_type = join_options[:type] + join_id = join_options[:id] + # memoize + @value ||= proc do |object| + validate_join_type!(value_by_name_proc(join_type).call(object)) + # If it's a join field and it has join_id, the value is compound and contains + # both name (type) and id of the parent object + if value_by_name_proc(join_id).call(object).present? + { + name: value_by_name_proc(join_type).call(object), # parent type + parent: value_by_name_proc(join_id).call(object) # parent id + } + else + value_by_name_proc(join_type).call(object) + end + end + else + @value + end + end + private def geo_point? @options[:type].to_s == 'geo_point' end + def join_field? + @options[:type].to_s == 'join' + end + def ignore_blank? @options.fetch(:ignore_blank) { geo_point? } end def evaluate(objects) - object = objects.first - if value.is_a?(Proc) - if value.arity.zero? - object.instance_exec(&value) - elsif value.arity.negative? - value.call(*object) - else - value.call(*objects.first(value.arity)) - end + value_by_proc(objects, value) else - message = value.is_a?(Symbol) || value.is_a?(String) ? value.to_sym : name + value_by_name(objects, value) + end + end + def value_by_proc(objects, value) + object = objects.first + if value.arity.zero? + object.instance_exec(&value) + elsif value.arity.negative? + value.call(*object) + else + value.call(*objects.first(value.arity)) + end + end + + def value_by_name(objects, value) + object = objects.first + message = value.is_a?(Symbol) || value.is_a?(String) ? value.to_sym : name + value_by_name_proc(message).call(object) + end + + def value_by_name_proc(message) + proc do |object| if object.is_a?(Hash) if object.key?(message) object[message] @@ -89,6 +131,20 @@ def evaluate(objects) end end + def validate_join_type!(type) + return unless type + return if @allowed_relations.include?(type.to_sym) + + raise Chewy::InvalidJoinFieldType.new(type, @name, options[:relations]) + end + + def find_allowed_relations(relations) + return [] unless relations + return relations unless relations.is_a?(Hash) + + (relations.keys + relations.values).flatten.uniq + end + def compose_children(value, *parent_objects) return unless value diff --git a/lib/chewy/fields/root.rb b/lib/chewy/fields/root.rb index decb1c929..d6e125ba8 100644 --- a/lib/chewy/fields/root.rb +++ b/lib/chewy/fields/root.rb @@ -1,7 +1,7 @@ module Chewy module Fields class Root < Chewy::Fields::Base - attr_reader :dynamic_templates, :id, :parent, :parent_id + attr_reader :dynamic_templates, :id def initialize(name, **options) super(name, **options) @@ -12,9 +12,7 @@ def initialize(name, **options) def update_options!(**options) @id = options.fetch(:id, options.fetch(:_id, @id)) - @parent = options.fetch(:parent, options.fetch(:_parent, @parent)) - @parent_id = options.fetch(:parent_id, @parent_id) - @options.merge!(options.except(:id, :_id, :parent, :_parent, :parent_id, :type)) + @options.merge!(options.except(:id, :_id, :type)) end def mappings_hash @@ -50,12 +48,6 @@ def dynamic_template(*args) end end - def compose_parent(object) - return unless parent_id - - parent_id.arity.zero? ? object.instance_exec(&parent_id) : parent_id.call(object) - end - def compose_id(object) return unless id diff --git a/lib/chewy/index/adapter/active_record.rb b/lib/chewy/index/adapter/active_record.rb index 7526e2f4a..b69e11b9a 100644 --- a/lib/chewy/index/adapter/active_record.rb +++ b/lib/chewy/index/adapter/active_record.rb @@ -94,6 +94,11 @@ def raw_default_scope_where_ids_in(ids, converter) object_class.connection.execute(sql).map(&converter) end + def raw(scope, converter) + sql = scope.to_sql + object_class.connection.execute(sql).map(&converter) + end + def relation_class ::ActiveRecord::Relation end diff --git a/lib/chewy/index/adapter/orm.rb b/lib/chewy/index/adapter/orm.rb index 7c04d9e9e..c86b6d06b 100644 --- a/lib/chewy/index/adapter/orm.rb +++ b/lib/chewy/index/adapter/orm.rb @@ -101,11 +101,13 @@ def load(ids, **options) additional_scope = options[options[:_index].to_sym].try(:[], :scope) || options[:scope] loaded_objects = load_scope_objects(scope, additional_scope) - .index_by do |object| - object.public_send(primary_key).to_s - end + loaded_objects = raw(loaded_objects, options[:raw_import]) if options[:raw_import] + + indexed_objects = loaded_objects.index_by do |object| + object.public_send(primary_key).to_s + end - ids.map { |id| loaded_objects[id.to_s] } + ids.map { |id| indexed_objects[id.to_s] } end private diff --git a/lib/chewy/index/import.rb b/lib/chewy/index/import.rb index a5aca8912..8ffd99535 100644 --- a/lib/chewy/index/import.rb +++ b/lib/chewy/index/import.rb @@ -36,8 +36,7 @@ module ClassMethods # passed objects from the index if they are not in the default scope # or marked for destruction. # - # It handles parent-child relationships: if the object parent_id has been - # changed it destroys the object and recreates it from scratch. + # It handles parent-child relationships with a join field reindexing children when the parent is reindexed. # # Performs journaling if enabled: it stores all the ids of the imported # objects to a specialized index. It is possible to replay particular import diff --git a/lib/chewy/index/import/bulk_builder.rb b/lib/chewy/index/import/bulk_builder.rb index c139761c0..c39a30cfd 100644 --- a/lib/chewy/index/import/bulk_builder.rb +++ b/lib/chewy/index/import/bulk_builder.rb @@ -4,7 +4,7 @@ module Import # This class purpose is to build ES client-acceptable bulk # request body from the passed objects for index and deletion. # It handles parent-child relationships as well by fetching - # existing documents from ES, taking their `_parent` field and + # existing documents from ES and database, taking their join field values and # using it in the bulk body. # If fields are passed - it creates partial update entries except for # the cases when the type has parent and parent_id has been changed. @@ -24,9 +24,11 @@ def initialize(index, to_index: [], delete: [], fields: []) # @see https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/bulk.rb # @return [Array] bulk body def bulk_body + populate_cache + @bulk_body ||= @to_index.flat_map(&method(:index_entry)).concat( @delete.flat_map(&method(:delete_entry)) - ) + ).uniq end # The only purpose of this method is to cache document ids for @@ -39,64 +41,250 @@ def index_objects_by_id private - def crutches - @crutches ||= Chewy::Index::Crutch::Crutches.new @index, @to_index - end - - def parents - return unless type_root.parent_id - - @parents ||= begin - ids = @index.map do |object| - object.respond_to?(:id) ? object.id : object - end - ids.concat(@delete.map do |object| - object.respond_to?(:id) ? object.id : object - end) - @index.filter(ids: {values: ids}).order('_doc').pluck(:_id, :_parent).to_h - end + def crutches_for_index + @crutches_for_index ||= Chewy::Index::Crutch::Crutches.new @index, @to_index end def index_entry(object) entry = {} entry[:_id] = index_object_ids[object] if index_object_ids[object] - if parents - entry[:parent] = type_root.compose_parent(object) - parent = entry[:_id].present? && parents[entry[:_id].to_s] - end + data = data_for(object) + parent = cache(entry[:_id]) - if parent && entry[:parent].to_s != parent - entry[:data] = @index.compose(object, crutches) - [{delete: entry.except(:data).merge(parent: parent)}, {index: entry}] + entry[:routing] = routing(object) if join_field? + if parent_changed?(data, parent) + reindex_entries(object, data) + reindex_descendants(object) elsif @fields.present? return [] unless entry[:_id] - entry[:data] = {doc: @index.compose(object, crutches, fields: @fields)} + entry[:data] = {doc: data_for(object, fields: @fields)} [{update: entry}] else - entry[:data] = @index.compose(object, crutches) + entry[:data] = data [{index: entry}] end end + def reindex_entries(object, data, root: object) + entry = {} + entry[:_id] = index_object_ids[object] || entry_id(object) + entry[:data] = data + entry[:routing] = routing(root) || routing(object) if join_field? + delete = delete_single_entry(object, root: root).first + index = {index: entry} + [delete, index] + end + + def reindex_descendants(root) + descendants = load_descendants(root) + crutches = Chewy::Index::Crutch::Crutches.new @index, [root, *descendants] + descendants.flat_map do |object| + reindex_entries( + object, + data_for(object, crutches: crutches), + root: root + ) + end + end + def delete_entry(object) + delete_single_entry(object) + end + + def delete_single_entry(object, root: object) entry = {} entry[:_id] = entry_id(object) entry[:_id] ||= object.as_json return [] if entry[:_id].blank? - if parents - parent = entry[:_id].present? && parents[entry[:_id].to_s] - return [] unless parent + if join_field? + cached_parent = cache(entry[:_id]) + entry_parent_id = + if cached_parent + cached_parent[:parent_id] + else + find_parent_id(object) + end - entry[:parent] = parent + entry[:routing] = existing_routing(root.try(:id)) || existing_routing(object.id) + entry[:parent] = entry_parent_id if entry_parent_id end [{delete: entry}] end + def load_descendants(root) + root_type = join_field_type(root) + return [] unless root_type + + descendant_ids = [] + grouped_parents = {root_type => [root.id]} + # iteratively fetch all the descendants (with grouped_parents as a queue for next iteration) + until grouped_parents.empty? + children_data = grouped_parents.flat_map do |parent_type, parent_ids| + @index.query( + has_parent: { + parent_type: parent_type, + # ignore_unmapped to avoid error for the leaves of the tree + # (types without children) + ignore_unmapped: true, + query: {ids: {values: parent_ids}} + } + ).pluck(:_id, join_field).map { |id, join| [join['name'], id] } + end + descendant_ids |= children_data.map(&:last) + + grouped_parents = {} + children_data.each do |name, id| + next unless name + + grouped_parents[name] ||= [] + grouped_parents[name] << id + end + end + # query the primary database to load the descentants' records + @index.adapter.load(descendant_ids, _index: @index.base_name, raw_import: @index._default_import_options[:raw_import]) + end + + def populate_cache + @cache = load_cache + end + + def cache(id) + @cache[id.to_s] + end + + def load_cache + return {} unless join_field? + + @index + .filter(ids: {values: ids_for_cache}) + .order('_doc') + .pluck(:_id, :_routing, join_field) + .map do |id, routing, join| + [ + id, + {routing: routing, parent_id: join['parent']} + ] + end.to_h + end + + def existing_routing(id) + # All objects needed here should be cached in #load_cache, + # if not, we return nil. In some cases we don't have existing routing cached, + # e.g. for loaded descendants + return unless cache(id) + + cache(id)[:routing] + end + + # Two types of ids: + # * of parents of the objects to be indexed + # * of objects to be deleted + def ids_for_cache + ids = @to_index.flat_map do |object| + [find_parent_id(object), object.id] if object.respond_to?(:id) + end + ids.concat(@delete.map do |object| + object.id if object.respond_to?(:id) + end) + ids.uniq.compact + end + + def routing(object) + # filter out non-model objects, early return on object==nil + return unless object.respond_to?(:id) + + parent_id = find_parent_id(object) + if parent_id + routing(index_objects_by_id[parent_id.to_s]) || existing_routing(parent_id) + else + object.id.to_s + end + end + + def find_parent_id(object) + return unless object.respond_to?(:id) + + join = data_for(object, fields: [join_field.to_sym])[join_field] + join['parent'] if join + end + + def join_field + return @join_field if defined?(@join_field) + + @join_field = find_join_field + end + + def find_join_field + type_settings = @index.mappings_hash[:mappings] + return unless type_settings + + properties = type_settings[:properties] + join_fields = properties.find { |_, options| options[:type] == :join } + return unless join_fields + + join_fields.first.to_s + end + + def join_field_type(object) + return unless join_field? + + raw_object = + if @index._default_import_options[:raw_import] + @index._default_import_options[:raw_import].call(object.attributes) + else + object + end + + join_field_value = data_for( + raw_object, + fields: [join_field.to_sym], # build only the field that is needed + crutches: Chewy::Index::Crutch::Crutches.new(@index, [raw_object]) + )[join_field] + + case join_field_value + when String + join_field_value + when Hash + join_field_value['name'] + end + end + + def join_field? + join_field && !join_field.empty? + end + + def data_for(object, fields: [], crutches: crutches_for_index) + @index.compose(object, crutches, fields: fields) + end + + def parent_changed?(data, old_parent) + return false unless old_parent + return false unless join_field? + return false unless @fields.include?(join_field.to_sym) + return false unless data.key?(join_field) + + # The join field value can be a hash, e.g.: + # {"name": "child", "parent": "123"} for a child + # {"name": "parent"} for a parent + # but it can also be a string: (e.g. "parent") for a parent: + # https://www.elastic.co/guide/en/elasticsearch/reference/current/parent-join.html#parent-join + new_join_field_value = data[join_field] + if new_join_field_value.is_a? Hash + # If we have a hash in the join field, + # we're taking the `parent` field that holds the parent id. + new_parent_id = new_join_field_value['parent'] + new_parent_id != old_parent[:parent_id] + else + # If there is a non-hash value (String or nil), it means that the join field is changed + # and the current object is no longer a child. + true + end + end + def entry_id(object) if type_root.id type_root.compose_id(object) diff --git a/lib/chewy/search/request.rb b/lib/chewy/search/request.rb index 8356c2874..558f7ee1c 100644 --- a/lib/chewy/search/request.rb +++ b/lib/chewy/search/request.rb @@ -18,7 +18,7 @@ class Request include Scoping include Scrolling UNDEFINED = Class.new.freeze - EVERFIELDS = %w[_index _type _id _parent].freeze + EVERFIELDS = %w[_index _type _id _parent _routing].freeze DELEGATED_METHODS = %i[ query filter post_filter order reorder docvalue_fields track_scores track_total_hits request_cache explain version profile @@ -914,7 +914,7 @@ def find(*ids) # Returns and array of values for specified fields. # Uses `source` to restrict the list of returned fields. - # Fields `_id`, `_type` and `_index` are also supported. + # Fields `_id`, `_type`, `_routing` and `_index` are also supported. # # @overload pluck(field) # If single field is passed - it returns and array of values. diff --git a/spec/chewy/fields/base_spec.rb b/spec/chewy/fields/base_spec.rb index 1d427e990..33b24b701 100644 --- a/spec/chewy/fields/base_spec.rb +++ b/spec/chewy/fields/base_spec.rb @@ -44,24 +44,9 @@ end context 'parent objects' do - let!(:country) do - described_class.new(:name, value: lambda { |country, crutches| - country.cities.map do |city| - double(districts: city.districts, name: crutches.city_name) - end - }) - end - let!(:city) do - described_class.new(:name, value: lambda { |city, country, crutches| - city.districts.map do |district| - [district, country.name, crutches.suffix] - end - }) - end - let(:district_value) { ->(district, city, country, crutches) { [district, city.name, country.name, crutches] } } - let!(:district) do - described_class.new(:name, value: district_value) - end + let!(:country) { described_class.new(:name, value: ->(country, crutches) { country.cities.map { |city| double(districts: city.districts, name: crutches.city_name) } }) } + let!(:city) { described_class.new(:name, value: ->(city, country, crutches) { city.districts.map { |district| [district, country.name, crutches.suffix] } }) } + let!(:district) { described_class.new(:name, value: ->(district, city, country, crutches) { [district, city.name, country.name, crutches] }) } let(:crutches) { double(suffix: 'suffix', city_name: 'Bangkok') } before do @@ -556,6 +541,41 @@ end end + context 'join field type' do + before do + stub_model(:comment) + stub_index(:comments) do + index_scope Comment + field :id + field :hierarchy, type: :join, relations: {question: %i[answer comment], answer: :vote, vote: :subvote}, join: {type: :comment_type, id: :commented_id} + end + end + + specify do + expect( + CommentsIndex.root.compose( + {'id' => 1, 'comment_type' => 'question'} + ) + ).to eq( + {'id' => 1, 'hierarchy' => 'question'} + ) + + expect( + CommentsIndex.root.compose( + {'id' => 2, 'comment_type' => 'answer', 'commented_id' => 1} + ) + ).to eq( + {'id' => 2, 'hierarchy' => {'name' => 'answer', 'parent' => 1}} + ) + + expect do + CommentsIndex.root.compose( + {'id' => 2, 'comment_type' => 'asd', 'commented_id' => 1} + ) + end.to raise_error Chewy::InvalidJoinFieldType + end + end + context 'without ignore_blank option' do before do stub_index(:countries) do diff --git a/spec/chewy/index/adapter/active_record_spec.rb b/spec/chewy/index/adapter/active_record_spec.rb index 699e91e98..4f2f8b9ab 100644 --- a/spec/chewy/index/adapter/active_record_spec.rb +++ b/spec/chewy/index/adapter/active_record_spec.rb @@ -1,5 +1,11 @@ require 'spec_helper' +RawCity = Struct.new(:id) do + def rating + id * 10 + end +end + describe Chewy::Index::Adapter::ActiveRecord, :active_record do before do stub_model(:city) @@ -571,5 +577,25 @@ def delete_already? ).to eq(cities.first(2) + [nil]) end end + + context 'with raw_import option' do + subject { described_class.new(City) } + + let!(:cities) { Array.new(3) { |i| City.create!(rating: i / 2) } } + let(:city_ids) { cities.map(&:id) } + + let(:raw_import) { ->(hash) { RawCity.new(hash['id']) } } + it 'uses the custom loader' do + raw_cities = subject.load(city_ids, _index: 'cities', raw_import: raw_import).map do |c| + {id: c.id, rating: c.rating} + end + + expect(raw_cities).to eq([ + {id: 1, rating: 10}, + {id: 2, rating: 20}, + {id: 3, rating: 30} + ]) + end + end end end diff --git a/spec/chewy/index/import/bulk_builder_spec.rb b/spec/chewy/index/import/bulk_builder_spec.rb index c793af93f..c4b21ae22 100644 --- a/spec/chewy/index/import/bulk_builder_spec.rb +++ b/spec/chewy/index/import/bulk_builder_spec.rb @@ -1,5 +1,21 @@ require 'spec_helper' +SimpleComment = Class.new do + attr_reader :content, :comment_type, :commented_id, :updated_at, :id + + def initialize(hash) + @id = hash['id'] + @content = hash['content'] + @comment_type = hash['comment_type'] + @commented_id = hash['commented_id'] + @updated_at = hash['updated_at'] + end + + def derived + "[derived] #{content}" + end +end + describe Chewy::Index::Import::BulkBuilder do before { Chewy.massacre } @@ -169,6 +185,294 @@ end end end + + context 'with parents' do + let(:index) { CommentsIndex } + before do + stub_model(:comment) + stub_index(:comments) do + index_scope Comment + + crutch :content_with_crutches do |collection| # collection here is a current batch of products + collection.map { |comment| [comment.id, "[crutches] #{comment.content}"] }.to_h + end + + field :content + field :content_with_crutches, value: ->(comment, crutches) { crutches.content_with_crutches[comment.id] } + field :comment_type, type: :join, relations: {question: %i[answer comment], answer: :vote, vote: :subvote}, join: {type: :comment_type, id: :commented_id} + end + end + + let!(:existing_comments) do + [ + Comment.create!(id: 1, content: 'Where is Nemo?', comment_type: :question), + Comment.create!(id: 2, content: 'Here.', comment_type: :answer, commented_id: 1), + Comment.create!(id: 31, content: 'What is the best programming language?', comment_type: :question) + ] + end + + def do_raw_index_comment(options:, data:) + CommentsIndex.client.index(options.merge(index: 'comments', type: '_doc', refresh: true, body: data)) + end + + def raw_index_comment(comment) + options = {id: comment.id, routing: root(comment).id} + comment_type = comment.commented_id.present? ? {name: comment.comment_type, parent: comment.commented_id} : comment.comment_type + do_raw_index_comment( + options: options, + data: {content: comment.content, comment_type: comment_type} + ) + end + + def root(comment) + current = comment + # slow, but it's OK, as we don't have too deep trees + current = Comment.find(current.commented_id) while current.commented_id + current + end + + before do + CommentsIndex.reset! # initialize index + end + + let(:comments) do + [ + Comment.create!(id: 3, content: 'There!', comment_type: :answer, commented_id: 1), + Comment.create!(id: 4, content: 'Yes, he is here.', comment_type: :vote, commented_id: 2), + + Comment.create!(id: 11, content: 'What is the sense of the universe?', comment_type: :question), + Comment.create!(id: 12, content: 'I don\'t know.', comment_type: :answer, commented_id: 11), + Comment.create!(id: 13, content: '42', comment_type: :answer, commented_id: 11), + Comment.create!(id: 14, content: 'I think that 42 is a correct answer', comment_type: :vote, commented_id: 13), + + Comment.create!(id: 21, content: 'How are you?', comment_type: :question), + + Comment.create!(id: 32, content: 'Ruby', comment_type: :answer, commented_id: 31) + ] + end + + context 'when indexing a single object' do + let(:to_index) { [comments[0]] } + + specify do + expect(subject.bulk_body).to eq([ + {index: {_id: 3, routing: '1', data: {'content' => 'There!', 'content_with_crutches' => '[crutches] There!', 'comment_type' => {'name' => 'answer', 'parent' => 1}}}} + ]) + end + end + + context 'with raw import' do + before do + stub_index(:comments) do + index_scope Comment + default_import_options raw_import: ->(hash) { SimpleComment.new(hash) } + + crutch :content_with_crutches do |collection| # collection here is a current batch of products + collection.map { |comment| [comment.id, "[crutches] #{comment.content}"] }.to_h + end + + field :content + field :content_with_crutches, value: ->(comment, crutches) { crutches.content_with_crutches[comment.id] } + field :derived + field :comment_type, type: :join, relations: {question: %i[answer comment], answer: :vote, vote: :subvote}, join: {type: :comment_type, id: :commented_id} + end + end + + let(:to_index) { [comments[0]].map { |c| SimpleComment.new(c.attributes) } } # id: 3 + let(:delete) { [existing_comments[0]].map { |c| c } } # id: 1 + + specify do + expected_data = {'content' => 'There!', 'content_with_crutches' => '[crutches] There!', 'derived' => '[derived] There!', 'comment_type' => {'name' => 'answer', 'parent' => 1}} + expect(subject.bulk_body).to eq([ + {index: {_id: 3, routing: '1', data: expected_data}}, + {delete: {_id: 1, routing: '1'}} + ]) + end + end + + context 'when switching parents' do + let(:switching_parent_comment) { comments[0].tap { |c| c.update!(commented_id: 31) } } # id: 3 + let(:removing_parent_comment) { comments[1].tap { |c| c.update!(commented_id: nil, comment_type: nil) } } # id: 4 + let(:converting_to_parent_comment) { comments[3].tap { |c| c.update!(commented_id: nil, comment_type: :question) } } # id: 12 + let(:converting_to_child_comment) { comments[6].tap { |c| c.update!(commented_id: 1, comment_type: :answer) } } # id: 21 + let(:fields) { %w[commented_id comment_type] } + + let(:to_index) { [switching_parent_comment, removing_parent_comment, converting_to_parent_comment, converting_to_child_comment] } + + before do + existing_comments.each { |c| raw_index_comment(c) } + comments.each { |c| raw_index_comment(c) } + end + + specify do + expect(subject.bulk_body).to eq([ + {delete: {_id: 3, routing: '1', parent: 1}}, + {index: {_id: 3, routing: '31', data: {'content' => 'There!', 'content_with_crutches' => '[crutches] There!', 'comment_type' => {'name' => 'answer', 'parent' => 31}}}}, + {delete: {_id: 4, routing: '1', parent: 2}}, + {index: {_id: 4, routing: '4', data: {'content' => 'Yes, he is here.', 'content_with_crutches' => '[crutches] Yes, he is here.', 'comment_type' => nil}}}, + {delete: {_id: 12, routing: '11', parent: 11}}, + {index: {_id: 12, routing: '12', data: {'content' => 'I don\'t know.', 'content_with_crutches' => '[crutches] I don\'t know.', 'comment_type' => 'question'}}}, + {delete: {_id: 21, routing: '21'}}, + {index: {_id: 21, routing: '1', data: {'content' => 'How are you?', 'content_with_crutches' => '[crutches] How are you?', 'comment_type' => {'name' => 'answer', 'parent' => 1}}}} + ]) + end + end + + context 'when indexing with grandparents' do + let(:comments) do + [ + Comment.create!(id: 3, content: 'Yes, he is here.', comment_type: :vote, commented_id: 2), + Comment.create!(id: 4, content: 'What?', comment_type: :subvote, commented_id: 3) + ] + end + let(:to_index) { comments } + + before do + existing_comments.each { |c| raw_index_comment(c) } + end + + specify do + expected_data3 = {'content' => 'Yes, he is here.', 'content_with_crutches' => '[crutches] Yes, he is here.', 'comment_type' => {'name' => 'vote', 'parent' => 2}} + expected_data4 = {'content' => 'What?', 'content_with_crutches' => '[crutches] What?', 'comment_type' => {'name' => 'subvote', 'parent' => 3}} + expect(subject.bulk_body).to eq([ + {index: {_id: 3, routing: '1', data: expected_data3}}, + {index: {_id: 4, routing: '1', data: expected_data4}} + ]) + end + end + + context 'when switching grandparents' do + let(:comments) do + [ + Comment.create!(id: 3, content: 'Yes, he is here.', comment_type: :vote, commented_id: 2), + Comment.create!(id: 4, content: 'What?', comment_type: :subvote, commented_id: 3) + ] + end + let(:switching_parent_comment) { existing_comments[1].tap { |c| c.update!(commented_id: 31) } } # id: 2 + let(:fields) { %w[commented_id comment_type] } + let(:to_index) { [switching_parent_comment] } + + before do + existing_comments.each { |c| raw_index_comment(c) } + comments.each { |c| raw_index_comment(c) } + end + + it 'reindexes children and grandchildren' do + expected_data2 = {'content' => 'Here.', 'content_with_crutches' => '[crutches] Here.', 'comment_type' => {'name' => 'answer', 'parent' => 31}} + expected_data3 = {'content' => 'Yes, he is here.', 'content_with_crutches' => '[crutches] Yes, he is here.', 'comment_type' => {'name' => 'vote', 'parent' => 2}} + expected_data4 = {'content' => 'What?', 'content_with_crutches' => '[crutches] What?', 'comment_type' => {'name' => 'subvote', 'parent' => 3}} + expect(subject.bulk_body).to eq([ + {delete: {_id: 2, routing: '1', parent: 1}}, + {index: {_id: 2, routing: '31', data: expected_data2}}, + {delete: {_id: 3, routing: '1', parent: 2}}, + {index: {_id: 3, routing: '31', data: expected_data3}}, + {delete: {_id: 4, routing: '1', parent: 3}}, + {index: {_id: 4, routing: '31', data: expected_data4}} + ]) + end + end + + describe 'when removing parents or grandparents' do + let(:comments) do + [ + Comment.create!(id: 3, content: 'Yes, he is here.', comment_type: :vote, commented_id: 2), + Comment.create!(id: 4, content: 'What?', comment_type: :subvote, commented_id: 3) + ] + end + let(:delete) { [existing_comments[0]] } # id: 1 + + before do + existing_comments.each { |c| raw_index_comment(c) } + comments.each { |c| raw_index_comment(c) } + end + + it 'does not remove all descendants' do + expect(subject.bulk_body).to eq([ + {delete: {_id: 1, routing: '1'}} + ]) + end + end + + context 'when indexing' do + let(:to_index) { comments } + + specify do + expected_data3 = {'content' => 'There!', 'content_with_crutches' => '[crutches] There!', 'comment_type' => {'name' => 'answer', 'parent' => 1}} + expected_data4 = {'content' => 'Yes, he is here.', 'content_with_crutches' => '[crutches] Yes, he is here.', 'comment_type' => {'name' => 'vote', 'parent' => 2}} + + expected_data11 = {'content' => 'What is the sense of the universe?', 'content_with_crutches' => '[crutches] What is the sense of the universe?', 'comment_type' => 'question'} + expected_data12 = {'content' => 'I don\'t know.', 'content_with_crutches' => '[crutches] I don\'t know.', 'comment_type' => {'name' => 'answer', 'parent' => 11}} + expected_data13 = {'content' => '42', 'content_with_crutches' => '[crutches] 42', 'comment_type' => {'name' => 'answer', 'parent' => 11}} + expected_data14 = {'content' => 'I think that 42 is a correct answer', 'content_with_crutches' => '[crutches] I think that 42 is a correct answer', + 'comment_type' => {'name' => 'vote', 'parent' => 13}} + + expected_data21 = {'content' => 'How are you?', 'content_with_crutches' => '[crutches] How are you?', 'comment_type' => 'question'} + + expected_data32 = {'content' => 'Ruby', 'content_with_crutches' => '[crutches] Ruby', 'comment_type' => {'name' => 'answer', 'parent' => 31}} + + expect(subject.bulk_body).to eq([ + {index: {_id: 3, routing: '1', data: expected_data3}}, + {index: {_id: 4, routing: '1', data: expected_data4}}, + + {index: {_id: 11, routing: '11', data: expected_data11}}, + {index: {_id: 12, routing: '11', data: expected_data12}}, + {index: {_id: 13, routing: '11', data: expected_data13}}, + {index: {_id: 14, routing: '11', data: expected_data14}}, + + {index: {_id: 21, routing: '21', data: expected_data21}}, + + {index: {_id: 32, routing: '31', data: expected_data32}} + ]) + end + end + + context 'when deleting' do + before do + existing_comments.each { |c| raw_index_comment(c) } + comments.each { |c| raw_index_comment(c) } + end + + let(:delete) { comments } + specify do + expect(subject.bulk_body).to eq([ + {delete: {_id: 3, routing: '1', parent: 1}}, + {delete: {_id: 4, routing: '1', parent: 2}}, + + {delete: {_id: 11, routing: '11'}}, + {delete: {_id: 12, routing: '11', parent: 11}}, + {delete: {_id: 13, routing: '11', parent: 11}}, + {delete: {_id: 14, routing: '11', parent: 13}}, + + {delete: {_id: 21, routing: '21'}}, + + {delete: {_id: 32, routing: '31', parent: 31}} + ]) + end + end + + context 'when updating' do + before do + comments.each { |c| raw_index_comment(c) } + end + let(:fields) { %w[content] } + let(:to_index) { comments } + specify do + expect(subject.bulk_body).to eq([ + {update: {_id: 3, routing: '1', data: {doc: {'content' => comments[0].content}}}}, + {update: {_id: 4, routing: '1', data: {doc: {'content' => comments[1].content}}}}, + + {update: {_id: 11, routing: '11', data: {doc: {'content' => comments[2].content}}}}, + {update: {_id: 12, routing: '11', data: {doc: {'content' => comments[3].content}}}}, + {update: {_id: 13, routing: '11', data: {doc: {'content' => comments[4].content}}}}, + {update: {_id: 14, routing: '11', data: {doc: {'content' => comments[5].content}}}}, + + {update: {_id: 21, routing: '21', data: {doc: {'content' => comments[6].content}}}}, + + {update: {_id: 32, routing: '31', data: {doc: {'content' => comments[7].content}}}} + ]) + end + end + end end describe '#index_objects_by_id' do diff --git a/spec/chewy/index/import/routine_spec.rb b/spec/chewy/index/import/routine_spec.rb index 40eb3d7ae..5d1064b7b 100644 --- a/spec/chewy/index/import/routine_spec.rb +++ b/spec/chewy/index/import/routine_spec.rb @@ -11,8 +11,8 @@ CitiesIndex.create! end - let(:index) { [double(id: 1, name: 'Name', object: {}), double(id: 2, name: 'Name', object: {})] } - let(:delete) { [double(id: 3, name: 'Name')] } + let(:index) { [double('city_1', id: 1, name: 'Name', object: {}), double('city_2', id: 2, name: 'Name', object: {})] } + let(:delete) { [double('city_3', id: 3, name: 'Name', object: {})] } describe '#options' do specify do @@ -93,7 +93,7 @@ describe '#errors' do subject { described_class.new(CitiesIndex) } - let(:index) { [double(id: 1, name: 'Name', object: ''), double(id: 2, name: 'Name', object: {})] } + let(:index) { [double('city_1', id: 1, name: 'Name', object: ''), double('city_2', id: 2, name: 'Name', object: {})] } specify { expect(subject.errors).to eq([]) } specify do diff --git a/spec/chewy/index/import_spec.rb b/spec/chewy/index/import_spec.rb index ca7d79e89..ddd0cd76b 100644 --- a/spec/chewy/index/import_spec.rb +++ b/spec/chewy/index/import_spec.rb @@ -493,6 +493,46 @@ def import(*args) it_behaves_like 'importing' end + + context 'with parent-child relationship' do + before do + stub_model(:comment) + stub_index(:comments) do + index_scope Comment + field :content + field :comment_type, type: :join, relations: {question: %i[answer comment], answer: :vote}, join: {type: :comment_type, id: :commented_id} + end + end + + let!(:comments) do + [ + Comment.create!(id: 1, content: 'Where is Nemo?', comment_type: :question), + Comment.create!(id: 2, content: 'Here.', comment_type: :answer, commented_id: 1), + Comment.create!(id: 3, content: 'There!', comment_type: :answer, commented_id: 1), + Comment.create!(id: 4, content: 'Yes, he is here.', comment_type: :vote, commented_id: 2) + ] + end + + def imported_comments + CommentsIndex.all.map do |comment| + comment.attributes.except('_score', '_explanation') + end + end + + it 'imports parent and children' do + CommentsIndex.import!(comments.map(&:id)) + + expect(imported_comments).to match_array([ + {'id' => '1', 'content' => 'Where is Nemo?', 'comment_type' => 'question'}, + {'id' => '2', 'content' => 'Here.', 'comment_type' => {'name' => 'answer', 'parent' => 1}}, + {'id' => '3', 'content' => 'There!', 'comment_type' => {'name' => 'answer', 'parent' => 1}}, + {'id' => '4', 'content' => 'Yes, he is here.', 'comment_type' => {'name' => 'vote', 'parent' => 2}} + ]) + + answer_ids = CommentsIndex.query(has_parent: {parent_type: 'question', query: {match: {content: 'Where'}}}).pluck(:_id) + expect(answer_ids).to match_array(%w[2 3]) + end + end end describe '.import!', :orm do diff --git a/spec/support/active_record.rb b/spec/support/active_record.rb index 45c3c501b..4c9c72b9f 100644 --- a/spec/support/active_record.rb +++ b/spec/support/active_record.rb @@ -31,6 +31,13 @@ t.column :lat, :string t.column :lon, :string end + + create_table :comments do |t| + t.column :content, :string + t.column :comment_type, :string + t.column :commented_id, :integer + t.column :updated_at, :datetime + end end module ActiveRecordClassHelpers