229 lines
4.9 KiB
Ruby
229 lines
4.9 KiB
Ruby
|
|
class Indexer
|
||
|
|
# Batch size handed to find_in_batches when queueing a full reindex.
BATCH_SIZE = 1000

# Maps a model name to the names of the indexer classes that handle it.
# for_object looks a record's class up here and constantizes the names.
INDEXERS_FOR_CLASS = {
  Work: %w[
    WorkIndexer
    WorkCreatorIndexer
    BookmarkedWorkIndexer
  ],
  Bookmark: %w[BookmarkIndexer],
  Tag: %w[TagIndexer],
  Pseud: %w[PseudIndexer],
  Series: %w[BookmarkedSeriesIndexer],
  ExternalWork: %w[BookmarkedExternalWorkIndexer],
  User: %w[UserIndexer]
}.freeze
|
||
|
|
|
||
|
|
# Forward these readers from an instance to its class, so instance methods
# can call e.g. `index_name` instead of `self.class.index_name`.
delegate :klass, :klass_with_includes, :index_name, :document_type, to: :class
|
||
|
|
|
||
|
|
##################
|
||
|
|
# CLASS METHODS
|
||
|
|
##################
|
||
|
|
|
||
|
|
# Name of the model class this indexer handles. Other methods here call
# constantize / underscore on the result, so it is expected to be a String.
# The base implementation always raises; each subclass must override it.
def self.klass
  raise RuntimeError, "Must be defined in subclass"
end
|
||
|
|
|
||
|
|
# Returns the class named by `klass`. Instances use it as the starting
# point for loading records in `objects`.
# NOTE(review): the name suggests subclasses override this to add
# eager-loading; no such override is visible here - confirm.
def self.klass_with_includes
  model_name = klass
  model_name.constantize
end
|
||
|
|
|
||
|
|
# Returns the fixed list of indexer classes, in this order.
def self.all
  [
    BookmarkedExternalWorkIndexer, BookmarkedSeriesIndexer, BookmarkedWorkIndexer,
    BookmarkIndexer,
    PseudIndexer,
    TagIndexer,
    UserIndexer,
    WorkIndexer, WorkCreatorIndexer
  ]
end
|
||
|
|
|
||
|
|
# Originally added to allow IndexSweeper to find the Elasticsearch document
|
||
|
|
# ids when they do not match the associated ActiveRecord objects' ids.
|
||
|
|
#
|
||
|
|
# Override in subclasses if necessary.
|
||
|
|
# Base implementation: the document ids are the given ids, returned unchanged.
def self.find_elasticsearch_ids(ids)
  ids
end
|
||
|
|
|
||
|
|
# Deletes this indexer's Elasticsearch index when it exists; otherwise does
# nothing and returns nil.
def self.delete_index
  return unless $elasticsearch.indices.exists(index: index_name)

  $elasticsearch.indices.delete(index: index_name)
end
|
||
|
|
|
||
|
|
# Creates this indexer's Elasticsearch index.
#
# shards - number of shards for the new index (default 5).
#
# The request body combines the index-level settings built here with
# whatever `settings` returns (merged at the top level of :settings) and
# uses `mapping` for :mappings.
def self.create_index(shards: 5)
  index_settings = {
    # static settings
    number_of_shards: shards,
    # dynamic settings
    max_result_window: ArchiveConfig.MAX_SEARCH_RESULTS
  }
  request_body = {
    settings: { index: index_settings }.merge(settings),
    mappings: mapping
  }
  $elasticsearch.indices.create(index: index_name, body: request_body)
end
|
||
|
|
|
||
|
|
# Drops and recreates the index for a test run.
# Raises unless Rails is running in the test environment.
def self.prepare_for_testing
  raise RuntimeError, "Wrong environment for test prep!" unless Rails.env.test?

  delete_index
  # Relevance sorting is unpredictable with multiple shards and small
  # amounts of data, so tests use a single shard.
  create_index(shards: 1)
end
|
||
|
|
|
||
|
|
# Refreshes the index if it exists; returns nil when it does not.
# Raises unless Rails is running in the test environment.
def self.refresh_index
  # Refreshes are resource-intensive, we use them only in tests.
  raise RuntimeError, "Wrong environment for index refreshes!" unless Rails.env.test?

  index_exists = $elasticsearch.indices.exists(index: index_name)
  $elasticsearch.indices.refresh(index: index_name) if index_exists
end
|
||
|
|
|
||
|
|
# Note that the index must exist before you can set the mapping
|
||
|
|
# Sends the hash returned by `mapping` to Elasticsearch as the mapping for
# this indexer's index.
def self.create_mapping
  $elasticsearch.indices.put_mapping(index: index_name, body: mapping)
end
|
||
|
|
|
||
|
|
# Mapping used by create_index and create_mapping. The base mapping declares
# no properties.
def self.mapping
  # add properties in subclasses
  properties = {}
  { properties: properties }
end
|
||
|
|
|
||
|
|
# Extra settings merged into the :settings section by create_index. The base
# version contains only an empty custom_analyzer entry.
def self.settings
  # add properties in subclasses
  custom_analyzer = {}
  { analyzer: { custom_analyzer: custom_analyzer } }
end
|
||
|
|
|
||
|
|
# Queues every indexable record for indexing.
#
# options - Hash; when options[:skip_delete] is truthy the existing index is
#           kept, otherwise it is deleted and recreated first.
def self.index_all(options = {})
  rebuild_index = !options[:skip_delete]
  if rebuild_index
    delete_index
    create_index
  end
  index_from_db
end
|
||
|
|
|
||
|
|
# Walks `indexables` in batches of BATCH_SIZE and hands each batch's ids to
# an AsyncIndexer (priority :world). Outside the test environment a progress
# line is printed per batch.
def self.index_from_db
  # Ceiling division: a record count that is an exact multiple of BATCH_SIZE
  # must not be reported as needing one extra batch, and an empty set needs
  # zero batches.
  total = (indexables.count + BATCH_SIZE - 1) / BATCH_SIZE
  batch_number = 0
  indexables.find_in_batches(batch_size: BATCH_SIZE) do |group|
    batch_number += 1
    puts "Queueing #{klass} batch #{batch_number} of #{total}" unless Rails.env.test?
    AsyncIndexer.new(self, :world).enqueue_ids(group.map(&:id))
  end
end
|
||
|
|
|
||
|
|
# Add conditions here
|
||
|
|
# The set of records to index; by default simply the class named by `klass`.
def self.indexables
  model_name = klass
  model_name.constantize
end
|
||
|
|
|
||
|
|
# Name of the Elasticsearch index:
# "<ELASTICSEARCH_PREFIX>_<Rails env>_<underscored, pluralized klass>".
def self.index_name
  prefix = ArchiveConfig.ELASTICSEARCH_PREFIX
  environment = Rails.env
  collection = klass.underscore.pluralize
  "#{prefix}_#{environment}_#{collection}"
end
|
||
|
|
|
||
|
|
# Document type: the underscored form of `klass`.
def self.document_type
  model_name = klass
  model_name.underscore
end
|
||
|
|
|
||
|
|
# Given a searchable object, what indexers should handle it?
|
||
|
|
# Returns an array of indexers
|
||
|
|
# Looks the object's class name up in INDEXERS_FOR_CLASS and returns the
# matching indexer classes. Any Tag (including subclasses) is looked up as
# "Tag"; an unlisted class yields an empty array.
def self.for_object(object)
  key = (object.is_a?(Tag) ? 'Tag' : object.class.to_s).to_sym
  indexer_names = INDEXERS_FOR_CLASS[key] || []
  indexer_names.map(&:constantize)
end
|
||
|
|
|
||
|
|
# Should be called after a batch update, with the IDs that were successfully
|
||
|
|
# updated. Calls successful_reindex on the indexable class.
|
||
|
|
# Passes the ids on to `indexables.successful_reindex` when the indexable
# class defines that hook; otherwise does nothing and returns nil.
def self.handle_success(ids)
  return unless indexables.respond_to?(:successful_reindex)

  indexables.successful_reindex(ids)
end
|
||
|
|
|
||
|
|
####################
|
||
|
|
# INSTANCE METHODS
|
||
|
|
####################
|
||
|
|
|
||
|
|
# The ids given to the constructor; `batch` emits one index or delete action
# for each of them.
attr_reader :ids
|
||
|
|
|
||
|
|
# ids - the ids this indexer instance will process; stored as given and
#       exposed through the `ids` reader.
def initialize(ids)
  @ids = ids
end
|
||
|
|
|
||
|
|
# Loads the records whose id is in `ids` and returns them as a Hash keyed by
# record id. The result is memoized, so the query runs once per instance.
def objects
  # Fill a single hash in place instead of merging into a new hash for every
  # record.
  @objects ||= klass_with_includes.where(id: ids).each_with_object({}) do |record, by_id|
    by_id[record.id] = record
  end
end
|
||
|
|
|
||
|
|
# Builds (and memoizes) the list of bulk actions for `ids`:
# an id whose record was found in `objects` yields an index action followed
# by its document; any other id yields a delete action.
def batch
  return @batch if @batch

  # Build into a local and memoize only once complete, so an exception raised
  # part-way through (e.g. by `document`) cannot leave a truncated batch
  # cached for later calls.
  actions = []
  ids.each do |id|
    object = objects[id.to_i]
    if object.present?
      actions << { index: routing_info(id) }
      actions << document(object)
    else
      actions << { delete: routing_info(id) }
    end
  end
  @batch = actions
end
|
||
|
|
|
||
|
|
# Sends `batch` to Elasticsearch in a single bulk request. Returns nil
# without calling Elasticsearch when the batch is empty.
def index_documents
  $elasticsearch.bulk(body: batch) unless batch.empty?
end
|
||
|
|
|
||
|
|
# Indexes a single object immediately (not through the bulk batch).
# When this indexer responds to `parent_id`, its result is sent as the
# routing value.
def index_document(object)
  request = {
    index: index_name,
    id: document_id(object.id),
    body: document(object)
  }
  request[:routing] = parent_id(object.id, object) if respond_to?(:parent_id)
  $elasticsearch.index(request)
end
|
||
|
|
|
||
|
|
# Metadata hash for one bulk action: the target index name and the document
# id (the id is used exactly as given).
def routing_info(id)
  { "_index" => index_name, "_id" => id }
end
|
||
|
|
|
||
|
|
# The document body for an object: its as_json output, requested with
# root: false.
def document(object)
  json_options = { root: false }
  object.as_json(json_options)
end
|
||
|
|
|
||
|
|
# Can be overridden by our bookmarkable indexers.
|
||
|
|
# Base implementation: the Elasticsearch document id is the given id,
# returned unchanged.
def document_id(id)
  id
end
|
||
|
|
|
||
|
|
end
|