otwarchive-symphonyarchive/app/models/search/query.rb
2026-03-11 22:22:11 +00:00

272 lines
6.3 KiB
Ruby

# Base class for building and running Elasticsearch queries.
#
# A query is assembled from three groups of clauses -- queries (scored),
# filters (unscored) and exclusion_filters (must_not) -- and sent through the
# global $elasticsearch client. In this class index_name, document_type,
# queries and aggregations all return nil, and klass (used by search_results
# and sample) is not defined at all; subclasses are expected to provide them.
class Query
  attr_reader :options

  # Options: page, per_page
  def initialize(options = {})
    @options = HashWithIndifferentAccess.new(options)
  end

  # Run the generated query and return the raw client response. A BadRequest
  # raised by the transport layer is turned into a hash with an :error
  # message instead of propagating.
  def search
    $elasticsearch.search(
      index: index_name,
      body: generated_query,
      track_total_hits: true
    )
  rescue Elastic::Transport::Transport::Errors::BadRequest
    { error: "Your search failed because of a syntax error. Please try again." }
  end

  # Run the search and wrap the response in a QueryResult for the current
  # page and page size.
  def search_results
    response = search
    QueryResult.new(klass, response, { page: page, per_page: per_page })
  end

  # Perform a count query based on the given options
  def count
    $elasticsearch.count(
      index: index_name,
      body: { query: generated_query[:query] }
    )["count"]
  end

  # Retrieve a randomly sampled selection of results. The filtered query is
  # wrapped in a function_score whose random_score replaces the normal score,
  # and at most +count+ hits are requested.
  def sample(count: 5)
    response = $elasticsearch.search(
      index: index_name,
      body: {
        query: {
          function_score: {
            query: filtered_query,
            random_score: {},
            boost_mode: "replace"
          }
        },
        size: count
      }
    )
    QueryResult.new(klass, response, { page: 1, per_page: count })
  end

  # Perform a specific aggregation over the filtered query. No hits are
  # requested; only the part of the response belonging to the aggregation
  # (registered under the name "aggregation") is returned.
  def aggregation_search(aggregation)
    $elasticsearch.search(
      index: index_name,
      body: {
        query: filtered_query,
        size: 0, # aggregations only
        aggs: { aggregation: aggregation }
      }
    ).dig("aggregations", "aggregation")
  end

  # Use a composite aggregation to get all values that a particular field can
  # take on. Returns a hash mapping from values to counts.
  #
  # Results are fetched batch_size buckets at a time; each response's
  # "after_key" is fed back in as the :after cursor until none is returned.
  def field_values(field_name, batch_size: 100)
    aggregation = {
      composite: {
        size: batch_size,
        sources: [{ field_value: { terms: { field: field_name } } }]
      }
    }

    counts = {}

    loop do
      results = aggregation_search(aggregation)

      results["buckets"].each do |bucket|
        counts[bucket.dig("key", "field_value")] = bucket["doc_count"]
      end

      after_key = results["after_key"]
      return counts if after_key.nil?

      aggregation[:composite][:after] = after_key
    end
  end

  # Return (an approximation of) the number of distinct values that a
  # particular field can take on:
  def field_count(field_name, precision_threshold: 1000)
    aggregation_search(
      cardinality: {
        field: field_name,
        precision_threshold: precision_threshold
      }
    )["value"]
  end

  # Sort by relevance by default, override in subclasses as necessary
  def sort
    { _score: { order: "desc" } }
  end

  # Search query with filters: the request body containing the filtered
  # query, paging (size/from) and sort, merged with whatever aggregations
  # returns when that is present.
  def generated_query
    body = {
      query: filtered_query,
      size: per_page,
      from: pagination_offset,
      sort: sort
    }

    aggs = aggregations
    body.merge!(aggs) if aggs.present?
    body
  end

  # Combine the filters and queries, with a fallback in case there are no
  # filters or queries:
  def filtered_query
    make_bool(
      must: queries, # required, score calculated
      filter: filters, # required, score ignored
      must_not: exclusion_filters # disallowed, score ignored
    ) || { match_all: {} }
  end

  # Define specifics in subclasses
  def filters
    @filters
  end

  # Build a term clause for field/value; any extra options are placed
  # alongside the field inside the clause.
  def term_filter(field, value, options = {})
    { term: options.merge(field => value) }
  end

  # Build a terms clause for field/value; any extra options are placed
  # alongside the field inside the clause.
  def terms_filter(field, value, options = {})
    { terms: options.merge(field => value) }
  end

  # Build an exists clause for the given field.
  def exists_filter(field)
    { exists: { field: field } }
  end

  # A filter used to match all words in a particular field, most frequently
  # used for matching non-existent tags. The match query doesn't allow
  # negation/or/and/wildcards, so it should only be used on fields where the
  # users are expected to enter, e.g. canonical tags.
  #
  # Entries in options are merged over the defaults, so they can override
  # :query and :operator.
  def match_filter(field, value, options = {})
    { match: { field => { query: value, operator: "and" }.merge(options) } }
  end

  # Replaces the existing scores for a query with the value of a field. The
  # optional value "missing" determines what score value should be used if the
  # specified field is missing from a document.
  def field_value_score(field, query, missing: 0)
    {
      function_score: {
        query: query,
        field_value_factor: {
          field: field,
          missing: missing
        },
        boost_mode: :replace
      }
    }
  end

  # True when the string form of str is exactly "true", "1" or "T".
  def bool_value(str)
    %w[true 1 T].include?(str.to_s)
  end

  # Clauses that matching documents must not satisfy (see filtered_query).
  def exclusion_filters
    @exclusion_filters
  end

  # The following return nil here; per the note on filters, specifics are
  # defined in subclasses.
  def queries; end

  def aggregations; end

  def index_name; end

  def document_type; end

  # Page size: the :per_page option, or ArchiveConfig.ITEMS_PER_PAGE when the
  # option is not set.
  def per_page
    options[:per_page] || ArchiveConfig.ITEMS_PER_PAGE
  end

  # Example: if the limit is 3 results, and we're displaying 2 per page,
  # disallow pages beyond page 2.
  def page
    [
      options[:page] || 1,
      (ArchiveConfig.MAX_SEARCH_RESULTS / per_page.to_f).ceil
    ].min
  end

  # Offset of the first result on the current page.
  def pagination_offset
    (page * per_page) - per_page
  end

  # Only escape if it isn't already escaped.
  #
  # A negative lookbehind is used instead of consuming the preceding
  # character, so a slash at the very start of the word, or one directly
  # following another slash, is escaped as well. Returns a new string.
  def escape_slashes(word)
    # Block form: the block's return value is inserted literally, so the
    # backslash is never interpreted as a replacement escape sequence.
    word.gsub(%r{(?<!\\)/}) { '\\/' }
  end

  # Backslash-escape slashes (via escape_slashes) and each of the characters
  # ! + - ? ~ ( ) [ ] : in word. Unlike slashes, these characters are escaped
  # unconditionally. Returns a new string; the argument is left untouched.
  def escape_reserved_characters(word)
    escape_slashes(word).gsub(/[!+\-?~()\[\]:]/) { |char| "\\#{char}" }
  end

  # Turn comma-separated text into a string of quoted phrase terms, one
  # ' fieldname:"phrase"' per phrase (each phrase is squished first).
  # Returns "" for blank text.
  def split_query_text_phrases(fieldname, text)
    return "" if text.blank?

    text.split(",").map { |phrase| " #{fieldname}:\"#{phrase.squish}\"" }.join
  end

  # Turn space-separated text into a string of ' fieldname:word' terms with
  # reserved characters escaped. A word of two or more characters starting
  # with "-" loses the dash and is prefixed with " NOT". Returns "" for blank
  # text.
  def split_query_text_words(fieldname, text)
    return "" if text.blank?

    text.split(" ").map do |word|
      negated = word.length >= 2 && word[0] == "-"
      term = escape_reserved_characters(negated ? word[1..-1] : word)
      "#{negated ? ' NOT' : ''} #{fieldname}:#{term}"
    end.join
  end

  # Build a bool query from a hash of clause lists (:must, :filter,
  # :must_not, :should, ...). Blank entries are dropped, and
  # minimum_should_match is set to 1 when there are :should clauses.
  #
  # Returns nil when nothing is left, the bare clause when the only thing
  # left is a single :must clause, and { bool: ... } otherwise. The hash
  # passed in is not modified.
  def make_bool(query)
    clauses = query.reject { |_, value| value.blank? }
    clauses[:minimum_should_match] = 1 if clauses[:should].present?

    if clauses.empty?
      nil
    elsif clauses.values.flatten.size == 1 && (clauses[:must] || clauses[:should])
      # There's only one clause in our boolean, so we might as well skip the
      # bool and just require it. (With :should present the
      # minimum_should_match entry is counted too, so in practice this only
      # triggers for a lone :must clause.)
      clauses.values.flatten.first
    else
      { bool: clauses }
    end
  end

  # Flatten the arguments into a single list and drop nils.
  def make_list(*args)
    args.flatten.compact
  end
end