# Base class for building and running Elasticsearch queries. It reads
# several hooks that are empty (or undefined) here -- index_name, queries,
# filters, exclusion_filters, aggregations, and klass -- so it is presumably
# meant to be subclassed, with each subclass supplying those specifics.
class Query
  attr_reader :options

  # Options: page, per_page
  def initialize(options={})
    @options = HashWithIndifferentAccess.new(options)
  end

  # Run the generated query against the index. Returns whatever the client
  # returns, or a hash with an :error message when Elasticsearch rejects the
  # request as a bad request.
  def search
    $elasticsearch.search(
      index: index_name,
      body: generated_query,
      track_total_hits: true
    )
  rescue Elastic::Transport::Transport::Errors::BadRequest
    { error: "Your search failed because of a syntax error. Please try again." }
  end

  # Run the search and wrap the response (or error hash) in a QueryResult,
  # along with the current pagination settings.
  def search_results
    response = search
    QueryResult.new(klass, response, { page: page, per_page: per_page })
  end

  # Perform a count query based on the given options
  def count
    $elasticsearch.count(
      index: index_name,
      body: { query: generated_query[:query] }
    )['count']
  end

  # Retrieve a randomly sampled selection of results:
  def sample(count: 5)
    response = $elasticsearch.search(
      index: index_name,
      body: {
        query: {
          function_score: {
            query: filtered_query,
            random_score: {},
            boost_mode: "replace"
          }
        },
        size: count
      }
    )
    QueryResult.new(klass, response, { page: 1, per_page: count })
  end

  # Perform a specific aggregation:
  # Returns the "aggregation" entry of the response's "aggregations" section.
  def aggregation_search(aggregation)
    $elasticsearch.search(
      index: index_name,
      body: {
        query: filtered_query,
        size: 0, # aggregations only
        aggs: { aggregation: aggregation }
      }
    ).dig("aggregations", "aggregation")
  end

  # Use a composite aggregation to get all values that a particular field can
  # take on. Returns a hash mapping from values to counts.
  def field_values(field_name, batch_size: 100)
    aggregation = {
      composite: {
        size: batch_size,
        sources: [{ field_value: { terms: { field: field_name } } }]
      }
    }

    counts = {}

    loop do
      results = aggregation_search(aggregation)

      results["buckets"].each do |bucket|
        counts[bucket.dig("key", "field_value")] = bucket["doc_count"]
      end

      # A missing after_key means there are no further batches to fetch.
      after_key = results["after_key"]
      return counts if after_key.nil?

      aggregation[:composite][:after] = after_key
    end
  end

  # Return (an approximation of) the number of distinct values that a
  # particular field can take on:
  def field_count(field_name, precision_threshold: 1000)
    aggregation_search(
      cardinality: {
        field: field_name,
        precision_threshold: precision_threshold
      }
    ).dig("value")
  end

  # Sort by relevance by default, override in subclasses as necessary
  def sort
    { _score: { order: "desc" } }
  end

  # Search query with filters
  # Any aggregations present are merged into the top level of the body.
  def generated_query
    q = {
      query: filtered_query,
      size: per_page,
      from: pagination_offset,
      sort: sort
    }
    aggs = aggregations
    q.merge!(aggs) if aggs.present?
    q
  end

  # Combine the filters and queries, with a fallback in case there are no
  # filters or queries:
  def filtered_query
    make_bool(
      must: queries, # required, score calculated
      filter: filters, # required, score ignored
      must_not: exclusion_filters # disallowed, score ignored
    ) || { match_all: {} }
  end

  # Define specifics in subclasses

  # Returns @filters, which this class never assigns.
  def filters
    @filters
  end

  # Build a term filter for a single field/value pair; any extra options are
  # merged into the same hash.
  def term_filter(field, value, options={})
    { term: options.merge(field => value) }
  end

  # Build a terms filter for a field/value pair; any extra options are merged
  # into the same hash.
  def terms_filter(field, value, options={})
    { terms: options.merge(field => value) }
  end

  # Build a filter requiring the given field to exist.
  def exists_filter(field)
    { exists: { field: field } }
  end

  # A filter used to match all words in a particular field, most frequently
  # used for matching non-existent tags. The match query doesn't allow
  # negation/or/and/wildcards, so it should only be used on fields where the
  # users are expected to enter, e.g. canonical tags.
  def match_filter(field, value, options = {})
    { match: { field => { query: value, operator: "and" }.merge(options) } }
  end

  # Replaces the existing scores for a query with the value of a field. The
  # optional value "missing" determines what score value should be used if the
  # specified field is missing from a document.
  def field_value_score(field, query, missing: 0)
    {
      function_score: {
        query: query,
        field_value_factor: { field: field, missing: missing },
        boost_mode: :replace
      }
    }
  end

  # True when the string form of the argument is "true", "1" or "T".
  def bool_value(str)
    %w[true 1 T].include?(str.to_s)
  end

  # Returns @exclusion_filters, which this class never assigns.
  def exclusion_filters
    @exclusion_filters
  end

  # The following hooks return nil here.

  def queries
  end

  def aggregations
  end

  def index_name
  end

  def document_type
  end

  # Number of results per page: the :per_page option, falling back to
  # ArchiveConfig.ITEMS_PER_PAGE.
  def per_page
    options[:per_page] || ArchiveConfig.ITEMS_PER_PAGE
  end

  # Example: if the limit is 3 results, and we're displaying 2 per page,
  # disallow pages beyond page 2.
  def page
    [
      options[:page] || 1,
      (ArchiveConfig.MAX_SEARCH_RESULTS / per_page.to_f).ceil
    ].min
  end

  # Offset of the first result on the current page.
  def pagination_offset
    (page * per_page) - per_page
  end

  # Only escape if it isn't already escaped
  #
  # The lookbehind checks the preceding character without consuming it, so a
  # slash at the very start of the word and runs of consecutive slashes are
  # escaped too. The block form keeps the replacement literal.
  def escape_slashes(word)
    word.gsub(%r{(?<!\\)/}) { '\\/' }
  end

  # Escape slashes plus the characters ! + - ? ~ ( ) [ ] : by prefixing each
  # with a backslash. Returns a new string; the argument is not modified.
  def escape_reserved_characters(word)
    # The block form avoids gsub's replacement-string backreferences (e.g. \+).
    escape_slashes(word).gsub(/[!+\-?~()\[\]:]/) { |char| "\\#{char}" }
  end

  # Turn comma-separated text into a string of quoted fieldname:"phrase"
  # clauses, each preceded by a space. Returns "" for blank text.
  def split_query_text_phrases(fieldname, text)
    return "" if text.blank?

    text.split(",").map(&:squish).map { |phrase| " #{fieldname}:\"#{phrase}\"" }.join
  end

  # Turn space-separated text into a string of fieldname:word clauses, each
  # preceded by a space. A word of two or more characters starting with "-"
  # is negated with NOT. Reserved characters in each word are escaped.
  # Returns "" for blank text.
  def split_query_text_words(fieldname, text)
    return "" if text.blank?

    text.split(" ").map do |word|
      negated = word.length >= 2 && word[0] == "-"
      word = word[1..-1] if negated
      "#{' NOT' if negated} #{fieldname}:#{escape_reserved_characters(word)}"
    end.join
  end

  # Build a bool query from a hash of clause lists (e.g. must, filter,
  # must_not, should), ignoring blank entries. Returns nil when nothing is
  # left. The given hash is not modified.
  def make_bool(query)
    # Work on a copy so the caller's hash is left untouched.
    query = query.reject { |_, value| value.blank? }
    query[:minimum_should_match] = 1 if query[:should].present?

    if query.empty?
      nil
    elsif query.values.flatten.size == 1 && (query[:must] || query[:should])
      # There's only one clause in our boolean, so we might as well skip the
      # bool and just require it. (When :should is present,
      # minimum_should_match also counts as a value here, so a lone should
      # clause stays wrapped in a bool.)
      query.values.flatten.first
    else
      { bool: query }
    end
  end

  # Flatten the arguments into a single array with nils removed.
  def make_list(*args)
    args.flatten.compact
  end
end