324 lines
13 KiB
Ruby
324 lines
13 KiB
Ruby
|
|
module AutocompleteSource
|
||
|
|
AUTOCOMPLETE_DELIMITER = ": ".freeze
|
||
|
|
AUTOCOMPLETE_COMPLETION_KEY = "completion".freeze
|
||
|
|
AUTOCOMPLETE_SCORE_KEY = "score".freeze
|
||
|
|
AUTOCOMPLETE_CACHE_KEY = "cache".freeze
|
||
|
|
AUTOCOMPLETE_RANGE_LENGTH = 50 # not random
|
||
|
|
AUTOCOMPLETE_BOOST = 1000 # amt by which we boost results that have all the words
|
||
|
|
|
||
|
|
# this marks a completed word in the completion set -- we use double commas because
|
||
|
|
# commas are not allowed in pseud and tag names, and double-commas have been disallowed
|
||
|
|
# from collection titles
|
||
|
|
AUTOCOMPLETE_WORD_TERMINATOR = ",,".freeze
|
||
|
|
|
||
|
|
def transliterate(input)
|
||
|
|
input = input.to_s.mb_chars.unicode_normalize(:nfkd).gsub(/[\u0300-\u036F]/, "")
|
||
|
|
result = ""
|
||
|
|
input.each_char do |char|
|
||
|
|
tl = ActiveSupport::Inflector.transliterate(char)
|
||
|
|
# If transliterate returns "?", the original character is either unsupported
|
||
|
|
# (e.g. a non-Latin character) or was actually a question mark.
|
||
|
|
# In both cases, we should keep the original.
|
||
|
|
result << if tl == "?"
|
||
|
|
char
|
||
|
|
else
|
||
|
|
tl
|
||
|
|
end
|
||
|
|
end
|
||
|
|
result
|
||
|
|
end
|
||
|
|
|
||
|
|
# override to define any autocomplete prefix spaces where this object should live
|
||
|
|
def autocomplete_prefixes
|
||
|
|
[self.transliterate("autocomplete_#{self.class.name.downcase}")]
|
||
|
|
end
|
||
|
|
|
||
|
|
def autocomplete_search_string
|
||
|
|
self.transliterate(name)
|
||
|
|
end
|
||
|
|
|
||
|
|
def autocomplete_search_string_was
|
||
|
|
self.transliterate(name_was)
|
||
|
|
end
|
||
|
|
|
||
|
|
def autocomplete_search_string_before_last_save
|
||
|
|
self.transliterate(name_before_last_save)
|
||
|
|
end
|
||
|
|
|
||
|
|
def autocomplete_value
|
||
|
|
"#{id}#{AUTOCOMPLETE_DELIMITER}#{name}" + (self.respond_to?(:title) ? "#{AUTOCOMPLETE_DELIMITER}#{title}" : "")
|
||
|
|
end
|
||
|
|
|
||
|
|
def autocomplete_value_was
|
||
|
|
"#{id}#{AUTOCOMPLETE_DELIMITER}#{name_was}" + (self.respond_to?(:title) ? "#{AUTOCOMPLETE_DELIMITER}#{title_was}" : "")
|
||
|
|
end
|
||
|
|
|
||
|
|
def autocomplete_value_before_last_save
|
||
|
|
"#{id}#{AUTOCOMPLETE_DELIMITER}#{name_before_last_save}" + (self.respond_to?(:title) ? "#{AUTOCOMPLETE_DELIMITER}#{title_before_last_save}" : "")
|
||
|
|
end
|
||
|
|
|
||
|
|
def autocomplete_score
|
||
|
|
0
|
||
|
|
end
|
||
|
|
|
||
|
|
def add_to_autocomplete(score = nil)
|
||
|
|
score = autocomplete_score unless score
|
||
|
|
self.class.autocomplete_pieces(autocomplete_search_string).each do |word_piece|
|
||
|
|
# each prefix represents an autocompletion space -- eg, "autocomplete_collection_all"
|
||
|
|
autocomplete_prefixes.each do |prefix|
|
||
|
|
# We put each prefix and the word + completion token into the set of all completions,
|
||
|
|
# with score 0 so they just get sorted lexicographically --
|
||
|
|
# this will be used to quickly find all possible completions in this space
|
||
|
|
REDIS_AUTOCOMPLETE.zadd(self.transliterate(self.class.autocomplete_completion_key(prefix)), 0, word_piece)
|
||
|
|
|
||
|
|
# We put each complete search string into a separate set indexed by word with specified score
|
||
|
|
if self.class.is_complete_word?(word_piece)
|
||
|
|
REDIS_AUTOCOMPLETE.zadd(self.transliterate(self.class.autocomplete_score_key(prefix, word_piece)), score, autocomplete_value)
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
def remove_from_autocomplete
|
||
|
|
self.class.remove_from_autocomplete(self.autocomplete_search_string, self.autocomplete_prefixes, self.autocomplete_value)
|
||
|
|
end
|
||
|
|
|
||
|
|
def remove_stale_from_autocomplete
|
||
|
|
self.class.remove_from_autocomplete(self.autocomplete_search_string_before_last_save, self.autocomplete_prefixes, self.autocomplete_value_before_last_save)
|
||
|
|
end
|
||
|
|
|
||
|
|
module ClassMethods
|
||
|
|
include AutocompleteSource
|
||
|
|
|
||
|
|
# returns a properly escaped and case-insensitive regexp for a more manual search
|
||
|
|
def get_search_regex(search_param)
|
||
|
|
Regexp.new(Regexp.escape(search_param), Regexp::IGNORECASE)
|
||
|
|
end
|
||
|
|
|
||
|
|
# takes either an array or string of search terms (typically extra values passed in through live params, like fandom)
|
||
|
|
# and returns an array of stripped and lowercase words for actual searching or use in keys
|
||
|
|
def get_search_terms(search_term)
|
||
|
|
terms = if search_term.is_a?(Array)
|
||
|
|
search_term.map { |term| term.split(",") }.flatten
|
||
|
|
else
|
||
|
|
search_term.blank? ? [] : search_term.split(",")
|
||
|
|
end
|
||
|
|
terms.map { |term| self.transliterate(term.strip.downcase) }
|
||
|
|
end
|
||
|
|
|
||
|
|
def parse_autocomplete_value(current_autocomplete_value)
|
||
|
|
current_autocomplete_value.split(AUTOCOMPLETE_DELIMITER, 3)
|
||
|
|
end
|
||
|
|
|
||
|
|
def fullname_from_autocomplete(current_autocomplete_value)
|
||
|
|
current_autocomplete_value.split(AUTOCOMPLETE_DELIMITER, 2)[1]
|
||
|
|
end
|
||
|
|
|
||
|
|
def id_from_autocomplete(current_autocomplete_value)
|
||
|
|
parse_autocomplete_value(current_autocomplete_value)[0]
|
||
|
|
end
|
||
|
|
|
||
|
|
def name_from_autocomplete(current_autocomplete_value)
|
||
|
|
parse_autocomplete_value(current_autocomplete_value)[1]
|
||
|
|
end
|
||
|
|
|
||
|
|
def title_from_autocomplete(current_autocomplete_value)
|
||
|
|
parse_autocomplete_value(current_autocomplete_value)[2]
|
||
|
|
end
|
||
|
|
|
||
|
|
def autocomplete_lookup(options = {})
|
||
|
|
options.reverse_merge!({search_param: "", autocomplete_prefix: "", sort: "down"})
|
||
|
|
search_param = options[:search_param]
|
||
|
|
autocomplete_prefix = options[:autocomplete_prefix]
|
||
|
|
if REDIS_AUTOCOMPLETE.exists(autocomplete_cache_key(autocomplete_prefix, search_param))
|
||
|
|
return REDIS_AUTOCOMPLETE.zrange(autocomplete_cache_key(autocomplete_prefix, search_param), 0, -1)
|
||
|
|
end
|
||
|
|
|
||
|
|
# we assume that if the user is typing in a phrase, any words they have
|
||
|
|
# entered are the exact word they want, so we only get the prefixes for
|
||
|
|
# the very last word they have entered so far
|
||
|
|
search_pieces = autocomplete_phrase_split(search_param).map { |w| w + AUTOCOMPLETE_WORD_TERMINATOR }
|
||
|
|
|
||
|
|
# for each complete word, we look up the phrases in that word's set
|
||
|
|
# along with their scores and add up the scores
|
||
|
|
scored_results = {}
|
||
|
|
count = {}
|
||
|
|
exact = {}
|
||
|
|
search_regex = Regexp.new(Regexp.escape(search_param), Regexp::IGNORECASE)
|
||
|
|
|
||
|
|
search_pieces.each_with_index do |search_piece, index|
|
||
|
|
lastpiece = false
|
||
|
|
if index == search_pieces.size - 1
|
||
|
|
lastpiece = true
|
||
|
|
search_piece.gsub!(/#{Tag::AUTOCOMPLETE_WORD_TERMINATOR}$/, '')
|
||
|
|
|
||
|
|
break if search_pieces.size > 1 && search_piece.length == 1
|
||
|
|
end
|
||
|
|
|
||
|
|
# Get all the complete words which could match this search term
|
||
|
|
completions = autocomplete_word_completions(search_piece, autocomplete_prefix)
|
||
|
|
|
||
|
|
completions.each do |word|
|
||
|
|
# O(logN + M) where M is number of items returned -- we could speed up even more by putting in a limit
|
||
|
|
phrases_with_scores = []
|
||
|
|
if lastpiece && search_piece.length < 3
|
||
|
|
# use a limit
|
||
|
|
phrases_with_scores = REDIS_AUTOCOMPLETE.zrevrangebyscore(autocomplete_score_key(autocomplete_prefix, word),
|
||
|
|
'inf', 0, withscores: true, limit: [0, 50])
|
||
|
|
else
|
||
|
|
phrases_with_scores = REDIS_AUTOCOMPLETE.zrevrangebyscore(autocomplete_score_key(autocomplete_prefix, word),
|
||
|
|
'inf', 0, withscores: true)
|
||
|
|
end
|
||
|
|
|
||
|
|
phrases_with_scores.each do |phrase, score|
|
||
|
|
score = score.to_i
|
||
|
|
if options[:constraint_sets]
|
||
|
|
# phrases must be in these sets or else no go
|
||
|
|
# O(logN) complexity
|
||
|
|
next unless options[:constraint_sets].all {|set| REDIS_AUTOCOMPLETE.zrank(set, phrase)}
|
||
|
|
end
|
||
|
|
|
||
|
|
if count[phrase]
|
||
|
|
# if we've already seen this phrase, increase the score
|
||
|
|
scored_results[phrase] += score
|
||
|
|
count[phrase] += 1
|
||
|
|
else
|
||
|
|
# initialize the score and check if it exactly matches our regexp
|
||
|
|
scored_results[phrase] = score
|
||
|
|
if lastpiece
|
||
|
|
# don't count if it only matches the last search piece
|
||
|
|
count[phrase] = 0
|
||
|
|
else
|
||
|
|
count[phrase] = 1
|
||
|
|
end
|
||
|
|
if phrase.match(search_regex)
|
||
|
|
exact[phrase] = true
|
||
|
|
else
|
||
|
|
exact[phrase] = false
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
# final sort is O(NlogN) but N is only the number of complete phrase results which should be relatively small
|
||
|
|
results = scored_results.keys.sort do |k1, k2|
|
||
|
|
exact[k1] && !exact[k2] ? -1 : (exact[k2] && !exact[k1] ? 1 :
|
||
|
|
count[k1] > count[k2] ? -1 : (count[k2] > count[k1] ? 1 :
|
||
|
|
scored_results[options[:sort] == "down" ? k2 : k1].to_i <=> scored_results[options[:sort] == "down" ? k1 : k2].to_i))
|
||
|
|
end
|
||
|
|
limit = options[:limit] || 15
|
||
|
|
|
||
|
|
if search_param.length <= 2
|
||
|
|
# cache the result for really quick response when only 1-2 letters entered
|
||
|
|
# adds only a little bit to memory and saves doing a lot of processing of many phrases
|
||
|
|
results[0..limit].each_with_index {|res, index| REDIS_AUTOCOMPLETE.zadd(autocomplete_cache_key(autocomplete_prefix, search_param), index, res)}
|
||
|
|
# expire every 24 hours so new entries get added if appropriate
|
||
|
|
REDIS_AUTOCOMPLETE.expire(autocomplete_cache_key(autocomplete_prefix, search_param), 24*60*60)
|
||
|
|
end
|
||
|
|
results[0..limit]
|
||
|
|
end
|
||
|
|
|
||
|
|
def is_complete_word?(word_piece)
|
||
|
|
word_piece.match(/#{AUTOCOMPLETE_WORD_TERMINATOR}$/)
|
||
|
|
end
|
||
|
|
|
||
|
|
def get_word(word_piece)
|
||
|
|
word_piece.gsub(/#{AUTOCOMPLETE_WORD_TERMINATOR}$/, '')
|
||
|
|
end
|
||
|
|
|
||
|
|
def autocomplete_score_key(autocomplete_prefix, word)
|
||
|
|
self.transliterate(autocomplete_prefix + "_" + AUTOCOMPLETE_SCORE_KEY + "_" + get_word(word))
|
||
|
|
end
|
||
|
|
|
||
|
|
def autocomplete_completion_key(autocomplete_prefix)
|
||
|
|
self.transliterate(autocomplete_prefix + "_" + AUTOCOMPLETE_COMPLETION_KEY)
|
||
|
|
end
|
||
|
|
|
||
|
|
def autocomplete_cache_key(autocomplete_prefix, search_param)
|
||
|
|
self.transliterate(autocomplete_prefix + "_" + AUTOCOMPLETE_CACHE_KEY + "_" + search_param)
|
||
|
|
end
|
||
|
|
|
||
|
|
# Split a string into words.
|
||
|
|
def autocomplete_phrase_split(string)
|
||
|
|
# Use the ActiveSupport::Multibyte::Chars class to handle downcasing
|
||
|
|
# instead of the basic string class, because it can handle downcasing
|
||
|
|
# letters with accents or other diacritics.
|
||
|
|
normalized = self.transliterate(string).downcase.to_s
|
||
|
|
|
||
|
|
# Split on one or more spaces, ampersands, slashes, double quotation marks,
|
||
|
|
# opening parentheses, closing parentheses (just in case), tildes, hyphens, vertical bars
|
||
|
|
normalized.split(%r{(?:\s|&|/|"|\(|\)|~|-|\|)+})
|
||
|
|
end
|
||
|
|
|
||
|
|
def autocomplete_pieces(string)
|
||
|
|
# prefixes for autocomplete
|
||
|
|
prefixes = []
|
||
|
|
|
||
|
|
words = autocomplete_phrase_split(string)
|
||
|
|
|
||
|
|
words.each do |word|
|
||
|
|
prefixes << self.transliterate(word) + AUTOCOMPLETE_WORD_TERMINATOR
|
||
|
|
word.length.downto(1).each do |last_index|
|
||
|
|
prefixes << word.slice(0, last_index)
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
prefixes
|
||
|
|
end
|
||
|
|
|
||
|
|
# overall time complexity: O(log N)
|
||
|
|
def autocomplete_word_completions(word_piece, autocomplete_prefix)
|
||
|
|
get_exact = is_complete_word?(word_piece)
|
||
|
|
|
||
|
|
# the rank of the word piece tells us where to start looking
|
||
|
|
# in the completion set for possible completions
|
||
|
|
# O(logN) N = number of things in the completion set (ie all the possible prefixes for all the words)
|
||
|
|
start_position = REDIS_AUTOCOMPLETE.zrank(autocomplete_completion_key(autocomplete_prefix), word_piece)
|
||
|
|
return [] unless start_position
|
||
|
|
|
||
|
|
results = []
|
||
|
|
# start from that position and go for the specified range length
|
||
|
|
# O(logN + M) M is the range length, so reduces to logN
|
||
|
|
REDIS_AUTOCOMPLETE.zrange(autocomplete_completion_key(autocomplete_prefix), start_position, start_position + AUTOCOMPLETE_RANGE_LENGTH - 1).each do |entry|
|
||
|
|
minlen = [entry.length, word_piece.length].min
|
||
|
|
# if the entry stops matching the prefix then we've passed out of
|
||
|
|
# the completions that could belong to this word -- return
|
||
|
|
return results if entry.slice(0, minlen) != word_piece.slice(0, minlen)
|
||
|
|
|
||
|
|
# otherwise if we've hit a complete word add it to the results
|
||
|
|
if is_complete_word?(entry)
|
||
|
|
results << entry
|
||
|
|
return results if get_exact
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
results
|
||
|
|
end
|
||
|
|
|
||
|
|
# generic method to remove pieces for given search string and value from the given autocomplete prefixes
|
||
|
|
def remove_from_autocomplete(search_string, prefixes, value)
|
||
|
|
autocomplete_pieces(search_string).each do |word_piece|
|
||
|
|
prefixes.each do |prefix|
|
||
|
|
# we leave the word pieces in the completion set so we don't accidentally trash
|
||
|
|
# parts of other completions -- doing a weekly reload for cleanup is good enough
|
||
|
|
if is_complete_word?(word_piece)
|
||
|
|
word = get_word(word_piece)
|
||
|
|
phrases = REDIS_AUTOCOMPLETE.zrevrangebyscore(autocomplete_score_key(prefix, word), 'inf', 0)
|
||
|
|
if phrases.count == 1 && phrases.first == value
|
||
|
|
# there's only one phrase for this word and we're removing it, remove the completed word from the completion set
|
||
|
|
REDIS_AUTOCOMPLETE.zrem(autocomplete_completion_key(prefix), word_piece)
|
||
|
|
end
|
||
|
|
# remove the phrase we're deleting from the score set
|
||
|
|
REDIS_AUTOCOMPLETE.zrem(autocomplete_score_key(prefix, word_piece), value)
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
end
|
||
|
|
|
||
|
|
def self.included(base)
|
||
|
|
base.extend(ClassMethods)
|
||
|
|
end
|
||
|
|
end
|