SpaceVim/bundle/telescope.nvim/lua/telescope/sorters.lua

local log = require "telescope.log"
local util = require "telescope.utils"

local sorters = {}

local ngram_highlighter = function(ngram_len, prompt, display)
  local highlights = {}
  display = display:lower()

  for disp_index = 1, #display do
    local char = display:sub(disp_index, disp_index + ngram_len - 1)
    if prompt:find(char, 1, true) then
      table.insert(highlights, {
        start = disp_index,
        finish = disp_index + ngram_len - 1,
      })
    end
  end

  return highlights
end

local FILTERED = -1

local Sorter = {}
Sorter.__index = Sorter

---@class Sorter
--- Sorter sorts a list of results by return a single integer for a line,
--- given a prompt
---
--- Lower number is better (because it's like a closer match)
--- But, any number below 0 means you want that line filtered out.
---@field scoring_function function: Function that has the interface: (sorter, prompt, line): number
---@field tags table: Unique tags collected at filtering for tag completion
---@field filter_function function: Function that can filter results
---@field highlighter function: Highlights results to display them pretty
---@field discard boolean: Whether this is a discardable style sorter or not.
---@field score function: Override the score function if desired.
---@field init function: Function to run when creating sorter
---@field start function: Function to run on every new prompt
---@field finish function: Function to run after every new prompt
---@field destroy function: Functo to run when destroying sorter
function Sorter:new(opts)
  opts = opts or {}

  return setmetatable({
    score = opts.score,
    state = {},
    tags = opts.tags,

    -- State management
    init = opts.init,
    start = opts.start,
    finish = opts.finish,
    destroy = opts.destroy,
    _status = nil,

    filter_function = opts.filter_function,
    scoring_function = opts.scoring_function,
    highlighter = opts.highlighter,
    discard = opts.discard,
    _discard_state = {
      filtered = {},
      prompt = "",
    },
  }, Sorter)
end

function Sorter:_init()
  self._status = "init"
  if self.init then
    self:init()
  end
end

function Sorter:_destroy()
  self._status = "destroy"
  if self.destroy then
    self:destroy()
  end
end

-- TODO: We could make this a bit smarter and cache results "as we go" and where they got filtered.
--          Then when we hit backspace, we don't have to re-caculate everything.
--          Prime did a lot of the hard work already, but I don't want to copy as much memory around
--              as he did in his example.
--              Example can be found in ./scratch/prime_prompt_cache.lua
function Sorter:_start(prompt)
  self._status = "start"
  if self.start then
    self:start(prompt)
  end

  if not self.discard then
    return
  end

  local previous = self._discard_state.prompt
  local len_previous = #previous

  if #prompt < len_previous then
    log.trace "Reset discard because shorter prompt"
    self._discard_state.filtered = {}
  elseif string.sub(prompt, 1, len_previous) ~= previous then
    log.trace "Reset discard no match"
    self._discard_state.filtered = {}
  end

  self._discard_state.prompt = prompt
end

function Sorter:_finish(prompt)
  self._status = "finish"
  if self.finish then
    self:finish(prompt)
  end
end

-- TODO: Consider doing something that makes it so we can skip the filter checks
--          if we're not discarding. Also, that means we don't have to check otherwise as well :)
function Sorter:score(prompt, entry, cb_add, cb_filter)
  if not entry or not entry.ordinal then
    return
  end

  if self._status and self._status ~= "start" then
    return
  end

  local ordinal = entry.ordinal
  if self:_was_discarded(prompt, ordinal) then
    return cb_filter(entry)
  end

  local filter_score
  if self.filter_function ~= nil then
    if self.tags then
      self.tags:insert(entry)
    end
    filter_score, prompt = self:filter_function(prompt, entry, cb_add, cb_filter)
  end

  if filter_score == FILTERED then
    return cb_filter(entry)
  end

  local score = self:scoring_function(prompt or "", ordinal, entry, cb_add, cb_filter)
  if score == FILTERED then
    self:_mark_discarded(prompt, ordinal)
    return cb_filter(entry)
  end

  if cb_add then
    return cb_add(score, entry)
  else
    return score
  end
end

function Sorter:_was_discarded(prompt, ordinal)
  return self.discard and self._discard_state.filtered[ordinal]
end

function Sorter:_mark_discarded(prompt, ordinal)
  if not self.discard then
    return
  end

  self._discard_state.filtered[ordinal] = true
end

function sorters.new(...)
  return Sorter:new(...)
end

sorters.Sorter = Sorter

local make_cached_tail = function()
  local os_sep = util.get_separator()
  local match_string = "[^" .. os_sep .. "]*$"
  return setmetatable({}, {
    __index = function(t, k)
      local tail = string.match(k, match_string)

      rawset(t, k, tail)
      return tail
    end,
  })
end

local make_cached_uppers = function()
  return setmetatable({}, {
    __index = function(t, k)
      local obj = {}
      for i = 1, #k do
        local s_byte = k:byte(i, i)
        if s_byte <= 90 and s_byte >= 65 then
          obj[s_byte] = true
        end
      end

      rawset(t, k, obj)
      return obj
    end,
  })
end

-- TODO: Match on upper case words
-- TODO: Match on last match
sorters.get_fuzzy_file = function(opts)
  opts = opts or {}

  local ngram_len = opts.ngram_len or 2

  local cached_ngrams = {}

  local function overlapping_ngrams(s, n)
    if cached_ngrams[s] and cached_ngrams[s][n] then
      return cached_ngrams[s][n]
    end

    local R = {}
    for i = 1, s:len() - n + 1 do
      R[#R + 1] = s:sub(i, i + n - 1)
    end

    if not cached_ngrams[s] then
      cached_ngrams[s] = {}
    end

    cached_ngrams[s][n] = R

    return R
  end

  local cached_tails = make_cached_tail()
  local cached_uppers = make_cached_uppers()

  return Sorter:new {
    scoring_function = function(_, prompt, line)
      local N = #prompt

      if N == 0 or N < ngram_len then
        -- TODO: If the character is in the line,
        -- then it should get a point or somethin.
        return 1
      end

      local prompt_lower = prompt:lower()
      local line_lower = line:lower()

      local prompt_lower_ngrams = overlapping_ngrams(prompt_lower, ngram_len)

      -- Contains the original string
      local contains_string = line_lower:find(prompt_lower, 1, true)

      local prompt_uppers = cached_uppers[prompt]
      local line_uppers = cached_uppers[line]

      local uppers_matching = 0
      for k, _ in pairs(prompt_uppers) do
        if line_uppers[k] then
          uppers_matching = uppers_matching + 1
        end
      end

      -- TODO: Consider case senstivity
      local tail = cached_tails[line_lower]
      local contains_tail = tail:find(prompt, 1, true)

      local consecutive_matches = 0
      local previous_match_index = 0
      local match_count = 0

      for i = 1, #prompt_lower_ngrams do
        local match_start = line_lower:find(prompt_lower_ngrams[i], 1, true)
        if match_start then
          match_count = match_count + 1
          if match_start > previous_match_index then
            consecutive_matches = consecutive_matches + 1
          end

          previous_match_index = match_start
        end
      end

      local tail_modifier = 1
      if contains_tail then
        tail_modifier = 2
      end

      local denominator = (
          (10 * match_count / #prompt_lower_ngrams)
          -- biases for shorter strings
          + 3 * match_count * ngram_len / #line
          + consecutive_matches
          + N / (contains_string or (2 * #line))
          -- + 30/(c1 or 2*N)
          -- TODO: It might be possible that this too strongly correlates,
          --          but it's unlikely for people to type capital letters without actually
          --          wanting to do something with a capital letter in it.
          + uppers_matching
        ) * tail_modifier

      if denominator == 0 or denominator ~= denominator then
        return -1
      end

      if #prompt > 2 and denominator < 0.5 then
        return -1
      end

      return 1 / denominator
    end,

    highlighter = opts.highlighter or function(_, prompt, display)
      return ngram_highlighter(ngram_len, prompt, display)
    end,
  }
end

sorters.get_generic_fuzzy_sorter = function(opts)
  opts = opts or {}

  local ngram_len = opts.ngram_len or 2

  local cached_ngrams = {}
  local function overlapping_ngrams(s, n)
    if cached_ngrams[s] and cached_ngrams[s][n] then
      return cached_ngrams[s][n]
    end

    local R = {}
    for i = 1, s:len() - n + 1 do
      R[#R + 1] = s:sub(i, i + n - 1)
    end

    if not cached_ngrams[s] then
      cached_ngrams[s] = {}
    end

    cached_ngrams[s][n] = R

    return R
  end

  return Sorter:new {
    -- self
    -- prompt (which is the text on the line)
    -- line (entry.ordinal)
    -- entry (the whole entry)
    scoring_function = function(_, prompt, line, _)
      if prompt == 0 or #prompt < ngram_len then
        return 1
      end

      local prompt_lower = prompt:lower()
      local line_lower = line:lower()

      local prompt_ngrams = overlapping_ngrams(prompt_lower, ngram_len)

      local N = #prompt

      local contains_string = line_lower:find(prompt_lower, 1, true)

      local consecutive_matches = 0
      local previous_match_index = 0
      local match_count = 0

      for i = 1, #prompt_ngrams do
        local match_start = line_lower:find(prompt_ngrams[i], 1, true)
        if match_start then
          match_count = match_count + 1
          if match_start > previous_match_index then
            consecutive_matches = consecutive_matches + 1
          end

          previous_match_index = match_start
        end
      end

      -- TODO: Copied from ashkan.
      local denominator = (
        (10 * match_count / #prompt_ngrams)
        -- biases for shorter strings
        -- TODO(ashkan): this can bias towards repeated finds of the same
        -- subpattern with overlapping_ngrams
        + 3 * match_count * ngram_len / #line
        + consecutive_matches
        + N / (contains_string or (2 * #line)) -- + 30/(c1 or 2*N)

      )

      if denominator == 0 or denominator ~= denominator then
        return -1
      end

      if #prompt > 2 and denominator < 0.5 then
        return -1
      end

      return 1 / denominator
    end,

    highlighter = opts.highlighter or function(_, prompt, display)
      return ngram_highlighter(ngram_len, prompt, display)
    end,
  }
end

sorters.fuzzy_with_index_bias = function(opts)
  opts = opts or {}
  opts.ngram_len = 2

  -- TODO: Probably could use a better sorter here.
  local fuzzy_sorter = sorters.get_generic_fuzzy_sorter(opts)

  return Sorter:new {
    scoring_function = function(_, prompt, line, entry, cb_add, cb_filter)
      local base_score = fuzzy_sorter:scoring_function(prompt, line, cb_add, cb_filter)

      if base_score == FILTERED then
        return FILTERED
      end

      if not base_score or base_score == 0 then
        return entry.index
      else
        return math.min(math.pow(entry.index, 0.25), 2) * base_score
      end
    end,
  }
end

-- Sorter using the fzy algorithm
sorters.get_fzy_sorter = function(opts)
  opts = opts or {}
  local fzy = opts.fzy_mod or require "telescope.algos.fzy"
  local OFFSET = -fzy.get_score_floor()

  return sorters.Sorter:new {
    discard = true,

    scoring_function = function(_, prompt, line)
      -- Check for actual matches before running the scoring alogrithm.
      if not fzy.has_match(prompt, line) then
        return -1
      end

      local fzy_score = fzy.score(prompt, line)

      -- The fzy score is -inf for empty queries and overlong strings.  Since
      -- this function converts all scores into the range (0, 1), we can
      -- convert these to 1 as a suitable "worst score" value.
      if fzy_score == fzy.get_score_min() then
        return 1
      end

      -- Poor non-empty matches can also have negative values. Offset the score
      -- so that all values are positive, then invert to match the
      -- telescope.Sorter "smaller is better" convention. Note that for exact
      -- matches, fzy returns +inf, which when inverted becomes 0.
      return 1 / (fzy_score + OFFSET)
    end,

    -- The fzy.positions function, which returns an array of string indices, is
    -- compatible with telescope's conventions. It's moderately wasteful to
    -- call call fzy.score(x,y) followed by fzy.positions(x,y): both call the
    -- fzy.compute function, which does all the work. But, this doesn't affect
    -- perceived performance.
    highlighter = function(_, prompt, display)
      return fzy.positions(prompt, display)
    end,
  }
end

-- TODO: Could probably do something nice where we check their conf
--          and choose their default for this.
--          But I think `fzy` is good default for now.
sorters.highlighter_only = function(opts)
  opts = opts or {}
  local fzy = opts.fzy_mod or require "telescope.algos.fzy"

  return Sorter:new {
    scoring_function = function()
      return 1
    end,

    highlighter = function(_, prompt, display)
      return fzy.positions(prompt, display)
    end,
  }
end

sorters.empty = function()
  return Sorter:new {
    scoring_function = function()
      return 1
    end,
  }
end

-- Bad & Dumb Sorter
sorters.get_levenshtein_sorter = function()
  return Sorter:new {
    scoring_function = function(_, prompt, line)
      return require "telescope.algos.string_distance"(prompt, line)
    end,
  }
end

local substr_highlighter = function(_, prompt, display)
  local highlights = {}
  display = display:lower()

  local search_terms = util.max_split(prompt, "%s")
  local hl_start, hl_end

  for _, word in pairs(search_terms) do
    hl_start, hl_end = display:find(word, 1, true)
    if hl_start then
      table.insert(highlights, { start = hl_start, finish = hl_end })
    end
  end

  return highlights
end

sorters.get_substr_matcher = function()
  return Sorter:new {
    highlighter = substr_highlighter,
    scoring_function = function(_, prompt, _, entry)
      if #prompt == 0 then
        return 1
      end

      local display = entry.ordinal:lower()

      local search_terms = util.max_split(prompt, "%s")
      local matched = 0
      local total_search_terms = 0
      for _, word in pairs(search_terms) do
        total_search_terms = total_search_terms + 1
        if display:find(word, 1, true) then
          matched = matched + 1
        end
      end

      return matched == total_search_terms and entry.index or -1
    end,
  }
end

local substr_matcher = function(_, prompt, line, _)
  local display = line:lower()
  local search_terms = util.max_split(prompt:lower(), "%s")
  local matched = 0
  local total_search_terms = 0
  for _, word in pairs(search_terms) do
    total_search_terms = total_search_terms + 1
    if display:find(word, 1, true) then
      matched = matched + 1
    end
  end

  return matched == total_search_terms and 0 or FILTERED
end

local filter_function = function(opts)
  local scoring_function = vim.F.if_nil(opts.filter_function, substr_matcher)
  local tag = vim.F.if_nil(opts.tag, "ordinal")

  return function(_, prompt, entry)
    local filter = "^(" .. opts.delimiter .. "(%S+)" .. "[" .. opts.delimiter .. "%s]" .. ")"
    local matched = prompt:match(filter)

    if matched == nil then
      return 0, prompt
    end
    -- clear prompt of tag
    prompt = prompt:sub(#matched + 1, -1)
    local query = vim.trim(matched:gsub(opts.delimiter, ""))
    return scoring_function(_, query, entry[tag], _), prompt
  end
end

local function create_tag_set(tag)
  tag = vim.F.if_nil(tag, "ordinal")
  local set = {}
  return setmetatable(set, {
    __index = {
      insert = function(set_, entry)
        local value = entry[tag]
        if not set_[value] then
          set_[value] = true
        end
      end,
    },
  })
end

sorters.prefilter = function(opts)
  local sorter = opts.sorter
  opts.delimiter = util.get_default(opts.delimiter, ":")
  sorter._delimiter = opts.delimiter
  sorter.tags = create_tag_set(opts.tag)
  sorter.filter_function = filter_function(opts)
  sorter._was_discarded = function()
    return false
  end
  return sorter
end

return sorters