#--
# ==================================================================
# Author: Jamis Buck (jamis@jamisbuck.org)
# Date: 2008-10-09
#
# This file is in the public domain. Usage, modification, and
# redistribution of this file are unrestricted.
# ==================================================================
#++

# The "fuzzy" file finder provides a way for searching a directory
# tree with only a partial name. This is similar to the "cmd-T"
# feature in TextMate (http://macromates.com).
#
# Usage:
#
#   finder = FuzzyFileFinder.new
#   finder.search("app/blogcon") do |match|
#     puts match[:highlighted_path]
#   end
#
# In the above example, all files matching "app/blogcon" will be
# yielded to the block. The given pattern is reduced to a regular
# expression internally, so that any file that contains those
# characters in that order (even if there are other characters
# in between) will match.
#
# In other words, "app/blogcon" would match any of the following
# (parenthesized strings indicate how the match was made):
#
# * (app)/controllers/(blog)_(con)troller.rb
# * lib/c(ap)_(p)ool/(bl)ue_(o)r_(g)reen_(co)loratio(n)
# * test/(app)/(blog)_(con)troller_test.rb
#
# And so forth.
class FuzzyFileFinder
  module Version
    MAJOR = 1
    MINOR = 0
    TINY  = 4
    STRING = [MAJOR, MINOR, TINY].join(".")
  end

  # This is the exception that is raised if you try to scan a
  # directory tree with too many entries. By default, a ceiling of
  # 10,000 entries is enforced, but you can change that number via
  # the +ceiling+ parameter to FuzzyFileFinder.new.
  class TooManyEntries < RuntimeError; end

  # Used internally to represent a run of characters within a
  # match. This is used to build the highlighted version of
  # a file name. Runs that are "inside" the pattern are rendered
  # wrapped in parentheses; all other runs are rendered verbatim.
  class CharacterRun < Struct.new(:string, :inside) #:nodoc:
    def to_s
      if inside
        "(#{string})"
      else
        string
      end
    end
  end

  # Used internally to represent a file within the directory tree.
  # +parent+ is the Directory that contains the file and +name+ is
  # the file's basename.
  class FileSystemEntry #:nodoc:
    attr_reader :parent
    attr_reader :name

    def initialize(parent, name)
      @parent = parent
      @name = name
    end

    # The directory name and the file name joined into one path.
    def path
      File.join(parent.name, name)
    end
  end

  # Used internally to represent a subdirectory within the directory
  # tree.
  class Directory #:nodoc:
    attr_reader :name

    def initialize(name, is_root=false)
      @name = name
      @is_root = is_root
    end

    # Returns the +is_root+ flag given to the constructor. The flag is
    # read straight from the instance variable, because no +is_root+
    # reader method is defined on this class.
    def root?
      @is_root
    end
  end

  # The root directory trees to search.
  attr_reader :roots

  # The list of files beneath all +roots+
  attr_reader :files

  # The maximum number of files beneath all +roots+
  attr_reader :ceiling

  # The prefix shared by all +roots+.
  attr_reader :shared_prefix

  # The list of glob patterns to ignore.
  attr_reader :ignores

  # Initializes a new FuzzyFileFinder. This will scan the
  # given +directories+, using +ceiling+ as the maximum number
  # of entries to scan. If there are more than +ceiling+ entries
  # a TooManyEntries exception will be raised.
  #
  # * +directories+ : a path or an array of paths. Paths are expanded
  #   (so "~" works), paths that are not directories are dropped, and
  #   duplicates are removed. An empty list means ".". If none of the
  #   paths is a directory, the finder is simply empty.
  # * +ceiling+ : the maximum number of files that may be collected.
  # * +ignores+ : a glob pattern or an array of glob patterns (as
  #   understood by File.fnmatch). Files whose path, relative to the
  #   shared prefix, matches any of them are skipped.
  def initialize(directories=['.'], ceiling=10_000, ignores=nil)
    directories = Array(directories)
    # Array() hands back the caller's own array, so substitute a new
    # one rather than appending to it.
    directories = ["."] if directories.empty?

    # expand any paths with ~
    root_dirnames = directories.map { |d| File.expand_path(d) }.select { |d| File.directory?(d) }.uniq

    @roots = root_dirnames.map { |d| Directory.new(d, true) }
    @shared_prefix = determine_shared_prefix
    @shared_prefix_re = Regexp.new("^#{Regexp.escape(shared_prefix)}" + (shared_prefix.empty? ? "" : "/"))

    @files = []
    @ceiling = ceiling

    @ignores = Array(ignores)

    rescan!
  end

  # Rescans the subtree. If the directory contents ever change,
  # you'll need to call this to force the finder to be aware of
  # the changes.
  def rescan!
    @files.clear
    roots.each { |root| follow_tree(root) }
  end

  # Takes the given +pattern+ (which must be a string) and searches
  # all files beneath +root+, yielding each match. Leading and
  # trailing whitespace in +pattern+ is ignored; the string passed in
  # is never modified.
  #
  # +pattern+ is interpreted thus:
  #
  # * "foo" : look for any file with the characters 'f', 'o', and 'o'
  #   in its basename (discounting directory names). The characters
  #   must be in that order.
  # * "foo/bar" : look for any file with the characters 'b', 'a',
  #   and 'r' in its basename (discounting directory names). Also,
  #   any successful match must also have at least one directory
  #   element matching the characters 'f', 'o', and 'o' (in that
  #   order).
  # * "foo/bar/baz" : same as "foo/bar", but matching two
  #   directory elements in addition to a file name of "baz".
  #
  # Each yielded match will be a hash containing the following keys:
  #
  # * :path refers to the full path to the file
  # * :directory refers to the directory of the file
  # * :name refers to the name of the file (without directory)
  # * :highlighted_directory refers to the directory of the file with
  #   matches highlighted in parentheses.
  # * :highlighted_name refers to the name of the file with matches
  #   highlighted in parentheses
  # * :highlighted_path refers to the full path of the file with
  #   matches highlighted in parentheses
  # * :abbr refers to an abbreviated form of :highlighted_path, where
  #   path segments without matches are compressed to just their first
  #   character.
  # * :score refers to a value between 0 and 1 indicating how closely
  #   the file matches the given pattern. A score of 1 means the
  #   pattern matches the file exactly.
  def search(pattern, &block)
    # Work on a stripped copy so the caller's string is left untouched
    # and frozen strings are accepted.
    pattern = pattern.strip

    path_parts = pattern.split("/")
    # split drops a trailing empty field, so "foo/" needs the empty
    # file-name part put back.
    path_parts.push "" if pattern[-1,1] == "/"

    file_name_part = path_parts.pop || ""

    if path_parts.any?
      path_regex_raw = "^(.*?)" + path_parts.map { |part| make_pattern(part) }.join("(.*?/.*?)") + "(.*?)$"
      path_regex = Regexp.new(path_regex_raw, Regexp::IGNORECASE)
    end

    file_regex_raw = "^(.*?)#{make_pattern(file_name_part)}(.*)$"
    file_regex = Regexp.new(file_regex_raw, Regexp::IGNORECASE)

    # Per-search cache of directory match results, keyed by Directory.
    path_matches = {}
    files.each do |file|
      path_match = match_path(file.parent, path_matches, path_regex, path_parts.length)
      next if path_match[:missed]

      match_file(file, file_regex, path_match, &block)
    end
  end

  # Takes the given +pattern+ (which must be a string, formatted as
  # described in #search), and returns up to +max+ matches in an
  # Array. If +max+ is nil, all matches will be returned.
  def find(pattern, max=nil)
    results = []

    search(pattern) do |match|
      results << match
      # break leaves the enclosing #search call early.
      break if max && results.length >= max
    end

    results
  end

  # Displays the finder object in a sane, non-explosive manner.
  def inspect #:nodoc:
    "#<%s:0x%x roots=%s, files=%d>" % [self.class.name, object_id, roots.map { |r| r.name.inspect }.join(", "), files.length]
  end

  private

    # Recursively scans +directory+ and all files and subdirectories
    # beneath it, depth-first. Entries whose name starts with "." are
    # skipped. Raises TooManyEntries as soon as collecting another
    # file would push the number of files past +ceiling+.
    def follow_tree(directory)
      Dir.entries(directory.name).each do |entry|
        next if entry[0,1] == "."

        full = File.join(directory.name, entry)

        if File.directory?(full)
          follow_tree(Directory.new(full))
        elsif !ignore?(full.sub(@shared_prefix_re, ""))
          # Check right before adding, so at most +ceiling+ files are
          # ever collected.
          raise TooManyEntries if files.length >= ceiling
          files.push(FileSystemEntry.new(directory, entry))
        end
      end
    end

    # Returns +true+ if the given name matches any of the ignore
    # patterns.
    def ignore?(name)
      ignores.any? { |pattern| File.fnmatch(pattern, name) }
    end

    # Takes the given pattern string "foo" and converts it to a new
    # string "(f)([^/]*?)(o)([^/]*?)(o)" that can be used to create
    # a regular expression. An empty pattern yields "()".
    def make_pattern(pattern)
      characters = pattern.split(//)
      characters << "" if characters.empty?

      characters.map { |character| "(#{Regexp.escape(character)})" }.join("([^/]*?)")
    end

    # Given a MatchData object +match+ and a number of "inside"
    # segments to support, compute both the match score and the
    # highlighted match string. The "inside segments" refers to how
    # many patterns were matched in this one match. For a file name,
    # this will always be one. For directories, it will be one for
    # each directory segment in the original pattern.
    #
    # Returns a hash with the keys :score and :result.
    def build_match_result(match, inside_segments)
      runs = []
      inside_chars = total_chars = 0
      match.captures.each_with_index do |capture, index|
        if capture.length > 0
          # odd-numbered captures are matches inside the pattern.
          # even-numbered captures are matches between the pattern's elements.
          inside = index % 2 != 0

          total_chars += capture.delete("/").length # ignore '/' delimiters
          inside_chars += capture.length if inside

          # Merge adjacent captures of the same kind into a single run.
          if runs.last && runs.last.inside == inside
            runs.last.string << capture
          else
            runs << CharacterRun.new(capture, inside)
          end
        end
      end

      # Determine the score of this match.
      # 1. fewer "inside runs" (runs corresponding to the original pattern)
      #    is better.
      # 2. better coverage of the actual path name is better

      inside_runs = runs.select { |r| r.inside }
      run_ratio = inside_runs.length.zero? ? 1 : inside_segments / inside_runs.length.to_f

      char_ratio = total_chars.zero? ? 1 : inside_chars.to_f / total_chars

      score = run_ratio * char_ratio

      { :score => score, :result => runs.join }
    end

    # Match the given path against the regex, caching the result in +path_matches+.
    # If +path+ is already cached in the path_matches cache, just return the cached
    # value. A directory that fails to match is cached with :missed set to true.
    # When +path_regex+ is nil (the pattern had no directory part), every
    # directory is accepted with a score of 1.
    def match_path(path, path_matches, path_regex, path_segments)
      return path_matches[path] if path_matches.key?(path)

      name_with_slash = path.name + "/" # add a trailing slash for matching the prefix
      matchable_name = name_with_slash.sub(@shared_prefix_re, "")
      matchable_name.chop! # kill the trailing slash

      if path_regex
        match = matchable_name.match(path_regex)

        path_matches[path] =
          match && build_match_result(match, path_segments) ||
          { :score => 1, :result => matchable_name, :missed => true }
      else
        path_matches[path] = { :score => 1, :result => matchable_name }
      end
    end

    # Match +file+ against +file_regex+. If it matches, yield the match
    # metadata to the block.
    def match_file(file, file_regex, path_match, &block)
      if file_match = file.name.match(file_regex)
        match_result = build_match_result(file_match, 1)
        full_match_result = path_match[:result].empty? ? match_result[:result] : File.join(path_match[:result], match_result[:result])
        # Directory segments without a highlighted "(" are cut down to
        # their first character.
        shortened_path = path_match[:result].gsub(/[^\/]+/) { |m| m.index("(") ? m : m[0,1] }
        abbr = shortened_path.empty? ? match_result[:result] : File.join(shortened_path, match_result[:result])

        result = { :path => file.path,
                   :abbr => abbr,
                   :directory => file.parent.name,
                   :name => file.name,
                   :highlighted_directory => path_match[:result],
                   :highlighted_name => match_result[:result],
                   :highlighted_path => full_match_result,
                   :score => path_match[:score] * match_result[:score] }
        yield result
      end
    end

    # Computes the leading path shared by every root. Returns an empty
    # string when there are no roots, or when the roots have nothing
    # in common beyond the leading "/".
    def determine_shared_prefix
      # no usable roots at all (e.g. none of the given directories
      # exist): there is nothing to share.
      return "" if roots.empty?

      # the common case: if there is only a single root, then the entire
      # name of the root is the shared prefix.
      return roots.first.name if roots.length == 1

      split_roots = roots.map { |root| root.name.split(%r{/}) }
      segments    = split_roots.map { |root| root.length }.max
      master      = split_roots.pop

      segments.times do |segment|
        if !split_roots.all? { |root| root[segment] == master[segment] }
          return master[0,segment].join("/")
        end
      end

      # shouldn't ever get here, since we uniq the root list before
      # calling this method, but if we do, somehow...
      return roots.first.name
    end
end