Recursive Directory Search with Ruby and Groovy

A while back I was bored and decided I needed to brush up on my Ruby chops. I had been wanting to play with threads for quite some time and couldn’t think of anything that would be a fun project to do…until this crazy idea hit me. "Wouldn’t it be cool if I could generate multiple threads to search different servers for any file of my choosing?" The code I wrote doesn’t directly do this, but with some minor tweaks it could be done.

I took that idea and ran with it using Ruby. After I finished coding, I thought I would try writing it from scratch using my second favorite language, Groovy (Ruby is my first). I have to admit, writing the Groovy code was more intuitive because of the baked-in file/directory iterators. I refactored my Ruby code a few times and ended up using the Find module to maximize performance. Below is the code, and as always, I am open to suggestions on other ways of doing it 😀

Ruby Code

#########################
#  Juan Vazquez
#  http://javazquez.com
#########################
require 'find'
# Searches a directory tree for entries whose path matches any of a set of
# patterns. Files directly under the root are checked on the calling thread;
# every immediate subdirectory of the root is walked with Find.find in its own
# thread. Each match is printed and added to #count.
class DirectoryWizard
  attr_accessor :root_dir, :exts, :thread_cnt, :thread_tracker, :count

  # root       - directory to start from (a trailing separator is optional)
  # t_count    - maximum number of search threads allowed to run at once
  # extensions - one or more patterns (String or Regexp) tested against each
  #              path with String#match?
  def initialize(root, t_count, *extensions)
    @root_dir = root
    @exts = extensions
    @thread_cnt = t_count
    @thread_tracker = []
    @count = 0
    # Several worker threads report matches; the lock keeps the counter
    # update and the printed line for one match together.
    @report_lock = Mutex.new
  end

  # Runs the whole search and blocks until every worker thread has finished.
  # Errors derived from StandardError (for example a missing root directory)
  # are printed rather than raised.
  def start_looking
    # Only non-directories are checked here: each subdirectory is reported by
    # its own worker, because Find.find yields the starting directory itself.
    list_contents(@root_dir).each do |entry|
      path = File.join(@root_dir, entry)
      report_if_match(path) unless File.directory?(path)
    end

    list_dirs(@root_dir).each do |di|
      @thread_tracker << Thread.new(File.join(@root_dir, di)) do |directory|
        recursive_file_search(directory)
      end
      # Drain the current batch once it reaches the configured limit.
      wait_for_running_threads if @thread_tracker.size >= @thread_cnt
    end
    wait_for_running_threads
  rescue StandardError => e
    puts e
  end

  # Walks directory (including directory itself) and reports every path that
  # matches one of the patterns.
  def recursive_file_search(directory)
    Find.find(directory) { |dir_item| report_if_match(dir_item) }
  end

  # Returns the names of the immediate subdirectories of directory,
  # excluding '.' and '..'.
  def list_dirs(directory)
    list_contents(directory).select do |name|
      File.directory?(File.join(directory, name))
    end
  end

  # Returns the names of all entries in directory, excluding '.' and '..'.
  def list_contents(directory)
    Dir.entries(directory).reject { |name| is_p_c_directory?(name) }
  end

  # True when filename is the parent ('..') or current ('.') directory entry.
  def is_p_c_directory?(filename)
    filename == '.' || filename == '..'
  end

  # Returns the first pattern in exts that dir_item matches, or nil when
  # none of them match.
  def is_in_ext(dir_item)
    @exts.find { |ext| dir_item.match?(ext) }
  end

  # Joins every tracked thread, then forgets them so a new batch can start.
  def wait_for_running_threads
    @thread_tracker.each(&:join)
    @thread_tracker = []
  end

  private

  # Prints path and bumps the match counter when path matches a pattern.
  def report_if_match(path)
    return unless is_in_ext(path)

    @report_lock.synchronize do
      @count += 1
      puts path
    end
  end
end # end class


# Example run: search a network share for paths matching 'filename',
# passing 16 as the thread count, then print how many matches were counted.
wizard = DirectoryWizard.new("\\\\server\\e$\\profiles\\", 16, 'filename')
wizard.start_looking
puts "Done with Program count is #{wizard.count}"

Groovy Code


import java.util.regex.*;
/**
 * Searches a directory tree for paths that match any of a list of
 * case-insensitive regular expressions. Entries directly under the root are
 * checked on the calling thread; each immediate subdirectory is walked
 * recursively in its own thread. Every match is printed and added to count.
 */
class DirWiz{
   def root_dir, exts, thread_max_cnt, thread_tracker, count

   /**
    * @param basedir    directory to start searching from
    * @param t_count    maximum number of worker threads running at once
    * @param extensions regular-expression strings; compiled once up front
    */
   public DirWiz(String basedir, int t_count, List extensions){
        this.root_dir = basedir
        this.exts = compile_regex(extensions)
        this.thread_max_cnt = t_count
        this.thread_tracker = []
        this.count = 0
    }

    /**
     * Runs the whole search and blocks until every worker thread is done.
     * Failures are printed instead of being propagated.
     */
    def start_looking(){
      try{
          def dir = new File(this.root_dir)
          check_for_files(this.root_dir)
          //recursively search directories, one worker thread per subdirectory
          dir.eachDir{ subDir ->
             //wait for the current batch before exceeding the thread limit
             if(this.thread_tracker.size() >= this.thread_max_cnt){
                 this.thread_tracker.each{ it.join() }
                 this.thread_tracker = []
             }
             this.thread_tracker << Thread.start{
                 //the outer try/catch does not cover code running in the worker
                 try{
                     subDir.eachFileRecurse{ fh ->
                        check_using_compiled_regex(fh.canonicalPath)
                     }
                 }catch(Exception e){
                     println("error ${e}")
                 }
             }
          }
      }catch(Exception e){
        println("error ${e}")
      }
      this.thread_tracker.each{ it.join() }
      this.thread_tracker = []
      println("Done")
    }

   /**
    * Reports file once for every pattern found anywhere inside it
    * (partial match, unlike check_using_compiled_regex).
    */
   def print_if_match(String file){
      this.exts.each{ ext ->
         if(file =~ ext){ record_match(file) }
      }
   }

   /**
    * Reports file once when at least one pattern matches the entire string.
    * A failure while matching is printed and otherwise ignored.
    */
   def check_using_compiled_regex(String file){
      try{
         def hit = this.exts.find{ it.matcher(file).matches() }
         if(hit){ record_match(file) }
      }catch(Exception e){
         println("error matching ${file}\n$e")
      }
   }

   /**
    * Checks every entry directly inside dir (not recursive) against the
    * patterns. Prints a message if dir cannot be iterated.
    */
   def check_for_files(String dir){
      try{
         new File(dir).eachFile{ file ->
            check_using_compiled_regex(file.canonicalPath)
         }
      }catch(Exception e){
         println("Not a Directory ${dir}\n$e")
      }
   }

   /** Compiles each entry of list into a case-insensitive Pattern. */
   def compile_regex(List list){
      return list.collect{ Pattern.compile(it, Pattern.CASE_INSENSITIVE) }
   }

   /**
    * Counts and prints one match. Synchronized because worker threads call
    * it concurrently and count += 1 is a read-modify-write.
    */
   protected synchronized void record_match(String file){
      this.count += 1
      println(file)
   }
}


// Example run: search c:\ with the pattern ".*\.jpg.*", passing 16 as the
// thread count, then print the number of matches that were counted.
def wizard = new DirWiz('c:\\', 16, [".*\\.jpg.*"]) //look for jpegs
// Alternative target on a network share:
//def wizard = new DirWiz('\\\\server\\dir\\',16,["filname"])
wizard.start_looking()

println("Done with the program total number of files is ${wizard.count}")