lib/coderay/helpers/file_type.rb



module CodeRay
  
  # = FileType
  #
  # A simple filetype recognizer.
  #
  # == Usage
  #
  #  # determine the type of the given
  #  lang = FileType[file_name]
  #  
  #  # return :text if the file type is unknown
  #  lang = FileType.fetch file_name, :text
  #  
  #  # try the shebang line, too
  #  lang = FileType.fetch file_name, :text, true
  module FileType
    
    UnknownFileType = Class.new Exception
    
    class << self
      
      # Try to determine the file type of the file.
      #
      # +filename+ is a relative or absolute path to a file.
      #
      # The file itself is only accessed when +read_shebang+ is set to true.
      # That means you can get filetypes from files that don't exist.
      def [] filename, read_shebang = false
        name = File.basename filename
        ext = File.extname(name).sub(/^\./, '')  # from last dot, delete the leading dot
        ext2 = filename.to_s[/\.(.*)/, 1]  # from first dot
        
        type =
          TypeFromExt[ext] ||
          TypeFromExt[ext.downcase] ||
          (TypeFromExt[ext2] if ext2) ||
          (TypeFromExt[ext2.downcase] if ext2) ||
          TypeFromName[name] ||
          TypeFromName[name.downcase]
        type ||= shebang(filename) if read_shebang
        
        type
      end
      
      # This works like Hash#fetch.
      #
      # If the filetype cannot be found, the +default+ value
      # is returned.
      def fetch filename, default = nil, read_shebang = false
        if default && block_given?
          warn 'Block supersedes default value argument; use either.'
        end
        
        if type = self[filename, read_shebang]
          type
        else
          return yield if block_given?
          return default if default
          raise UnknownFileType, 'Could not determine type of %p.' % filename
        end
      end
      
    protected
      
      def shebang filename
        return unless File.exist? filename
        File.open filename, 'r' do |f|
          if first_line = f.gets
            if type = first_line[TypeFromShebang]
              type.to_sym
            end
          end
        end
      end
      
    end
    
    TypeFromExt = {
      'c'         => :c,
      'cfc'       => :xml,
      'cfm'       => :xml,
      'clj'       => :clojure,
      'css'       => :css,
      'diff'      => :diff,
      'dpr'       => :delphi,
      'erb'       => :erb,
      'gemspec'   => :ruby,
      'groovy'    => :groovy,
      'gvy'       => :groovy,
      'h'         => :c,
      'haml'      => :haml,
      'htm'       => :html,
      'html'      => :html,
      'html.erb'  => :erb,
      'java'      => :java,
      'js'        => :java_script,
      'json'      => :json,
      'lua'       => :lua,
      'mab'       => :ruby,
      'pas'       => :delphi,
      'patch'     => :diff,
      'phtml'     => :php,
      'php'       => :php,
      'php3'      => :php,
      'php4'      => :php,
      'php5'      => :php,
      'prawn'     => :ruby,
      'py'        => :python,
      'py3'       => :python,
      'pyw'       => :python,
      'rake'      => :ruby,
      'raydebug'  => :raydebug,
      'rb'        => :ruby,
      'rbw'       => :ruby,
      'rhtml'     => :erb,
      'rjs'       => :ruby,
      'rpdf'      => :ruby,
      'ru'        => :ruby,  # config.ru
      'rxml'      => :ruby,
      'sass'      => :sass,
      'sql'       => :sql,
      'taskpaper' => :taskpaper,
      'template'  => :json,  # AWS CloudFormation template
      'tmproj'    => :xml,
      'xaml'      => :xml,
      'xhtml'     => :html,
      'xml'       => :xml,
      'yaml'      => :yaml,
      'yml'       => :yaml,
    }
    for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu]
      TypeFromExt[cpp_alias] = :cpp
    end
    
    TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/
    
    TypeFromName = {
      'Capfile'  => :ruby,
      'Rakefile' => :ruby,
      'Rantfile' => :ruby,
      'Gemfile'  => :ruby,
      'Guardfile' => :ruby,
      'Vagrantfile' => :ruby,
      'Appraisals' => :ruby
    }
    
  end
  
end