# lib/coderay/helpers/file_type.rb



module CodeRay

  # = FileType
  #
  # A simple filetype recognizer.
  #
  # The type is looked up, in this order:
  #
  # 1. by the extension after the last dot of the base name (+rb+),
  # 2. by everything after the first dot of the base name, which covers
  #    compound extensions (+html.erb+),
  # 3. by the base name itself (+Rakefile+, +Gemfile+, ...),
  # 4. optionally, by the first line of the file (shebang).
  #
  # Steps 1 to 3 try the exact spelling first and fall back to a
  # case-insensitive comparison.
  #
  # == Usage
  #
  #  # determine the type of the given file (nil if it is unknown)
  #  lang = FileType[file_name]
  #
  #  # return :text if the file type is unknown
  #  lang = FileType.fetch file_name, :text
  #
  #  # try the shebang line, too
  #  lang = FileType.fetch file_name, :text, true
  module FileType

    # Raised by FileType.fetch when no type can be determined and neither a
    # default value nor a block was given.
    UnknownFileType = Class.new StandardError

    class << self

      # Try to determine the file type of the file.
      #
      # +filename+ is a relative or absolute path to a file.
      #
      # The file itself is only accessed when +read_shebang+ is set to true.
      # That means you can get filetypes from files that don't exist.
      #
      # Returns the type as a Symbol, or +nil+ if it cannot be determined.
      def [](filename, read_shebang = false)
        name = File.basename filename
        ext  = File.extname(name).sub(/\A\./, '')  # from last dot, without the dot
        # From the first dot of the base name. Directory names must not take
        # part in this, otherwise "my.dir/page.html.erb" would yield
        # "dir/page.html.erb".
        ext2 = name[/\.(.*)/, 1]

        type = type_from_ext(ext) || type_from_ext(ext2) || type_from_name(name)
        type ||= shebang(filename) if read_shebang

        type
      end

      # This works like Hash#fetch.
      #
      # If the filetype cannot be found, the result of the given block is
      # returned; without a block, the +default+ value is returned.
      #
      # Unlike Hash#fetch, a +default+ of +nil+ or +false+ counts as "no
      # default given".
      #
      # Raises UnknownFileType if the type is unknown and neither a block nor
      # a (truthy) default is available.
      def fetch(filename, default = nil, read_shebang = false)
        if default && block_given?
          warn 'Block supersedes default value argument; use either.'
        end

        type = self[filename, read_shebang]
        return type if type
        return yield if block_given?
        return default if default

        raise UnknownFileType, 'Could not determine type of %p.' % filename
      end

    protected

      # Looks +ext+ up in TypeFromExt, verbatim first (so that "C" and "c"
      # stay distinct), downcased second. Returns +nil+ when +ext+ is +nil+
      # or unknown.
      def type_from_ext(ext)
        TypeFromExt[ext] || TypeFromExt[ext.downcase] if ext
      end

      # Looks the base name up in TypeFromName, verbatim first, ignoring
      # case second. The table's keys are capitalized, so a plain lookup of
      # the downcased name would never find "rakefile" or "gemfile".
      def type_from_name(name)
        exact = TypeFromName[name]
        return exact if exact

        TypeFromName.each do |known_name, type|
          return type if known_name.casecmp(name) == 0
        end
        nil
      end

      # Reads the first line of +filename+ and returns the interpreter name
      # matched by TypeFromShebang as a Symbol.
      #
      # Returns +nil+ if +filename+ is not a regular file, if the file is
      # empty, or if the first line does not match.
      def shebang(filename)
        # File.file? (rather than File.exist?) keeps directories out; reading
        # a line from a directory raises Errno::EISDIR.
        return unless File.file? filename

        # Binary mode: the first line of an arbitrary file may not be valid
        # in the default external encoding, and matching a regexp against
        # such a string raises ArgumentError.
        File.open filename, 'rb' do |f|
          first_line = f.gets
          type = first_line && first_line[TypeFromShebang]
          type.to_sym if type
        end
      end

    end

    # Maps file extensions (without the leading dot) to types.
    TypeFromExt = {
      'c'        => :c,
      'cfc'      => :xml,
      'cfm'      => :xml,
      'clj'      => :clojure,
      'css'      => :css,
      'diff'     => :diff,
      'dpr'      => :delphi,
      'erb'      => :erb,
      'gemspec'  => :ruby,
      'groovy'   => :groovy,
      'gvy'      => :groovy,
      'h'        => :c,
      'haml'     => :haml,
      'htm'      => :page,
      'html'     => :page,
      'html.erb' => :erb,
      'java'     => :java,
      'js'       => :java_script,
      'json'     => :json,
      'mab'      => :ruby,
      'pas'      => :delphi,
      'patch'    => :diff,
      'php'      => :php,
      'php3'     => :php,
      'php4'     => :php,
      'php5'     => :php,
      'prawn'    => :ruby,
      'py'       => :python,
      'py3'      => :python,
      'pyw'      => :python,
      'rake'     => :ruby,
      'raydebug' => :raydebug,
      'rb'       => :ruby,
      'rbw'      => :ruby,
      'rhtml'    => :erb,
      'rjs'      => :ruby,
      'rpdf'     => :ruby,
      'ru'       => :ruby,
      'rxml'     => :ruby,
      # 'sch'      => :scheme,
      'sql'      => :sql,
      # 'ss'       => :scheme,
      'xhtml'    => :page,
      'xml'      => :xml,
      'yaml'     => :yaml,
      'yml'      => :yaml,
    }
    # Note the upper-case "C": it maps to :cpp, while "c" maps to :c.
    %w[cc cpp cp cxx c++ C hh hpp h++ cu].each do |cpp_alias|
      TypeFromExt[cpp_alias] = :cpp
    end

    # Matches the interpreter names that are recognized in the first line of
    # a file; the matched word becomes the type.
    TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/

    # Maps well-known file names (without directory) to types.
    TypeFromName = {
      'Capfile'  => :ruby,
      'Rakefile' => :ruby,
      'Rantfile' => :ruby,
      'Gemfile'  => :ruby,
    }

  end

end