module CodeRay
# = FileType
#
# A simple filetype recognizer.
#
# == Usage
#
# # determine the type of the given
# lang = FileType[file_name]
#
# # return :text if the file type is unknown
# lang = FileType.fetch file_name, :text
#
# # try the shebang line, too
# lang = FileType.fetch file_name, :text, true
module FileType
UnknownFileType = Class.new Exception
class << self
# Try to determine the file type of the file.
#
# +filename+ is a relative or absolute path to a file.
#
# The file itself is only accessed when +read_shebang+ is set to true.
# That means you can get filetypes from files that don't exist.
def [] filename, read_shebang = false
name = File.basename filename
ext = File.extname(name).sub(/^\./, '') # from last dot, delete the leading dot
ext2 = filename.to_s[/\.(.*)/, 1] # from first dot
type =
TypeFromExt[ext] ||
TypeFromExt[ext.downcase] ||
(TypeFromExt[ext2] if ext2) ||
(TypeFromExt[ext2.downcase] if ext2) ||
TypeFromName[name] ||
TypeFromName[name.downcase]
type ||= type_from_shebang(filename) if read_shebang
type
end
# This works like Hash#fetch.
#
# If the filetype cannot be found, the +default+ value
# is returned.
def fetch filename, default = nil, read_shebang = false
if default && block_given?
warn 'Block supersedes default value argument; use either.'
end
if type = self[filename, read_shebang]
type
else
return yield if block_given?
return default if default
raise UnknownFileType, 'Could not determine type of %p.' % filename
end
end
protected
def type_from_shebang filename
return unless File.exist? filename
File.open filename, 'r' do |f|
if first_line = f.gets
if type = first_line[TypeFromShebang]
type.to_sym
end
end
end
end
end
TypeFromExt = {
'c' => :c,
'cfc' => :xml,
'cfm' => :xml,
'clj' => :clojure,
'css' => :css,
'diff' => :diff,
'dpr' => :delphi,
'erb' => :erb,
'gemspec' => :ruby,
'go' => :go,
'groovy' => :groovy,
'gvy' => :groovy,
'h' => :c,
'haml' => :haml,
'htm' => :html,
'html' => :html,
'html.erb' => :erb,
'java' => :java,
'js' => :java_script,
'json' => :json,
'lua' => :lua,
'mab' => :ruby,
'pas' => :delphi,
'patch' => :diff,
'phtml' => :php,
'php' => :php,
'php3' => :php,
'php4' => :php,
'php5' => :php,
'prawn' => :ruby,
'py' => :python,
'py3' => :python,
'pyw' => :python,
'rake' => :ruby,
'raydebug' => :raydebug,
'rb' => :ruby,
'rbw' => :ruby,
'rhtml' => :erb,
'rjs' => :ruby,
'rpdf' => :ruby,
'ru' => :ruby, # config.ru
'rxml' => :ruby,
'sass' => :sass,
'sql' => :sql,
'taskpaper' => :taskpaper,
'template' => :json, # AWS CloudFormation template
'tmproj' => :xml,
'xaml' => :xml,
'xhtml' => :html,
'xml' => :xml,
'yaml' => :yaml,
'yml' => :yaml,
}
for cpp_alias in %w[cc cpp cp cxx c++ C hh hpp h++ cu]
TypeFromExt[cpp_alias] = :cpp
end
TypeFromShebang = /\b(?:ruby|perl|python|sh)\b/
TypeFromName = {
'Capfile' => :ruby,
'Rakefile' => :ruby,
'Rantfile' => :ruby,
'Gemfile' => :ruby,
'Guardfile' => :ruby,
'Vagrantfile' => :ruby,
'Appraisals' => :ruby
}
end
end