module Linguist::BlobHelper

def _mime_type

Returns a MIME::Type

Internal: Lookup mime type for filename.
# Internal: Look up the MIME type for the blob's filename.
#
# The first lookup result is stored in @_mime_type and reused afterwards,
# even when that result is nil, so the lookup runs at most once.
#
# Returns whatever MiniMime.lookup_by_filename yields for `name.to_s`.
def _mime_type
  return @_mime_type if defined? @_mime_type

  @_mime_type = MiniMime.lookup_by_filename(name.to_s)
end

def binary?

Return true or false

Public: Is the blob binary?
# Public: Is the blob binary?
#
# Checks are made in order; the first one that applies decides the answer.
#
# Returns true or false.
def binary?
  # No data at all (large blobs aren't even loaded into memory)
  return true if data.nil?

  # A blank file is treated as text
  return false if data == ""

  # The detector could not name an encoding
  return true if encoding.nil?

  # Otherwise trust the detector's own classification
  detect_encoding[:type] == :binary
end

def binary_mime_type?

Return true or false

Internal: Is the blob binary according to its mime type
# Internal: Is the blob binary according to its mime type?
#
# Returns the result of `binary?` on the looked-up mime type, or false when
# no mime type was found.
def binary_mime_type?
  detected = _mime_type
  detected ? detected.binary? : false
end

def content_type

Returns a content type String.

# => 'application/octet-stream'
# => 'text/plain; charset=utf-8'

Examples

This value is used when serving raw blobs.

Public: Get the Content-Type header value
# Public: Get the Content-Type header value.
#
# Binary blobs (by mime type or by content) report their mime type. Text
# blobs report "text/plain", with a lower-cased charset appended when an
# encoding is known. The value is cached in @content_type.
#
# Examples
#
#   # => 'text/plain; charset=utf-8'
#   # => 'application/octet-stream'
#
# Returns a content type String.
def content_type
  @content_type ||=
    if binary_mime_type? || binary?
      mime_type
    elsif encoding
      "text/plain; charset=#{encoding.downcase}"
    else
      "text/plain"
    end
end

def csv?

Return true or false

Public: Is this blob a CSV file?
# Public: Is this blob a CSV file?
#
# True only for text blobs whose extension is ".csv" (case-insensitive).
#
# Returns true or false.
def csv?
  text? && extname.downcase.eql?('.csv')
end

def detect_encoding

Returns: a Hash, with :encoding, :confidence, :type
this will return nil if an error occurred during detection or
no valid encoding could be found

Try to guess the encoding
# Try to guess the encoding of the blob's data.
#
# The detector result is cached in @detect_encoding once it is truthy.
# Nothing is detected (and nil is returned) when there is no data.
#
# Returns the detector's result, or nil.
def detect_encoding
  return unless data

  @detect_encoding ||= CharlockHolmes::EncodingDetector.new.detect(data)
end

def disposition

Returns a content disposition String.

# => "inline"
# => "attachment; filename=file.tar"

This value is used when serving raw blobs.

Public: Get the Content-Disposition header value
# Public: Get the Content-Disposition header value.
#
# Text and image blobs are shown inline; everything else is an attachment,
# carrying the CGI-escaped filename when a name is available.
#
# Examples
#
#   # => "attachment; filename=file.tar"
#   # => "inline"
#
# Returns a content disposition String.
def disposition
  return 'inline' if text? || image?
  return "attachment" if name.nil?

  "attachment; filename=#{CGI.escape(name)}"
end

def documentation?

Return true or false

See "documentation.yml" for a list of documentation conventions that match
this pattern.

Documentation files are ignored by language statistics.

Public: Is the blob in a documentation directory?
# Public: Is the blob in a documentation directory?
#
# Matches `path` against DocumentationRegexp and normalises the match
# result to a strict boolean.
#
# Returns true or false.
def documentation?
  if path =~ DocumentationRegexp
    true
  else
    false
  end
end

def empty?

Return true or false

Public: Is the blob empty?
# Public: Is the blob empty?
#
# A blob counts as empty when its data is nil or the empty string.
#
# Returns true or false.
def empty?
  contents = data
  contents.nil? || contents == ""
end

def encoded_newlines_re

# Build (once) a Regexp matching any newline sequence as it appears in `data`.
#
# Each of "\r\n", "\r" and "\n" is transcoded to `ruby_encoding` and then
# relabelled with the encoding of `data`, so the pattern can be applied to
# `data` directly. "\r\n" is listed first so it is preferred over a lone "\r".
#
# Returns a Regexp, cached in @encoded_newlines_re.
def encoded_newlines_re
  @encoded_newlines_re ||= begin
    separators = ["\r\n", "\r", "\n"].map do |newline|
      newline.encode(ruby_encoding, "ASCII-8BIT").force_encoding(data.encoding)
    end
    Regexp.union(separators)
  end
end

def encoding

# Name of the detected encoding.
#
# Returns the :encoding entry of the detection result, or nil when nothing
# was detected.
def encoding
  detected = detect_encoding
  detected[:encoding] if detected
end

def extname

Returns a String

# => '.rb'
blob(name='foo.rb').extname

Examples

Public: Get the extname of the path
# Public: Get the extname of the path.
#
# Examples
#
#   blob(name='foo.rb').extname
#   # => '.rb'
#
# Returns a String (empty when the name has no extension or is nil).
def extname
  filename = name.to_s
  File.extname(filename)
end

def first_lines(n)

# Public: Get the first n lines of the blob without splitting the whole file.
#
# n - The maximum number of lines to return.
#
# When @lines is already set the answer is sliced from `lines`. Otherwise
# `data` is scanned forward for at most n newline sequences and only that
# prefix is split, so the result agrees with `lines[0...n]`.
#
# Returns an Array of line Strings; empty when the blob is not viewable or
# has no data.
def first_lines(n)
  return lines[0...n] if defined? @lines
  return [] unless viewable? && data

  newline = encoded_newlines_re
  offset = 0
  found = 0
  while found < n && (at = data.index(newline, offset))
    offset = at + Regexp.last_match(0).length
    found += 1
  end

  # Fewer than n newlines means the unterminated tail is still one of the
  # first n lines, so keep all of `data` instead of cutting at the last
  # newline (which would drop that tail).
  head = found < n ? data : data[0...offset]

  # Chomp before splitting, as `lines` does, so the closing newline does not
  # yield a trailing empty element.
  head.chomp.split(newline, -1)
end

def generated?

Return true or false

May load Blob#data

Generated source code is suppressed in diffs and is ignored by
language statistics.

Public: Is the blob a generated file?
# Public: Is the blob a generated file?
#
# The answer from Generated.generated? is cached in @_generated. The cache
# is keyed on whether the variable is defined rather than on its
# truthiness, so a false answer is remembered too and the check (which may
# load the blob's data through the lambda) is not repeated on every call.
#
# Returns the result of Generated.generated? for this blob's path and data.
def generated?
  return @_generated if defined? @_generated

  @_generated = Generated.generated?(path, lambda { data })
end

def high_ratio_of_long_lines?

Return true or false

Internal: Does the blob have a high ratio of long lines?
# Internal: Does the blob average more than 5000 units of `size` per line?
#
# A blob with no lines never qualifies (this also avoids dividing by zero).
#
# Returns true or false.
def high_ratio_of_long_lines?
  line_count = loc
  return false if line_count.zero?

  size / line_count > 5000
end

def image?

Return true or false

Public: Is the blob a supported image format?
# Public: Is the blob a supported image format?
#
# Decided purely by the lower-cased extension: .png, .jpg, .jpeg or .gif.
#
# Returns true or false.
def image?
  %w[.png .jpg .jpeg .gif].include?(extname.downcase)
end

def include_in_language_stats?

Internal: Should this blob be included in repository language statistics?
# Internal: Should this blob be included in repository language statistics?
#
# Vendored, documentation and generated blobs are excluded, as are blobs
# with no detected language. For the rest, a non-nil `detectable?` answer
# (when such a method exists on the blob) wins; otherwise the language's
# type must be one of DETECTABLE_TYPES.
#
# Returns a truthy value when the blob should be counted, falsy otherwise.
def include_in_language_stats?
  return false if vendored?
  return false if documentation?
  return false if generated?

  detected = language
  return detected unless detected

  if defined?(detectable?) && !detectable?.nil?
    detectable?
  else
    DETECTABLE_TYPES.include?(detected.type)
  end
end

def language

Returns a Language or nil if none is detected

May load Blob#data

Public: Detects the Language of the blob.
# Public: Detects the Language of the blob.
#
# A truthy detection result is cached in @language; detection is delegated
# to Linguist.detect with this blob as the argument.
#
# Returns the detected language, or nil if none is detected.
def language
  return @language if @language

  @language = Linguist.detect(self)
end

def large?

Return true or false

Public: Is the blob too big to load?
# Public: Is the blob too big to load?
#
# Compares the integer-coerced `size` against MEGABYTE; a size exactly equal
# to MEGABYTE is not large.
#
# Returns true or false.
def large?
  MEGABYTE < size.to_i
end

def last_lines(n)

# Public: Get the last n lines of the blob without splitting the whole file.
#
# n - The maximum number of lines to return.
#
# When @lines is already set the answer is taken from it. Otherwise `data`
# is scanned backwards for just enough newline sequences and only that
# suffix is split. A newline that terminates the data does not start an
# extra empty line.
#
# Returns an Array of line Strings; empty when n is not positive, the blob
# is not viewable, or it has no data.
def last_lines(n)
  return [] unless n > 0
  # Reuse the cached split; Array#last already copes with n >= length.
  return @lines.last(n) if defined? @lines
  return [] unless viewable? && data

  newline = encoded_newlines_re
  boundary = data.length # only newlines starting before this index are unseen
  start = boundary       # index where the returned slice begins
  seen = 0
  wanted = n
  trailing_eol = false

  # `boundary > 0` stops the scan at the start of data; rindex with -1 would
  # otherwise wrap around and search from the end again.
  while seen < wanted && boundary > 0 && (at = data.rindex(newline, boundary - 1))
    stop = at + Regexp.last_match(0).length

    # A backwards search lands on the "\n" of a "\r\n" pair first. If an
    # earlier match ends at the same place it is that same pair, so widen to
    # it rather than counting the pair as two line breaks.
    if at > 0 && (wider = data.rindex(newline, at - 1)) &&
       wider + Regexp.last_match(0).length == stop
      at = wider
    end

    # A newline that closes the data terminates the last line instead of
    # separating two lines, so one more separator is needed.
    if seen.zero? && stop == data.length
      trailing_eol = true
      wanted += 1
    end

    boundary = at
    start = stop
    seen += 1
  end

  # Running out of newlines means every remaining character belongs to the
  # requested lines, including the very first line of the data.
  start = 0 if seen < wanted

  tail = data[start..-1].split(newline, -1)
  tail.pop if trailing_eol
  tail
end

def likely_binary?

Return true or false

Internal: Is the blob binary according to its mime type,
overriding it if we have better data from the languages.yml
database.
# Internal: Is the blob binary according to its mime type, unless the
# filename is known to Language.find_by_filename?
#
# Returns a truthy value when the mime type says binary and no language is
# found for the name; falsy otherwise.
def likely_binary?
  mime_says_binary = binary_mime_type?
  mime_says_binary && !Language.find_by_filename(name)
end

def lines

Returns an Array of lines

Requires Blob#data

Public: Get each line of data
# Public: Get each line of data.
#
# Requires Blob#data. The result is cached in @lines.
#
# Returns an Array of lines; empty when the blob is not viewable or has no
# data.
def lines
  return @lines if @lines
  return @lines = [] unless viewable? && data

  # `data` usually carries a binary encoding (ASCII-8BIT) even though its
  # content was detected as something else, and every entry of `lines` is
  # implicitly promised to have the same encoding as `data`, so `data`
  # itself must not be re-encoded.
  #
  # The newline sequences are therefore encoded into the detected encoding
  # and then relabelled with `data`'s encoding (see encoded_newlines_re).
  # Their bytes match how newlines are likely stored in the file, which
  # lets us split correctly without touching `data` and without copying
  # many (potentially large) strings.
  @lines =
    begin
      # Drop a final "\n" first so a POSIX-style file does not end with an
      # empty element after its last newline.
      data.chomp.split(encoded_newlines_re, -1)
    rescue Encoding::ConverterNotFoundError
      # Cannot split in the detected encoding: treat it as one big line.
      [data]
    end
end

def loc

Returns Integer

Requires Blob#data

Public: Get number of lines of code
# Public: Get number of lines of code.
#
# Requires Blob#data (through `lines`).
#
# Returns Integer.
def loc
  lines.length
end

def mime_type

Returns a mime type String.

# => 'text/html'
# => 'text/plain'

Examples

Public: Get the actual blob mime type
# Public: Get the actual blob mime type.
#
# Falls back to 'text/plain' when no mime type was found for the filename.
#
# Examples
#
#   # => 'text/plain'
#   # => 'text/html'
#
# Returns a mime type String.
def mime_type
  detected = _mime_type
  detected ? detected.content_type : 'text/plain'
end

def pdf?

Return true or false

Public: Is the blob a PDF?
# Public: Is the blob a PDF?
#
# Decided by a case-insensitive comparison of the extension with ".pdf".
#
# Returns true or false.
def pdf?
  extension = extname.downcase
  extension == '.pdf'
end

def ruby_encoding

# Ruby-side name of the detected encoding.
#
# Returns the :ruby_encoding entry of the detection result, or nil when
# nothing was detected.
def ruby_encoding
  detected = detect_encoding
  detected[:ruby_encoding] if detected
end

def safe_to_colorize?

Return true or false

Public: Is the blob safe to colorize?
# Public: Is the blob safe to colorize?
#
# Large blobs are rejected first; the rest must be text and must not have
# a high ratio of long lines.
#
# Returns true or false.
def safe_to_colorize?
  return false if large?

  text? && !high_ratio_of_long_lines?
end

def sloc

Returns Integer

Requires Blob#data

Public: Get number of source lines of code
# Public: Get number of source lines of code.
#
# Requires Blob#data (through `lines`). Only lines containing at least one
# non-whitespace character are counted.
#
# Returns Integer.
def sloc
  lines.count { |line| /\S/ === line }
end

def solid?

Return true or false

Public: Is the blob a supported 3D model format?
# Public: Is the blob a supported 3D model format?
#
# Decided by a case-insensitive comparison of the extension with ".stl".
#
# Returns true or false.
def solid?
  extension = extname.downcase
  extension == '.stl'
end

def text?

Return true or false

Public: Is the blob text?
# Public: Is the blob text?
#
# Text is defined as simply "not binary".
#
# Returns true or false.
def text?
  binary? ? false : true
end

def tm_scope

Internal: Get the TextMate compatible scope for the blob
# Internal: Get the TextMate compatible scope for the blob.
#
# Returns the detected language's tm_scope, or the falsy `language` value
# when no language was detected.
def tm_scope
  detected = language
  detected && detected.tm_scope
end

def vendored?

Return true or false

See "vendor.yml" for a list of vendored conventions that match
this pattern.

Vendored files are ignored by language statistics.

Public: Is the blob in a vendored directory?
# Public: Is the blob in a vendored directory?
#
# Matches `path` against VendoredRegexp and normalises the match result to
# a strict boolean.
#
# Returns true or false.
def vendored?
  if path =~ VendoredRegexp
    true
  else
    false
  end
end

def viewable?

Return true or false

Non-viewable blobs will just show a "View Raw" link

Public: Is the blob viewable?
# Public: Is the blob viewable?
#
# Only text blobs that are not large are viewable. Non-viewable blobs will
# just show a "View Raw" link.
#
# Returns true or false.
def viewable?
  return false if large?

  text?
end