module Linguist::BlobHelper
# Internal: Look up the mime type registered for this blob's filename.
#
# The lookup result is cached in @_mime_type (a nil result included), so
# MiniMime is consulted at most once per blob.
#
# Returns whatever MiniMime.lookup_by_filename yields for name.to_s.
def _mime_type
  return @_mime_type if defined?(@_mime_type)

  @_mime_type = MiniMime.lookup_by_filename(name.to_s)
end
# Public: Is the blob binary?
#
# Returns true or false.
def binary?
  # Large blobs are never loaded into memory, so they have no data.
  return true if data.nil?
  # A blank file is treated as text.
  return false if data == ""
  # No encoding could be detected at all.
  return true if encoding.nil?

  # Otherwise trust the detector's classification.
  detect_encoding[:type] == :binary
end
# Internal: Is the blob binary according to its mime type?
#
# Returns false when no mime type is known for the filename, otherwise the
# mime type's own binary? answer.
def binary_mime_type?
  mime = _mime_type
  return false unless mime

  mime.binary?
end
# Public: Get the Content-Type header value.
#
# This value is used when serving raw blobs. The result is cached in
# @content_type.
#
# Examples
#
#   # => 'text/plain; charset=utf-8'
#   # => 'application/octet-stream'
#
# Returns a content type String.
def content_type
  @content_type ||=
    if binary_mime_type? || binary?
      # Binary content is served under its real mime type.
      mime_type
    elsif encoding
      # Text is always served as plain text, tagged with its charset.
      "text/plain; charset=#{encoding.downcase}"
    else
      "text/plain"
    end
end
# Public: Is this blob a CSV file?
#
# A CSV blob is a text blob whose extension is '.csv' in any letter case.
#
# Returns true or false.
def csv?
  text? && extname.downcase == '.csv'
end
# Try to guess the encoding.
#
# The detector runs only when the blob has data; a truthy result is cached
# in @detect_encoding.
#
# Returns: a Hash, with :encoding, :confidence, :type
#          this will return nil if an error occurred during detection or
#          when the blob has no data.
def detect_encoding
  return unless data

  @detect_encoding ||= CharlockHolmes::EncodingDetector.new.detect(data)
end
# Public: Get the Content-Disposition header value.
#
# This value is used when serving raw blobs. Text and images are shown
# inline; everything else is offered as a download, named after the blob
# when it has a name.
#
# Examples
#
#   # => "attachment; filename=file.tar"
#   # => "inline"
#
# Returns a content disposition String.
def disposition
  return 'inline' if text? || image?
  return "attachment" if name.nil?

  "attachment; filename=#{CGI.escape(name)}"
end
# Public: Is the blob in a documentation directory?
#
# Documentation files are ignored by language statistics.
#
# See "documentation.yml" for a list of documentation conventions that match
# this pattern.
#
# Returns true when path matches DocumentationRegexp, false otherwise.
def documentation?
  !!(path =~ DocumentationRegexp)
end
# Public: Is the blob empty?
#
# A blob is empty when it has no data at all or its data is the empty
# String.
#
# Returns true or false.
def empty?
  content = data
  content.nil? || content == ""
end
# Internal: Regexp matching any line terminator ("\r\n", "\r" or "\n") as
# it is encoded in this blob.
#
# Each terminator is transcoded into the detected ruby_encoding and then
# re-tagged with the encoding of data, so the pattern can be applied to data
# without re-encoding data itself. The Regexp is cached in
# @encoded_newlines_re.
#
# Returns a Regexp.
def encoded_newlines_re
  @encoded_newlines_re ||= begin
    terminators = ["\r\n", "\r", "\n"].map do |newline|
      newline.encode(ruby_encoding, "ASCII-8BIT").force_encoding(data.encoding)
    end
    Regexp.union(terminators)
  end
end
# Public: Name of the encoding detected for the blob.
#
# Returns the :encoding entry of detect_encoding, or nil when detection
# produced nothing.
def encoding
  detected = detect_encoding
  detected[:encoding] if detected
end
# Public: Get the extname of the path.
#
# Examples
#
#   blob(name='foo.rb').extname
#   # => '.rb'
#
# Returns a String (empty when the name has no extension or is nil).
def extname
  filename = name.to_s
  File.extname(filename)
end
# Public: Get the first n lines of the blob without splitting all of data.
#
# When #lines has already been computed its cached result is sliced.
# Otherwise data is scanned forward for at most n line terminators and only
# that prefix is split, so large blobs are not split in full.
#
# The scanning path now returns the same lines the cached path would:
#   * the terminator that ends the n-th line no longer produces a trailing
#     empty String;
#   * when data holds fewer than n terminators, the final unterminated line
#     is included instead of being dropped.
#
# n - Integer number of lines wanted.
#
# Returns an Array of Strings; empty when the blob is not viewable, has no
# data, or n is not positive on the scanning path.
def first_lines(n)
  return lines[0...n] if defined? @lines
  return [] unless viewable? && data

  newline = encoded_newlines_re
  offset = 0 # index just past the most recently consumed terminator
  seen = 0   # terminators consumed so far
  while seen < n && (at = data.index(newline, offset))
    offset = at + Regexp.last_match(0).length
    # Data that is nothing but one terminator has no lines, as in #lines.
    return [] if at.zero? && offset == data.length

    seen += 1
  end

  # Running out of terminators early means the rest of data is a line too.
  stop = seen < n ? data.length : offset
  head = data[0...stop].split(newline, -1)
  # A slice that ends on a terminator splits into one extra empty String.
  head.pop if seen > 0 && stop == offset
  head
end
# Public: Is the blob a generated file?
#
# Generated source code is suppressed in diffs and is ignored by
# language statistics.
#
# May load Blob#data
#
# The answer is cached in @_generated. A `defined?` guard is used instead of
# `||=` so that a false answer is cached as well; with `||=` every call on a
# non-generated blob repeated the whole check.
#
# Returns the result of Generated.generated? for this path; data is passed
# as a lambda so it is only loaded if the check asks for it.
def generated?
  return @_generated if defined?(@_generated)

  @_generated = Generated.generated?(path, lambda { data })
end
# Internal: Does the blob have a ratio of long lines?
#
# Compares size divided by loc against a threshold of 5000. A blob with no
# lines never qualifies, which also avoids dividing by zero.
#
# Returns true or false.
def high_ratio_of_long_lines?
  line_count = loc
  return false if line_count == 0

  size / line_count > 5000
end
# Public: Is the blob a supported image format?
#
# Supported formats are recognised by extension, in any letter case:
# .png, .jpg, .jpeg and .gif.
#
# Returns true or false.
def image?
  %w[.png .jpg .jpeg .gif].include?(extname.downcase)
end
# Public: Should this blob be counted in language statistics?
#
# Vendored, documentation and generated blobs are excluded, as are blobs
# with no detected language. For the rest, an explicit non-nil detectable?
# answer (when the including class defines that method) wins; otherwise the
# language's type must be one of DETECTABLE_TYPES.
#
# Returns a truthy value when the blob counts, a falsy one otherwise.
def include_in_language_stats?
  return false if vendored? || documentation? || generated?

  detected = language
  return detected unless detected

  if defined?(detectable?) && !detectable?.nil?
    detectable?
  else
    DETECTABLE_TYPES.include?(detected.type)
  end
end
# Public: Detects the Language of the blob.
#
# May load Blob#data
#
# The result is cached in @language. A `defined?` guard is used instead of
# `||=` so that a nil result (no language detected) is cached too; with
# `||=` detection was repeated on every call for such blobs.
#
# Returns the result of Linguist.detect for this blob, which may be nil.
def language
  return @language if defined?(@language)

  @language = Linguist.detect(self)
end
# Public: Is the blob too big to load?
#
# The blob's size (nil counts as 0) is compared against MEGABYTE.
#
# Returns true or false.
def large?
  byte_count = size.to_i
  byte_count > MEGABYTE
end
# Public: Get the last n lines of the blob without splitting all of data.
#
# When #lines has already been computed its cached result is used.
# Otherwise data is scanned backwards for line terminators and only the
# tail that holds the wanted lines is split.
#
# Fixes over the previous implementation:
#   * the cached branch lacked a `return`, so its result was discarded and
#     data was always re-scanned;
#   * the backward search could be issued with offset -1, which Ruby counts
#     from the end of the String, re-matching terminators already consumed;
#   * when data held fewer terminators than requested, the first line was
#     dropped instead of returning every line;
#   * a "\r\n" pair was counted as two terminators, because a backward
#     search meets the "\n" before the "\r".
#
# n - Integer number of lines wanted.
#
# Returns an Array of Strings; empty when n is not positive, or when the
# blob is not viewable or has no data.
def last_lines(n)
  return [] if n <= 0
  return lines.last(n) if defined? @lines
  return [] unless viewable? && data

  newline = encoded_newlines_re
  wanted = n
  upper = data.length # terminators are searched strictly below this index
  start = upper       # index at which the returned tail begins
  seen = 0            # terminators consumed so far
  trailing_eol = false

  while seen < wanted && upper > 0 && (at = data.rindex(newline, upper - 1))
    finish = at + Regexp.last_match(0).length
    # Widen a lone "\n" hit to the full "\r\n" pair it may belong to.
    if at > 0 && (wider = data.rindex(newline, at - 1)) &&
       wider + Regexp.last_match(0).length == finish
      at = wider
    end

    if seen.zero? && finish == data.length
      # Data that is nothing but one terminator has no lines, as in #lines.
      return [] if at.zero?

      # A terminator that closes the file ends the last line rather than
      # separating two, so one more terminator has to be found.
      trailing_eol = true
      wanted += 1
    end

    upper = at
    start = finish
    seen += 1
  end

  # Fewer terminators than requested: every line of data belongs to the tail.
  start = 0 if seen < wanted

  tail = data[start..-1].split(newline, -1)
  # Drop the empty String produced after the file-closing terminator.
  tail.pop if trailing_eol
  tail
end
# Internal: Is the blob binary according to its mime type,
# overriding it if we have better data from the languages.yml
# database.
#
# Returns a truthy value only when the mime type says binary and no
# Language is registered for the filename.
def likely_binary?
  binary_by_mime = binary_mime_type?
  return binary_by_mime unless binary_by_mime

  !Language.find_by_filename(name)
end
# Public: Get each line of data
#
# Requires Blob#data
#
# The result is cached in @lines.
#
# Returns an Array of lines; empty when the blob is not viewable or has no
# data.
def lines
  return @lines if @lines

  @lines =
    if !viewable? || !data
      []
    else
      # `data` is usually tagged ASCII-8BIT even when its content was
      # detected as another encoding, and its encoding must not be changed:
      # every entry of `lines` is implicitly guaranteed to be encoded the
      # same way as `data`.
      #
      # So the newline sequences are adapted instead: each one is encoded in
      # the detected encoding and then forced back to the encoding of
      # `data`. The resulting bytes match how newlines are likely stored in
      # the file, which lets the split work without re-encoding `data` and,
      # importantly, without duplicating many (potentially large) strings.
      begin
        # chomp removes the final newline (if any) before splitting, so
        # POSIX files do not yield an empty element after their last line.
        data.chomp.split(encoded_newlines_re, -1)
      rescue Encoding::ConverterNotFoundError
        # Not splittable in the detected encoding: assume one big line.
        [data]
      end
    end
end
# Public: Get number of lines of code
#
# Requires Blob#data
#
# Returns the number of entries in #lines, blank lines included.
def loc
  lines.length
end
# Public: Get the actual blob mime type
#
# Examples
#
#   # => 'text/plain'
#   # => 'text/html'
#
# Returns the content type of the mime type looked up for the filename, or
# 'text/plain' when none is known.
def mime_type
  mime = _mime_type
  mime ? mime.content_type : 'text/plain'
end
# Public: Is the blob a PDF?
#
# Decided by extension alone: '.pdf' in any letter case.
#
# Returns true or false.
def pdf?
  extension = extname.downcase
  extension == '.pdf'
end
# Public: Ruby-side name of the encoding detected for the blob.
#
# Returns the :ruby_encoding entry of detect_encoding, or nil when detection
# produced nothing.
def ruby_encoding
  detected = detect_encoding
  detected[:ruby_encoding] if detected
end
# Public: Is the blob safe to colorize?
#
# Only text blobs that are neither large nor dominated by very long lines
# qualify. The checks run in that order and stop at the first failure.
#
# Returns true or false.
def safe_to_colorize?
  return false if large?

  text? && !high_ratio_of_long_lines?
end
# Public: Get number of source lines of code
#
# Requires Blob#data
#
# Returns the number of lines that contain at least one non-whitespace
# character.
def sloc
  lines.count { |line| line =~ /\S/ }
end
# Public: Is the blob a supported 3D model format?
#
# Decided by extension alone: '.stl' in any letter case.
#
# Returns true or false.
def solid?
  extension = extname.downcase
  extension == '.stl'
end
# Public: Is the blob text?
#
# Text is defined as the exact opposite of binary?.
#
# Returns true or false.
def text?
  binary? ? false : true
end
# Public: tm_scope of the blob's detected language.
#
# Returns the language's tm_scope, or the falsy language value itself when
# no language was detected.
def tm_scope
  detected = language
  detected && detected.tm_scope
end
# Public: Is the blob in a vendored directory?
#
# Vendored files are ignored by language statistics.
#
# See "vendor.yml" for a list of vendored conventions that match
# this pattern.
#
# Returns true when path matches VendoredRegexp, false otherwise.
def vendored?
  !!(path =~ VendoredRegexp)
end
# Public: Is the blob viewable?
#
# Non-viewable blobs will just show a "View Raw" link
#
# A blob is viewable when it is text and not too large; text? is only
# consulted for blobs that are not large.
#
# Returns true or false.
def viewable?
  return false if large?

  text?
end