# lib/rouge/guessers/disambiguation.rb



# frozen_string_literal: true

module Rouge
  module Guessers
    class Disambiguation < Guesser
      include Util
      include Lexers

      # Sets up a guesser for a single file.
      #
      # @param filename [String] path or bare name of the file; only its
      #   basename is kept, and that is what the rule globs are tested against
      # @param source the file's content; it is handed to +get_source+ when
      #   #filter runs
      def initialize(filename, source)
        @source = source
        @filename = File.basename(filename)
      end

      # Narrows +lexers+ with the first registered disambiguator that both
      # matches the filename and produces a non-nil decision.
      #
      # @param lexers [Array] candidate lexers
      # @return [Array] the winning disambiguator's decision, or +lexers+
      #   itself when no rule applies or decides
      def filter(lexers)
        # A single candidate needs no tie-break. A list as long as Lexer.all
        # is also passed through untouched (presumably nothing has narrowed
        # the field yet - confirm against the caller).
        candidate_count = lexers.size
        return lexers if candidate_count == 1 || candidate_count == Lexer.all.size

        # contains? and matches? read this analyzer while a decider runs.
        @analyzer = TextAnalyzer.new(get_source(@source))

        # Lazy evaluation keeps the walk strictly in registration order and
        # stops at the first rule that yields a decision.
        decision = self.class.disambiguators.lazy
          .select { |rule| rule.match?(@filename) }
          .map { |rule| rule.decide!(self) }
          .find { |picked| picked }

        decision || lexers
      end

      # Asks the analyzer built by #filter whether it includes +text+.
      # Intended for use inside decider blocks, which run after #filter has
      # assigned @analyzer.
      #
      # @param text the fragment passed to the analyzer's +include?+
      def contains?(text)
        @analyzer.include?(text)
      end

      # Tests the analyzer built by #filter against +re+ and coerces the
      # result of +=~+ to a strict boolean.
      #
      # @param re [Regexp] pattern to look for
      # @return [Boolean]
      def matches?(re)
        @analyzer =~ re ? true : false
      end

      # Rules registered through .disambiguate, kept in declaration order.
      @disambiguators = []

      # Registers a rule: the decider block is consulted for files whose
      # name matches any of +patterns+.
      #
      # @param patterns [Array<String>] filename globs
      # @yield the decider, stored in a Disambiguator
      # @return [Array] the registry, including the new rule
      def self.disambiguate(*patterns, &decider)
        rule = Disambiguator.new(patterns, &decider)
        @disambiguators.push(rule)
      end

      class << self
        # @return [Array] the rules registered on this class, in the order
        #   they were declared
        attr_reader :disambiguators
      end

      # One disambiguation rule: a list of filename globs plus the block that
      # picks a lexer when one of them matches.
      class Disambiguator
        include Util

        # @param patterns [Array<String>] globs checked with +test_glob+
        # @yield the decider; it is later evaluated with a guesser as +self+
        def initialize(patterns, &decider)
          @decider = decider
          @patterns = patterns
        end

        # Runs the decider in the context of +guesser+ and normalizes what it
        # returns.
        #
        # @param guesser the object the decider is instance_eval'ed against
        # @return [Array, nil] nil when the decider gave no answer, the array
        #   itself when it returned one, otherwise the single result wrapped
        #   in an array
        def decide!(guesser)
          verdict = guesser.instance_eval(&@decider)
          return verdict if verdict.nil? || verdict.is_a?(Array)

          [verdict]
        end

        # @param filename [String] name to test
        # @return [Boolean] whether any of the rule's globs accepts +filename+
        def match?(filename)
          @patterns.each do |glob|
            return true if test_glob(glob, filename)
          end

          false
        end
      end

      # '*.pl': Perl when the source has a `my $` declaration; Prolog when it
      # contains `:-` or opens with a fact-like clause ending in a period.
      # Yields nil (no decision) when none of these heuristics fire.
      disambiguate '*.pl' do
        if contains?('my $')
          Perl
        elsif contains?(':-') || matches?(/\A\w+(\(\w+\,\s*\w+\))*\./)
          Prolog
        end
      end

      # '*.h': Objective-C when an `@end`/`@implementation`/`@protocol`/
      # `@property` directive or an `@"` string prefix is present; C++ when a
      # line starts with one of the listed C++ keywords; plain C otherwise.
      disambiguate '*.h' do
        if matches?(/@(end|implementation|protocol|property)\b/) || contains?('@"')
          ObjectiveC
        elsif matches?(/^\s*(?:catch|class|constexpr|namespace|private|
                                   protected|public|template|throw|try|using)\b/x)
          Cpp
        else
          C
        end
      end

      # '*.m': the checks run in this order and the first hit wins -
      # Objective-C directives or `@"`, Mathematica `(*` or `:=`, Mason
      # `<%name ...>` component tags, a Matlab line starting with `%`, and
      # finally the looser Mason markers `<%`, `</%` or `<&`.
      # Yields nil (no decision) when nothing matches.
      disambiguate '*.m' do
        if matches?(/@(end|implementation|protocol|property)\b/) || contains?('@"')
          ObjectiveC
        elsif contains?('(*') || contains?(':=')
          Mathematica
        elsif matches?(/<%(def|method|text|doc|args|flags|attr|init|once|shared|perl|cleanup|filter)([^>]*)(>)/)
          Mason
        elsif matches?(/^\s*?%/)
          Matlab
        elsif matches?(%r!(</?%|<&)!)
          Mason
        end
      end

      # '*.php': the source is never inspected - PHP always takes precedence
      # over Hack.
      disambiguate('*.php') { PHP }

      # '*.hh': C++ when a line carries `#include`; Hack when a line starts
      # with `<?hh` or `$$` follows `(` or a comma; C++ as the fallback.
      disambiguate '*.hh' do
        if matches?(/^\s*#include/)
          Cpp
        elsif matches?(/^<\?hh/) || matches?(/(\(|, ?)\$\$/)
          Hack
        else
          Cpp
        end
      end

      # '*.plist': XML when the source begins with an `<?xml` declaration,
      # Plist otherwise.
      disambiguate '*.plist' do
        matches?(/\A<\?xml\b/) ? XML : Plist
      end

      # '*.sc': Python when a line starts with `#`; SuperCollider when a line
      # starts with `~` or ends with `;`; Python as the fallback.
      disambiguate '*.sc' do
        if matches?(/^#/)
          Python
        elsif matches?(/(?:^~|;$)/)
          SuperCollider
        else
          Python
        end
      end

      # 'Messages': MsgTrans when a line starts with a `token:token` pair
      # (no whitespace or colons inside either token), plain text otherwise.
      disambiguate 'Messages' do
        matches?(/^[^\s:]+:[^\s:]+/) ? MsgTrans : PlainText
      end

      # '*.cls': TeX when the first non-blank character is `\` or `%`;
      # OpenEdge when one of its keywords appears (case-insensitive);
      # Apex otherwise.
      disambiguate '*.cls' do
        if matches?(/\A\s*(?:\\|%)/)
          TeX
        elsif matches?(/(no\-undo|BLOCK\-LEVEL|ROUTINE\-LEVEL|&ANALYZE\-SUSPEND)/i)
          OpenEdge
        else
          Apex
        end
      end

      # '*.pp': Puppet when a lowercase-initial `name::` scope segment is
      # present; Pascal when a line starts with `function`/`begin`/`var` or
      # an `end;` / `end.` appears; Puppet as the fallback.
      disambiguate '*.pp' do
        if matches?(/(::)?([a-z]\w*::)/)
          Puppet
        elsif matches?(/^(function|begin|var)\b/) || matches?(/\b(end(;|\.))/)
          Pascal
        else
          Puppet
        end
      end
      
      # '*.p': Prolog when the source contains `:-` or opens with a fact-like
      # clause ending in a period; OpenEdge otherwise.
      disambiguate '*.p' do
        if contains?(':-') || matches?(/\A\w+(\(\w+\,\s*\w+\))*\./)
          Prolog
        else
          OpenEdge
        end
      end
    end
  end
end