lib/rouge/lexers/xpath.rb



# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    class XPath < RegexLexer
      # Lexer metadata: display title, description, tag and file globs.
      title "XPath"
      desc "XML Path Language (XPath) 3.1"
      tag "xpath"
      filenames "*.xpath"

      # Terminal literals:
      # https://www.w3.org/TR/xpath-31/#terminal-symbols
      def self.digits
        # Digits terminal: one or more ASCII decimal digits (memoized).
        return @digits if @digits

        @digits = /[0-9]+/
      end

      # DecimalLiteral terminal: either ".Digits" or "Digits." followed by
      # optional further digits.
      #
      # @return [Regexp] the memoized pattern
      def self.decimalLiteral
        @decimalLiteral ||= begin
          whole = digits
          /\.#{whole}|#{whole}\.[0-9]*/
        end
      end

      # DoubleLiteral terminal: a mantissa (".Digits" or "Digits" with an
      # optional fractional part) followed by a mandatory exponent.
      #
      # The mantissa alternatives are grouped so that the exponent applies to
      # both of them; otherwise the top-level "|" would let ".5" match on its
      # own and leave the exponent of ".5e10" to be lexed as a separate token.
      #
      # @return [Regexp] the memoized pattern
      def self.doubleLiteral
        @doubleLiteral ||= %r/(?:\.#{digits}|#{digits}(?:\.[0-9]*)?)[eE][+-]?#{digits}/
      end

      # StringLiteral terminal: a double- or single-quoted string in which the
      # delimiting quote is escaped by doubling it.
      #
      # @return [Regexp] the memoized pattern
      def self.stringLiteral
        return @stringLiteral if @stringLiteral

        @stringLiteral = /("(("")|[^"])*")|('(('')|[^'])*')/
      end

      # NCName approximation: an ASCII letter or underscore followed by
      # letters, digits, underscores, hyphens or dots (case-insensitive).
      #
      # @return [Regexp] the memoized pattern
      def self.ncName
        @ncName = /[a-z_][a-z_\-.0-9]*/i unless @ncName
        @ncName
      end

      # QName: an NCName optionally followed by ":" and a second NCName.
      #
      # @return [Regexp] the memoized pattern
      def self.qName
        @qName ||= begin
          part = ncName
          /(?:#{part})(?::#{part})?/
        end
      end

      # URIQualifiedName: a braced URI literal "Q{...}" immediately followed
      # by an NCName.
      #
      # @return [Regexp] the memoized pattern
      def self.uriQName
        return @uriQName if @uriQName

        local_name = ncName
        @uriQName = /Q\{[^{}]*\}#{local_name}/
      end

      # EQName: a URIQualifiedName or a QName; the URI-qualified form is
      # tried first.
      #
      # @return [Regexp] the memoized pattern
      def self.eqName
        @eqName ||= begin
          braced = uriQName
          lexical = qName
          /(?:#{braced}|#{lexical})/
        end
      end

      # Opening delimiter of an XPath comment, "(:".
      #
      # @return [Regexp] the memoized pattern
      def self.commentStart
        return @commentStart if @commentStart

        @commentStart = /\(:/
      end

      # An opening parenthesis that does not begin a comment, i.e. "(" not
      # followed by ":".
      #
      # @return [Regexp] the memoized pattern
      def self.openParen
        @openParen = /\((?!:)/ unless @openParen
        @openParen
      end

      # Terminal symbols:
      # https://www.w3.org/TR/xpath-30/#id-terminal-delimitation
      def self.kindTest
        # Names of the kind tests that take EQName pseudo-parameters
        # (processing-instruction is handled by kindTestForPI).
        return @kindTest if @kindTest

        names = %w[
          element attribute schema-element schema-attribute
          comment text node document-node namespace-node
        ]
        @kindTest = Regexp.union(names)
      end

      # Name of the processing-instruction kind test, which is lexed in its
      # own state.
      #
      # @return [Regexp] the memoized pattern
      def self.kindTestForPI
        return @kindTestForPI if @kindTestForPI

        @kindTestForPI = Regexp.union(['processing-instruction'])
      end

      # Names of the XPath axes (the part before "::").
      #
      # @return [Regexp] the memoized union of axis names
      def self.axes
        return @axes if @axes

        axis_names = %w[
          child descendant attribute self descendant-or-self
          following-sibling following namespace
          parent ancestor preceding-sibling preceding ancestor-or-self
        ]
        @axes = Regexp.union(axis_names)
      end

      # Symbolic operators and separators.
      #
      # Multi-character operators are listed before the single characters they
      # start with ("=>" before "=", "!=" before "!", "//" before "/", ...)
      # because the union tries its alternatives in order. The bare "!" is the
      # XPath 3.0/3.1 simple map operator.
      #
      # @return [Regexp] the memoized union of operator strings
      def self.operators
        @operators ||= Regexp.union %w(, => = := : >= >> > <= << < - * != ! + // / || |)
      end

      # Expression keywords (let/for/quantified/conditional expressions).
      #
      # @return [Regexp] the memoized union of keywords
      def self.keywords
        return @keywords if @keywords

        words = %w[let for some every if then else return in satisfies]
        @keywords = Regexp.union(words)
      end

      # Operators that are spelled as words: logical, value comparison, node
      # comparison, arithmetic, set and range operators.
      #
      # @return [Regexp] the memoized union of operator words
      def self.word_operators
        return @word_operators if @word_operators

        words = %w[
          and or eq ge gt le lt ne is
          div mod idiv
          intersect except union
          to
        ]
        @word_operators = Regexp.union(words)
      end

      # Type names that are followed by a parenthesised argument list inside
      # a sequence type.
      #
      # @return [Regexp] the memoized union of type names
      def self.constructorTypes
        return @constructorTypes if @constructorTypes

        type_names = %w[function array map empty-sequence]
        @constructorTypes = Regexp.union(type_names)
      end

      # Mixin states:

      # Mixed into every lexical state except :comment: opens a comment
      # (handled by the :comment state) or consumes a run of whitespace.
      state :commentsAndWhitespace do
        rule XPath.commentStart do
          token Comment
          push :comment
        end
        rule %r/\s+/m do
          token Text::Whitespace
        end
      end

      # Lexical states:
      # https://www.w3.org/TR/xquery-xpath-parsing/#XPath-lexical-states
      # https://lists.w3.org/Archives/Public/public-qt-comments/2004Aug/0127.html
      # https://www.w3.org/TR/xpath-30/#id-revision-log
      # https://www.w3.org/TR/xpath-31/#id-revision-log

      # Default state: lexes a general XPath expression and hands over to the
      # specialised states for variables, attributes, kind tests and types.
      # Rule order is significant: when several rules could match at the same
      # position, the earliest one wins.
      state :root do
        mixin :commentsAndWhitespace

        # Literals
        rule XPath.doubleLiteral, Num::Float
        rule XPath.decimalLiteral, Num::Float
        rule XPath.digits, Num
        rule XPath.stringLiteral, Literal::String

        # Variables
        rule %r/\$/, Name::Variable, :varname

        # Operators
        rule XPath.operators, Operator
        # "-" and "." are name characters, so a word followed by one of them
        # is the start of a longer name (e.g. `for-each(...)`, `is-active`)
        # and must not be split into a keyword/operator plus a remainder.
        rule %r/#{XPath.word_operators}\b(?![\-.])/, Operator::Word
        rule %r/#{XPath.keywords}\b(?![\-.])/, Keyword
        rule %r/[?,{}()\[\]]/, Punctuation

        # Functions
        rule %r/(function)(\s*)(#{XPath.openParen})/ do # function declaration
          groups Keyword, Text::Whitespace, Punctuation
        end
        # Constructors. Only a complete word counts: `mapping` is an ordinary
        # name, and `map:get(...)` / `array:size(...)` are prefixed function
        # names that the function-call rule below lexes as a whole.
        rule %r/(map|array|empty-sequence)\b(?![\-.:])/, Keyword
        rule %r/(#{XPath.kindTest})(\s*)(#{XPath.openParen})/ do  # kindtest
          groups Keyword, Text::Whitespace, Punctuation
          push :kindtest
        end
        rule %r/(#{XPath.kindTestForPI})(\s*)(#{XPath.openParen})/ do # processing instruction kindtest
          groups Keyword, Text::Whitespace, Punctuation
          push :kindtestforpi
        end
        rule %r/(#{XPath.eqName})(\s*)(#{XPath.openParen})/ do # function call
          groups Name::Function, Text::Whitespace, Punctuation
        end
        rule %r/(#{XPath.eqName})(\s*)(#)(\s*)(\d+)/ do # namedFunctionRef
          groups Name::Function, Text::Whitespace, Name::Function, Text::Whitespace, Name::Function
        end

        # Type commands: each one is followed by a type, lexed in its own state
        rule %r/(cast|castable)(\s+)(as)/ do
          groups Keyword, Text::Whitespace, Keyword
          push :singletype
        end
        rule %r/(treat)(\s+)(as)/ do
          groups Keyword, Text::Whitespace, Keyword
          push :itemtype
        end
        rule %r/(instance)(\s+)(of)/ do
          groups Keyword, Text::Whitespace, Keyword
          push :itemtype
        end
        rule %r/(as)\b/ do
          token Keyword
          push :itemtype
        end

        # Paths
        # Wildcard name tests: "prefix:*" and "*:local"
        rule %r/(#{XPath.ncName})(\s*)(:)(\s*)(\*)/ do
          groups Name::Tag, Text::Whitespace, Punctuation, Text::Whitespace, Operator
        end
        rule %r/(\*)(\s*)(:)(\s*)(#{XPath.ncName})/ do
          groups Operator, Text::Whitespace, Punctuation, Text::Whitespace, Name::Tag
        end
        rule %r/(#{XPath.axes})(\s*)(::)/ do
          groups Keyword, Text::Whitespace, Operator
        end
        rule %r/\.\.|\.|\*/, Operator
        rule %r/@/, Name::Attribute, :attrname
        # Any remaining name is treated as an element name test
        rule XPath.eqName, Name::Tag
      end

      # Entered after `cast as` / `castable as`: the next EQName is emitted as
      # the target type and the state is left again.
      state :singletype do
        mixin :commentsAndWhitespace

        # Type name
        rule XPath.eqName, Keyword::Type, :pop!
      end

      # Lexes the type that follows `as`, `treat as` or `instance of`.
      # Most type forms hand over to :occurrenceindicator; an operator,
      # keyword or closing punctuation ends the type and leaves the state.
      state :itemtype do
        mixin :commentsAndWhitespace

        open_paren = XPath.openParen

        # Type tests
        rule %r/(#{XPath.kindTest})(\s*)(#{open_paren})/ do
          groups Keyword::Type, Text::Whitespace, Punctuation
          # replace this state with :occurrenceindicator, then lex the
          # pseudo-parameters in :kindtest on top of it
          goto :occurrenceindicator
          push :kindtest
        end
        rule %r/(#{XPath.kindTestForPI})(\s*)(#{open_paren})/ do
          groups Keyword::Type, Text::Whitespace, Punctuation
          # same as above, with :kindtestforpi for the pseudo-parameters
          goto :occurrenceindicator
          push :kindtestforpi
        end
        rule %r/(item)(\s*)(#{open_paren})(\s*)(\))/ do
          groups Keyword::Type, Text::Whitespace, Punctuation, Text::Whitespace, Punctuation
          goto :occurrenceindicator
        end
        # function(...), array(...), map(...), empty-sequence(...): stay in
        # this state for the parenthesised part
        rule %r/(#{XPath.constructorTypes})(\s*)(#{open_paren})/ do
          groups Keyword::Type, Text::Whitespace, Punctuation
        end

        # Type commands
        rule %r/(cast|castable)(\s+)(as)/ do
          groups Keyword, Text::Whitespace, Keyword
          goto :singletype
        end
        rule %r/(treat)(\s+)(as)/ do
          groups Keyword, Text::Whitespace, Keyword
          goto :itemtype
        end
        rule %r/(instance)(\s+)(of)/ do
          groups Keyword, Text::Whitespace, Keyword
          goto :itemtype
        end
        rule %r/(as)\b/ do
          token Keyword
        end

        # Operators, keywords and closing punctuation end the type
        rule XPath.operators, Operator, :pop!
        rule %r/#{XPath.word_operators}\b/, Operator::Word, :pop!
        rule %r/#{XPath.keywords}\b/, Keyword, :pop!
        rule %r/[\[),]/, Punctuation, :pop!

        # Other types (e.g. xs:double)
        rule XPath.eqName do
          token Keyword::Type
          goto :occurrenceindicator
        end
      end

      # For pseudo-parameters for the KindTest productions
      state :kindtest do
        # Active between the parentheses of a kind test; nested element
        # tests push this state again, and ")" leaves it.
        mixin :commentsAndWhitespace

        # Pseudo-parameters:
        rule %r/[?*]/ do
          token Operator
        end
        rule %r/,/ do
          token Punctuation
        end
        rule %r/(element|schema-element)(\s*)(#{XPath.openParen})/ do
          groups Keyword::Type, Text::Whitespace, Punctuation
          push :kindtest
        end
        rule XPath.eqName do
          token Name::Tag
        end

        # End of pseudo-parameters
        rule %r/\)/ do
          token Punctuation
          pop!
        end
      end

      # Similar to :kindtest, but recognizes NCNames instead of EQNames
      state :kindtestforpi do
        # Active between the parentheses of processing-instruction(...);
        # the argument is an NCName or a string literal, and ")" leaves
        # the state.
        mixin :commentsAndWhitespace

        # Pseudo-parameters
        rule XPath.ncName do
          token Name
        end
        rule XPath.stringLiteral do
          token Literal::String
        end

        # End of pseudo-parameters
        rule %r/\)/ do
          token Punctuation
          pop!
        end
      end

      # Entered right after a type: consumes an optional occurrence indicator
      # and then returns to the state below it on the stack.
      state :occurrenceindicator do
        mixin :commentsAndWhitespace

        # Occurrence indicator
        rule %r/[?*+]/, Operator, :pop!

        # Anything else: zero-width match that just leaves this state so the
        # input is lexed by the previous one.
        rule(%r/(?![?*+])/) { pop! }
      end

      # Entered after "$": lexes the variable name and leaves again.
      state :varname do
        mixin :commentsAndWhitespace

        # Variable name directly followed by an argument list
        rule %r/(#{XPath.eqName})(\s*)(#{XPath.openParen})/ do
          groups Name::Variable, Text::Whitespace, Punctuation
          pop!
        end

        # Plain variable name
        rule XPath.eqName do
          token Name::Variable
          pop!
        end
      end

      # Entered after "@": lexes the attribute name (or the "*" wildcard)
      # and leaves again.
      state :attrname do
        mixin :commentsAndWhitespace

        # Attribute name
        rule XPath.eqName do
          token Name::Attribute
          pop!
        end
        # Wildcard
        rule %r/\*/ do
          token Operator
          pop!
        end
      end

      # Inside "(: ... :)". Comments nest, so every further "(:" pushes this
      # state again and every ":)" pops one level.
      state :comment do
        # Comment end
        rule %r/:\)/ do
          token Comment
          pop!
        end

        # Nested comment
        rule XPath.commentStart do
          token Comment
          push :comment
        end

        # Comment contents: runs without ":" or "(", then a lone ":" or "("
        # that did not form a delimiter above
        rule %r/[^:(]+/m do
          token Comment
        end
        rule %r/[:(]/ do
          token Comment
        end
      end
    end
  end
end