lib/rouge/lexers/hql.rb



# -*- coding: utf-8 -*- #

module Rouge
  module Lexers
    load_lexer 'sql.rb'

    # Lexer for the Hive Query Language (HQL).
    #
    # Builds on the SQL lexer and customises it in three ways:
    # * supplies Hive-specific keyword and type-name word lists,
    # * lexes double-quoted text as a string literal (Str::Double),
    # * lexes `${...}` variable references, both at top level and inside
    #   single- and double-quoted strings, as Name::Variable.
    class HQL < SQL
      title "HQL"
      desc "Hive Query Language SQL dialect"
      tag 'hql'
      filenames '*.hql'

      # Words highlighted as keywords: Hive keywords plus built-in function
      # names, all upper-case.
      #
      # The list was assembled from several tables of the pages cited below,
      # so a word may legitimately appear in more than one group; the Set
      # collapses such repeats.
      #
      # NOTE(review): the inherited SQL rules presumably upcase a word before
      # looking it up here, which is why only upper-case spellings are listed
      # - confirm in sql.rb.
      # NOTE(review): `UTC_TMESTAMP` looks like a typo but appears to mirror
      # the spelling used in Hive's own reserved-keyword table - confirm
      # before "correcting" it.
      #
      # @return [Set<String>] memoized set of keyword / function names
      def self.keywords
        # sources:
        # https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL
        # https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF
        @keywords ||= Set.new(%w(
          ADD ADMIN AFTER ANALYZE ARCHIVE ASC BEFORE BUCKET BUCKETS CASCADE
          CHANGE CLUSTER CLUSTERED CLUSTERSTATUS COLLECTION COLUMNS COMMENT
          COMPACT COMPACTIONS COMPUTE CONCATENATE CONTINUE DATA DATABASES
          DATETIME DAY DBPROPERTIES DEFERRED DEFINED DELIMITED DEPENDENCY DESC
          DIRECTORIES DIRECTORY DISABLE DISTRIBUTE ELEM_TYPE ENABLE ESCAPED
          EXCLUSIVE EXPLAIN EXPORT FIELDS FILE FILEFORMAT FIRST FORMAT FORMATTED
          FUNCTIONS HOLD_DDLTIME HOUR IDXPROPERTIES IGNORE INDEX INDEXES INPATH
          INPUTDRIVER INPUTFORMAT ITEMS JAR KEYS KEY_TYPE LIMIT LINES LOAD
          LOCATION LOCK LOCKS LOGICAL LONG MAPJOIN MATERIALIZED METADATA MINUS
          MINUTE MONTH MSCK NOSCAN NO_DROP OFFLINE OPTION OUTPUTDRIVER
          OUTPUTFORMAT OVERWRITE OWNER PARTITIONED PARTITIONS PLUS PRETTY
          PRINCIPALS PROTECTION PURGE READ READONLY REBUILD RECORDREADER
          RECORDWRITER REGEXP RELOAD RENAME REPAIR REPLACE REPLICATION RESTRICT
          REWRITE RLIKE ROLE ROLES SCHEMA SCHEMAS SECOND SEMI SERDE
          SERDEPROPERTIES SERVER SETS SHARED SHOW SHOW_DATABASE SKEWED SORT
          SORTED SSL STATISTICS STORED STREAMTABLE STRING STRUCT TABLES
          TBLPROPERTIES TEMPORARY TERMINATED TINYINT TOUCH TRANSACTIONS UNARCHIVE
          UNDO UNIONTYPE UNLOCK UNSET UNSIGNED URI USE UTC UTCTIMESTAMP
          VALUE_TYPE VIEW WHILE YEAR IF

          ALL ALTER AND ARRAY AS AUTHORIZATION BETWEEN BIGINT BINARY BOOLEAN
          BOTH BY CASE CAST CHAR COLUMN CONF CREATE CROSS CUBE CURRENT
          CURRENT_DATE CURRENT_TIMESTAMP CURSOR DATABASE DATE DECIMAL DELETE
          DESCRIBE DISTINCT DOUBLE DROP ELSE END EXCHANGE EXISTS EXTENDED
          EXTERNAL FALSE FETCH FLOAT FOLLOWING FOR FROM FULL FUNCTION GRANT
          GROUP GROUPING HAVING IF IMPORT IN INNER INSERT INT INTERSECT
          INTERVAL INTO IS JOIN LATERAL LEFT LESS LIKE LOCAL MACRO MAP MORE
          NONE NOT NULL OF ON OR ORDER OUT OUTER OVER PARTIALSCAN PARTITION
          PERCENT PRECEDING PRESERVE PROCEDURE RANGE READS REDUCE REVOKE RIGHT
          ROLLUP ROW ROWS SELECT SET SMALLINT TABLE TABLESAMPLE THEN TIMESTAMP
          TO TRANSFORM TRIGGER TRUE TRUNCATE UNBOUNDED UNION UNIQUEJOIN UPDATE
          USER USING UTC_TMESTAMP VALUES VARCHAR WHEN WHERE WINDOW WITH

          AUTOCOMMIT ISOLATION LEVEL OFFSET SNAPSHOT TRANSACTION WORK WRITE

          COMMIT ONLY REGEXP RLIKE ROLLBACK START

          ABORT KEY LAST NORELY NOVALIDATE NULLS RELY VALIDATE

          CACHE CONSTRAINT FOREIGN PRIMARY REFERENCES

          DETAIL DOW EXPRESSION OPERATOR QUARTER SUMMARY VECTORIZATION WEEK YEARS MONTHS WEEKS DAYS HOURS MINUTES SECONDS

          DAYOFWEEK EXTRACT FLOOR INTEGER PRECISION VIEWS

          TIMESTAMPTZ ZONE

          TIME NUMERIC

          NAMED_STRUCT CREATE_UNION

          ROUND BROUND FLOOR CEIL CEILING RAND EXP LN LOG10 LOG2 LOG POW POWER SQRT BIN
          HEX UNHEX CONV ABS PMOD SIN ASIN COS ACOS TAN ATAN DEGREES RADIANS POSITIVE
          NEGATIVE SIGN E PI FACTORIAL CBRT SHIFTLEFT SHIFTRIGHT SHIFTRIGHTUNSIGNED
          GREATEST LEAST WIDTH_BUCKET SIZE MAP_KEYS MAP_VALUES ARRAY_CONTAINS
          SORT_ARRAY BINARY CAST FROM_UNIXTIME UNIX_TIMESTAMP
          TO_DATE YEAR QUARTER MONTH DAY DAYOFMONTH HOUR MINUTE SECOND
          WEEKOFYEAR EXTRACT DATEDIFF DATE_ADD DATE_SUB FROM_UTC_TIMESTAMP
          TO_UTC_TIMESTAMP CURRENT_DATE CURRENT_TIMESTAMP ADD_MONTHS LAST_DAY NEXT_DAY
          TRUNC MONTHS_BETWEEN DATE_FORMAT IF ISNULL ISNOTNULL NVL COALESCE CASE WHEN
          NULLIF ASSERT_TRUE ASCII BASE64 CHARACTER_LENGTH CHR CONCAT
          CONTEXT_NGRAMS CONCAT_WS DECODE ELT ENCODE FIELD FIND_IN_SET
          FORMAT_NUMBER GET_JSON_OBJECT IN_FILE INSTR LENGTH LOCATE LOWER LCASE LPAD LTRIM
          NGRAMS OCTET_LENGTH PARSE_URL PRINTF REGEXP_EXTRACT REGEXP_REPLACE REPEAT
          REPLACE REVERSE RPAD RTRIM SENTENCES SPACE SPLIT STR_TO_MAP SUBSTR SUBSTRING
          SUBSTRING_INDEX TRANSLATE TRIM UNBASE64 UPPER UCASE INITCAP LEVENSHTEIN SOUNDEX
          MASK MASK_FIRST_N MASK_LAST_N MASK_SHOW_FIRST_N MASK_SHOW_LAST_N MASK_HASH
          JAVA_METHOD REFLECT HASH CURRENT_USER LOGGED_IN_USER CURRENT_DATABASE MD5 SHA1
          SHA CRC32 SHA2 AES_ENCRYPT AES_DECRYPT VERSION COUNT SUM AVG MIN MAX VARIANCE
          VAR_POP VAR_SAMP STDDEV_POP STDDEV_SAMP COVAR_POP COVAR_SAMP CORR PERCENTILE
          PERCENTILE_APPROX REGR_AVGX REGR_AVGY REGR_COUNT
          REGR_INTERCEPT REGR_R2 REGR_SLOPE REGR_SXX REGR_SXY REGR_SYY HISTOGRAM_NUMERIC
          COLLECT_SET COLLECT_LIST NTILE EXPLODE POSEXPLODE INLINE STACK

          JSON_TUPLE PARSE_URL_TUPLE

          XPATH XPATH_SHORT XPATH_INT XPATH_LONG XPATH_FLOAT XPATH_DOUBLE
          XPATH_NUMBER XPATH_STRING
        ))
      end

      # Words highlighted as data-type names, all upper-case. Covers the
      # numeric, date/time, string, misc and complex type names listed below.
      #
      # @return [Set<String>] memoized set of type names
      def self.keywords_type
        # source: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types
        @keywords_type ||= Set.new(%w(
            TINYINT SMALLINT INT INTEGER BIGINT FLOAT DOUBLE PRECISION DECIMAL NUMERIC
            TIMESTAMP DATE INTERVAL
            STRING VARCHAR CHAR
            BOOLEAN BINARY
            ARRAY MAP STRUCT UNIONTYPE
        ))
      end

      # Rules prepended to :root, so they are tried before the inherited ones.
      prepend :root do
        # a double-quoted string is a string literal in Hive QL.
        rule %r/"/, Str::Double, :double_string

        # interpolation of variables through ${...}
        rule %r/\$\{/, Name::Variable, :hive_variable
      end

      # Rules prepended to :single_string so `${...}` is recognised inside
      # single-quoted strings.
      prepend :single_string do
        rule %r/\$\{/, Name::Variable, :hive_variable
        # `$` is excluded here only so the `${` rule above gets a chance to
        # match; backslash and the closing quote are left to later rules.
        rule %r/[^\\'\$]+/, Str::Single
        # a `$` that does not open `${` is ordinary string content; handle it
        # here rather than relying on an inherited fallback rule.
        rule %r/\$/, Str::Single
      end

      # Rules prepended to :double_string so its content is lexed as a string
      # literal and `${...}` is recognised inside it.
      prepend :double_string do
        rule %r/\$\{/, Name::Variable, :hive_variable
        # double-quoted strings are string literals so need to change token
        rule %r/"/, Str::Double, :pop!
        rule %r/[^\\"\$]+/, Str::Double
        # a `$` that does not open `${` is ordinary string content; without
        # this rule it would fall through to an inherited rule and could be
        # given a non-string token.
        rule %r/\$/, Str::Double
      end

      # Body of a `${...}` reference: everything up to and including the
      # closing brace is a Name::Variable.
      state :hive_variable do
        rule %r/\}/, Name::Variable, :pop!
        rule %r/[^\}]+/, Name::Variable
      end

    end
  end
end