# -*- coding: utf-8 -*- #
# frozen_string_literal: true
module Rouge
module Lexers
class Stan < RegexLexer
title "Stan"
desc 'Stan Modeling Language (mc-stan.org)'
tag 'stan'
filenames '*.stan', '*.stanfunctions'
# optional comment or whitespace
WS = %r((?:\s|//.*?\n|/[*].*?[*]/)+)
ID = /[a-zA-Z_][a-zA-Z0-9_]*/
RT = /(?:(?:[a-z_]\s*(?:\[[0-9, ]\])?)\s+)*/
OP = Regexp.new([
# Assigment operators
"=",
# Comparison operators
"<", "<=", ">", ">=", "==", "!=",
# Boolean operators
"!", "&&", "\\|\\|",
# Real-valued arithmetic operators
"\\+", "-", "\\*", "/", "\\^",
# Transposition operator
"'",
# Elementwise functions
"\\.\\+", "\\.-", "\\.\\*", "\\./", "\\.\\^",
# Matrix division operators
"\\\\",
# Compound assigment operators
"\\+=", "-=", "\\*=", "/=", "\\.\\*=", "\\./=",
# Sampling
"~",
# Conditional operator
"\\?", ":"
].join("|"))
def self.keywords
@keywords ||= Set.new %w(
if else while for break continue print reject return
)
end
def self.types
@types ||= Set.new %w(
int real vector ordered positive_ordered simplex unit_vector
row_vector matrix cholesky_factor_corr cholesky_factor_cov corr_matrix
cov_matrix data void complex array
)
end
def self.reserved
@reserved ||= Set.new [
# Reserved words from Stan language
"for", "in", "while", "repeat", "until", "if", "then", "else", "true",
"false", "target", "functions", "model", "data", "parameters",
"quantities", "transformed", "generated",
# Reserved names from Stan implementation
"var", "fvar", "STAN_MAJOR", "STAN_MINOR", "STAN_PATCH",
"STAN_MATH_MAJOR", "STAN_MATH_MINOR", "STAN_MATH_PATCH",
# Reserved names from C++
"alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand",
"bitor", "bool", "break", "case", "catch", "char", "char16_t",
"char32_t", "class", "compl", "const", "constexpr", "const_cast",
"continue", "decltype", "default", "delete", "do", "double",
"dynamic_cast", "else", "enum", "explicit", "export", "extern",
"false", "float", "for", "friend", "goto", "if", "inline", "int",
"long", "mutable", "namespace", "new", "noexcept", "not", "not_eq",
"nullptr", "operator", "or", "or_eq", "private", "protected",
"public", "register", "reinterpret_cast", "return", "short", "signed",
"sizeof", "static", "static_assert", "static_cast", "struct",
"switch", "template", "this", "thread_local", "throw", "true", "try",
"typedef", "typeid", "typename", "union", "unsigned", "using",
"virtual", "void", "volatile", "wchar_t", "while", "xor", "xor_eq"
]
end
def self.builtin_functions
@builtin_functions ||= Set.new [
# Integer-Valued Basic Functions
## Absolute functions
"abs", "int_step",
## Bound functions
"min", "max",
## Size functions
"size",
# Real-Valued Basic Functions
## Log probability function
"target", "get_lp",
## Logical functions
"step", "is_inf", "is_nan",
## Step-like functions
"fabs", "fdim", "fmin", "fmax", "fmod", "floor", "ceil", "round",
"trunc",
## Power and logarithm functions
"sqrt", "cbrt", "square", "exp", "exp2", "log", "log2", "log10",
"pow", "inv", "inv_sqrt", "inv_square",
## Trigonometric functions
"hypot", "cos", "sin", "tan", "acos", "asin", "atan", "atan2",
## Hyperbolic trigonometric functions
"cosh", "sinh", "tanh", "acosh", "asinh", "atanh",
## Link functions
"logit", "inv_logit", "inv_cloglog",
## Probability-related functions
"erf", "erfc", "Phi", "inv_Phi", "Phi_approx", "binary_log_loss",
"owens_t",
## Combinatorial functions
"beta", "inc_beta", "lbeta", "tgamma", "lgamma", "digamma",
"trigamma", "lmgamma", "gamma_p", "gamma_q",
"binomial_coefficient_log", "choose", "bessel_first_kind",
"bessel_second_kind", "modified_bessel_first_kind",
"log_modified_bessel_first_kind", "modified_bessel_second_kind",
"falling_factorial", "lchoose", "log_falling_factorial",
"rising_factorial", "log_rising_factorial",
## Composed functions
"expm1", "fma", "multiply_log", "ldexp", "lmultiply", "log1p",
"log1m", "log1p_exp", "log1m_exp", "log_diff_exp", "log_mix",
"log_sum_exp", "log_inv_logit", "log_inv_logit_diff",
"log1m_inv_logit",
## Special functions
"lambert_w0", "lambert_wm1",
# Complex-Valued Basic Functions
## Complex constructors and accessors
"to_complex", "get_real", "get_imag",
## Complex special functions
"arg", "norm", "conj", "proj", "polar",
# Array Operations
## Reductions
"sum", "prod", "log_sum_exp", "mean", "variance", "sd", "distance",
"squared_distance", "quantile",
## Array size and dimension function
"dims", "num_elements",
## Array broadcasting
"rep_array",
## Array concatenation
"append_array",
## Sorting functions
"sort_asc", "sort_desc", "sort_indices_asc", "sort_indices_desc",
"rank",
## Reversing functions
"reverse",
# Matrix Operations
## Integer-valued matrix size functions
"num_elements", "rows", "cols",
## Dot products and specialized products
"dot_product", "columns_dot_product", "rows_dot_product", "dot_self",
"columns_dot_self", "rows_dot_self", "tcrossprod", "crossprod",
"quad_form", "quad_form_diag", "quad_form_sym", "trace_quad_form",
"trace_gen_quad_form", "multiply_lower_tri_self_transpose",
"diag_pre_multiply", "diag_post_multiply",
## Broadcast functions
"rep_vector", "rep_row_vector", "rep_matrix",
"symmetrize_from_lower_tri",
## Diagonal matrix functions
"add_diag", "diagonal", "diag_matrix", "identity_matrix",
## Container construction functions
"linspaced_array", "linspaced_int_array", "linspaced_vector",
"linspaced_row_vector", "one_hot_int_array", "one_hot_array",
"one_hot_vector", "one_hot_row_vector", "ones_int_array",
"ones_array", "ones_vector", "ones_row_vector", "zeros_int_array",
"zeros_array", "zeros_vector", "zeros_row_vector", "uniform_simplex",
## Slicing and blocking functions
"col", "row", "block", "sub_col", "sub_row", "head", "tail",
"segment",
## Matrix concatenation
"append_col", "append_row",
## Special matrix functions
"softmax", "log_softmax", "cumulative_sum",
## Covariance functions
"cov_exp_quad",
## Linear algebra functions and solvers
"mdivide_left_tri_low", "mdivide_right_tri_low", "mdivide_left_spd",
"mdivide_right_spd", "matrix_exp", "matrix_exp_multiply",
"scale_matrix_exp_multiply", "matrix_power", "trace", "determinant",
"log_determinant", "inverse", "inverse_spd", "chol2inv",
"generalized_inverse", "eigenvalues_sym", "eigenvectors_sym",
"qr_thin_Q", "qr_thin_R", "qr_Q", "qr_R", "cholseky_decompose",
"singular_values", "svd_U", "svd_V",
# Sparse Matrix Operations
## Conversion functions
"csr_extract_w", "csr_extract_v", "csr_extract_u",
"csr_to_dense_matrix",
## Sparse matrix arithmetic
"csr_matrix_times_vector",
# Mixed Operations
"to_matrix", "to_vector", "to_row_vector", "to_array_2d",
"to_array_1d",
# Higher-Order Functions
## Algebraic equation solver
"algebra_solver", "algebra_solver_newton",
## Ordinary differential equation
"ode_rk45", "ode_rk45_tol", "ode_ckrk", "ode_ckrk_tol", "ode_adams",
"ode_adams_tol", "ode_bdf", "ode_bdf_tol", "ode_adjoint_tol_ctl",
## 1D integrator
"integrate_1d",
## Reduce-sum function
"reduce_sum", "reduce_sum_static",
## Map-rect function
"map_rect",
# Deprecated Functions
"integrate_ode_rk45", "integrate_ode", "integrate_ode_adams",
"integrate_ode_bdf",
# Hidden Markov Models
"hmm_marginal", "hmm_latent_rng", "hmm_hidden_state_prob"
]
end
def self.distributions
@distributions ||= Set.new(
[
# Discrete Distributions
## Binary Distributions
"bernoulli", "bernoulli_logit", "bernoulli_logit_glm",
## Bounded Discrete Distributions
"binomial", "binomial_logit", "beta_binomial", "hypergeometric",
"categorical", "categorical_logit_glm", "discrete_range",
"ordered_logistic", "ordered_logistic_glm", "ordered_probit",
## Unbounded Discrete Distributions
"neg_binomial", "neg_binomial_2", "neg_binomial_2_log",
"neg_binomial_2_log_glm", "poisson", "poisson_log",
"poisson_log_glm",
## Multivariate Discrete Distributions
"multinomial", "multinomial_logit",
# Continuous Distributions
## Unbounded Continuous Distributions
"normal", "std_normal", "normal_id_glm", "exp_mod_normal",
"skew_normal", "student_t", "cauchy", "double_exponential",
"logistic", "gumbel", "skew_double_exponential",
## Positive Continuous Distributions
"lognormal", "chi_square", "inv_chi_square",
"scaled_inv_chi_square", "exponential", "gamma", "inv_gamma",
"weibull", "frechet", "rayleigh",
## Positive Lower-Bounded Distributions
"pareto", "pareto_type_2", "wiener",
## Continuous Distributions on [0, 1]
"beta", "beta_proportion",
## Circular Distributions
"von_mises",
## Bounded Continuous Distributions
"uniform",
## Distributions over Unbounded Vectors
"multi_normal", "multi_normal_prec", "multi_normal_cholesky",
"multi_gp", "multi_gp_cholesky", "multi_student_t",
"gaussian_dlm_obs",
## Simplex Distributions
"dirichlet",
## Correlation Matrix Distributions
"lkj_corr", "lkj_corr_cholesky",
## Covariance Matrix Distributions
"wishart", "inv_wishart"
].product([
"", "_lpmf", "_lupmf", "_lpdf", "_lcdf", "_lccdf", "_rng", "_log",
"_cdf_log", "_ccdf_log"
]).map {|s| "#{s[0]}#{s[1]}"}
)
end
def self.constants
@constants ||= Set.new [
# Mathematical constants
"pi", "e", "sqrt2", "log2", "log10",
# Special values
"not_a_number", "positive_infinity", "negative_infinity",
"machine_precision"
]
end
state :root do
mixin :whitespace
rule %r/#include/, Comment::Preproc, :include
rule %r/#.*$/, Generic::Deleted
rule %r(
functions
|(?:transformed\s+)?data
|(?:transformed\s+)?parameters
|model
|generated\s+quantities
)x, Name::Namespace
rule %r(\{), Punctuation, :bracket_scope
mixin :scope
end
state :include do
rule %r((\s+)(\S+)(\s*)) do |m|
token Text, m[1]
token Comment::PreprocFile, m[2]
token Text, m[3]
pop!
end
end
state :whitespace do
rule %r(\n+)m, Text
rule %r(//(\\.|.)*?$), Comment::Single
mixin :inline_whitespace
end
state :inline_whitespace do
rule %r([ \t\r]+), Text
rule %r(/(\\\n)?[*].*?[*](\\\n)?/)m, Comment::Multiline
end
state :statements do
mixin :whitespace
rule %r/#include/, Comment::Preproc, :include
rule %r/#.*$/, Generic::Deleted
rule %r("), Str, :string
rule %r(
(
((\d+[.]\d*|[.]?\d+)e[+-]?\d+|\d*[.]\d+|\d+)
(#{WS})[+-](#{WS})
((\d+[.]\d*|[.]?\d+)e[+-]?\d+|\d*[.]\d+|\d+)i
)
|((\d+[.]\d*|[.]?\d+)e[+-]?\d+|\d*[.]\d+|\d+)i
|((\d+[.]\d*|[.]?\d+)e[+-]?\d+|\d*[.]\d+)
)mx, Num::Float
rule %r/\d+/, Num::Integer
rule %r(\*/), Error
rule OP, Operator
rule %r([\[\],.;]), Punctuation
rule %r([|](?![|])), Punctuation
rule %r(T\b), Keyword::Reserved
rule %r((lower|upper)\b), Name::Attribute
rule ID do |m|
name = m[0]
if self.class.keywords.include? name
token Keyword
elsif self.class.types.include? name
token Keyword::Type
elsif self.class.reserved.include? name
token Keyword::Reserved
else
token Name::Variable
end
end
end
state :scope do
mixin :whitespace
rule %r(
(#{RT}) # Return type
(#{ID}) # Function name
(?=\([^;]*?\)) # Signature or arguments
)mx do |m|
recurse m[1]
name = m[2]
if self.class.builtin_functions.include? name
token Name::Builtin, name
elsif self.class.distributions.include? name
token Name::Builtin, name
elsif self.class.constants.include? name
token Keyword::Constant
else
token Name::Function, name
end
end
rule %r(\{), Punctuation, :bracket_scope
rule %r(\(), Punctuation, :parens_scope
mixin :statements
end
state :bracket_scope do
mixin :scope
rule %r(\}), Punctuation, :pop!
end
state :parens_scope do
mixin :scope
rule %r(\)), Punctuation, :pop!
end
end
end
end