lib/rouge/lexers/console.rb
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    # {ConsoleLexer} lexes text that looks like a console/terminal session.
    # Unlike the {Shell} lexer, it first tries to split the prompt off each
    # line and only then hands the rest of the line to the lexer for the shell
    # language (the {Shell} lexer unless configured otherwise). Lines without
    # a prompt are treated as elisions, comments, errors or program output.
    #
    # The {ConsoleLexer} class accepts five options:
    # 1. **lang**: the shell language to lex (default: `shell`);
    # 2. **output**: the output language (default:
    #    `plaintext?token=Generic.Output`);
    # 3. **prompt**: comma-separated list of strings that indicate the end of
    #    a prompt (default: `$,#,>,;`);
    # 4. **comments**: whether to enable comments;
    # 5. **error**: comma-separated list of strings that indicate the start of
    #    an error message.
    #
    # When comments are enabled, lines that begin with a `#` are lexed as a
    # comment. Note that this only works if the prompt is either not manually
    # specified or, if manually specified, does not include the `#` character.
    #
    # Most Markdown processors that recognise GitHub-Flavored Markdown syntax
    # pass the language string to Rouge exactly as written in the document, so
    # an end user can hand options to {ConsoleLexer} as CGI-style parameters,
    # as in the example below.
    #
    # @example
    #   <pre>Here's some regular text.
    #
    #   ```console?comments=true
    #   # This is a comment
    #   $ cp foo bar
    #   ```
    #
    #   Some more regular text.</pre>
    class ConsoleLexer < Lexer
      tag 'console'
      aliases 'terminal', 'shell_session', 'shell-session'
      filenames '*.cap'
      desc 'A generic lexer for shell sessions. Accepts ?lang and ?output lexer options, a ?prompt option, ?comments to enable # comments, and ?error to handle error messages.'

      option :lang, 'the shell language to lex (default: shell)'
      option :output, 'the output language (default: plaintext?token=Generic.Output)'
      option :prompt, 'comma-separated list of strings that indicate the end of a prompt. (default: $,#,>,;)'
      option :comments, 'enable hash-comments at the start of a line - otherwise interpreted as a prompt. (default: false, implied by ?prompt not containing `#`)'
      option :error, 'comma-separated list of strings that indicate the start of an error message'

      # Reads the five lexer options into instance variables.
      #
      # NOTE(review): `list_option`, `lexer_option` and `bool_option` are
      # defined on Lexer, outside this file. The order of the reads below is
      # kept exactly as it was in case those helpers consume options as a
      # side effect - confirm before reordering.
      def initialize(*)
        super
        @prompt   = list_option(:prompt) { nil }
        @lang     = lexer_option(:lang) { 'shell' }
        @output   = lexer_option(:output) { PlainText.new(token: Generic::Output) }
        @comments = bool_option(:comments) { :guess }
        @error    = list_option(:error) { nil }
      end

      # Whether `#` should be kept out of the prompt so it can start a comment.
      #
      # An explicit `comments` setting is returned as-is. When the setting was
      # left to be guessed, comments are allowed only if a prompt was given
      # manually and none of its end strings is `#`.
      #
      # @return [Boolean, nil]
      def allow_comments?
        return @comments unless @comments == :guess

        @prompt && !@prompt.empty? && !end_chars.include?('#')
      end

      # Pattern for a comment line: optional leading whitespace, then `#`.
      #
      # @return [Regexp]
      def comment_regex
        /\A\s*?#/
      end

      # The strings that terminate a prompt (memoized).
      #
      # A manually given prompt list wins (minus empty entries); otherwise the
      # defaults are used, leaving out `#` when comments are allowed.
      #
      # @return [Array<String>]
      def end_chars
        @end_chars ||=
          if @prompt.any?
            @prompt.reject(&:empty?)
          elsif allow_comments?
            %w[$ > ;]
          else
            %w[$ # > ;]
          end
      end

      # Pattern matching a line that starts with one of the configured error
      # markers, or nil when no error markers were configured.
      #
      # @return [Regexp, nil]
      def error_regex
        @error_regex ||=
          if @error.any?
            markers = @error.map { |marker| Regexp.escape(marker) }.join('|')
            /^(?:#{markers})/
          end
      end

      # The lexer used for the command part of a prompt line (memoized).
      # `@lang` may already be a lexer instance, a lexer class, a lexer name,
      # or nil, in which case the {Shell} lexer is used.
      #
      # @return [Lexer, nil]
      def lang_lexer
        @lang_lexer ||=
          case @lang
          when Lexer  then @lang
          when nil    then Shell.new(options)
          when Class  then @lang.new(options)
          when String then Lexer.find(@lang).new(options)
          end
      end

      # Pattern that consumes a single line: the (lazily matched) line text
      # followed by either a newline or an end-of-line position.
      #
      # @return [Regexp]
      def line_regex
        /(.*?)(\n|$)/
      end

      # The lexer used for lines that are plain program output (memoized).
      # `@output` may be nil (plain text tagged as Generic::Output), a lexer
      # instance, a lexer class, or a lexer name.
      #
      # @return [Lexer, nil]
      def output_lexer
        @output_lexer ||=
          case @output
          when nil    then PlainText.new(token: Generic::Output)
          when Lexer  then @output
          when Class  then @output.new(options)
          when String then Lexer.find(@output).new(options)
          end
      end

      # Consumes one line from the scanner and emits its tokens.
      #
      # The line is classified by the first rule that applies:
      # 1. an elision (`<...>` or a run of `.`) - emitted as a Comment;
      # 2. a prompt line - the prompt, any whitespace after it, then the
      #    remainder lexed by {#lang_lexer};
      # 3. a comment line - emitted as a Comment;
      # 4. an error line - emitted as Generic::Error;
      # 5. anything else - lexed by {#output_lexer}.
      #
      # @param input [StringScanner] scanner positioned at the start of a line
      # @yield [token, value] each token produced for the line
      def process_line(input, &output)
        input.scan(line_regex)
        line = input[0]

        # As a nicety, support the use of elisions in input text. A user can
        # write a line with only `<...>` or one or more `.` characters and
        # Rouge will treat it as a comment.
        if line =~ /\A\s*(?:<[.]+>|[.]+)\s*\z/
          puts "console: matched snip #{line.inspect}" if @debug
          output_lexer.reset!
          lang_lexer.reset!

          yield Comment, line
        elsif (prompt = prompt_regex.match(line))
          puts "console: matched prompt #{line.inspect}" if @debug
          output_lexer.reset!

          yield Generic::Prompt, prompt[0]

          # Split off the whitespace that follows the prompt before handing
          # over to the lang lexer, so that it sees the "real" beginning of
          # the command. The pattern can match the empty string, so this
          # match always succeeds.
          indent = /\A\s*/.match(prompt.post_match)
          yield Text::Whitespace, indent[0] unless indent[0].empty?

          lang_lexer.continue_lex(indent.post_match, &output)
        elsif comment_regex =~ line.strip
          puts "console: matched comment #{line.inspect}" if @debug
          output_lexer.reset!
          lang_lexer.reset!

          yield Comment, line
        elsif error_regex =~ line
          # error_regex is nil when no error markers are configured; nil =~
          # anything is nil, so this branch is simply skipped in that case.
          puts "console: matched error #{line.inspect}" if @debug
          output_lexer.reset!
          lang_lexer.reset!

          yield Generic::Error, line
        else
          puts "console: matched output #{line.inspect}" if @debug
          lang_lexer.reset!

          output_lexer.continue_lex(line, &output)
        end
      end

      # Pattern for the text allowed in front of the prompt terminator. When
      # comments are allowed the prefix may not contain `<` or `#`; otherwise
      # any text is accepted.
      #
      # @return [Regexp]
      def prompt_prefix_regex
        return /[^<#]*?/m if allow_comments?

        /.*?/m
      end

      # Pattern for a full prompt: the prefix followed by any one of the
      # prompt terminators from {#end_chars} (memoized).
      #
      # @return [Regexp]
      def prompt_regex
        @prompt_regex ||= begin
          prefix = prompt_prefix_regex
          terminators = end_chars.map { |chars| Regexp.escape(chars) }.join('|')
          /^#{prefix}(?:#{terminators})/
        end
      end

      # Lexes the whole input one line at a time, starting both sub-lexers
      # from a clean state.
      #
      # @param input [String] the console text to lex
      # @yield [token, value] each token produced
      def stream_tokens(input, &output)
        scanner = StringScanner.new(input)
        lang_lexer.reset!
        output_lexer.reset!

        process_line(scanner, &output) until scanner.eos?
      end
    end
  end
end