class Rocco
`Rocco.new` takes a source `filename`, an optional list of source filenames
for other documentation sources, an `options` hash, and an optional `block`.
The `options` hash respects three members:

* `:language`, which specifies which Pygments lexer to use if one can't be
  auto-detected from the filename. _Defaults to `ruby`_.

* `:comment_chars`, which specifies the comment characters of the
  target language. _Defaults to `#`_.

* `:template_file`, which specifies an external template file to use
  when rendering the final, highlighted file via Mustache. _Defaults
  to `nil` (that is, Mustache will use `./lib/rocco/layout.mustache`)_.
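As a rough usage sketch (the filename, the inline source text, and the output
path are purely illustrative; the optional block is used here to supply the
source directly instead of reading it from disk):

    rocco = Rocco.new('snippet.code', [], :language => 'ruby') do
      "# Greet the world\nputs 'hello'\n"
    end
    File.open('snippet.html', 'w') { |f| f.write(rocco.to_html) }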
def self.make(dest='docs/', source_files='lib/**/*.rb', options={})
def self.make(dest='docs/', source_files='lib/**/*.rb', options={})
  Task.new(:rocco, dest, source_files, options)
end
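For illustration only, a `Rakefile` leaning on this shortcut might look like
the sketch below; the destination and glob simply restate the defaults, and it
assumes both Rocco and the `Rocco::Task` rake support referenced above are on
the load path:

    # Rakefile (illustrative sketch)
    require 'rocco'
    Rocco.make 'docs/', 'lib/**/*.rb'

Since the task created above is named `:rocco`, running `rake rocco` would
then rebuild the documentation.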
def detect_language
If `pygmentize` is available, we can use it to autodetect a file's
language based on its filename. Filenames without extensions, or with
extensions that `pygmentize` doesn't understand, will return `text`.
We'll also return `text` if `pygmentize` isn't available.
def detect_language
  @_language ||=
    if pygmentize?
      %x[pygmentize -N #{@file}].strip.split('+').first
    else
      "text"
    end
end
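To make the shell-out concrete, here is roughly what detection sees on a
machine with Pygments installed; the reported lexer names depend on the
Pygments version, so treat the values as illustrative:

    %x[pygmentize -N lib/rocco.rb].strip   # => "rb"    (recognized extension)
    %x[pygmentize -N CHANGELOG].strip      # => "text"  (nothing to go on)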
def docblock(docs)
Take a list of block comments and convert Docblock @annotations to
Markdown syntax.
def docblock(docs)
  docs.map do |doc|
    doc.split("\n").map do |line|
      line.match(/^@\w+/) ? line.sub(/^@(\w+)\s+/, '> **\1** ') + " " : line
    end.join("\n")
  end
end
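For example, a doc consisting of a single annotation line is rewritten like so
(note the trailing space the method appends to the converted line):

    docblock(["@return a new Widget"])
    # => ["> **return** a new Widget "]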
def generate_comment_chars
def generate_comment_chars
  @_commentchar ||=
    if COMMENT_STYLES[@options[:language]]
      COMMENT_STYLES[@options[:language]]
    else
      { :single => @options[:comment_chars], :multi => nil, :heredoc => nil }
    end
end
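Whether it comes from `COMMENT_STYLES` or from the fallback branch, the value
always has the same three-key shape. A C-style entry might look something like
the following; the actual contents of `COMMENT_STYLES` live elsewhere in the
source, so these values are illustrative:

    { :single  => "//",
      :multi   => { :start => "/**", :middle => "*", :end => "*/" },
      :heredoc => nil }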
def highlight(blocks)
Take the result of `split` and apply Markdown formatting to comments and
syntax highlighting to code.
def highlight(blocks)
  docs_blocks, code_blocks = blocks

  # Pre-process Docblock @annotations.
  if @options[:docblocks]
    docs_blocks = docblock(docs_blocks)
  end

  # Combine all docs blocks into a single big markdown document with section
  # dividers and run through the Markdown processor. Then split it back out
  # into separate sections.
  markdown = docs_blocks.join("\n\n##### DIVIDER\n\n")
  docs_html = process_markdown(markdown).
    split(/\n*<h5>DIVIDER<\/h5>\n*/m)

  # Combine all code blocks into a single big stream with section dividers and
  # run through either `pygmentize(1)` or <http://pygments.appspot.com>.
  span, espan = '<span class="c.?">', '</span>'
  if @options[:comment_chars][:single]
    front = @options[:comment_chars][:single]
    divider_input  = "\n\n#{front} DIVIDER\n\n"
    divider_output = Regexp.new(
      [ "\\n*",
        span,
        Regexp.escape(CGI.escapeHTML(front)),
        ' DIVIDER',
        espan,
        "\\n*"
      ].join, Regexp::MULTILINE
    )
  else
    front = @options[:comment_chars][:multi][:start]
    back  = @options[:comment_chars][:multi][:end]
    divider_input  = "\n\n#{front}\nDIVIDER\n#{back}\n\n"
    divider_output = Regexp.new(
      [ "\\n*",
        span, Regexp.escape(CGI.escapeHTML(front)), espan,
        "\\n",
        span, "DIVIDER", espan,
        "\\n",
        span, Regexp.escape(CGI.escapeHTML(back)), espan,
        "\\n*"
      ].join, Regexp::MULTILINE
    )
  end
  code_stream = code_blocks.join(divider_input)

  code_html =
    if pygmentize?
      highlight_pygmentize(code_stream)
    else
      highlight_webservice(code_stream)
    end

  # Do some post-processing on the pygments output to split things back
  # into sections and remove partial `<pre>` blocks.
  code_html = code_html.
    split(divider_output).
    map { |code| code.sub(/\n?<div class="highlight"><pre>/m, '') }.
    map { |code| code.sub(/\n?<\/pre><\/div>\n/m, '') }

  # Lastly, combine the docs and code lists back into a list of two-tuples.
  docs_html.zip(code_html)
end
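To make the divider trick concrete: the doc sections are glued into one
Markdown document, rendered once, and then split back apart on the rendered
`<h5>DIVIDER</h5>` marker, while the code stream gets the same treatment with
a comment-shaped divider. A sketch of the docs side:

    docs_blocks = ["First section.", "Second section."]
    docs_blocks.join("\n\n##### DIVIDER\n\n")
    # => "First section.\n\n##### DIVIDER\n\nSecond section."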
def highlight_pygmentize(code)
We `popen` a read/write pygmentize process in the parent and then fork off
a child process to write the source into it.
def highlight_pygmentize(code)
  code_html = nil
  open("|pygmentize -l #{@options[:language]} -O encoding=utf-8 -f html", 'r+') do |fd|
    pid =
      fork {
        fd.close_read
        fd.write code
        fd.close_write
        exit!
      }
    fd.close_write
    code_html = fd.read
    fd.close_read
    Process.wait(pid)
  end
  code_html
end
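The explicit `fork` presumably exists so the parent never blocks writing input
while `pygmentize` blocks writing output. An alternative sketch using a
bidirectional `IO.popen` (fine for small inputs, but it can deadlock on large
ones) would be, with `code` being the raw source string passed into the method
above:

    IO.popen("pygmentize -l ruby -O encoding=utf-8 -f html", 'r+') do |io|
      io.write(code)   # feed the raw source to pygmentize
      io.close_write   # signal EOF so pygmentize can finish
      io.read          # collect the highlighted HTML
    end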
def highlight_webservice(code)
Pygments is not one of those things that's trivial for a ruby user to install,
so we'll fall back on a web service (<http://pygments.appspot.com>) to
highlight the code when `pygmentize` isn't available locally.
def highlight_webservice(code)
  Net::HTTP.post_form(
    URI.parse('http://pygments.appspot.com/'),
    { 'lang' => @options[:language], 'code' => code }
  ).body
end
def initialize(filename, sources=[], options={}, &block)
def initialize(filename, sources=[], options={}, &block)
  @file    = filename
  @sources = sources

  # When `block` is given, it must read the contents of the file using
  # whatever means necessary and return it as a string. With no `block`,
  # the file is read to retrieve data.
  @data =
    if block_given?
      yield
    else
      File.read(filename)
    end

  defaults = {
    :language      => 'ruby',
    :comment_chars => '#',
    :template_file => nil
  }
  @options = defaults.merge(options)

  # If we detect a language
  if detect_language() != "text"
    # then assign the detected language to `:language`, and look for
    # comment characters based on that language
    @options[:language]      = detect_language()
    @options[:comment_chars] = generate_comment_chars()

  # If we didn't detect a language, but the user provided one, use it
  # to look around for comment characters to override the default.
  elsif @options[:language] != defaults[:language]
    @options[:comment_chars] = generate_comment_chars()

  # If neither is true, then convert the default comment character string
  # into the comment_char syntax (we'll discuss that syntax in detail when
  # we get to `generate_comment_chars()` in a moment).
  else
    @options[:comment_chars] = { :single => @options[:comment_chars], :multi => nil }
  end

  # Turn `:comment_chars` into a regex matching a series of spaces, the
  # `:comment_chars` string, and an optional space. We'll use that
  # to detect single-line comments.
  @comment_pattern = Regexp.new("^\\s*#{@options[:comment_chars][:single]}\s?")

  # `parse()` the file contents stored in `@data`. Run the result through
  # `split()` and that result through `highlight()` to generate the final
  # section list.
  @sections = highlight(split(parse(@data)))
end
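One instance of each branch, to make the fallthrough concrete (filenames are
illustrative, and the first case assumes `pygmentize` is installed so that
detection actually succeeds):

    Rocco.new('lib/rocco.rb')                         # detected language wins
    Rocco.new('notes.txt', [], :language => 'python') # user-supplied language
    Rocco.new('notes.txt')                            # falls back to plain `#` comments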
def normalize_leading_spaces(sections)
Normalizes documentation whitespace by checking for leading whitespace,
removing it, and then removing the same amount of whitespace from each
succeeding line. That is:

    def func():
      """
        Comment 1
        Comment 2
      """
      print "omg!"

should yield a comment block of `Comment 1\nComment 2` and code of
`def func():\n  print "omg!"`.
def normalize_leading_spaces(sections)
  sections.map do |section|
    if section.any? && section[0].any?
      leading_space = section[0][0].match("^\s+")
      if leading_space
        section[0] = section[0].map { |line| line.sub(/^#{leading_space.to_s}/, '') }
      end
    end
    section
  end
end
def parse(data)
Parse the raw file data into a list of two-tuples. Each tuple has the
form `[docs, code]` where both elements are arrays containing the
raw lines parsed out of the source file.
def parse(data)
  sections = []
  docs, code = [], []
  lines = data.split("\n")

  # The first line is ignored if it is a shebang line. We also ignore the
  # PEP 263 encoding information in python sourcefiles, and the similar ruby
  # 1.9 syntax.
  lines.shift if lines[0] =~ /^\#\!/
  lines.shift if lines[0] =~ /coding[:=]\s*[-\w.]+/ &&
                 [ "python", "rb" ].include?(@options[:language])

  # To detect both block comments and single-line comments, we'll set
  # up a tiny state machine, and loop through each line of the file.
  # This requires an `in_comment_block` boolean, and a few regular
  # expressions for line tests. We'll do the same for fake heredoc parsing.
  in_comment_block = false
  in_heredoc = false
  single_line_comment, block_comment_start, block_comment_mid, block_comment_end =
    nil, nil, nil, nil
  if not @options[:comment_chars][:single].nil?
    single_line_comment = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:single])}\\s?")
  end
  if not @options[:comment_chars][:multi].nil?
    block_comment_start = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*$")
    block_comment_end   = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$")
    block_comment_one_liner = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*(.*?)\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$")
    block_comment_start_with = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*(.*?)$")
    block_comment_end_with = Regexp.new("\\s*(.*?)\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$")
    if @options[:comment_chars][:multi][:middle]
      block_comment_mid = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:middle])}\\s?")
    end
  end
  if not @options[:comment_chars][:heredoc].nil?
    heredoc_start = Regexp.new("#{Regexp.escape(@options[:comment_chars][:heredoc])}(\\S+)$")
  end

  lines.each do |line|
    # If we're currently in a comment block, check whether the line matches
    # the _end_ of the comment block, or the _end_ of the comment block with
    # some comment text still on it.
    if in_comment_block
      if block_comment_end && line.match(block_comment_end)
        in_comment_block = false
      elsif block_comment_end_with && line.match(block_comment_end_with)
        in_comment_block = false
        docs << line.match(block_comment_end_with).captures.first.
                  sub(block_comment_mid || '', '')
      else
        docs << line.sub(block_comment_mid || '', '')
      end

    # If we're currently in a heredoc, we're looking for the end of the
    # heredoc, and everything it contains is code.
    elsif in_heredoc
      if line.match(Regexp.new("^#{Regexp.escape(in_heredoc)}$"))
        in_heredoc = false
      end
      code << line

    # Otherwise, check whether the line starts a heredoc. If so, note the end
    # pattern, and the line is code. Otherwise check whether the line matches
    # the beginning of a block, or a single-line comment all on its lonesome.
    # In either case, if there's code, start a new section.
    else
      if heredoc_start && line.match(heredoc_start)
        in_heredoc = $1
        code << line
      elsif block_comment_one_liner && line.match(block_comment_one_liner)
        if code.any?
          sections << [docs, code]
          docs, code = [], []
        end
        docs << line.match(block_comment_one_liner).captures.first
      elsif block_comment_start && line.match(block_comment_start)
        in_comment_block = true
        if code.any?
          sections << [docs, code]
          docs, code = [], []
        end
      elsif block_comment_start_with && line.match(block_comment_start_with)
        in_comment_block = true
        if code.any?
          sections << [docs, code]
          docs, code = [], []
        end
        docs << line.match(block_comment_start_with).captures.first
      elsif single_line_comment && line.match(single_line_comment)
        if code.any?
          sections << [docs, code]
          docs, code = [], []
        end
        docs << line.sub(single_line_comment || '', '')
      else
        code << line
      end
    end
  end
  sections << [docs, code] if docs.any? || code.any?
  normalize_leading_spaces(sections)
end
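A tiny walk-through of the state machine, assuming the default single-line `#`
comment characters and an instance already set up for Ruby source:

    parse("# Say hello\nputs 'hello'\n")
    # => [[["Say hello"], ["puts 'hello'"]]]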
def process_markdown(text)
def process_markdown(text)
  Markdown.new(text, :smart).to_html
end
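The `Markdown` constant is expected to come from whatever Markdown library is
loaded alongside Rocco, so the exact HTML varies from library to library;
roughly:

    process_markdown("Comments are *emphasized*")
    # => "<p>Comments are <em>emphasized</em></p>\n" (or similar)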
def pygmentize?
def pygmentize?
  @_pygmentize ||= ENV['PATH'].split(':').
    any? { |dir| File.executable?("#{dir}/pygmentize") }
end
def split(sections)
Take the list of paired *sections* two-tuples and split into two
separate lists: one holding the comments with leaders removed and
one holding the code blocks.
def split(sections)
  docs_blocks, code_blocks = [], []
  sections.each do |docs, code|
    docs_blocks << docs.join("\n")
    code_blocks << code.map do |line|
      tabs = line.match(/^(\t+)/)
      tabs ? line.sub(/^\t+/, ' ' * tabs.captures[0].length) : line
    end.join("\n")
  end
  [docs_blocks, code_blocks]
end
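Continuing the `parse` example above, splitting that single section yields the
two parallel lists that `highlight` expects:

    split([[["Say hello"], ["puts 'hello'"]]])
    # => [["Say hello"], ["puts 'hello'"]]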
def to_html
def to_html
  Rocco::Layout.new(self, @options[:template_file]).render
end