# frozen_string_literal: true
module Asciidoctor
# Public: Methods and constants for managing AsciiDoc table content in a document.
# It supports all three of AsciiDoc's table formats: psv, dsv and csv.
class Table < AbstractBlock
# The number of decimal places kept when table and column widths are
# truncated or rounded.
DEFAULT_PRECISION = 4
# Public: A data object that encapsulates the collection of rows (head, foot, body) for a table
class Rows
  attr_accessor :head, :foot, :body

  # Public: Initialize the three row collections.
  #
  # head - The Array of rows in the head section (default: [])
  # foot - The Array of rows in the foot section (default: [])
  # body - The Array of rows in the body section (default: [])
  def initialize head = [], foot = [], body = []
    @head, @foot, @body = head, foot, body
  end

  # Public: Retrieve a section by name (e.g., rows[:body]) by dispatching to
  # the method of that name.
  alias_method :[], :send

  # Public: Retrieve the rows grouped by section as a nested Array.
  #
  # Creates a 2-dimensional Array of two-element entries. The first element of
  # each entry is the section name as a Symbol. The second element is the
  # Array of rows in that section. The entries are ordered head, body, foot.
  #
  # Returns a 2-dimensional Array of rows grouped by section.
  def by_section
    %i(head body foot).zip([@head, @body, @foot])
  end

  # Public: Retrieve the rows as a Hash.
  #
  # The keys are the section names as Symbols and the values are the Array of
  # rows in that section. The keys are ordered head, body, foot.
  #
  # Returns a Hash of rows grouped by section.
  def to_h
    %i(head body foot).zip([@head, @body, @foot]).to_h
  end
end
# Public: Get/Set the Array of Column objects for this table
attr_accessor :columns
# Public: Get/Set the Rows object for this table (encapsulates the head, body
# and foot rows)
attr_accessor :rows
# Public: Get/Set the Boolean that specifies whether this table has a header row
attr_accessor :has_header_option
# Public: Get the caption for this table
attr_reader :caption
# Public: Initialize a Table block.
#
# The 'width' attribute is coerced to an Integer percentage and stored as the
# tablepcwidth attribute. A value greater than 100 or less than 1 is replaced
# by 100, except for an explicit '0' or '0%', which is kept as 0. When no
# width is given, 100 is used. If the document defines the pagewidth
# attribute, the absolute width is derived from it and stored as the
# tableabswidth attribute (as an Integer when it has no fractional part).
#
# parent     - The parent node, passed on to the superclass constructor.
# attributes - The Hash of attributes from which the 'header-option', 'width'
#              and 'rotate-option' entries are read.
def initialize parent, attributes
  super parent, :table
  @rows = Rows.new
  @columns = []
  @has_header_option = attributes['header-option'] ? true : false

  pcwidth = attributes['width']
  if pcwidth
    pcwidth_intval = pcwidth.to_i
    if pcwidth_intval > 100 || pcwidth_intval < 1
      explicit_zero = pcwidth_intval == 0 && (pcwidth == '0' || pcwidth == '0%')
      pcwidth_intval = 100 unless explicit_zero
    end
  else
    pcwidth_intval = 100
  end
  @attributes['tablepcwidth'] = pcwidth_intval

  if (pagewidth = @document.attributes['pagewidth'])
    abswidth_val = ((pcwidth_intval / 100.0) * pagewidth.to_f).truncate DEFAULT_PRECISION
    # store a whole number as an Integer rather than a Float
    @attributes['tableabswidth'] = abswidth_val == abswidth_val.to_i ? abswidth_val.to_i : abswidth_val
  end

  @attributes['orientation'] = 'landscape' if attributes['rotate-option']
end
# Internal: Returns whether the row currently being processed is the header
# row, which is the case when the header option is set and no body rows have
# been collected yet.
def header_row?
  return @has_header_option unless @has_header_option
  @rows.body.empty?
end
# Internal: Creates the Column objects from the column specs and assigns
# their widths.
#
# colspecs - An Array of Hash column specs. The 'width' entry of each spec is
#            the relative width of the column; a negative width marks the
#            column as an autowidth column. A spec without a 'width' entry is
#            treated as having a width of 1, which matches the default that
#            Column applies.
#
# returns nothing
def create_columns colspecs
  cols = []
  autowidth_cols = nil
  width_base = 0
  colspecs.each do |colspec|
    # NOTE fall back to the Column default so a spec that omits the width
    # doesn't raise a NoMethodError when compared against 0
    colwidth = colspec['width'] || 1
    col = Column.new self, cols.size, colspec
    cols << col
    if colwidth < 0
      (autowidth_cols ||= []) << col
    else
      width_base += colwidth
    end
  end
  @columns = cols
  unless cols.empty?
    @attributes['colcount'] = cols.size
    # a nil width base tells assign_column_widths to split the width evenly
    width_base = nil unless width_base > 0 || autowidth_cols
    assign_column_widths width_base, autowidth_cols
  end
  nil
end
# Internal: Assign column widths to columns
#
# Percentage widths are truncated to DEFAULT_PRECISION decimal places. If the
# assigned widths don't add up to 100, the balance is donated to the final
# column.
#
# This method assumes there's at least one column in the columns array.
#
# width_base     - the total of the relative column values used for calculating
#                  percentage widths; when nil, the width is split evenly
#                  across all columns (default: nil)
# autowidth_cols - the Array of columns that share whatever width the other
#                  columns leave over (default: nil)
#
# returns nothing
def assign_column_widths width_base = nil, autowidth_cols = nil
  precision = DEFAULT_PRECISION
  # tracks the width most recently assigned; the balance is added to it below
  last_pcwidth = 0
  if width_base
    if autowidth_cols
      if width_base > 100
        share = 0
        logger.warn %(total column width must not exceed 100% when using autowidth columns; got #{width_base}%)
      else
        share = ((100.0 - width_base) / autowidth_cols.size).truncate precision
        share = share.to_i if share.to_i == share
        width_base = 100
      end
      autowidth_attrs = { 'width' => share, 'autowidth-option' => '' }
      autowidth_cols.each do |col|
        col.update_attributes autowidth_attrs
      end
    end
    # NOTE accumulate sequentially from 0 so the floating point total is stable
    total_width = @columns.inject(0) do |accum, col|
      accum + (last_pcwidth = col.assign_width nil, width_base, precision)
    end
  else
    even_pcwidth = (100.0 / @columns.size).truncate precision
    even_pcwidth = even_pcwidth.to_i if even_pcwidth.to_i == even_pcwidth
    last_pcwidth = even_pcwidth
    total_width = @columns.inject(0) do |accum, col|
      accum + (col.assign_width even_pcwidth, nil, precision)
    end
  end

  # donate balance, if any, to final column (using half up rounding)
  if total_width != 100
    balance = (100 - total_width + last_pcwidth).round precision
    @columns[-1].assign_width balance, nil, precision
  end
  nil
end
# Internal: Partition the rows into header, footer and body as determined
# by the options on the table
#
# The rowcount attribute is set to the number of rows before any row is moved
# out of the body. If the header option is set, the first row becomes the head
# and the style of each of its cells is cleared. If the 'footer-option' entry
# is set and a body row remains, the last row becomes the foot.
#
# attrs - the Hash of attributes consulted for the 'footer-option' entry
#
# returns nothing
def partition_header_footer(attrs)
  body_rows = @rows.body
  # set rowcount before splitting up body rows
  remaining = @attributes['rowcount'] = body_rows.size
  if remaining > 0 && @has_header_option
    header_cells = body_rows.shift
    remaining -= 1
    # styles aren't applied to header row
    header_cells.each {|cell| cell.style = nil }
    # NOTE the head is stored as an Array of rows even though only one row is taken
    @rows.head = [header_cells]
  end
  @rows.foot = [body_rows.pop] if remaining > 0 && attrs['footer-option']
  nil
end
end
# Public: Methods to manage the columns of an AsciiDoc table. In particular, it
# keeps track of the column specs
class Table::Column < AbstractNode
# Public: Get/Set the style Symbol for this column (initialized from the
# 'style' entry of the attributes passed to the constructor).
attr_accessor :style
# Public: Initialize a Column of the specified table.
#
# The attributes Hash is modified in place: the 1-based colnumber is set and
# defaults are filled in for the width (1), halign ('left') and valign ('top')
# entries that are missing. The result is then merged into the attributes of
# this node.
#
# table      - The Table to which this column belongs.
# index      - The 0-based Integer index of this column.
# attributes - The Hash of attributes for this column (default: {}).
def initialize table, index, attributes = {}
  super table, :table_column
  @style = attributes['style']
  attributes['colnumber'] = index + 1
  { 'width' => 1, 'halign' => 'left', 'valign' => 'top' }.each do |name, default|
    attributes[name] ||= default
  end
  update_attributes attributes
end
# Public: An alias to the parent block (which is always a Table), so the
# owning table can be read as column.table
alias table parent
# Internal: Calculate and assign the widths (percentage and absolute) for this column
#
# This method assigns the colpcwidth attribute and, if the table has the
# tableabswidth attribute, the colabswidth attribute. Computed values are
# truncated to the specified precision and stored as an Integer when they have
# no fractional part.
#
# col_pcwidth - the percentage width to assign; ignored when width_base is set
# width_base  - the total of the relative column widths; when set, the
#               percentage width is computed from the width attribute
# precision   - the Integer number of decimal places to retain
#
# returns the resolved colpcwidth value
def assign_width col_pcwidth, width_base, precision
  if width_base
    relative = (@attributes['width'].to_f * 100.0 / width_base).truncate precision
    col_pcwidth = relative.to_i == relative ? relative.to_i : relative
  end
  if (table_abswidth = parent.attributes['tableabswidth'])
    absolute = ((col_pcwidth / 100.0) * table_abswidth).truncate precision
    @attributes['colabswidth'] = absolute == absolute.to_i ? absolute.to_i : absolute
  end
  @attributes['colpcwidth'] = col_pcwidth
end
# Public: A column is not a block node.
#
# Returns false
def block?
  false
end
# Public: A column is not an inline node.
#
# Returns false
def inline?
  false
end
end
# Public: Methods for managing a cell in an AsciiDoc table.
class Table::Cell < AbstractBlock
# Two consecutive line feeds; used by content to detect text that contains a blank line
DOUBLE_LF = LF * 2
# Public: An Integer of the number of columns this cell will span (default: nil)
attr_accessor :colspan
# Public: An Integer of the number of rows this cell will span (default: nil)
attr_accessor :rowspan
# Public: An alias to the parent block (which is always a Column)
alias column parent
# Internal: Returns the nested Document in an AsciiDoc table cell (only set when style is :asciidoc)
attr_reader :inner_document
# Public: Initialize a Cell that belongs to the specified column.
#
# The cell inherits the attributes of its column, resolves its style (the cell
# spec takes precedence over the column; no style is resolved for a cell in
# the header row), normalizes the cell text according to that style, and sets
# the content model and substitutions. A cell with the :asciidoc style gets a
# nested Document built from its text.
#
# column     - The Column to which this cell belongs.
# cell_text  - The String text of the cell.
# attributes - The Hash cell spec. A non-nil value indicates a psv cell, whose
#              text gets stripped; nil indicates the text should be used as is
#              (default: {}).
# opts       - A Hash of options (default: {}):
#              :cursor - the cursor of the cell in the source; it's duplicated
#                        as the source location when the sourcemap is enabled
#                        and passed to the nested document for an AsciiDoc cell.
def initialize column, cell_text, attributes = {}, opts = {}
super column, :table_cell
@source_location = opts[:cursor].dup if @document.sourcemap
if column
# NOTE the column style is not inherited by a cell in the header row
cell_style = column.attributes['style'] unless (in_header_row = column.table.header_row?)
# REVIEW feels hacky to inherit all attributes from column
update_attributes column.attributes
end
# NOTE if attributes is defined, we know this is a psv cell; implies text needs to be stripped
if attributes
if attributes.empty?
@colspan = @rowspan = nil
else
# NOTE the spans are removed from the cell spec so they aren't copied into the attributes of this node
@colspan, @rowspan = (attributes.delete 'colspan'), (attributes.delete 'rowspan')
# TODO delete style attribute from @attributes if set
cell_style = attributes['style'] || cell_style unless in_header_row
update_attributes attributes
end
if cell_style == :asciidoc
asciidoc = true
inner_document_cursor = opts[:cursor]
if (cell_text = cell_text.rstrip).start_with? LF
# drop each leading newline and advance the cursor by the same number of lines
lines_advanced = 1
lines_advanced += 1 while (cell_text = cell_text.slice 1, cell_text.length).start_with? LF
# NOTE this only works if we remain in the same file
inner_document_cursor.advance lines_advanced
else
cell_text = cell_text.lstrip
end
elsif cell_style == :literal
literal = true
cell_text = cell_text.rstrip
# QUESTION should we use same logic as :asciidoc cell? strip leading space if text doesn't start with newline?
cell_text = cell_text.slice 1, cell_text.length while cell_text.start_with? LF
else
normal_psv = true
# NOTE AsciidoctorJ uses nil cell_text to create an empty cell
cell_text = cell_text ? cell_text.strip : ''
end
else
@colspan = @rowspan = nil
if cell_style == :asciidoc
asciidoc = true
inner_document_cursor = opts[:cursor]
end
end
# NOTE only true for non-header rows
if asciidoc
# FIXME hide doctitle from nested document; temporary workaround to fix
# nested document seeing doctitle and assuming it has its own document title
parent_doctitle = @document.attributes.delete('doctitle')
# NOTE we need to process the first line of content as it may not have been processed
# the included content cannot expect to match conditional terminators in the remaining
# lines of table cell content, it must be self-contained logic
# QUESTION should we reset cell_text to nil?
# QUESTION is it faster to check for :: before splitting?
inner_document_lines = cell_text.split LF, -1
if (unprocessed_line1 = inner_document_lines[0]).include? '::'
preprocessed_lines = (PreprocessorReader.new @document, [unprocessed_line1]).readlines
# replace the first line with the preprocessed lines only if preprocessing changed it
unless unprocessed_line1 == preprocessed_lines[0] && preprocessed_lines.size < 2
inner_document_lines.shift
inner_document_lines.unshift(*preprocessed_lines) unless preprocessed_lines.empty?
end
end unless inner_document_lines.empty?
@inner_document = Document.new inner_document_lines, standalone: false, parent: @document, cursor: inner_document_cursor
# restore the doctitle that was hidden from the nested document
@document.attributes['doctitle'] = parent_doctitle unless parent_doctitle.nil?
@subs = nil
elsif literal
@content_model = :verbatim
@subs = BASIC_SUBS
else
# register a leading inline anchor (e.g., [[id]]) found at the start of a normal psv cell
if normal_psv && (cell_text.start_with? '[[') && LeadingInlineAnchorRx =~ cell_text
Parser.catalog_inline_anchor $1, $2, self, opts[:cursor], @document
end
@content_model = :simple
@subs = NORMAL_SUBS
end
@text = cell_text
@style = cell_style
end
# Public: Get the String text of this cell with substitutions applied.
#
# Used for cells in the head row as well as text-only (non-AsciiDoc) cells in
# the foot row and body.
#
# This method shouldn't be used for cells that have the AsciiDoc style.
#
# Returns the converted String text for this Cell
def text
  apply_subs(@text, @subs)
end
# Public: Set the String text (the source text, before substitutions are applied).
#
# This method shouldn't be used for cells that have the AsciiDoc style.
#
# val - The new String text for this Cell
#
# Returns the new String text assigned to this Cell
def text= val
  @text = val
end
# Public: Handles the body data (tbody, tfoot), applying styles and partitioning into paragraphs
#
# This method should not be used for cells in the head row or that have the literal or verse style.
#
# For a cell with the AsciiDoc style, the result is the converted nested
# document. Otherwise, the result is an Array with one entry per paragraph of
# the substituted text (the text is split at blank lines). Each entry is
# wrapped in a quoted Inline of the cell style unless the cell has no style
# or the :header style.
#
# Returns the converted String for this Cell
def content
  style = @style
  return @inner_document.convert if style == :asciidoc

  styled = style && style != :header
  if @text.include? DOUBLE_LF
    (text.split BlankLineRx).map do |para|
      if styled
        (Inline.new parent, :quoted, para, type: style).convert
      else
        para
      end
    end
  elsif (converted = text).empty?
    []
  elsif styled
    [(Inline.new parent, :quoted, converted, type: style).convert]
  else
    [converted]
  end
end
# Public: Get the source text of this cell split into lines.
#
# Returns an Array of String lines (without the line feeds)
def lines
  @text.split(LF)
end
# Public: Get the source text of this cell (the text before substitutions are applied).
#
# Returns the String source text
def source
  @text
end
# Public: Get the source file where this block started
#
# Returns the file recorded in the source location, or the falsy source
# location itself if no source location is set
def file
  (location = @source_location) && location.file
end
# Public: Get the source line number where this block started
#
# Returns the line number recorded in the source location, or the falsy
# source location itself if no source location is set
def lineno
  (location = @source_location) && location.lineno
end
# Public: Get a String representation of this cell for debugging.
#
# Appends the source text, the spans (a span that isn't set is shown as 1) and
# the attributes to the representation provided by the superclass.
#
# Returns the String representation of this Cell
def to_s
  colspan = @colspan || 1
  rowspan = @rowspan || 1
  %(#{super} - [text: #{@text}, colspan: #{colspan}, rowspan: #{rowspan}, attributes: #{@attributes}])
end
end
# Public: Methods for managing the parsing of an AsciiDoc table. Instances of this
# class are primarily responsible for tracking the buffer of a cell as the parser
# moves through the lines of the table using tail recursion. When a cell boundary
# is located, the previous cell is closed, an instance of Table::Cell is
# instantiated, the row is closed if the cell satisfies the column count and,
# finally, a new buffer is allocated to track the next cell.
class Table::ParserContext
include Logging
# Public: A Set of String keys that represent the table formats accepted as
# the value of the format attribute
#--
# QUESTION should we recognize !sv as a valid format value?
FORMATS = ['psv', 'csv', 'dsv', 'tsv'].to_set
# Public: A Hash mapping the AsciiDoc table formats to default delimiters
#
# Each value is a two-element Array that holds the delimiter String followed
# by the Regexp that matches it. The '!sv' entry is used in place of 'psv' when
# the table is in a nested document.
DELIMITERS = {
'psv' => ['|', /\|/],
'csv' => [',', /,/],
'dsv' => [':', /:/],
'tsv' => [?\t, /\t/],
'!sv' => ['!', /!/],
}
# Public: The Table currently being parsed
attr_accessor :table
# Public: The AsciiDoc table format (psv, dsv, or csv); the tsv format is
# recorded as csv
attr_accessor :format
# Public: Get the expected column count for a row
#
# colcount is the number of columns to pull into a row
# A value of -1 means we use the number of columns found
# in the first line as the colcount
attr_reader :colcount
# Public: The String buffer of the currently open cell
attr_accessor :buffer
# Public: The cell delimiter for this table.
attr_reader :delimiter
# Public: The cell delimiter compiled Regexp for this table.
#
# NOTE the Regexp is stored in the @delimiter_rx instance variable (assigned
# by the constructor and used by match_delimiter), so it must be read
# explicitly; a generated reader named delimiter_re would look up
# @delimiter_re, which is never assigned, and always return nil.
#
# Returns the Regexp that matches the cell delimiter
def delimiter_re
  @delimiter_rx
end
# Public: Initialize the parser context for a table.
#
# Resolves the table format and the cell delimiter from the attributes. An
# unrecognized format is reported as an error and replaced by psv. The psv
# format uses the ! delimiter instead of | when the table is in a nested
# document. A separator attribute, if not empty, overrides the default
# delimiter of the format; the two-character value \t selects a tab.
#
# reader     - The reader for the table; its mark is recorded as the start
#              cursor data.
# table      - The Table being parsed.
# attributes - The Hash of attributes from which the 'format' and 'separator'
#              entries are read (default: {}).
def initialize reader, table, attributes = {}
  @reader = reader
  @start_cursor_data = reader.mark
  @table = table

  format_specified = attributes.key? 'format'
  if format_specified && (FORMATS.include?(xsv = attributes['format']))
    if xsv == 'tsv'
      # NOTE tsv is just an alias for csv with a tab separator
      @format = 'csv'
    else
      @format = xsv
      xsv = '!sv' if xsv == 'psv' && table.document.nested?
    end
  else
    if format_specified
      logger.error message_with_context %(illegal table format: #{xsv}), source_location: reader.cursor_at_prev_line
    end
    @format = 'psv'
    xsv = table.document.nested? ? '!sv' : 'psv'
  end

  if (attributes.key? 'separator') && !(sep = attributes['separator']).nil_or_empty?
    # QUESTION should we support any other escape codes or multiple tabs?
    @delimiter, @delimiter_rx = sep == '\t' ? DELIMITERS['tsv'] : [sep, /#{::Regexp.escape sep}/]
  else
    @delimiter, @delimiter_rx = DELIMITERS[xsv]
  end

  # NOTE -1 means the column count is taken from the first row
  @colcount = (columns = table.columns).empty? ? -1 : columns.size
  @buffer = ''
  @cellspecs = []
  @cell_open = false
  @current_row = []
  @column_visits = 0
  @active_rowspans = [0]
  @linenum = -1
end
# Public: Checks whether the line provided starts with the cell delimiter
# used by this table.
#
# line - the String line to check
#
# returns true if the line starts with the delimiter, false otherwise
def starts_with_delimiter?(line)
  line.start_with?(@delimiter)
end
# Public: Checks whether the line provided contains the cell delimiter
# used by this table.
#
# line - the String line to search
#
# returns the MatchData for the first delimiter found in the line, or nil if
# the line does not contain the delimiter
def match_delimiter(line)
  @delimiter_rx.match line
end
# Public: Skip past the matched delimiter because it's inside quoted text.
#
# The text that precedes the delimiter and the delimiter itself are appended
# to the buffer (a new String is assigned; the current buffer isn't modified).
#
# pre - the String text that precedes the delimiter
#
# Returns nothing
def skip_past_delimiter(pre)
  @buffer = "#{@buffer}#{pre}#{@delimiter}"
  nil
end
# Public: Skip past the matched delimiter because it's escaped.
#
# The text that precedes the delimiter, minus its last character (presumably
# the escape character), and the delimiter itself are appended to the buffer
# (a new String is assigned; the current buffer isn't modified).
#
# pre - the String text that precedes the delimiter, ending with the escape
#       character
#
# Returns nothing
def skip_past_escaped_delimiter(pre)
  @buffer = "#{@buffer}#{pre.chop}#{@delimiter}"
  nil
end
# Public: Determines whether the buffer has unclosed quotes. Used for CSV data.
#
# The buffer (plus the text to append, if specified) is stripped before it's
# checked. If it starts or ends with an escaped quote (""), the escaped quotes
# are removed before looking for the enclosing quotes.
#
# append - the String text to add to the end of the buffer before checking it
#          (default: nil)
#
# returns true if the buffer has unclosed quotes, false if it doesn't or it
# isn't quoted data
def buffer_has_unclosed_quotes? append = nil
  record = (append ? @buffer + append : @buffer).strip
  # a lone quote opens a quoted value that hasn't been closed
  return true if record == '"'
  return false unless record.start_with? '"'

  ends_with_quote = record.end_with? '"'
  if (ends_with_quote && (record.end_with? '""')) || (record.start_with? '""')
    unescaped = record.gsub '""', ''
    (unescaped.start_with? '"') && !(unescaped.end_with? '"')
  else
    !ends_with_quote
  end
end
# Public: Takes the oldest cell spec from the queue. Cell specs precede the
# delimiter, so a queue is used to carry over the spec from the previous cell
# to the current cell when the cell is being closed.
#
# returns The cell spec Hash captured from parsing the previous cell, or nil
# if the queue is empty
def take_cellspec
  @cellspecs.shift
end
# Public: Adds a cell spec to the end of the queue. Cell specs precede the
# delimiter, so a queue is used to carry over the spec to the next cell.
#
# cellspec - the cell spec Hash to add; an empty Hash is added in its place
#            if it's nil (default: {})
#
# returns nothing
def push_cellspec(cellspec = {})
  # this shouldn't be nil, but we check anyway
  @cellspecs.push(cellspec || {})
  nil
end
# Public: Marks that the cell should be kept open. Used when the end of the line is
# reached and the cell may contain additional text.
#
# returns nothing
def keep_cell_open
  @cell_open = true
  nil
end
# Public: Marks the cell as closed so that the parser knows to instantiate a new cell
# instance and add it to the current row.
#
# returns nothing
def mark_cell_closed
  @cell_open = false
  nil
end
# Public: Checks whether the current cell is still open
#
# returns true if the cell is marked as open, false otherwise
def cell_open?
  @cell_open
end
# Public: Checks whether the current cell has been marked as closed
#
# returns true if the cell is marked as closed, false otherwise
def cell_closed?
  @cell_open ? false : true
end
# Public: If the current cell is open, close it. In addition, add the cell
# spec captured from the end of this cell to the queue for use by the next
# cell, and advance the line counter.
#
# next_cellspec - the cell spec Hash for the next cell (default: {})
#
# returns nothing
def close_open_cell(next_cellspec = {})
  # NOTE the spec for the next cell is queued before the open cell is closed
  push_cellspec(next_cellspec)
  if cell_open?
    close_cell true
  end
  advance
  nil
end
# Public: Close the current cell, instantiate a new Table::Cell, add it to
# the current row and, if the number of expected columns for the current
# row has been met, close the row and begin a new one.
#
# The buffer is consumed as the text of the cell and reset to an empty String.
# For the psv format, the text is passed on unmodified along with the next
# cell spec from the queue, whose 'repeatcol' entry determines how many times
# the cell is added. For the other formats, the text is stripped and no cell
# spec is used; for the csv format, enclosing quotes are removed and runs of
# quotes are collapsed.
#
# eol - a Boolean that indicates whether the cell is being closed at the end
#       of a line; it permits the row to be closed on the first line when the
#       column count has not been set explicitly (default: false)
#
# returns nothing
def close_cell(eol = false)
if @format == 'psv'
cell_text = @buffer
@buffer = ''
if (cellspec = take_cellspec)
repeat = cellspec.delete('repeatcol') || 1
else
# NOTE no cell spec is available for this cell, so report it and fall back to an empty spec
logger.error message_with_context 'table missing leading separator; recovering automatically', source_location: Reader::Cursor.new(*@start_cursor_data)
cellspec = {}
repeat = 1
end
else
cell_text = @buffer.strip
@buffer = ''
# NOTE a nil cellspec tells Table::Cell this is not a psv cell
cellspec = nil
repeat = 1
if @format == 'csv' && !cell_text.empty? && cell_text.include?('"')
# this may not be perfect logic, but it hits the 99%
if cell_text.start_with?('"') && cell_text.end_with?('"')
# unquote
# NOTE slice returns nil if the text is a single quote character (the length argument is negative)
if (cell_text = cell_text.slice(1, cell_text.length - 2))
# trim whitespace and collapse escaped quotes
cell_text = cell_text.strip.squeeze('"')
else
logger.error message_with_context 'unclosed quote in CSV data; setting cell to empty', source_location: @reader.cursor_at_prev_line
cell_text = ''
end
else
# collapse escaped quotes
cell_text = cell_text.squeeze('"')
end
end
end
1.upto(repeat) do |i|
# TODO make column resolving an operation
if @colcount == -1
# the column count is not yet known, so create the columns as the cells are encountered
@table.columns << (column = Table::Column.new(@table, @table.columns.size + i - 1))
if cellspec && (cellspec.key? 'colspan') && (extra_cols = cellspec['colspan'].to_i - 1) > 0
offset = @table.columns.size
extra_cols.times do |j|
@table.columns << Table::Column.new(@table, offset + j)
end
end
else
# QUESTION is this right for cells that span columns?
unless (column = @table.columns[@current_row.size])
logger.error message_with_context 'dropping cell because it exceeds specified number of columns', source_location: @reader.cursor_before_mark
# NOTE this return exits the method, so the cell stays marked as open
return
end
end
cell = Table::Cell.new(column, cell_text, cellspec, cursor: @reader.cursor_before_mark)
@reader.mark
unless !cell.rowspan || cell.rowspan == 1
activate_rowspan(cell.rowspan, (cell.colspan || 1))
end
@column_visits += (cell.colspan || 1)
@current_row << cell
# don't close the row if we're on the first line and the column count has not been set explicitly
# TODO perhaps the colcount/linenum logic should be in end_of_row? (or a should_end_row? method)
close_row if end_of_row? && (@colcount != -1 || @linenum > 0 || (eol && i == repeat))
end
@cell_open = false
nil
end
private
# Internal: Close the row by adding it to the Table and resetting the row
# Array and counter variables.
#
# If the column count has not been determined yet, it's set to the number of
# column visits recorded for this row.
#
# returns nothing
def close_row
  @table.rows.body.push @current_row
  # don't have to account for active rowspans here
  # since we know this is first row
  @colcount = @column_visits if @colcount == -1
  @current_row = []
  @column_visits = 0
  # move on to the rowspans that apply to the next row (none if not yet recorded)
  @active_rowspans.shift
  @active_rowspans[0] ||= 0
  nil
end
# Internal: Activate a rowspan. The rowspan Array is consulted when
# determining the effective number of cells in the current row.
#
# The number of columns spanned is added to the entry of each subsequent row
# covered by the rowspan (the entry at index 0 belongs to the current row).
#
# rowspan - the Integer number of rows the cell spans
# colspan - the Integer number of columns the cell spans
#
# returns nothing
def activate_rowspan(rowspan, colspan)
  1.upto(rowspan - 1) do |row_offset|
    occupied = @active_rowspans[row_offset] || 0
    @active_rowspans[row_offset] = occupied + colspan
  end
  nil
end
# Internal: Check whether we've met the number of effective columns for the current row.
#
# returns true if the column count is not yet known (-1) or the effective
# column visits equal the column count, false otherwise
def end_of_row?
  return true if @colcount == -1
  effective_column_visits == @colcount
end
# Internal: Calculate the effective column visits, which consists of the number of
# cells plus any active rowspans.
#
# returns the Integer sum of the column visits and the rowspans active in the
# current row
def effective_column_visits
  @column_visits + @active_rowspans.first
end
# Internal: Advance to the next line (which may come after the parser begins processing
# the next line if the last cell had wrapped content).
#
# returns the Integer line number after it's been incremented
def advance
  @linenum = @linenum + 1
end
end
end