# lib/phlex/csv.rb



# frozen_string_literal: true

# Renders a collection as CSV text, writing one line per rendered row.
#
# `call` invokes `view_template` for every row. That method is not defined in this
# class, so a subclass has to provide it and call `column` once per cell. A subclass
# must also define `escape_csv_injection?` to return `true` or `false`; `call` raises
# otherwise.
class Phlex::CSV
	# Leading characters that mark a value as a potential spreadsheet formula.
	FORMULA_PREFIXES = Set["=", "+", "-", "@", "\t", "\r"].freeze
	# Leading/trailing characters that force a value to be quoted when values are not trimmed.
	SPACE_CHARACTERS = Set[" ", "\t", "\r"].freeze
	# Characters that force a value to be quoted wherever they occur in it.
	QUOTE_TRIGGERS = ['"', ",", "\n", "\r"].freeze

	# collection - the object whose items become rows; `each_item` calls `each` on it.
	def initialize(collection)
		@collection = collection
		@_headers = []
		@_current_row = []
		@_current_column_index = 0
		@_first = true
	end

	attr_reader :collection

	# Renders every item of the collection into `buffer` and returns the buffer.
	#
	# buffer  - object the output is appended to with `<<` (defaults to a new mutable String).
	# context - accepted for interface compatibility; not used by this method.
	#
	# Raises RuntimeError when `escape_csv_injection?` returns neither `true` nor `false`,
	# and "Inconsistent header." when a later row names a column differently from the first row.
	def call(buffer = +"", context: nil)
		__require_csv_injection_policy__

		each_item do |record|
			yielder(record) do |*args, **kwargs|
				view_template(*args, **kwargs)

				if @_first
					if render_headers?
						# Headers are stored raw by `column` and escaped only here.
						buffer << @_headers.map { |header| __escape__(header) }.join(",") << "\n"
					end

					# Clear the flag once the first row is complete rather than after `yielder`
					# returns, so a `yielder` that yields several times (or not at all) for the
					# first record neither repeats the headers nor leaves them uncollected.
					@_first = false
				end

				buffer << @_current_row.join(",") << "\n"
				@_current_column_index = 0
				@_current_row.clear
			end
		end

		buffer
	end

	# No filename by default.
	def filename
		nil
	end

	def content_type
		"text/csv"
	end

	private

	# Adds one cell to the row currently being built.
	#
	# header - the column's header; optional, defaults to `nil`.
	# value  - the cell value; converted with `to_s` and escaped.
	#
	# While the first row is being built the header is recorded. On later rows it must
	# equal the header recorded at the same position, otherwise this raises.
	def column(header = nil, value)
		if @_first
			# Keep the raw header: comparing later rows against an escaped copy would
			# reject any header that escaping changes (e.g. `nil` or one containing a comma).
			@_headers << header
		elsif header != @_headers[@_current_column_index]
			raise "Inconsistent header."
		end

		@_current_row << __escape__(value)
		@_current_column_index += 1
	end

	# Iterates the collection, passing each item to the block.
	def each_item(&)
		collection.each(&)
	end

	# Yields the record to the row-rendering block.
	def yielder(record)
		yield(record)
	end

	# Override and set to `false` to disable rendering headers.
	def render_headers?
		true
	end

	# Override and set to `true` to strip leading and trailing whitespace from values.
	def trim_whitespace?
		false
	end

	# Override and set to `false` to disable CSV injection escapes or `true` to enable.
	def escape_csv_injection?
		nil
	end

	# Raises unless `escape_csv_injection?` has been overridden to return exactly `true` or `false`.
	def __require_csv_injection_policy__
		policy = escape_csv_injection?
		return if policy == true || policy == false

		raise <<~MESSAGE
			You need to define escape_csv_injection? in #{self.class.name}, returning either `true` or `false`.

			CSV injection is a security vulnerability where malicious spreadsheet formulae are used to execute code or exfiltrate data when a CSV is opened in a spreadsheet program such as Microsoft Excel or Google Sheets.

			For more information, see https://owasp.org/www-community/attacks/CSV_Injection

			If you're sure this CSV will never be opened in a spreadsheet program, you can disable CSV injection escapes:

			  def escape_csv_injection? = false

			This is useful when using CSVs for byte-for-byte data exchange between secure systems.

			Alternatively, you can enable CSV injection escapes at the cost of data integrity:

			  def escape_csv_injection? = true

			Note: Enabling the CSV injection escapes will prefix any values that start with `=`, `+`, `-`, `@`, `\\t`, or `\\r` with a single quote `'` to prevent them from being interpreted as formulae by spreadsheet programs.

			Unfortunately, there is no one-size-fits-all solution to CSV injection. You need to decide based on your specific use case.
		MESSAGE
	end

	# Converts a value to its CSV field representation.
	#
	# The value is stringified (and stripped when `trim_whitespace?` is true). It is then
	# wrapped in double quotes, with embedded double quotes doubled, when it starts with a
	# formula prefix and injection escaping is on (a single quote is also prefixed), when it
	# has leading/trailing space characters and trimming is off, or when it contains a
	# double quote, comma, line feed or carriage return. Otherwise it is returned as is.
	def __escape__(value)
		value = trim_whitespace? ? value.to_s.strip : value.to_s
		first_char = value[0]

		if escape_csv_injection? && FORMULA_PREFIXES.include?(first_char)
			# Prefix a single quote to prevent Excel, Google Docs, etc. from interpreting the value as a formula.
			# See https://owasp.org/www-community/attacks/CSV_Injection
			return %("'#{value.gsub('"', '""')}")
		end

		# Only untrimmed values can still carry significant leading/trailing whitespace.
		padded = !trim_whitespace? && (SPACE_CHARACTERS.include?(first_char) || SPACE_CHARACTERS.include?(value[-1]))

		if padded || QUOTE_TRIGGERS.any? { |char| value.include?(char) }
			%("#{value.gsub('"', '""')}")
		else
			value
		end
	end
end