class Prism::Source

This represents a source of Ruby code that has been parsed. It is used in
conjunction with locations to allow them to resolve line numbers and source
ranges.

def self.for(source, start_line = 1, offsets = [])

Create a new source object with the given source code. This method should
be used instead of `new` and it will return either a `Source` or a
specialized and more performant `ASCIISource` if no multibyte characters
are present in the source code.
def self.for(source, start_line = 1, offsets = [])
  if source.ascii_only?
    ASCIISource.new(source, start_line, offsets)
  elsif source.encoding == Encoding::BINARY
    source.force_encoding(Encoding::UTF_8)
    if source.valid_encoding?
      new(source, start_line, offsets)
    else
      # This is an extremely niche use case where the file is marked as
      # binary, contains multi-byte characters, and those characters are not
      # valid UTF-8. In this case we'll mark it as binary and fall back to
      # treating everything as a single-byte character. This _may_ cause
      # problems when asking for code units, but it appears to be the
      # cleanest solution at the moment.
      source.force_encoding(Encoding::BINARY)
      ASCIISource.new(source, start_line, offsets)
    end
  else
    new(source, start_line, offsets)
  end
end

def character_column(byte_offset)

Return the column number in characters for the given byte offset.
def character_column(byte_offset)
  character_offset(byte_offset) - character_offset(line_start(byte_offset))
end

def character_offset(byte_offset)

Return the character offset for the given byte offset.
def character_offset(byte_offset)
  (source.byteslice(0, byte_offset) or raise).length
end

def code_units_cache(encoding)

Generate a cache that targets a specific encoding for calculating code
unit offsets.
def code_units_cache(encoding)
  CodeUnitsCache.new(source, encoding)
end

def code_units_column(byte_offset, encoding)

Returns the column number in code units for the given encoding for the
given byte offset.
def code_units_column(byte_offset, encoding)
  code_units_offset(byte_offset, encoding) - code_units_offset(line_start(byte_offset), encoding)
end

def code_units_offset(byte_offset, encoding)

Returns the offset from the start of the file for the given byte offset
counting in code units for the given encoding.

This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
concept of code units that differs from the number of characters in other
encodings, it is not captured here.

We purposefully replace invalid and undefined characters with replacement
characters in this conversion. This happens for two reasons. First, it's
possible that the given byte offset will not occur on a character
boundary. Second, it's possible that the source code will contain a
character that has no equivalent in the given encoding.
def code_units_offset(byte_offset, encoding)
  byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
  if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
    byteslice.bytesize / 2
  else
    byteslice.length
  end
end

def column(byte_offset)

Return the column number for the given byte offset.
def column(byte_offset)
  byte_offset - line_start(byte_offset)
end

def encoding

Returns the encoding of the source code, which is set by parameters to the
parser or by the encoding magic comment.
def encoding
  source.encoding
end

def find_line(byte_offset)

Binary search through the offsets to find the line number for the given
byte offset.
def find_line(byte_offset)
  left = 0
  right = offsets.length - 1
  while left <= right
    mid = left + (right - left) / 2
    return mid if (offset = offsets[mid]) == byte_offset
    if offset < byte_offset
      left = mid + 1
    else
      right = mid - 1
    end
  end
  left - 1
end

def initialize(source, start_line = 1, offsets = [])

Create a new source object with the given source code.
def initialize(source, start_line = 1, offsets = [])
  @source = source
  @start_line = start_line # set after parsing is done
  @offsets = offsets # set after parsing is done
end

def line(byte_offset)

Binary search through the offsets to find the line number for the given
byte offset.
def line(byte_offset)
  start_line + find_line(byte_offset)
end

def line_end(byte_offset)

Returns the byte offset of the end of the line corresponding to the given
byte offset.
def line_end(byte_offset)
  offsets[find_line(byte_offset) + 1] || source.bytesize
end

def line_start(byte_offset)

Return the byte offset of the start of the line corresponding to the given
byte offset.
def line_start(byte_offset)
  offsets[find_line(byte_offset)]
end

def lines

Returns the lines of the source code as an array of strings.
def lines
  source.lines
end

def slice(byte_offset, length)

Perform a byteslice on the source code using the given byte offset and
byte length.
def slice(byte_offset, length)
  source.byteslice(byte_offset, length) or raise
end