class SyntaxTree::YARV::SeaOfNodes::Compiler

into a sea of nodes.
The compiler is responsible for taking a data flow graph and turning it

def cleanup_insn_nodes

Eliminate as many unnecessary nodes as we can.
def cleanup_insn_nodes
  nodes.dup.each do |node|
    next unless node.is_a?(InsnNode)
    case node.insn
    when AdjustStack
      # If there are any inputs to the adjust stack that are immediately
      # discarded, we can remove them from the input list.
      number = node.insn.number
      node.inputs.dup.each do |input_edge|
        next if input_edge.type != :data
        from = input_edge.from
        next unless from.is_a?(InsnNode)
        if from.inputs.empty? && from.outputs.size == 1
          number -= 1
          remove(input_edge.from)
        elsif from.insn.is_a?(Dup)
          number -= 1
          connect_over(from)
          remove(from)
          new_edge = node.inputs.last
          new_edge.from.outputs.delete(new_edge)
          node.inputs.delete(new_edge)
        end
      end
      if number == 0
        connect_over(node)
        remove(node)
      else
        next_node =
          if number == 1
            InsnNode.new(Pop.new, node.offset)
          else
            InsnNode.new(AdjustStack.new(number), node.offset)
          end
        next_node.inputs.concat(node.inputs)
        next_node.outputs.concat(node.outputs)
        # Dynamically finding the index of the node in the nodes array
        # because we're mutating the array as we go.
        nodes[nodes.index(node)] = next_node
      end
    when Jump
      # When you have a jump instruction that only has one input and one
      # output, you can just connect over top of it and remove it.
      if node.inputs.size == 1 && node.outputs.size == 1
        connect_over(node)
        remove(node)
      end
    when Pop
      from = node.inputs.find { |edge| edge.type == :data }.from
      next unless from.is_a?(InsnNode)
      removed =
        if from.inputs.empty? && from.outputs.size == 1
          remove(from)
          true
        elsif from.insn.is_a?(Dup)
          connect_over(from)
          remove(from)
          new_edge = node.inputs.last
          new_edge.from.outputs.delete(new_edge)
          node.inputs.delete(new_edge)
          true
        else
          false
        end
      if removed
        connect_over(node)
        remove(node)
      end
    end
  end
end

def cleanup_phi_nodes

first place.
some mess we left. Ideally we wouldn't create these problems in the
We don't always build things in an optimal way. Go back and fix up
def cleanup_phi_nodes
  nodes.dup.each do |node| # dup because we're mutating
    next unless node.is_a?(PhiNode)
    if node.inputs.size == 1
      # Remove phi nodes with a single input.
      connect_over(node)
      remove(node)
    elsif node.inputs.map(&:from).uniq.size == 1
      # Remove phi nodes where all inputs are the same.
      producer_edge = node.inputs.first
      consumer_edge = node.outputs.find { |e| !e.to.is_a?(MergeNode) }
      connect(
        producer_edge.from,
        consumer_edge.to,
        :data,
        consumer_edge.label
      )
      remove(node)
    end
  end
end

def compile

def compile
  local_graphs = {}
  dfg.blocks.each do |block|
    local_graphs[block.id] = create_local_graph(block)
  end
  connect_local_graphs_control(local_graphs)
  connect_local_graphs_data(local_graphs)
  cleanup_phi_nodes
  cleanup_insn_nodes
  SeaOfNodes.new(dfg, nodes, local_graphs).tap(&:verify)
end

def connect(from, to, type, label = nil)

Connect one node to another.
def connect(from, to, type, label = nil)
  raise if from == to
  raise if !to.is_a?(PhiNode) && type == :data && label.nil?
  edge = Edge.new(from, to, type, label)
  from.outputs << edge
  to.inputs << edge
end

def connect_local_graphs_control(local_graphs)

Connect control flow that flows between basic blocks.
def connect_local_graphs_control(local_graphs)
  dfg.blocks.each do |predecessor|
    predecessor_last = local_graphs[predecessor.id].last_fixed
    predecessor.outgoing_blocks.each_with_index do |successor, index|
      label =
        if index > 0 &&
             index == (predecessor.outgoing_blocks.length - 1)
          # If there are multiple outgoing blocks from this block, then
          # the last one is a fallthrough. Otherwise it's a branch.
          :fallthrough
        else
          :"branch#{index}"
        end
      connect(
        predecessor_last,
        local_graphs[successor.id].first_fixed,
        :control,
        label
      )
    end
  end
end

def connect_local_graphs_data(local_graphs)

Connect data flow that flows between basic blocks.
def connect_local_graphs_data(local_graphs)
  dfg.blocks.each do |predecessor|
    arg_outs = local_graphs[predecessor.id].outputs.values
    arg_outs.each_with_index do |arg_out, arg_n|
      predecessor.outgoing_blocks.each do |successor|
        successor_graph = local_graphs[successor.id]
        arg_in = successor_graph.inputs.values[arg_n]
        # We're connecting to a phi node, so we may need a special
        # label.
        raise unless arg_in.is_a?(PhiNode)
        label =
          case arg_out
          when InsnNode
            # Instructions that go into a phi node are labelled by the
            # offset of last instruction in the block that executed
            # them. This way you know which value to use for the phi,
            # based on the last instruction you executed.
            dfg.blocks.find do |block|
              block_start = block.block_start
              block_end =
                block_start + block.insns.sum(&:length) -
                  block.insns.last.length
              if (block_start..block_end).cover?(arg_out.offset)
                break block_end
              end
            end
          when PhiNode
            # Phi nodes to phi nodes are not labelled.
          else
            raise
          end
        connect(arg_out, arg_in, :data, label)
      end
    end
  end
end

def connect_over(node)

Connect all of the inputs to all of the outputs of a node.
def connect_over(node)
  node.inputs.each do |producer_edge|
    node.outputs.each do |consumer_edge|
      connect(
        producer_edge.from,
        consumer_edge.to,
        producer_edge.type,
        producer_edge.label
      )
    end
  end
end

def create_local_graph(block)

inputs and outputs will be left dangling, to be connected later.
Create a sub-graph for a single basic block - block block argument
def create_local_graph(block)
  block_flow = dfg.block_flows.fetch(block.id)
  # A map of instructions to nodes.
  insn_nodes = {}
  # Create a node for each instruction in the block.
  block.each_with_length do |insn, offset|
    node = InsnNode.new(insn, offset)
    insn_nodes[offset] = node
    nodes << node
  end
  # The first and last node in the sub-graph, and the last fixed node.
  previous_fixed = nil
  first_fixed = nil
  last_fixed = nil
  # The merge node for the phi nodes to attach to.
  merge_node = nil
  # If there is more than one predecessor and we have basic block
  # arguments coming in, then we need a merge node for the phi nodes to
  # attach to.
  if block.incoming_blocks.size > 1 && !block_flow.in.empty?
    merge_node = MergeNode.new(id_counter)
    nodes << merge_node
    previous_fixed = merge_node
    first_fixed = merge_node
    last_fixed = merge_node
  end
  # Connect local control flow (only nodes with side effects.)
  block.each_with_length do |insn, length|
    if insn.side_effects?
      insn_node = insn_nodes[length]
      connect previous_fixed, insn_node, :control if previous_fixed
      previous_fixed = insn_node
      first_fixed ||= insn_node
      last_fixed = insn_node
    end
  end
  # Connect basic block arguments.
  inputs = {}
  outputs = {}
  block_flow.in.each do |arg|
    # Each basic block argument gets a phi node. Even if there's only
    # one predecessor! We'll tidy this up later.
    phi = PhiNode.new(id_counter)
    connect(phi, merge_node, :info) if merge_node
    nodes << phi
    inputs[arg] = phi
    block.each_with_length do |_, consumer_offset|
      consumer_flow = dfg.insn_flows[consumer_offset]
      consumer_flow.in.each_with_index do |producer, input_index|
        if producer == arg
          connect(phi, insn_nodes[consumer_offset], :data, input_index)
        end
      end
    end
    block_flow.out.each { |out| outputs[out] = phi if out == arg }
  end
  # Connect local dataflow from consumers back to producers.
  block.each_with_length do |_, consumer_offset|
    consumer_flow = dfg.insn_flows.fetch(consumer_offset)
    consumer_flow.in.each_with_index do |producer, input_index|
      if producer.local?
        connect(
          insn_nodes[producer.length],
          insn_nodes[consumer_offset],
          :data,
          input_index
        )
      end
    end
  end
  # Connect dataflow from producers that leaves the block.
  block.each_with_length do |_, producer_pc|
    dfg
      .insn_flows
      .fetch(producer_pc)
      .out
      .each do |consumer|
        unless consumer.local?
          # This is an argument to the successor block - not to an
          # instruction here.
          outputs[consumer.name] = insn_nodes[producer_pc]
        end
      end
  end
  # A graph with only side-effect free instructions will currently have
  # no fixed nodes! In that case just use the first instruction's node
  # for both first and last. But it's a bug that it'll appear in the
  # control flow path!
  SubGraph.new(
    first_fixed || insn_nodes[block.block_start],
    last_fixed || insn_nodes[block.block_start],
    inputs,
    outputs
  )
end

def id_counter

Counter for synthetic nodes.
def id_counter
  @id_counter += 1
end

def initialize(dfg)

def initialize(dfg)
  @dfg = dfg
  @nodes = []
  # We need to put a unique ID on the synthetic nodes in the graph, so
  # we keep a counter that we increment any time we create a new
  # synthetic node.
  @id_counter = 999
end

def remove(node)

Remove a node from the graph.
def remove(node)
  node.inputs.each do |producer_edge|
    producer_edge.from.outputs.reject! { |edge| edge.to == node }
  end
  node.outputs.each do |consumer_edge|
    consumer_edge.to.inputs.reject! { |edge| edge.from == node }
  end
  nodes.delete(node)
end