class SyntaxTree::YARV::SeaOfNodes::Compiler
into a sea of nodes.
The compiler is responsible for taking a data flow graph and turning it
def cleanup_insn_nodes
def cleanup_insn_nodes nodes.dup.each do |node| next unless node.is_a?(InsnNode) case node.insn when AdjustStack # If there are any inputs to the adjust stack that are immediately # discarded, we can remove them from the input list. number = node.insn.number node.inputs.dup.each do |input_edge| next if input_edge.type != :data from = input_edge.from next unless from.is_a?(InsnNode) if from.inputs.empty? && from.outputs.size == 1 number -= 1 remove(input_edge.from) elsif from.insn.is_a?(Dup) number -= 1 connect_over(from) remove(from) new_edge = node.inputs.last new_edge.from.outputs.delete(new_edge) node.inputs.delete(new_edge) end end if number == 0 connect_over(node) remove(node) else next_node = if number == 1 InsnNode.new(Pop.new, node.offset) else InsnNode.new(AdjustStack.new(number), node.offset) end next_node.inputs.concat(node.inputs) next_node.outputs.concat(node.outputs) # Dynamically finding the index of the node in the nodes array # because we're mutating the array as we go. nodes[nodes.index(node)] = next_node end when Jump # When you have a jump instruction that only has one input and one # output, you can just connect over top of it and remove it. if node.inputs.size == 1 && node.outputs.size == 1 connect_over(node) remove(node) end when Pop from = node.inputs.find { |edge| edge.type == :data }.from next unless from.is_a?(InsnNode) removed = if from.inputs.empty? && from.outputs.size == 1 remove(from) true elsif from.insn.is_a?(Dup) connect_over(from) remove(from) new_edge = node.inputs.last new_edge.from.outputs.delete(new_edge) node.inputs.delete(new_edge) true else false end if removed connect_over(node) remove(node) end end end end
def cleanup_phi_nodes
some mess we left. Ideally we wouldn't create these problems in the
We don't always build things in an optimal way. Go back and fix up
def cleanup_phi_nodes nodes.dup.each do |node| # dup because we're mutating next unless node.is_a?(PhiNode) if node.inputs.size == 1 # Remove phi nodes with a single input. connect_over(node) remove(node) elsif node.inputs.map(&:from).uniq.size == 1 # Remove phi nodes where all inputs are the same. producer_edge = node.inputs.first consumer_edge = node.outputs.find { |e| !e.to.is_a?(MergeNode) } connect( producer_edge.from, consumer_edge.to, :data, consumer_edge.label ) remove(node) end end end
def compile
def compile local_graphs = {} dfg.blocks.each do |block| local_graphs[block.id] = create_local_graph(block) end connect_local_graphs_control(local_graphs) connect_local_graphs_data(local_graphs) cleanup_phi_nodes cleanup_insn_nodes SeaOfNodes.new(dfg, nodes, local_graphs).tap(&:verify) end
def connect(from, to, type, label = nil)
def connect(from, to, type, label = nil) raise if from == to raise if !to.is_a?(PhiNode) && type == :data && label.nil? edge = Edge.new(from, to, type, label) from.outputs << edge to.inputs << edge end
def connect_local_graphs_control(local_graphs)
def connect_local_graphs_control(local_graphs) dfg.blocks.each do |predecessor| predecessor_last = local_graphs[predecessor.id].last_fixed predecessor.outgoing_blocks.each_with_index do |successor, index| label = if index > 0 && index == (predecessor.outgoing_blocks.length - 1) # If there are multiple outgoing blocks from this block, then # the last one is a fallthrough. Otherwise it's a branch. :fallthrough else :"branch#{index}" end connect( predecessor_last, local_graphs[successor.id].first_fixed, :control, label ) end end end
def connect_local_graphs_data(local_graphs)
def connect_local_graphs_data(local_graphs) dfg.blocks.each do |predecessor| arg_outs = local_graphs[predecessor.id].outputs.values arg_outs.each_with_index do |arg_out, arg_n| predecessor.outgoing_blocks.each do |successor| successor_graph = local_graphs[successor.id] arg_in = successor_graph.inputs.values[arg_n] # We're connecting to a phi node, so we may need a special # label. raise unless arg_in.is_a?(PhiNode) label = case arg_out when InsnNode # Instructions that go into a phi node are labelled by the # offset of last instruction in the block that executed # them. This way you know which value to use for the phi, # based on the last instruction you executed. dfg.blocks.find do |block| block_start = block.block_start block_end = block_start + block.insns.sum(&:length) - block.insns.last.length if (block_start..block_end).cover?(arg_out.offset) break block_end end end when PhiNode # Phi nodes to phi nodes are not labelled. else raise end connect(arg_out, arg_in, :data, label) end end end end
def connect_over(node)
def connect_over(node) node.inputs.each do |producer_edge| node.outputs.each do |consumer_edge| connect( producer_edge.from, consumer_edge.to, producer_edge.type, producer_edge.label ) end end end
def create_local_graph(block)
Create a sub-graph for a single basic block - block block argument
def create_local_graph(block) block_flow = dfg.block_flows.fetch(block.id) # A map of instructions to nodes. insn_nodes = {} # Create a node for each instruction in the block. block.each_with_length do |insn, offset| node = InsnNode.new(insn, offset) insn_nodes[offset] = node nodes << node end # The first and last node in the sub-graph, and the last fixed node. previous_fixed = nil first_fixed = nil last_fixed = nil # The merge node for the phi nodes to attach to. merge_node = nil # If there is more than one predecessor and we have basic block # arguments coming in, then we need a merge node for the phi nodes to # attach to. if block.incoming_blocks.size > 1 && !block_flow.in.empty? merge_node = MergeNode.new(id_counter) nodes << merge_node previous_fixed = merge_node first_fixed = merge_node last_fixed = merge_node end # Connect local control flow (only nodes with side effects.) block.each_with_length do |insn, length| if insn.side_effects? insn_node = insn_nodes[length] connect previous_fixed, insn_node, :control if previous_fixed previous_fixed = insn_node first_fixed ||= insn_node last_fixed = insn_node end end # Connect basic block arguments. inputs = {} outputs = {} block_flow.in.each do |arg| # Each basic block argument gets a phi node. Even if there's only # one predecessor! We'll tidy this up later. phi = PhiNode.new(id_counter) connect(phi, merge_node, :info) if merge_node nodes << phi inputs[arg] = phi block.each_with_length do |_, consumer_offset| consumer_flow = dfg.insn_flows[consumer_offset] consumer_flow.in.each_with_index do |producer, input_index| if producer == arg connect(phi, insn_nodes[consumer_offset], :data, input_index) end end end block_flow.out.each { |out| outputs[out] = phi if out == arg } end # Connect local dataflow from consumers back to producers. block.each_with_length do |_, consumer_offset| consumer_flow = dfg.insn_flows.fetch(consumer_offset) consumer_flow.in.each_with_index do |producer, input_index| if producer.local? connect( insn_nodes[producer.length], insn_nodes[consumer_offset], :data, input_index ) end end end # Connect dataflow from producers that leaves the block. block.each_with_length do |_, producer_pc| dfg .insn_flows .fetch(producer_pc) .out .each do |consumer| unless consumer.local? # This is an argument to the successor block - not to an # instruction here. outputs[consumer.name] = insn_nodes[producer_pc] end end end # A graph with only side-effect free instructions will currently have # no fixed nodes! In that case just use the first instruction's node # for both first and last. But it's a bug that it'll appear in the # control flow path! SubGraph.new( first_fixed || insn_nodes[block.block_start], last_fixed || insn_nodes[block.block_start], inputs, outputs ) end
def id_counter
def id_counter @id_counter += 1 end
def initialize(dfg)
def initialize(dfg) @dfg = dfg @nodes = [] # We need to put a unique ID on the synthetic nodes in the graph, so # we keep a counter that we increment any time we create a new # synthetic node. @id_counter = 999 end
def remove(node)
def remove(node) node.inputs.each do |producer_edge| producer_edge.from.outputs.reject! { |edge| edge.to == node } end node.outputs.each do |consumer_edge| consumer_edge.to.inputs.reject! { |edge| edge.from == node } end nodes.delete(node) end