class HTML::Selector

def simple_selector(statement, values, can_negate = true)

cannot be negated.
negation. Called a second time with false since negation
Called the first time with +can_negate+ true to allow

+pseudo+ (classes) and +negation+.
Returns a hash with the values +tag_name+, +attributes+,

substitution values.
Creates a simple selector given the statement and array of
# Creates a simple selector given the statement and array of
# substitution values.
#
# Returns a hash with the values +tag_name+, +attributes+,
# +pseudo+ (classes) and +negation+.
#
# Called the first time with +can_negate+ true to allow
# negation. Called a second time with false since negation
# cannot be negated.
#
# +statement+ is consumed in place: each recognized component is
# removed from the front of the string and a normalized form of it is
# appended to <tt>@source</tt>. Parsing stops at the first text that is
# not part of a simple selector; that text is left in +statement+.
#
# A <tt>?</tt> placeholder (in an id, attribute value, nth-child
# argument or <tt>:content</tt>) is replaced by the next entry shifted
# off +values+. For ids and <tt>:content</tt> a Regexp value is used as
# is; anything else is converted to an exact-match Regexp.
#
# Raises ArgumentError when <tt>:not(</tt> appears while +can_negate+ is
# false, or when a negation is missing its closing parenthesis.
def simple_selector(statement, values, can_negate = true)
  tag_name = nil
  attributes = []
  pseudo = []
  negation = []

  # Element name. (Note that in negation, this can come at
  # any order, but for simplicity we allow if only first).
  statement.sub!(/^(\*|[[:alpha:]][\w\-]*)/) do |match|
    match.strip!
    # "*" matches any element, so it leaves tag_name unset.
    tag_name = match.downcase unless match == "*"
    @source << match
    "" # Remove
  end

  # Get identifier, class, attribute name, pseudo or negation.
  # Every branch that consumes something restarts the loop with +next+;
  # the loop ends once nothing at the front of the statement matches.
  loop do
    # Element identifier.
    next if statement.sub!(/^#(\?|[\w\-]+)/) do
      id = $1
      id = values.shift if id == "?"
      @source << "##{id}"
      id = Regexp.new("^#{Regexp.escape(id.to_s)}$") unless id.is_a?(Regexp)
      attributes << ["id", id]
      "" # Remove
    end

    # Class name.
    next if statement.sub!(/^\.([\w\-]+)/) do
      class_name = $1
      @source << ".#{class_name}"
      # The backslash is doubled on purpose: inside a double-quoted
      # string a bare "\s" is a literal space, which would make the
      # pattern miss class names separated by tabs or newlines.
      class_pattern = Regexp.new("(^|\\s)#{Regexp.escape(class_name)}($|\\s)")
      attributes << ["class", class_pattern]
      "" # Remove
    end

    # Attribute value. The value may be single quoted, double quoted
    # or bare; a quoted value may contain a closing bracket.
    next if statement.sub!(/^\[\s*([[:alpha:]][\w\-:]*)\s*((?:[~|^$*])?=)?\s*('[^']*'|"[^"]*"|[^\]]*)\s*\]/) do
      name, equality, value = $1, $2, $3
      if value == "?"
        value = values.shift
      else
        # Handle single and double quotes.
        value.strip!
        if (value[0] == ?" || value[0] == ?') && value[0] == value[-1]
          value = value[1..-2]
        end
      end
      @source << "[#{name}#{equality}'#{value}']"
      attributes << [name.downcase.strip, attribute_match(equality, value)]
      "" # Remove
    end

    # Root element only.
    next if statement.sub!(/^:root/) do
      pseudo << lambda do |element|
        element.parent.nil? || !element.parent.tag?
      end
      @source << ":root"
      "" # Remove
    end

    # Nth-child including last and of-type.
    next if statement.sub!(/^:nth-(last-)?(child|of-type)\((odd|even|(\d+|\?)|(-?\d*|\?)?n([+\-]\d+|\?)?)\)/) do |match|
      reverse = $1 == "last-"
      of_type = $2 == "of-type"
      @source << ":nth-#{$1}#{$2}("
      # Each regexp +when+ below re-matches the argument, so $1/$2
      # inside a branch refer to that branch's own captures.
      case $3
        when "odd"
          pseudo << nth_child(2, 1, of_type, reverse)
          @source << "odd)"
        when "even"
          pseudo << nth_child(2, 2, of_type, reverse)
          @source << "even)"
        when /^(\d+|\?)$/  # b only
          b = ($1 == "?" ? values.shift : $1).to_i
          pseudo << nth_child(0, b, of_type, reverse)
          @source << "#{b})"
        when /^(-?\d*|\?)?n([+\-]\d+|\?)?$/
          # A bare "n" means a = 1 and a bare "-n" means a = -1.
          a = ($1 == "?" ? values.shift :
               $1 == "" ? 1 : $1 == "-" ? -1 : $1).to_i
          b = ($2 == "?" ? values.shift : $2).to_i
          pseudo << nth_child(a, b, of_type, reverse)
          @source << (b >= 0 ? "#{a}n+#{b})" : "#{a}n#{b})")
        else
          raise ArgumentError, "Invalid nth-child #{match}"
      end
      "" # Remove
    end

    # First/last child (of type).
    next if statement.sub!(/^:(first|last)-(child|of-type)/) do
      reverse = $1 == "last"
      of_type = $2 == "of-type"
      pseudo << nth_child(0, 1, of_type, reverse)
      @source << ":#{$1}-#{$2}"
      "" # Remove
    end

    # Only child (of type).
    next if statement.sub!(/^:only-(child|of-type)/) do
      of_type = $1 == "of-type"
      pseudo << only_child(of_type)
      @source << ":only-#{$1}"
      "" # Remove
    end

    # Empty: no child elements or meaningful content (whitespaces
    # are ignored).
    next if statement.sub!(/^:empty/) do
      pseudo << lambda do |element|
        empty = true
        element.children.each do |child|
          if child.tag? || !child.content.strip.empty?
            empty = false
            break
          end
        end
        empty
      end
      @source << ":empty"
      "" # Remove
    end

    # Content: match the text content of the element, stripping
    # leading and trailing spaces.
    next if statement.sub!(/^:content\(\s*(\?|'[^']*'|"[^"]*"|[^)]*)\s*\)/) do
      content = $1
      if content == "?"
        content = values.shift
      elsif (content[0] == ?" || content[0] == ?') && content[0] == content[-1]
        content = content[1..-2]
      end
      @source << ":content('#{content}')"
      content = Regexp.new("^#{Regexp.escape(content.to_s)}$") unless content.is_a?(Regexp)
      pseudo << lambda do |element|
        # Only the element's direct non-tag children contribute text.
        text = ""
        element.children.each do |child|
          text << child.content unless child.tag?
        end
        text.strip =~ content
      end
      "" # Remove
    end

    # Negation. Create another simple selector to handle it.
    if statement.sub!(/^:not\(\s*/, "")
      raise ArgumentError, "Double negatives are not missing feature" unless can_negate
      @source << ":not("
      # The nested call consumes the negated selector from the same
      # statement and is not allowed to contain a further :not(.
      negation << simple_selector(statement, values, false)
      raise ArgumentError, "Negation not closed" unless statement.sub!(/^\s*\)/, "")
      @source << ")"
      next
    end

    # No match: moving on.
    break
  end

  # Return hash. The keys are mapped to instance variables.
  {:tag_name=>tag_name, :attributes=>attributes, :pseudo=>pseudo, :negation=>negation}
end