# lib/wco/scrape_wsj_capy.rb



# require 'capybara/rspec'
# require 'byebug'
require 'capybara/rails'

# Register a Chrome driver, built on Capybara's Selenium driver, under the
# name :selenium_chrome.
Capybara.register_driver(:selenium_chrome) do |app|
  # Alternative kept for reference: pass explicit Chrome options, e.g. to
  # open a remote debugging port.
  #   options = Selenium::WebDriver::Chrome::Options.new
  #   options.add_argument '--remote-debugging-port=4444'
  #   Capybara::Selenium::Driver.new(app, browser: :chrome, options: options)
  Capybara::Selenium::Driver.new(app, browser: :chrome)
end

# Global Capybara settings: use the driver registered above by default, wait
# up to 100 seconds for elements to appear, and use the :webrick server.
Capybara.configure do |config|
  config.default_driver = :selenium_chrome
  config.default_max_wait_time = 100 # seconds
  config.server = :webrick
end

# Visits https://www.wsj.com/ through the Capybara DSL, reads headline text
# from the page's <h3> elements and stores each one as a Wco::Headline.
#
# All of the work happens in the constructor, so `Wco::ScrapeTest.new` both
# builds the object and runs the scrape.
class Wco::ScrapeTest
  include Capybara::DSL

  # Page whose headlines are scraped.
  SITE_URL = 'https://www.wsj.com/'.freeze

  # Value stored in the `site` attribute of every created headline.
  # NOTE(review): the previous code passed a bare `wsj`, which is not defined
  # anywhere in this file and raised NameError before any record was saved.
  # A plain string label is assumed here; confirm the type that
  # Wco::Headline#site expects (it may be a site record rather than a string).
  SITE = 'wsj'.freeze

  # Runs the scrape.
  #
  # Prints one '.' to stdout per headline stored.
  #
  # Raises whatever `find` raises when the expected <h3> is missing, and
  # whatever `Wco::Headline.create!` raises when the record cannot be saved.
  def initialize
    visit SITE_URL

    # XPath positions are 1-based. The range is exclusive, so only position 1
    # (the first <h3> in the document) is read.
    (1...2).each do |idx|
      headline = find(:xpath, "(//h3)[#{idx}]").text

      Wco::Headline.create!({
        name: headline,
        site: SITE,
        date: Time.now.to_date,
      })
      print '.'
    end
  end
end