Make Google Groups scraper work with latest chromedriver

This commit is contained in:
Gerhard Schlager 2019-03-25 16:10:37 +01:00
parent cc2bac86e9
commit 453ba2da7b

@@ -7,13 +7,13 @@ gemfile(true) do
gem "net-http-persistent" gem "net-http-persistent"
gem "nokogiri" gem "nokogiri"
gem "selenium-webdriver" gem "webdrivers"
end end
require "fileutils" require "fileutils"
require "nokogiri" require "nokogiri"
require "optparse" require "optparse"
require "selenium-webdriver" require "webdrivers"
require 'selenium/webdriver/remote/http/persistent' require 'selenium/webdriver/remote/http/persistent'
require "set" require "set"
require "yaml" require "yaml"
@@ -23,7 +23,7 @@ DEFAULT_OUTPUT_PATH = "/shared/import/data"
def driver def driver
@driver ||= begin @driver ||= begin
chrome_args = ["headless", "disable-gpu"] chrome_args = ["headless", "disable-gpu"]
chrome_args << "no-sandbox" << "disable-dev-shm-usage" if inside_container? chrome_args << "no-sandbox" if inside_container?
options = Selenium::WebDriver::Chrome::Options.new(args: chrome_args) options = Selenium::WebDriver::Chrome::Options.new(args: chrome_args)
http_client = Selenium::WebDriver::Remote::Http::Persistent.new http_client = Selenium::WebDriver::Remote::Http::Persistent.new
Selenium::WebDriver.for(:chrome, options: options, http_client: http_client) Selenium::WebDriver.for(:chrome, options: options, http_client: http_client)
@@ -203,9 +203,8 @@ rescue Selenium::WebDriver::Error::TimeOutError
nil nil
end end
def exit_with_error(message) def exit_with_error(*messages)
puts driver.current_url STDERR.puts messages
STDERR.puts message
exit 1 exit 1
end end
@@ -248,16 +247,14 @@ def parse_arguments
begin begin
parser.parse! parser.parse!
rescue OptionParser::ParseError => e rescue OptionParser::ParseError => e
STDERR.puts e.message, "", parser exit_with_error(e.message, "", parser)
exit 1
end end
mandatory = [:email, :password, :groupname] mandatory = [:email, :password, :groupname]
missing = mandatory.select { |name| instance_variable_get("@#{name}").nil? } missing = mandatory.select { |name| instance_variable_get("@#{name}").nil? }
if missing.any? if missing.any?
STDERR.puts "Missing arguments: #{missing.join(', ')}", "", parser exit_with_error("Missing arguments: #{missing.join(', ')}", "", parser)
exit 1
end end
@path = File.join(DEFAULT_OUTPUT_PATH, @groupname) if @path.nil? @path = File.join(DEFAULT_OUTPUT_PATH, @groupname) if @path.nil?