Selenium与PhantomJS的URLError(在Windows下有效,在Ubuntu16.04下失败)[英] Selenium with PhantomJS URLError( Works in Windows fails in Ubuntu16.04)

问题描述

我的抓取代码在本地计算机(Windows 8)上运行完全正常,但在 Digital Ocean droplet(Ubuntu 16.04)上会失败。它总是在第二次循环时失败,抛出 BadStatusLine(错误的状态行)或 URLError。我已经花了好几个小时在谷歌上搜索这个问题……

设置:

PhantomJS 2.1.1

Selenium 2.53.6

Python 2.7

class Elitebet:
    """Scrape league and fixture listings from the Elitebet Kenya coupon pages.

    The coupon is loaded in a PhantomJS session, parsed with BeautifulSoup,
    and paged through by clicking the "Next" button.

    Note: ``t1`` and ``driver`` are class attributes, so the timer starts and
    the PhantomJS process is launched when the class body is executed, and
    one browser session is shared by every instance.  Nothing in this class
    quits the driver; callers should call ``driver.quit()`` when finished.
    """

    # Reference point for the elapsed time printed by controller().
    t1 = time.time()
    # Single PhantomJS session shared by all instances (see class docstring).
    driver = webdriver.PhantomJS()

    # Market label (text of the first cell of an odds row) -> names of the
    # odds read from cells 3, 4 and, for markets with three outcomes, 5.
    _MARKETS = {
        "Win-Draw-Win": ("home_odds", "draw_odds", "away_odds"),
        "Handicap (0:1)": ("handicap01_1", "handicap01_draw", "handicap01_2"),
        "Double Chance": ("oneordraw", "oneortwo", "drawortwo"),
        "Asian (Draw No Bet)": ("asian1", "asian2"),
        "Goal Under/Over (2.5)": ("under25", "over25"),
        "1st Half Goal Under/Over (1.5)": ("underfh15", "overfh15"),
        "Goal - No Goal": ("goal", "no_goal"),
        "Odd - Even Goal": ("odd", "even"),
        "Handicap (1:0)": ("handicap10_1", "handicap10_draw", "handicap10_2"),
    }
    # Index of the first cell of an odds row that holds a price.
    _FIRST_ODDS_CELL = 3

    def controller(self):
        """Load the coupon, parse each page reached via "Next", print the elapsed time.

        The elapsed time is measured from the class attribute ``t1``.
        """
        self.driver.get("http://www.elitebetkenya.com/coupon.php?d")
        first_page = self._load_page_soup()
        page_number = self.number_of_pages(first_page)
        self.eliteparser(first_page)
        # NOTE(review): the loop stops 10 clicks short of the reported page
        # count; kept exactly as in the original -- confirm this is intended.
        for _ in range(0, page_number - 10):
            self.driver.find_element_by_xpath("//input[@value='Next']").click()
            self.eliteparser(self._load_page_soup(pause=True))

        print(time.time() - self.t1)

    def _load_page_soup(self, pause=False):
        """Wait for the element with id "page", then return the parsed page source.

        When *pause* is true, sleep 1-2 seconds after the wait and before
        reading the page source (the delay used between "Next" clicks).
        """
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.ID, "page")))
        if pause:
            time.sleep(randint(1, 2))
        return BeautifulSoup(
            self.driver.page_source.encode('utf-8'), "html.parser")

    def number_of_pages(self, x):
        """Return the first integer found in the third child of ``div.pgLnx``.

        *x* is the parsed page.  Raises AttributeError when the div is
        missing and IndexError when the div has fewer than three children or
        the third child contains no digits.
        """
        numbers = x.find("div", {"class": "pgLnx"}).contents[2]
        return int(re.findall(r'\d+', numbers)[0])

    def eliteparser(self, x):
        """Print the league name and "home vs away" for every fixture in page *x*.

        Rows are read from the first ``<tbody>``: each ``tr.league`` row opens
        a league, and the ``tr.fixture`` rows that follow it (up to the next
        league row) are that league's fixtures.
        """
        tbody = x.find("tbody")
        for league_row in tbody.findAll("tr", {"class": "league"}):
            league = league_row.get_text()
            for fixture_row in league_row.find_next_siblings("tr"):
                row_classes = fixture_row.get("class", [])
                if "league" in row_classes:
                    break
                if "fixture" not in row_classes:
                    continue
                home = fixture_row.find(
                    "span", {"class": "home uc"}).get_text(strip=True)
                away = fixture_row.find(
                    "span", {"class": "away uc"}).get_text(strip=True)
                # The kick-off time and the odds are parsed but not consumed
                # by the code visible here; they are kept for the caller's
                # own storage/output logic.
                fixture_time = fixture_row.br.get_text().strip()
                odds = self._fixture_odds(fixture_row)
                print(league)
                print("{} vs {}".format(home, away))

    def _fixture_odds(self, fixture_row):
        """Return a dict of the odds in the market rows following *fixture_row*.

        Every known odds name starts as None for each fixture, so a market
        missing for one fixture never inherits the previous fixture's value.
        Scanning stops at the next fixture or league row.  Rows without
        cells, with an unknown label, or with too few cells are skipped.
        """
        odds = dict.fromkeys(
            name for names in self._MARKETS.values() for name in names)
        for row in fixture_row.find_next_siblings("tr"):
            row_classes = row.get("class", [])
            if "fixture" in row_classes or "league" in row_classes:
                break
            tds = row.findAll("td")
            if not tds:
                continue
            # Strip the label for every market, not just "Win-Draw-Win".
            names = self._MARKETS.get(tds[0].get_text().strip(), ())
            if len(tds) < self._FIRST_ODDS_CELL + len(names):
                continue
            for offset, name in enumerate(names):
                odds[name] = tds[self._FIRST_ODDS_CELL + offset].get_text()
        return odds



elite = Elitebet()

try:
    elite.controller()
finally:
    # Always end the WebDriver session, even when the scrape raises, so the
    # PhantomJS process started for it is shut down.
    elite.driver.quit()

错误信息如下:

  File "elitebet.py", line 147, in <module>
    elite.controller()
  File "elitebet.py", line 45, in controller
    page_click.click()
  File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webelement.py", line 72, in click
    self._execute(Command.CLICK_ELEMENT)
  File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webelement.py", line 461, in _execute
    return self._parent.execute(command, params)
  File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/webdriver.py", line 234, in execute
    response = self.command_executor.execute(driver_command, params)
  File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/remote_connection.py", line 401, in execute
    return self._request(command_info[0], url, body=data)
  File "/usr/local/lib/python2.7/dist-packages/selenium/webdriver/remote/remote_connection.py", line 471, in _request
    resp = opener.open(request, timeout=self._timeout)
  File "/usr/lib/python2.7/urllib2.py", line 429, in open
    response = self._open(req, data)
  File "/usr/lib/python2.7/urllib2.py", line 447, in _open
    '_open', req)
  File "/usr/lib/python2.7/urllib2.py", line 407, in _call_chain
    result = func(*args)
  File "/usr/lib/python2.7/urllib2.py", line 1228, in http_open
    return self.do_open(httplib.HTTPConnection, req)
  File "/usr/lib/python2.7/urllib2.py", line 1198, in do_open
    raise URLError(err)
urllib2.URLError: <urlopen error [Errno 111] Connection refused>

推荐答案

这可能是 SSL 错误导致的,我建议你使用下面这些命令行选项(在 Python 的 Selenium 中,可以通过 webdriver.PhantomJS(service_args=[...]) 把它们传给 PhantomJS):

--ignore-ssl-errors=true --ssl-protocol=any --debug=true

你还需要使用 onResourceError 回调,来找出到底是哪里出了问题。

本文地址:https://www.itbaoku.cn/post/1740098.html