python selenium下载失败

阅读: 评论:0

python selenium下载失败

python selenium下载失败

简易下载器的实现

支持代理、失败重试、确保包含指定ID元素(可根据需求自定义修改)

# coding: utf-8

from Utils import logging

from bs4 import BeautifulSoup as bs

from selenium import webdriver

from selenium.webdriver.common.by import By

from selenium.webdriver.common.proxy import ProxyType

from selenium.webdriver.support import expected_conditions as EC

from selenium.webdriver.support.ui import WebDriverWait

class HtmlDownloader(object):
    """Minimal page downloader built on a PhantomJS Selenium driver.

    Supports an optional HTTP proxy, retry on failure, and waiting until an
    element with a given ID is present before the page is returned.

    NOTE(review): ``webdriver.PhantomJS`` is only available in older Selenium
    releases -- confirm the pinned Selenium version before relying on this.
    """

    # Values accepted for ``returnType`` by download() / safeDownload().
    SUPPORTED_RETURN_TYPES = ("html", "bs")

    # Seconds to wait for the ``ensureId`` element to appear.
    WAIT_SECONDS = 30

    def __init__(self):
        """Start the PhantomJS driver used by every download."""
        self.driver = webdriver.PhantomJS()

    def _restartSession(self, proxy):
        """Open a new driver session whose capabilities carry ``proxy``."""
        # Work on a copy: writing the proxy into the shared
        # DesiredCapabilities.PHANTOMJS dict would leak it into every driver
        # created afterwards in this process.
        capabilities = webdriver.DesiredCapabilities.PHANTOMJS.copy()
        proxy.add_to_capabilities(capabilities)
        self.driver.start_session(capabilities)

    def setProxy(self, proxyStr):
        """Open a new session that sends HTTP traffic through ``proxyStr``.

        Args:
            proxyStr: proxy address assigned to ``Proxy.http_proxy``
                (presumably "host:port" -- confirm against callers).
        """
        proxy = webdriver.Proxy()
        proxy.proxy_type = ProxyType.MANUAL
        proxy.http_proxy = proxyStr
        self._restartSession(proxy)

    def rmProxy(self):
        """Open a new session that connects directly, without a proxy."""
        proxy = webdriver.Proxy()
        proxy.proxy_type = ProxyType.DIRECT
        # The original referenced an undefined name ``browser`` here.
        self._restartSession(proxy)

    def download(self, returnType, url, ensureId, proxyStr = None):
        """Load ``url`` once and return the page in the requested form.

        Args:
            returnType: "html" for the raw page source, "bs" for a
                BeautifulSoup tree parsed with lxml.
            url: address to load.
            ensureId: ID of an element that must be present before the page
                is considered loaded.
            proxyStr: optional proxy address; a direct connection is used
                when it is empty or None.

        Returns:
            The page in the requested form, or None when ``returnType`` is
            unsupported or loading / waiting fails (the error is logged).
        """
        if returnType not in self.SUPPORTED_RETURN_TYPES:
            # Fail before touching the browser; previously this surfaced as a
            # swallowed UnboundLocalError after the page had been loaded.
            logging("e", "unsupported returnType: %r" % (returnType,))
            return None

        # The window is closed at the end of every call, so each attempt
        # starts from a fresh session (proxied or direct).
        if proxyStr:
            self.setProxy(proxyStr)
        else:
            self.rmProxy()

        try:
            self.driver.get(url)

            # special for xxx
            # your code here

            # ensure for some element
            WebDriverWait(self.driver, self.WAIT_SECONDS).until(
                EC.presence_of_element_located((By.ID, ensureId)))

            pageSource = self.driver.page_source
            if returnType == "html":
                downloadResult = pageSource
            else:
                downloadResult = bs(pageSource, 'lxml')
            logging("i", "download %s bytes" % len(pageSource))
            return downloadResult
        except Exception as e:
            # Best-effort contract: report the failure and let the caller
            # (e.g. safeDownload) decide whether to retry.
            logging("e", str(e))
            return None
        finally:
            try:
                self.driver.close()
            except Exception as e:
                # A failing close must not replace the download result.
                logging("w", "failed to close window: %s" % e)

    def safeDownload(self, returnType, url, ensureId, proxyStr = None, maxRetries = 5):
        """Call download() until it succeeds or ``maxRetries`` attempts fail.

        Args:
            returnType, url, ensureId, proxyStr: forwarded to download().
            maxRetries: maximum number of attempts (default 5, the value that
                was previously hard-coded).

        Returns:
            The first truthy download() result, otherwise the falsy result of
            the last attempt (None when no attempt was made).
        """
        if returnType not in self.SUPPORTED_RETURN_TYPES:
            # Retrying cannot fix a bad argument.
            logging("e", "unsupported returnType: %r" % (returnType,))
            return None

        downloadResult = None
        for failTimes in range(1, maxRetries + 1):
            downloadResult = self.download(returnType, url, ensureId, proxyStr)
            if downloadResult:
                return downloadResult
            if failTimes == maxRetries:
                logging("w", "failed %s times, will abort" % failTimes)
            else:
                logging("w", "failed %s times, will retry" % failTimes)
        return downloadResult

元素不可见导致不能操作的错误

# ElementNotVisibleException: Message: {"errorMessage":"Element is not currently visible and may not be manipulated"

# Screenshot: available via screen

首先尝试设定窗口大小

# Give the driver an explicit 1024x768 window; the article suggests this as
# the first thing to try when ElementNotVisibleException is raised.
self.driver.set_window_size(1024, 768)

不行的话再尝试滚动页面,如滚动到底部:

self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

本文发布于:2024-02-01 04:32:56,感谢您对本站的认可!

本文链接:https://www.4u4v.net/it/170673317633881.html

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。

标签:python   selenium
留言与评论(共有 0 条评论)
   
验证码:

Copyright ©2019-2022 Comsenz Inc.Powered by ©

网站地图1 网站地图2 网站地图3 网站地图4 网站地图5 网站地图6 网站地图7 网站地图8 网站地图9 网站地图10 网站地图11 网站地图12 网站地图13 网站地图14 网站地图15 网站地图16 网站地图17 网站地图18 网站地图19 网站地图20 网站地图21 网站地图22 网站地图23