A basic crawler: scrape a proxy pool and test the proxies
import requests
import re
from requests import adapters

header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
}
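# NOTE: the concrete file paths and URLs in the original post were lost when
# the page was extracted; every name below is a placeholder, not the author's
# value. Swap in real proxy-list URLs and paths before running.
RAW_FILE = 'proxies_raw.txt'    # scraped, untested IP:port lines
GOOD_FILE = 'proxies_good.txt'  # proxies that passed the connectivity test
SEED_FILE = 'proxies_seed.txt'  # existing proxies used to fetch the list pages
LIST_URL_1 = 'http://example.com/list1'  # page with <td data-title="IP"> rows
LIST_URL_2 = 'http://example.com/list2'  # page with <td>IP</td><td>port</td> rows
TEST_URL = 'http://example.com/'         # URL each proxy is checked against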
# Extract (IP, port) tuples from the second list page, whose rows are plain
# <td>IP</td><td>port</td> cells
def ip_66(data_text1):
    global IPS_66
    IPZ_66 = re.compile(r'<td>(\d+\.\d+\.\d+\.\d+)</td><td>(\d+)</td>', re.S)
    IPS_66 = re.findall(IPZ_66, data_text1)
    return IPS_66
# Extract the port numbers from the first list page
def port_p(data_text):
    global DKS
    DKZ = re.compile(r'<td data-title="PORT">(\d+)</td>', re.S)
    DKS = re.findall(DKZ, data_text)
    return DKS
# Extract the IP addresses from the first list page
def ip_ip(data_text):
    global IPS
    IPZ = re.compile(r'<td data-title="IP">(\d+\.\d+\.\d+\.\d+)</td>', re.S)
    IPS = re.findall(IPZ, data_text)
    return IPS
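# Quick sanity check for the two patterns above. The sample row is an
# assumption shaped like the markup the regexes target, not real page content:
#
#     sample = '<td data-title="IP">1.2.3.4</td><td data-title="PORT">8080</td>'
#     ip_ip(sample)    # ['1.2.3.4']
#     port_p(sample)   # ['8080']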
def pinjie():
    # Requirement: join IPS and DKS element by element into IP:port strings
    ip_z = []
    for i in range(0, len(IPS)):
        ip_z.append(IPS[i] + ':' + DKS[i])
    # Resulting format: ['x.x.x.x:port', ...]
    dl_write(ip_z)
    return ip_z
# def merge_list(list1, list2):
#     Python has a "zipper" function, zip(list1, list2), which can do the merge
#     print(list(zip(list1, list2)))
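# A minimal runnable version of the zip-based merge the comment above describes
# (an illustrative alternative to pinjie(), not part of the original script):
def merge_list(list1, list2):
    # zip pairs elements positionally and stops at the shorter list
    return [ip + ':' + port for ip, port in zip(list1, list2)]
# e.g. merge_list(['1.2.3.4'], ['8080']) -> ['1.2.3.4:8080']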
# Write the merged IP:port list to disk, then kick off the connectivity test
def dl_write(ip_z):
    with open(RAW_FILE, 'w', encoding='utf-8') as f:
        for DIP in ip_z:
            f.write(DIP + '\n')
    test_corr()
# Read the scraped proxies back and keep only the ones that can connect
def test_corr():
    corr = []
    with open(RAW_FILE, 'r', encoding='utf-8') as f:
        for i in f:
            i = i.strip()
            try:
                # Cap the number of connection retries
                adapters.DEFAULT_RETRIES = 3
                proxy = f"{i}"
                res = requests.get(url=TEST_URL, timeout=2, proxies={"http": proxy})
                if res.status_code == 200:
                    print("Proxy IP " + i + " works")
                    corr.append(i)
                else:
                    print("Proxy IP " + i + " failed")
            except Exception:
                print("Error")
    text_qualified(corr)
    return corr
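# Note: adapters.DEFAULT_RETRIES mutates a library-wide default. A more
# contained alternative (a sketch, not from the original post) is to mount an
# HTTPAdapter with max_retries on a dedicated Session:
def make_session(retries=3):
    from requests.adapters import HTTPAdapter
    session = requests.Session()
    # Retries configured here apply only to this session, not globally
    session.mount('http://', HTTPAdapter(max_retries=retries))
    session.mount('https://', HTTPAdapter(max_retries=retries))
    return session
# e.g. res = make_session().get(TEST_URL, timeout=2, proxies={"http": proxy})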
# Save the proxies that passed the test
def text_qualified(corr):
    with open(GOOD_FILE, 'w', encoding='utf-8') as f:
        for i in corr:
            f.write(i + '\n')
# Entry point: fetch the two list pages through an existing (seed) proxy, then
# parse, merge, save, and test the results
def ipyc():
    with open(SEED_FILE, 'r', encoding='utf-8') as f:
        for i in f:
            proxy = f"{i.strip()}"
            data = requests.get(LIST_URL_1, headers=header, proxies={"http": proxy})
            data_1 = requests.get(LIST_URL_2, headers=header, proxies={"http": proxy})
            data_text1 = data_1.text
            data_text = data.text
            if data.status_code == 200:
                port_p(data_text)
                ip_ip(data_text)
                pinjie()
                ip_66(data_text1)
                break
ipyc()
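The call chain, for reference: ipyc() fetches the two proxy-list pages through a seed proxy; ip_ip() and port_p() pull the addresses and ports out of the first page; pinjie() joins them into IP:port strings and hands them to dl_write(), which writes them to disk and triggers test_corr(); test_corr() probes each proxy against the test URL and passes the survivors to text_qualified(), which saves the working proxies. (The pairs collected by ip_66() are stored but never merged or tested in this version.)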