python基础爬虫

阅读: 评论:0

python基础爬虫

python基础爬虫

 基础爬虫,爬取代理池并进行测试


import requests
import re
from requests import adapters

# HTTP request headers passed as ``headers=`` when fetching the proxy listing
# pages; identifies the client as a desktop Chrome browser.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
}
def ip_66(data_text1):
    """Extract ``(ip, port)`` pairs from adjacent plain ``<td>`` cells.

    Args:
        data_text1: HTML text of a proxy listing page in which each IP cell
            is immediately followed by its port cell.

    Returns:
        A list of ``(ip, port)`` string tuples, in page order. Empty when
        nothing matches.

    Side effects:
        Stores the same list in the module-level global ``IPS_66``.
    """
    global IPS_66
    # Raw string so that \d and \. reach the regex engine intact; the dots are
    # escaped so that only literal dots separate the four octets.
    pattern = re.compile(r'<td>(\d+\.\d+\.\d+\.\d+)</td><td>(\d+)</td>', re.S)
    IPS_66 = re.findall(pattern, data_text1)
    return IPS_66
def port_p(data_text):
    """Extract every port number from ``<td data-title="PORT">`` cells.

    Args:
        data_text: HTML text of a proxy listing page.

    Returns:
        A list of port strings, in page order. Empty when nothing matches.

    Side effects:
        Stores the same list in the module-level global ``DKS``, which
        ``pinjie`` reads.
    """
    global DKS
    # Raw string so that \d reaches the regex engine as a digit class.
    pattern = re.compile(r'<td data-title="PORT">(\d+)</td>', re.S)
    DKS = re.findall(pattern, data_text)
    return DKS
def ip_ip(data_text):
    """Extract every IPv4 address from ``<td data-title="IP">`` cells.

    Args:
        data_text: HTML text of a proxy listing page.

    Returns:
        A list of dotted-quad IP strings, in page order. Empty when nothing
        matches.

    Side effects:
        Stores the same list in the module-level global ``IPS``, which
        ``pinjie`` reads.
    """
    global IPS
    # Raw string with escaped dots: only literal '.' may separate the octets.
    pattern = re.compile(r'<td data-title="IP">(\d+\.\d+\.\d+\.\d+)</td>', re.S)
    IPS = re.findall(pattern, data_text)
    return IPS
def pinjie():
    """Combine the scraped IPs and ports into ``"ip:port"`` strings.

    Reads the module-level globals ``IPS`` and ``DKS`` (assigned by ``ip_ip``
    and ``port_p``), pairs them positionally, hands the combined list to
    ``dl_write`` and returns it.

    Returns:
        A list of strings shaped like ``['xxxx:xxxx']``.
    """
    # Requirement: join IPS and DKS pairwise so each entry is "IP:PORT".
    # zip() stops at the shorter list, so a page that yielded more IPs than
    # ports no longer raises IndexError.
    ip_z = [ip + ':' + port for ip, port in zip(IPS, DKS)]
    dl_write(ip_z)
    return ip_z
# def merge_list(list1,list2):
#     Python has a built-in "zipper" function, zip(list1, list2), which can do this merge
#     print(list(zip(list1,list2)))
def dl_write(ip_z):
    """Write the scraped proxies to the pool file, then validate them.

    Args:
        ip_z: Iterable of ``"ip:port"`` strings; each is written on its own
            line.

    Side effects:
        Overwrites the pool file and then calls ``test_corr()``.
    """
    # NOTE(review): the original file name was lost when this script was
    # extracted from the web page (only "&#'" survived). 'proxy_pool.txt' is a
    # placeholder; it presumably must be the same file test_corr() reads.
    # TODO confirm.
    pool_path = 'proxy_pool.txt'
    with open(pool_path, 'w', encoding='utf-8') as f:
        # One proxy per line; '\n' restores the newline escape that the
        # extraction reduced to a literal 'n'.
        f.writelines(DIP + '\n' for DIP in ip_z)
    test_corr()
def test_corr(pool_path='proxy_pool.txt', test_url='http://example.com/'):
    """Probe every proxy in the pool file and keep the ones that respond.

    Each line of the pool file is used as an HTTP proxy for one GET request
    with a 2-second timeout. A 200 response marks the proxy as valid.

    Args:
        pool_path: File with one ``"ip:port"`` proxy per line.
            NOTE(review): the original file name was lost in extraction; this
            default is a placeholder. TODO confirm.
        test_url: URL fetched through each proxy.
            NOTE(review): the original URL was stripped (only "/" survived);
            this default is a placeholder. It must be an ``http://`` URL
            because only the "http" proxy key is set. TODO confirm.

    Returns:
        The list of valid proxies as the raw lines read from the file
        (trailing newline included), which is also passed to
        ``text_qualified``.
    """
    # Set the number of connection retries (done once; it does not depend on
    # the proxy being tested).
    adapters.DEFAULT_RETRIES = 3
    corr = []
    with open(pool_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Drop the trailing newline so it ends up neither in the proxy
            # address nor in the printed messages.
            proxy = line.strip()
            if not proxy:
                continue
            try:
                res = requests.get(url=test_url, timeout=2, proxies={"http": proxy})
            except requests.RequestException:
                # A dead or malformed proxy is expected; report and move on.
                print("错误")
                continue
            if res.status_code == 200:
                print("代理IP:" + proxy + "有效")
                # Keep the raw line: text_qualified writes entries verbatim.
                corr.append(line)
            else:
                print("代理IP:" + proxy + "无效")
    text_qualified(corr)
    return corr
def text_qualified(corr, path='proxy_valid.txt'):
    """Write the validated proxies to a file, one per line.

    Args:
        corr: Iterable of proxy strings, with or without a trailing newline.
        path: Destination file; it is overwritten.
            NOTE(review): the original file name was lost in extraction (only
            "&#'" survived); this default is a placeholder. TODO confirm.
    """
    with open(path, 'w', encoding='utf-8') as f:
        for entry in corr:
            # Normalise to exactly one newline per entry so the file stays
            # line-oriented whether or not the caller stripped the entries.
            f.write(entry.rstrip('\n') + '\n')
def ipyc(proxy_path='proxy_valid.txt',
         ip_port_url='http://www.kuaidaili.com/free/',
         ip_66_url='http://www.66ip.cn/',
         timeout=5):
    """Fetch the proxy listing pages through a known proxy and parse them.

    Walks the proxies in ``proxy_path`` until one yields a 200 response for
    the first listing page. It then runs ``port_p`` and ``ip_ip`` on that
    page, calls ``pinjie()``, runs ``ip_66`` on the second page, and stops.

    Args:
        proxy_path: File with one ``"ip:port"`` proxy per line.
            NOTE(review): the original file name was lost in extraction; this
            default is a placeholder. TODO confirm.
        ip_port_url: Page whose cells are marked ``data-title="IP"`` /
            ``data-title="PORT"``.
        ip_66_url: Page whose IP and port sit in adjacent plain ``<td>``
            cells.
            NOTE(review): both URLs were stripped from this copy of the
            script (only "/" survived). The defaults are guesses based on the
            markup the parsers expect. TODO confirm before use.
        timeout: Seconds to wait for each request, so a stalled proxy cannot
            hang the run. The original call set no timeout.
    """
    with open(proxy_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Strip the trailing newline so it is not part of the proxy address.
            proxy = line.strip()
            if not proxy:
                continue
            proxies = {"http": proxy}
            try:
                data = requests.get(ip_port_url, headers=header,
                                    proxies=proxies, timeout=timeout)
                data_1 = requests.get(ip_66_url, headers=header,
                                      proxies=proxies, timeout=timeout)
            except requests.RequestException:
                # This proxy is unusable; try the next one instead of aborting.
                continue
            if data.status_code == 200:
                # NOTE(review): the two ".text" reads are reconstructed from
                # the garbled "data_text1=data_text = " fragment. TODO confirm.
                data_text = data.text
                data_text1 = data_1.text
                port_p(data_text)
                ip_ip(data_text)
                pinjie()
                ip_66(data_text1)
                # One successful scrape is enough.
                break
# Top-level call: starts the scrape whenever this file is run or imported.
ipyc()

本文发布于:2024-01-30 13:37:38,感谢您对本站的认可!

本文链接:https://www.4u4v.net/it/170659306220372.html

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。

标签:爬虫   基础   python
留言与评论(共有 0 条评论)
   
验证码:

Copyright ©2019-2022 Comsenz Inc.Powered by ©

网站地图1 网站地图2 网站地图3 网站地图4 网站地图5 网站地图6 网站地图7 网站地图8 网站地图9 网站地图10 网站地图11 网站地图12 网站地图13 网站地图14 网站地图15 网站地图16 网站地图17 网站地图18 网站地图19 网站地图20 网站地图21 网站地图22 网站地图23