本例只抓取搜索结果的第一页,没有逐页爬取;想多爬几页的可以自己改一下,我上一篇博客里有翻页的例子。
直接上代码吧:
# -*- coding:utf-8 -*-
import re
class Baidutupian(object):
    """Download the images listed on one Baidu image-search result page.

    Only the first result page is fetched; there is no pagination.  The code
    sticks to syntax that is valid on Python 3 and still on Python 2.7.

    Args:
        Leixing_name: Search keyword (the kind of picture to look for).  It
            is stored as ``str(Leixing_name)``.
        base_url: Scheme and host of the search site.  NOTE(review): the
            published snippet had lost the host (only "/" was left), so every
            request failed; the Baidu image-search host is assumed here --
            confirm before relying on it.
        save_prefix: Text prepended to each file name to build the output
            path.  The default is the literal from the original snippet;
            NOTE(review): its path separators look like they were stripped
            when the post was published -- pass a real directory prefix
            (ending with a separator) to control where files are written.
        timeout: Seconds to wait for each HTTP response.
    """

    # Fixed part of the search request.  The keyword is sent separately as
    # the "word" query parameter so that it gets URL-encoded.
    SEARCH_PATH = (
        "/search/flip?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1"
        "&fm=result&fr=&sf=1&fmq=1543200380146_R&pv=&ic=0&nc=1&z=&se=1&"
        "showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&ctd=154"
        "3200380146%5E00_1291X734"
    )

    # Each search hit carries the full-size picture address in "objURL".
    OBJ_URL_RE = re.compile(r'"objURL":"(.*?)",', re.S)

    # Characters that cannot appear in a file name on common file systems.
    UNSAFE_CHARS_RE = re.compile(r'[\\/:*?"<>|]')

    def __init__(self, Leixing_name, base_url="https://image.baidu.com",
                 save_prefix=u"D:图片哈哈lpl", timeout=10):
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/67.0.3396.99 Safari/537.36"
            )
        }
        # SEARCH_PATH already starts with "/", so drop any trailing slash.
        self.base_url = base_url.rstrip("/")
        self.Leixing_name = str(Leixing_name)
        self.save_prefix = save_prefix
        self.timeout = timeout

    def send_request(self, url, params=None):
        """Fetch *url* with a GET request and return the raw response body.

        Args:
            url: Address to fetch.
            params: Optional mapping of extra query parameters.

        Returns:
            The response body as bytes, or ``None`` when the request fails
            or the server answers with an error status (the error is
            printed, not raised, so one bad link does not stop the crawl).
        """
        # Imported here so the parsing helpers stay importable even when the
        # third-party HTTP client is not installed.
        import requests

        try:
            response = requests.get(
                url, params=params, headers=self.headers, timeout=self.timeout
            )
            response.raise_for_status()
        except requests.RequestException as err:
            print(err)
            return None
        return response.content

    @classmethod
    def _extract_links(cls, html):
        """Return every ``objURL`` value found in *html* (bytes or text)."""
        if not html:
            return []
        if not isinstance(html, str):
            # Match on the native text type so the pattern and the page
            # agree; undecodable bytes are dropped rather than fatal.
            html = html.decode("utf-8", "ignore")
        return cls.OBJ_URL_RE.findall(html)

    @classmethod
    def _safe_filename(cls, link):
        """Build a file name from the last 10 characters of *link*.

        Characters that are illegal in file names (such as "/" or "?") are
        replaced with "_" so that opening the output file cannot fail on
        them.
        """
        return cls.UNSAFE_CHARS_RE.sub("_", link[-10:])

    def load_page(self, html):
        """Download and save every image referenced by a result page.

        Args:
            html: Body of the search result page, or ``None`` if fetching it
                failed (in which case nothing is downloaded).
        """
        for link in self._extract_links(html):
            print(link)
            data = self.send_request(link)
            if data is None:
                # The download failed and was already reported; move on.
                continue
            self.write_image(data, self._safe_filename(link))

    def write_image(self, data, filename):
        """Write the image bytes *data* to ``save_prefix + filename``."""
        print("[INFO]: 正在下载%s..." % filename)
        with open(self.save_prefix + filename, "wb") as f:
            f.write(data)

    def start_work(self):
        """Run the search for the configured keyword and save its images."""
        html = self.send_request(
            self.base_url + self.SEARCH_PATH,
            params={"word": self.Leixing_name},
        )
        self.load_page(html)


def main():
    """Ask for a keyword on the console and crawl the matching pictures."""
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    Leixing_name = read_line("请输入需要爬取的图片类型:")
    Tupian = Baidutupian(Leixing_name)
    Tupian.start_work()


if __name__ == "__main__":
    main()
也不知道爬点啥!!所以就又爬美女了!!!嘻嘻
本文发布于:2024-02-05 09:18:01,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170728589165235.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论)