# 导入模块
import requests
from lxml import etree
import time
# 导入mysql封装的class类
from mysql import MysqlHelper
# Instantiate the MySQL helper; wujia() calls mc.execute_modify_sql() on it
mc = MysqlHelper()
def wujia(url, headers, pages=20, delay=0, timeout=10):
    """Crawl rental listing pages and store every listing in MySQL.

    For each page number from 1 to ``pages`` the URL template is filled in,
    the page is downloaded, and every ``<li>`` under ``<ul class="pList">``
    is parsed into an address, a description and a monthly price. Each
    record is printed and inserted into the ``5i5j`` table through the
    module-level ``mc`` helper.

    Args:
        url: URL template with one ``{}`` placeholder for the page number.
        headers: HTTP request headers sent with every page request.
        pages: Number of pages to crawl, starting at page 1. Defaults to 20,
            the value that used to be hard-coded.
        delay: Seconds to sleep after each page. 0 (the default) disables
            the pause, matching the previously commented-out sleep.
        timeout: Seconds to wait for the server before ``requests`` raises
            instead of blocking indefinitely.

    Raises:
        requests.exceptions.RequestException: if a page cannot be fetched.
    """
    # NOTE(review): the placeholders are wrapped in double quotes. If
    # MysqlHelper binds `data` through a DB-API cursor, the driver adds its
    # own quoting and these extra quotes would end up in the stored values.
    # Confirm against MysqlHelper.execute_modify_sql before changing.
    sql = 'insert into 5i5j(dizhi,room_data,money) values ("%s","%s","%s")'

    for page in range(1, pages + 1):
        fullurl = url.format(page)
        response = requests.get(fullurl, headers=headers, timeout=timeout)
        html = etree.HTML(response.text)
        if html is None:
            # Nothing parseable came back for this page; move on.
            continue

        for item in html.xpath('//ul[@class="pList"]/li'):
            record = _parse_listing(item)
            if record is None:
                continue
            dizhi, room_data, money = record
            print(dizhi if dizhi else '无地址')
            print(room_data)
            print(money)
            # Store the record via the MySQL helper.
            mc.execute_modify_sql(sql, (dizhi, room_data, money))

        if delay:
            time.sleep(delay)


def _parse_listing(item):
    """Return ``(address, description, price)`` for one listing ``<li>``.

    The address falls back to an empty string when it is absent. ``None`` is
    returned when the description or an integer price cannot be found, so
    incomplete items are skipped instead of raising.
    """
    address_nodes = item.xpath('./div[2]/div/p[2]/text()')
    dizhi = address_nodes[0].replace('·', '').strip() if address_nodes else ''

    intro_nodes = item.xpath('./div[2]/div[1]/p[1]/text()')
    price_nodes = item.xpath('./div[2]/div[1]/div//strong/text()')
    if not intro_nodes or not price_nodes:
        return None

    room_data = intro_nodes[0].replace(' ', '').replace('·', '')
    try:
        money = int(price_nodes[0])
    except ValueError:
        return None
    return dizhi, room_data, money
# Run the crawler with the listing URL template and browser-like headers.
if __name__ == '__main__':
    # NOTE(review): this template has no scheme or host, and the 'Host' and
    # 'Referer' values below also look truncated (presumably the domain was
    # stripped when the script was published). Restore the real values
    # before running; they are kept exactly as found here.
    url = '{}/'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        # Disabled in the original script:
        # 'Accept-Encoding': 'gzip, deflate, br',
        # 'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        # NOTE(review): looks like a session cookie captured from a browser;
        # it has presumably expired and should be refreshed before use.
        'Cookie': '_Jo0OQK=1349C4397FAF634DC28494B7372F7EE6A6BAFA7626819FB7027758023F7044ABBF04A8994B1C24AC6546C9F8AD8FBB6099D9577A4343988ABD03B1C034CCCF5A512DE8682CA7D10E3B498FB9E3C853EFEE298FB9E3C853EFEE215D8BEE34E43E5C0GJ1Z1IQ==; _ga=GA1.2.1741003594.1534567087; _gid=GA1.2.429105984.1534567087; yfx_c_g_u_id_10000001=_ck18081812380714557549321883736; PHPSESSID=a1eros861f9teh5n4r6lv6f30j; Hm_lvt_94ed3d23572054a86ed341d64b267ec6=1534567088,1534572065; zufang_BROWSES=41290006; domain=bj; yfx_f_l_v_t_10000001=f_t_1534567087448__r_t_1534567087448__v_t_1534591789846__r_c_0; Hm_lpvt_94ed3d23572054a86ed341d64b267ec6=1534591798',
        'Host': 'bj.5i5j',
        'Referer': '/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }
    wujia(url, headers)
# 本文发布于:2024-02-01 17:28:26,感谢您对本站的认可!
# 本文链接:https://www.4u4v.net/it/170678086138277.html
# 版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
# 留言与评论(共有 0 条评论) |