# -*- coding:utf-8 -*-
# Original author's note (translated): "Only this place needs changing: {}rs/ --
# replace 'jn' with the lowercase pinyin initials of your city."
# NOTE(review): that note does not match anything in the current URL template;
# it looks like a leftover from an earlier version of the script -- confirm.
"""Scrape sales-board restaurant listings per city/category into meituan.csv."""
import csv
import json
import random
import time

import requests
from lxml import etree


class LianjiaSpider(object):
    """Download sales-board JSON pages and append their entries to a CSV file.

    For every (city id, category id) pair the spider formats ``self.url``,
    fetches it, reads the ``saleBoardPoiList`` array from the JSON response and
    appends one CSV row per entry to ``meituan.csv``.
    """

    # City id -> label written in the first CSV column.
    CITY_NAMES = {
        20: "广州",
        57: "武汉",
        105: "哈尔滨",
        66: "沈阳",
        59: "成都",
        55: "南京",
        42: "西安",
        116: "长春",
        96: "济南",
        50: "杭州",
    }

    # Category id -> label written in the second CSV column.
    CATEGORY_NAMES = {
        1: "火锅",
        2: "自助餐",
        3: "烧烤龙虾",
        4: "地方菜",
        5: "异国料理",
        6: "小吃快餐",
        7: "甜点饮品",
        8: "蛋糕",
    }

    # Keys copied from each list entry, in CSV column order (after city/category).
    POI_FIELDS = (
        "id",
        "name",
        "weekSaleCount",
        "score",
        "avgPrice",
        "cateName",
        "areaName",
        "distance",
        "rank",
        "frontImg",
        "oneSentence",
    )

    OUTPUT_FILE = 'meituan.csv'

    # Seconds to wait for the server before giving up on a request.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # NOTE(review): the scheme/host/path in front of "={}" appear to have
        # been stripped from this copy of the script; restore the full
        # endpoint before running. The two "{}" slots receive the city id and
        # the category id (see main()).
        self.url = "={}&boardType=8&districtId=0&cateId={}&offset=0&limit=50"
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1"}

    def get_page(self, url, i, j):
        """Fetch ``url`` and hand the response body to :meth:`parse_page`.

        Args:
            url: Fully formatted request URL.
            i: City id the URL was built for.
            j: Category id the URL was built for.

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        print(url)
        res = requests.get(url=url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        # Force UTF-8 so the Chinese text is decoded consistently.
        res.encoding = "utf-8"
        html = res.text
        self.parse_page(html, i, j)

    def parse_page(self, html, i, j):
        """Parse one JSON response and append its entries to the CSV file.

        The response is expected to be a JSON object whose
        ``saleBoardPoiList`` member is a list of entries; an empty board looks
        like ``{"totalSize":0,"saleBoardPoiList":[],"boardDigest":null}``.
        The payload is parsed as JSON rather than sliced at fixed character
        offsets, so it keeps working whatever the length of ``totalSize`` or
        the content of the trailing members.

        Args:
            html: Raw response body (JSON text).
            i: City id; mapped to a label via ``CITY_NAMES`` (falls back to
                the id itself when unknown).
            j: Category id; mapped via ``CATEGORY_NAMES`` with the same
                fallback.
        """
        print(i)
        try:
            payload = json.loads(html)
        except ValueError as err:
            # A non-JSON body (error page, block page) should not abort the
            # whole crawl; report it and move on to the next page.
            print("skipping city %s category %s: invalid JSON (%s)" % (i, j, err))
            return

        records = payload.get("saleBoardPoiList") if isinstance(payload, dict) else None
        if not records:
            return

        city_name = self.CITY_NAMES.get(i, str(i))
        category_name = self.CATEGORY_NAMES.get(j, str(j))
        rows = [
            [city_name, category_name] + [record.get(field, "") for field in self.POI_FIELDS]
            for record in records
        ]
        print(len(rows))

        # Open the file once per page instead of once per row.
        with open(self.OUTPUT_FILE, 'a', newline='', encoding='utf-8') as f:
            csv.writer(f).writerows(rows)

    def main(self):
        """Crawl every configured city/category pair with a polite delay."""
        # Author's id notes: 20 = Guangzhou, 118 = Hong Kong, 165 = Baishan,
        # 170 = Hegang (only the ids listed in CITY_NAMES are crawled).
        for i in self.CITY_NAMES:
            for j in self.CATEGORY_NAMES:
                # Random 3-5 second pause between requests.
                time.sleep(random.randint(3, 5))
                url = self.url.format(i, j)
                self.get_page(url, i, j)


if __name__ == '__main__':
    start = time.time()
    spider = LianjiaSpider()
    spider.main()
    end = time.time()
    print("执行时间:%.2f" % (end - start))
本文发布于:2024-02-02 06:05:22,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170682512141854.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |