实战:爬取音乐网站(Requests)
import re # python 的正则库
import requests # python 的requests库
import time

# NOTE(review): this script was recovered from a listing whose line breaks and
# several tokens were lost. The scheme/host/path portion of both URLs did not
# survive (only the trailing "=" and "/" remain), so the two prefixes below are
# reproduced exactly as found and must be completed before the script can run.
LIST_URL_PREFIX = "="
SONG_URL_PREFIX = "/"

PAGE_SIZE = 20           # entries requested per chart page
REQUEST_TIMEOUT = 10     # seconds; keeps a stalled connection from hanging the run
DOWNLOAD_DELAY = 0.5     # pause between downloads, in seconds
DOWNLOAD_DIR = "/Users/yuanshuai/Downloads/music"  # must already exist

# Each chart entry carries the song title and its numeric id as attributes.
TITLE_PATTERN = re.compile(r'title="(.*?)" sid')
ID_PATTERN = re.compile(r'sid="(.*?)"')

# Request headers sent when fetching the chart pages.
# NOTE(review): the "Host" value looks truncated and "Referer" is empty in the
# source listing -- confirm both against the real site before use.
# (A commented-out "Cookie" entry in the original listing is omitted here.)
header = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    "Host": "www.htqyy",
    "Referer": "",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36",
}


def build_list_url(page_index):
    """Return the chart-page URL for the zero-based *page_index*."""
    return LIST_URL_PREFIX + str(page_index) + "&pageSize=" + str(PAGE_SIZE)


def build_song_url(song_id):
    """Return the MP3 download URL for *song_id*."""
    return SONG_URL_PREFIX + str(song_id) + "/mp3/7"


def extract_songs(page_html):
    """Return ``(song_id, title)`` pairs found in one chart page.

    Ids and titles are matched independently and then paired positionally;
    if the two patterns match a different number of times, the surplus
    matches are dropped instead of causing an index error later on.
    """
    ids = ID_PATTERN.findall(page_html)
    titles = TITLE_PATTERN.findall(page_html)
    return list(zip(ids, titles))


def safe_filename(title):
    """Return *title* with path separators replaced by underscores.

    Titles come from scraped HTML, so they must not be able to point
    outside the download directory.
    """
    return title.replace("/", "_").replace("\\", "_")


def fetch_song_list(page_count):
    """Fetch the first *page_count* chart pages and return all songs on them.

    Raises:
        requests.RequestException: on a network failure or an HTTP error status.
    """
    songs = []
    for page_index in range(page_count):
        response = requests.get(
            build_list_url(page_index), headers=header, timeout=REQUEST_TIMEOUT
        )
        response.raise_for_status()
        songs.extend(extract_songs(response.text))
    return songs


def download_songs(songs):
    """Download every ``(song_id, title)`` pair in *songs* into DOWNLOAD_DIR.

    Raises:
        requests.RequestException: on a network failure or an HTTP error status.
        OSError: if the target file cannot be written.
    """
    for index, (song_id, title) in enumerate(songs, start=1):
        response = requests.get(build_song_url(song_id), timeout=REQUEST_TIMEOUT)
        # Fail loudly rather than saving an HTML error page as an .mp3 file.
        response.raise_for_status()
        print("正在下载第", index, "首,歌曲名为:", title)
        target = "{}/{}.mp3".format(DOWNLOAD_DIR, safe_filename(title))
        with open(target, "wb") as f:
            f.write(response.content)
        # Throttle so the server is not hit with back-to-back requests.
        time.sleep(DOWNLOAD_DELAY)


def main():
    """Ask how many chart pages to crawl, then download every song listed."""
    # The value is used as a page count, so the prompt asks for a count.
    page_count = int(input("请输入您要爬取的页数:"))
    download_songs(fetch_song_list(page_count))


if __name__ == "__main__":
    main()
本文发布于:2024-02-02 07:44:52,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170683109342359.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论)