学校要求每日收集相应的就业信息内容。
def create_excel(excel_data):
    """Write the collected job rows into a new Excel workbook via xlwings.

    The workbook name is "<month><day><college>.xlsx", e.g. "130软件学院.xlsx".
    NOTE(review): relies on module-level globals `xw` (xlwings) and `college`;
    reconstructed from a garbled paste — confirm against the original source.
    """
    localtime = time.localtime(time.time())
    # Reconstructed: the paste had "_mon) + _mday)" — presumably tm_mon/tm_mday.
    name = str(localtime.tm_mon) + str(localtime.tm_mday) + college + ".xlsx"
    app = xw.App(visible=True, add_book=False)
    # Create a fresh workbook.
    wb = app.books.add()
    # Reference the default worksheet.
    sht = wb.sheets['sheet1']
    # expand='table' writes the 2-D list as a contiguous table from A1.
    sht.range('A1').options(expand='table').value = excel_data
    print(sht.range('A1').value)
    wb.save(name)
    # Close the workbook, then quit Excel to release the process.
    wb.close()
    app.quit()
    return


def write_excel(excels):
    """Debug helper: print every cell of every row in `excels`."""
    for excel in excels:
        for word in excel:
            print(word)
    return
def get_html(job_pos, search):
    """Fetch a 51job search-result page and collect job-detail URLs.

    Builds the search URL for `job_pos`/`search`, downloads the page (GBK
    encoded), and extracts every job_href link from the embedded
    window.__SEARCH_RESULT__ JSON blob into the module-level `job_list`.

    NOTE(review): regex escapes were lost in the paste; the patterns below are
    a reconstruction — confirm against the live page markup.
    """
    target_url = get_url(job_pos, 3, urllib.parse.quote(search))
    print(target_url)
    response = request.urlopen(target_url)
    # 51job serves GBK-encoded pages.
    html_doc = response.read().decode('gbk')
    soup = BeautifulSoup(html_doc, 'html.parser')
    div_test = soup.find_all("script")
    # The search results live in an inline script: window.__SEARCH_RESULT__ = {...}
    ans = re.findall(r"window.__SEARCH_RESULT__\s=\s({.*})", div_test.__str__())
    # Pull every job_href URL (they end with "t=0") out of the JSON text.
    ans = re.findall(r'"job_href":"(https:[^"]*t=0)"', ans[0])
    for an in ans:
        # The JSON text escapes "/" as "\/"; strip the backslashes.
        job_list.append(str(an).replace("\\", ""))
    return job_list
def get_information(aim_websites):
    """Scrape up to 10 job-detail pages and build one row per posting.

    Each row is a fixed-order list (index, category, creation date, expiry,
    location, company, link, job title, recommended flag, company type, job
    nature, education, credit flags, size flags, headcount, demand) appended
    to the module-level `job_information_list`, which is returned.

    NOTE(review): reconstructed from a garbled paste; relies on module-level
    globals `job_information_list`, `type_table`, `aim_create`, `college`.
    The author notes that detail[0]/detail[2]/detail[3] positions can be
    wrong for some postings.
    """
    i = 1
    job_information_list.append(type_table)
    for website in aim_websites:
        job_information = []
        # Stop after the first 10 postings.
        if i == 11:
            break
        response = request.urlopen(website)
        html_doc = response.read().decode('gbk')
        soup = BeautifulSoup(html_doc, 'html.parser')
        # Company name / company type from the sidebar card.
        company_name = soup.find(class_="com_msg").p['title']
        company_type = soup.find(class_="com_tag").p['title']
        # Job header block.
        cn = soup.find(class_="cn")
        job_name = cn.h1['title']
        stuff_info = soup.find(class_="cn").find(class_="msg ltype")['title']
        ans = stuff_info.split("|")
        # Clean each "location | experience | education | headcount" field.
        detail = []
        aim_create.append(type_table)
        for an in ans:
            detail.append(str(an).replace(u'\xa0', u''))
        print('--------------------' + str(i) + '----------------------')
        job_information.append(i)  # index
        i += 1
        job_information.append(college)  # category
        # Creation date: today as YYYY/M/D.
        localtime = time.localtime(time.time())
        name = (str(localtime.tm_year) + '/' + str(localtime.tm_mon)
                + '/' + str(localtime.tm_mday))
        job_information.append(name)
        job_information.append('')  # expiry date (unknown)
        job_information.append(detail[0])  # work location
        job_information.append(company_name)
        job_information.append(website)  # link
        job_information.append(job_name)
        job_information.append('')  # recommended flag
        job_information.append(company_type)
        job_information.append('实习')  # job nature
        job_information.append(detail[2])  # education background
        job_information.append('1')  # good credit
        job_information.append('')  # bad credit
        job_information.append('')  # Fortune 500
        job_information.append('')  # listed company
        job_information.append('1')  # 200+ employees
        job_information.append('')  # under 200 employees
        job_information.append('1')  # number of positions
        job_information.append(detail[3])  # demanded headcount
        # Copy so later mutation of job_information cannot alias this row.
        job_information_list.append(job_information.copy())
    return job_information_list


def get_pos():
    """Placeholder; not implemented."""
    return
def get_url(pos_name, num, search):
    """Build the 51job search URL for a position name and search keyword.

    NOTE(review): relies on module-level `pos`, `url`, `company_size`; the
    `num` parameter is immediately overwritten with `company_size[3]` —
    kept for interface compatibility, but likely a latent bug.
    """
    postion = pos[pos_name]
    num = company_size[3]
    # The paste garbled "&degreefrom" into "°reegfrom" (HTML &deg; entity).
    target = (url + "/list/" + postion + ",000000,0000,00,9,99,"
              + urllib.parse.quote(search)
              + ",2,1.html?lang=c&postchannel=0000&workyear=99&cotype=99"
              + "&degreefrom=99&jobterm=99&companysize=" + num
              + "&ord_field=0&dibiaoid=0&line=&welfare=")
    return target


def turn_page(source):
    """Return the 9 page-URL variants of `source` (pages 1..9).

    Fixed: the pasted pattern r',(d*).html' had lost its backslashes;
    it must be r',(\\d*)\\.html' to match the page number segment.
    """
    ans_list = []
    for i in range(1, 10):
        ans = re.sub(r',(\d*)\.html', "," + str(i) + ".html", source)
        ans_list.append(ans)
    return ans_list


def position(chars):
    """Concatenate an iterable of characters into one string.

    Fixed: the original did `string = char + ""`, which discarded everything
    but the last character instead of accumulating.
    """
    return "".join(chars)


def to_unicode(string):
    """Render each character as a '\\uXX..' hex escape-like token.

    Fixed: the pasted replacement string '\\u' had lost its escaping
    backslash (bare '\\u' is a SyntaxError in Python 3).
    """
    ret = ''
    for v in string:
        ret = ret + hex(ord(v)).upper().replace('0X', '\\u')
    return ret
部分内容有误:需求人数、教育背景、发布时间三个字段有误,原因是字段本身在页面中的位置不固定(有待改进)。
本文发布于:2024-01-30 17:54:22,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170660846421788.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |