从我监测到它是通过 ajax 获取 json 串的时候,剩下的就只是时间问题了……
1.python
用 urllib2 获取数据,再用 json 解析,结果解析过程让人各种无力吐槽。
# coding=utf-8
# Fetch a JSON feed over HTTP and print the "created_at" and "text" fields
# of every entry in the decoded top-level list.
import json

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen() into urllib.request.
    import urllib.request as urllib2

# NOTE(review): the URL below looks truncated in the published listing (no
# scheme or host is visible); restore the full address before running.
page = urllib2.urlopen(".json?source=1976748459").read()
jsonVal = json.loads(page)
for val in jsonVal:
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(val["created_at"])
    print(val["text"])
2.C#
HttpWebRequest + Newtonsoft.Json
Newtonsoft.Json 不是一般的好用啊,其中 DelHTML 方法用于去掉 HTML 标签。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Newtonsoft.Json.Linq;
using System.Net;
using System.IO;
using Newtonsoft.Json;
namespace JSON
{
    using System.Text.RegularExpressions;

    /// <summary>
    /// Console tool that pages through a JSON feed (newest to oldest), strips HTML
    /// from each entry's title and appends "id time title" lines to a text file.
    /// </summary>
    class Program
    {
        // NOTE(review): the published listing only shows "C:\" here (the file name was
        // lost), which cannot be opened as a file. "output.txt" is a placeholder;
        // pass the real path as the first command-line argument.
        private const string DefaultOutputPath = @"C:\output.txt";

        // NOTE(review): scheme and host are missing from the published listing; the
        // visible part is kept verbatim. Pass the full base URL as the second
        // command-line argument, otherwise WebRequest.Create will reject it.
        private const string DefaultBaseUrl = ".php?id=28231&kind=all&lastId=";

        // Paging cursor the original program starts from.
        private const int StartId = 208306240;

        private const int RequestTimeoutMs = 5000;

        private const string FieldSeparator = " ";

        // Named entities decoded by DelHTML: { pattern, replacement }.
        // "&amp;" is deliberately NOT in this table; it is decoded last so that text
        // such as "&amp;lt;" ends up as the literal "&lt;" instead of "<".
        private static readonly string[][] EntityReplacements =
        {
            new[] { @"&(quot|#34);", "\"" },
            new[] { @"&(lt|#60);", "<" },
            new[] { @"&(gt|#62);", ">" },
            new[] { @"&(nbsp|#160);", " " },
            new[] { @"&(iexcl|#161);", "\u00A1" },
            new[] { @"&(cent|#162);", "\u00A2" },
            new[] { @"&(pound|#163);", "\u00A3" },
            new[] { @"&(copy|#169);", "\u00A9" },
        };

        /// <summary>
        /// Downloads pages until the feed is exhausted and appends every entry to the
        /// output file.
        /// </summary>
        /// <param name="args">
        /// Optional: args[0] overrides the output file path, args[1] overrides the
        /// base URL to which the paging cursor is appended.
        /// </param>
        static void Main(string[] args)
        {
            string outputPath = args.Length > 0 ? args[0] : DefaultOutputPath;
            string baseUrl = args.Length > 1 ? args[1] : DefaultBaseUrl;
            int lastId = StartId;

            // using-blocks guarantee the file is flushed and closed even when a
            // request or the JSON parsing throws.
            using (FileStream fs = new FileStream(outputPath, FileMode.Append))
            using (StreamWriter sw = new StreamWriter(fs, Encoding.Default))
            {
                while (true)
                {
                    JArray page = FetchPage(baseUrl + lastId);
                    if (page == null || page.Count == 0)
                    {
                        break;
                    }

                    int smallestId = lastId;
                    foreach (JToken item in page)
                    {
                        Console.WriteLine(item["id"]);
                        sw.Write(FieldText(item, "id") + FieldSeparator);
                        sw.Write(FieldText(item, "time") + FieldSeparator);
                        sw.Write(DelHTML(FieldText(item, "title")));
                        sw.WriteLine();

                        smallestId = Math.Min(smallestId, Convert.ToInt32(item["id"]));
                    }

                    // If no entry had a smaller id, the next request would be
                    // identical to this one; stop instead of looping forever.
                    if (smallestId >= lastId)
                    {
                        break;
                    }

                    lastId = smallestId;
                }
            }
        }

        /// <summary>
        /// Performs one GET request and parses the body as a JSON array.
        /// </summary>
        /// <param name="url">Fully built request URL.</param>
        /// <returns>The parsed array, or null when the body is not a JSON array.</returns>
        private static JArray FetchPage(string url)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Timeout = RequestTimeoutMs;
            request.Method = "GET";

            string body;
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader reader = new StreamReader(
                response.GetResponseStream(), Encoding.GetEncoding("gb2312")))
            {
                body = reader.ReadToEnd();
            }

            // Drop raw control characters before parsing (the published listing had
            // lost the backslashes, which would have deleted the letters r, n and t).
            body = body.Replace("\r", "").Replace("\n", "").Replace("\t", "");

            return JsonConvert.DeserializeObject(body) as JArray;
        }

        /// <summary>
        /// Returns the string form of a field, or an empty string when it is absent.
        /// </summary>
        private static string FieldText(JToken item, string key)
        {
            JToken value = item[key];
            return value == null ? string.Empty : value.ToString();
        }

        /// <summary>
        /// Removes HTML markup from a string and decodes a small set of entities.
        /// </summary>
        /// <param name="Htmlstring">HTML fragment; null is treated as empty.</param>
        /// <returns>
        /// The text with script blocks, tags and comment markers removed, the
        /// entities quot/amp/lt/gt/nbsp/iexcl/cent/pound/copy decoded and all other
        /// numeric entities dropped.
        /// </returns>
        public static string DelHTML(string Htmlstring)
        {
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return string.Empty;
            }

            // Script blocks first, so their contents do not survive as plain text.
            Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase);
            // Any remaining tag.
            Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
            // A line break together with the whitespace that follows it.
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
            // Left-over comment delimiters.
            Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

            // Strip stray angle brackets and CRLF pairs. string.Replace returns a new
            // string, so the result has to be assigned. This runs BEFORE entity
            // decoding so that a decoded "&lt;" / "&gt;" is not removed again.
            Htmlstring = Htmlstring.Replace("<", "").Replace(">", "").Replace("\r\n", "");

            foreach (string[] entity in EntityReplacements)
            {
                Htmlstring = Regex.Replace(Htmlstring, entity[0], entity[1], RegexOptions.IgnoreCase);
            }

            // Any other numeric entity is dropped.
            Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
            // "&amp;" last, to avoid decoding the same text twice.
            Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);

            return Htmlstring;
        }
    }
}
3.NlpAnalysis
就剩这货来检验一发了
本文发布于:2024-01-29 11:41:50,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170649971215021.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |