依赖:
<dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml</artifactId><version>3.10-FINAL</version></dependency><dependency><groupId>org.apache.httpcomponents</groupId><artifactId>httpclient</artifactId><version>4.5.3</version></dependency>
<!-- <dependency>-->
<!-- <groupId>org.jsoup</groupId>-->
<!-- <artifactId>jsoup</artifactId>-->
<!-- <version>1.11.3</version>-->
<!-- </dependency>--><dependency><groupId>org.jsoup</groupId><artifactId>jsoup</artifactId><version>1.10.2</version></dependency><dependency><groupId>org.projectlombok</groupId><artifactId>lombok</artifactId><scope>provided</scope></dependency>
实体类: (这个实体类是可以自定义的)
import com.fasterxml.jackson.annotation.JsonFormat;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import java.util.List;

/**
 * Plain data holder for one scraped product.
 *
 * <p>Accessors, {@code equals}/{@code hashCode}/{@code toString}, the builder and both
 * constructors are generated by Lombok; replace the annotations with hand-written
 * getters, setters and constructors if Lombok is not available.
 *
 * <p>The declaration order of the fields is significant: it defines the parameter
 * order of the generated all-args constructor.
 */
@NoArgsConstructor
@AllArgsConstructor
@Builder
@Data
public class Product {

    /** Record identifier. */
    private Long id;

    /** Product number (the item id taken from the product URL). */
    private Long number;

    /** Price, kept as text exactly as scraped. */
    private String price;

    /** URL of the product page. */
    private String url;

    /** Product title. */
    private String title;

    /** URL of the main product image. */
    private String img;

    /** Shop name. */
    private String dianP;

    /** Creation time, as text. */
    private String createTime;

    /** Last update time, as text. */
    private String updateTime;

    /** Available colour names. */
    private List<String> color;
}
demo方法:
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.ptilestm.Product;
import org.apache.http.Consts;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.List;public class ProductServiceImp {private final static String taobaoUrlSign = "taobao";private final static String tmallUrlSign = "tmall";private final static String jingdongUrlSign = "jd";private final static String TMALL_PRODUCT_DETAIL = ".htm?id=";private final static String TAOBAO_PRODUCT_DETAIL = ".htm?id=";private final static String JD_PRODUCT_DETAIL = "/";public static Product soupTmallDetailByid(String url) {try {//需要爬取商品信息的网站地址//截取idString id=url.split("&id=")[1];id=id.split("&")[0];// 动态模拟请求数据CloseableHttpClient httpclient = ateDefault();HttpGet httpGet = new HttpGet(url);// 模拟浏览器浏览(user-agent的值可以通过浏览器浏览,查看发出请求的头文件获取)httpGet.setHeader("user-agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36");CloseableHttpResponse response = ute(httpGet);// 获取响应状态码int statusCode = StatusLine().getStatusCode();HttpEntity entity = Entity();// 如果状态响应码为200,则获取html实体内容或者json文件if (statusCode ==200) {String html = String(entity, Consts.UTF_8);// 提取HTML得到商品信息结果</span>Document doc = null;// doc获取整个页面的所有数据</span>doc = Jsoup.parse(html);//输出doc可以看到所获取到的页面源代码</span>//System.out.println(doc);// 通过浏览器查看商品页面的源代码,找到信息所在的div标签,再对其进行一步一步地解析Element item = doc.select("div[class='tb-wrap']").get(0);//Elements liList = ulList.select("div[class='product']");//循环liList的数据(具体获取的数据值还得看doc的页面源代码来获取,可能稍有变动)</span>//System.out.println("item = " + item);Product product = new Product();//for (Element item : ulList) {// 商品IDtry {product.setNumber(Long.valueOf(id));String title = item.select("div[class='tb-detail-hd']").select("h1").text();product.setTitle(title);product.setUrl(url);//System.out.println("商品title:"+ title);//颜色List<String> color=new ArrayList<>();Element liList = ElementsByClass("tm-img-prop").get(0);Element liList1 = ElementsByClass("J_TSaleProp").get(0);Elements liLists = liList1.select("li");for (Element element:liLists){String 
c=element.select("li").attr("title");color.add(c);}product.setColor(color);//商品图片Element itemImage = doc.select("div[id='J_DetailMeta']").get(0);String img=itemImage.select("img[id='J_ImgBooth']").attr("src");product.setImg(img);//店铺名称Element itemDp = doc.select("div[id='headerCon']").get(0);String dianP = itemDp.select("strong").text();product.setDianP(dianP);//价格try {//因为商品价格走的是Ajax异步请求,这里只能从底部Ajax里面截取出来String htmlDocString();String htmlDoc1=htmlDoc.split("TShop.Setup\(")[1];String htmlDoc2=htmlDoc1.split("\)")[0];JSONObject stm=JSONObject.parseObject(htmlDoc2);JSONObject itemDOJSONObject("itemDO");product.String("reservePrice"));}catch (Exception e){product.setPrice("价格获取失败");}return product;}catch (Exception e) {product.setId(0L);product.setTitle("商品不存在");return product;}// }}}catch (Exception e){e.printStackTrace();}return null;}public static void main(String[] args) {System.out.println(".htm?spm=a220m.1000858.1000725.13.4ba27a9bSAEts0&id=627829903600&skuId=4448961788888&user_id=2997265729&cat_id=2&is_b=1&rn=12f94c881dda696dc3848c212911076a");}
}
本文发布于:2024-01-31 03:13:29,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170664201224934.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |