1. 所需jar包:jsoup-1.11.2.jar;htmlunit-2.29.jar
2.核心代码
public class HkStockSpiderUtil
{private final Logger logger = Logger(getClass());//url为调用地址public List<DzHkStockBusiInfo> getGgtTransNoticeFromSH(String url){int i = 0;List noticeList = new ArrayList();try{Document hkDoc = t(url).get();Elements elementTimes = ElementsByTag("span");String publicTimes = elementTimes.html();String[] noticeTimes = publicTimes.split("n");this.logger.info("noticeTimes:{}", JSONString(noticeTimes));Elements elements = ElementsByTag("a");for (Element element : elements){String linkUrl = element.attr("href");String tittle = ();String noticeDate = noticeTimes[i];DzHkStockBusiInfo dzHkStockBusiInfo = createStockOfSHModel(linkUrl, tittle, noticeDate, "1");noticeList.add(dzHkStockBusiInfo);i++;if (i > 4){break;}}this.logger.info("noticeList:{}", JSONString(noticeList));}catch (IOException e){e.printStackTrace();}return noticeList;}public List<DzHkStockBusiInfo> getGgtTransNoticeFromSZ(String url){int i = 0;List noticeList = new ArrayList();try{WebClient webClient = new WebClient(BrowserVersion.CHROME);WebClientOptions clientOptions = Options();clientOptions.setJavaScriptEnabled(true);clientOptions.setCssEnabled(false);clientOptions.setThrowExceptionOnScriptError(false);clientOptions.setTimeout(90000);HtmlPage htmlPage = (Page(url);webClient.waitForBackgroundJavaScript(5000L);String pageAsXml = htmlPage.asXml();Document document = Jsoup.parse(pageAsXml, url);Elements times = document.select("span.time");String publicTimes = times.html();String[] noticeTimes = publicTimes.split("n");this.logger.info("noticeTimes:{}", JSONString(noticeTimes));Elements elements = document.select("a.ellipsis.art-list-link");for (Element element : elements){String linkUrl = element.attr("href");String tittle = ();String noticeDate = noticeTimes[i];DzHkStockBusiInfo dzHkStockBusiInfo = createStockOfSHModel(linkUrl, tittle, noticeDate, "2");noticeList.add(dzHkStockBusiInfo);i++;if (i > 4){break;}}webClient.close();this.logger.info("noticeList:{}", JSONString(noticeList));}catch (IOException 
e){e.printStackTrace();}return noticeList;}//实体
public DzHkStockBusiInfo createStockOfSHModel(String linkUrl, String tittle, String noticeDate, String noticeType){DzHkStockBusiInfo busiInfo = new DzHkStockBusiInfo();if (!StringUtils.isEmpty(linkUrl)){String linkUrlTemp = "";if (noticeType.equals("1")){linkUrlTemp = "" + linkUrl;}if (noticeType.equals("2")){linkUrl = linkUrl.substring(1, linkUrl.length());linkUrlTemp = "" + linkUrl;}busiInfo.setLinkUrl(linkUrlTemp);}if (!StringUtils.isEmpty(tittle)){busiInfo.setTittle(tittle);}if (!StringUtils.isEmpty(noticeDate)){busiInfo.setNoticeDate(noticeDate);}if (!StringUtils.isEmpty(noticeType)){busiInfo.setNoticeType(noticeType);}return busiInfo;}
}
本文发布于:2024-02-02 04:40:08,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170682000741413.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |