小Demo转自csdn某作者,
本例子仅用于测试:页面个数直接写死为100个,而实际页数可能更少或更多,因此容易报错。更好的做法是获取“下一页”按钮的地址并逐页访问,当取不到“下一页”的内容时跳出循环。
多线程只体现在图片文件的下载保存上;也可以在遍历 Elements 的循环中再加一层多线程,用来并发访问各个页面。
本案例需要jsoup包的支持,可到下方url下载
jsoup jar包
Test.java==============>主方法
st.main;import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.MalformedURLException;
import java.URL;
import java.URLConnection;
import java.util.ArrayList;
import java.util.List;
import urrent.ExecutorService;
import urrent.Executors;import org.jsoup.Jsoup;
import des.Document;
import des.Element;
import org.jsoup.select.Elements;public class Test {public static void main(String[] args) {ExecutorService executor = wFixedThreadPool(5);Document doc = null;FileWriter writer = null;String rui="index";List<String> alist = new ArrayList<String>();//int keyword = 4;for(int keyword=4;keyword<100;keyword++){try {//创建页面对象doc = t(""+keyword+".shtml").userAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36").timeout(10000).get();//根据标签和class id获取元素Elements div = doc.select("div.lb_box");//根据标签获取元素Elements dl = div.select("dl");Elements dd = div.select("dd");Elements pages = dd.select("a");for(Element e : pages){System.out.());System.out.println(e.attr("href"));Document imgdoc = t(e.attr("href")).userAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36").timeout(10000).get(); Elements scroll = imgdoc.select("div.effect_scroll");Elements li = scroll.select("li");Elements urls = li.select("a");int i=0;for(Element ipage : urls){Document imgpage = t(ipage.attr("href")).userAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36").timeout(10000).get(); Elements imgediv = imgpage.select("div.l_effect_img_mid");Element img = imgediv.select("img").first();FileOutUtils fo =new FileOutUtils(img, e.text());fo.start();System.out.println(i);i++;}}} catch (IOException e) {e.printStackTrace();}}}/*** 单线程下载* @author wangshiyu * @param e* @param filepath*/public static void savefile(Element e,String filepath){String src=e.attr("src");//获取img中的src路径// System.out.println(src);//获取后缀名String imageName = src.substring(src.lastIndexOf("/") + 1,src.length());//连接urlURL url;System.out.println(src);try {url = new URL(src);URLConnection uri=url.openConnection();//获取数据流InputStream isInputStream();//写入数据流File file = new 
File("E://imgs//"+filepath);if(!ists()){file.mkdirs();}OutputStream os = new FileOutputStream(new File("E://imgs//"+filepath+"//", imageName)); byte[] buf = new byte[1024]; int l=0; while((lad(buf))!=-1){os.write(buf, 0, l);} } catch (MalformedURLException e1) {e1.printStackTrace();} catch (FileNotFoundException e1) {e1.printStackTrace();} catch (IOException e1) {e1.printStackTrace();}}
}
FileOutUtils.java ==============>多线程保存到本地
st.main;import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.MalformedURLException;
import java.URL;
import java.URLConnection;import des.Element;public class FileOutUtils extends Thread {private Element e;private String filepath;public FileOutUtils(Element e, String filepath) {this.e = e;this.filepath = filepath;}/*** 多线程下载* @author wangshiyu* @param e* @param filepath*/public void savefile(Element e,String filepath){String src=e.attr("src");//获取img中的src路径// System.out.println(src);//获取后缀名String imageName = src.substring(src.lastIndexOf("/") + 1,src.length());//连接urlURL url;try {url = new URL(src);URLConnection uri=url.openConnection();//获取数据流InputStream isInputStream();//写入数据流File file = new File("E://imgs//"+filepath);if(!ists()){file.mkdirs();}OutputStream os = new FileOutputStream(new File("E://imgs//"+filepath+"//", imageName)); byte[] buf = new byte[1024]; int l=0; while((lad(buf))!=-1){os.write(buf, 0, l);} } catch (MalformedURLException e1) {e1.printStackTrace();} catch (FileNotFoundException e1) {e1.printStackTrace();} catch (IOException e1) {e1.printStackTrace();}}public void run() {this.savefile(this.e,this.filepath);}
}
本文发布于:2024-01-31 01:46:52,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170663681124446.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |