yixingzhou

//最近对网页爬虫感兴趣。下面是用 HtmlUnit 从百度图片搜索结果中下载图片的示例代码。

/**
 * Searches Baidu Images for a fixed keyword and downloads the matching pictures.
 *
 * <p>The method loads {@code http://image.baidu.com/}, fills the {@code word}
 * field of form {@code f1}, clicks the first submit input and waits for
 * background JavaScript. It then visits every {@code div.imgpage} under
 * {@code #imgid}, takes the last element child of its last element child and
 * reads that element's {@code data-objurl} attribute. Values containing a
 * {@code .png}, {@code .jpeg} or {@code .jpg} extension are cut right after the
 * extension, URL-decoded as UTF-8 and handed to
 * {@link #download(String, String)} with target directory {@code f:/src/}.
 *
 * @throws IllegalStateException if an I/O error occurs while loading the page,
 *         submitting the form or decoding an image URL
 */
public static void getPicture(){
    WebClient webClient = new WebClient();
    try {
        webClient.getOptions().setCssEnabled(true);
        webClient.getOptions().setJavaScriptEnabled(true);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        webClient.setAjaxController(new NicelyResynchronizingAjaxController());
        // NOTE(review): called before any page is loaded, so there is presumably
        // nothing to wait for yet - confirm whether this call is still needed.
        webClient.waitForBackgroundJavaScript(600*1000);

        String url = "http://image.baidu.com/";
        HtmlPage page = webClient.getPage(url);
        sop("get page success...");

        HtmlForm form = page.getFormByName("f1");
        HtmlTextInput textField = form.getInputByName("word");
        textField.setValueAttribute("范冰冰1");

        List<?> submitInputs = page.getByXPath("//form/input[@type=\"submit\"]");
        if (submitInputs.isEmpty()) {
            // Without a submit input the search cannot be triggered.
            sop("no submit input found on " + url);
            return;
        }
        HtmlSubmitInput go = (HtmlSubmitInput) submitInputs.get(0);
        HtmlPage resultPage = (HtmlPage) go.click();
        webClient.waitForBackgroundJavaScript(3*1000);

        List<?> imgPages = resultPage.getByXPath("//div[@id=\'imgid\']/div[@class=\'imgpage\']");

        int count = 0;
        for (Object node : imgPages) {
            HtmlDivision imgDiv = (HtmlDivision) node;

            // Either level may be missing when the result markup is empty or has changed.
            if (imgDiv.getLastElementChild() == null
                    || imgDiv.getLastElementChild().getLastElementChild() == null) {
                continue;
            }
            HtmlElement element = (HtmlElement) imgDiv.getLastElementChild().getLastElementChild();

            // Read the attribute itself instead of slicing it out of toString():
            // the old offsets were measured from the start of the whole tag text
            // and could produce an end index in front of the start index.
            String imageUrl = truncateAtImageExtension(element.getAttribute("data-objurl"));
            if (imageUrl == null) {
                continue;
            }
            imageUrl = URLDecoder.decode(imageUrl, "UTF-8");

            download(imageUrl, "f:/src/");

            sop("下载成功:");
            count++;
            sop("百度图片地址"+count+": "+imageUrl);
        }
    } catch (java.io.IOException e) {
        throw new IllegalStateException("Failed to fetch or process Baidu image results", e);
    } finally {
        // Release the windows and threads held by the client.
        webClient.close();
    }
}

/**
 * Cuts {@code rawUrl} right after the first {@code .png}, {@code .jpeg} or
 * {@code .jpg} it contains.
 *
 * @param rawUrl candidate image URL, may be {@code null} or empty
 * @return the prefix of {@code rawUrl} ending with the earliest-ending
 *         extension, or {@code null} if none of the extensions occurs
 */
private static String truncateAtImageExtension(String rawUrl){
    if (rawUrl == null) {
        return null;
    }
    int end = -1;
    for (String ext : java.util.Arrays.asList(".png", ".jpeg", ".jpg")) {
        int at = rawUrl.indexOf(ext);
        if (at >= 0 && (end < 0 || at + ext.length() < end)) {
            end = at + ext.length();
        }
    }
    return end < 0 ? null : rawUrl.substring(0, end);
}
/**
 * Logging hook used by the scraper methods.
 *
 * <p>The body contains no executable statement (the console print it once
 * held is commented out), so every call currently has no effect.
 *
 * @param obj the message to log; currently ignored
 */
public static void sop(Object obj){
    // Console output disabled: System.out.println(obj);
}
/**
 * Downloads the image at the given network address into a local directory.
 *
 * <p>The file name is the part of {@code url} after its last {@code '/'}.
 * The target directory is created if it does not exist, and the download then
 * proceeds in the same call. URLs that do not start with {@code "http"} are
 * ignored and no file is created for them. Any exception is reported with
 * {@code printStackTrace()} and not rethrown.
 *
 * @param url  address of the image to fetch
 * @param path directory the file is written into
 */
public static void download(String url,String path){
    FileOutputStream fos = null;
    InputStream ins = null;
    HttpURLConnection httpCon = null;
    try {
        if (!url.startsWith("http")) {
            // Checked before touching the file system so no empty file is left behind.
            return;
        }

        // The URL comes from a scraped page: keep only the last path component so
        // a crafted name cannot escape the target directory.
        String downloadName = new File(url.substring(url.lastIndexOf("/")+1)).getName();

        File dirFile = new File(path);
        if (!dirFile.exists()) {
            if (dirFile.mkdirs()) {
                if (path.length()>0) {
                    sop("creat document file \""+path.substring(0,path.length()-1)+"\" success...\n");
                }
            }
        }

        // Connect first; the output file is only created once the server has answered.
        httpCon = (HttpURLConnection) new URL(url).openConnection();
        ins = httpCon.getInputStream();

        // File(parent, child) inserts the separator when path has no trailing one.
        fos = new FileOutputStream(new File(dirFile, downloadName));

        byte[] buffer = new byte[1024];
        int num;
        while ((num = ins.read(buffer)) != -1) {
            fos.write(buffer, 0, num);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close each resource independently so one failure cannot leak the other.
        if (fos != null) {
            try {
                fos.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (ins != null) {
            try {
                ins.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (httpCon != null) {
            httpCon.disconnect();
        }
    }
}

//有借鉴、请见谅

分类:

技术点:

相关文章: