// I have recently become interested in web crawlers. The code below downloads pictures from Baidu Images.
/**
 * Searches Baidu Images for a fixed keyword and downloads the pictures found on the result page.
 *
 * <p>Steps: load {@code http://image.baidu.com/}, put the keyword into the {@code word} input of
 * form {@code f1}, click the first submit input, then for every
 * {@code //div[@id='imgid']/div[@class='imgpage']} node read the {@code data-objurl} attribute
 * from the textual form of its last grandchild element and hand the decoded URL to
 * {@link #download(String, String)} with the target directory {@code f:/src/}.
 *
 * <p>Any failure (network, missing form, unexpected page layout) is printed via
 * {@code printStackTrace()} and ends the run; nothing is thrown to the caller.
 */
public static void getPicture(){
    WebClient webClient=new WebClient();
    try {
        webClient.getOptions().setCssEnabled(true);
        webClient.getOptions().setJavaScriptEnabled(true);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        webClient.setAjaxController(new NicelyResynchronizingAjaxController());
        // NOTE(review): this wait is issued before any page is loaded; kept from the original.
        webClient.waitForBackgroundJavaScript(600*1000);

        String url = "http://image.baidu.com/";
        HtmlPage page=webClient.getPage(url);
        sop("get page success...");

        final HtmlForm form = page.getFormByName("f1");
        final HtmlTextInput textField = form.getInputByName("word");
        textField.setValueAttribute("范冰冰1");

        List list = page.getByXPath("//form/input[@type=\"submit\"]");
        if(list.isEmpty()){
            // No submit button on the page: nothing to click, so nothing to download.
            return;
        }
        HtmlSubmitInput go = (HtmlSubmitInput)list.get(0);
        HtmlPage p =(HtmlPage)go.click();
        // Give the scripts on the result page a few seconds to run.
        webClient.waitForBackgroundJavaScript(3*1000);

        List imgList = p.getByXPath("//div[@id=\'imgid\']/div[@class=\'imgpage\']");
        int k=1;
        for(int i=0;i<imgList.size();i++){
            HtmlDivision imgDiv =(HtmlDivision)imgList.get(i);
            // Skip containers that lack the expected child / grandchild instead of hitting a null.
            if(imgDiv.getLastElementChild() == null
                    || imgDiv.getLastElementChild().getLastElementChild() == null){
                continue;
            }
            HtmlElement element = (HtmlElement) imgDiv.getLastElementChild().getLastElementChild();

            String str = extractImageUrl(element.toString());
            if(str.equals("")){
                continue;
            }
            // Explicit charset: the one-argument decode() is deprecated and platform dependent.
            str = URLDecoder.decode(str, "UTF-8");
            download(str,"f:/src/");
            // download() does not report failures, so this message is logged unconditionally.
            sop("下载成功:");
            sop("百度图片地址"+(k++)+": "+str);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Release the client's windows and background script threads.
        // NOTE(review): WebClient.close() requires HtmlUnit 2.16 or newer - confirm the version in use.
        webClient.close();
    }
}

/**
 * Extracts the value of the {@code data-objurl} attribute from an element's textual form,
 * cut off right after its image extension ({@code .png}, {@code .jpeg} or {@code .jpg},
 * checked in that order).
 *
 * @param markup textual form of an element, expected to contain {@code data-objurl="..."}
 * @return the (still URL-encoded) image address, or {@code ""} when the attribute is absent
 *         or its value has none of the supported extensions
 */
private static String extractImageUrl(String markup){
    int attr = markup.indexOf("data-objurl");
    if(attr < 0){
        return "";
    }
    // 13 = length of "data-objurl" plus the following ="
    int begin = attr + 13;
    if(begin > markup.length()){
        return "";
    }
    // Restrict the extension search to this attribute's value so that an extension
    // appearing in some other attribute cannot produce a wrong or inverted range.
    int close = markup.indexOf("\"", begin);
    String value = close < 0 ? markup.substring(begin) : markup.substring(begin, close);

    int end = -1;
    if(value.contains(".png")){
        end = value.indexOf(".png")+4;
    }else if(value.contains(".jpeg")){
        end = value.indexOf(".jpeg")+5;
    }else if(value.contains(".jpg")){
        end = value.indexOf(".jpg")+4;
    }
    if(end < 0){
        return "";
    }
    return value.substring(0, end);
}
/**
 * Progress-message hook used by the other methods in this file.
 *
 * <p>Currently a no-op: the {@code System.out.println} call is disabled, so the
 * argument is accepted and discarded without producing any output.
 *
 * @param obj the message that would be printed
 */
public static void sop(Object obj){
    // Output intentionally disabled; re-enable System.out.println(obj) to see progress messages.
}
/**
 * Downloads the picture at the given network address into a local directory.
 *
 * <p>The local file name is the part of {@code url} after its last {@code '/'}. The target
 * directory is created (including missing parents) when it does not exist yet, and the
 * download then proceeds in the same call. Addresses that do not start with {@code "http"}
 * are ignored. The output file is only created once the remote stream has been opened, so a
 * failed request leaves no empty file behind.
 *
 * <p>This is a best-effort operation: failures are printed via {@code printStackTrace()} and
 * are not propagated to the caller.
 *
 * @param url  absolute address of the picture
 * @param path target directory; the creation message assumes it ends with a separator
 */
public static void download(String url,String path){
    if(url == null || path == null || !url.startsWith("http")){
        return;
    }
    FileOutputStream fos=null;
    InputStream ins =null;
    HttpURLConnection httpCon = null;
    try {
        // The URL comes from a scraped page: File.getName() strips any directory part so the
        // derived name cannot point outside the target directory.
        String downloadName= new File(url.substring(url.lastIndexOf("/")+1)).getName();
        if(downloadName.length() == 0){
            return;
        }

        File dirFile = new File(path);
        if(!dirFile.exists()){
            if(dirFile.mkdirs()){
                if(path.length()>0){
                    sop("creat document file \""+path.substring(0,path.length()-1)+"\" success...\n");
                }
            }
        }
        if(!dirFile.isDirectory()){
            // Directory could not be created (or the path is not a directory): nowhere to write.
            return;
        }

        URLConnection con = new URL(url).openConnection();
        httpCon =(HttpURLConnection) con;
        ins = httpCon.getInputStream();

        fos = new FileOutputStream(new File(dirFile, downloadName));
        byte[] buffer = new byte[1024];
        int num;
        while((num=ins.read(buffer)) != -1){
            // Write the whole chunk at once instead of one byte per call.
            fos.write(buffer, 0, num);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally{
        // Close each resource independently so that one failure cannot leak the other.
        if(fos != null){
            try {
                fos.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if(ins != null){
            try {
                ins.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if(httpCon != null){
            httpCon.disconnect();
        }
    }
}
// Parts of this code are adapted from other people's work; thank you for your understanding.