【发布时间】:2018-12-29 04:56:59
【问题描述】:
我正在尝试抓取这个网页:https://www.nba.com/standings#/ 。
这是我的代码
我使用的是 page.getByXPath("//caption[@class='standings__header']/span")。
我不确定我的 XPath 是否写错了:按我的理解,它应该返回"东部"和"西部"这两个标题,但实际上什么也没有返回。
package Standings;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.HtmlSpan;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class Standings {
private static final String baseUrl = "https://www.nba.com/standings#/";
public static void main(String[] args) {
WebClient client = new WebClient();
client.getOptions().setJavaScriptEnabled(false);
client.getOptions().setCssEnabled(false);
client.getOptions().setUseInsecureSSL(true);
String jsonString = "";
ObjectMapper mapper = new ObjectMapper();
try {
HtmlPage page = client.getPage(baseUrl);
System.out.println(page.asXml());
page.getByXPath("//caption[@class='standings__header']/span")
} catch (IOException e) {
e.printStackTrace();
}
}
}
【问题讨论】:
标签: java web-scraping htmlunit