【发布时间】:2024-01-22 23:44:01
【问题描述】:
.................................................. ....................................
我编写了以下代码来使用 curl 执行多 url。
首先将 url 存储在数组中。
我使用 foreach 来执行。
但我不知道为什么不能从 urls 中获取数据?
$urls = array(
"http://emalls.ir/%D9%84%DB%8C%D8%B3%D8%AA-%D9%82%DB%8C%D9%85%D8%AA_%D9%84%D9%86%D8%B2-%D8%AF%D9%88%D8%B1%D8%A8%DB%8C%D9%86-%D9%81%DB%8C%D9%84%D9%85-%D8%A8%D8%B1%D8%AF%D8%A7%D8%B1%DB%8C~Category~643",
"http://emalls.ir/%D9%84%DB%8C%D8%B3%D8%AA-%D9%82%DB%8C%D9%85%D8%AA_%D9%84%D9%86%D8%B2-%D8%AF%D9%88%D8%B1%D8%A8%DB%8C%D9%86-%D9%81%DB%8C%D9%84%D9%85-%D8%A8%D8%B1%D8%AF%D8%A7%D8%B1%DB%8C~Category~643",
);
$browsers = array(
"standard" => array (
"user_agent" => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13",
"language" => "en-us,en;q=0.5"
),
);
foreach ($urls as $url) {
echo "URL: $url\n";
foreach ($browsers as $test_name => $browser) {
$ch = curl_init();
// set url
curl_setopt($ch, CURLOPT_URL, $url);
// set browser specific headers
curl_setopt($ch, CURLOPT_HTTPHEADER, array(
"User-Agent: {$browser['user_agent']}",
"Accept-Language: {$browser['language']}"
));
// we don't want the page contents
curl_setopt($ch, CURLOPT_NOBODY, 1);
// we need the HTTP Header returned
curl_setopt($ch, CURLOPT_HEADER, 1);
// return the results instead of outputting it
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_ENCODING, 'UTF-8');
$page = curl_exec($ch);
$dom = new DOMDocument('1.0', 'utf-8');
libxml_use_internal_errors(true);
@$dom->loadHTML(mb_convert_encoding($page, 'HTML-ENTITIES', 'UTF-8'));
libxml_clear_errors();
$xpath = new DOMXpath($dom);
$data = array();
$table_rows = $xpath->query("//table[@id='grdprice']/tr"); // target the row (the browser rendered <tbody>, but actually it really doesnt have one)
if($table_rows->length <= 0) { // exit if not found
echo 'no table rows found';
exit;
}
foreach($table_rows as $tr) { // foreach row
$row = $tr->childNodes;
if($row->item(0)->tagName != 'th') { // avoid headers
$data[] = array(
'title' => trim($row->item(0)->nodeValue),
'sensor' => trim($row->item(2)->nodeValue),
);
}
}
}
echo'<pre>';
print_r($data);
}
最终:
网址:
没有找到表行
【问题讨论】:
-
你调试过你的代码吗?它在哪里失败?什么是预期的输出?实际输出是多少?您是否启用了错误报告?你得到什么错误信息?它在哪一行失败?