完美运行的代码:
<?php
// Remove PHP's maximum execution time for this run (0 means "no limit").
set_time_limit(0);
/**
 * Request a URL through cURL and hand back the response.
 *
 * The request follows redirects, resolves host names over IPv4 only, sends a
 * browser-like User-Agent header and has cURL's verbose diagnostics enabled.
 *
 * @param string $link URL to request.
 * @return string|bool The value produced by curl_exec(): the response body,
 *                     or false when the transfer fails.
 */
function get_curl_output($link)
{
    $handle = curl_init();

    // Option table, applied one by one in the order listed.
    $settings = array(
        CURLOPT_URL            => $link,
        CURLOPT_RETURNTRANSFER => true, // return the body instead of printing it
        CURLOPT_CONNECTTIMEOUT => 4,    // seconds allowed for connecting
        CURLOPT_TIMEOUT        => 120,  // seconds allowed for the whole transfer
        CURLOPT_VERBOSE        => true,
        CURLOPT_USERAGENT      => 'Mozilla/5.0 (compatible; Windows; U; Windows NT 5.1; en-US; rv:1.8.1.2) Gecko/20070219',
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_IPRESOLVE      => CURL_IPRESOLVE_V4,
    );
    foreach ($settings as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}
// Download the listing page that the rest of the script parses.
$content = get_curl_output('http://www.greenbook.org/market-research-companies/united-states-of-america');
if ($content === false) {
    // curl_exec() signals a failed transfer with false. Report it and fall
    // back to an empty page so the string functions below receive a string
    // and the script ends with an empty result instead of failing silently.
    error_log('Download failed: no page content received, nothing to parse.');
    $content = '';
}
// Number of large article blocks on the page; drives the extraction loop.
$article_count = substr_count($content, '<article class="article article-lrg ">');
// Filled later: per-article array(inner HTML, end offset) and array(name, telephone).
$article_data = array();
$company_data = array();
/**
 * Extract the inner HTML of the next large article block in a page.
 *
 * Starting at byte offset $index, looks for the opening tag
 * '<article class="article article-lrg ">' and returns everything between it
 * and the following '</article>'.
 *
 * @param string $content HTML to search.
 * @param int    $index   Byte offset at which the search starts.
 * @return array Two elements:
 *               [0] string inner HTML of the article,
 *               [1] int    byte offset of the closing '</article>', usable as
 *                          $index for the next call.
 *               If no opening tag is found (or $index lies outside the
 *               string) the result is array('', strlen($content)). If the
 *               article is never closed, the rest of $content is returned
 *               together with strlen($content).
 */
function article_getter($content, $index = 0)
{
    $open_tag = '<article class="article article-lrg ">';
    $close_tag = '</article>';

    $content = (string) $content;
    $index = (int) $index;
    $length = strlen($content);

    // strpos() rejects offsets outside the string (ValueError on PHP 8), so an
    // out-of-range start simply means there is nothing left to find.
    if ($index > $length || $index < -$length) {
        return array('', $length);
    }

    $open_pos = strpos($content, $open_tag, $index);
    if ($open_pos === false) {
        return array('', $length);
    }

    $body_start = $open_pos + strlen($open_tag);
    $close_pos = strpos($content, $close_tag, $body_start);
    if ($close_pos === false) {
        // Unterminated article: take what is left and report the end of the
        // page so a follow-up call cannot restart the scan from offset 0.
        return array((string) substr($content, $body_start), $length);
    }

    return array(
        (string) substr($content, $body_start, $close_pos - $body_start),
        $close_pos,
    );
}
// Pull the articles out of the page one after another. The first call uses
// article_getter()'s default start offset; every later call resumes at the
// offset stored as the second element of the previous result.
$i = 0;
while ($i < $article_count) {
    $article_data[$i] = ($i === 0)
        ? article_getter($content)
        : article_getter($content, $article_data[$i - 1][1]);
    $i++;
}
/**
 * Pull the company name and telephone number out of one article's HTML.
 *
 * The name is the text between the first '<b>' and the '</b>' that follows it.
 * The telephone number is the text after the label
 * '<span class="strong">Telephone:</span>' (searched for from the end of the
 * name onwards) up to the next ' <span', with surrounding whitespace trimmed.
 * If that terminator is missing, the value runs to the end of $article.
 *
 * @param string $article Inner HTML of a single article.
 * @return array Two strings: [0] name, [1] telephone. A field whose markers
 *               are not present is returned as ''.
 */
function filter($article)
{
    $article = (string) $article;

    $name_open = '<b>';
    $name_close = '</b>';
    $phone_label = '<span class="strong">Telephone:</span>';
    $phone_end = ' <span';

    $name = '';
    $search_from = 0; // where the telephone search begins
    $open_pos = strpos($article, $name_open);
    if ($open_pos !== false) {
        $name_start = $open_pos + strlen($name_open);
        $close_pos = strpos($article, $name_close, $name_start);
        if ($close_pos !== false) {
            $name = (string) substr($article, $name_start, $close_pos - $name_start);
            $search_from = $close_pos;
        }
    }

    $telephone = '';
    $label_pos = strpos($article, $phone_label, $search_from);
    if ($label_pos !== false) {
        $value_start = $label_pos + strlen($phone_label);
        $value_end = strpos($article, $phone_end, $value_start);
        if ($value_end === false) {
            $raw = substr($article, $value_start);
        } else {
            $raw = substr($article, $value_start, $value_end - $value_start);
        }
        $telephone = trim((string) $raw);
    }

    return array($name, $telephone);
}
// Reduce each extracted article (element 0 of its entry) to the pair returned
// by filter(), stored under the same index.
foreach ($article_data as $i => $article_entry) {
    $company_data[$i] = filter($article_entry[0]);
}
var_dump($company_data); // do whatever you want with this array
?>