【发布时间】:2019-04-02 19:38:27
【问题描述】:
我正在尝试在 localhost 上使用 cURL 连接到一个网站。我尝试了许多代理，但大多数都不起作用。
代码如下:
// Fetch a page through an HTTP proxy with cURL and dump the response body,
// the transfer info and the error string for debugging.
$url = 'https://stubhub.com';

// Proxy as "host:port". The port is taken from this string, so
// CURLOPT_PROXYPORT is not set separately (it would be ignored anyway).
$proxy = '50.115.194.97:8080';

// CA bundle used to verify the remote certificate.
$cacert = 'C:/xampp/cacert.pem';

$curl = curl_init();
curl_setopt_array($curl, array(
    CURLOPT_URL            => $url,
    CURLOPT_REFERER        => $url,
    CURLOPT_AUTOREFERER    => TRUE,
    CURLOPT_HEADER         => FALSE,
    // Finite timeouts: with 0 a dead proxy blocks the script until the
    // OS gives up on the connection (or forever once connected).
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT        => 30,
    CURLOPT_RETURNTRANSFER => TRUE,
    CURLOPT_FOLLOWLOCATION => TRUE,
    CURLOPT_HTTP_VERSION   => CURL_HTTP_VERSION_1_1,
    // Certificate / SSL: keep verification on, otherwise the CA bundle is
    // pointless and an untrusted proxy could tamper with the traffic.
    CURLOPT_CAINFO         => $cacert,
    CURLOPT_SSL_VERIFYPEER => TRUE,
    CURLOPT_SSL_VERIFYHOST => 2,
    // Cookies
    CURLOPT_COOKIEFILE     => __DIR__ . "/cookie.txt",
    // User-Agent
    CURLOPT_USERAGENT      => 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    // Proxy: tunnel the HTTPS request through the proxy with CONNECT.
    CURLOPT_PROXY           => $proxy,
    CURLOPT_HTTPPROXYTUNNEL => TRUE,
    // Errors
    CURLOPT_VERBOSE        => TRUE,
));

$data  = curl_exec($curl);   // response body, or FALSE on failure
$info  = curl_getinfo($curl);
$error = curl_error($curl);  // empty string when the transfer succeeded
curl_close($curl);

// Was `Info` (an undefined bareword constant) instead of `$info`.
$allInfo = array($data, $info, $error);

// Escape the dump: $data is remote HTML and must not be rendered as markup.
echo '<pre>';
echo htmlspecialchars(print_r($allInfo, TRUE));
echo '</pre>';
我收到以下回复:
Array
(
[0] =>
[1] => Array
(
[url] => https://stubhub.com/
[content_type] =>
[http_code] => 0
[header_size] => 0
[request_size] => 0
[filetime] => -1
[ssl_verify_result] => 0
[redirect_count] => 0
[total_time] => 20.203
[namelookup_time] => 0
[connect_time] => 0
[pretransfer_time] => 0
[size_upload] => 0
[size_download] => 0
[speed_download] => 0
[speed_upload] => 0
[download_content_length] => -1
[upload_content_length] => -1
[starttransfer_time] => 0
[redirect_time] => 0
[redirect_url] =>
[primary_ip] =>
[certinfo] => Array
(
)
[primary_port] => 0
[local_ip] =>
[local_port] => 0
)
[2] => Failed to connect to 50.115.194.97 port 8080: Timed out
)
我尝试使用支持 HTTPS 和 cookie、并且使用 443 端口的代理。
是否有用于此目的的 API/库?
【问题讨论】:
-
50.115.194.97 似乎是一个已死的公共代理。
-
@BhaskarChoudhary,如何获得新的代理并确保它没有死?
-
为了确保代理没有死,您必须在一个数组中加载所有代理,然后在循环中使用代理并尝试加载 example.com。如果在尝试加载 example.com 时发生错误,请检查该错误是否为“超时”。这样,您就可以从列表中消除死代理。
-
你能用上面的网站测试一个工作代理吗?
-
您必须为其创建一个新问题并提供链接。示例代码：$proxy_arr = [ ["ip"=>"0.0.0.0", "port"=>"8888"], ["ip"=>"1.1.1.1", "port"=>"9999"], ]; $working_proxies=[]; foreach($proxy_arr as $proxy){ $url= 'stubhub.com'; $curl = curl_init(); ... curl_close( $curl ); if($error == ''){ $working_proxies[] = $proxy; } } var_dump($working_proxies);
标签: php curl web-scraping https proxy