【发布时间】:2014-05-17 09:07:32
【问题描述】:
我正在学习 Python,想尝试从 Google 上抓取一些链接:
# Python 2 script: request a Google search results page and print the raw HTML.
import urllib2
# BeautifulSoup is imported here but not used anywhere in this snippet.
from bs4 import BeautifulSoup
# This is the call that raises "HTTPError: HTTP Error 403: Forbidden" in the
# traceback shown below.
# NOTE(review): presumably the server rejects the request because of
# urllib2's default User-Agent header - confirm before relying on this.
response = urllib2.urlopen('http://www.google.com.au/search?q=python')
# Read the whole response body into memory as a single string.
html = response.read()
print html
# The response is closed only if the lines above succeed; there is no
# try/finally guarding it.
response.close()
我做错了什么?运行后收到了以下错误:
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
<ipython-input-4-d990999e71f4> in <module>()
9
10 import urllib2
---> 11 response = urllib2.urlopen('http://www.google.com.au/search?q=python')
12 html = response.read()
13 print html
C:\Python27\lib\urllib2.pyc in urlopen(url, data, timeout)
124 if _opener is None:
125 _opener = build_opener()
--> 126 return _opener.open(url, data, timeout)
127
128 def install_opener(opener):
C:\Python27\lib\urllib2.pyc in open(self, fullurl, data, timeout)
395 for processor in self.process_response.get(protocol, []):
396 meth = getattr(processor, meth_name)
--> 397 response = meth(req, response)
398
399 return response
C:\Python27\lib\urllib2.pyc in http_response(self, request, response)
508 if not (200 <= code < 300):
509 response = self.parent.error(
--> 510 'http', request, response, code, msg, hdrs)
511
512 return response
C:\Python27\lib\urllib2.pyc in error(self, proto, *args)
433 if http_err:
434 args = (dict, 'default', 'http_error_default') + orig_args
--> 435 return self._call_chain(*args)
436
437 # XXX probably also want an abstract factory that knows when it makes
C:\Python27\lib\urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
367 func = getattr(handler, meth_name)
368
--> 369 result = func(*args)
370 if result is not None:
371 return result
C:\Python27\lib\urllib2.pyc in http_error_default(self, req, fp, code, msg, hdrs)
516 class HTTPDefaultErrorHandler(BaseHandler):
517 def http_error_default(self, req, fp, code, msg, hdrs):
--> 518 raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
519
520 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 403: Forbidden
【问题讨论】:
-
请使用 Google 搜索 API,不要试图直接爬取 Google 的搜索结果页面。
标签: python html beautifulsoup