I spent some time learning Python web crawling; below are the examples I put together.
#coding=gbk
'''
Created on 2019-07-12
@author: lenovo
'''
import urllib.request
data=urllib.request.urlopen("https://www.csdn.net").read()
print(data)
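A bare urlopen sends no browser-like headers, so some sites refuse the request. A minimal sketch of the same call with a User-Agent header set through urllib.request.Request (the header string is just an example):

import urllib.request

headers={"User-Agent":"Mozilla/5.0"}  # example User-Agent, any browser-like string works
req=urllib.request.Request("https://www.csdn.net", headers=headers)
data=urllib.request.urlopen(req).read()
print(len(data))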
# Automatically extract the QQ group number from the course page
import urllib.request
import re
data=urllib.request.urlopen("https://edu.csdn.net/huiyiCourse/detail/253").read().decode("utf-8")
# The HTML tags of the original pattern were eaten by the blog renderer;
# assuming the group number sits inside a <p> tag, the pattern would be:
pat=r"<p>(\d*?)</p>"
result=re.compile(pat).findall(data)
print(result)
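The pattern only matches if the page really wraps the group number in that tag, so it helps to test the regex offline first. A small sketch of the same findall call against a hypothetical HTML fragment:

import re

sample="<p>123456789</p><p>abc</p>"   # hypothetical fragment, not the real page
pat=r"<p>(\d*?)</p>"
print(re.compile(pat).findall(sample))   # prints ['123456789']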
import urllib.request
import re
data=urllib.request.urlopen("https://read.douban.com/provider/all").read().decode("utf-8")
# The pattern was also stripped by the renderer; assuming each provider name
# sits in a <div class="name"> tag, the pattern would be:
pat='<div class="name">(.*?)</div>'
result=re.compile(pat).findall(data)
print(result)
file=open(r'F:\Test1\Input.txt','a+')  # raw string so the backslashes are not treated as escapes
for i in result:
    file.write("\n"+i)
file.close()
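A with-block closes the file even if a write raises, and pinning the encoding avoids problems when the extracted names contain characters outside the platform default. A sketch of the same write loop under that assumption:

# assumes result already holds the strings extracted above
with open(r'F:\Test1\Input.txt','a+',encoding='utf-8') as f:
    for i in result:
        f.write("\n"+i)
# no explicit close() needed, the with-block handles it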
import urllib.request
import re
# urlretrieve(url, local_path) downloads a page straight to a local file
'''
urllib.request.urlretrieve("http://www.baidu.com", r"F:\Test1\dow.html")
urllib.request.urlcleanup()
# info() shows the response headers
file=urllib.request.urlopen("https://read.douban.com/provider/all")
print(file.info())
# getcode() returns the HTTP status code
print(file.getcode())
# geturl() returns the URL of the current page
print(file.geturl())
'''
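urlretrieve also takes an optional reporthook callback that is called after each block, which is handy for watching a large download. A minimal sketch (the target path is hypothetical):

import urllib.request

def show_progress(block_num, block_size, total_size):
    # total_size is -1 when the server does not report a Content-Length
    if total_size>0:
        done=min(block_num*block_size, total_size)
        print("%.1f%%" % (done*100/total_size))

urllib.request.urlretrieve("http://www.baidu.com", r"F:\Test1\dow2.html", show_progress)
urllib.request.urlcleanup()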
'''
# Timeout setting
for i in range(0,100):
    try:
        file=urllib.request.urlopen("http://www.baidu.com", timeout=1)
        print(len(file.read()))
    except Exception as err:
        print("Exception occurred: "+str(err))
'''
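Catching every Exception hides the difference between a timeout and other failures. A sketch that retries a few times and only swallows timeout-type errors (the retry count of 3 is arbitrary):

import socket
import urllib.error
import urllib.request

for attempt in range(3):
    try:
        file=urllib.request.urlopen("http://www.baidu.com", timeout=1)
        print(len(file.read()))
        break                      # success, stop retrying
    except (socket.timeout, urllib.error.URLError) as err:
        print("Attempt "+str(attempt+1)+" failed: "+str(err))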
keyword="Python"
url="http://www.baidu.com/s?wd="+keyword   # note the '=' after wd, otherwise the query is dropped
data=urllib.request.urlopen(url).read().decode("utf-8")
pat="title:'(.*?)',"
rst=re.compile(pat).findall(data)
print(rst)
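If the keyword contains Chinese characters it has to be percent-encoded before it goes into the URL. A sketch using urllib.request.quote (the keyword below is just an example):

import urllib.request
import re

keyword=urllib.request.quote("网络爬虫")   # example keyword; quote() percent-encodes it
url="http://www.baidu.com/s?wd="+keyword
data=urllib.request.urlopen(url).read().decode("utf-8")
pat="title:'(.*?)',"
rst=re.compile(pat).findall(data)
print(rst)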