import csv

import requests
from bs4 import BeautifulSoup
from lxml import html

# Browser-like User-Agent so the site does not reject automated requests.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3928.4 Safari/537.36'
}

# Kept for backward compatibility with the original constant name.
Header = HEADERS


def getdata(url):
    """Fetch one article page and append (title, body, time) to the CSV file.

    Prints the three fields as it goes. Pages that lack the expected
    elements (deleted/abnormal posts) are skipped instead of crashing.
    """
    # Bug fix: the original request here sent no headers, unlike parse_page.
    resp = requests.get(url, headers=HEADERS)
    # Bug fix: set the detected encoding to avoid mojibake on Chinese pages.
    resp.encoding = resp.apparent_encoding
    soup = BeautifulSoup(resp.text, "lxml")

    title = soup.find('h1', class_='ph')
    body = soup.find('div', class_='blockquote')
    posted = soup.find('span', class_='time')  # renamed: 'time' shadowed a stdlib module name
    if title is None or body is None or posted is None:
        # Robustness: original would raise AttributeError on .text here.
        return

    print(title.text)
    print(body.text)
    print(posted.text)

    # Save to the CSV file. utf-8-sig keeps the Chinese text intact and lets
    # Excel open it correctly; 'a' appends across calls. The 'with' block
    # closes the file, so no explicit close() is needed.
    with open("微信小程序.csv", "a", newline="", encoding="utf-8-sig") as cf:
        csv.writer(cf).writerow([title.text, body.text, posted.text])


def parse_page(url):
    """Scrape one listing page: extract every article link and process it."""
    resp = requests.get(url, headers=HEADERS)
    resp.encoding = resp.apparent_encoding
    tree = html.fromstring(resp.text)
    # Anchors of the articles listed on this page.
    links = tree.xpath('//*[@id="itemContainer"]/div/div/h3/a')
    for anchor in links:
        getdata("http://www.wxapp-union.com/" + anchor.get('href'))


def began():
    """Walk all 107 listing pages of the mini-program section."""
    base = "http://www.wxapp-union.com/portal.php?mod=list&catid=1&page={}"
    for page in range(1, 108):
        parse_page(base.format(page))
    # Bug fix: the completion message used to print after EVERY page inside
    # parse_page; announce it once, after the whole crawl finishes.
    print('微信小程序全部爬取完成')


if __name__ == '__main__':
    began()