import requests
from bs4 import BeautifulSoup
import bs4


def get_html_text(url):
    """Download *url* and return the response body as text.

    The request is made with a 30 second timeout and the body is decoded
    with the encoding detected from the content (``apparent_encoding``)
    instead of the one declared by the response headers.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or the literal string ``"crawl error"`` when
        the request fails or the server answers with an HTTP error status.
    """
    try:
        # NOTE: no custom headers (e.g. a User-Agent) are sent with the request.
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request/HTTP failures become the sentinel value; anything else
        # (KeyboardInterrupt, programming errors) is allowed to propagate.
        return "crawl error"


def _cell_text(cell):
    """Return the text of a table cell, including cells with nested markup."""
    text = cell.string
    # ``.string`` is None when the tag does not hold exactly one string;
    # fall back to the concatenated text so callers always receive a str.
    return text if text is not None else cell.get_text()


def fill_info(ulist, html):
    """Append the first three cell texts of each ``<tbody>`` row to *ulist*.

    Args:
        ulist: List that is extended in place; each appended item is a
            three-element list holding the text of a row's first three
            ``<td>`` cells.
        html: HTML document text to parse.

    Returns:
        None. When *html* contains no ``<tbody>`` (for instance when it is
        the ``"crawl error"`` sentinel) nothing is appended. Rows with fewer
        than three ``<td>`` cells are skipped.
    """
    soup = BeautifulSoup(html, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        return
    for tr in tbody.children:
        # Direct children also include whitespace/text nodes; only tags are rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr.find_all('td')
        if len(tds) < 3:
            continue
        ulist.append([_cell_text(td) for td in tds[:3]])


def print_info(ulist, num):
    """Print up to *num* entries of *ulist*, one per line.

    The first three fields of each entry are joined with four tab
    characters. A ``None`` field is printed as an empty string.

    Args:
        ulist: Sequence of rows as produced by ``fill_info``.
        num: Maximum number of rows to print. If *ulist* is shorter, only
            the available rows are printed; a non-positive value prints
            nothing.
    """
    separator = "\t\t\t\t"
    for row in ulist[:max(num, 0)]:
        fields = ("" if field is None else str(field) for field in row[:3])
        print(separator.join(fields))


if __name__ == "__main__":
    uinfo = []
    url = "http://www.zuihaodaxue.com/zuihaodaxuepaiming2019.html"
    html = get_html_text(url)
    fill_info(uinfo, html)
    # Prints at most the first 104 scraped rows.
    print_info(uinfo, 104)
# 相关文章: (Related articles:)