【发布时间】:2019-07-21 19:35:58
【问题描述】:
from bs4 import BeautifulSoup
import requests
from fake_useragent import UserAgent
from xlsxwriter import Workbook
# Scrape the first page of eBay search results for "laptop" and store one
# spreadsheet row per listing in Data.xlsx.
ua = UserAgent()
# The HTTP header is spelled "User-Agent"; a "user_agent" key is sent as an
# unrelated header, so the server would still see the default requests agent.
header = {'User-Agent': ua.chrome}
main_url = 'https://www.ebay.com/sch/i.html?_from=R40&_trksid=m570.l1313&_nkw=laptop&_sacat=0'
page = requests.get(main_url, headers=header)
# Fail loudly on an HTTP error instead of parsing an error page as results.
page.raise_for_status()
soup = BeautifulSoup(page.content, 'lxml')
links = soup.find_all('div', class_='s-item__info clearfix')

# Create the workbook once, outside the loop: re-creating it per item
# overwrites Data.xlsx each time, leaving only the last listing in the file.
# The context manager closes (and thereby saves) the workbook even if a
# listing raises while being written.
with Workbook('Data.xlsx') as workbook:
    worksheet = workbook.add_worksheet()
    # Give every listing its own row; writing everything to row 0 would
    # make each listing clobber the previous one.
    for row, item in enumerate(links):
        # Second sibling after the first <div> inside the listing block.
        # NOTE(review): presumably a details element of the listing; what it
        # holds depends on eBay's markup -- confirm against the live page.
        details = item.div.next_sibling.next_sibling
        worksheet.write(row, 0, item.h3.text)
        # Keep only the part of the link before the query string.
        worksheet.write(row, 1, item.a['href'].split('?')[0])
        worksheet.write(row, 2, details.text)
        worksheet.write(row, 3, details.next_sibling.text)
        worksheet.write(row, 4, item.find('span', class_='s-item__price').text)
【问题讨论】:
标签: excel python-3.x web-scraping