如果我理解得没错,你首先需要在一个 Excel 文件中准备好用户列表。我手头没有这样的文件,所以我先用下面的代码从 Socialblade 抓取前 25 名用户,并将结果保存到 xlsx 文件中:
from openpyxl import load_workbook, Workbook
from lxml.html import fromstring
import requests
def get_number_of_views_and_subscriptions(socialblade_url="https://socialblade.com/youtube/", timeout=30):
    """Scrape the account table of a Socialblade listing page.

    Args:
        socialblade_url: URL of the page to download and parse.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled connection
            would block forever.

    Returns:
        A list of ``(account name, account url, subscribers, views)``
        tuples, one per table row that was found. A list (rather than a
        one-shot ``zip`` iterator) is returned so the result can be
        iterated more than once.

    Raises:
        requests.RequestException: If the connection fails, times out, or
            the server answers with an HTTP error status.
    """
    response = requests.get(socialblade_url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into empty lists.
    response.raise_for_status()
    tree = fromstring(response.content)

    # NOTE(review): this is an absolute, position-based path copied from the
    # page layout; it will silently match nothing if the layout changes.
    row_xpath = "/html/body/div[9]/div[1]/div"
    account_names = tree.xpath(row_xpath + "/div[3]/a/text()")
    account_urls = [
        "https://socialblade.com" + href
        for href in tree.xpath(row_xpath + "/div[3]/a/@href")
    ]
    subscribers = tree.xpath(row_xpath + "/div[5]/text()")
    views = tree.xpath(row_xpath + "/div[6]/text()")

    # zip() stops at the shortest column, so a row missing any field
    # truncates the result rather than raising.
    return list(zip(account_names, account_urls, subscribers, views))
def writing_to_excel(file_path="users_data.xlsx", data=None):
    """Write scraped account rows to an xlsx file.

    Args:
        file_path: Destination path of the workbook.
        data: Iterable of rows shaped like
            ``(account name, account url, number of subscribers, number of views)``.
            When omitted, the rows are fetched by calling
            ``get_number_of_views_and_subscriptions()`` at call time.

    The workbook gets a sheet named "Socialblade" inserted at index 0, with a
    header row followed by one row per item in ``data``.
    """
    # Resolve the default lazily: evaluating the scraper in the signature
    # would run a network request when the function is *defined*, and every
    # later call would share that single (possibly exhausted) result.
    if data is None:
        data = get_number_of_views_and_subscriptions()

    workbook = Workbook()
    worksheet = workbook.create_sheet("Socialblade", 0)
    worksheet.append(["account names", "account urls", "number of subscribers", "number of views"])
    for item in data:
        worksheet.append(item)
    workbook.save(file_path)
接下来需要从这个 Excel 文件中读取用户链接,并抓取每个用户的信息,我会使用以下代码:
def get_excel_user_links(file_path="users_data.xlsx"):
    """Return the account URLs stored in the second column of an xlsx file.

    Args:
        file_path: Path of the workbook to read.

    Returns:
        A list with the value of every cell in the second column of the
        active sheet, excluding the "account urls" header cell and empty
        cells.
    """
    workbook = load_workbook(filename=file_path)
    worksheet = workbook.active  # or workbook["Sheet1"] to pick a sheet by name

    links = []
    # Restrict iteration to column 2 so a sheet with fewer columns yields
    # empty cells instead of raising IndexError on row[1].
    for (cell,) in worksheet.iter_rows(min_col=2, max_col=2):
        value = cell.value
        # Skip the header and blank cells; a None "link" would only fail
        # later when it is requested.
        if value is None or value == "account urls":
            continue
        links.append(value)
    return links
def _first_xpath_text(tree, xpath):
    """Return the first result of ``xpath`` on ``tree``, or None if nothing matches."""
    matches = tree.xpath(xpath)
    return matches[0] if matches else None


def scrape_and_save_to_excel(file_path="scraped_data.xlsx", user_links=None, timeout=30):
    """Scrape view and subscriber counts for each user page and save them to xlsx.

    Args:
        file_path: Destination path of the workbook.
        user_links: Iterable of user page URLs. When omitted, the links are
            read by calling ``get_excel_user_links()`` at call time.
        timeout: Seconds to wait for each page before giving up; passed
            straight to ``requests.get``.

    The workbook gets a sheet named "Socialblade" inserted at index 0 with a
    header row followed by one ``[user link, views, subscribers]`` row per
    link. A value that cannot be found on a page is written as an empty
    cell rather than aborting the whole run.

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status. Nothing is saved in
            that case, because the workbook is written only after all
            links have been processed.
    """
    # Resolve the default lazily: evaluating it in the signature would read
    # users_data.xlsx when the function is *defined*, which fails if the
    # file has not been created yet.
    if user_links is None:
        user_links = get_excel_user_links()

    rows = [["user link", "number of views", "number of subscribers"]]
    for user_link in user_links:
        response = requests.get(user_link, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of recording blanks from an error page.
        response.raise_for_status()
        tree = fromstring(response.content)
        number_of_views = _first_xpath_text(
            tree, '//*[@id="YouTubeUserTopInfoBlock"]/div[4]/span[2]/text()'
        )
        number_of_subscribers = _first_xpath_text(
            tree, '//*[@id="YouTubeUserTopInfoBlock"]/div[3]/span[2]/text()'
        )
        rows.append([user_link, number_of_views, number_of_subscribers])

    workbook = Workbook()
    worksheet = workbook.create_sheet("Socialblade", 0)
    for row in rows:
        worksheet.append(row)
    workbook.save(file_path)