sun-10387834
# 抓取梨视频网站的短视频
from lxml import etree
import os
import requests
import re
# Directory that downloaded videos are written to (created on demand).
dirName = "./videoLibs"
# Category listing page whose entries are downloaded.
url = "https://www.pearvideo.com/category_59"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36',
}
# Seconds to wait on any single HTTP request before giving up, so a stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 30
# Size of the pieces a video is streamed to disk in.
CHUNK_SIZE = 64 * 1024
# Captures the media URL assigned to `srcUrl` in a detail page's source.
SRC_URL_PATTERN = re.compile(r'srcUrl="(.*?)",vdoUrl', re.S)
# Characters that are invalid in file names on Windows and/or POSIX.
UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def safe_filename(title):
    """Return *title* turned into a string that is safe to use as a file name.

    Path separators and other reserved characters are replaced with ``_`` so
    a title can neither break ``open()`` nor escape the download directory.
    Falls back to ``"untitled"`` when nothing usable is left.
    """
    cleaned = UNSAFE_FILENAME_CHARS.sub("_", title).strip()
    # Windows rejects names ending in a dot or a space.
    cleaned = cleaned.rstrip(". ")
    return cleaned or "untitled"


def extract_video_url(detail_html):
    """Return the first ``srcUrl`` value found in *detail_html*, or ``None``."""
    match = SRC_URL_PATTERN.search(detail_html)
    return match.group(1) if match else None


def download_video(video_url, video_path):
    """Stream the resource at *video_url* into the file *video_path*.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    with requests.get(video_url, headers=headers, stream=True,
                      timeout=REQUEST_TIMEOUT) as response:
        response.raise_for_status()
        with open(video_path, "wb") as fp:
            # Write chunk by chunk instead of holding the whole video in memory.
            for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                fp.write(chunk)


def main():
    """Download every video listed on the category page into ``dirName``."""
    os.makedirs(dirName, exist_ok=True)

    listing = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    listing.raise_for_status()
    tree = etree.HTML(listing.text)

    for li in tree.xpath('//*[@id="listvideoListUl"]/li'):
        hrefs = li.xpath('./div/a/@href')
        titles = li.xpath('./div/a/div[2]/text()')
        if not hrefs or not titles:
            # List item without the expected link/title markup: skip it
            # rather than abort the whole run with an IndexError.
            continue
        video_href = "https://www.pearvideo.com/" + hrefs[0]
        video_title = titles[0]

        # Source of this video's detail page.
        detail = requests.get(video_href, headers=headers,
                              timeout=REQUEST_TIMEOUT)
        if not detail.ok:
            print(video_title, "skipped: detail page returned HTTP",
                  detail.status_code)
            continue

        # Direct address of the video file.
        video_url = extract_video_url(detail.text)
        if video_url is None:
            print(video_title, "skipped: no srcUrl found in detail page")
            continue

        video_path = os.path.join(dirName, safe_filename(video_title) + ".mp4")
        try:
            download_video(video_url, video_path)
        except requests.RequestException as err:
            print(video_title, "download failed:", err)
            continue
        print(video_title, "下载完毕...")


if __name__ == "__main__":
    main()

 

分类:

技术点:

相关文章:

  • 2021-11-19
  • 2021-09-15
  • 2021-10-14
  • 2021-08-12
  • 2021-11-05
  • 2022-12-23
  • 2021-12-08
猜你喜欢
  • 2021-11-19
  • 2021-11-07
  • 2021-11-14
  • 2022-01-21
  • 2021-07-01
  • 2022-01-15
  • 2021-10-19
相关资源
相似解决方案