# 校花网图片下载案例:同样的做法也适用于大文件,以及多个视频、音频文件的下载处理
工程流程:
scrapy startproject xx
cd xx
scrapy genspider hh www.xx.com
执行爬虫:scrapy crawl hh
import scrapy
from yyl.items import YylItem


class ExampleSpider(scrapy.Spider):
    """Collect image URLs from the 521609.com listing page.

    Every list entry on the start page yields one ``YylItem`` whose ``src``
    field holds the absolute URL of the entry's image. The spider only
    produces URLs; per the original author's note, the actual download
    (images, videos, archives, other large files) is the item pipeline's job.
    """

    name = 'xiaohua'
    start_urls = ['http://www.521609.com/daxuemeinv/']

    # Prefix prepended to the ``src`` attribute found on the page to form
    # the full image address.
    base_url = 'http://www.521609.com'

    # NOTE(review): the XPath literal in the original source was truncated
    # to '//*[@' (unterminated string, so the file could not be parsed).
    # The expression below is a best-guess reconstruction that selects the
    # <li> entries of the listing -- verify it against the live page markup
    # before relying on it.
    item_xpath = '//*[@id="content"]/div[2]/div[2]/ul/li'

    def parse(self, response):
        """Yield one ``YylItem`` per listing entry that has an image.

        Args:
            response: The downloaded listing page.

        Yields:
            ``YylItem`` instances with ``src`` set to the full image URL.
        """
        for li in response.xpath(self.item_xpath):
            src = li.xpath('./a/img/@src').extract_first()
            if src is None:
                # extract_first() returns None when the entry has no
                # <a><img src=...>; concatenating None would raise
                # TypeError, so skip such entries.
                continue

            item = YylItem()
            item['src'] = self.base_url + src  # build the full address
            yield item