from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from  boss.items import  BossItem

class ZhiPinSpider(CrawlSpider):
    """Crawl Boss Zhipin search results for "python" and scrape job pages.

    The spider starts from the first page of the search results, follows
    pagination links to further listing pages, and hands every job detail
    page to :meth:`parse_job`, which yields one ``BossItem`` per job.
    """

    name = 'Zhipin'
    # Must be spelled ``allowed_domains`` for Scrapy to pick it up.
    allowed_domains = ['zhipin.com']
    start_urls = ['https://www.zhipin.com/c100010000/?query=python&page=1']

    # An ordered tuple: CrawlSpider applies rules in order and the first rule
    # whose extractor matches a link claims it.
    rules = (
        # Listing pages: only follow pagination, nothing is scraped here.
        Rule(LinkExtractor(allow=r'.+\?query=python&page=\d'), follow=True),
        # Job detail pages: scrape them, do not follow their links.
        # NOTE(review): the original reused the listing-page pattern here, so
        # this rule could never fire. The pattern below assumes detail pages
        # live under /job_detail/<id>.html -- confirm against the live site.
        Rule(
            LinkExtractor(allow=r'.+job_detail/.+\.html'),
            callback='parse_job',
            follow=False,
        ),
    )

    def parse_job(self, response):
        """Extract the job title and company name from a job detail page.

        Args:
            response: The Scrapy response for a job detail page.

        Yields:
            BossItem: An item with ``title`` and ``company`` populated.
            ``title`` is an empty string and ``company`` is ``None`` when
            the corresponding node is missing from the page.
        """
        # Default to '' so a missing <h1 class="name"> does not raise
        # AttributeError on ``None.strip()``.
        title = response.xpath('//h1[@class="name"]/text()').get(default='').strip()
        company = response.xpath('//div[@class="info-company"]//a/text()').get()
        yield BossItem(title=title, company=company)

 

相关文章:

  • 2021-09-30
  • 2021-12-27
  • 2021-04-01
  • 2022-12-23
  • 2021-09-13
  • 2021-12-19
  • 2022-12-23
  • 2021-08-14
猜你喜欢
  • 2021-12-07
  • 2021-07-30
  • 2021-06-09
  • 2021-07-28
  • 2022-12-23
  • 2022-02-09
  • 2021-05-29
相关资源
相似解决方案