import os
from time import sleep

import faker
import requests
from lxml import etree

# Shared Faker instance; downloadHtml calls fake.user_agent() on it to build
# the User-Agent header for each request.
fake = faker.Faker()

# Site root (no trailing slash). get_next_link prepends it to the href it
# extracts from the "next" anchor.
base_url = "http://angelimg.spbeen.com"

def get_next_link(url):
    """Return the absolute URL of the page that follows ``url``.

    The page at ``url`` is downloaded with ``downloadHtml`` and searched for
    an ``<a>`` element whose ``class`` attribute is exactly ``"ch next"``.

    Args:
        url: Address of the current page.

    Returns:
        ``base_url`` concatenated with the first matching ``href``, or
        ``False`` when the page could not be downloaded or has no such link.
    """
    content = downloadHtml(url)
    if not content:
        # downloadHtml returns None for a non-200 response; passing that to
        # etree.HTML would raise instead of signalling "no next page".
        return False

    html = etree.HTML(content)
    if html is None:
        # NOTE(review): lxml appears to return None when the document has no
        # parseable content -- treated as "no next page"; confirm.
        return False

    next_url = html.xpath("//a[@class='ch next']/@href")
    if next_url:
        return base_url + next_url[0]
    return False

def downloadHtml(ur):
    """Fetch ``ur`` with an HTTP GET and return the response body as text.

    Each request carries a User-Agent generated by ``fake`` and a fixed
    Referer header, and uses a 20-second timeout.

    Args:
        ur: Address to download. (The parameter name is kept as-is so
            existing callers are unaffected.)

    Returns:
        The response text when the status code is 200, otherwise ``None``.
        Exceptions raised by ``requests.get`` are not caught here.
    """
    headers = {
        'User-Agent': fake.user_agent(),
        "Referer": "http://angelimg.spbeen.com/",
    }
    # The original body read an undefined name `url` instead of the
    # parameter, so the argument was never the address actually requested.
    response = requests.get(ur, headers=headers, timeout=20)
    if response.status_code != 200:
        return None
    return response.text

def getImgUrl(content):
html = etree.HTML(content)
img_url = html.xpath('//*[@ + url)
i = i + 1
except Exception as e:
print(str(e))
except Exception as e:
print(str(e))

结果

python 爬虫 循环分页

 

 python 爬虫 循环分页

 

相关文章:

  • 2022-12-23
  • 2021-11-17
  • 2021-07-29
  • 2021-12-06
  • 2021-12-10
  • 2022-12-23
猜你喜欢
  • 2022-12-23
  • 2022-12-23
  • 2021-08-30
  • 2022-12-23
  • 2021-12-14
  • 2022-12-23
  • 2022-12-23
相关资源
相似解决方案