liujinxin123
"""请求网页"""
import requests
import re
import time
import os
# Browser-like User-Agent sent with every request (page and images).
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537'
}

# Gallery page whose images are downloaded.
PAGE_URL = 'https://www.vmgirls.com/12985.html'
# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 10
# Seconds to pause before each image download.
DOWNLOAD_DELAY = 1

# Patterns are the ones the page markup is scraped with; compiled once.
TITLE_RE = re.compile(r'<h1 class="post-title h3">(.*?)</h1>')
IMAGE_RE = re.compile(r'<a href="(.*?)" alt=".*?" title=".*?">')
# Characters that are not safe inside a directory name on common filesystems.
UNSAFE_NAME_CHARS_RE = re.compile(r'[\\/:*?"<>|]')


def parse_dir_name(html):
    """Return a directory name derived from the last post title in *html*.

    Characters that are invalid in file names are replaced with ``_`` and
    surrounding spaces/dots are stripped, so the result can be passed to
    ``os.makedirs`` directly.

    Raises:
        ValueError: if no title is found or the title is empty after cleaning.
    """
    titles = TITLE_RE.findall(html)
    if not titles:
        raise ValueError('post title not found in page')
    name = UNSAFE_NAME_CHARS_RE.sub('_', titles[-1]).strip(' .')
    if not name:
        raise ValueError('post title is empty after sanitizing')
    return name


def parse_image_urls(html):
    """Return every image link found in *html*, in document order."""
    return IMAGE_RE.findall(html)


def file_name_from_url(url):
    """Return the last path segment of *url*, without query string or fragment.

    Returns an empty string when the URL path ends with ``/``.
    """
    path = url.split('#', 1)[0].split('?', 1)[0]
    return path.split('/')[-1]


def main():
    """Download every image linked from PAGE_URL into a folder named after the post."""
    # Local import: only needed here, keeps the file's import block untouched.
    from urllib.parse import urljoin

    # --- Request the page ---
    response = requests.get(PAGE_URL, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    html = response.text

    # --- Parse the page ---
    dir_name = parse_dir_name(html)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(dir_name, exist_ok=True)
    urls = parse_image_urls(html)
    print(urls)

    # --- Save the images ---
    for url in urls:
        file_name = file_name_from_url(url)
        if file_name in ('', '.', '..'):
            # Nothing usable to name the file after; not an image link.
            continue
        time.sleep(DOWNLOAD_DELAY)
        try:
            # urljoin leaves absolute URLs alone and resolves relative ones.
            image = requests.get(urljoin(PAGE_URL, url), headers=headers,
                                 timeout=REQUEST_TIMEOUT)
            image.raise_for_status()
        except requests.RequestException as exc:
            # One broken link should not abort the remaining downloads.
            print('skipping {}: {}'.format(url, exc))
            continue
        with open(os.path.join(dir_name, file_name), 'wb') as f:
            f.write(image.content)


if __name__ == '__main__':
    main()

 

分类:

技术点:

相关文章: