import json  # JSON is used when saving results to a file
import re  # regular expressions
from urllib.parse import urlencode

import requests
from requests.exceptions import RequestException  # base class of request errors


def get_page_index(offset, keyword, timeout=10):
    """Fetch one page of search results from the Toutiao search endpoint.

    Args:
        offset: Value sent as the ``offset`` query parameter.
        keyword: Value sent as the ``keyword`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawler forever.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise None (non-200 status or any request error).
    """
    data = {
        'autoload': 'true',
        'count': '20',
        'cur_tab': 1,
        'format': 'json',
        'from': 'search_tab',
        'keyword': keyword,
        'offset': offset,
    }
    url = 'https://www.toutiao.com/search_content/?' + urlencode(data)
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return response.text
        return None
    except RequestException:
        # Timeouts are RequestException subclasses, so they land here too.
        print('请求索引页出错')
        return None
def parse_page_index(html):
    """Yield the ``article_url`` of every entry in an index-page response.

    Args:
        html: JSON text of the index page, or None/empty when the fetch
            failed (the fetch helpers return None on error).

    Yields:
        ``item.get('article_url')`` for each entry of the top-level ``data``
        list; this is None for entries that carry no ``article_url`` key.

    Raises:
        json.JSONDecodeError: If *html* is non-empty but not valid JSON.
    """
    # A failed fetch hands us None; json.loads(None) would raise TypeError.
    if not html:
        return
    data = json.loads(html)
    # Only a JSON object can carry the 'data' list; a top-level list or
    # scalar has no .get() and simply yields nothing.
    if not isinstance(data, dict):
        return
    # The key may be present with a null value, so fall back to an empty list.
    for item in data.get('data') or []:
        yield item.get('article_url')
def get_page_detail(url, timeout=10):
    """Download a detail page and return its body text.

    Args:
        url: Address of the detail page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawler forever.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise None (non-200 status or any request error).
    """
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return response.text
        return None
    except RequestException:
        # Timeouts are RequestException subclasses, so they land here too.
        print('请求详情页出错', url)
        return None
def parse_page_detail(html):
    """Print the title of a detail page and build the gallery regex.

    Args:
        html: HTML text of the detail page, or None/empty when the fetch
            failed.

    Returns:
        None.

    NOTE(review): the visible part of this function only compiles the
    gallery pattern; it never applies it to *html*. Presumably the match
    and JSON decoding are still to be written -- confirm.
    """
    # A failed fetch hands us None; there is nothing to parse.
    if not html:
        return None
    # NOTE(review): `bs` is not imported in the visible part of the file --
    # presumably `from bs4 import BeautifulSoup as bs`; confirm.
    soup = bs(html, 'lxml')
    title_tags = soup.select('title')
    # Guard against pages without a <title> element instead of indexing
    # an empty result list.
    title = title_tags[0].get_text() if title_tags else ''
    print(title)
    # The literal parentheses and dot of `JSON.parse(...)` must be escaped:
    # an unescaped "(" opens a regex group that is never closed, which makes
    # re.compile raise re.error. The capture group holds the quoted JSON
    # string; optional whitespace is allowed around it because the page may
    # break the line after `JSON.parse(`.
    images_pattern = re.compile(
        r'gallery: JSON\.parse\(\s*"(.*?)"\s*\),', re.S)
1 BASE_DATA.galleryInfo = { 2 title: '路人街拍,个子不高,穿得却很有范,而且时尚潮流', 3 isOriginal: false, 4 mediaInfo: BASE_DATA.mediaInfo, 5 gallery: JSON.parse( 6 "{\"count\":5,\"sub_images\":[{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/pgc-image\\/15324800265505b953ab972\",\"width\":640,\"url_list\":[{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/pgc-image\\/15324800265505b953ab972\"},{\"url\":\"http:\\/\\/pb9.pstatp.com\\/origin\\/pgc-image\\/15324800265505b953ab972\"},{\"url\":\"http:\\/\\/pb1.pstatp.com\\/origin\\/pgc-image\\/15324800265505b953ab972\"}],\"uri\":\"origin\\/pgc-image\\/15324800265505b953ab972\",\"height\":959},{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/pgc-image\\/1532480004156480c21a63e\",\"width\":640,\"url_list\":[{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/pgc-image\\/1532480004156480c21a63e\"},{\"url\":\"http:\\/\\/pb9.pstatp.com\\/origin\\/pgc-image\\/1532480004156480c21a63e\"},{\"url\":\"http:\\/\\/pb1.pstatp.com\\/origin\\/pgc-image\\/1532480004156480c21a63e\"}],\"uri\":\"origin\\/pgc-image\\/1532480004156480c21a63e\",\"height\":1011},{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/pgc-image\\/1532480016958bdc65dea62\",\"width\":640,\"url_list\":[{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/pgc-image\\/1532480016958bdc65dea62\"},{\"url\":\"http:\\/\\/pb9.pstatp.com\\/origin\\/pgc-image\\/1532480016958bdc65dea62\"},{\"url\":\"http:\\/\\/pb1.pstatp.com\\/origin\\/pgc-image\\/1532480016958bdc65dea62\"}],\"uri\":\"origin\\/pgc-image\\/1532480016958bdc65dea62\",\"height\":929},{\"url\":\"http:\\/\\/p9.pstatp.com\\/origin\\/pgc-image\\/15324800362354f9d1c3ec1\",\"width\":640,\"url_list\":[{\"url\":\"http:\\/\\/p9.pstatp.com\\/origin\\/pgc-image\\/15324800362354f9d1c3ec1\"},{\"url\":\"http:\\/\\/pb1.pstatp.com\\/origin\\/pgc-image\\/15324800362354f9d1c3ec1\"},{\"url\":\"http:\\/\\/pb3.pstatp.com\\/origin\\/pgc-image\\/15324800362354f9d1c3ec1\"}],\"uri\":\"origin\\/pgc-image\\/15324800362354f9d1c3ec1\",\"height\":960},{\"url\":\"http:\\/\\/p3.pstatp.com\\
/origin\\/pgc-image\\/15324800476353e6c3e0c25\",\"width\":640,\"url_list\":[{\"url\":\"http:\\/\\/p3.pstatp.com\\/origin\\/pgc-image\\/15324800476353e6c3e0c25\"},{\"url\":\"http:\\/\\/pb9.pstatp.com\\/origin\\/pgc-image\\/15324800476353e6c3e0c25\"},{\"url\":\"http:\\/\\/pb1.pstatp.com\\/origin\\/pgc-image\\/15324800476353e6c3e0c25\"}],\"uri\":\"origin\\/pgc-image\\/15324800476353e6c3e0c25\",\"height\":986}],\"max_img_width\":640,\"labels\":[\"\\u65f6\\u88c5\\u642d\\u914d\"],\"sub_abstracts\":[\"\\u8def\\u4eba\\u8857\\u62cd\\uff0c\\u4e2a\\u5b50\\u4e0d\\u9ad8\\uff0c\\u7a7f\\u5f97\\u5374\\u5f88\\u6709\\u8303\\uff0c\\u800c\\u4e14\\u65f6\\u5c1a\\u6f6e\\u6d41\",\" \",\" \",\" \",\" \"],\"sub_titles\":[\"\\u8def\\u4eba\\u8857\\u62cd\\uff0c\\u4e2a\\u5b50\\u4e0d\\u9ad8\\uff0c\\u7a7f\\u5f97\\u5374\\u5f88\\u6709\\u8303\\uff0c\\u800c\\u4e14\\u65f6\\u5c1a\\u6f6e\\u6d41\",\"\\u8def\\u4eba\\u8857\\u62cd\\uff0c\\u4e2a\\u5b50\\u4e0d\\u9ad8\\uff0c\\u7a7f\\u5f97\\u5374\\u5f88\\u6709\\u8303\\uff0c\\u800c\\u4e14\\u65f6\\u5c1a\\u6f6e\\u6d41\",\"\\u8def\\u4eba\\u8857\\u62cd\\uff0c\\u4e2a\\u5b50\\u4e0d\\u9ad8\\uff0c\\u7a7f\\u5f97\\u5374\\u5f88\\u6709\\u8303\\uff0c\\u800c\\u4e14\\u65f6\\u5c1a\\u6f6e\\u6d41\",\"\\u8def\\u4eba\\u8857\\u62cd\\uff0c\\u4e2a\\u5b50\\u4e0d\\u9ad8\\uff0c\\u7a7f\\u5f97\\u5374\\u5f88\\u6709\\u8303\\uff0c\\u800c\\u4e14\\u65f6\\u5c1a\\u6f6e\\u6d41\",\"\\u8def\\u4eba\\u8857\\u62cd\\uff0c\\u4e2a\\u5b50\\u4e0d\\u9ad8\\uff0c\\u7a7f\\u5f97\\u5374\\u5f88\\u6709\\u8303\\uff0c\\u800c\\u4e14\\u65f6\\u5c1a\\u6f6e\\u6d41\"]}"), 7 siblingList: [{"comments_count": 72, "media_avatar_url": "//p3.pstatp.com/large/8b6000041e42267916a3", 8 "is_feed_ad": false, "is_diversion_page": false, "title": "街拍路人,高跟控的时尚穿搭参考,让你展现与众不同的时尚个性", 9 "single_mode": true, "gallary_image_count": 7, "middle_mode": false, "has_video": false, 10 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6577212971246682628/", 11 "source": "时尚街拍Dog", "more_mode": null, "article_genre": "gallery", 
"has_gallery": false, 12 "video_play_count": 0, 13 "image_url": "//p9.pstatp.com/list/300x170/pgc-image/1531376421040010f39e595", 14 "group_id": "6577212971246682628", "is_related": true, "media_url": "/c/user/5441627343/"}, 15 {"comments_count": 36, "media_avatar_url": "//p6.pstatp.com/large/382f0005b0e7dbc47d8d", 16 "is_feed_ad": false, "is_diversion_page": false, "title": "北京街拍,都是乱穿衣的时候,怎么好看就怎么穿", 17 "single_mode": true, "gallary_image_count": 10, "middle_mode": false, "has_video": false, 18 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6562767784663581191/", 19 "source": "海玲时尚", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 20 "video_play_count": 0, 21 "image_url": "//p1.pstatp.com/list/300x170/pgc-image/1528013395598fac4c4efe1", 22 "group_id": "6562767784663581191", "is_related": true, "media_url": "/c/user/58444595361/"}, 23 {"comments_count": 9, "media_avatar_url": "//p3.pstatp.com/large/8b6000041e42267916a3", 24 "is_feed_ad": false, "is_diversion_page": false, "title": "街拍路人,高贵洋气的潮流穿搭参考,让你展现高贵女神气质", 25 "single_mode": true, "gallary_image_count": 7, "middle_mode": false, "has_video": false, 26 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6570846557128622600/", 27 "source": "时尚街拍Dog", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 28 "video_play_count": 0, 29 "image_url": "//p3.pstatp.com/list/300x170/pgc-image/1529894002540131cde7818", 30 "group_id": "6570846557128622600", "is_related": true, "media_url": "/c/user/5441627343/"}, 31 {"comments_count": 16, "media_avatar_url": "//p1.pstatp.com/large/6c310002c52d08feb562", 32 "is_feed_ad": false, "is_diversion_page": false, "title": "街拍路人,时尚又洋气的穿搭参考,让你气场变大气质飙升", 33 "single_mode": true, "gallary_image_count": 7, "middle_mode": false, "has_video": false, 34 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6570524966184288776/", 35 "source": "六六大顺1286", "more_mode": null, "article_genre": 
"gallery", "has_gallery": false, 36 "video_play_count": 0, 37 "image_url": "//p3.pstatp.com/list/300x170/pgc-image/152981831396458c08bc0c3", 38 "group_id": "6570524966184288776", "is_related": true, "media_url": "/c/user/81174905516/"}, 39 {"comments_count": 8, "media_avatar_url": "//p1.pstatp.com/large/6c310002c52d08feb562", 40 "is_feed_ad": false, "is_diversion_page": false, "title": "街拍路人,时尚又减龄的穿搭参考,让你看起来至少年轻好几岁", 41 "single_mode": true, "gallary_image_count": 8, "middle_mode": false, "has_video": false, 42 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6566070892542034445/", 43 "source": "六六大顺1286", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 44 "video_play_count": 0, 45 "image_url": "//p1.pstatp.com/list/300x170/pgc-image/1528782064341c24abc274f", 46 "group_id": "6566070892542034445", "is_related": true, "media_url": "/c/user/81174905516/"}, 47 {"comments_count": 15, "media_avatar_url": "//p3.pstatp.com/large/6eed0004f9299624bf43", 48 "is_feed_ad": false, "is_diversion_page": false, "title": "路人街拍,美如花开的时尚穿搭参考,让你美如鲜花盛开!", 49 "single_mode": true, "gallary_image_count": 5, "middle_mode": false, "has_video": false, 50 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6576006257222615556/", 51 "source": "天天美女图", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 52 "video_play_count": 0, 53 "image_url": "//p9.pstatp.com/list/300x170/pgc-image/1531095687103b57abfdf57", 54 "group_id": "6576006257222615556", "is_related": true, "media_url": "/c/user/98918905833/"}, 55 {"comments_count": 12, "media_avatar_url": "//p1.pstatp.com/large/6c310002c52d08feb562", 56 "is_feed_ad": false, "is_diversion_page": false, "title": "街拍路人,时下最新潮的穿搭参考,让你穿出超越女神的气质", 57 "single_mode": true, "gallary_image_count": 7, "middle_mode": false, "has_video": false, 58 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6577224195824943620/", 59 "source": "六六大顺1286", "more_mode": null, 
"article_genre": "gallery", "has_gallery": false, 60 "video_play_count": 0, 61 "image_url": "//p3.pstatp.com/list/300x170/pgc-image/1531378675945f7e5f3dc4e", 62 "group_id": "6577224195824943620", "is_related": true, "media_url": "/c/user/81174905516/"}, 63 {"comments_count": 23, "media_avatar_url": "//p6.pstatp.com/large/382f0005b0e7dbc47d8d", 64 "is_feed_ad": false, "is_diversion_page": false, "title": "北京街拍,不对称西服外套与白衬衣的叠穿,更显层次感", 65 "single_mode": true, "gallary_image_count": 10, "middle_mode": false, "has_video": false, 66 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6552352941250970126/", 67 "source": "海玲时尚", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 68 "video_play_count": 0, 69 "image_url": "//p3.pstatp.com/list/300x170/pgc-image/1525588437307f582c869c9", 70 "group_id": "6552352941250970126", "is_related": true, "media_url": "/c/user/58444595361/"}, 71 {"comments_count": 8, "media_avatar_url": "//p1.pstatp.com/large/8b610003fea5104fa804", 72 "is_feed_ad": false, "is_diversion_page": false, "title": "街拍路人,黑色T永不过时的穿搭参考,让你穿出迷人青春范", 73 "single_mode": true, "gallary_image_count": 7, "middle_mode": false, "has_video": false, 74 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6572015578561643012/", 75 "source": "每日分享社", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 76 "video_play_count": 0, 77 "image_url": "//p9.pstatp.com/list/300x170/pgc-image/1530166166448957eb30a67", 78 "group_id": "6572015578561643012", "is_related": true, "media_url": "/c/user/52415621433/"}, 79 {"comments_count": 14, "media_avatar_url": "//p3.pstatp.com/large/6eed0004f5928763adad", 80 "is_feed_ad": false, "is_diversion_page": false, "title": "路人街拍,时尚大气的休闲穿搭参考,让你够酷,够美,够大气!", 81 "single_mode": true, "gallary_image_count": 6, "middle_mode": false, "has_video": false, 82 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6575640177157341699/", 83 "source": "天天美女照", 
"more_mode": null, "article_genre": "gallery", "has_gallery": false, 84 "video_play_count": 0, 85 "image_url": "//p9.pstatp.com/list/300x170/pgc-image/1531010389181b3166e603a", 86 "group_id": "6575640177157341699", "is_related": true, "media_url": "/c/user/6686213364/"}, 87 {"comments_count": 42, "media_avatar_url": "//p3.pstatp.com/large/6eed0002747fdb857784", 88 "is_feed_ad": false, "is_diversion_page": false, "title": "街拍北京:个子高挑,衣品时尚,搭配潮流,长得好看!", 89 "single_mode": true, "gallary_image_count": 6, "middle_mode": false, "has_video": false, 90 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6581196161003029000/", 91 "source": "时尚好看19", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 92 "video_play_count": 0, 93 "image_url": "//p1.pstatp.com/list/300x170/pgc-image/1532304059555dc3da79df0", 94 "group_id": "6581196161003029000", "is_related": true, "media_url": "/c/user/98512844552/"}, 95 {"comments_count": 7, "media_avatar_url": "//p3.pstatp.com/large/5e7c0000cc291f5e42d3", 96 "is_feed_ad": false, "is_diversion_page": false, "title": "路人街拍:约会简约搭配方式,纯色连衣裙搭配平底尖脚鞋气质出众", 97 "single_mode": true, "gallary_image_count": 7, "middle_mode": false, "has_video": false, 98 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6577348572038038019/", 99 "source": "Fashion第六感", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 100 "video_play_count": 0, 101 "image_url": "//p3.pstatp.com/list/300x170/pgc-image/1531408080685595da561c6", 102 "group_id": "6577348572038038019", "is_related": true, "media_url": "/c/user/6236329656/"}, 103 {"comments_count": 17, "media_avatar_url": "//p3.pstatp.com/large/6eed0004f9299624bf43", 104 "is_feed_ad": false, "is_diversion_page": false, "title": "路人街拍,真心养眼的时尚穿搭参考,让你穿的好看,美的养眼!", 105 "single_mode": true, "gallary_image_count": 5, "middle_mode": false, "has_video": false, 106 "video_duration_str": null, "source_url": 
"https://www.toutiao.com/group/6575811480208278030/", 107 "source": "天天美女图", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 108 "video_play_count": 0, 109 "image_url": "//p3.pstatp.com/list/300x170/pgc-image/1531050304667eb82ae53fb", 110 "group_id": "6575811480208278030", "is_related": true, "media_url": "/c/user/98918905833/"}, 111 {"comments_count": 2, "media_avatar_url": "//p3.pstatp.com/large/6eed0004f5928763adad", 112 "is_feed_ad": false, "is_diversion_page": false, "title": "路人街拍,一身优雅的穿搭,穿出女人端庄的气质", 113 "single_mode": true, "gallary_image_count": 6, "middle_mode": false, "has_video": false, 114 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6579159866898645507/", 115 "source": "天天美女照", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 116 "video_play_count": 0, 117 "image_url": "//p3.pstatp.com/list/300x170/pgc-image/1531829948291cec7f1e419", 118 "group_id": "6579159866898645507", "is_related": true, "media_url": "/c/user/6686213364/"}, 119 {"comments_count": 2, "media_avatar_url": "//p3.pstatp.com/large/6eed0004f9299624bf43", 120 "is_feed_ad": false, "is_diversion_page": false, "title": "路人街拍,尽显优雅绝伦的时尚穿搭参考,让你美的令人一见倾心", 121 "single_mode": true, "gallary_image_count": 5, "middle_mode": false, "has_video": false, 122 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6575441122204582404/", 123 "source": "天天美女图", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 124 "video_play_count": 0, 125 "image_url": "//p1.pstatp.com/list/300x170/pgc-image/1530964112002b59991e98e", 126 "group_id": "6575441122204582404", "is_related": true, "media_url": "/c/user/98918905833/"}, 127 {"comments_count": 9, "media_avatar_url": "//p1.pstatp.com/large/888e0001ca2d8afc54d4", 128 "is_feed_ad": false, "is_diversion_page": false, "title": "街拍,休闲+减龄的气质女生夏天穿衣搭配,日常穿搭秒杀路人", 129 "single_mode": true, "gallary_image_count": 5, "middle_mode": false, "has_video": false, 130 
"video_duration_str": null, "source_url": "https://www.toutiao.com/group/6579356829992616455/", 131 "source": "街拍美女图", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 132 "video_play_count": 0, 133 "image_url": "//p9.pstatp.com/list/300x170/pgc-image/1531875794891cbda4529a6", 134 "group_id": "6579356829992616455", "is_related": true, "media_url": "/c/user/98867532852/"}, 135 {"comments_count": 32, "media_avatar_url": "//p3.pstatp.com/large/6eea0002dbb5ffffa1f1", 136 "is_feed_ad": false, "is_diversion_page": false, "title": "女生街拍,黄金比例闺蜜,穿黑色凉鞋气质真好!", 137 "single_mode": true, "gallary_image_count": 10, "middle_mode": false, "has_video": false, 138 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6580886941489693191/", 139 "source": "kk潮品街拍", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 140 "video_play_count": 0, 141 "image_url": "//p3.pstatp.com/list/300x170/pgc-image/15322320709553059d33c7c", 142 "group_id": "6580886941489693191", "is_related": true, "media_url": "/c/user/98013496587/"}, 143 {"comments_count": 22, "media_avatar_url": "//p3.pstatp.com/large/8b61000302495b74702e", 144 "is_feed_ad": false, "is_diversion_page": false, "title": "路人街拍,年轻就是本钱,穿什么都充满时尚美!", 145 "single_mode": true, "gallary_image_count": 5, "middle_mode": false, "has_video": false, 146 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6577151772521923086/", 147 "source": "天天街拍图", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 148 "video_play_count": 0, 149 "image_url": "//p9.pstatp.com/list/300x170/pgc-image/1531362350533dbf5878e98", 150 "group_id": "6577151772521923086", "is_related": true, "media_url": "/c/user/100250154069/"}, 151 {"comments_count": 7, "media_avatar_url": "//p3.pstatp.com/large/6eed0004f9299624bf43", 152 "is_feed_ad": false, "is_diversion_page": false, "title": "路人街拍,女人要会穿才显漂亮,夏季这样穿搭让你尽显迷人气质美", 153 "single_mode": true, "gallary_image_count": 5, "middle_mode": 
false, "has_video": false, 154 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6578614959142339075/", 155 "source": "天天美女图", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 156 "video_play_count": 0, 157 "image_url": "//p3.pstatp.com/list/300x170/pgc-image/1531702883449de4a8be734", 158 "group_id": "6578614959142339075", "is_related": true, "media_url": "/c/user/98918905833/"}, 159 {"comments_count": 7, "media_avatar_url": "//p3.pstatp.com/large/6eed0004f5928763adad", 160 "is_feed_ad": false, "is_diversion_page": false, "title": "路人街拍,好身材也要会穿搭,这样穿让你气质迷人", 161 "single_mode": true, "gallary_image_count": 5, "middle_mode": false, "has_video": false, 162 "video_duration_str": null, "source_url": "https://www.toutiao.com/group/6575273365118910983/", 163 "source": "天天美女照", "more_mode": null, "article_genre": "gallery", "has_gallery": false, 164 "video_play_count": 0, 165 "image_url": "//p1.pstatp.com/list/300x170/pgc-image/153092495402580bddfcc1d", 166 "group_id": "6575273365118910983", "is_related": true, "media_url": "/c/user/6686213364/"}], 167 publish_time: '2018-07-25 08:55:51', 168 group_id: '6581952133069799949', 169 item_id: '6581952133069799949', 170 share_url: 'https://m.toutiao.com/group/6581952133069799949/', 171 abstract: ''.replace( / < br \ / > / ig, ''), 172 repin: 0 173 }