【发布时间】:2021-10-07 17:24:36
【问题描述】:
我在一个函数中有一个循环,可以从一个著名的 mma 网站上抓取战斗机元数据。 但是该函数可以工作,如果有任何 soup.find 或 soup.findall 变量引发属性或索引错误,我需要分配一个默认值 None。
我尝试使用这个功能没有成功:
def ex_handler(i):
try:
return i
except (AttributeError, IndexError):
return None
last_name = ex_handler(soup.find('span', class_='fn').text.strip().split()[1])
代码:
def get_fighter_meta(fighter_urls):
"""Scrapes meta from fighters page"""
for counter, fighter_url in enumerate(fighter_urls, start=1):
soup = get_soup(fighter_url)
try:
first_name = soup.find('span', class_='fn').text.strip().split()[0]
except (AttributeError, IndexError):
first_name = None
# todo try & except alternative to catch exceptions on all soup variables
last_name = soup.find('span', class_='fn').text.strip().split()[1]
full_name = f'{first_name} {last_name}'
nickname = soup.find('span', class_='nickname').text.strip()
image_url = f"https://www.xxxxxx.com/{soup.find('img', attrs={'itemprop': 'image'})['src']}"
dob = soup.find('span', attrs={'itemprop': 'birthDate'}).text.strip()
location = soup.find('span', class_='locality').text.strip()
nationality = soup.find('strong', attrs={'itemprop': 'nationality'}).text.strip()
association = soup.find('span', attrs={'itemprop': 'name'}).text.strip()
height = soup.find('span', class_='item height').text.strip()[-9:-2]
weight = soup.find('span', class_='item weight').text.strip()[-9:-2].strip()
weight_class = soup.find('strong', class_='title').text.strip()
win_loss_loop = [i.text.strip() for i in soup.find_all('span', class_='counter')]
wins = win_loss_loop[0]
losses = win_loss_loop[1]
graph_tag_loop = [i.text.strip() for i in soup.find_all('span', class_='graph_tag')]
win_ko = graph_tag_loop[0][:2].strip()
win_submission = graph_tag_loop[1][:2].strip()
win_decisions = graph_tag_loop[2][:2].strip()
loss_ko = graph_tag_loop[3][0][:2].strip()
loss_submission = graph_tag_loop[4][:2].strip()
loss_decisions = graph_tag_loop[5][:2].strip()
fighter_meta = {
'First_name': first_name,
'Last_name': last_name,
'Full name': full_name,
'Nickname': nickname,
'Image_url': image_url,
'Date_of_birth': dob,
'Location': location,
'Nationality': nationality,
'Association': association,
'Height': height,
'Weight': weight,
'Weight_class': weight_class,
'Wins': wins,
'Losses': losses,
'Win_by_ko': win_ko,
'Win_by_submission': win_submission,
'Win_decision': win_decisions,
'Loss_by_ko': loss_ko,
'Loss_by_submission': loss_submission,
'Loss_by_desision': loss_decisions
}
fighter_data.append(fighter_meta)
print(f'Saving: {full_name} - {counter} of {len(fighter_urls)}')
感谢您的帮助!
【问题讨论】:
标签: python beautifulsoup try-catch