import requests
from bs4 import BeautifulSoup
def get_players(column):
    """Collect the carded players listed in one column of a match report.

    Looks inside *column* for every table that has a header cell containing
    the text "Targetes" and turns each data row of those tables into one
    record.

    Args:
        column: A parsed HTML element exposing Beautiful Soup's ``select`` /
            ``select_one`` API (presumably one team's column of the page --
            verify against the caller).

    Returns:
        A list with one list per data row, laid out as
        ``[text of the row's first <span>, *text of every <td> after the
        first, 'Yellow' or 'Red']``. Empty when no matching table exists.

    Raises:
        AttributeError: If a data row contains no ``<span>`` element.
    """
    # NOTE(review): recent soupsieve releases deprecate ``:contains`` in
    # favour of ``:-soup-contains``. The selector is kept unchanged so older
    # installs keep working -- confirm the minimum supported version.
    card_tables = column.select('table:has(th:contains("Targetes"))')

    players = []
    for table in card_tables:
        # Only rows that contain at least one <td> are considered, which
        # leaves out rows made solely of header cells.
        for row in table.select('tr:has(td)'):
            cells = [td.get_text(strip=True) for td in row.select('td')]
            # Every row lacking the ``.groga-s`` marker is reported as 'Red'.
            colour = 'Yellow' if row.select_one('.groga-s') else 'Red'
            # The first <td> is dropped; the <span> text takes its place.
            players.append([row.span.text, *cells[1:], colour])
    return players
# Match report page to scrape.
url = 'https://www.fcf.cat/acta/2021/futbol-11/preferent-infantil/grup-1/pi/atletic-sant-just-f-c-a/pi/barcelona-fc-b'

# NOTE(review): verify=False turns off TLS certificate verification, so the
# response cannot be trusted to come from the real host. It is kept because
# it may be a deliberate workaround -- confirm and remove it if possible.
# The timeout keeps the script from hanging forever on an unresponsive server.
response = requests.get(url, verify=False, timeout=30)
# Fail here with a clear HTTP error instead of parsing an error page and
# hitting an opaque IndexError further down.
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# The first and third matching columns are read as the home and away teams.
main_columns = soup.select('.col-md-4.p-0_ml')
players = {'Team Home': get_players(main_columns[0]), 'Team Away': get_players(main_columns[2])}
print(players)
打印:
{'Team Home': [['17', 'KOLOMIETS , FYODOR', "22'", 'Red'], ['18', 'RUGGIERO , ANTONIO', "60'", 'Yellow']], 'Team Away': [['11', 'SO DELGADO PINTO, SIDNEY JOSE', "64'", 'Yellow']]}
对于其中一支球队没有球员被出示黄牌或红牌的比赛(例如 url = 'https://www.fcf.cat/acta/2021/futbol-11/preferent-infantil/grup-1/pi/castelldefels-ue-a/pi/rapitenca-ue-b'),输出为:
{'Team Home': [], 'Team Away': [['12', 'BELTRAN SOSPEDRA, MARC', "60'", 'Yellow']]}