首先,你可以只提取 body 标签内所有 type 为 application/ld+json 的 script 标签,然后用 json.loads() 解析其中的内容,取出你想要的数据。
你可以试试这个:
代码:
import requests
from bs4 import BeautifulSoup
import json
URL = "https://paytm.com/movies/coimbatore/"


def extract_movies(json_texts):
    """Collect title/genre pairs from JSON-LD payloads describing movies.

    Args:
        json_texts: Iterable of raw JSON strings (the contents of
            ``<script type="application/ld+json">`` tags). ``None``, empty
            and malformed entries are skipped instead of aborting the run.

    Returns:
        A list of ``{'title': ..., 'genre': ...}`` dicts, one per object whose
        ``@type`` is ``'Movie'``, in input order. A missing ``name`` or
        ``genre`` key yields ``None`` for that field.
    """
    movies = []
    for text in json_texts:
        if not text or not text.strip():
            continue
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            # One broken script tag should not discard the other movies.
            continue
        # A JSON-LD script may hold either one object or an array of objects.
        entries = data if isinstance(data, list) else [data]
        for entry in entries:
            # .get() avoids the KeyError that entry['@type'] raises on
            # objects that carry no '@type' key at all.
            if not isinstance(entry, dict) or entry.get('@type') != 'Movie':
                continue
            movies.append({
                'title': entry.get('name'),
                'genre': entry.get('genre'),
            })
    return movies


def fetch_movies(url=URL, timeout=10):
    """Download *url* and return the movies found in its JSON-LD scripts.

    Args:
        url: Page to scrape; defaults to the module-level ``URL``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The list produced by ``extract_movies`` for the page.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    # Fall back to the whole document when the page has no <body> tag.
    root = soup.find('body') or soup
    scripts = root.find_all('script', attrs={"type": "application/ld+json"})
    # Prefer .string: depending on the Beautiful Soup version, .text may
    # omit the contents of <script> tags.
    return extract_movies(
        script.string or script.get_text() for script in scripts
    )


if __name__ == "__main__":
    movies = fetch_movies()
    print(movies)
结果:
[{'genre': 'drama', 'title': ' Vaanam Kottatum'},
{'genre': 'drama', 'title': 'Seeru'},
{'genre': 'drama, thriller', 'title': 'Psycho'},
{'genre': 'action, adventure, crime', 'title': 'Birds of Prey'},
{'genre': 'horror, romance', 'title': 'Malang'},
{'genre': 'action, drama', 'title': 'Darbar'},
{'genre': 'drama', 'title': '1917'},
{'genre': 'drama, comedy', 'title': 'Naadodigal 2'},
{'genre': 'drama, historical, romantic', 'title': 'Shikara'},
{'genre': 'drama', 'title': 'Jaanu'},
{'genre': 'drama', 'title': 'Ala Vaikunthapurramuloo'},
{'genre': 'drama', 'title': 'Little Women'},
{'genre': 'action, drama', 'title': 'Pattas'},
{'genre': 'thriller, crime, mystery', 'title': 'Anjaam Pathiraa'},
{'genre': 'action, thriller, crime', 'title': 'Bad Boys For Life'},
{'genre': 'drama', 'title': 'Anveshanam'},
{'genre': 'drama', 'title': 'Dagaalty'},
{'genre': 'horror, comedy', 'title': 'Sandimuni '},
{'genre': 'action, thriller, crime', 'title': 'Bad Boys For Life'}]