数据隐藏在该页面某个脚本内的变量中。提取它的过程有点复杂(可能还有其他我没有想到的方法),但下面的代码应该能帮你实现(使用了几个 Python 库):
import requests
from bs4 import BeautifulSoup as bs
# Browser-like request headers (Firefox User-Agent plus a Referer pointing at
# the public chart page) sent along with the request below.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:83.0) Gecko/20100101 Firefox/83.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Connection': 'keep-alive',
    'Referer': 'https://www.macrotrends.net/stocks/charts/AAPL/apple/market-cap',
    'Upgrade-Insecure-Requests': '1',
    'TE': 'Trailers',
}
# Query-string parameters; 't' is presumably the ticker symbol -- change it
# (and the Referer above) to query a different stock.
params = (
    ('t', 'AAPL'),
)
# requests has no default timeout, so pass one explicitly to keep the script
# from hanging forever on an unresponsive server.
response = requests.get(
    'https://www.macrotrends.net/assets/php/market_cap.php',
    headers=headers,
    params=params,
    timeout=30,
)
# Fail loudly on an HTTP error status instead of silently parsing an error
# page and ending up with no data.
response.raise_for_status()
# Parse the returned HTML so the embedded <script> tags can be searched.
soup = bs(response.text, 'lxml')
# `soup` holds the page contents, including the target data; now look for the
# scripts on that page.
scpt = soup.select('script')
val_dict = {}  # initialize a dictionary to house the data at the end
# There are numerous scripts on that page; single out the relevant one.
for s in scpt:
    if s.contents and 'var chartData = ' in s.contents[0]:
        # This is the script carrying the data; from here on everything is
        # string and list manipulation to extract it into the dictionary.
        # Keep only the text between 'var chartData = ' and the ';\r' that
        # ends the statement.
        vr = s.contents[0].split('var chartData = ')[1].split(';\r')[0]
        # Fields are comma-separated and come in (date, value) pairs, so the
        # even-indexed pieces are dates and the odd-indexed ones are values.
        vals = vr.split(',')
        for d, v in zip(vals[::2], vals[1::2]):
            # Each piece looks like `name:content`; keep the content part.
            # Keys keep any surrounding quotes from the page text. The value
            # has its closing '}' removed, plus the ']' left on the final
            # entry (assuming chartData is an array literal).
            val_dict[d.split(':')[1]] = v.split(':')[1].replace('}', '').replace(']', '')
输出中随机选取的一部分:
date: "2005-06-10" value: 29.19
date: "2005-06-13" value: 29.26
date: "2005-06-14" value: 29.34
date: "2005-06-15" value: 30.27
date: "2005-06-16" value: 30.96
等等