【发布时间】:2021-04-21 21:28:06
【问题描述】:
如何去掉 “window.__INITIAL_STATE__=” 前缀并提取其后的数据?
我也不需要来自 "window.__CONFIG__=" 或 "window.__USER_ID__=" 的数据。
结果最好是 JSON。
我有这个代码:
def html():
    """Fetch the page at ``url`` and print the text of a matching <script> tag.

    Reads the name ``url``, which is not defined inside this function, parses
    the response body with BeautifulSoup's "html.parser", and prints the
    ``.string`` of the first element matched by the selector
    ``script:-soup-contains('user_id')``.

    Returns:
        None. The script text is written to standard output.
    """
    # The context manager closes the HTTP response once the body has been read.
    with urllib.request.urlopen(url) as response:
        soup = BeautifulSoup(response.read(), "html.parser")
    # NOTE: select_one() returns None when nothing matches, in which case the
    # attribute access below raises AttributeError.
    results = soup.select_one("script:-soup-contains('user_id')").string
    print(results)
我得到了这样的回应:
window.__INITIAL_STATE__={"room_info":{"my_id":45316761,"user_id":45316761,"loginname":"ahmad Talent????","status":10,"avatar":"https://nono-vpic-dl.akamaized.net/download/file/fra/nonolive-fra/nnphotos/45316761/7acc36198d61d535b556b5c9da4722a8.jpg","intro":"","anchor_group":["official_idol"],"anchor_intro":"تحدي الوكالة الاول الي اليوم الساعة ١٠ المسا بتوقيت سوريا","anchor_live":13,"pic":"https://nono-vpic-dl.akamaized.net/download/file/fra/nonolive-fra/imgs/3f95b611-bce2-4090-a354-74884b986105.jpg","fans":204,"exp":20673.649999999998,"level":49,"location":"Iran","country":"Iran","finance_country":"Syria","user_cluster":"aws_singapore"},"competition_live_room_list":[18433562,23281295,34344391,47277372,47256825,54286750,47255406,47353920,47256699,47795451,47262679,8646078,23419136,21453881,29813714,29710653,47262066,29368045,13938893,48391752,14673269,29333298,18485050,20545338,14485392,19220336,14597081,32203926,32284062,15130785,47543990,8623919,34033944,34030962,34099216,34403020,19973173,12376400,35225245,35303307,35277251,35357151,35397160,35157680,35486592,35517567,35517948,35530929,26480251,35541332,19267293,35791502,17367640,35003909,35857349,35684312,36294570,35858792,8181931,8181894,8181904,20519246,100008,36155173,36346677,36641437,36641273,36639480,36641374,36639155,36643700,36671965,37993657,37209374,31627285,37397273,38191007,34969242,8021848,37256644,38890560,35679023,35963867,35678785,35664149,35678453,36146858,38566654,47623047,38565866,33489767,38566762,40605811,37683851,36172817,36114494,37669650,40589540,36277491,41085963,38965463,38575592,39590981,36771882,33514817,37409947,37557443,38814672,36878613,39786744,38985315,40227952,39768448,39597105,2880999,745773,43248166,40693308,38018122,36730051,37930534,42377740,36912971,38283433,47397760,48544218,47928342,47288183,34803161,47353280,47660138,47851530,36240127,41677978,31433574,34134849,48223842,44517516,41686787,44084034,32136191,30911886,32764558],"default_title":"Nonolive - Game and Video 
Live Streaming"};
window.__CONFIG__={"mwebsvr":"https://mob.nonolive.com","pcwebsvr":"https://www.nonolive.com","dispatchersvr":"https://disp.nonolive.com","dispatcher_env":"","cookie_domain":"nonolive.com","front_end_env":"production"};
window.__USER_ID__= 45316761 ;
我知道可以使用正则表达式来完成,但我不知道该怎么做。这对我来说太复杂了。有没有其他办法?
如果没有,如果有人可以帮助我解决正则表达式部分,我将不胜感激。
【问题讨论】:
-
可以分享网址吗?
-
您需要执行 JavaScript。使用 Selenium WebDriver 而不是 BS4。
-
@AndrejKesely nonolive.com/45316761
标签: python python-3.x beautifulsoup