【发布时间】:2021-11-01 13:14:28
【问题描述】:
我需要一些建议:我必须编写一个 Python 程序来抓取网站,例如那些内容由 JavaScript 生成的网站。
我尝试用 BeautifulSoup 提取 window.initialState 这个元素,但没有成功。如何从这段 JSON 中获取“Title”和“longURL”?
下面是从该 url 的页面中取到的这个元素的一部分:
('window.initialState = {"kw":null,"originalKw":"","title":"Vitamins and '
'Supplements","numRecordsFrom":"1","numRecordsTo":"24","numRecordsTotal":"5353","adobeRecords":[{"productVendor":"Swanson '
'Premium","brandUrl":null,"productName":"NAC N-Acetyl '
'Cysteine","productPartNumber":"SW854","productDetails":"600 mg 100 '
'Caps","productPillSize":"O","productPotency":"600 '
'mg","productDiscountPrice":"10.44","offerMaxQty":"99999","rating":4.8,"shippingWeight":0.225,"statusMessage":"In '
'stock","statusId":"I","swansonItem":true,"tooLowToShow":false,"promoDetail":null,"bogo":false,"numReviews":"206","totalQuestions":null,"totalAnswers":null,"servingSize":"1 '
'capsule ","servingPotency":"600 mg '
'","servings":"100 '
'","retailPrice":"10.99","everyDayLowPrice":"10.99","percentDiscount":0.05,"outletmall":false,"discountable":true,"newWebItem":false,"vegan":false,"vegetarian":false,"kosher":false,"glutenFree":false,"organicUSDA":false,"nonGMO":false,"payPalExcluded":false,"easyRefillAllowed":true,"brandRefresh":false,"longURL":"swanson-premium-nac-n-acetyl-cysteine-600-mg-100-caps","masterGroupItem":"","flagMap":{"shoprunner":"shoprunner","highdemand":"highdemand","customerfavorite":"customerfavorite","canadanodisc":"canadanodisc"},"itemCategoriesDTO":{"breadCrumbsDTOList":[{"breadCrumbsList":[{"name":"Health '
'Concerns","seoURL":"/view/health-concerns","searchTerm":false},{"name":"Respiratory","seoURL":"/view/respiratory","searchTerm":false},{"name":"NAC","seoURL":"/view/nac","searchTerm":false}],"topLevelKeyword":"NAC","bottomLevelKeyword":"Health '
'Concerns","singleItem":false}]},"otherSizes":null,"numExtraXlImages":null,"reviewList":null,"suggestedAdobeRecord":null,"suggestedAdobeRecordString":null,"supFactsTemplate":false,"sfHtml":null,"catalogItem":null,"numListReviewsPages":0,"productDescriptionList":null,"bulletsList":null,"discount":0.05,"srEligible":true,"statusUnavailable":false},{"productVendor":"Swanson '
'Premium","brandUrl":null,"productName":"Vitamin D3 - Highest '
'Potency","productPartNumber":"SW1371","productDetails":"5,000 IU (125 mcg) '
'250 Sgels","productPillSize":"D","productPotency":"5,000 IU (125 '
'mcg)","productDiscountPrice":"8.95","offerMaxQty":"99999","rating":4.9,"shippingWeight":0.247,"statusMessage":"In '
'stock","statusId":"I","swansonItem":true,"tooLowToShow":false,"promoDetail":null,"bogo":false,"numReviews":"633","totalQuestions":null,"totalAnswers":null,"servingSize":"1 '
'softgel ","servingPotency":"5,000 IU (125 mcg) '
'","servings":"250 '
'","retailPrice":"11.19","everyDayLowPrice":"11.19","percentDiscount":0.2,"outletmall":false,"discountable":true,"newWebItem":false,"vegan":false,"vegetarian":false,"kosher":false,"glutenFree":false,"organicUSDA":false,"nonGMO":false,"payPalExcluded":false,"easyRefillAllowed":true,"brandRefresh":false,"longURL":"swanson-premium-highest-potency-vitamin-d-3-5000-iu-5000-iu-250-sgels","masterGroupItem":"","flagMap":{"highdemand":"highdemand","shoprunner":"shoprunner","customerfavorite":"customerfavorite"},"itemCategoriesDTO":{"breadCrumbsDTOList":[{"breadCrumbsList":[{"name":"Health '
'Concerns","seoURL":"/view/health-concerns","searchTerm":false},{"name":"Bone '
'Health","seoURL":"/view/bone-health","searchTerm":false},{"name":"Vitamin '
'D","seoURL":"/view/vitamin-d","searchTerm":false},{"name":"Vitamin '
'D3","seoURL":"/view/vitamin-d3","searchTerm":false}],"topLevelKeyword":"Vitamin+D3","bottomLevelKeyword":"Health '
'Concerns","singleItem":false}]},"otherSizes":null,"numExtraXlImages":null,"reviewList":null,"suggestedAdobeRecord":null,"suggestedAdobeRecordString":null,"supFactsTemplate":false,"sfHtml":null,"catalogItem":null,"numListReviewsPages":0,"productDescriptionList":null,"bulletsList":null,"discount":0.2,"srEligible":true,"statusUnavailable":false},{"productVendor":"Swanson '
'Premium","brandUrl":null,"productName":"Quercetin - High '
'Potency","productPartNumber":"SW1671","productDetails":"475 mg 60 Veg '
'Caps","productPillSize":"OO","productPotency":"475 '
'mg","productDiscountPrice":"10.63","offerMaxQty":"3","rating":4.7,"shippingWeight":0.158,"statusMessage":"In '
'stock","statusId":"I","swansonItem":true,"tooLowToShow":false,"promoDetail":null,"bogo":false,"numReviews":"95","totalQuestions":null,"totalAnswers":null,"servingSize":"1 '
'capsule ","servingPotency":"475 mg '
'","servings":"60 '
'","retailPrice":"13.29","everyDayLowPrice":"13.29","percentDiscount":0.2,"outletmall":false,"discountable":true,"newWebItem":false,"vegan":false,"vegetarian":true,"kosher":false,"glutenFree":false,"organicUSDA":false,"nonGMO":false,"payPalExcluded":false,"easyRefillAllowed":true,"brandRefresh":false,"longURL":"swanson-premium-high-potency-quercetin-475-mg-60-veg-caps","masterGr
# Download the page at `url` (defined elsewhere) and parse the response body as HTML.
# NOTE(review): `bsoup` is presumably an alias for bs4.BeautifulSoup — confirm against the imports.
page = requests.get(url)
soup = bsoup(page.text, 'html.parser')
# Collect every <script> tag and pretty-print them for inspection.
script = soup.find_all('script')
pprint(script)
所以,我尝试使用
# Search the raw HTML (`html_data`, defined elsewhere) for the `window.initialState = {...}` assignment.
# NOTE(review): re.search returns a Match object (or None), not a string, and the matched text
# still starts with the `window.initialState = ` prefix, so it is not valid JSON as-is.
data = re.search(r'window.initialState = {.*}', html_data)
但得到的结果无法转换为 JSON。
【问题讨论】:
标签: python html beautifulsoup