【发布时间】:2020-04-17 11:25:38
【问题描述】:
目标是启动 html 页面,输入来自 booking.com 的 url,单击按钮,然后在控制台中返回抓取的酒店名称、评级等。
到目前为止,单击按钮时它不会返回任何内容。 它在 URL 被硬编码时起作用,但它以这种形式表示“声明了 main,但从不读取值”。我是否错误地调用了该函数?我还是 puppeteer 的新手,也许我忽略了什么?
这里是 app.js
function main()
{
var Url = document.getElementById('inputUrl').value
const puppeteer = require('puppeteer');
let bookingUrl = Url;
(async () => {
const browser = await puppeteer.launch({ headless: true });
const page = await browser.newPage();
await page.goto(bookingUrl);
// get hotel details
let hotelData = await page.evaluate(() => {
let hotels = [];
// get the hotel elements
let hotelsElms = document.querySelectorAll('div.sr_property_block[data-hotelid]');
// get the hotel data
hotelsElms.forEach((hotelelement) => {
let hotelJson = {};
try {
hotelJson.name = hotelelement.querySelector('span.sr-hotel__name').innerText;
hotelJson.reviews = hotelelement.querySelector('div.bui-review-score__text').innerText;
hotelJson.rating = hotelelement.querySelector('div.bui-review-score__badge').innerText;
if(hotelelement.querySelector('div.bui-price-display__value.prco-inline-block-maker-helper'))
{
hotelJson.price = hotelelement.querySelector('div.bui-price-display__value.prco-inline-block-maker-helper').innerText;
}
hotelJson.imgUrl = hotelelement.querySelector('img.hotel_image').attributes.src.textContent;
}
catch (exception){
}
hotels.push(hotelJson);
});
return hotels;
});
console.dir(hotelData);
})();
}
这里是 index.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<script src="app.js" type="text/javascript"></script>
<title></title>
<meta name="description" content="">
<link rel="stylesheet" href="">
</head>
<input id = "inputUrl" type="text" placeholder = "type url here"/>
<button id = "button" button onclick="main();"> click</button>
<body>
<script src="" async defer></script>
</body>
</html>
【问题讨论】:
标签: javascript html node.js web-scraping puppeteer