【发布时间】:2018-07-29 14:16:52
【问题描述】:
我正在使用 Puppeteer 抓取某个网站页面中的表格行。我已经有代码可以抓取每一行的内容,并为表格中的每一行生成一个对象。对于每个表格行,我还需要在新页面(Puppeteer 的 page)中打开该行里的链接,抓取其中的某个特定元素,把它作为新的键添加到同一个对象上,最后把带有新键的完整对象返回给 Puppeteer。用 Puppeteer 应该如何实现?
/**
 * Scrapes the table on the tokenmarket.net blockchain listing, adds to every
 * row the text found on that row's detail page (stored under `assets`), logs
 * the resulting array and hands it to `UpcomingIco.insertMany`.
 *
 * The listing is read with a single `page.$$eval` call that returns only
 * plain, serialisable data. The detail pages are then opened from Node in a
 * second Puppeteer page: code running inside `$$eval` executes in the browser,
 * cannot see Node variables, and its return value is serialised before any
 * un-awaited in-page request could finish.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 */
async function run() {
  const LISTING_URL = 'https://tokenmarket.net/blockchain/'
  const ROW_SELECTOR = 'table > tbody tr .col-actions a:first-child'
  const ASSETS_SELECTOR = '#page-wrapper > main > div.container > div > table > tbody > tr > td:nth-child(2)'

  const browser = await puppeteer.launch({
    headless: false
  })

  try {
    const page = await browser.newPage()
    await page.goto(LISTING_URL, {waitUntil: 'networkidle0'})
    // Fixed delay kept from the original script.
    // NOTE(review): `page.waitFor` is deprecated/removed in newer Puppeteer
    // releases - confirm against the installed version.
    await page.waitFor(5000)

    // NOTE(review): the selector matches anchors, yet the traversal below reads
    // several child levels as if `el` were a table row - confirm against the
    // live markup. The traversal itself is unchanged from the original.
    const rows = await page.$$eval(ROW_SELECTOR, (els) => els.map((el) => {
      const dateText = el.children[2].innerText.split('\n').shift()
      const parsed = dateText === 'To be announced' ? null : new Date(dateText)
      // An unparseable date would make toISOString() throw and abort the whole
      // scrape, so it is reported as null like an unannounced date.
      const hasValidDate = parsed !== null && !Number.isNaN(parsed.getTime())

      return {
        // URL of the inner page; consumed in Node below, not stored.
        detailUrl: el.children[0].href,
        ico: {
          icoImgUrl: el.children[0].children[0].children[0].currentSrc,
          icoDate: hasValidDate ? parsed.toISOString() : null,
          icoName: el.children[1].children[0].innerText,
          link: el.children[1].children[0].children[0].href,
          description: el.children[3].innerText
        }
      }
    }))

    // One extra page is reused for all detail visits so the listing page and
    // its DOM stay untouched.
    const detailPage = await browser.newPage()
    const result = []
    for (const {detailUrl, ico} of rows) {
      let assets = ''
      if (detailUrl) {
        await detailPage.goto(detailUrl, {waitUntil: 'domcontentloaded'})
        // Concatenate the text of every matching cell; yields '' when nothing
        // matches instead of throwing.
        assets = await detailPage.$$eval(
          ASSETS_SELECTOR,
          (cells) => cells.map((cell) => cell.textContent).join('')
        )
      }
      ico.assets = assets
      result.push(ico)
    }

    console.log(result)
    UpcomingIco.insertMany(result, function(error, docs) {
      // Surface storage failures instead of discarding them silently.
      if (error) {
        console.error('UpcomingIco.insertMany failed:', error)
      }
    })
  } finally {
    // Always release the browser, even when navigation or scraping throws.
    await browser.close()
  }
}
// Start the scrape. `run` is async, so a failure arrives as a promise
// rejection: report it and flag it through the exit code rather than leaving
// the rejection unhandled.
run().catch((error) => {
  console.error(error)
  process.exitCode = 1
})
【问题讨论】:
标签: node.js web-scraping puppeteer