【发布时间】:2020-09-15 19:35:38
【问题描述】:
我正在尝试一次性查找 ID 以 "pub" 开头的所有标签,例如 <div id="pub-1"> 和 <div id="pub-foo">,然后把这些 ID 保存下来并插入到一个数组中。我有以下代码:
'use strict';
const puppeteer = require('puppeteer');
var fs = require('fs');
// Parse sites.csv into rows of trimmed cells: one row per non-blank line,
// one cell per comma-separated value.
const data = fs
  .readFileSync('sites.csv')
  .toString()
  .split('\n')
  .map((line) => line.trim())
  // Skip blank lines (e.g. the one produced by a trailing newline) so the
  // crawl loop never receives a row holding only an empty URL.
  .filter((line) => line !== '')
  .map((line) => line.split(',').map((cell) => cell.trim()));
// Visit every row of `data` in turn: launch a browser, open the row's URL,
// log the innerHTML of the element with id "pub-1", then close the browser.
// A failure on one row is logged and does not stop the remaining rows.
(async () => {
  for (const url of data) {
    // Declared outside `try` so the `finally` clause can still reach it.
    let browser;
    try {
      browser = await puppeteer.launch({
        headless: false,
        args: ['--window-size=1920,1080'],
        defaultViewport: null,
      });
      const page = await browser.newPage();
      // `url` is a row (array of cells); interpolation joins cells with ','.
      await page.goto(`${url}`, { waitUntil: 'networkidle0' });
      console.log(`Loading page: ${url}`);
      // NOTE: '#pub-1' matches only the element whose id is exactly "pub-1".
      // An attribute-prefix selector such as '[id^="pub"]' used with
      // page.$$eval would match every element whose id starts with "pub".
      const spanVal = await page.$eval('#pub-1', (el) => el.innerHTML); // my problems lies here
      console.log(`\n \n \n Log: ${spanVal}`);
    } catch (err) {
      console.log(`Error ${err}`);
    } finally {
      // Always release the browser, including when goto/$eval threw;
      // `browser` is still undefined if launch itself failed.
      if (browser !== undefined) {
        try {
          await browser.close();
        } catch (closeErr) {
          console.log(`Error ${closeErr}`);
        }
      }
    }
  }
})();
也欢迎任何关于代码质量的建议。 :)
【问题讨论】:
标签: javascript node.js arrays web-scraping puppeteer