我认为你抓取的那个表格不对。
我发现结果页面中至少有 5 个表格。
所以你必须为包含数据的那个表格找到正确的选择器。
正确的选择器是 table[role="grid"]。而且你可以在不使用 tabletojson 的情况下抓取搜索结果中的所有页面。我写了下面这个脚本,把搜索结果表格中的数据写入 CSV 文件。
const puppeteer = require ('puppeteer')
const fs = require ('fs-extra')
// Script configuration. The two element IDs are interpolated into
// `div[id="..."]` / `div[id="..._panel"]` selectors to locate the dropdown
// widgets on the search form.
const selectElementID_first = 'formHome:tabOperadora:solbSituacoesPrincipais'
const selectElementID_second = 'formHome:tabOperadora:solbAbrangenciasGeograficas'
// Option labels to pick in each dropdown; compared against the innerText of
// the highlighted <li> in the dropdown panel, so they must match exactly.
const selectedOption_first = 'Liberada'
const selectedOption_second = 'Nacional'
// Path of the CSV file the scraped rows are written to.
const saveFileCSV = 'porpiano.csv'
// Page label (first part of the paginator text, split on ' de ') of the page
// scraped most recently. It is strictly compared with a string, so the
// numeric initial value 0 never matches and the first page is always processed.
let lastPage = 0
;(async () => {
  // Pause between UI polls / key presses, in milliseconds.
  const POLL_INTERVAL_MS = 300;
  // Pause between paginator polls while waiting for the next page to render.
  const PAGE_POLL_INTERVAL_MS = 250;
  // Give up on keyboard navigation after this many presses instead of
  // spinning forever when the widget or the option can never be reached.
  const MAX_KEY_PRESSES = 200;
  // Number of data columns in the result grid (matches the CSV header below).
  const COLUMN_COUNT = 7;
  // Zero-based index of the column that is always wrapped in quotes
  // ("Nome Comercial do Plano"), kept for compatibility with existing output.
  const ALWAYS_QUOTED_COLUMN = 1;

  /**
   * Resolve after `ms` milliseconds.
   * Used instead of `page.waitFor(ms)`, which is deprecated and no longer
   * available in newer Puppeteer releases.
   * @param {number} ms - delay in milliseconds
   * @returns {Promise<void>}
   */
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  /**
   * Format one value as a CSV field. The field is wrapped in double quotes
   * (with embedded quotes doubled) when it contains a comma, a quote or a
   * line break, or when `forceQuote` is set.
   * @param {*} value - cell value; null/undefined become an empty field
   * @param {boolean} [forceQuote=false] - quote even when not required
   * @returns {string}
   */
  const toCsvField = (value, forceQuote = false) => {
    const text = value == null ? '' : String(value);
    if (!forceQuote && !/[",\r\n]/.test(text)) {
      return text;
    }
    return `"${text.replace(/"/g, '""')}"`;
  };

  /**
   * Format one table row (array of cell texts) as a CSV line ending in "\n".
   * @param {string[]} cells
   * @returns {string}
   */
  const toCsvLine = (cells) => {
    const fields = cells.map((text, index) => toCsvField(text, index === ALWAYS_QUOTED_COLUMN));
    return `${fields.join(',')}\n`;
  };

  /**
   * Press `key` repeatedly (pausing between presses) until `isDone()` resolves
   * to true.
   * @param {object} page - Puppeteer page
   * @param {string} key - key name passed to page.keyboard.press
   * @param {function(): Promise<boolean>} isDone - completion check
   * @param {string} goal - human-readable goal, used in the error message
   * @throws {Error} when MAX_KEY_PRESSES presses were not enough
   */
  const pressUntil = async (page, key, isDone, goal) => {
    for (let presses = 0; !(await isDone()); presses += 1) {
      if (presses >= MAX_KEY_PRESSES) {
        throw new Error(`Gave up after ${MAX_KEY_PRESSES} "${key}" presses while trying to ${goal}`);
      }
      await page.keyboard.press(key);
      await sleep(POLL_INTERVAL_MS);
    }
  };

  /**
   * Choose `optionLabel` in the dropdown widget `div[id="<elementId>"]` using
   * the keyboard only: Tab until the widget has focus, Space to open its
   * panel, ArrowDown until the highlighted item matches, Enter to confirm.
   * @param {object} page - Puppeteer page
   * @param {string} elementId - id of the dropdown's root <div>
   * @param {string} optionLabel - exact innerText of the option to select
   */
  const selectDropdownOption = async (page, elementId, optionLabel) => {
    const hasFocus = () => page.evaluate(
      (id) => document.querySelector(`div[id="${id}"]`).classList.contains('ui-state-focus'),
      elementId
    );
    const isPanelHidden = () => page.evaluate(
      (id) => document.querySelector(`div[id="${id}_panel"]`).style.display === 'none',
      elementId
    );
    const isOptionHighlighted = () => page.evaluate(
      (id, label) => document.querySelector(`div[id="${id}_panel"] ul > li.ui-state-highlight`).innerText === label,
      elementId,
      optionLabel
    );

    await pressUntil(page, 'Tab', hasFocus, `focus dropdown "${elementId}"`);
    await page.keyboard.press('Space');
    while (await isPanelHidden()) {
      await sleep(POLL_INTERVAL_MS);
    }
    await pressUntil(page, 'ArrowDown', isOptionHighlighted, `highlight option "${optionLabel}"`);
    await page.keyboard.press('Enter');
    // Give the form time to react to the new selection before moving on.
    await sleep(1000);
  };

  /**
   * Read the paginator label and split it on ' de '.
   * @param {object} page - Puppeteer page
   * @returns {Promise<string[]>} [currentPage, totalPages] as strings
   */
  const readPaginator = (page) => page.evaluate(
    () => document.querySelector('span.ui-paginator-current').innerText.split(' de ')
  );

  /**
   * Collect the text of the grid cells of the currently displayed page, one
   * array per table row. Rows are read one <tr> at a time inside
   * table[role="grid"], so the cells of a row always stay together; rows
   * with fewer than COLUMN_COUNT cells are padded with empty strings.
   * @param {object} page - Puppeteer page
   * @returns {Promise<string[][]>}
   */
  const extractRows = (page) => page.evaluate((columnCount) => {
    const rows = [];
    for (const row of document.querySelectorAll('table[role="grid"] tr')) {
      const cells = Array.from(row.children).filter((cell) => cell.matches('td[role="gridcell"]'));
      if (cells.length > 0) {
        rows.push(Array.from({ length: columnCount }, (_, i) => (cells[i] ? cells[i].innerText : '')));
      }
    }
    return rows;
  }, COLUMN_COUNT);

  /**
   * Walk through every result page, appending its rows to the CSV file, and
   * return once the last page has been written.
   * @param {object} page - Puppeteer page
   */
  const scrapeAllPages = async (page) => {
    for (;;) {
      // Wait until the paginator exists and shows a page other than the one
      // scraped last, i.e. until the next page has actually been rendered.
      await page.waitForSelector('span.ui-paginator-current', { timeout: 0 });
      while (lastPage === (await readPaginator(page))[0]) {
        await sleep(PAGE_POLL_INTERVAL_MS);
      }
      await page.waitForSelector('table[role="grid"]', { timeout: 0 });

      const rows = await extractRows(page);
      if (rows.length > 0) {
        // One write per page instead of one write (plus a delay) per row.
        await fs.appendFile(saveFileCSV, rows.map(toCsvLine).join(''));
      }

      const [currentPage, totalPages] = await readPaginator(page);
      lastPage = currentPage;
      if (currentPage === totalPages) {
        return;
      }
      await page.evaluate(() => document.querySelector('a[aria-label="Next Page"]').click());
    }
  };

  const browser = await puppeteer.launch({
    headless: false,
    devtools: false,
  });
  try {
    const [page] = await browser.pages();
    await page.goto('http://www.ans.gov.br/ConsultaPlanosConsumidor/', { waitUntil: 'networkidle0', timeout: 0 });
    // Switch to the "Por Plano" search tab.
    await page.evaluate(() => document.querySelector('a[href="#formHome:tabOperadora:panelPorPlano"]').click());

    await selectDropdownOption(page, selectElementID_first, selectedOption_first);
    await selectDropdownOption(page, selectElementID_second, selectedOption_second);

    // Submit the search form.
    await page.evaluate(() => document.querySelector('div[id="formHome:tabOperadora:panelPorPlano"] button[type="submit"]').click());

    // Start the CSV file with the header row, replacing any previous file.
    await fs.writeFile(saveFileCSV, 'Número do Registro / Código do Plano, Nome Comercial do Plano, Segmentação Assistencial, Tipo de Contratação, Abrangência Geográfica, Tipo de Plano, Comercialização\n');

    await scrapeAllPages(page);
    console.log('SCRAPE ALL TABLE DATA FINISHED\nCLOSING PUPPETEER BROWSER!');
  } finally {
    // Always release the browser, even when scraping failed part-way.
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});