【发布时间】:2021-02-15 13:02:29
【问题描述】:
我一直在研究一个抓取工具,一切都很好,直到我尝试将抓取的数据对象推送到一个数组中。
现在我有这个:
/**
 * Intended to wrap one scraped listing's fields in an object and log it.
 *
 * @param {*} getLink - stored under the `link` key.
 * @param {*} getDescription - stored under the `descr` key.
 * @param {*} getPrice - stored under the `price` key.
 * @param {*} getPicture - stored under the `picture` key.
 */
exports.parseData = (getLink, getDescription, getPrice, getPicture) => {
const apartments = [];
// NOTE: `apartments` is a `const` binding, so the reassignment below throws
// "TypeError: Assignment to constant variable" and the console.log after it
// is never reached. The array declared above is replaced, not appended to.
apartments = {
link: getLink,
descr: getDescription,
price: getPrice,
picture: getPicture,
};
console.log(apartments);
};
但它会抛出错误:TypeError: Assignment to constant variable。
我认为这是由数据发送到解析器的方式造成的。以下是我改用下面这段代码时输出的数据:
exports.parseData = (getLink, getDescription, getPrice, getPicture) => {
console.log(getLink);
console.log(getDescription);
console.log(getPrice);
console.log(getPicture);
console.log('-----------------');
};
https://www.sant.ba/nekretnine/nekretnina-3467-dvosoban-stan-sa-liftom-u-samom-centru-grada-63-m2
dvosoban stan sa liftom u samom centru grada , 63 m2
199000.00
https://www.sant.ba/thumb.php?file=photos/3467/3467_1_1563271355.jpg&maxw=273&maxh=205
-----------------
https://www.sant.ba/nekretnine/nekretnina-3689-troiposoban-stan-u-naselju-sunca
Troiposoban stan u naselju Sunca
360000.00
https://www.sant.ba/thumb.php?file=photos/3689/3689_1_1612344465.jpg&maxw=273&maxh=205
-----------------
https://www.sant.ba/nekretnine/nekretnina-3677-trosoban-renoviran-stan-u-naselju-dolac-malta-73-m2
trosoban, renoviran stan u naselju Dolac malta, 73 m2
150000.00
https://www.sant.ba/thumb.php?file=photos/3677/3677_1_1608550332.jpg&maxw=273&maxh=205
-----------------
https://www.sant.ba/nekretnine/nekretnina-3537-cetverosoban-stan-u-novogradnji-sa-prelijepim-pogledom-na-grad-118-38-m2
Četverosoban stan u novogradnji sa prelijepim pogledom na grad, 118,38 m2
538090.37
https://www.sant.ba/thumb.php?file=photos/3537/3537_1_1573217155.jpg&maxw=273&maxh=205
-----------------
https://www.sant.ba/nekretnine/nekretnina-3536-trosoban-stan-u-novogradnji-sa-prelijepim-pogledom-na-grad-105-34-m2
Trosoban stan u novogradnji sa prelijepim pogledom na grad, 105,34 m2
478817.70
https://www.sant.ba/thumb.php?file=photos/3536/3536_2_1573216638.jpg&maxw=273&maxh=205
-----------------
https://www.sant.ba/nekretnine/nekretnina-3535-luksuzni-cetverosoban-stan-na-cobaniji-117-43m2
Luksuzni četverosoban stan na Čobaniji,117,43m2
533772.19
https://www.sant.ba/thumb.php?file=photos/3535/3535_13_1573215220.jpg&maxw=273&maxh=205
-----------------
https://www.sant.ba/nekretnine/nekretnina-3534-trosoban-stan-u-novogradnji-90-m2
trosoban stan u novogradnji, 90 m2
376923.46
https://www.sant.ba/thumb.php?file=photos/3534/3534_8_1573214216.jpg&maxw=273&maxh=205
-----------------
etc....
我想要的结果是一个对象数组,其中每个对象都填充了如上例所示的数据。也就是说,为每一组 getLink、getDescription、getPrice、getPicture 创建一个新对象,并将其推送到数组中(就像我在第一个代码块中尝试做的那样)。
我想得到的输出是:
[
{
link: https://www.sant.ba/nekretnine/nekretnina-3467-dvosoban-stan-sa-liftom-u-samom-centru-grada-63-m2
descr: dvosoban stan sa liftom u samom centru grada , 63 m2
price: 199000.00
picture: https://www.sant.ba/thumb.php?file=photos/3467/3467_1_1563271355.jpg&maxw=273&maxh=205
},
{
link:https://www.sant.ba/nekretnine/nekretnina-3689-troiposoban-stan-u-naselju-sunca
descr: Troiposoban stan u naselju Sunca
price: 360000.00
picture: https://www.sant.ba/thumb.php?file=photos/3689/3689_1_1612344465.jpg&maxw=273&maxh=205
},
etc...
]
这样的事情容易实现吗? 谢谢!
以下是发送这些变量的代码:
const cheerio = require('cheerio');
const axios = require('axios');
const parsing = require('./parseData');
exports.olxScraper = () => {
const url =
'https://www.olx.ba/pretraga?vrsta=samoprodaja&kategorija=23&sort_order=desc&kanton=9&sacijenom=sacijenom&stranica=1';
const getRawData = async () => {
try {
await axios.get(url).then((res) => {
const $ = cheerio.load(res.data);
$('div[id="rezultatipretrage"] > div')
.not('div[class="listitem artikal obicniArtikal i index"]')
.not('div[class="obicniArtikal"]')
.each((index, element) => {
$('span[class="prekrizenacijena"]').remove();
const getLink = $(element)
.find('div[class="naslov"] > a')
.attr('href');
const getDescription = $(element)
.find('div[class="naslov"] > a > p')
.text();
const getPrice = $(element)
.find('div[class="datum"] > span')
.text()
.replace(/\.| ?KM$/g, '')
.replace(' ', '');
const getPicture = $(element)
.find('div[class="slika"] > img')
.attr('src');
parsing.parseData(getLink, getDescription, getPrice, getPicture);
});
});
} catch (error) {
console.log(error);
}
};
getRawData();
};
exports.santScraper = () => {
const url = `https://www.sant.ba/pretraga/prodaja-1/tip-2/cijena_min-20000/stranica-1`;
const getRawData = async () => {
try {
await axios.get(url).then((response) => {
const $ = cheerio.load(response.data);
$('div[class="col-xxs-12 col-xss-6 col-xs-6 col-sm-6 col-lg-4"]').each(
(index, element) => {
const getLink = $(element).find('a[class="re-image"]').attr('href');
const getDescription = $(element).find('a[class="title"]').text();
const getPrice = $(element)
.find('div[class="prices"] > h3[class="price"]')
.text()
.replace(/\.| ?KM$/g, '')
.replace(',', '.');
const getPicture = $(element).find('img').attr('data-original');
/*const getSquaremeters = $(element)
.find('span[class="infoCount"]')
.first()
.text()
.replace(',', '.')
.split('m')[0];
const pricepersquaremeter =
parseFloat(getPrice) / parseFloat(getSquaremeters);
articles[index] = {
id: getLink.substring(42, 46),
link: getLink,
descr: getDescription,
price: Math.round(getPrice),
pictures: getPicture,
sqm: Math.round(getSquaremeters),
ppm2: Math.round(pricepersquaremeter),
};*/
parsing.parseData(getLink, getDescription, getPrice, getPicture);
}
);
});
} catch (error) {
console.log(console.log(error));
}
};
getRawData();
};
// Start both scrapers as soon as this module is loaded. At the top level of a
// CommonJS module `this` is the exports object, so these are the same calls.
exports.olxScraper();
exports.santScraper();
【问题讨论】:
标签: javascript node.js arrays object