【发布时间】:2019-09-06 05:37:54
【问题描述】:
所以基本上我使用 puppeteer 下载 CSV 文件,然后读取 CSV 文件,然后使用我的命名结构创建一个新文件,并将该数据存放在 Dropbox 文件夹中。对于不同的帐户,此过程会一次又一次地重复。问题是由于 node.js 的异步行为,它开始下载另一个文件并且不等待文件写入过程完成。由于这个问题,文件被存储在错误的文件夹中,因为文件夹名称也在循环中传递。我该如何解决这个问题?
const puppeteer = require('puppeteer');
var datetime = require('node-datetime');
var dt = datetime.create();
dt.offsetInDays(-2);
var start_date = dt.format('Y-m-d');
dt.offsetInDays(1);
var end_date = dt.format('Y-m-d');
dt.offsetInDays(0);
var date = dt.format('Y-m-d');
var fs = require('fs'),
path = require('path'),
_ = require('underscore');
var user_details = [
{
username : 'user1',
password : 'password1',
folder_name : 'folder1'
},
{
username : 'user2',
password : 'password2',
folder_name : 'folder2'
},
{
username : 'user3',
password : 'password3',
folder_name : 'folder3'
},
{
username : 'user4',
password : 'password4',
folder_name : 'folder4'
}];
(async () => {
console.log(user_details.length);
for( var i=0; i<user_details.length; i++){
var username = user_details[i]['username'];
var password = user_details[i]['password'];
var folder_name = user_details[i]['folder_name'];
console.log(username);
console.log(folder_name);
const browser = await puppeteer.launch({ headless: false});
const page = await browser.newPage();
await page.goto('url', {waitUntil: 'networkidle2'});
await page.waitFor('input[name=name]');
await page.$eval('input[name=name]', (el, _username) => el.value = _username, username);
await page.$eval('input[name=password]', (el, _password) => el.value = _password, password);
await page.click('input[type=submit]');
await page.waitFor('.breadcrumb')
await page.goto('url', {waitUntil: 'networkidle2'});
await page.waitForSelector('.csv_toggle');
await page.click('.action');
await page.click('.fa-files-o');
await page.waitFor(10000);
var dir = '/users/user/Downloads/';
var files = fs.readdirSync(dir);
// use underscore for max()
var file_name = _.max(files, function (f) {
var fullpath = path.join(dir, f);
// ctime = creation time is used
// replace with mtime for modification time
return fs.statSync(fullpath).ctime;
});
var filePath = path.join('/users/user/Downloads/',file_name);
fs.readFile(filePath, {encoding: 'utf-8'}, function(err,data){
if (!err) {
lines = data;
fs.writeFile("/users/user/Dropbox/Puppeteer/"+folder_name+"/data_"+folder_name+"_"+date+"_.csv", data, function(err) {
if(err) {
console.log(err);
}
console.log("The file was saved!");
console.log('Closing Browser');
browser.close();
});
} else {
console.log('-----SOMETHING IS WRONG-----');
}
});
await page.waitFor(10000);
}
})();
上面的代码工作得很好,因为我使用了 10 秒的延迟来完成编写过程,但这是一种 hack,当互联网连接速度慢或机器速度慢时肯定不会工作。我想要的是它应该首先等待文件完全下载,然后等待文件写入过程完成,然后为下一个用户继续循环。
【问题讨论】:
标签: node.js automation google-chrome-devtools puppeteer google-chrome-headless