【发布时间】:2015-06-07 11:36:47
【问题描述】:
我有一个 Node.js 模块,在其中将 .csv 文件解析为 MongoDB 文档的集合。
对于每条传入记录,我添加了一些额外的元数据字段,用于描述该记录。
由于某种原因,只有第一个插入的文档有这些字段,其余的没有。 添加这些字段的正确方法是什么,以便将它们保留在每个插入的文档中?
这是我的代码:
exports.excelFileParser = function(fileName, tabName, metadataFields){
var assert = require('assert');
console.log(metadataFields);
if(typeof require !== 'undefined') XLSX = require('xlsx');
var mongodb = require('mongodb');
var _ = require('underscore');
var fs = require('fs');
var Converter=require("csvtojson").core.Converter;
var distinctDataFields;
var MongoClient = mongodb.MongoClient;
var url = 'mongodb://localhost:27017/datamanager-03-test';
var workbook = XLSX.readFile('./uploads/' + fileName);
var worksheet = workbook.Sheets[tabName];
var metaFields = metadataFields;
var json_conversion = XLSX.utils.sheet_to_json(worksheet);
var csv_conversion = XLSX.utils.sheet_to_csv(worksheet);
var csv_lines = csv_conversion.split('\n');
var dirtyHeaderLine = csv_lines[0];
var cleanHeaderLine = dirtyHeaderLine.replace(/\./g,"")
.replace(/ /g,"")
.replace(/"'"/g,"")
.replace(/","/g,"")
.replace(/"\/"/g,"")
.replace(/"#"/g,"")
.replace(/\(/g,"")
.replace(/\)/g,"");
cleanHeaderLine = cleanHeaderLine.replace(/,+$/, "");
console.log(cleanHeaderLine);
csv_lines[0] = cleanHeaderLine;
var newCsvLines = [];
csv_lines.forEach(function(line){
if (line.charAt(0) == ',') { line = line.slice(1); }
var currentIndex = csv_lines.indexOf(line);
if(currentIndex === 0){
newCsvLines.push(line);
}
else {
newCsvLines.push("\n" + line);
}
});
var csvFileName = './uploads/' + fileName + '_clean.csv';
fs.writeFileSync(csvFileName, newCsvLines);
var fileStream=fs.createReadStream(csvFileName);
//new converter instance
var csvConverter=new Converter({constructResult:true});
//end_parsed will be emitted once parsing finished
csvConverter.on("end_parsed",function(jsonObj){
//console.log(jsonObj); //here is your result json object
makeRecords(jsonObj);
});
//read from file
fileStream.pipe(csvConverter);
function makeRecords(result){
console.log(result.length);
MongoClient.connect(url, function (err, db) {
if (err) {
console.log('Unable to connect to the mongoDB server. Error:', err);
} else {
console.log('Connection established to', url);
var categoryName = metaFields.subjectCategory;
var collectionName = categoryName.replace(/ /g,"_");
db.open(function(err, client){
client.createCollection(collectionName, function(err, col) {
});
var collection = db.collection(collectionName);
for(var i =0;i < result.length; i++){
//console.log(result[i]);
var dataRecord = result[i];
dataRecord.OriginalDocumentName = fileName;
dataRecord.DocumentAuthor = metaFields.documentAuthor;
dataRecord.TabName = tabName;
dataRecord.SubjectCategory = metaFields.subjectCategory;
dataRecord.Subject = metaFields.subject;
dataRecord.DateDocumentProduced = metaFields.dateDocumentProduced;
dataRecord.DateDocumentReceived = metaFields.dateDocumentReceived;
dataRecord.DocumentSubmitter = metaFields.documentSubmitter;
dataRecord.DocumentReviewer = metaFields.documentReviewer;
dataRecord.OriginalSource = metadataFields.originalSource,
dataRecord.DataVersion = metadataFields.dataVersion,
dataRecord.DataFields = newCsvLines[0];
var inserted = 0;
for(var i = 0;i < result.length; i++) {
var dataRecord = result[i];
collection.insert(dataRecord, function (err) {
if (++inserted == result.length) {
console.log("finished");
db.close();
}
});
}
}
});
//console.log("finished");
//db.close();
}
});
}
};
【问题讨论】:
标签: javascript node.js mongodb mean-stack