【Question Title】: JavaScript heap out of memory: increase node memory or optimize my code
【Posted】: 2017-04-24 23:50:03
【Question】:

I am getting a JavaScript heap out of memory error in my Node.js application. I am trying to insert 408,000 records into MongoDB in a single run. I have two loops: the first runs 24 times, and the second (nested inside the first) runs 17,000 times. The data comes from a NetCDF file. I parse the data from this file, build model objects, and insert them into MongoDB.

I saw some posts about this problem on Stack Overflow, and I found that I can increase Node's memory with --max_old_space_size. But I don't know whether that is the right approach. Maybe you have some suggestions to optimize my code?
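For reference, the flag is passed on the command line like this (the 4096 MB value and the app.js entry file are placeholders, not from my project):

node --max_old_space_size=4096 app.js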

Here is my loop:

for (var time_pos = 0; time_pos < 24; time_pos++) {

    // This array contains 17,000 values
    var dataSliced = file.root.variables['pm10_conc'].readSlice(
        time_pos, time_size,
        level_pos, level_size,
        lat_from, lat_size,
        lng_from, lng_size
    );

    // Loop: 0 to 17,000
    for (var i = 0; i < dataSliced.length; i++) {
        var pollution = new Pollution();

        pollution.latitude   = current_lat;
        pollution.longitude  = current_lng;
        pollution.country    = country_name;
        pollution.model      = model_name;
        pollution.data_type  = type_name;
        pollution.level      = 0;
        pollution.datetime   = date;
        pollution.pollutants.pm10.description = description;
        pollution.pollutants.pm10.units = units;
        pollution.pollutants.pm10.concentration = dataSliced[i];

        pollution.save(function (err) {
            if (err) throw err;
            console.log("Data saved");
        });
    }
}

Here is the error:

<--- Last few GCs --->

   56782 ms: Mark-sweep 1366.6 (1436.9) -> 1366.6 (1436.9) MB, 1943.5 / 0.0 ms [allocation failure] [GC in old space requested].
   58617 ms: Mark-sweep 1366.6 (1436.9) -> 1366.6 (1436.9) MB, 1834.9 / 0.0 ms [allocation failure] [GC in old space requested].
   60731 ms: Mark-sweep 1366.6 (1436.9) -> 1368.6 (1417.9) MB, 2114.3 / 0.0 ms [last resort gc].
   62707 ms: Mark-sweep 1368.6 (1417.9) -> 1370.7 (1417.9) MB, 1975.8 / 0.0 ms [last resort gc].


<--- JS stacktrace --->

==== JS stack trace =========================================

Security context: 0x3a7c3fbcfb51 <JS Object>
    1: fnWrapper [/var/www/html/Project/node_modules/hooks-fixed/hooks.js:185] [pc=0x6ccee7825d4] (this=0x3a7c3fbe6119 <JS Global Object>)
    2: fn [/var/www/html/Project/node_modules/mongoose/lib/schema.js:~250] [pc=0x6ccee7d8ffe] (this=0xd29dd7fea11 <a model with map 0x994a88e5849>,next=0x1cbe49858589 <JS Function fnWrapper (SharedFunctionInfo 0x3d8ecc066811)>,done=0x1cbe498586...

FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - JavaScript heap out of memory
 1: node::Abort() [node]
 2: 0x1098b2c [node]
 3: v8::Utils::ReportApiFailure(char const*, char const*) [node]
 4: v8::internal::V8::FatalProcessOutOfMemory(char const*, bool) [node]
 5: v8::internal::Factory::NewTransitionArray(int) [node]
 6: v8::internal::TransitionArray::Insert(v8::internal::Handle<v8::internal::Map>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::Map>, v8::internal::SimpleTransitionFlag) [node]
 7: v8::internal::Map::CopyReplaceDescriptors(v8::internal::Handle<v8::internal::Map>, v8::internal::Handle<v8::internal::DescriptorArray>, v8::internal::Handle<v8::internal::LayoutDescriptor>, v8::internal::TransitionFlag, v8::internal::MaybeHandle<v8::internal::Name>, char const*, v8::internal::SimpleTransitionFlag) [node]
 8: v8::internal::Map::CopyAddDescriptor(v8::internal::Handle<v8::internal::Map>, v8::internal::Descriptor*, v8::internal::TransitionFlag) [node]
 9: v8::internal::Map::CopyWithField(v8::internal::Handle<v8::internal::Map>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::FieldType>, v8::internal::PropertyAttributes, v8::internal::Representation, v8::internal::TransitionFlag) [node]
10: v8::internal::Map::TransitionToDataProperty(v8::internal::Handle<v8::internal::Map>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::Object>, v8::internal::PropertyAttributes, v8::internal::Object::StoreFromKeyed) [node]
11: v8::internal::LookupIterator::PrepareTransitionToDataProperty(v8::internal::Handle<v8::internal::JSObject>, v8::internal::Handle<v8::internal::Object>, v8::internal::PropertyAttributes, v8::internal::Object::StoreFromKeyed) [node]
12: v8::internal::StoreIC::LookupForWrite(v8::internal::LookupIterator*, v8::internal::Handle<v8::internal::Object>, v8::internal::Object::StoreFromKeyed) [node]
13: v8::internal::StoreIC::UpdateCaches(v8::internal::LookupIterator*, v8::internal::Handle<v8::internal::Object>, v8::internal::Object::StoreFromKeyed) [node]
14: v8::internal::StoreIC::Store(v8::internal::Handle<v8::internal::Object>, v8::internal::Handle<v8::internal::Name>, v8::internal::Handle<v8::internal::Object>, v8::internal::Object::StoreFromKeyed) [node]
15: v8::internal::Runtime_StoreIC_Miss(int, v8::internal::Object**, v8::internal::Isolate*) [node]
16: 0x6ccee4092a7
Aborted
[nodemon] app crashed - waiting for file changes before starting...

Do you know whether there is a way to optimize my code, or is increasing Node's memory the best approach?

EDIT

I have a working solution. I first tried Mongoose's insertMany(), but I hit the fatal allocation failed error again.

Then I removed the new Pollution call and pushed my data into a plain array instead. After that I used collection.insert with async.each, like this:

var pollution = [];

for (var time_pos = 0; time_pos < 24; time_pos++) {

    // This array contains 17,000 values
    var dataSliced = file.root.variables['pm10_conc'].readSlice(
        time_pos, time_size,
        level_pos, level_size,
        lat_from, lat_size,
        lng_from, lng_size
    );

    async.each(dataSliced, function (item, next) {

        pollution.push({
            'longitude' : current_lng,
            'latitude'  : current_lat,
            'country'   : country_name,
            'model'     : model_name,
            'data_type' : type_name,
            'level'     : 0,
            'datetime'  : date,
            'pollution' : {
                'pm10': {
                    'description': description,
                    'units': units,
                    'concentration': item
                }
            }
        });

        // Signal async.each that this item is done
        next();
    });
}

Pollution.collection.insert(pollution, function(err, docs){
    if (err) throw err;

    console.log("Data saved");
});
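Note that this still hands the whole 408,000-document array to collection.insert in one call. A possible refinement (an untested sketch; the 1,000 chunk size is an arbitrary choice) would be to insert the array in chunks with async.eachSeries:

var chunkSize = 1000;
var starts = [];
for (var s = 0; s < pollution.length; s += chunkSize) {
    starts.push(s);
}

// Insert one chunk at a time so only a small slice is in flight
async.eachSeries(starts, function (start, next) {
    Pollution.collection.insert(pollution.slice(start, start + chunkSize), next);
}, function (err) {
    if (err) throw err;
    console.log("All chunks saved");
});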

If you have a better solution, feel free to post your answer.

【Comments】:

  • Sounds like you need to do this in batches, not all at once.
  • The inner loop body runs 408,000 (24 * 17,000) times; you should start by changing that.
  • @Daniel A. White Do you mean I have to set up a cron job?
  • No, I mean inside the program.
  • Is there an example of doing it in batches?

Tags: javascript node.js mongodb memory optimization


【Solution 1】:

I hope this helps you and others... :-)

I have been researching the best way to import data into MongoDB. I have used mongoimport as well as Mongoose with its insertMany method (which uses the native MongoDB driver). I have read that it is best to keep the batch sizes to about 100 for best performance. Here is my solution using insertMany. Using mongoimport is very simple (just a single command line), so I see no need to post it here.

In my example, 602,198 records were first parsed into an array of objects and then successfully imported into MongoDB.

Importing the parsed objects into MongoDB takes some memory, so it is usually necessary to let Node use more memory with the command below; you can read more about it here:

node --max_old_space_size=8000  partImportNew.js

To make it more efficient, I split the array of objects into batches and use Promise.all, which resolves when all of the promises in the iterable argument have resolved.

If you have larger files and still run out of memory even after increasing Node's memory limit, you can split the file. Remove the header line beforehand and add the headers in the csv parser options instead.

Splitting the file:

$ split -l numberoflines filename
ex. split -l 1000000 term2.csv

Say term2.csv has 5,000,001 lines and no header. From the example above you would get 6 files: 5 files with one million lines each, and 1 file with a single line.
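A minimal sketch of importing the split files one after another (the chunk names xaa, xab, ... are what split produces by default; promiseCSV and bulkImportToMongo are the helpers shown below):

const chunkFiles = ["xaa", "xab", "xac", "xad", "xae", "xaf"];

// Chain the imports so only one chunk is parsed and inserted at a time
chunkFiles
  .reduce(function (previous, file) {
    return previous
      .then(function () { return promiseCSV(file, options); })
      .then(function (records) { return bulkImportToMongo(records, "parts.js"); });
  }, Promise.resolve())
  .then(function () { console.log("All chunks imported"); })
  .catch(function (error) { console.log(error); });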

Take a look at how I solved it in the function bulkImportToMongo in the mongodb.js file below.

Console

➜  database git:(master) ✗ node --max_old_space_size=8000  partImport.js
Connected to db!
Time to parse file: : 5209.325ms
Disconnected from db!
Time to import parsed objects to db: : 153606.545ms
➜  database git:(master) ✗

parseCSV.js

const csv = require("fast-csv");

function promiseCSV(filePath, options) {
  return new Promise((resolve, reject) => {
    console.time("Time to parse file");
    var records = [];
    csv
      .fromPath(filePath, options)
      .on("data", record => {
        records.push(record);
      })
      // Reject the promise if the parser fails instead of hanging forever
      .on("error", error => reject(error))
      .on("end", () => {
        console.timeEnd("Time to parse file");
        resolve(records);
      });
  });
}

module.exports = promiseCSV;

mongodb.js

const mongoose = require("mongoose");
mongoose.Promise = global.Promise;

function connectToMongo(databaseName) {
  mongoose.connect(`mongodb://localhost:27017/${databaseName}`, {
    keepAlive: true,
    reconnectTries: Number.MAX_VALUE,
    useMongoClient: true
  });
  console.log("Connected to db!");
}

function disconnectFromMongo() {
  mongoose.disconnect();
  console.log("Disconnected from db!");
}

// Split the array into batches of batchSize and fire off all the inserts
function bulkImportToMongo(arrayToImport, mongooseModel) {
  const Model = require(`../../../models/${mongooseModel}`);
  const batchSize = 100;
  let batchCount = Math.ceil(arrayToImport.length / batchSize);
  let ops = [];
  let counter = 0;
  for (let i = 0; i < batchCount; i++) {
    let batch = arrayToImport.slice(counter, counter + batchSize);
    counter += batchSize;
    ops.push(Model.insertMany(batch));
  }
  // Resolves once every batch insert has completed
  return Promise.all(ops);
}

module.exports.bulkImportToMongo = bulkImportToMongo;
module.exports.connectToMongo = connectToMongo;
module.exports.disconnectFromMongo = disconnectFromMongo;
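One caveat: Promise.all starts all the insertMany calls at once, so every batch is in flight at the same time. If memory is still tight, a sequential variant (a sketch; bulkImportSequentially is a hypothetical name, and async/await assumes Node 8+) keeps only one batch in flight at a time:

async function bulkImportSequentially(arrayToImport, mongooseModel) {
  const Model = require(`../../../models/${mongooseModel}`);
  const batchSize = 100;
  for (let i = 0; i < arrayToImport.length; i += batchSize) {
    // Wait for each batch to finish before sending the next one
    await Model.insertMany(arrayToImport.slice(i, i + batchSize));
  }
}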

partImport.js

const path = require("path");
const parseCSV = require("./helpers/parseCSV");
const {
  connectToMongo,
  disconnectFromMongo,
  bulkImportToMongo
} = require("./helpers/mongodb");

const filePath = path.join(__dirname, "../../data/parts.csv");
const options = {
  delimiter: ";",
  noheader: true,
  headers: [
    "facility",
    "partNumber",
    "partName",
    "partDescription",
    "netWeight",
    "customsTariff"
  ]
};

connectToMongo("autoMDM");
parseCSV(filePath, options)
  .then(records => {
    console.time("Time to import parsed objects to db");
    return bulkImportToMongo(records, "parts.js");
  })
  /*   .then(result =>
    console.log("Total batches inserted: ", result, result.length)
  ) */
  .then(() => {
    disconnectFromMongo();
    console.timeEnd("Time to import parsed objects to db");
  })
  .catch(error => console.log(error));

【Discussion】:
