【问题标题】:What is the fastest method to merge a number of files into a file in c#?在c#中将多个文件合并到一个文件中的最快方法是什么?
【发布时间】:2016-08-11 02:13:44
【问题描述】:

我需要用 C# 对大文件(至少 500MB)进行拆分和合并。

我必须将文件拆分成数千个文件,将这些文件分类到一些组中,然后按每个组进行合并。

文件的最小数量为 10,000 个。

我使用 Stream.CopyTo() 方法实现了合并功能。这是其中的主要部分。

// Appends the contents of every file listed in filePaths, in index order, to the file at outputFilePath.
// NOTE(review): File.OpenWrite opens an existing file without truncating it; if outputFilePath already
// exists and is longer than the merged data, the old trailing bytes remain — confirm this is intended.
using (Stream writer = File.OpenWrite(outputFilePath))
{
      // NOTE(review): the declared type of filePaths is not visible here. If it is a lazily evaluated
      // sequence rather than a list/array, Count() and every ElementAt(i) call walk it again — verify.
      int fileNum = filePaths.Count();
      for (int i = 0; i < fileNum; i++)
      {
           // Each input is opened, copied in full to the output stream, and closed before the next one.
           using (Stream reader = File.OpenRead(filePaths.ElementAt(i)))
           { reader.CopyTo(writer); }
       }
}

我测试了我的程序:把一个 500MB 的文件拆分成 17,000 个文件并分为 2 组,然后把每组的 8,500 个文件合并成一个文件。

合并部分大约需要 80 秒。相比之下,拆分同一个文件只需要约 15~20 秒,所以我认为合并相当慢。

有没有比我的代码更快的方法?

【问题讨论】:

  • 为什么不直接使用 Stream.CopyTo() 方法?msdn.microsoft.com/en-us/library/…
  • 我用的就是 Stream.CopyTo() 方法。但根据我的测试结果,我觉得它很慢,所以想找比 CopyTo() 更快的方法。
  • 我写了两种方法作为答案试试吧!!祝你好运

标签: c# .net file-io merge


【解决方案1】:

您的代码看起来不错,但 ElementAt 是代码异味。将其转换为数组并改用[i]。如果你有 10K 个元素,我敢肯定你在浪费很多时间。

【讨论】:

  • ;D. ElementAt 真是个坑。自从它第二次坑了我之后,我就拒绝再用它,此后再也没用过。
【解决方案2】:

为什么不直接使用 Stream.CopyTo() 方法?

     /// <summary>
     /// Concatenates every file in <paramref name="inputDirectoryPath"/> whose name matches
     /// <paramref name="inputFileNamePattern"/> into a single file at <paramref name="outputFilePath"/>.
     /// </summary>
     /// <param name="inputDirectoryPath">Directory that is searched (top level only) for input files.</param>
     /// <param name="inputFileNamePattern">Search pattern passed to <c>Directory.GetFiles</c>, e.g. <c>*.part</c>.</param>
     /// <param name="outputFilePath">File to create; an existing file at this path is overwritten.</param>
     /// <remarks>
     /// Inputs are merged in ordinal order of their full paths, so split parts should use
     /// zero-padded numbering (part0001, part0002, ...) to come back in the intended sequence.
     /// </remarks>
     private static void CombineMultipleFilesIntoSingleFile(string inputDirectoryPath, string inputFileNamePattern, string outputFilePath)
{
    string[] inputFilePaths = Directory.GetFiles(inputDirectoryPath, inputFileNamePattern);

    // Directory.GetFiles does not guarantee the order of the returned names; sort explicitly so the
    // merged output has the same layout on every run and every file system.
    Array.Sort(inputFilePaths, StringComparer.Ordinal);

    Console.WriteLine("Number of files: {0}.", inputFilePaths.Length);
    using (var outputStream = File.Create(outputFilePath))
    {
        foreach (var inputFilePath in inputFilePaths)
        {
            using (var inputStream = File.OpenRead(inputFilePath))
            {
                // Buffer size can be passed as the second argument.
                inputStream.CopyTo(outputStream);
            }
            Console.WriteLine("The file {0} has been processed.", inputFilePath);
        }
    }
}

分块做:

// Merges the listed input files into output.dat by copying them through a fixed-size buffer.
const int chunkSize = 2 * 1024; // 2KB
// Placeholder names: replace with the real files to merge, listed in the order they should appear.
var inputFiles = new[] { "file1.dat", "file2.dat", "file3.dat" };
using (var output = File.Create("output.dat"))
{
    // One buffer is reused for every file instead of allocating a new one per input.
    var buffer = new byte[chunkSize];
    foreach (var file in inputFiles)
    {
        using (var input = File.OpenRead(file))
        {
            int bytesRead;
            // Read returns 0 only at end of file; it may return fewer bytes than requested before that,
            // so write exactly the number of bytes that were read.
            while ((bytesRead = input.Read(buffer, 0, buffer.Length)) > 0)
            {
                output.Write(buffer, 0, bytesRead);
            }
        }
    }
}

【讨论】:

  • 感谢您的回答!
【解决方案3】:

也许尝试压缩文件?

using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.IO.Compression;

/// <summary>
/// Small demo that writes a string to a GZip-compressed file and reads it back.
/// </summary>
class Program {
    /// <summary>
    /// Creates (or overwrites) <paramref name="filename"/> and writes <paramref name="data"/> to it
    /// through a GZip compression stream.
    /// </summary>
    /// <param name="filename">Path of the compressed file to create.</param>
    /// <param name="data">Text to compress and store.</param>
    static void SaveCompressedFile(string filename, string data) {
        // Nested using blocks release the file handle even when Write throws.
        using (FileStream fileStream = new FileStream(filename, FileMode.Create, FileAccess.Write))
        using (GZipStream compressionStream = new GZipStream(fileStream, CompressionMode.Compress))
        using (StreamWriter writer = new StreamWriter(compressionStream)) {
            writer.Write(data);
        }
    }

    /// <summary>
    /// Opens <paramref name="filename"/>, decompresses it as GZip and returns the whole content as text.
    /// </summary>
    /// <param name="filename">Path of an existing GZip-compressed file.</param>
    /// <returns>The decompressed text.</returns>
    static string LoadCompressedFile(string filename) {
        // Nested using blocks release the file handle even when decompression fails.
        using (FileStream fileStream = new FileStream(filename, FileMode.Open, FileAccess.Read))
        using (GZipStream compressionStream = new GZipStream(fileStream, CompressionMode.Decompress))
        using (StreamReader reader = new StreamReader(compressionStream)) {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Compresses a sample string into compressedFile.txt and loads it back; I/O errors are printed
    /// to the console instead of terminating the process.
    /// </summary>
    static void Main(string[] args) {
        try {
            string filename = "compressedFile.txt";
            string sourceString = "Source String";
            SaveCompressedFile(filename, sourceString);
            string recoveredString = LoadCompressedFile(filename);
        } catch (IOException ex) {
            Console.WriteLine(ex.ToString());
        }
    }
}

Source

还可以查看压缩目录的示例。

using System;
using System.Text;
using System.IO;
using System.IO.Compression;

namespace CmprDir
{
  /// <summary>
  /// Command-line tool that packs a directory tree into one GZip stream and unpacks it again.
  /// Every entry is stored as: name length in chars (Int32), the name chars (2 bytes each),
  /// content length in bytes (Int32), then the content bytes.
  /// </summary>
  class Program
  {
    delegate void ProgressDelegate(string sMessage);

    /// <summary>
    /// Reads from <paramref name="stream"/> until <paramref name="count"/> bytes are in
    /// <paramref name="buffer"/> or the stream ends, and returns the number of bytes actually read.
    /// A single Stream.Read call may return fewer bytes than requested, so it has to be looped.
    /// </summary>
    static int ReadFully(Stream stream, byte[] buffer, int count)
    {
      int total = 0;
      while (total < count)
      {
        int read = stream.Read(buffer, total, count - total);
        if (read == 0)
          break;
        total += read;
      }
      return total;
    }

    /// <summary>
    /// Reads exactly <paramref name="count"/> bytes or throws <see cref="EndOfStreamException"/>
    /// when the stream ends early (truncated archive).
    /// </summary>
    static void ReadExact(Stream stream, byte[] buffer, int count)
    {
      if (ReadFully(stream, buffer, count) < count)
        throw new EndOfStreamException("Unexpected end of compressed data.");
    }

    /// <summary>
    /// Appends one file entry (relative name followed by content) to <paramref name="zipStream"/>.
    /// </summary>
    /// <param name="sDir">Root directory the relative path is resolved against.</param>
    /// <param name="sRelativePath">Path of the file relative to <paramref name="sDir"/>; stored as the entry name.</param>
    /// <param name="zipStream">Compression stream that receives the entry.</param>
    static void CompressFile(string sDir, string sRelativePath, GZipStream zipStream)
    {
      //Compress file name
      char[] chars = sRelativePath.ToCharArray();
      zipStream.Write(BitConverter.GetBytes(chars.Length), 0, sizeof(int));
      // Same bytes as BitConverter.GetBytes(char) per character, but written with a single call.
      byte[] nameBytes = new byte[chars.Length * sizeof(char)];
      Buffer.BlockCopy(chars, 0, nameBytes, 0, nameBytes.Length);
      zipStream.Write(nameBytes, 0, nameBytes.Length);

      //Compress file content
      byte[] bytes = File.ReadAllBytes(Path.Combine(sDir, sRelativePath));
      zipStream.Write(BitConverter.GetBytes(bytes.Length), 0, sizeof(int));
      zipStream.Write(bytes, 0, bytes.Length);
    }

    /// <summary>
    /// Reads the next entry from <paramref name="zipStream"/> and writes it below <paramref name="sDir"/>.
    /// </summary>
    /// <param name="sDir">Destination root directory.</param>
    /// <param name="zipStream">Decompression stream positioned at the start of an entry.</param>
    /// <param name="progress">Optional callback that receives the entry name; may be null.</param>
    /// <returns>true when an entry was extracted; false when no further entry header is available.</returns>
    /// <exception cref="EndOfStreamException">The stream ends in the middle of an entry.</exception>
    /// <exception cref="InvalidDataException">A length is negative or the entry name points outside <paramref name="sDir"/>.</exception>
    static bool DecompressFile(string sDir, GZipStream zipStream, ProgressDelegate progress)
    {
      //Decompress file name
      byte[] header = new byte[sizeof(int)];
      if (ReadFully(zipStream, header, sizeof(int)) < sizeof(int))
        return false;

      int iNameLen = BitConverter.ToInt32(header, 0);
      if (iNameLen < 0 || iNameLen > int.MaxValue / sizeof(char))
        throw new InvalidDataException("Invalid entry name length: " + iNameLen);

      byte[] nameBytes = new byte[iNameLen * sizeof(char)];
      ReadExact(zipStream, nameBytes, nameBytes.Length);
      char[] nameChars = new char[iNameLen];
      Buffer.BlockCopy(nameBytes, 0, nameChars, 0, nameBytes.Length);
      string sFileName = new string(nameChars);
      if (progress != null)
        progress(sFileName);

      //Decompress file content
      ReadExact(zipStream, header, sizeof(int));
      int iFileLen = BitConverter.ToInt32(header, 0);
      if (iFileLen < 0)
        throw new InvalidDataException("Invalid entry content length: " + iFileLen);

      byte[] bytes = new byte[iFileLen];
      ReadExact(zipStream, bytes, iFileLen);

      // The entry name comes from the archive, so refuse names that resolve outside the target
      // directory (absolute paths, "..\" segments) instead of writing to arbitrary locations.
      string sRootDir = Path.GetFullPath(sDir);
      if (sRootDir[sRootDir.Length - 1] != Path.DirectorySeparatorChar)
        sRootDir += Path.DirectorySeparatorChar;
      string sFilePath = Path.GetFullPath(Path.Combine(sDir, sFileName));
      if (!sFilePath.StartsWith(sRootDir, StringComparison.Ordinal))
        throw new InvalidDataException("Entry is outside of the target directory: " + sFileName);

      string sFinalDir = Path.GetDirectoryName(sFilePath);
      if (!Directory.Exists(sFinalDir))
        Directory.CreateDirectory(sFinalDir);

      using (FileStream outFile = new FileStream(sFilePath, FileMode.Create, FileAccess.Write, FileShare.None))
        outFile.Write(bytes, 0, iFileLen);

      return true;
    }

    /// <summary>
    /// Compresses every file below <paramref name="sInDir"/> (recursively) into <paramref name="sOutFile"/>.
    /// </summary>
    /// <param name="sInDir">Directory to compress.</param>
    /// <param name="sOutFile">Archive file to create; an existing file is overwritten.</param>
    /// <param name="progress">Optional callback that receives each relative file path; may be null.</param>
    static void CompressDirectory(string sInDir, string sOutFile, ProgressDelegate progress)
    {
      string[] sFiles = Directory.GetFiles(sInDir, "*.*", SearchOption.AllDirectories);

      // Length of the prefix to strip from each full path. Both separator characters count as a
      // trailing separator, otherwise "dir/" on Windows would cut the first character of every name.
      char cLast = sInDir[sInDir.Length - 1];
      bool bEndsWithSeparator = cLast == Path.DirectorySeparatorChar || cLast == Path.AltDirectorySeparatorChar;
      int iDirLen = bEndsWithSeparator ? sInDir.Length : sInDir.Length + 1;

      using (FileStream outFile = new FileStream(sOutFile, FileMode.Create, FileAccess.Write, FileShare.None))
      using (GZipStream str = new GZipStream(outFile, CompressionMode.Compress))
        foreach (string sFilePath in sFiles)
        {
          string sRelativePath = sFilePath.Substring(iDirLen);
          if (progress != null)
            progress(sRelativePath);
          CompressFile(sInDir, sRelativePath, str);
        }
    }

    /// <summary>
    /// Extracts every entry of <paramref name="sCompressedFile"/> into <paramref name="sDir"/>.
    /// </summary>
    /// <param name="sCompressedFile">Archive produced by <see cref="CompressDirectory"/>.</param>
    /// <param name="sDir">Destination directory; missing sub-directories are created.</param>
    /// <param name="progress">Optional callback that receives each entry name; may be null.</param>
    static void DecompressToDirectory(string sCompressedFile, string sDir, ProgressDelegate progress)
    {
      using (FileStream inFile = new FileStream(sCompressedFile, FileMode.Open, FileAccess.Read, FileShare.None))
      using (GZipStream zipStream = new GZipStream(inFile, CompressionMode.Decompress, true))
      {
        // DecompressFile returns false once no further entry header can be read.
        while (DecompressFile(sDir, zipStream, progress))
        {
        }
      }
    }

    /// <summary>
    /// Entry point. With an existing directory as first argument the directory is compressed into the
    /// file named by the second argument; with an existing file as first argument that file is
    /// decompressed into the directory named by the second argument.
    /// </summary>
    /// <returns>0 on success, 1 on wrong usage or on any error.</returns>
    public static int Main(string[] argv) 
    {
      if (argv.Length != 2)
      {
        Console.WriteLine("Usage: CmprDir.exe <in_dir compressed_file> | <compressed_file out_dir>");
        return 1;
      }

      string sDir;
      string sCompressedFile;
      bool bCompress = false;
      try
      {
        if (Directory.Exists(argv[0]))
        {
          sDir = argv[0];
          sCompressedFile = argv[1];
          bCompress = true;
        }
        else
          if (File.Exists(argv[0]))
          {
            sCompressedFile = argv[0];
            sDir = argv[1];
            bCompress = false;
          }
          else
          {
            Console.Error.WriteLine("Wrong arguments");
            return 1;
          }

        if (bCompress)
          CompressDirectory(sDir, sCompressedFile, (fileName) => { Console.WriteLine("Compressing {0}...", fileName); });
        else
          DecompressToDirectory(sCompressedFile, sDir, (fileName) => { Console.WriteLine("Decompressing {0}...", fileName); });

        return 0;
      }
      catch (Exception ex)
      {
        Console.Error.WriteLine(ex.Message);
        return 1;
      }
    }
  }
}

Source

【讨论】:

  • 我可以使用这个功能来合并文件吗? Gzip 压缩是否具有线性特性?
  • 感谢您的回答!
猜你喜欢
  • 2010-11-02
  • 2013-08-11
  • 2012-09-04
  • 2010-10-01
  • 2017-10-27
  • 1970-01-01
  • 1970-01-01
  • 2011-11-15
  • 1970-01-01
相关资源
最近更新 更多