【发布时间】:2021-03-20 15:04:47
【问题描述】:
我有一个文件夹,我将在其中保存文本文件(200-500mb - 不是很大,但它的文本文件很大),我想并行处理这个文件。 该文件将有
"ComnanyTestIsert", "Firs Comment", "LA 132", "222-33-22", 1
"ComnanyTestIsert1", "Seconds Comment", "LA 132", "222-33-22", 1
例如,我使用了 2 个这样的文件。我不太明白何时将 BufferedStream 与并行循环一起使用 如何设置并行操作的数量?以及如何正确插入
static void Main(string[] args)
{
//Basic usage to help you get started:
ProcessFileTaskItem(
new string[] { "\\Insert.txt"
, "\\Insert1.txt" }
, "Data Source=(localdb)\\MSSQLLocalDB;Initial Catalog=test;Integrated Security=True;Connect Timeout=30;Encrypt=False;TrustServerCertificate=False;ApplicationIntent=ReadWrite;MultiSubnetFailover=False"
, "test");
}
/// This will read an array of input files, process the lines in parallel, and upload
/// everything into the database.
public static void ProcessFileTaskItem(string[] SourceFiles, string DatabaseConnectionString, string DestinationTable)
{
//Make sure there's files to read
if (SourceFiles != null && SourceFiles.Length > 0)
{
//Loop through the file array
Parallel.For(0, SourceFiles.Length, x =>
//for (int x = 0; x < SourceFiles.Length; x++)
{
//Make sure the file exists and if so open it for reading.
if (File.Exists(SourceFiles[x]))
{
using (SqlConnection connectionDest = new SqlConnection(DatabaseConnectionString))
{
connectionDest.Open();
//Configure everything to upload to the database via bulk copy.
using (SqlBulkCopy sbc = new SqlBulkCopy(connectionDest, SqlBulkCopyOptions.TableLock, null))
{
//Configure the bulk copy settings
sbc.DestinationTableName = DestinationTable;
sbc.BulkCopyTimeout = 28800; //8 hours
//Now read and process the file
ProcessAllLinesInInputFile(SourceFiles[x], connectionDest, sbc);
}
connectionDest.Close();
}
}
} //for
); //End Parallel reading of files
//Explicitly clean up before exiting
Array.Clear(SourceFiles, 0, SourceFiles.Length);
}
}
/// Processes every line in the source input file.
private static void ProcessAllLinesInInputFile(string SourceFiles, SqlConnection connectionDest, SqlBulkCopy sbc)
{
//Create a local data table. Should be the same name as the table
//in the database you'll be uploading everything to.
DataTable CurrentRecords = new DataTable("test");
//The column names. They should match what's in the database table.
string[] ColumnNames = new string[] { "Name", "Comment", "Address", "Phone", "IsActive" };
using (FileStream fs = File.Open(SourceFiles, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
using (BufferedStream bs = new BufferedStream(fs))
using (StreamReader sr = new StreamReader(bs))
{
string s;
while ((s = sr.ReadLine()) != null)
{
}
}
//Create the datatable with the column names.
for (int x = 0; x < ColumnNames.Length; x++)
CurrentRecords.Columns.Add(ColumnNames[x], typeof(string));
//Now process each line in parallel.
Parallel.For(0, SourceFiles, x =>
{
List<object> values = null; //so each thread gets its own copy.
}
}
【问题讨论】:
-
你是说不能将文件加载到内存中?
-
我需要将数据从文本文件插入到 sql。我尝试使用并行文件处理,然后创建本地数据表并批量插入此数据
标签: c# .net linq bulkinsert