【问题标题】:Large File download from SQL via WebApi after custom MultipartFormDataStreamProvider upload自定义 MultipartFormDataStreamProvider 上传后通过 WebApi 从 SQL 下载大文件
【发布时间】:2016-11-23 22:00:01
【问题描述】:

这是我之前提出的一个问题的后续,该问题因过于宽泛而被关闭。Previous Question

在那个问题中,我解释说我需要通过将块存储为单独的行来将一个大文件 (1-3GB) 上传到数据库。我通过覆盖 MultipartFormDataStreamProvider.GetStream 方法来做到这一点。该方法返回一个自定义流,该流将缓冲的块写入数据库。

问题在于重写的 GetStream 方法会将整个请求(包括标头)写入数据库。它在保持内存占用平稳的同时成功写入了这些数据,但是当我下载文件时,下载得到的内容除了文件本身之外还包含所有标头信息,因此文件无法打开。

有没有办法在重写的 GetStream 方法中只将文件的内容写入数据库,而不写入标头?

API

  /// <summary>
  /// Accepts a multipart/form-data upload and hands every body part to
  /// <c>CustomMultipartFormDataStreamProvider</c>, which supplies the stream each part is written to.
  /// </summary>
  /// <returns>
  /// A task producing 200 OK when the whole request was read, or
  /// 500 Internal Server Error when reading faulted or was canceled.
  /// </returns>
  [HttpPost]
    [Route("file")]
    [ValidateMimeMultipartContentFilter]
    public Task<HttpResponseMessage> PostFormData()
    {
        var provider = new CustomMultipartFormDataStreamProvider();

        // Read the form data and map the outcome of the read onto an HTTP response.
        return Request.Content.ReadAsMultipartAsync(provider).ContinueWith<HttpResponseMessage>(t =>
        {
            if (t.IsFaulted)
            {
                // The error response has to be returned; creating it and falling
                // through would still report 200 OK to the client.
                return Request.CreateErrorResponse(HttpStatusCode.InternalServerError, t.Exception);
            }

            if (t.IsCanceled)
            {
                // A canceled task carries no exception, so answer with the bare status code.
                return Request.CreateResponse(HttpStatusCode.InternalServerError);
            }

            return Request.CreateResponse(HttpStatusCode.OK);
        });
    }

    /// <summary>
    /// Streams the stored file identified by <paramref name="id"/> back to the caller
    /// as an attachment, pushing the content from the database through
    /// <c>WriteDataChunksFromDBToStream</c>.
    /// </summary>
    /// <param name="id">Identifier of the stored file.</param>
    /// <returns>200 OK with the streamed body, or 400 Bad Request when no id is supplied.</returns>
    [HttpGet]
    [Route("file/{id}")]
    public async Task<HttpResponseMessage> GetFile(string id)
    {
        // Guard restored: without it the trailing BadRequest return was orphaned
        // and the braces of the method did not balance.
        if (string.IsNullOrEmpty(id))
        {
            return new HttpResponseMessage(HttpStatusCode.BadRequest);
        }

        var result = new HttpResponseMessage()
        {
            // The callback runs when the response body is written, so the file is
            // copied to the output stream rather than assembled up front.
            Content = new PushStreamContent(async (outputStream, httpContent, transportContext) =>
            {
                await WriteDataChunksFromDBToStream(outputStream, httpContent, transportContext, id);
            }),
            StatusCode = HttpStatusCode.OK
        };

        result.Content.Headers.ContentType = new MediaTypeHeaderValue("application/zipx");
        result.Content.Headers.ContentDisposition = new ContentDispositionHeaderValue("attachment") { FileName = "test response.zipx" };

        return result;
    }

    // Copies the stored content of the file identified by fileIdentifier into
    // responseStream, one result row at a time, using the "ReadAttachmentChunks"
    // stored procedure. httpContent and transportContext are accepted but not used here.
    private async Task WriteDataChunksFromDBToStream(Stream responseStream, HttpContent httpContent, TransportContext transportContext, string fileIdentifier)
    {
        // Ordinal of the result-set column that is read as the binary content.
        const int contentOrdinal = 3;

        // PushStreamContent treats the closing of responseStream as the end of the
        // response, so the stream is disposed here once all rows have been copied.
        using (responseStream)
        using (var connection = new SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["TestDB"].ConnectionString))
        {
            await connection.OpenAsync();

            using (var command = new SqlCommand("ReadAttachmentChunks", connection))
            {
                command.CommandType = System.Data.CommandType.StoredProcedure;
                command.Parameters.Add(new SqlParameter("@Identifier", fileIdentifier));

                // Rows are read with SequentialAccess and copied asynchronously so the
                // file is not held in memory as a whole while it is sent back.
                using (var rows = await command.ExecuteReaderAsync(CommandBehavior.SequentialAccess))
                {
                    while (await rows.ReadAsync())
                    {
                        if (await rows.IsDBNullAsync(contentOrdinal))
                        {
                            continue;
                        }

                        using (var chunk = rows.GetStream(contentOrdinal))
                        {
                            await chunk.CopyToAsync(responseStream);
                        }
                    }
                }
            }
        }
    }

自定义 MultipartFormDataStreamProvider GetStream 方法实现

 /// <summary>
 /// Chooses the stream that one multipart body part is written to: a
 /// <c>CustomSqlStream</c> for file parts (those with a file name in their
 /// Content-Disposition header) and a <see cref="MemoryStream"/> for plain form fields.
 /// </summary>
 /// <param name="parent">The multipart content the part belongs to.</param>
 /// <param name="headers">The headers of the body part being read.</param>
 /// <returns>The stream the body part's content will be written to.</returns>
 /// <exception cref="InvalidOperationException">The part has no Content-Disposition header.</exception>
 public override Stream GetStream(HttpContent parent, HttpContentHeaders headers)
    {
        // For form data, Content-Disposition header is a requirement
        ContentDispositionHeaderValue contentDisposition = headers.ContentDisposition;
        if (contentDisposition == null)
        {
            throw new InvalidOperationException("Did not find required 'Content-Disposition' header field in MIME multipart body part..");
        }

        if (String.IsNullOrEmpty(contentDisposition.FileName))
        {
            // No file name: this part is a form field and is post processed as form data.
            _isFormData.Add(true);
            return new MemoryStream();
        }

        // We won't post process files as form data
        _isFormData.Add(false);

        // Strip the quotes unconditionally (previously they were only removed when
        // the name contained a backslash), then drop any client-side directory part.
        var fileName = contentDisposition.FileName.Replace("\"", "");
        var lastSeparator = fileName.LastIndexOf('\\');
        if (lastSeparator >= 0)
        {
            fileName = fileName.Substring(lastSeparator + 1);
        }

        // A body part is not required to carry a Content-Type header; fall back to a
        // generic binary type instead of dereferencing null.
        var mediaType = (headers.ContentType != null) ? headers.ContentType.MediaType : "application/octet-stream";

        // File content goes to the custom stream that writes to the database.
        var stream = new CustomSqlStream();
        stream.Filename = fileName;
        stream.Identifier = Guid.NewGuid().ToString();
        stream.ContentType = mediaType;
        stream.Description = (_formData.AllKeys.Count() > 0 && _formData["description"] != null) ? _formData["description"] : "";

        return stream;
    }

CustomSqlStream 中对 Stream 的 Write 方法的重写实现

 // Stream.Write override: sends the bytes it is handed to the "WriteAttachmentChunk"
 // stored procedure, opening a new SqlConnection on every call.
 // NOTE(review): the whole `buffer` array is bound to @Content; `offset` and `count`
 // are never read, so bytes outside [offset, offset + count) are stored as well.
 // Only the @Content parameter is bound in this method.
 public override void Write(byte[] buffer, int offset, int count)
    {
                   // Write the buffer to the database.
        using (var myConn = new SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["TestDB"].ConnectionString)) {
            using (var myCmd = new SqlCommand("WriteAttachmentChunk", myConn)) {
                myCmd.CommandType = System.Data.CommandType.StoredProcedure;

                                  var pContent = new SqlParameter("@Content", buffer);

                myCmd.Parameters.Add(pContent);

                myConn.Open();
                myCmd.ExecuteNonQuery();

                // The enclosing using block also disposes the connection, so this
                // explicit Close() is not the only clean-up path.
                if (myConn.State == System.Data.ConnectionState.Open)
                {
                    myConn.Close();
                }
            }
        }
            // Signal _dataAddedEvent after the write; what waits on it is not visible in this snippet.
            ((ManualResetEvent)_dataAddedEvent).Set();
    }

“ReadAttachmentChunks”存储过程从数据库中获取与文件相关的行,这些行按插入数据库的时间排序。因此,代码的工作方式是将这些块拉回,然后异步将其写回 PushStreamContent 以返回给用户。

所以我的问题是:

有没有办法只写入正在上传的文件的内容,而不写入标头?

任何帮助将不胜感激。谢谢。

【问题讨论】:

    标签: c# asp.net-web-api asp.net-web-api2


    【解决方案1】:

    我终于弄明白了。大部分麻烦都源于我把写入过程想得过于复杂。这是我对最初问题的解决方案:

    要防止 .net 缓冲内存中的文件(以便您可以处理大文件上传),您首先需要覆盖 WebHostBufferPolicySelector 以便它不会为您的控制器缓冲输入流,然后替换 BufferPolicy 选择器。

     /// <summary>
     /// Buffer policy that turns input buffering off for requests routed to the
     /// "upload" controller and leaves every other decision to the base selector's defaults.
     /// </summary>
     public class NoBufferPolicySelector : WebHostBufferPolicySelector
    {
        /// <summary>
        /// Returns <c>false</c> (stream the request body) when the route's controller
        /// value is "upload", compared case-insensitively; returns <c>true</c> otherwise,
        /// including when <paramref name="hostContext"/> is not an <c>HttpContextBase</c>.
        /// </summary>
        public override bool UseBufferedInputStream(object hostContext)
        {
            var httpContext = hostContext as HttpContextBase;
            if (httpContext == null)
            {
                return true;
            }

            object controllerName = httpContext.Request.RequestContext.RouteData.Values["controller"];
            bool isUploadController = controllerName != null
                && string.Equals(controllerName.ToString(), "upload", StringComparison.InvariantCultureIgnoreCase);

            return !isUploadController;
        }

        /// <summary>
        /// Output buffering is not customised; the base implementation decides.
        /// </summary>
        public override bool UseBufferedOutputStream(HttpResponseMessage response)
        {
            return base.UseBufferedOutputStream(response);
        }
    }
    

    然后用于替换 BufferPolicy 选择器

    // Replace the registered IHostBufferPolicySelector service with a NoBufferPolicySelector instance.
    GlobalConfiguration.Configuration.Services.Replace(typeof(IHostBufferPolicySelector), new NoBufferPolicySelector());
    

    然后为了避免将文件流写入磁盘的默认行为,您需要提供一个将写入数据库的流提供程序。为此,您继承 MultipartStreamProvider 并覆盖 GetStream 方法以返回将写入数据库的流。

        /// <summary>
        /// Chooses the stream that one multipart body part is written to. File parts
        /// (those with a file name in their Content-Disposition header) get a
        /// <c>CustomSqlStream</c> after a parent record has been written through
        /// <c>WriteLargeFileContainer</c>; every other part gets a <see cref="MemoryStream"/>.
        /// </summary>
        /// <param name="parent">The multipart content; its Content-Type is searched for the boundary parameter.</param>
        /// <param name="headers">The headers of the body part being read.</param>
        /// <returns>The stream the body part's content will be written to.</returns>
        public override Stream GetStream(HttpContent parent, HttpContentHeaders headers)
        {
            // For form data, Content-Disposition header is a requirement
            ContentDispositionHeaderValue contentDisposition = headers.ContentDisposition;
            if (contentDisposition == null || String.IsNullOrEmpty(contentDisposition.FileName))
            {
                // We will post process this as form data
                _isFormData.Add(true);

                // If no filename parameter was found in the Content-Disposition header then return a memory stream.
                return new MemoryStream();
            }

            // We won't post process files as form data
            _isFormData.Add(false);

            // Create a unique identifier for this file upload.
            var identifier = Guid.NewGuid();

            // Strip the quotes unconditionally (previously the short name kept them
            // unless it contained a backslash), then drop any client-side directory part.
            var fullFileName = contentDisposition.FileName.Replace("\"", "");
            var fileName = fullFileName;
            var lastSeparator = fullFileName.LastIndexOf('\\');
            if (lastSeparator >= 0)
            {
                fileName = fullFileName.Substring(lastSeparator + 1);
            }

            var boundaryObj = parent.Headers.ContentType.Parameters.SingleOrDefault(a => a.Name == "boundary");
            var boundary = (boundaryObj != null) ? boundaryObj.Value : "";

            // A body part is not required to carry a Content-Type header; fall back to a
            // generic binary type instead of dereferencing null.
            var mediaType = (headers.ContentType != null) ? headers.ContentType.MediaType : "application/octet-stream";

            // Write the parent container for the file chunks that are being stored.
            WriteLargeFileContainer(fileName, identifier, mediaType, boundary);

            // Create the custom stream that will write the chunks to the database.
            var stream = new CustomSqlStream();
            stream.Filename = fileName;
            stream.FullFilename = fullFileName;
            stream.Identifier = identifier.ToString();
            stream.ContentType = mediaType;
            stream.Boundary = (!string.IsNullOrEmpty(boundary)) ? boundary : "";

            return stream;
        }
    

    您创建的自定义流需要继承 Stream 并覆盖 Write 方法。这就是我过度思考问题的地方,并认为我需要解析出通过缓冲区参数传递的边界标头。但这实际上是通过利用 offset 和 count 参数为您完成的。

    // Stream.Write override: copies the count bytes that start at offset out of
    // buffer into a fresh array and passes that array to WriteData.
    public override void Write(byte[] buffer, int offset, int count)
        {
            // Only the [offset, offset + count) window is forwarded; the rest of the
            // buffer is left out, so no boundary parsing is needed here.
            var chunk = new byte[count];
            Buffer.BlockCopy(buffer, offset, chunk, 0, count);

            WriteData(chunk);
        }
    

    接下来,只需把它接入用于上传和下载的 API 方法即可。上传:

     /// <summary>
     /// Accepts a multipart upload and hands every body part to
     /// <c>CustomMultipartLargeFileStreamProvider</c>, which supplies the stream each part is written to.
     /// </summary>
     /// <returns>
     /// A task producing 200 OK when the whole request was read, or
     /// 500 Internal Server Error when reading faulted or was canceled.
     /// </returns>
     public Task<HttpResponseMessage> PostFormData()
        {
            var provider = new CustomMultipartLargeFileStreamProvider();

            // Read the form data and map the outcome of the read onto an HTTP response.
            return Request.Content.ReadAsMultipartAsync(provider).ContinueWith<HttpResponseMessage>(t =>
            {
                if (t.IsFaulted)
                {
                    // The error response has to be returned; creating it and falling
                    // through would still report 200 OK to the client.
                    return Request.CreateErrorResponse(HttpStatusCode.InternalServerError, t.Exception);
                }

                if (t.IsCanceled)
                {
                    // A canceled task carries no exception, so answer with the bare status code.
                    return Request.CreateResponse(HttpStatusCode.InternalServerError);
                }

                return Request.CreateResponse(HttpStatusCode.OK);
            });
        }
    

    为了下载,为了保持低内存占用,我利用 PushStreamContent 将块推送回用户:

    /// <summary>
    /// Looks up the MIME type and file name stored for <paramref name="id"/> via the
    /// "ReadLargeFileInfo" stored procedure, then streams the file content back as an
    /// attachment through <c>WriteDataChunksFromDBToStream</c>.
    /// </summary>
    /// <param name="id">Identifier of the stored file.</param>
    /// <returns>
    /// 200 OK with the streamed body; 400 Bad Request when no id is supplied;
    /// 404 Not Found when the lookup returns no row for the id.
    /// </returns>
    [HttpGet]
        [Route("file/{id}")]
        public async Task<HttpResponseMessage> GetFile(string id)
        {
            if (string.IsNullOrEmpty(id))
            {
                return new HttpResponseMessage(HttpStatusCode.BadRequest);
            }

            string mimeType = string.Empty;
            string filename = string.Empty;
            bool found = false;

            // Get the headers for the file being sent back to the user.
            using (var myConn = new SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["PortalBetaConnectionString"].ConnectionString))
            using (var myCmd = new SqlCommand("ReadLargeFileInfo", myConn))
            {
                myCmd.CommandType = System.Data.CommandType.StoredProcedure;
                myCmd.Parameters.Add(new SqlParameter("@Identifier", id));

                // Use the async ADO.NET calls so the request thread is not blocked on
                // the database, and dispose the reader, which was previously left open.
                await myConn.OpenAsync();

                using (var dataReader = await myCmd.ExecuteReaderAsync())
                {
                    // If several rows come back, the values of the last one win.
                    while (await dataReader.ReadAsync())
                    {
                        mimeType = dataReader.GetString(0);
                        filename = dataReader.GetString(1);
                        found = true;
                    }
                }
            }

            if (!found)
            {
                // Without this guard an unknown id reached new MediaTypeHeaderValue("")
                // below, which throws and surfaces as a server error.
                return new HttpResponseMessage(HttpStatusCode.NotFound);
            }

            var result = new HttpResponseMessage()
            {
                Content = new PushStreamContent(async (outputStream, httpContent, transportContext) =>
                {
                    // Pull the data back from the db and stream it back to the user.
                    await WriteDataChunksFromDBToStream(outputStream, httpContent, transportContext, id);
                }),
                StatusCode = HttpStatusCode.OK
            };

            result.Content.Headers.ContentType = new MediaTypeHeaderValue(mimeType);
            result.Content.Headers.ContentDisposition = new ContentDispositionHeaderValue("attachment") { FileName = filename };

            return result;
        }
    
        /// <summary>
        /// Copies the stored chunks of the file identified by <paramref name="fileIdentifier"/>
        /// into <paramref name="responseStream"/>, one result row at a time, using the
        /// "ReadAttachmentChunks" stored procedure.
        /// </summary>
        /// <param name="responseStream">Output stream handed over by PushStreamContent; disposed when copying ends.</param>
        /// <param name="httpContent">Not used by this method.</param>
        /// <param name="transportContext">Not used by this method.</param>
        /// <param name="fileIdentifier">Value passed to the procedure as @Identifier.</param>
        private async Task WriteDataChunksFromDBToStream(Stream responseStream, HttpContent httpContent, TransportContext transportContext, string fileIdentifier)
        {
            // PushStreamContent requires the responseStream to be closed
            // for signaling it that you have finished writing the response.
            using (responseStream)
            {
                using (var myConn = new SqlConnection(System.Configuration.ConfigurationManager.ConnectionStrings["PortalBetaConnectionString"].ConnectionString))
                {
                    await myConn.OpenAsync();

                    // Stored proc to pull the data back from the db.
                    using (var myCmd = new SqlCommand("ReadAttachmentChunks", myConn))
                    {
                        myCmd.CommandType = System.Data.CommandType.StoredProcedure;
                        myCmd.Parameters.Add(new SqlParameter("@Identifier", fileIdentifier));

                        // The reader needs to be executed with the SequentialAccess behavior to enable network streaming
                        // Otherwise ReadAsync will buffer the entire BLOB into memory which can cause scalability issues or even OutOfMemoryExceptions
                        using (var reader = await myCmd.ExecuteReaderAsync(CommandBehavior.SequentialAccess))
                        {
                            while (await reader.ReadAsync())
                            {
                                // Skip rows whose binary column is null.
                                if (await reader.IsDBNullAsync(0))
                                {
                                    continue;
                                }

                                // Read the binary data of the chunk as a stream.
                                using (var data = reader.GetStream(0))
                                {
                                    // Asynchronously copy the stream from the server to the response stream
                                    await data.CopyToAsync(responseStream);
                                }

                                // Flush the destination, not the source: the earlier flush
                                // targeted the stream being read, which does not push
                                // anything towards the client.
                                await responseStream.FlushAsync();
                            }
                        }
                    }
                }
            }// close response stream
        }
    

    【讨论】:

      【解决方案2】:

      呃。这很讨厌。上传时,您必须确保

      1. 将标头与内容部分分开 - 您必须遵循 HTTP RFC 文档的要求。
      2. 允许分块传输
      3. 当然,内容部分(除非您传输文本)将被二进制编码为字符串。
      4. 允许压缩传输,例如 GZIP 或 DEFLATE。
      5. 也许——只是也许——考虑编码(ASCII、Unicode、UTF8 等)。

      如果不查看所有这些信息,您将无法真正确保将正确的信息保存到数据库中。对于后面的项目,关于要做什么的所有元数据都将在标题中的某个位置,所以这不仅仅是一次性的。

      【讨论】:

      • 是因为有更好的方法可以做到这一点,还是因为您通常不这样做,这很讨厌?以前每次进行大文件传输时,我都会将其传输到磁盘并锁定文件存储区域,所以我以前从未处理过这样的事情,如果我正在做的只是愚蠢,请原谅我.要求是我需要将大文件存储在数据库中(不能使用 Filestream),并且我需要在它到达那里之前对其进行加密,同时保持低内存占用。这种分块的想法是我能想到的唯一方法。
      • 令人讨厌的部分是你不能使用框架中的东西来为你做所有肮脏的工作。也许你可以在 Nuget 上找到一个第三方 http 客户端库,查看源代码,看看它在分解上传流的情况下是如何发挥作用的。
      • 看来我现在唯一的问题是请求标头被一并写入了数据库。我已经尝试了各种方法来手动剥离标头,即利用边界值来检测标头的起始/结束位置,但是 1)它不适用于非文本文件 2)对于一个本应很简单的问题来说,这感觉像是一种取巧而脆弱的解决方案。有什么办法可以在写入之前去掉标头吗?
      猜你喜欢
      • 2016-05-14
      • 1970-01-01
      • 2014-05-22
      • 1970-01-01
      • 2020-03-27
      • 1970-01-01
      • 1970-01-01
      • 2020-12-02
      • 2012-08-07
      相关资源
      最近更新 更多