【问题标题】:Server sending invalid gzip to TcpClient服务器向 TcpClient 发送无效的 gzip
【发布时间】:2012-08-20 22:47:57
【问题描述】:

我正在尝试通过实现 web tcp 客户端来更多地了解 web 和 tcp 的工作原理。

目前,我的网络请求函数如下所示:

    public string SendWebRequest(SocketWebRequest request)
    {
        using (NetworkStream ns = tc.GetStream())
        {
                using (System.IO.StreamReader sr = new System.IO.StreamReader(ns))
                {
                    request.WriteTo(ns);
                    ns.Flush();

                    var statusLine = sr.ReadLine();
                    ProcessStatusLine(statusLine);

                    Headers = ReadHeaders(sr);

                    ProcessCookies(request.Host);

                    int contentLength = 0;
                    if (Headers.ContainsKey("Content-Length"))
                    {
                        foreach (var cl in Headers["Content-Length"])
                        {
                            int buf;
                            if (int.TryParse(cl,out buf))
                            {
                                contentLength = buf;
                                break;
                            }
                        }
                    }
                    if (contentLength==0)
                    {
                        return "";
                    }

                    byte[] content = new byte[contentLength];

                    if (IsGziped())
                    {
                        MemoryStream decompressed = new MemoryStream();

                        using (var zs = new GZipStream(ns, CompressionMode.Decompress))
                        {
                            while (true)
                            {
                                var buf = new byte[1024];
                                int read = zs.Read(buf, 0, buf.Length);
                                if (read == 0)
                                {
                                    break;
                                }
                                decompressed.Write(buf, 0, read);
                            }
                        }
                        content = decompressed.ToArray();
                    }
                    else
                    {
                        using (BinaryReader rdr = new BinaryReader(ns))
                        {
                            rdr.Read(content, 0, content.Length);
                        }
                    }

                    var encoding = GetEncoding();

                    return encoding.GetString(content.ToArray());
                }

        }

    }

请求如下所示:

GET http://www.youtube.com/ HTTP/1.1
Accept: application/x-ms-application, image/jpeg, application/xaml+xml, image/gif, image/pjpeg, application/x-ms-xbap, */*
Accept-Language: en-US
User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)
Accept-Encoding: gzip, deflate
Connection: Keep-Alive
Host:www.youtube.com

响应头看起来像这样:

HTTP/1.1 200 OK
Date: Sat, 25 Aug 2012 19:46:51 GMT
Server: Apache
X-Content-Type-Options: nosniff
Content-Encoding: gzip
Set-Cookie: use_hitbox=d5c5516c3379125f43aa0d495d100d6ddAEAAAAw; path=/; domain=.youtube.com
Set-Cookie: VISITOR_INFO1_LIVE=av7rkkf4Sfw; path=/; domain=.youtube.com; expires=Mon, 22-Apr-2013 19:46:51 GMT
Expires: Tue, 27 Apr 1971 19:44:06 EST
Cache-Control: no-cache
P3P: CP="This is not a P3P policy! See //support.google.com/accounts/bin/answer.py?answer=151657&hl=en-US for more info."
X-Frame-Options: SAMEORIGIN
Content-Length: 18977
Content-Type: text/html; charset=utf-8

在此之后,第一个 int read = zs.Read(buf, 0, buf.Length); 有时会起作用,但通常会失败并出现以下异常:

The magic number in GZip header is not correct. Make sure you are passing in a GZip stream. I've tried reading the data as string, and it looks encoded.

Youtube 可以通过浏览器正常工作。当以字符串的形式读取数据时,它看起来是经过编码的。

为什么会出现这个问题,我应该如何解决这个问题?

更新

看起来这是传输过程中的某种错误。在 10 个案例中,有 5 个可以正常工作,而在其他 5 个案例中,它会在没有明显原因的情况下失败

这是IsGziped()的代码

 bool IsGziped()
    {
        foreach (var h in Headers["Content-Encoding"])
        {
            if (h.ToLowerInvariant().Contains("gzip"))
            {
                return true;
            }
        }
        return false;
    }

【问题讨论】:

  • 我使用的是 TcpClient 而不是 HttpWebRequest,这不是重复的
  • HttpWebRequest 使用 DeflateStream 而不是 GZipStream 进行“放气”编码。如果不显示 IsGziped [原文如此],真的很难说出你真正在做什么......

标签: c# http tcp httpwebrequest tcpclient


【解决方案1】:

StreamReader 不一定只读取所需的字节数。由于内部缓冲,它可以读取更多。这会导致从NetworkStream ns 获取压缩字节并放入StreamReader 内部缓冲区。

在获取字节后,GZipStream 无法读取它们。

您可能需要使用在二进制级别上工作的自定义标头解析解决方案。没有办法限制StreamReader 只读取尽可能少的字节数。

StreamReader 不得与其他阅读器一起使用。

【讨论】:

    【解决方案2】:

    你可以用下面的代码(带有header的响应流)无缝分离。

    // Read response.
    
    var buffer2 = new byte[4096];
    var hd = new MemoryStream();
    var response = new MemoryStream();
    var endHeader = false;
    
    do
    {
        // Your networkstream object instead > "stream".
    
        bytes = stream.Read(buffer2, 0, buffer2.Length);
    
        if (!endHeader)
        {
            var startIndex = 0;
            if (IsContainsHeaderCrLf(buffer2, out startIndex))
            {
                endHeader = true;
                hd.Write(buffer2, 0,startIndex);
                response.Write(buffer2, startIndex + 4, bytes - startIndex - 4);
            }
            else
            {
                hd.Write(buffer2, 0, bytes);
            }
        }
        else
        {
            response.Write(buffer2, 0, bytes);
        }
    } while (bytes != 0);
    
    var headertxt = System.Text.Encoding.UTF8.GetString(hd.ToArray());
    var unziptxt = "";
    var responsetxt = "";
    
    if (headertxt.Contains("gzip"))
    {
        unziptxt = System.Text.Encoding.UTF8.GetString(Decompress(response.ToArray()));
    }
    else
    {
        responsetxt = System.Text.Encoding.UTF8.GetString(response.ToArray());
    }
    return headertxt + "\r\n\r\n" + unziptxt + responsetxt;
    
    //...
    
    private bool IsContainsHeaderCrLf(byte[] buffer, out int startIndex)
    {
        for (var i = 0; i <= buffer.Length - 4; i++)
        {
            if (buffer[i] == 13 & buffer[i + 1] == 10 && buffer[i + 2] == 13 && buffer[i + 3] == 10)
            {
                startIndex = i;
                return true;
            }
        }
        startIndex = -1;
        return false;
    }
    

    奖励解压代码。

    static byte[] Decompress(byte[] gzip)
    {
        // Create a GZIP stream with decompression mode.
        // ... Then create a buffer and write into while reading from the GZIP stream.
        using (var stream = new GZipStream(new MemoryStream(gzip), CompressionMode.Decompress))
        {
            const int size = 4096;
            var buffer = new byte[size];
            using (var memory = new MemoryStream())
            {
                var count = 0;
                do
                {
                    count = stream.Read(buffer, 0, size);
                    if (count > 0)
                    {
                        memory.Write(buffer, 0, count);
                    }
                }
                while (count > 0);
                return memory.ToArray();
            }
        }
    }
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2014-03-16
      • 2019-03-30
      • 2017-05-04
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      相关资源
      最近更新 更多