【问题标题】:Downloading file from redirecting URLs(从重定向 URL 下载文件)
【发布时间】:2016-03-29 00:43:09
【问题描述】:

我正在尝试从http://www.audiodump.com/ 下载 mp3。该网站有很多重定向。但是我设法让它的一部分工作。 这是我获取 DL 链接、标题、mp3 时长等所有信息的方法。

/// <summary>
/// Requests the audiodump search page for <paramref name="sArtist"/> and appends one
/// entry per parsed result row to <c>aLinks</c>, <c>aTitles</c> and <c>aDurations</c>.
/// </summary>
/// <param name="sArtist">Search text; spaces are sent as '+'.</param>
/// <remarks>
/// Best-effort: a failed request or parse is reported on the console and the method
/// returns normally. Rows that do not split into at least three fields are skipped
/// before anything is added, so the three result lists always stay the same length.
/// </remarks>
private void _InetGetHTMLSearch(string sArtist)
{
    string sResearchURL = "http://www.audiodump.biz/music.html?" + _AudioDumpQuery + sArtist.Replace(" ", "+");
    HttpWebRequest webReq = (HttpWebRequest)HttpWebRequest.Create(sResearchURL);
    // Without this referer the site redirects the search request.
    webReq.Referer = "http://www.audiodump.com/";
    try
    {
        webReq.CookieContainer = new CookieContainer();
        webReq.Method = "GET";

        // Read the whole page first so the connection is released before parsing.
        string aRet;
        using (WebResponse response = webReq.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream))
        {
            aRet = reader.ReadToEnd();
        }

        string[] aTable = _StringBetween(aRet, "<BR><table", "table><BR>", RegexOptions.Singleline);
        if (aTable == null || aTable.Length == 0)
        {
            return;
        }

        string[] aInfos = _StringBetween(aTable[0], ". <a href=\"", "<a href=\"");
        if (aInfos == null)
        {
            return;
        }

        foreach (string rawInfo in aInfos)
        {
            if (rawInfo == null)
            {
                continue;
            }

            // Turn the anchor markup into '*' separators: field0*field1*field2..
            string info = rawInfo.Replace("\">", "*").Replace("</a> (", "*");
            if (info.Length < 2)
            {
                continue;
            }
            info = info.Remove(info.Length - 2); // drop the two trailing characters

            string[] sStringArray = info.Split('*');
            if (sStringArray.Length < 3)
            {
                // Malformed row: skip it entirely rather than adding a link with no
                // matching title/duration.
                continue;
            }

            // NOTE(review): this text clean-up is applied to the third field, which is
            // stored in aDurations, while the second field goes to aTitles untouched.
            // Kept as in the original - confirm which field was meant to be cleaned.
            // The replacements below are order-dependent; do not reorder them.
            string cleaned = sStringArray[2];
            cleaned = cleaned.Replace("`", "'");
            cleaned = cleaned.Replace("dont", "don't");
            cleaned = cleaned.Replace("lets", "let's");
            cleaned = cleaned.Replace("cant", "can't");
            cleaned = cleaned.Replace("shes", "she's");
            cleaned = cleaned.Replace("aint", "ain't");
            cleaned = cleaned.Replace("didnt", "didn't");
            cleaned = cleaned.Replace("im", "i'm");
            cleaned = cleaned.Replace("youre", "you're");
            cleaned = cleaned.Replace("ive", "i've");
            cleaned = cleaned.Replace("youll", "you'll");
            cleaned = cleaned.Replace("&#39;", "'");
            // Apostrophes are swapped for a placeholder token here and restored by the
            // last replacement below.
            cleaned = cleaned.Replace("'", "simplequotes");
            cleaned = cleaned.Replace("vk.com", "");
            cleaned = _StringReplaceCyrillicChars(cleaned);
            cleaned = Regex.Replace(cleaned, @"<[^>]+>|&nbsp;", "").Trim();
            cleaned = Regex.Replace(cleaned, @"\s{2,}", " ");
            cleaned = cleaned.TrimStart('\'');
            cleaned = cleaned.TrimStart('-');
            cleaned = cleaned.TrimEnd('-');
            cleaned = cleaned.Replace("- -", "-");
            cleaned = cleaned.Replace("http", "");
            cleaned = cleaned.Replace("www", "");
            cleaned = cleaned.Replace("mp3", "");
            cleaned = cleaned.Replace("simplequotes", "'");

            aLinks.Add(sStringArray[0]);
            aTitles.Add(sStringArray[1]);
            aDurations.Add(cleaned);
        }
    }
    catch (Exception ex)
    {
        // Still best-effort, but no longer silent: report why the search failed.
        Console.WriteLine("Debug message: " + ex.Message);
    }
}

我只需要添加引荐来源网址(Referer)即可防止搜索请求被重定向:webReq.Referer = "http://www.audiodump.com/"; 但是,当我想下载 mp3 时,却无法正常工作。这些 URL 是正确的,我已将它们与手动下载(而非以编程方式下载)时得到的 URL 逐一核对过。 这是我的 mp3 下载部分:

/// <summary>
/// Runs one step of the download state machine; intended to be called repeatedly.
/// When idle it searches for <paramref name="dArtist"/> and starts a download from
/// one of the first results; while a download is active it copies one buffer from
/// the response stream to the output file and updates the progress label.
/// </summary>
/// <param name="dArtist">Search text; also used as the output file name (without extension).</param>
/// <param name="dPath">Set to the path of the finished file when the download completes.</param>
private void _DoDownload(string dArtist, ref string dPath)
{
    string path = mp3Path + "\\" + dArtist + ".mp3";

    if (!Contain && skip <= 3 && !Downloading)
    {
        _InetGetHTMLSearch(dArtist);
        if (aLinks.Count == 0)
        {
            // Nothing to download. Count it as a failed attempt (like the stall
            // path below) so an empty search is not retried forever.
            skip += 1;
            return;
        }

        Console.WriteLine("--------------------------------> " + aLinks[0]);

        // Pick one of the first five results, or fewer when the search found fewer.
        Random rnd = new Random();
        int Link = rnd.Next(Math.Min(5, aLinks.Count));
        if (DownloadOne(aLinks[Link], path, false))
        {
            hTimmer.Start();
            Downloading = true;
        }
    }
    else if (Downloading)
    {
        int actualBytes = strm.Read(barr, 0, arrSize);
        fs.Write(barr, 0, actualBytes);
        bytesCounter += actualBytes;

        long expectedTotal = preloadedLength + fileLength;
        double percent = 0d;
        if (fileLength > 0)
        {
            percent = 100.0d * bytesCounter / expectedTotal;
        }
        label1.Text = Math.Round(percent).ToString() + "%";

        // Finish only when every announced byte has arrived. The rounded percentage
        // reaches 100 at 99.5%, so using it alone would cut off the end of the file.
        bool allBytesReceived = fileLength > 0 && bytesCounter >= expectedTotal;
        // If the stream has ended (Read returned 0) slightly short of the announced
        // length, accept it under the same >= 99.5% tolerance the rounded check had.
        bool endedWithinTolerance = actualBytes == 0 && Math.Round(percent) >= 100;

        if (allBytesReceived || endedWithinTolerance)
        {
            label1.Text = "";
            dPath = path;
            aLinks.Clear();
            hTimmer.Stop();
            hTimmer.Reset();
            fs.Flush();
            fs.Close();
            strm.Close(); // release the HTTP connection
            lastArtistName = "N/A";
            Downloading = false;
        }
        else if (Math.Round(percent) <= 1)
        {
            // Stalled: still at about 0-1% three seconds after the download started.
            // Discard the partial file and let a later call try again.
            if (hTimmer.ElapsedMilliseconds >= 3000)
            {
                hTimmer.Stop();
                hTimmer.Reset();
                fs.Flush();
                fs.Close();
                strm.Close(); // release the HTTP connection
                File.Delete(path);
                Contain = false;
                skip += 1;
                Downloading = false;
            }
        }
    }
}

/// <summary>
/// Derives a file name from a URL: takes the last non-empty piece after splitting
/// on ":" and "//", then replaces every '/' in it with '.'.
/// </summary>
/// <param name="url">URL to convert.</param>
/// <returns>The last piece of the URL with '/' replaced by '.'.</returns>
private static string ConvertUrlToFileName(string url)
{
    string[] separators = { ":", "//" };
    string[] pieces = url.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    string lastPiece = pieces[pieces.Length - 1];
    return lastPiece.Replace('/', '.');
} //ConvertUrlToFileName

/// <summary>
/// Returns the size in bytes of <paramref name="filename"/>, or 0 when no such file exists.
/// </summary>
/// <param name="filename">Path of the file to measure.</param>
/// <returns>The file length in bytes, or 0 if the file does not exist.</returns>
private static long GetExistingFileLength(string filename)
{
    return File.Exists(filename)
        ? new FileInfo(filename).Length
        : 0;
} //GetExistingFileLength

/// <summary>
/// Starts an HTTP download of <paramref name="url"/>: sends the request, opens the
/// output file for appending and stores the response stream, output stream and
/// length counters in the shared download fields for the caller to drain.
/// </summary>
/// <param name="url">Address of the file to download.</param>
/// <param name="existingFilename">
/// Output path. Any existing file at this path is deleted first. When null, the
/// file name is derived from <paramref name="url"/>.
/// </param>
/// <param name="quiet">When true, a failure is not reported on the console.</param>
/// <returns>
/// True when the response stream is open and ready to read; false on any failure,
/// in which case everything opened by this call has been closed again.
/// </returns>
private static bool DownloadOne(string url, string existingFilename, bool quiet)
{
    ServicePointManager.DefaultConnectionLimit = 20;
    HttpWebResponse webResponse = null;
    FileStream output = null;
    try
    {
        fname = existingFilename;
        if (fname == null)
        {
            fname = ConvertUrlToFileName(url);
        }
        if (File.Exists(existingFilename))
        {
            File.Delete(existingFilename);
        }

        HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(url);
        webRequest.UserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A";
        webRequest.Referer = "http://www.audiodump.com/";

        // NOTE(review): because the target file is deleted above, this is always 0
        // when existingFilename is supplied; a Range header is only ever sent for
        // the URL-derived file name.
        preloadedLength = GetExistingFileLength(fname);
        if (preloadedLength > 0)
        {
            // 64-bit overload: an (int) cast would truncate offsets above 2 GiB.
            webRequest.AddRange(preloadedLength);
        }
        webRequest.Proxy = null; // no proxy

        webResponse = (HttpWebResponse)webRequest.GetResponse();
        output = new FileStream(fname, FileMode.Append, FileAccess.Write);
        fileLength = webResponse.ContentLength;

        Stream body = webResponse.GetResponseStream();
        if (body == null)
        {
            output.Close();
            webResponse.Close();
            return false;
        }

        // Publish to the shared fields only once everything is open, so a failed
        // start never leaves a half-initialised download behind.
        fs = output;
        strm = body;
        bytesCounter = preloadedLength;
        return true;
    }
    catch (Exception e)
    {
        // Release whatever this call managed to open before it failed.
        if (output != null)
        {
            output.Close();
        }
        if (webResponse != null)
        {
            webResponse.Close();
        }
        if (!quiet)
        {
            Console.WriteLine(
                "{0}: {1} '{2}'",
                url, e.GetType().FullName,
                e.Message);
        }
        return false;
    }
} //DownloadOne

_DoDownload() 方法由一个每 250 毫秒触发一次的计时器调用。这种方式在其他网站上运行得很好。然而,audiodump 的这些重定向让我很难处理。

我并不精通 HttpWebRequest。我设法解决了搜索问题,但下载部分把我难住了。关于如何解决下载问题,有什么建议吗?

【问题讨论】:

  • 该网站显然不希望您抓取它。问“我怎样才能抓取它”太宽泛了。您必须先隔离出具体问题,然后再从那里进行研究。

标签: c# http redirect download


【解决方案1】:

您只需将引荐来源网址(Referer)设置为您获得该下载链接的页面即可。例如,您从“http://www.audiodump.biz/music.html?q=whatever”页面获取到文件的链接,那么在下载该文件时,就把这个页面地址设置为 Referer,而不仅仅是“http://www.audiodump.biz”。

【讨论】:

    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2019-12-02
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多