【问题标题】:Not able to download zip file using httpwebrequest through c# code. It gets downloaded through browser(无法通过 C# 代码使用 HttpWebRequest 下载 zip 文件,但通过浏览器可以正常下载)
【发布时间】:2020-04-23 11:52:30
【问题描述】:

我想从 https://eqrreportviewer.ferc.gov/ 这个网站下载 zip 文件。手动下载的步骤是:首先单击 Filing Inquiries(归档查询)选项卡,在 reportType 下拉列表中选择 SubmissionsBydate,在 export 下拉列表中选择 CSV,然后单击提交按钮,zip 文件就会开始下载。我想自动化这个过程。我捕获了浏览器发出的请求及其标头,并在 C# 代码中把这些信息发送给站点,但我无法通过代码下载文件。

这是我写的代码:

// HTML text of the most recently fetched page. Assigned by GetLandingPage (landing page body)
// and again by PostToPageForSubmitBtn (body of the POST response).
public static string PageSourceCode { get; set; }

//The ASP.NET SessionID to add validation to posts.
// Assigned in GetLandingPage from the response's Set-Cookie header after it has been passed
// through StripCookie, and sent back as the Cookie header in PostToPageForSubmitBtn.
public static string SessionID { get; set; }

//The value we are posting to the page on subsequent calls.
// Built by PopulatePostBackValueForSubmitBtn and written as the request body in PostToPageForSubmitBtn.
public static string PostBackValue { get; set; }

// Address used for both the landing-page GET and the form POST.
public static string AcquisitionURL = "https://eqrreportviewer.ferc.gov";
/// <summary>
/// Program entry point; the command-line arguments are not used and control is
/// handed straight to <see cref="Acquire"/>.
/// </summary>
/// <param name="args">Command-line arguments (ignored).</param>
static void Main(string[] args) => Acquire();

// Runs the three steps of the download in order. The order matters: each step
// consumes static state that the previous step assigned.
private static void Acquire()
{
    // Step 1: GET the landing page; assigns PageSourceCode and SessionID.
    GetLandingPage();
    // Step 2: build the form body from PageSourceCode; assigns PostBackValue.
    PopulatePostBackValueForSubmitBtn();
    // Step 3: POST PostBackValue with SessionID as the Cookie header and save the response.
    PostToPageForSubmitBtn();
}

/// <summary>
/// Issues a GET for <see cref="AcquisitionURL"/>, stores the response body in
/// <see cref="PageSourceCode"/>, and stores the response's <c>Set-Cookie</c> header
/// (after <c>StripCookie</c>) in <see cref="SessionID"/>.
/// </summary>
/// <exception cref="WebException">Propagated from <c>GetResponse</c> when the request fails.</exception>
private static void GetLandingPage()
{
    string mainPageOutput = string.Empty;
    HttpWebRequest objRequestLandingPage = (HttpWebRequest)WebRequest.Create(AcquisitionURL);
    objRequestLandingPage.Method = WebRequestMethods.Http.Get;
    objRequestLandingPage.Headers.Add("Cache-Control", "max-age=0");
    objRequestLandingPage.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9";
    // Do NOT hand-write "Accept-Encoding: gzip, deflate, br": HttpWebRequest does not decode a
    // response just because the header was sent, so a compressed body would be read below as
    // garbage text. Enabling AutomaticDecompression makes the request advertise gzip/deflate
    // itself and transparently decompress the response. Brotli ("br") is intentionally not
    // advertised because this API cannot decode it.
    objRequestLandingPage.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    objRequestLandingPage.Headers.Add("Accept-Language", "en-US,en;q=0.9");
    objRequestLandingPage.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36";
    objRequestLandingPage.Headers.Add("Sec-Fetch-Dest", "document");
    objRequestLandingPage.Headers.Add("Sec-Fetch-Mode", "navigate");
    objRequestLandingPage.Headers.Add("Sec-Fetch-Site", "none");
    objRequestLandingPage.Headers.Add("Sec-Fetch-User", "?1");
    objRequestLandingPage.Headers.Add("Upgrade-Insecure-Requests", "1");
    // "Connection: keep-alive" is a restricted header; it is requested through the property instead.
    objRequestLandingPage.KeepAlive = true;
    objRequestLandingPage.Host = "eqrreportviewer.ferc.gov";

    string rawCookieHeader;
    using (WebResponse objResponseLandingPage = objRequestLandingPage.GetResponse())
    {
        // Null when the server sent no Set-Cookie header; passed on to StripCookie unchanged.
        rawCookieHeader = objResponseLandingPage.Headers["Set-Cookie"];

        using (Stream streamLandingPage = objResponseLandingPage.GetResponseStream())
        using (StreamReader streamReaderLandingPage = new StreamReader(streamLandingPage))
        {
            mainPageOutput = streamReaderLandingPage.ReadToEnd();
        }
    }

    SessionID = StripCookie(rawCookieHeader);
    //Set the source code of the page
    PageSourceCode = mainPageOutput;
}

/// <summary>
/// Builds the form body for the submit-button postback and stores it in
/// <see cref="PostBackValue"/>. Starts from the fields that <c>GetFormFields</c> extracts
/// from <see cref="PageSourceCode"/>, overrides or adds the entries below, and serialises
/// the result with <c>FormatPostString</c>. Does nothing when
/// <see cref="PageSourceCode"/> is null or empty.
/// </summary>
private static void PopulatePostBackValueForSubmitBtn()
{
    // No landing page captured yet: leave PostBackValue untouched.
    if (String.IsNullOrEmpty(PageSourceCode))
    {
        return;
    }

    // Seed with whatever the landing page's form already contains.
    Dictionary<string, string> fields = GetFormFields(PageSourceCode);

    // Summary Reports panel controls.
    fields["TabContainerReportViewer$TabPanelReporting$TabContainerReports$TabPanelSummaryReports$ddlReportTypeSum"] = "0";
    fields["TabContainerReportViewer$TabPanelReporting$TabContainerReports$TabPanelSummaryReports$ddlReportPeriodSum"] = "650";
    fields["TabContainerReportViewer$TabPanelReporting$TabContainerReports$TabPanelSummaryReports$ListSearchExtender1_ClientState"] = String.Empty;

    // Filing Inquiries panel controls. NOTE(review): "4" / "2" are presumably the option values
    // for the desired report type and export format - confirm against the page's <select> markup.
    fields["TabContainerReportViewer$TabPanelReporting$TabContainerReports$TabPanelFilingInquiries$ddlReportType"] = "4";
    // Date window: the 30 days up to today, formatted with the current culture's short date pattern.
    fields["TabContainerReportViewer$TabPanelReporting$TabContainerReports$TabPanelFilingInquiries$txtFromSubmissionDate"] = System.DateTime.Today.AddDays(-30).ToShortDateString();
    fields["TabContainerReportViewer$TabPanelReporting$TabContainerReports$TabPanelFilingInquiries$txtToSubmissionDate"] = System.DateTime.Today.ToShortDateString();
    fields["TabContainerReportViewer$TabPanelReporting$TabContainerReports$TabPanelFilingInquiries$ddlExport"] = "2";
    fields["TabContainerReportViewer$TabPanelReporting$TabContainerReports$TabPanelFilingInquiries$btnSubmitOptional"] = "Submit";

    // Selective Filings (downloads) panel controls.
    fields["TabContainerReportViewer$TabPanelDownloads$TabContainerDownloads$TabPanelSelectiveFilings$txtCID"] = String.Empty;
    fields["TabContainerReportViewer$TabPanelDownloads$TabContainerDownloads$TabPanelSelectiveFilings$txtFilingOrg"] = String.Empty;
    fields["TabContainerReportViewer$TabPanelDownloads$TabContainerDownloads$TabPanelSelectiveFilings$ddlQuarter"] = "Pick";
    fields["TabContainerReportViewer$TabPanelDownloads$TabContainerDownloads$TabPanelSelectiveFilings$ddlDownloadType"] = "CSV";
    fields["TabContainerReportViewer$TabPanelDownloads$TabContainerDownloads$TabPanelSelectiveFilings$txtName"] = String.Empty;
    fields["TabContainerReportViewer$TabPanelDownloads$TabContainerDownloads$TabPanelSelectiveFilings$txtEmail"] = String.Empty;

    // Postback bookkeeping fields, all sent empty.
    fields["__EVENTTARGET"] = String.Empty;
    fields["__EVENTARGUMENT"] = String.Empty;
    fields["__LASTFOCUS"] = String.Empty;
    fields["__AjaxControlToolkitCalendarCssLoaded"] = String.Empty;

    // Client-side tab state for the three tab containers.
    fields["TabContainerReportViewer_ClientState"] = "{\"ActiveTabIndex\" : 0,\"TabState\": [true,true]}";
    fields["TabContainerReportViewer_TabPanelReporting_TabContainerReports_ClientState"] = "{\"ActiveTabIndex\" : 1,\"TabState\": [true,true]}";
    fields["TabContainerReportViewer_TabPanelDownloads_TabContainerDownloads_ClientState"] = "{\"ActiveTabIndex\" : 0,\"TabState\": [true,true]}";

    // View-state values come from members declared outside this method.
    fields["__VIEWSTATE"] = ViewState;
    fields["__VIEWSTATEGENERATOR"] = ViewStateGenerator;
    fields["__VIEWSTATEENCRYPTED"] = ViewStateEncrypted;

    PostBackValue = FormatPostString(fields);
}

/// <summary>
/// POSTs <see cref="PostBackValue"/> to <see cref="AcquisitionURL"/> with <see cref="SessionID"/>
/// as the Cookie header, saves the raw response bytes to <c>C:\Test\test.csv</c>, and stores the
/// response decoded as text in <see cref="PageSourceCode"/>.
/// </summary>
/// <remarks>
/// The response is buffered once and then used twice. Reading the network stream to the end with
/// a <see cref="StreamReader"/> and afterwards copying the same stream to disk always yields an
/// empty file, because the stream is already exhausted and cannot be rewound.
/// </remarks>
/// <exception cref="InvalidOperationException"><see cref="PostBackValue"/> has not been built.</exception>
/// <exception cref="WebException">Propagated from the request when it fails.</exception>
private static void PostToPageForSubmitBtn()
{
    if (PostBackValue == null)
    {
        throw new InvalidOperationException("PostBackValue has not been populated; nothing to post.");
    }

    // Content-Length counts bytes, not characters, so encode first and measure the payload.
    byte[] postBytes = System.Text.Encoding.UTF8.GetBytes(PostBackValue);

    HttpWebRequest objRequestPostPage = (HttpWebRequest)WebRequest.Create(AcquisitionURL);

    objRequestPostPage.Method = WebRequestMethods.Http.Post;
    objRequestPostPage.ContentLength = postBytes.Length;
    objRequestPostPage.ContentType = "application/x-www-form-urlencoded";
    objRequestPostPage.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9";
    objRequestPostPage.KeepAlive = true;
    objRequestPostPage.Host = "eqrreportviewer.ferc.gov";
    objRequestPostPage.Headers.Add("Cache-Control", "max-age=0");
    objRequestPostPage.Headers.Add("Sec-Fetch-Dest", "document");
    objRequestPostPage.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36";
    objRequestPostPage.Headers.Add("Origin", "https://eqrreportviewer.ferc.gov");
    objRequestPostPage.Headers.Add("Sec-Fetch-Site", "same-origin");
    objRequestPostPage.Headers.Add("Sec-Fetch-Mode", "navigate");
    objRequestPostPage.Referer = "https://eqrreportviewer.ferc.gov/";
    // Let the request advertise and decode gzip/deflate itself. A hand-written
    // "Accept-Encoding: gzip, deflate,br" header invites a compressed (possibly Brotli) body
    // that nothing here would decompress.
    objRequestPostPage.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    objRequestPostPage.Headers.Add("Accept-Language", "en-US,en;q=0.9");
    //Pass in the ASP.NET Session ID
    objRequestPostPage.Headers.Add("Cookie", SessionID);

    objRequestPostPage.Headers.Add("Upgrade-Insecure-Requests", "1");
    objRequestPostPage.Headers.Add("Sec-Fetch-User", "?1");

    objRequestPostPage.ServicePoint.Expect100Continue = false;

    //Post the arguments
    using (Stream requestStream = objRequestPostPage.GetRequestStream())
    {
        requestStream.Write(postBytes, 0, postBytes.Length);
    }

    //Get response
    using (HttpWebResponse responsePostPage = (HttpWebResponse)objRequestPostPage.GetResponse())
    using (Stream responseStream = responsePostPage.GetResponseStream())
    using (MemoryStream bufferedResponse = new MemoryStream())
    {
        // The network stream is forward-only: pull it into memory once so the same bytes can be
        // written to disk unmodified and also decoded as text.
        responseStream.CopyTo(bufferedResponse);

        // Raw bytes go to disk untouched, so a binary payload (e.g. a zip) is not corrupted by
        // a text decode/encode round trip.
        bufferedResponse.Position = 0;
        using (FileStream file = new FileStream(@"C:\Test\test.csv", FileMode.Create, FileAccess.Write))
        {
            bufferedResponse.CopyTo(file);
        }

        // Text view of the same response, kept for callers that inspect PageSourceCode.
        bufferedResponse.Position = 0;
        using (StreamReader reader = new StreamReader(bufferedResponse))
        {
            PageSourceCode = reader.ReadToEnd();
        }
    }
}

如果我做错了什么,谁能告诉我。现在所有的值都是硬编码的,但如果它有效,我可以正确地组织它。
此外,我在收到的响应中没有得到 Content Disposition 响应标头,但是当它从 Chrome 浏览器运行时,我得到了这个标头。

我的代码应该做哪些改动?或者我是否遗漏了什么?任何帮助/建议都会对解决这个问题有很大帮助。

【问题讨论】:

  • but i am not able to download the file through code - 那么,究竟发生了什么?它会崩溃吗?是什么都没有下载吗?还是下载了一些东西,但内容已损坏?请注意,如果您下载的是 zip 文件,为什么要用 StreamReader 把结果读成字符串?又为什么在把结果读成字符串之后,还要将其写入扩展名为 .csv 的文件?另外,WriteFile 到底是做什么的?
  • @Nyerguds 我将其保存为 zip,这可能是错误的。如果您能够下载文件,文件保存在哪里。我应该使用什么来代替 StreamReader。 WriteFile 实际转换为字节数组。
  • @Nyerguds 实际上它给了我 200 的响应,但我无法理解文件的下载位置。您是否对代码进行了任何更改以使其正常工作。如果有的话,您能否向我提供您可能对代码所做的更改?

标签: c# httpwebrequest webrequest httpwebresponse content-disposition


【解决方案1】:

我无法使用 C# 做到这一点。
最后我结合使用 python 和 selenium 和 chrome web 驱动来完成任务。

# Drives headless Chrome through the report viewer's "Filing Inquiries" tab, selects the
# report type and export format, clicks Submit, and waits for the exported file to land in
# the download directory before shutting the browser down.
import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

download_dir = "/databricks/driver"
download_timeout_seconds = 120


def _list_download_dir():
    """Return the set of file names currently in download_dir (empty if it does not exist)."""
    if not os.path.isdir(download_dir):
        return set()
    return set(os.listdir(download_dir))


options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--disable-extensions")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--no-sandbox")
options.add_experimental_option("prefs", {"download.default_directory": download_dir})
# `options=` replaces the deprecated `chrome_options=` keyword.
driver = webdriver.Chrome(options=options)
try:
    # implicitly_wait() does not pause the script; it only sets how long element lookups
    # keep retrying. Setting it once is enough - it stays in effect for the session.
    driver.implicitly_wait(5)

    # Snapshot the directory so the new download can be told apart from older files.
    files_before = _list_download_dir()

    url = "https://eqrreportviewer.ferc.gov/"
    driver.get(url)
    #Filing Inquiries
    driver.find_element(By.XPATH, '//*[@id="__tab_TabContainerReportViewer_TabPanelReporting_TabContainerReports_TabPanelFilingInquiries"]').click()
    #Submission by Date
    driver.find_element(By.XPATH, '//*[@id="TabContainerReportViewer_TabPanelReporting_TabContainerReports_TabPanelFilingInquiries_ddlReportType"]/option[5]').click()
    #CSV
    driver.find_element(By.XPATH, '//*[@id="TabContainerReportViewer_TabPanelReporting_TabContainerReports_TabPanelFilingInquiries_ddlExport"]/option[2]').click()
    #Submit
    element = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="TabContainerReportViewer_TabPanelReporting_TabContainerReports_TabPanelFilingInquiries_btnSubmitOptional"]')))
    element.click()

    # Actually wait for the download: poll until a new file exists and Chrome has no
    # in-progress (.crdownload) file left. Quitting earlier would abort the transfer.
    deadline = time.monotonic() + download_timeout_seconds
    while True:
        new_files = _list_download_dir() - files_before
        in_progress = [name for name in new_files if name.endswith(".crdownload")]
        finished = [name for name in new_files if not name.endswith(".crdownload")]
        if finished and not in_progress:
            break
        if time.monotonic() > deadline:
            raise TimeoutError(
                "No completed download appeared in %s within %d seconds"
                % (download_dir, download_timeout_seconds))
        time.sleep(1)
finally:
    # Always release the browser process, even when a lookup or the download wait fails.
    driver.quit()

【讨论】:

    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多