Tom-Net

网站抓取的方法(2种)

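1. 命名空间

注意:除下面列出的两个 using 外,示例代码还用到了 System、System.Text 和 System.Web 命名空间中的类型(如 Uri、Encoding、HttpContext)。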

using System.IO;
using System.Net;

2. 方法

 

 /// <summary>
    /// 读取本地所有的html页信息
    /// </summary>
    /// <param name="input"></param>
    /// <returns></returns>
    public static string GetAllHtml(string input)
    {
        StreamReader ReaderFile = null;
        string FilePath = HttpContext.Current.Server.MapPath(input);
        Encoding Code = Encoding.GetEncoding("gb2312");
        string strFile = string.Empty;
        try
        {
            ReaderFile = new StreamReader(FilePath, Code);
            strFile = ReaderFile.ReadToEnd();

        }
        catch (Exception ex)
        {
            throw ex;
        }
        finally
        {
           
            ReaderFile.Close();            
        }
        return strFile.ToString();
    }
    /// <summary>
    /// 取得远程网页的所有信息
    /// </summary>
    /// <param name="URL"></param>
    /// <returns></returns>
     public   static string GetPageSource(string URL) 
     {
        Uri uri = new Uri(URL);

        HttpWebRequest hwReq = (HttpWebRequest)WebRequest.Create(uri);
        HttpWebResponse hwRes = (HttpWebResponse)hwReq.GetResponse();

        hwReq.Method = "Get";

        hwReq.KeepAlive = false;

        StreamReader reader = new StreamReader(hwRes.GetResponseStream(), System.Text.Encoding.GetEncoding("GB2312"));

        return reader.ReadToEnd();  
     }

分类:

技术点:

相关文章: