网站抓取的方法(2种)
1. 命名空间
using System.IO;
using System.Net;
2. 方法
/// <summary>
/// Reads the entire contents of a local HTML file on the web server.
/// </summary>
/// <param name="input">
/// Path handed to <c>HttpContext.Current.Server.MapPath</c> to resolve the
/// physical file location.
/// </param>
/// <returns>The complete text of the file, decoded as GB2312.</returns>
public static string GetAllHtml(string input)
{
    string filePath = HttpContext.Current.Server.MapPath(input);
    System.Text.Encoding encoding = System.Text.Encoding.GetEncoding("gb2312");

    // The using statement closes the reader on every path. If the constructor
    // itself fails (e.g. missing file) there is nothing to close, so the real
    // I/O exception reaches the caller with its original stack trace instead
    // of being masked by a NullReferenceException from a finally block.
    using (StreamReader reader = new StreamReader(filePath, encoding))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Downloads the full body of a remote web page.
/// </summary>
/// <param name="URL">Absolute URL of the page to fetch.</param>
/// <returns>The response body, decoded as GB2312.</returns>
public static string GetPageSource(string URL)
{
    Uri uri = new Uri(URL);
    HttpWebRequest hwReq = (HttpWebRequest)WebRequest.Create(uri);

    // Request settings must be applied before GetResponse(), which is the
    // call that actually sends the request; setting them afterwards has no
    // effect on what goes over the wire.
    hwReq.Method = "GET"; // HTTP method tokens are upper-case by convention
    hwReq.KeepAlive = false;

    // Dispose the response, its stream and the reader so the underlying
    // connection is released even if reading fails part-way through.
    using (HttpWebResponse hwRes = (HttpWebResponse)hwReq.GetResponse())
    using (Stream responseStream = hwRes.GetResponseStream())
    using (StreamReader reader = new StreamReader(responseStream, System.Text.Encoding.GetEncoding("GB2312")))
    {
        return reader.ReadToEnd();
    }
}