【发布时间】:2016-02-21 04:52:40
【问题描述】:
谁能告诉我，如何在 C# 应用程序中显示用户编写的 HTML 网页文档的标题？
【问题讨论】:
-
有大量用于 C# 的 html 解析器。你甚至可以用正则表达式来做到这一点。
谁能告诉我，如何在 C# 应用程序中显示用户编写的 HTML 网页文档的标题？
【问题讨论】:
你可以试试这个。首先，您需要获取完整的 HTML 响应，然后从中抓取（scrape）title 元素之间的文本。
/// <summary>
/// Fetches the document at a URL and extracts the text of its first
/// <c>&lt;title&gt;</c> element.
/// </summary>
public class TitleScraper {
    // Matches the first <title ...> element and captures its inner text.
    //  - \b[^>]* keeps the opening tag from matching other tag names that
    //    merely start with "title" and from running past the tag's own '>'.
    //  - The lazy [\s\S]*? stops at the nearest </title>, so a later <title>
    //    element in the document cannot be swallowed into the result.
    private static readonly System.Text.RegularExpressions.Regex TitlePattern =
        new System.Text.RegularExpressions.Regex(
            @"<title\b[^>]*>([\s\S]*?)</title>",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);

    private readonly string url;

    /// <summary>Creates a scraper for the given address.</summary>
    /// <param name="url">URI passed to <c>WebRequest.Create</c> when <see cref="Scrape"/> runs.</param>
    public TitleScraper(string url) {
        this.url = url;
    }

    /// <summary>
    /// Trimmed title text found by the last call to <see cref="Scrape"/>;
    /// an empty string when the document had no title element.
    /// </summary>
    public string Title { get; set; }

    /// <summary>
    /// Requests the document, reads the whole response body as text and
    /// stores the extracted title in <see cref="Title"/>.
    /// </summary>
    public void Scrape() {
        WebRequest request = WebRequest.Create(this.url);

        // Dispose the response, its stream and the reader even when reading
        // throws, so the underlying connection or file handle is released.
        using (WebResponse response = request.GetResponse())
        using (Stream data = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(data)) {
            Title = ExtractTitle(sr.ReadToEnd());
        }
    }

    // Returns the trimmed inner text of the first title element, or "" if none is present.
    private static string ExtractTitle(string html) {
        System.Text.RegularExpressions.Match match = TitlePattern.Match(html);
        return match.Success ? match.Groups[1].Value.Trim() : string.Empty;
    }
}
// Note: If you are trying to read from a document file instead of a URL, you can use the following instead:
/// <summary>
/// Reads a local file as text and stores the trimmed inner text of its first
/// <c>&lt;title&gt;</c> element in <c>Title</c> (an empty string when no
/// title element is found).
/// </summary>
/// <param name="filePath">
/// Location of the file to read. Defaults to the sample path so existing
/// parameterless calls keep working.
/// </param>
public void Scrape(string filePath = @"c:\user\filename.doc") {
    string text;

    // The using block closes the reader even if ReadToEnd throws.
    using (StreamReader sr = new StreamReader(filePath)) {
        text = sr.ReadToEnd();
    }

    // \b[^>]* confines the opening-tag match to a real <title ...> tag, and the
    // lazy [\s\S]*? stops at the nearest </title> instead of the last one.
    System.Text.RegularExpressions.Regex ex = new System.Text.RegularExpressions.Regex(
        @"<title\b[^>]*>([\s\S]*?)</title>",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);
    System.Text.RegularExpressions.Match match = ex.Match(text);
    Title = match.Success ? match.Groups[1].Value.Trim() : string.Empty;
}
【讨论】: