【发布时间】:2010-01-18 10:05:16
【问题描述】:
/// <summary>
/// Downloads the page whose URL is typed into <c>textBox1</c>, shows the raw
/// HTML in <c>textBox2</c>, and lists the tag-stripped text of every anchor
/// element in <c>listBox1</c>.
/// </summary>
/// <param name="sender">Control that raised the click event (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button3_Click(object sender, EventArgs e)
{
    listBox1.Items.Clear();

    string szURL = textBox1.Text; // e.g. "http://localhost"
    if (string.IsNullOrEmpty(szURL))
    {
        // Nothing to download; WebRequest.Create would throw on an empty URL.
        return;
    }

    // 1.
    // Download the page. The response and its stream are disposed even when
    // reading fails, and StreamReader decodes the stream as a whole so a
    // multi-byte UTF-8 sequence is never split across buffer boundaries.
    string bodyText;
    HttpWebRequest httpRequest = (HttpWebRequest)WebRequest.Create(szURL);
    using (HttpWebResponse httpResponse = (HttpWebResponse)httpRequest.GetResponse())
    using (Stream responseStream = httpResponse.GetResponseStream())
    using (StreamReader reader = new StreamReader(responseStream, System.Text.Encoding.UTF8))
    {
        bodyText = reader.ReadToEnd();
    }

    textBox2.Text = bodyText;

    // 2.
    // Find every <a ...>...</a> element. "\b" keeps tags such as <abbr> or
    // <article> from being treated as links; IgnoreCase accepts <A HREF=...>.
    MatchCollection anchors = Regex.Matches(
        bodyText,
        @"(<a\b.*?>.*?</a>)",
        RegexOptions.Singleline | RegexOptions.IgnoreCase);

    foreach (Match anchor in anchors)
    {
        string value = anchor.Groups[1].Value;

        // 3.
        // Look for a <script> element nested inside the anchor markup.
        Match script = Regex.Match(
            value,
            @"<\s*script[^>]*>(?<content>.*?)<\s*/\s*script\s*>",
            RegexOptions.Singleline | RegexOptions.IgnoreCase);
        if (script.Success)
        {
            // NOTE(review): kept from the original code; the intended use of
            // the script content is unclear - confirm what should be shown.
            listBox1.Text = script.Groups["content"].Value;
        }

        // 4.
        // Remove inner tags from the text and append it. The list was cleared
        // once at the top, so every link is kept rather than only the last.
        string t = Regex.Replace(value, @"\s*<.*?>\s*", "", RegexOptions.Singleline);
        listBox1.Items.Add(t);
    }
}
这是我的代码。这是分配给我的一项作业:我必须把网页中标签之间的内容提取出来,并且单独提取网页中的链接。我觉得这很困难,请尽快帮助我。
【问题讨论】:
标签: c#