import random
import time
import urllib
import urllib2

from BeautifulSoup import BeautifulSoup


def checkIndex(url):
    """Report whether a Baidu search for ``url`` echoes it in the first hit.

    The scheme prefix and surrounding whitespace are removed from ``url``,
    the remainder is searched on Baidu, and the children of the first
    ``<span class="g">`` element of the result page are scanned for the
    cleaned URL.
    NOTE(review): presumably ``span.g`` is where Baidu prints a result's
    display URL -- confirm against the current page markup.

    Args:
        url: The URL to look up, e.g. a raw line read from a text file.

    Returns:
        The cleaned URL (no scheme, no surrounding whitespace) if it occurs
        in the inspected span, otherwise ``None``. ``None`` is also returned
        for an empty/blank ``url`` without issuing any request.

    Raises:
        urllib2.URLError: If the search page cannot be fetched.
    """
    # Lines read from a file still carry their newline; left in place it
    # would end up in the query and make the substring test below fail.
    url = url.strip().replace('http://', '').replace('https://', '')
    if not url:
        return None

    # Escape the term so characters such as '&', '#' or spaces cannot
    # corrupt the query string.
    baiduUrl = 'http://www.baidu.com/s?wd=' + urllib.quote_plus(url)
    webPage = urllib2.urlopen(baiduUrl)
    try:
        webCont = webPage.read()
    finally:
        # Release the connection even if reading fails.
        webPage.close()

    # Drop the bold markers so a URL split by <b>...</b> highlighting
    # becomes one contiguous piece of text again.
    webCont = webCont.replace('<b>', '').replace('</b>', '')
    soup = BeautifulSoup(webCont)
    findlist = soup.find('span', {'class': 'g'})
    if not findlist:
        return None

    # Inspect every child of the span rather than deciding on the first one.
    for each in findlist:
        if url in unicode(each):
            return url
    return None


# Read one URL per line from list.txt and write one result line per input
# line to check.txt: the cleaned URL when found, the text 'None' otherwise.
with open('list.txt') as urllist:
    with open('check.txt', 'w') as res:
        for eachurl in urllist:
            indexurl = unicode(checkIndex(eachurl)) + '\n'
            res.write(indexurl)
            if eachurl.strip():
                # Draw a fresh 1-20 second pause after every real request so
                # the interval between queries is not constant.
                time.sleep(random.randint(1, 20))

print('over!')