# coding: utf-8
"""Scrape message text from a QQ Zone page into ``qq_word.txt``.

The script logs in through http://i.qq.com with the account configured
below, opens ``http://user.qzone.qq.com/<friend>/311``, and then, page by
page, scrolls down, parses every ``#msgList`` entry and appends the text
content of each entry (one per line) to ``qq_word.txt``.  Paging stops as
soon as the ``pager_next_<n>`` element is no longer present.
"""
import io
import sys
import time

from lxml import etree
from selenium import webdriver

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str <-> unicode conversions use UTF-8.
    # ``reload`` is not a builtin on Python 3, so the hack must be guarded
    # or the script dies with NameError before doing anything.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding("utf-8")

friend = ''  # target QQ number; the target QQ Zone must allow access
user = ''  # your QQ number
pw = ''  # your QQ password

driver = webdriver.Chrome(executable_path='/Users/jiwu/Downloads/chromedriver')
try:
    driver.maximize_window()

    # Log in with account/password (the form lives inside "login_frame").
    driver.get("http://i.qq.com")
    driver.switch_to.frame("login_frame")
    driver.find_element_by_id("switcher_plogin").click()
    driver.find_element_by_id("u").send_keys(user)
    driver.find_element_by_id("p").send_keys(pw)
    driver.find_element_by_id("login_button").click()
    driver.switch_to.default_content()
    # NOTE(review): there is no explicit wait between clicking the login
    # button and the navigation below -- confirm the login has completed.

    driver.get("http://user.qzone.qq.com/" + friend + "/311")

    next_num = 0
    while True:
        # Scroll down in growing steps, pausing after each one, before
        # the page source is read.
        for i in range(1, 6):
            driver.execute_script("window.scrollBy(0,%d)" % (20000 * i))
            time.sleep(4)

        # The message list is rendered inside the "app_canvas_frame" iframe.
        driver.switch_to.frame("app_canvas_frame")
        selector = etree.HTML(driver.page_source)
        divs = selector.xpath('//*[@id="msgList"]/li/div[3]')

        # Explicit UTF-8 so the output does not depend on the default
        # encoding; ``io.open`` behaves the same on Python 2 and 3.
        with io.open('qq_word.txt', 'a', encoding='utf-8') as f:
            for div in divs:
                qq_name = div.xpath('./div[2]/a/text()')
                qq_content = div.xpath('./div[2]/pre/text()')
                qq_time = div.xpath('./div[4]/div[1]/span/a/text()')

                # Each XPath query returns a list; keep the first hit or
                # fall back to an empty string.
                qq_name = qq_name[0] if qq_name else u''
                qq_content = qq_content[0] if qq_content else u''
                qq_time = qq_time[0] if qq_time else u''

                print(qq_name, qq_time, qq_content)
                # ``u"\n"`` keeps the written value a text string on
                # Python 2, as required by ``io.open`` in text mode.
                f.write(qq_content + u"\n")

        # Stop when the current frame no longer contains a "next" button.
        next_id = 'pager_next_' + str(next_num)
        if next_id not in driver.page_source:
            break
        driver.find_element_by_id(next_id).click()
        next_num += 1
        # Leave the iframe so the next round scrolls the outer page again.
        driver.switch_to.parent_frame()
finally:
    # Always shut down the browser and the chromedriver process, even
    # when scraping fails half-way.
    driver.quit()
生成词云:
# coding: utf-8
"""Render a word cloud image from a UTF-8 text file."""
import io

import jieba
import matplotlib.pyplot as plt
from wordcloud import WordCloud


def create_word_cloud(filename, output_path='py_book.png',
                      font_path='/System/Library/Fonts/PingFang.ttc'):
    """Build, save and display a word cloud for ``<filename>.txt``.

    The text is segmented with ``jieba`` in full mode (``cut_all=True``),
    the segments are joined with spaces and fed to ``WordCloud``.

    Args:
        filename: Path of the input file WITHOUT the ``.txt`` suffix.
            The file is read as UTF-8.
        output_path: Where the rendered image is written.  Defaults to
            the original hard-coded ``'py_book.png'``.
        font_path: Font file handed to ``WordCloud``.  The default is a
            macOS system font path; pass another font on other systems.
    """
    # Context manager closes the handle; explicit encoding avoids
    # depending on the platform's locale when reading Chinese text.
    with io.open("{}.txt".format(filename), encoding="utf-8") as f:
        text = f.read()

    wordlist = jieba.cut(text, cut_all=True)
    wl = " ".join(wordlist)

    wc = WordCloud(
        background_color="white",
        max_words=2000,
        font_path=font_path,
        height=1200,
        width=1600,
        max_font_size=100,
        random_state=30,
    )
    myword = wc.generate(wl)

    # Save before showing: ``plt.show()`` may block until the window is
    # closed, and the image should not be lost if the viewer is killed.
    wc.to_file(output_path)

    plt.imshow(myword)
    plt.axis("off")
    plt.show()


if __name__ == '__main__':
    create_word_cloud('qq_word')