[Posted]: 2021-11-05 02:02:49
[Problem description]:
Good morning,
My Twitter scraper has worked fine for months but is now producing the error below. It is also only pulling a limited number of tweets for the given date range. Any help would be greatly appreciated.
This is what I get when running it in Spyder 5.0.5 on Python 3.8.
It ran fine until September.
The error:
[SpyderKernelApp] ERROR | Exception in message handler:
Traceback (most recent call last):
File "C:\Users\james.coldman\Anaconda3\lib\site-packages\spyder_kernels\comms\frontendcomm.py", line 164, in poll_one
asyncio.run(handler(out_stream, ident, msg))
File "C:\Users\james.coldman\AppData\Roaming\Python\Python38\site-packages\nest_asyncio.py", line 32, in run
return loop.run_until_complete(future)
File "C:\Users\james.coldman\AppData\Roaming\Python\Python38\site-packages\nest_asyncio.py", line 60, in run_until_complete
f = asyncio.ensure_future(future, loop=self)
File "C:\Users\james.coldman\Anaconda3\lib\asyncio\tasks.py", line 673, in ensure_future
raise TypeError('An asyncio.Future, a coroutine or an awaitable is '
TypeError: An asyncio.Future, a coroutine or an awaitable is required
[SpyderKernelApp] ERROR | Exception in message handler:
Traceback (most recent call last):
File "C:\Users\james.coldman\Anaconda3\lib\site-packages\spyder_kernels\comms\frontendcomm.py", line 164, in poll_one
asyncio.run(handler(out_stream, ident, msg))
File "C:\Users\james.coldman\AppData\Roaming\Python\Python38\site-packages\nest_asyncio.py", line 32, in run
return loop.run_until_complete(future)
File "C:\Users\james.coldman\AppData\Roaming\Python\Python38\site-packages\nest_asyncio.py", line 60, in run_until_complete
f = asyncio.ensure_future(future, loop=self)
File "C:\Users\james.coldman\Anaconda3\lib\asyncio\tasks.py", line 673, in ensure_future
raise TypeError('An asyncio.Future, a coroutine or an awaitable is '
TypeError: An asyncio.Future, a coroutine or an awaitable is required
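For reference, the twint call can be reduced to roughly the minimal form below; this is essentially what scrape() in the full script does. The search term and dates here are just placeholders, not the real values from my spreadsheet.

# Minimal sketch of the twint call made by scrape() in the full script below.
# "example term" and the dates are placeholders.
import nest_asyncio
import twint

nest_asyncio.apply()  # Spyder's IPython kernel already runs an event loop

config = twint.Config()
config.Search = "example term"   # placeholder search term
config.Lang = "en"
config.Since = "2021-09-01"      # placeholder start date
config.Until = "2021-09-30"      # placeholder end date
config.Limit = 100
config.Store_csv = True
config.Output = "twint_test.csv"

twint.run.Search(config)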
The full script being used:
import nest_asyncio
import twint
import pandas as pd
import re
import os
import random
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud, STOPWORDS

nest_asyncio.apply()

RE_EMOJI = re.compile('[\U00010000-\U0010ffff]', flags=re.UNICODE)

def strip_emoji(text):
    return RE_EMOJI.sub(r'', text)

def sentiment_score(tweet):
    score = analyser.polarity_scores(tweet)
    return score['neg'], score['neu'], score['pos'], score['compound']
def scrape(search_term, start_date, end_date):
    random_code = '%030x' % random.randrange(16**30)
    # configuration
    config = twint.Config()
    config.Search = search_term
    config.Lang = "en"
    config.Limit = 1000000
    config.Since = start_date
    config.Until = end_date
    config.Store_csv = True
    config.Output = "twintTerms" + search_term + str(random_code) + ".csv"
    # running search
    twint.run.Search(config)
    # read the csv back in and tag it with the search details
    df = pd.read_csv("twintTerms" + search_term + str(random_code) + ".csv")
    df['Search Term'] = search_term
    df['Start Date'] = start_date
    df['End Date'] = end_date
    en = df[df['language'] == "en"]
    # delete csv ready for next search term
    if os.path.exists("twintTerms" + search_term + str(random_code) + ".csv"):
        os.remove("twintTerms" + search_term + str(random_code) + ".csv")
    return en
def fetch_scrapes(search_dict):
    search_terms = search_dict['Search Term']
    start_dates = search_dict['Start Date']
    end_dates = search_dict['End Date']
    responses = []
    for key in search_terms.keys():  # run each configured search and collect the results
        results = scrape(str(search_terms[key]), str(start_dates[key]), str(end_dates[key]))
        responses.append(results)
    return responses
if __name__ == "__main__":
    print("Twitter Scrape Starting...")
    # delete any leftover files
    if os.path.exists("twitterResults.xlsx"):
        os.remove("twitterResults.xlsx")
    # create results directory
    if not os.path.exists('data/results'):
        os.makedirs('data/results')
    # pull in search terms
    search_df = pd.read_excel(r"data/TwitterTerms.xlsx")
    search_dict = search_df.to_dict()
    # run scrape
    dataframe_list = fetch_scrapes(search_dict)
    tweet_df = pd.concat(dataframe_list)
    tweet_df = tweet_df[['date', 'time', 'tweet', 'replies_count', 'retweets_count', 'likes_count', 'Search Term', 'Start Date', 'End Date']]
    tweet_df = tweet_df.drop_duplicates(['tweet', 'Search Term'])
    # remove emojis
    tweet_df['tweet'] = tweet_df['tweet'].apply(strip_emoji)
    # sentiment analysis
    analyser = SentimentIntensityAnalyzer()
    tweet_df[['Negative', 'Neutral', 'Positive', 'Compound']] = tweet_df['tweet'].apply(lambda x: pd.Series(sentiment_score(x)))
    tweet_df.to_excel(r"data/results/rawData.xlsx")
    grouped = tweet_df.groupby(['Search Term', 'Start Date', 'End Date']).agg(['mean', 'count'])
    grouped.to_csv(r'data/results/twitterResults.csv')
    # most frequent words across topics
    extra_stopwords_list = [term.split(" ") for term in list(set(tweet_df['Search Term'].tolist()))]
    extra_stopwords_list = [i for j in extra_stopwords_list for i in j]
    words_list = []
    for team in list(set(tweet_df['Search Term'].tolist())):
        print(team)
        team_df = tweet_df[(tweet_df['Search Term'] == team)]
        raw_string = ' '.join(team_df['tweet'])
        no_links = re.sub(r'http\S+', '', raw_string)
        STOPWORDS = set(list(STOPWORDS) + extra_stopwords_list + ['amp'])
        wordcloud = WordCloud(stopwords=STOPWORDS).generate(no_links)
        most_popular = list(wordcloud.words_.keys())
        most_popular = [word for word in most_popular if len(word) > 2]
        perc_list = [[word, len(team_df[team_df['tweet'].str.contains(word)])] for word in most_popular]
        words_list.append(perc_list)
        df = pd.DataFrame()
        df['Word'] = [i[0] for i in perc_list]
        df['Tweets Featured In'] = [i[1] for i in perc_list]
    # keep only words that appear for more than one search term
    test = []
    for lst in words_list:
        for lst2 in lst:
            word = lst2[0]
            counter = []
            for lst3 in words_list:
                for lst4 in lst3:
                    if word == lst4[0]:
                        counter.append(word)
            if len(counter) > 1:
                test.append(word)
    test = list(set(test))
    # percentage of each search term's tweets containing those shared words
    df_list = []
    for team in list(set(tweet_df['Search Term'].tolist())):
        team_df = tweet_df[tweet_df['Search Term'] == team]
        perc_list = [[word, (len(team_df[team_df['tweet'].str.contains(word)]) / len(team_df)) * 100] for word in test]
        df = pd.DataFrame()
        df['Word'] = [i[0] for i in perc_list]
        df[team] = [i[1] for i in perc_list]
        df.set_index(team)
        df_list.append(df)
    words_df = pd.concat(df_list, axis=1)
    words_df.to_excel(r"data/results/wordFrequencies.xlsx")
    print("Scrape Completed...")
Tags: python