【发布时间】:2017-10-30 13:51:35
【问题描述】:
尝试执行以下代码时出现以下错误。我该如何解决?
Traceback(最近一次调用最后一次): 文件“C:\Users\vaio\Desktop\coding\Twitter-Sentiment-Analysis-master\Twitter-Sentiment-Analysis-master\tweet_sentiment.py”,第 64 行,在 主要的() 文件“C:\Users\vaio\Desktop\coding\Twitter-Sentiment-Analysis-master\Twitter-Sentiment-Analysis-master\tweet_sentiment.py”,第 53 行,主要 sent_file = 打开(sys.argv[1]) IndexError:列表索引超出范围 [0.1s 完成,退出码 1]
import sys
import json
import ast
import re
def calcScoreFromTerm(termScoreFile): # returns a dictionary with term-score values
scores ={}
for line in termScoreFile:
term, score = line.split("\t")
scores[term] = float(score)
return scores
def getTweetText(tweet_file): #returns a list of all tweets
tweets = []
for line in tweet_file:
# print line
jsondata = json.loads(line)
if "text" in jsondata.keys():
tweets.append(jsondata["text"])
tweet_file.close()
return tweets
def filterTweet(et):
# Remove punctuations and non-alphanumeric chars from each tweet string
pattern = re.compile('[^A-Za-z0-9]+')
et = pattern.sub(' ', et)
#print encoded_tweet
words = et.split()
# Filter unnecessary words
for w in words:
if w.startswith("RT") or w.startswith("www") or w.startswith("http"):
words.remove(w)
return words
def getTweetSentiments(tweets, scores): #returns a list of sentiments
sentiments = []
for tweet in tweets:
sentiment = 0.0
tweet = tweet.encode('utf-8')
wordsInTweet = filterTweet(tweet) # re.split('\W+',tweet)
for eachWord in wordsInTweet:
if eachWord in scores:
sentiment += scores[eachWord]
sentiments.append(sentiment)
return sentiments
def main():
sent_file = open(sys.argv[1])
tweet_file = open(sys.argv[2])
scores = calcScoreFromTerm(sent_file)
tweets = getTweetText(tweet_file)
sentiments = getTweetSentiments(tweets, scores)
for sentiment in sentiments:
print sentiment
if __name__ == '__main__':
main()
【问题讨论】:
标签: python twitter nlp geocoding sentiment-analysis