import jieba
# Count how often each multi-character word occurs in 'threekingdoms.txt' and
# print the ten most frequent ones, merging several alternative names of the
# main characters into one canonical name each.

# Read the whole text up front; the context manager guarantees the file
# handle is closed even if decoding fails.
with open('threekingdoms.txt', 'r', encoding='gb18030') as f:
    txt = f.read()

# Words that must not appear in the final ranking.
excludes = {'将军', '却说', '荆州', '二人', '不可', '不能', '如此'}

# Alternative token -> canonical name under which it is counted.
aliases = {
    '诸葛亮': '孔明',
    '孔明曰': '孔明',
    '关公': '关羽',
    '云长': '关羽',
    '玄德': '刘备',
    '玄德曰': '刘备',
    '孟德': '曹操',
    '丞相': '曹操',
}

# Segment the text into a list of words.
words = jieba.lcut(txt)

counts = {}
for word in words:
    # Single-character tokens are skipped entirely.
    if len(word) == 1:
        continue
    # Fold aliases into their canonical name; other words count as themselves.
    rword = aliases.get(word, word)
    counts[rword] = counts.get(rword, 0) + 1

# Drop the excluded words. pop() with a default tolerates a word that never
# occurred in the text, where `del` would raise KeyError.
for word in excludes:
    counts.pop(word, None)

# Rank by descending frequency (stable sort: ties keep first-seen order).
items = list(counts.items())
items.sort(key=lambda x: x[1], reverse=True)

# Slicing instead of indexing 0..9 avoids IndexError when fewer than ten
# distinct words remain.
for word, count in items[:10]:
    print('{0:<10}{1:>5}'.format(word, count))

统计文本中特定词汇的出现频率

 

相关文章:

  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2021-08-20
  • 2022-12-23
  • 2022-12-23
猜你喜欢
  • 2022-12-23
  • 2022-12-23
  • 2022-12-23
  • 2021-09-12
  • 2021-12-26
  • 2022-12-23
  • 2021-09-02
相关资源
相似解决方案