10行代码使用Python统计词频

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Print the three most frequent words of a text file.

Words are the non-empty pieces obtained by splitting the text on commas,
spaces, semicolons, periods, newlines and tabs. Matching is case-sensitive.
"""
import re
from collections import Counter

# Text file analysed when the script is run directly.
INPUT_PATH = "C:\\Users\\陶敏\\Documents\\Pyscript\\test.txt"

# Single characters that separate words; compiled once at import time.
_SEPARATORS = re.compile(r'[, ;.\n\t]')


def top_words(text, n=3):
    """Return the *n* most frequent words in *text*.

    Args:
        text: The text to analyse.
        n: Maximum number of (word, count) pairs to return.

    Returns:
        A list of ``(word, count)`` tuples ordered from most to least
        frequent; words with equal counts keep first-seen order. The list
        is shorter than *n* when the text has fewer distinct words.
    """
    # Adjacent separators (e.g. ", ") yield empty strings from split();
    # filter them in one pass instead of removing items from the list
    # while iterating over it, which skips elements.
    words = [word for word in _SEPARATORS.split(text) if word]
    return Counter(words).most_common(n)


def main(path=INPUT_PATH):
    """Read *path* and print its three most frequent words, one per line.

    Args:
        path: Location of the text file to read.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Explicit encoding keeps the result independent of the platform
    # locale; change it if the input file is stored in another encoding.
    with open(path, encoding="utf-8") as handle:
        text = handle.read()
    for word, count in top_words(text):
        print(word, count)


if __name__ == "__main__":
    main()

使用哈利波特文档作为测试文件，下载地址：https://pan.baidu.com/share/link?shareid=424773&uk=3744444146

测试结果如下

C:\python\python36\python3.exe C:/Users/陶敏/PycharmProjects/day1/.idea/cipin.py
the 3305
to 1841
and 1797
上一篇:[Objective-C] 012_数据持久化_XML属性列表,NSUserDefaults


下一篇:com.mchange.v2.async.ThreadPoolAsynchronousRunner$DeadlockDetector APPARENT DEADLOCK