lda学习demo-testModel.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from gensim.models.ldamodel import LdaModel
from gensim import corpora
import fileinput
import re
import jieba

#加载模型
modelPath="model_lda1/model_lda1"
model=LdaModel.load(modelPath)

#加载需要预测的文本
fileName="data/test.txt"
a=""
for line in fileinput.input(fileName,openhook=fileinput.hook_encoded('utf-8', '')):
a=a+line


def stopwordsPattern():
stopwordsPatternList=[]
for i in open('data/stopWord.txt',encoding='UTF-8').readlines():
stopwordsPatternList.append(re.sub(r"\n","",i))
return stopwordsPatternList
def paperCut(intxt,pattern=stopwordsPattern()):
aList=jieba.lcut(intxt)
for i in aList:
if i in pattern:
aList.remove(i)
return aList


#对文本分词
aList=[]

aList.append(paperCut(a))

#文本向量化
wordDict=corpora.Dictionary(aList)
corpus=wordDict.doc2bow(aList[0])

#文本对比
doc_lda = model[corpus]
scoreList=[]
for a in doc_lda:
scoreList.append(a[1])
maxIndex=scoreList.index(max(scoreList))
print(doc_lda[maxIndex])
print(model.print_topics()[maxIndex])