1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
import fileinput
import re
import warnings

import jieba
from gensim import corpora
from gensim.models.ldamodel import LdaModel
# Load the previously trained LDA model from disk.
modelPath = "model_lda1/model_lda1"
model = LdaModel.load(modelPath)

# Read the whole document to classify into a single UTF-8 string.
# A context manager guarantees the file handle is closed, and one read()
# avoids rebuilding the string once per line.
fileName = "data/test.txt"
with open(fileName, encoding='utf-8') as inputFile:
    a = inputFile.read()
# Lazily populated cache of the default stopword set used by paperCut, so the
# stopword file is read at most once and only when it is actually needed.
_defaultStopwords = None


def stopwordsPattern():
    """Return the stopwords listed in ``data/stopWord.txt``.

    The file is read as UTF-8, one entry per line; the newline character is
    removed from every line. Lines are returned in file order, including
    empty ones.

    Returns:
        list[str]: The stopword entries.

    Raises:
        OSError: If the stopword file cannot be opened.
    """
    with open('data/stopWord.txt', encoding='UTF-8') as stopwordFile:
        return [line.replace("\n", "") for line in stopwordFile]


def paperCut(intxt, pattern=None):
    """Tokenise ``intxt`` with jieba and drop every stopword.

    Args:
        intxt: Text to segment.
        pattern: Container of stopwords to exclude (anything supporting
            ``in``). When omitted, the entries of ``data/stopWord.txt`` are
            loaded on first use and cached for later calls.

    Returns:
        list[str]: The tokens of ``intxt`` in their original order, without
        any token found in ``pattern``.
    """
    global _defaultStopwords
    if pattern is None:
        if _defaultStopwords is None:
            # A frozenset gives O(1) membership tests for the default list.
            _defaultStopwords = frozenset(stopwordsPattern())
        pattern = _defaultStopwords
    # Build a new list instead of removing from the list being iterated;
    # in-place removal skips the token that follows each removed stopword.
    return [token for token in jieba.lcut(intxt) if token not in pattern]
# Tokenise the input document; kept as a one-element list of token lists,
# which is the shape corpora.Dictionary expects.
aList = [paperCut(a)]

# Convert the tokens to bag-of-words using the vocabulary stored with the
# model, so the token ids match the ids the topics were trained on.
wordDict = model.id2word
if not hasattr(wordDict, "doc2bow"):
    # The loaded model carries no usable dictionary; fall back to one built
    # from this document alone. Its ids are unrelated to the model's ids.
    warnings.warn(
        "model.id2word has no doc2bow(); building a dictionary from the "
        "input document, so topic scores are unreliable"
    )
    wordDict = corpora.Dictionary(aList)
corpus = wordDict.doc2bow(aList[0])

# doc_lda holds (topic_id, probability) pairs for this document.
doc_lda = model[corpus]
scoreList = [entry[1] for entry in doc_lda]
maxIndex = scoreList.index(max(scoreList))

# maxIndex is a position inside doc_lda, not a topic id: topics may be
# missing from doc_lda, so the topic must be looked up by its own id.
topicId = doc_lda[maxIndex][0]
print(doc_lda[maxIndex])
print((topicId, model.print_topic(topicId)))
|