-
Notifications
You must be signed in to change notification settings - Fork 87
/
lm.py
executable file
·70 lines (55 loc) · 1.71 KB
/
lm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#===============================================================================
#
# Copyright (c) 2017 <> All Rights Reserved
#
#
# File: /Users/hain/ai/book-of-qna-code/tmp/lm.py
# Author: Hai Liang Wang
# Date: 2018-05-28:16:06:56
#
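#===============================================================================
"""
Smoke tests that load a KenLM n-gram language model from ``ngrams.arpa.gz``
and print the probability and the perplexity of a sample sentence.
"""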
from __future__ import print_function
from __future__ import division
__copyright__ = "Copyright (c) 2017 . All Rights Reserved"
__author__ = "Hai Liang Wang"
__date__ = "2018-05-28:16:06:56"
import os
import sys
# Absolute directory of this script, also appended to the module search path.
curdir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(curdir)

if sys.version_info[0] >= 3:
    # Python 3 has no separate ``unicode`` type; alias the name to ``str``.
    unicode = str
else:
    # Python 2 only: switch the interpreter's default string encoding to UTF-8.
    reload(sys)
    sys.setdefaultencoding("utf-8")

# Copy of the process environment taken at import time.
ENVIRON = os.environ.copy()
import kenlm
import math
import unittest
# run a single testcase: python lm.py Test.test_prob
class Test(unittest.TestCase):
    """
    Smoke tests for a KenLM n-gram language model.

    Every test loads ``ngrams.arpa.gz`` from the directory of this file and
    prints one statistic for the space-separated sample sentence.
    """

    def setUp(self):
        """
        Load the language model into ``self.model``.

        Raises:
            IOError: if ``ngrams.arpa.gz`` is not present next to this file.
        """
        print("加载LM模型 ...")
        model_file = os.path.join(curdir, "ngrams.arpa.gz")
        if not os.path.exists(model_file):
            # IOError (an ordinary Exception available on Python 2 and 3)
            # instead of bare BaseException, which is meant for
            # interpreter-exit signals; ``except BaseException`` handlers
            # still catch it.
            raise IOError("模型文件不存在!, 执行 gen_model.sh 生成模型文件。")
        self.model = kenlm.Model(model_file)

    def tearDown(self):
        """Nothing to clean up after a test."""
        pass

    def test_prob(self):
        """Print the probability of the sample sentence."""
        print("kenlm: 句子出现的概率")
        # score() returns a log10 probability; raise 10 to it to get the
        # linear probability.
        log10_prob = self.model.score('保 险', bos=True, eos=True)
        print("保 险:", math.pow(10, log10_prob))

    def test_perplexity(self):
        """Print the perplexity of the sample sentence."""
        print("kenlm: 句子的困惑度")
        # perplexity() already returns the linear value
        # 10 ** (-score / word_count). Exponentiating it again printed a wrong
        # number and could raise OverflowError for large perplexities.
        print("保 险:", self.model.perplexity('保 险'))
def test():
    """Hand control to unittest's command-line runner for the ``__main__`` module."""
    unittest.main(module="__main__")


if __name__ == "__main__":
    test()