-
Notifications
You must be signed in to change notification settings - Fork 2
/
DBUtil.py
executable file
·118 lines (106 loc) · 4.48 KB
/
DBUtil.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
连接MySQL数据库,是Redis的备用方案,在Redis失效的时候使用。
'''
import MySQLdb
import time
from redis_util import RedisUtil
import logging.config
import utils
logging.config.fileConfig('logger.conf')
logger = logging.getLogger('recommServerLog')
class DB(object):
def __init__(self):
if utils.get_host_ip() == '10.1.13.49':
self.HOST = '10.1.13.29'
else:
self.HOST = '202.107.204.50'
self.conn = MySQLdb.connect(host=self.HOST, user='root', passwd='tdlabDatabase', db='techpooldata',
port=3306, charset='utf8')
self.tables = {'paper': 'expert_paper_join', 'patent': 'expert_patent_join', 'project': 'expert_project_join'}
self.columns = {'paper': 'PAPER_ID', 'patent': 'PATENT_ID', 'project': 'PROJECT_ID'}
self.redis = RedisUtil()
def getConnection(self):
try:
self.conn.ping()
except:
self.conn = MySQLdb.connect(host=self.HOST, user='root', passwd='tdlabDatabase', db='techpooldata',
port=3306, charset='utf8')
print 'reconnection'
return self.conn
# 返回以docId为key,以authors为value的map
def getAuthors(self, typee, ids):
begin = time.time()
if len(ids) == 0:
return {}
authorIds = {}
sql = "select " + self.columns[typee] + ",EXPERT_ID from " + self.tables[typee] + " where " + self.columns[
typee] + " in("
for i in range(len(ids)):
sql = sql + "'" + ids[i] + "'"
if i != len(ids) - 1:
sql = sql + ','
sql = sql + ') order by ' + self.columns[typee] + ',expert_role'
cur = self.getConnection().cursor()
cur.execute(sql)
results = cur.fetchall()
cur.close()
for line in results:
if line[0] not in authorIds:
authorIds[line[0]] = []
authorIds[line[0]].append(line[1])
end = time.time()
print '############# time in ' + typee + ' getAuthors()' + str(end - begin)
return authorIds
# 返回id是否在数据库中,因为有时数据不同步
def idInDB(self, typee, id):
cur = self.getConnection().cursor()
sql = "select * from " + self.tables[typee] + " where " + self.columns[typee] + " ='" + id + "'"
count = cur.execute(sql)
cur.close()
return count > 0
# 返回一个作者的论文,包括非第一作者
def getPapers(self, expertId):
sql = "select p.PAPER_ID, p.name, p.abstract,p.authors, p.journal_name, p.year from expert_paper_join j JOIN paper p on j.PAPER_ID=p.PAPER_ID where j.EXPERT_ID='" + expertId + "';"
cur = self.getConnection().cursor()
cur.execute(sql)
r = cur.fetchall()
result = []
for line in r:
if line[2] is None:
abstract = ''
else:
abstract = line[2]
result.append([line[0], line[1], abstract, None, line[3], line[4], line[5]])
return result
# 返回一个作者的专利,包括非第一作者
def getPatents(self, expertId):
sql = "select p.PATENT_ID, p.name, p.abstract, p.application_no, p.inventors, p.applicant, p.year from expert_patent_join j JOIN patent p on j.PATENT_ID=p.PATENT_ID where j.EXPERT_ID='" + expertId + "';"
cur = self.getConnection().cursor()
cur.execute(sql)
r = cur.fetchall()
result = []
for line in r:
if line[2] is None:
abstract = ''
else:
abstract = line[2]
result.append([line[0], line[1], abstract, None, line[3], line[4], line[5], line[6]])
return result
# 返回一个作者的项目,包括非第一作者
def getProjects(self, expertId):
sql = "select p.PROJECT_ID, p.name, p.abstract_ch, p.member, p.unit, p.year, p.type from expert_project_join j JOIN project p on j.PROJECT_ID=p.PROJECT_ID where j.EXPERT_ID='" + expertId + "';"
cur = self.getConnection().cursor()
cur.execute(sql)
r = cur.fetchall()
result = []
for line in r:
if line[2] is None:
abstract = ''
else:
abstract = line[2]
result.append([line[0], line[1], abstract, None, line[3], line[4], line[5], line[6]])
return result
def __del__(self):
self.conn.close()