-
Notifications
You must be signed in to change notification settings - Fork 32
/
youdao.py
143 lines (113 loc) · 4.25 KB
/
youdao.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# -*- coding: utf-8 -*-
import urllib, re, collections, xml.etree.ElementTree as ET
import sys, json
import io, platform
try:
from urllib.parse import urlparse, urlencode
from urllib.request import urlopen, Request
from urllib.error import HTTPError
except ImportError:
from urlparse import urlparse
from urllib import urlencode
from urllib2 import urlopen, Request, HTTPError
def str_encode(word):
    """Return *word* unchanged on Python 3, or UTF-8 encoded on Python 2."""
    if sys.version_info < (3, 0):
        return word.encode('utf-8')
    return word
def str_decode(word):
    """Return *word* unchanged on Python 3, or decoded from UTF-8 on Python 2."""
    if sys.version_info < (3, 0):
        return word.decode('utf-8')
    return word
def bytes_decode(word):
    """Decode *word* with the default codec on Python 3; pass it through on Python 2."""
    if sys.version_info < (3, 0):
        return word
    return word.decode()
def url_quote(word):
    """Percent-encode *word* for use in a URL.

    Python 3 quotes the text directly; Python 2 first encodes it to UTF-8
    and uses the legacy ``urllib.quote``.
    """
    is_py3 = sys.version_info >= (3, 0)
    return urllib.parse.quote(word) if is_py3 else urllib.quote(word.encode('utf-8'))
# User-facing result messages returned by get_word_info (text is Chinese).
# "No definition found for this word" -- the dictionary reply has no <content>.
WARN_NOT_FIND = " 找不到该单词的释义"
# "Youdao translation query failed!" -- unexpected HTTP status or reply format.
ERROR_QUERY = " 有道翻译查询出错!"
# "Cannot connect to the Youdao server!" -- the HTTP request raised IOError.
NETWORK_ERROR = " 无法连接有道服务器!"
# Characters that preprocess_word replaces with a space before querying.
QUERY_BLACK_LIST = ['.', '|', '^', '$', '\\', '[', ']', '{', '}', '*', '+', '?', '(', ')', '&', '=', '\"', '\'', '\t']
def preprocess_word(word):
    """Normalise a raw query into space-separated words.

    Surrounding whitespace is stripped, every character listed in
    QUERY_BLACK_LIST is replaced by a space, and the text is then split on
    underscores and at each lowercase-to-uppercase boundary (so
    ``fooBar_baz`` becomes ``foo Bar baz``).
    """
    cleaned = word.strip()
    for banned in QUERY_BLACK_LIST:
        cleaned = cleaned.replace(banned, ' ')

    boundary = re.compile('[a-z][A-Z]')
    tokens = []
    for chunk in cleaned.split('_'):
        # A cut falls just after the lowercase letter of each "aB" pair.
        cuts = [hit.start() + 1 for hit in boundary.finditer(chunk)]
        starts = [0] + cuts
        ends = cuts + [len(chunk)]
        tokens.extend(chunk[begin:end] for begin, end in zip(starts, ends))
    return ' '.join(tokens).strip()
def _read_url(url, timeout=5):
    """Open *url* and return ``(status_code, body)``, always closing the response.

    Exceptions raised by ``urlopen`` or ``read`` propagate to the caller.
    """
    response = urlopen(url, timeout=timeout)
    try:
        return response.getcode(), response.read()
    finally:
        response.close()


def _format_dict_entry(doc):
    """Render the dictionary reply *doc* (an XML element) as one line of text.

    Returns WARN_NOT_FIND when the reply contains no ``content`` element.
    """
    if not doc.findall(".//content"):
        return WARN_NOT_FIND

    collected = collections.defaultdict(list)
    for el in doc.findall(".//"):
        if el.tag in ('return-phrase', 'phonetic-symbol', 'content') and el.text:
            # str_encode keeps the pieces in the same string type as the
            # template on both Python 2 and Python 3.
            collected[el.tag].append(str_encode(el.text))

    info = {
        'return-phrase': ' '.join(collected['return-phrase']),
        'phonetic-symbol': ' '.join(collected['phonetic-symbol']),
        'content': ' | '.join(collected['content']),
    }
    tpl = ' %(return-phrase)s'
    if info["phonetic-symbol"]:
        tpl = tpl + ' [%(phonetic-symbol)s]'
    tpl = tpl + ' %(content)s'
    return tpl % info


def _translate_fallback(word):
    """Translate *word* through the fanyi.youdao.com page.

    Returns one translated segment per line, the raw embedded JSON text when
    the payload does not have the expected shape or reports an error, or
    NETWORK_ERROR / ERROR_QUERY on failure.
    """
    try:
        _, body = _read_url(
            "http://fanyi.youdao.com" + "/translate?i=" + url_quote(word), timeout=5)
    except IOError:
        return NETWORK_ERROR

    found = re.search(r"global.translatedJson = (?P<result>.*);", bytes_decode(body))
    if not found:
        return ERROR_QUERY

    raw = found.group('result')
    try:
        payload = json.loads(raw)
    except ValueError:
        # The embedded payload is not valid JSON.
        return ERROR_QUERY

    # Anything other than a successful, well-formed payload is shown as-is.
    if not isinstance(payload, dict) or payload.get("errorCode") != 0:
        return str_decode(raw)
    translate_result = payload.get("translateResult")
    if translate_result is None:
        return str_decode(raw)

    return ''.join(item[0].get("tgt") + "\n" for item in translate_result)


def get_word_info(word):
    """Look up *word* on Youdao and return a printable result string.

    The query is normalised with preprocess_word; an empty query returns ''.
    The dictionary endpoint is tried first: when its ``return-phrase``
    matches the query (case-insensitively) the formatted entry is returned,
    otherwise the query is sent to the translation page instead.

    Returns NETWORK_ERROR when a request raises IOError and ERROR_QUERY when
    a reply has an unexpected status or cannot be parsed.
    """
    word = preprocess_word(word)
    if not word:
        return ''

    try:
        # Bounded by _read_url's default timeout so a stalled server cannot
        # block the caller indefinitely.
        status, body = _read_url(
            'http://dict.youdao.com' + '/fsearch?q=' + url_quote(word))
    except IOError:
        return NETWORK_ERROR
    if status != 200:
        return ERROR_QUERY

    try:
        doc = ET.fromstring(body)
    except ET.ParseError:
        return ERROR_QUERY

    # findtext yields None when the element is absent, so a reply without a
    # return-phrase falls through to the translation page instead of raising.
    phrase = doc.findtext(".//return-phrase")
    # re.escape makes the query match literally, whatever characters survive
    # preprocessing.
    if phrase and re.match(r"^%s$" % re.escape(word), phrase, re.IGNORECASE):
        return _format_dict_entry(doc)
    return _translate_fallback(word)
if __name__ == "__main__":
    # On Windows, re-wrap stdout so the result is written as UTF-8.
    if platform.system() == 'Windows':
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
    # All command-line arguments are concatenated with no separator.
    # NOTE(review): a multi-word query passed as separate arguments loses its
    # spaces here -- confirm callers always pass a single argument.
    query = str_decode("".join(sys.argv[1:]))
    sys.stdout.write(get_word_info(query))