-
Notifications
You must be signed in to change notification settings - Fork 0
/
054.py
executable file
·35 lines (24 loc) · 939 Bytes
/
054.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
54. 品詞タグ付け
Stanford Core NLPの解析結果XMLを読み込み,単語,レンマ,品詞をタブ区切り形式で
出力せよ.
"""
import sys
from lxml import etree
def one_liner(infile):
    """Write one ``word<TAB>lemma<TAB>POS`` line per token to standard output.

    Args:
        infile: XML source handed unchanged to ``etree.parse`` (a path or a
            file-like object); expected to hold Stanford CoreNLP XML output.

    Every ``token`` element found anywhere in the document yields one line
    built from the text of its ``word``, ``lemma`` and ``POS`` child
    elements, always in that order.  A child that is absent or has no text
    becomes an empty column instead of raising ``TypeError`` in ``str.join``.
    """
    # './/token' spells out the descendant search explicitly; a path that
    # starts with '/' on a parsed tree is rewritten to this form with a
    # FutureWarning.
    for token in etree.parse(infile).iterfind('.//token'):
        columns = [token.findtext(tag, '') for tag in ('word', 'lemma', 'POS')]
        sys.stdout.write('\t'.join(columns) + '\n')
def main():
    """Read CoreNLP XML from standard input and print tab-separated tokens.

    For every ``token`` element in the document, one line is written to
    standard output containing the text of its ``word``, ``lemma`` and
    ``POS`` child elements, in that order, separated by tabs.  A missing or
    empty child produces an empty column.
    """
    # Prefer the underlying byte stream when there is one (Python 3) so the
    # XML parser applies the document's own encoding declaration; fall back
    # to sys.stdin itself otherwise (Python 2, or a replaced stdin).
    source = getattr(sys.stdin, 'buffer', sys.stdin)
    for token in etree.parse(source).iterfind('.//token'):
        columns = [token.findtext(tag, '') for tag in ('word', 'lemma', 'POS')]
        # sys.stdout.write works on both Python 2 and 3, unlike the
        # print statement.
        sys.stdout.write('\t'.join(columns) + '\n')
# Alternative implementation using lxml's iterparse (kept for reference):
# context = etree.iterparse(sys.stdin, tag='token')
# for _, token in context:
#     print '\t'.join(elem.text for elem in token.iter('word', 'lemma', 'POS'))
# Example output (first five tokens; columns are word, lemma, POS):
# Natural natural JJ
# language language NN
# processing processing NN
# From from IN
# Wikipedia Wikipedia NNP
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()