-
Notifications
You must be signed in to change notification settings - Fork 4
/
aribstr.py
343 lines (328 loc) · 11.9 KB
/
aribstr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
#!/usr/bin/python
# -*- coding: utf-8 -*-
import array
import sys
import StringIO
from aribgaiji import *
# import copy
class Code:
    """Symbolic names for the character sets handled by the decoder.

    Every attribute is a plain string equal to its own name.  The names are
    used as the first element of the ``(code, size)`` tuples stored in the
    ``CODE_SET_G`` and ``CODE_SET_DRCS`` tables.
    """

    # Kanji, alphanumeric and kana sets.
    KANJI = 'KANJI'
    ALPHANUMERIC = 'ALPHANUMERIC'
    HIRAGANA = 'HIRAGANA'
    KATAKANA = 'KATAKANA'

    # Mosaic sets.
    MOSAIC_A = 'MOSAIC_A'
    MOSAIC_B = 'MOSAIC_B'
    MOSAIC_C = 'MOSAIC_C'
    MOSAIC_D = 'MOSAIC_D'

    # Proportional variants of the alphanumeric and kana sets.
    PROP_ALPHANUMERIC = 'PROP_ALPHANUMERIC'
    PROP_HIRAGANA = 'PROP_HIRAGANA'
    PROP_KATAKANA = 'PROP_KATAKANA'

    # JIS sets and the additional-symbols set.
    JIS_X0201_KATAKANA = 'JIS_X0201_KATAKANA'
    JIS_KANJI_PLANE_1 = 'JIS_KANJI_PLANE_1'
    JIS_KANJI_PLANE_2 = 'JIS_KANJI_PLANE_2'
    ADDITIONAL_SYMBOLS = 'ADDITIONAL_SYMBOLS'

    # Marker for sets that can be designated but are not decoded.
    UNSUPPORTED = 'UNSUPPORTED'
# Graphic sets selectable by the final byte of a designation escape sequence.
# Maps final byte -> (character set name, bytes per character).
CODE_SET_G = {
    # Basic sets.
    0x42: (Code.KANJI, 2),
    0x4A: (Code.ALPHANUMERIC, 1),
    0x30: (Code.HIRAGANA, 1),
    0x31: (Code.KATAKANA, 1),
    # Mosaic sets.
    0x32: (Code.MOSAIC_A, 1),
    0x33: (Code.MOSAIC_B, 1),
    0x34: (Code.MOSAIC_C, 1),
    0x35: (Code.MOSAIC_D, 1),
    # Proportional sets.
    0x36: (Code.PROP_ALPHANUMERIC, 1),
    0x37: (Code.PROP_HIRAGANA, 1),
    0x38: (Code.PROP_KATAKANA, 1),
    # JIS sets and additional symbols.
    0x49: (Code.JIS_X0201_KATAKANA, 1),
    0x39: (Code.JIS_KANJI_PLANE_1, 2),
    0x3A: (Code.JIS_KANJI_PLANE_2, 2),
    0x3B: (Code.ADDITIONAL_SYMBOLS, 2),
}
# DRCS and macro sets, keyed by the final byte of the designation sequence.
# Maps final byte -> (character set name, bytes per character).  None of
# these sets is decoded, so they all carry the UNSUPPORTED marker.
CODE_SET_DRCS = {0x40: (Code.UNSUPPORTED, 2)}  # DRCS-0 is the only two-byte set
CODE_SET_DRCS.update(
    # DRCS-1 .. DRCS-15 occupy the final bytes 0x41 .. 0x4F.
    (0x40 + drcs_no, (Code.UNSUPPORTED, 1)) for drcs_no in range(1, 16))
CODE_SET_DRCS[0x70] = (Code.UNSUPPORTED, 1)  # MACRO
# Every final byte known to either designation table.  Built with list() so
# the concatenation does not depend on dict.keys() returning a list.
CODE_SET_KEYS = list(CODE_SET_DRCS) + list(CODE_SET_G)
# Codes 0x77-0x7E of the one-byte kana sets are punctuation rather than kana.
# Each table maps such a code to the second byte of the two-byte character
# that is emitted for it (the first byte is always 0x21).

# Codes shared by the hiragana and katakana sets.
ARIB_BASE = {
    0x79: 0x3C,  # prolonged sound mark
    0x7A: 0x23,  # ideographic full stop
    0x7B: 0x56,  # left corner bracket
    0x7C: 0x57,  # right corner bracket
    0x7D: 0x22,  # ideographic comma
    0x7E: 0x26,  # middle dot
}

# Hiragana set: the shared codes plus the hiragana iteration marks.
ARIB_HIRAGANA_MAP = dict(ARIB_BASE)
ARIB_HIRAGANA_MAP.update({
    0x77: 0x35,  # hiragana iteration mark
    0x78: 0x36,  # hiragana voiced iteration mark
})

# Katakana set: the shared codes plus the katakana iteration marks.
ARIB_KATAKANA_MAP = dict(ARIB_BASE)
ARIB_KATAKANA_MAP.update({
    0x77: 0x33,  # katakana iteration mark
    0x78: 0x34,  # katakana voiced iteration mark
})
# Escape sequences written into the JIS output buffer ahead of a run of
# characters, each spelled as the ESC byte followed by its two ASCII bytes.
ESC_SEQ_ASCII = (0x1B, ord('('), ord('B'))    # ESC ( B
ESC_SEQ_ZENKAKU = (0x1B, ord('$'), ord('B'))  # ESC $ B
ESC_SEQ_HANKAKU = (0x1B, ord('('), ord('I'))  # ESC ( I
class Buffer:
    """Names of the four designation buffers G0-G3.

    The values are used as keys of ``CodeSetController.v_buffer``.
    """

    G0 = 'G0'
    G1 = 'G1'
    G2 = 'G2'
    G3 = 'G3'
class CodeArea:
    """Names of the two code areas a buffer can be invoked into.

    ``LEFT`` is looked up for bytes 0x21-0x7E and ``RIGHT`` for bytes
    0xA1-0xFE (see ``CodeSetController.get_current_code``).
    """

    LEFT = 'LEFT'
    RIGHT = 'RIGHT'
class AribIndexError(Exception):
    """Raised by ``AribArray.pop0`` when the array has no bytes left."""
class EscapeSequenceError(Exception):
    """Raised when the byte following ESC does not start a known sequence."""
class DegignationError(Exception):
    """Raised when a designation names a character set that is not known.

    The spelling of the class name is kept as-is because it is part of the
    module's public interface.
    """
class CodeSetController:
    """Track which character set each incoming byte belongs to.

    State kept on the instance:

    * ``v_buffer`` -- maps each of G0-G3 to the ``(code, size)`` tuple of
      the set currently designated to it.
    * ``graphic_left`` / ``graphic_right`` -- the buffers consulted for
      bytes 0x21-0x7E and 0xA1-0xFE respectively.
    * ``single_shift`` -- buffer to use for the next left-area lookup only,
      or ``None``.
    * ``esc_seq_count`` -- number of bytes of the escape sequence seen so
      far; zero when no escape sequence is in progress.
    * ``esc_buffer_index`` / ``esc_drcs`` -- target buffer and DRCS flag
      collected while an escape sequence is being read.
    """

    def __init__(self):
        """Set up the initial designation and invocation state."""
        self.v_buffer = {
            Buffer.G0: CODE_SET_G[0x42],  # KANJI
            Buffer.G1: CODE_SET_G[0x4a],  # ALPHANUMERIC
            Buffer.G2: CODE_SET_G[0x30],  # HIRAGANA
            Buffer.G3: CODE_SET_G[0x31],  # KATAKANA
        }
        self.single_shift = None
        self.graphic_left = Buffer.G0   # KANJI
        self.graphic_right = Buffer.G2  # HIRAGANA
        self.esc_seq_count = 0
        self.esc_buffer_index = Buffer.G0
        self.esc_drcs = False

    def degignate(self, code):
        """Designate the set selected by final byte *code* to the pending buffer.

        The set is looked up in ``CODE_SET_DRCS`` when the escape sequence
        carried the DRCS marker and in ``CODE_SET_G`` otherwise, stored in
        ``v_buffer[esc_buffer_index]``, and the escape sequence is ended.

        Raises:
            DegignationError: *code* is not a final byte of the selected
                table.  The escape state is left untouched in that case.
        """
        # Validate against the table that is actually going to be indexed.
        # Checking the union of both tables would let a byte that is valid
        # only in the other table escape as a bare KeyError.
        table = CODE_SET_DRCS if self.esc_drcs else CODE_SET_G
        if code not in table:
            raise DegignationError(
                'esc_seq_count=%i esc_buffer_index=%s code=0x%02X' % (
                    self.esc_seq_count, self.esc_buffer_index, code))
        self.v_buffer[self.esc_buffer_index] = table[code]
        self.esc_seq_count = 0

    def invoke(self, buffer_index, area, locking_shift=True):
        """Invoke *buffer_index* into code area *area* and end any escape.

        For ``CodeArea.LEFT`` a locking shift replaces ``graphic_left``,
        while a non-locking (single) shift only records the buffer for the
        next left-area lookup.  For ``CodeArea.RIGHT`` the buffer replaces
        ``graphic_right`` and *locking_shift* is ignored.
        """
        if area == CodeArea.LEFT:
            if locking_shift:
                self.graphic_left = buffer_index
            else:
                self.single_shift = buffer_index
        elif area == CodeArea.RIGHT:
            self.graphic_right = buffer_index
        self.esc_seq_count = 0

    def get_current_code(self, data):
        """Return the ``(code, size)`` tuple of the set that byte *data* uses.

        Bytes 0x21-0x7E use the pending single shift if there is one (which
        is consumed by this call) and ``graphic_left`` otherwise.  Bytes
        0xA1-0xFE use ``graphic_right``.  Any other byte yields ``None``.
        """
        if 0x21 <= data <= 0x7E:
            if self.single_shift:
                code = self.v_buffer[self.single_shift]
                # A single shift applies to one lookup only.
                self.single_shift = None
                return code
            return self.v_buffer[self.graphic_left]
        if 0xA1 <= data <= 0xFE:
            return self.v_buffer[self.graphic_right]
        return None

    def set_escape(self, buffer_index, drcs):
        """Record one intermediate byte of a designation escape sequence.

        *buffer_index* selects the buffer the sequence will designate to;
        ``None`` keeps the buffer chosen by an earlier byte.  *drcs* always
        replaces the DRCS flag.  The escape byte counter is advanced by one.
        """
        if buffer_index is not None:
            self.esc_buffer_index = buffer_index
        self.esc_drcs = drcs
        self.esc_seq_count += 1
class AribArray(array.array):
    """``array.array`` with helpers for front consumption and JIS output."""

    # Escape sequence most recently written by append_str(); None until the
    # first call on a given array.
    esc_seq = None

    def pop0(self):
        """Remove and return the first element.

        Raises:
            AribIndexError: the array is empty.
        """
        try:
            head = self.pop(0)
        except IndexError:
            raise AribIndexError
        return head

    def append_str(self, esc_seq, *string):
        """Append the byte values *string*, switching escape sequence if needed.

        *esc_seq* is written first, and remembered, only when it differs
        from the one written by the previous call.
        """
        if esc_seq != self.esc_seq:
            self.extend(esc_seq)
            self.esc_seq = esc_seq
        if len(string) <= 1:
            self.append(string[0])
        else:
            self.extend(string)
class AribString:
    """Decode a byte string in the ARIB 8-bit character coding to UTF-8.

    Input bytes are consumed from ``arib_array`` one at a time.  Graphic
    characters are re-encoded as escape-sequence-prefixed JIS bytes in
    ``jis_array``; flushing decodes that buffer and appends the result to
    ``utf_buffer`` as UTF-8.  Characters of the additional-symbols set
    bypass ``jis_array`` and are looked up in the ``GAIJI_MAP*`` tables.
    """

    def __init__(self, array):
        """Prepare to decode *array*.

        Args:
            array: initializer accepted by ``array.array('B', ...)`` holding
                the encoded bytes.  It is copied, so the caller's object is
                not consumed.
        """
        self.control = CodeSetController()
        self.arib_array = AribArray('B', array)
        self.jis_array = AribArray('B')
        self.utf_buffer = StringIO.StringIO()
        self.utf_buffer_symbol = StringIO.StringIO()
        self.split_symbol = False

    def convert_utf_split(self):
        """Decode the input, keeping title symbols apart from the text.

        Returns:
            A ``(text, symbols)`` tuple of UTF-8 strings.  Additional
            symbols found in ``GAIJI_MAP_TITLE`` go to ``symbols``; all
            other output goes to ``text``.
        """
        self.split_symbol = True
        self.convert()
        self.flush_jis_array()
        return (self.utf_buffer.getvalue(), self.utf_buffer_symbol.getvalue())

    def convert_utf(self):
        """Decode the input and return it as a single UTF-8 string."""
        self.convert()
        self.flush_jis_array()
        return self.utf_buffer.getvalue()

    def flush_jis_array(self):
        """Decode the pending JIS bytes into ``utf_buffer`` and reset them.

        If the pending bytes cannot be decoded, the literal text
        ``'UnicodeDecodeError'`` is written in their place so that the rest
        of the string is still converted.
        """
        if len(self.jis_array) > 0:
            uni = 'UnicodeDecodeError'
            try:
                # The "-ext" codec is required because do_convert() emits
                # ESC ( I for half-width katakana, a designation the plain
                # iso-2022-jp codec does not accept.  It decodes every
                # sequence the plain codec does.
                uni = unicode(self.jis_array.tostring(),
                              'iso-2022-jp-ext').encode('utf-8')
            except UnicodeDecodeError:
                pass
            self.utf_buffer.write(uni)
            # A fresh array has no remembered escape sequence, so the next
            # character written re-emits its escape sequence.
            self.jis_array = AribArray('B')

    def convert(self):
        """Consume all input bytes and return the pending ``jis_array``.

        Note that ``jis_array`` only holds the bytes written since the last
        flush; use ``convert_utf`` or ``convert_utf_split`` for the complete
        decoded text.
        """
        while True:
            try:
                data = self.arib_array.pop0()
                if self.control.esc_seq_count:
                    # Inside an escape sequence every byte belongs to it.
                    self.do_escape(data)
                else:
                    if (0x21 <= data <= 0x7E) or (0xA1 <= data <= 0xFE):
                        # GL/GR table
                        self.do_convert(data)
                    elif data in (
                            0x20,   # space
                            0xA0,   # space (ARIB)
                            0x09):  # HT
                        self.jis_array.append_str(ESC_SEQ_ASCII, 0x20)
                    elif data in (
                            0x0D,   # CR
                            0x0A):  # LF
                        self.jis_array.append_str(ESC_SEQ_ASCII, 0x0A)
                    else:
                        # Control character
                        self.do_control(data)
            except AribIndexError:
                # Input exhausted, possibly in the middle of a character.
                break
        return self.jis_array

    def do_convert(self, data):
        """Emit the character that starts with graphic byte *data*.

        A second byte is consumed from the input when the active set is a
        two-byte set.  Sets without a branch below (mosaic, unsupported)
        produce no output.
        """
        (code, size) = self.control.get_current_code(data)
        char = data
        char2 = 0x0
        if size == 2:
            char2 = self.arib_array.pop0()
        if 0xA1 <= char <= 0xFE:
            # Right-area bytes carry the same code with the high bit set.
            char = char & 0x7F
            char2 = char2 & 0x7F
        if code in (Code.KANJI, Code.JIS_KANJI_PLANE_1, Code.JIS_KANJI_PLANE_2):
            # Kanji
            self.jis_array.append_str(ESC_SEQ_ZENKAKU, char, char2)
        elif code in (Code.ALPHANUMERIC, Code.PROP_ALPHANUMERIC):
            # Alphanumeric
            self.jis_array.append_str(ESC_SEQ_ASCII, char)
        elif code in (Code.HIRAGANA, Code.PROP_HIRAGANA):
            # Hiragana: codes from 0x77 up are punctuation emitted with
            # first byte 0x21; the rest are emitted with first byte 0x24.
            if char >= 0x77:
                self.jis_array.append_str(
                    ESC_SEQ_ZENKAKU, 0x21, ARIB_HIRAGANA_MAP[char])
            else:
                self.jis_array.append_str(ESC_SEQ_ZENKAKU, 0x24, char)
        elif code in (Code.PROP_KATAKANA, Code.KATAKANA):
            # Katakana: same scheme as hiragana, with first byte 0x25.
            if char >= 0x77:
                self.jis_array.append_str(
                    ESC_SEQ_ZENKAKU, 0x21, ARIB_KATAKANA_MAP[char])
            else:
                self.jis_array.append_str(ESC_SEQ_ZENKAKU, 0x25, char)
        elif code == Code.JIS_X0201_KATAKANA:
            # Half-width katakana
            self.jis_array.append_str(ESC_SEQ_HANKAKU, char)
        elif code == Code.ADDITIONAL_SYMBOLS:
            # Additional symbols are written straight to the UTF-8 buffers,
            # so the pending JIS bytes must be flushed first to keep order.
            self.flush_jis_array()
            wchar = (char << 8) + char2
            if self.split_symbol:
                gaiji = GAIJI_MAP_TITLE.get(wchar)
                if gaiji is not None:
                    self.utf_buffer_symbol.write(gaiji)
                else:
                    self.utf_buffer.write(GAIJI_MAP_OTHER.get(wchar, "??"))
            else:
                self.utf_buffer.write(GAIJI_MAP.get(wchar, "??"))

    def do_control(self, data):
        """Handle control byte *data* seen outside an escape sequence.

        Only the shift functions and ESC are acted upon; every other byte
        is ignored.
        """
        if data == 0x0F:
            self.control.invoke(Buffer.G0, CodeArea.LEFT, True)   # LS0
        elif data == 0x0E:
            self.control.invoke(Buffer.G1, CodeArea.LEFT, True)   # LS1
        elif data == 0x19:
            self.control.invoke(Buffer.G2, CodeArea.LEFT, False)  # SS2
        elif data == 0x1D:
            self.control.invoke(Buffer.G3, CodeArea.LEFT, False)  # SS3
        elif data == 0x1B:
            # ESC: the following bytes are routed to do_escape().
            self.control.esc_seq_count = 1

    def do_escape(self, data):
        """Handle byte *data* of an escape sequence that is in progress.

        ``self.control.esc_seq_count`` gives the position of *data* within
        the sequence, counting the first byte after ESC as 1.

        Raises:
            EscapeSequenceError: the first byte after ESC is not recognised.
            DegignationError: propagated from
                ``CodeSetController.degignate`` for an unknown final byte.
        """
        if self.control.esc_seq_count == 1:
            if data == 0x6E:
                self.control.invoke(Buffer.G2, CodeArea.LEFT, True)   # LS2
            elif data == 0x6F:
                self.control.invoke(Buffer.G3, CodeArea.LEFT, True)   # LS3
            elif data == 0x7E:
                self.control.invoke(Buffer.G1, CodeArea.RIGHT, True)  # LS1R
            elif data == 0x7D:
                self.control.invoke(Buffer.G2, CodeArea.RIGHT, True)  # LS2R
            elif data == 0x7C:
                self.control.invoke(Buffer.G3, CodeArea.RIGHT, True)  # LS3R
            elif data == 0x24 or data == 0x28:
                self.control.set_escape(Buffer.G0, False)
            elif data == 0x29:
                self.control.set_escape(Buffer.G1, False)
            elif data == 0x2A:
                self.control.set_escape(Buffer.G2, False)
            elif data == 0x2B:
                self.control.set_escape(Buffer.G3, False)
            else:
                raise EscapeSequenceError('esc_seq_count=%i data=0x%02X' % (
                    self.control.esc_seq_count, data))
        elif self.control.esc_seq_count == 2:
            if data == 0x20:
                # DRCS marker: keep the buffer already selected.
                self.control.set_escape(None, True)
            elif data == 0x28:
                self.control.set_escape(Buffer.G0, False)
            elif data == 0x29:
                self.control.set_escape(Buffer.G1, False)
            elif data == 0x2A:
                self.control.set_escape(Buffer.G2, False)
            elif data == 0x2B:
                self.control.set_escape(Buffer.G3, False)
            else:
                self.control.degignate(data)
        elif self.control.esc_seq_count == 3:
            if data == 0x20:
                # DRCS marker: keep the buffer already selected.
                self.control.set_escape(None, True)
            else:
                self.control.degignate(data)
        elif self.control.esc_seq_count == 4:
            self.control.degignate(data)
if __name__ == '__main__':
    # Command-line entry point: read the ARIB-encoded file named by the
    # first argument, convert it, and write the pending JIS bytes to
    # "output.txt" in the current directory.
    if len(sys.argv) < 2:
        sys.exit('usage: %s ARIB_STRING_FILE' % sys.argv[0])

    arr = array.array('B')
    # "with" closes the file even if reading fails part-way.
    with open(sys.argv[1], 'rb') as f:
        # array.fromfile() needs the element count, so measure the file by
        # seeking to its end first.
        f.seek(0, 2)
        byte = f.tell()
        f.seek(0)
        arr.fromfile(f, byte)

    arib = AribString(arr)
    arib.convert()

    with open("output.txt", 'wb') as f:
        arib.jis_array.tofile(f)