-
Notifications
You must be signed in to change notification settings - Fork 1
/
PyExcelToBSON.py
108 lines (82 loc) · 3.06 KB
/
PyExcelToBSON.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import bson
import json
import os
import shutil
from openpyxl import load_workbook
def set_parser():
parser = argparse.ArgumentParser()
parser.add_argument("-i", "--input", help="path of excel file", required=True)
parser.add_argument("-d", "--debug", help="export to json file", action="store_true")
parser.add_argument("-o", "--output", help="path of output directory", required=True)
parser.add_argument("-s", "--suffix", help="suffix of output file", required=False)
parser.add_argument("-c", "--clean", help="clean up output", action="store_true")
args = parser.parse_args()
if not args.input:
print("PARSER ERROR] arguments is wrong!")
exit(1)
return args
args = set_parser()
wb = load_workbook(filename=args.input, data_only=True)
if os.path.exists(args.output):
if args.clean:
shutil.rmtree(args.output, ignore_errors=True)
os.mkdir(args.output)
else:
os.mkdir(args.output)
suffix = ''
if args.suffix is not None:
suffix = args.suffix
for sheet in wb:
if sheet.title[0] == '_':
print("INFO] skipped sheet - " + sheet.title)
continue
json_dict = {}
print("INFO] start convert sheet - {}".format(sheet.title))
with open("{}/{}{}.bson".format(args.output, sheet.title, suffix), mode='wb') as bson_file:
data_list = []
key_list = []
rowIndex = -1
for row in sheet:
colIndex = 0
rowIndex = rowIndex + 1
if rowIndex < 4:
continue
if rowIndex == 4:
for col in row:
if col.value is None:
break
key_list.append(str(col.value))
continue
key_index = 0
key_list_len = len(key_list)
data_dict = {}
for col in row:
if key_list_len <= key_index:
break
if key_index == 0 and col.value is None:
break
if col.value is None:
formula = col.TYPE_FORMULA_CACHE_STRING
if formula == 'int':
col.value = 0
elif formula == 'float':
col.value = 0.0
else:
col.value = ''
data_dict[key_list[key_index]] = col.value
key_index = key_index + 1
if key_index > 0:
data_list.append(data_dict)
json_dict['data'] = data_list
bson_file.write(bson.dumps(json_dict))
if args.debug:
with open("{}/{}{}.json".format(args.output, sheet.title, suffix), mode='w', encoding='utf-8') as json_file:
json.dump(json_dict, json_file, indent=4, sort_keys=True, ensure_ascii=False)
print("INFO] end convert process - {}".format(sheet.title))
# NOTE(jjo): for test
#with open(sheet.name + '.bson', 'rb') as bson_f:
# data = bson_f.read()
# print(bson.loads(data))