-
Notifications
You must be signed in to change notification settings - Fork 1
/
utils.py
110 lines (79 loc) · 2.47 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import re
import numpy as np
from sqlalchemy import create_engine
def check_input_data(data):
    """
    Check whether the input payload carries every mandatory field.

    Args:
        data: input data; expected to be a dict-like object exposing ``keys()``

    Returns: True when 'uri', 'type', 'part' and 'index' are all present,
        False otherwise (including when ``data`` is None or not dict-like)
    """
    required_keys = ('uri', 'type', 'part', 'index')
    try:
        present_keys = data.keys()
    except AttributeError:
        # None, lists, strings, ... are not acceptable input; report them as
        # invalid instead of letting the validator itself crash.
        return False
    return all(key in present_keys for key in required_keys)
def rdbms_check_if_uri_is_valid(input_uri, part):
    """
    Test whether the provided connection URI is usable and exposes a table.

    Args:
        input_uri: input uri, handed to ``create_engine``
        part: provided table name to look for

    Returns: True when the table names reported for the engine contain
        ``part``; False when they do not, or when anything fails while
        creating the engine or listing the tables
    """
    # Imported locally under an alias so the standard-library ``inspect``
    # module is not shadowed at module level.
    from sqlalchemy import inspect as sa_inspect

    engine = None
    try:
        engine = create_engine(input_uri)
        # Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in
        # 2.0; the inspector is the supported way to list tables.
        return part in sa_inspect(engine).get_table_names()
    except Exception:
        # Deliberately broad: any failure (malformed URI, missing driver,
        # unreachable server, ...) means the URI is not usable.
        return False
    finally:
        if engine is not None:
            # Release the connection pool opened for this one-off probe.
            engine.dispose()
def map_dtype_to_elk_type(df_type):
    """
    Map a data frame column type to an elastic search field type.

    The decision is made on the NumPy dtype *kind*, so every width of a
    family is recognised (int32 as well as int64, float32 as well as
    float64, any datetime64 unit).

    Args:
        df_type: provided type; a NumPy dtype or anything ``np.dtype`` can
            interpret (e.g. ``np.int64``, ``'float64'``)

    Returns: elastic search data type as ``{'type': <name>}`` where the name
        is 'boolean', 'integer', 'float', 'date' or 'text'; 'text' is also
        the fallback for any kind not listed in the table below
    """
    try:
        kind = np.dtype(df_type).kind
    except TypeError:
        # NumPy cannot interpret the object as a dtype (presumably a pandas
        # extension dtype - verify against callers). Use its own ``kind``
        # when it has one, otherwise treat it as a generic object column.
        kind = getattr(df_type, 'kind', 'O')

    # NOTE(review): 64-bit integers are mapped to 'integer' as in the original
    # code; Elasticsearch's 'integer' is a 32-bit field, so 'long' may be the
    # safer choice - confirm against the target index before changing.
    kind_to_elk = {
        'b': 'boolean',
        'i': 'integer',
        'u': 'integer',
        'f': 'float',
        'M': 'date',
    }
    return {'type': kind_to_elk.get(kind, 'text')}
def df_lookup(data_frame):
    """
    Build the elastic search type mapping for the columns of a data frame.

    Args:
        data_frame: provided data frame

    Returns: dict keyed by the labels of ``data_frame.dtypes``, each value
        being the result of ``map_dtype_to_elk_type`` for that entry's dtype
    """
    return {
        column: map_dtype_to_elk_type(column_dtype)
        for column, column_dtype in data_frame.dtypes.items()
    }
def replace_nan_in_files(data_frame):
    """
    Swap every NaN found in the data frame for an empty string.

    Args:
        data_frame: data frame

    Returns: the object returned by ``data_frame.replace`` with NaN
        replaced by ''
    """
    return data_frame.replace(np.nan, '', regex=True)
def split_camel_case(input_string):
    """
    Break a camel case identifier into space-separated words.

    Args:
        input_string: camel case string

    Returns: the words extracted from the camel case string, joined by
        single spaces
    """
    # First pass: put a space in front of every run of capital letters.
    spaced_capitals = re.sub('([A-Z]+)', r' \1', input_string)
    # Second pass: put a space in front of every capital that is followed by
    # lowercase letters, which detaches a word from a preceding acronym.
    spaced_words = re.sub('([A-Z][a-z]+)', r' \1', spaced_capitals)
    # split() without arguments drops the leading and doubled spaces that the
    # two passes leave behind.
    return " ".join(spaced_words.split())