-
Notifications
You must be signed in to change notification settings - Fork 5
/
WriteILIinput.py
139 lines (112 loc) · 5.18 KB
/
WriteILIinput.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# from pyimzml.ImzMLParser import ImzMLParser, getionimage
# from sm_analytics_python.sm_annotation_utils import sm_annotation_utils as smau
from metaspace import sm_annotation_utils as smau
import numpy as np
import csv
import glob, os, tqdm
import pandas as pd
def preCSVdatagen(xy_p, radius, nbin, PlainFirst):
    """Build the header-prefixed columns of an ili' csv input from mark coordinates.

    The two arrays stored in the .npy file are divided by `nbin` and shifted so
    that the smallest value on each axis becomes 0.

    Args:
        xy_p (str): path to the .npy file holding the X and Y coordinates of
            the ablation marks (unpacked as two arrays).
        radius (int): value written in the 'R' column for every mark, i.e. the
            displayed radius of the marks in ili'.
        nbin (int): bin factor the coordinates are divided by.
        PlainFirst (bool): when true, an extra 'Flat' column filled with ones is
            appended, so every mark gets the same intensity.

    Returns:
        data (list): one list per column, in the order 'Num', 'X', 'Y', 'Z',
            'R' (and 'Flat' when requested). Each list starts with its header
            followed by one entry per mark; np.append coerces header and values
            to a common string dtype, so all entries are strings.
    """

    def labelled(header, values):
        # Prepend the column header to the column values.
        return list(np.append(header, values))

    first, second = np.load(xy_p)

    # Rescale to the binned image, then move the origin to the minimum.
    scaled_first = first / nbin
    scaled_second = second / nbin
    flat_first = (scaled_first - np.min(scaled_first)).ravel()
    flat_second = (scaled_second - np.min(scaled_second)).ravel()
    n_marks = flat_first.size

    # NOTE: the 'X' column receives the array unpacked second from the file and
    # the 'Y' column the one unpacked first (axes are swapped on purpose here).
    columns = [
        labelled('Num', np.arange(n_marks)),
        labelled('X', flat_second),
        labelled('Y', flat_first),
        labelled('Z', np.zeros(n_marks)),
        labelled('R', np.ones(n_marks) * radius),
    ]
    if PlainFirst:
        columns.append(labelled('Flat', np.ones(n_marks)))
    return columns
def writeCSV(path, data):
    """Write column-oriented data to a csv file, one row per data point.

    Args:
        path (str): path of the csv file to write; an existing file is
            overwritten.
        data (list): data to write (2D), given as a list of columns. The
            columns are transposed into rows with zip, so rows stop at the
            length of the shortest column.
    """
    # newline='' hands line-ending control to the csv module. Without it, text
    # mode on Windows turns the writer's '\r\n' into '\r\r\n', which shows up as
    # an empty row after every record.
    with open(path, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerows(zip(*data))
def annotationSM2CSV(MFA, MFI, fdr, nbin, radius, tf_obj, db='HMDB-v4'):
    """Fetches annotation images from METASPACE (http://metaspace2020.eu/#/about) and writes intensity values of
    each ablation marks in a csv input for ili' (https://ili.embl.de/). Used to visualize the ion signal on the
    preMALDI microscopy after registration and validate the geometric transform to apply to the ion image.

    The dataset name is taken from the first '*.imzML' file found in the
    'MALDI' folder of MFI. The result is written to
    'ili/sm_annotation_detections.csv' inside MFA.

    Note: this function changes the process working directory to the 'MALDI'
    folder of MFI (os.chdir) before MFA is used, so a relative MFA is resolved
    against that folder.

    Args:
        MFA (str): path to Main Folder Analysis (with or without trailing separator).
        MFI (str): path to Main Folder Input (with or without trailing separator).
        fdr (float): fdr threshold value can only be 0.05, 0.1, 0.2 and 0.5.
        nbin (int): bin factor used to bin the image for ili'.
        radius (int): displayed radius of the marks in ili'.
        tf_obj (function): Image transformation to apply on ion image for registration.
        db (str): name of the METASPACE molecular database, passed both to the
            annotation query and to the score query.

    Raises:
        IndexError: if no '*.imzML' file is present in the 'MALDI' folder.
    """

    def CSVdatagen(data, results, d):
        """Appends one intensity column per annotation to the ili' csv data.

        Args:
            data (list): data to populate with ion intensities (2D, list of columns).
            results: table of annotations; after reset_index() its rows must
                expose `formula` and `adduct` attributes.
            d: dataset object providing isotope_images(formula, adduct).

        Returns:
            data (list): the same list, extended in place, to write in csv input for ili'.
        """
        for row in results.reset_index().itertuples():
            images = d.isotope_images(row.formula, row.adduct)
            print(row.formula)
            # row[1] is the first column produced by reset_index() and serves as
            # the column header; only the first isotope image is used.
            data.append(list(np.append(row[1], tf_obj(images[0]).ravel())))
        return data

    # SMInstance is created with its default configuration.
    sm = smau.SMInstance()

    # os.path.join accepts folder paths with or without a trailing separator,
    # unlike the plain string concatenation it replaces.
    os.chdir(os.path.join(MFI, 'MALDI'))
    ds_name = glob.glob('*.imzML')[0].replace('.imzML', '')
    d = sm.dataset(ds_name)
    results = sm.msm_scores([d], d.annotations(database=db, fdr=fdr), db_name=db).T

    marks_path = os.path.join(MFA, 'Fiducials', 'transformedMarks.npy')
    predata = preCSVdatagen(marks_path, radius, nbin, PlainFirst=False)
    data_csv = CSVdatagen(predata, results, d)
    writeCSV(path=os.path.join(MFA, 'ili', 'sm_annotation_detections.csv'), data=data_csv)
def annotationSM2CSV_offline(MF,
                             tf_obj,
                             hdf5_path=r'F:\Google Drive\A-Team\projects\1c\hepatocytes_40samples, DKFZ\datasets/',
                             on_sample_list_path=r"F:\Google Drive\A-Team\projects\1c\hepatocytes_40samples, DKFZ\KATJAnMANUAL_ON_sample_annotations.csv",
                             radius=20):
    """Writes ion intensities of each ablation mark in a csv input for ili' from locally stored ion images.

    Offline counterpart of annotationSM2CSV: ion images are read from HDF5
    file(s) instead of being fetched from METASPACE. The dataset name is taken
    from the first '*.imzML' file found in 'Input/MALDI' inside MF, and only
    the rows whose 'ds_name' equals that name are used. The result is written
    to 'Analysis/ili/offline_on_sample.csv' inside MF.

    Note: this function changes the process working directory to
    'Input/MALDI' inside MF (os.chdir) before the other paths are used, so a
    relative MF is resolved against that folder afterwards.

    Args:
        MF (str): path to the Main Folder, containing the 'Input' and
            'Analysis' sub-folders (with or without trailing separator).
        tf_obj (function): Image transformation to apply on ion image for registration.
        hdf5_path (str): either a folder, in which case all its '*.hdf5' files
            are concatenated, or the path of a single HDF5 file. The table
            must provide the columns 'ds_name', 'mol_formula', 'adduct' and
            'image'.
        on_sample_list_path (str): kept for backward compatibility; the file
            is not read because its content was never used.
        radius (int): displayed radius of the marks in ili' ('R' column).

    Raises:
        IndexError: if no '*.imzML' file is present in 'Input/MALDI'.
    """
    # MF is used as passed by the caller; it was previously overwritten by a
    # hard-coded local path, which made the argument ineffective.
    os.chdir(os.path.join(MF, 'Input', 'MALDI'))
    imzml_name = glob.glob('*.imzML')[0]
    ds_name = imzml_name.replace('.imzML', '')

    if os.path.isdir(hdf5_path):
        hdf5_files = glob.glob(os.path.join(hdf5_path, '*.hdf5'))
        df_im0 = pd.concat([pd.read_hdf(p) for p in hdf5_files])
    else:
        df_im0 = pd.read_hdf(hdf5_path)
    df_im = df_im0[df_im0['ds_name'] == ds_name].reset_index()

    Xs, Ys = np.load(os.path.join(MF, 'Analysis', 'Fiducials', 'transformedMarks.npy'))
    # Shift so that the smallest coordinate on each axis is 0.
    Ys = Ys - np.min(Ys)
    Xs = Xs - np.min(Xs)
    n_marks = len(Xs)

    ili_df = pd.DataFrame()
    ili_df['Num'] = list(range(n_marks))
    # The 'X' column receives Ys and the 'Y' column Xs, as in preCSVdatagen.
    ili_df['X'] = Ys
    ili_df['Y'] = Xs
    ili_df['Z'] = np.zeros(n_marks)
    ili_df['R'] = np.ones(n_marks) * radius

    # One column per ion image, named '<formula>, <adduct>'.
    for i in tqdm.tqdm(df_im.index):
        mol_name = '{}, {}'.format(df_im.loc[i, 'mol_formula'], df_im.loc[i, 'adduct'])
        ili_df[mol_name] = tf_obj(df_im.loc[i, 'image']).ravel()

    ili_df.to_csv(os.path.join(MF, 'Analysis', 'ili', 'offline_on_sample.csv'), index=False)