# -*- coding: utf-8 -*-
"""
Created on Mon Jun 4 19:14:50 2018
@author: majd
"""
import os
import nilearn.connectome
import numpy as np
from scipy import signal
import scipy.linalg
from scipy.linalg import eigh
from sklearn.cluster import KMeans,SpectralClustering,DBSCAN
from sklearn.covariance import EmpiricalCovariance,LedoitWolf,ShrunkCovariance,OAS
from pyriemann.clustering import Kmeans
from pyriemann.utils.base import sqrtm, invsqrtm, logm, expm
import statsmodels.stats.api as sms
from bct.algorithms.modularity import modularity_und
from bct.algorithms.modularity import community_louvain
from bct import grid_communities
def sliding_window_1d(a, ws=60, ss=None):
    """
    Parameters
    ----------
    a: array, 1-D (n_volumes)
    ws: window width, in samples
    ss: window step size, in samples. If not provided, window and step size
        are equal (non-overlapping windows).
    Returns
    ----------
    out : array-like, 2-D (n_windows, ws)
        Array with all sliding windows
    """
    if ss is None:
        # no step size was provided: return non-overlapping windows
        ss = ws
    # number of full windows that fit, ignoring leftover samples at the end
    nw = (len(a) - ws) // ss + 1
    out = np.ndarray((nw, ws), dtype=a.dtype)
    for i in range(nw):
        # "slide" the window along the samples
        start = i * ss
        stop = start + ws
        out[i] = a[start:stop]
    return out
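# A minimal usage sketch (hedged): slide a 4-sample window with step 2 over a
# toy vector. Runs only when this file is executed directly, not on import.
if __name__ == "__main__":
    _demo_wins = sliding_window_1d(np.arange(10), ws=4, ss=2)
    print(_demo_wins.shape)  # (4, 4): windows start at samples 0, 2, 4, 6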
def taper(window, ws=60, wtype='tukey'):
    """
    Parameters
    ----------
    window : array-like, 1-D time-series segment of length ws to taper
    ws: window width
    wtype: type of tapering function (default: Tukey)
    Returns
    -------
    out : array-like, 1-D demeaned and tapered window
    """
    if wtype == 'tukey':
        win = signal.windows.tukey(ws, alpha=0.5, sym=True)
    else:
        raise ValueError("Unsupported taper type: %s" % wtype)
    # remove the average value before tapering (to avoid spurious
    # correlations at the beginning and end of the windows)
    window = window - np.mean(window)
    out = window * win
    return out
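# Sketch: taper a 60-sample sinusoid segment; the Tukey taper forces the
# (demeaned) edge samples to zero. Runs only when executed directly.
if __name__ == "__main__":
    _tapered = taper(np.sin(np.linspace(0, np.pi, 60)), ws=60)
    print(_tapered[0], _tapered[-1])  # both ~0.0: edges are suppressed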
def upper_tri(matrix):
    """Vectorize the strict upper triangle of each matrix in an iterable."""
    mat_triu = []
    for x in matrix:
        cur_triu = np.triu(x, 1)
        mat_triu.append(cur_triu[np.triu_indices_from(cur_triu, 1)])
    return mat_triu
def unupper_tri(X):
    """Rebuild symmetric matrices (with zero diagonal) from strict-upper-triangle
    vectors of shape (n_samples, n_features*(n_features-1)/2)."""
    n_features = int((1 + np.sqrt(1 + 8 * X.shape[1])) // 2)
    centroids = np.zeros((X.shape[0], n_features, n_features))
    for i, center in enumerate(X):
        curmat = np.zeros((n_features, n_features))
        curmat[np.triu_indices_from(curmat, 1)] = center
        centroids[i] = curmat + curmat.T
    return centroids
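# Sketch: round-trip a random symmetric matrix through upper_tri/unupper_tri;
# the diagonal is discarded, so only the off-diagonal entries are recovered.
if __name__ == "__main__":
    _m = np.random.rand(4, 4)
    _sym = _m + _m.T
    _back = unupper_tri(np.asarray(upper_tri([_sym])))
    print(np.allclose(_back[0], _sym - np.diag(np.diag(_sym))))  # True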
def is_spd(A, tol=1e-8):
    """
    Parameters
    ----------
    A : symmetric square matrix
    Returns
    ---------
    (bool, eigenvalues): True if all eigenvalues exceed -tol (i.e. A is
        symmetric positive definite up to tolerance), together with the
        eigenvalues themselves
    """
    E, V = scipy.linalg.eigh(A)
    return np.all(E > -tol), E
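# Sketch: the sample covariance of random data should pass the SPD check.
if __name__ == "__main__":
    _cov = np.cov(np.random.rand(100, 5), rowvar=False)
    _ok, _eigvals = is_spd(_cov)
    print(_ok, _eigvals.min())  # True, smallest eigenvalue > 0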
def untangent_space(T, Cref):
    """Project a set of tangent-space vectors back onto the manifold
    according to the given reference point Cref
    Parameters
    ----------
    T: {array-like}, tangent vectors, shape = (NWindows, Mfeatures*(Mfeatures-1)/2)
    Cref: {array-like}, the reference covariance matrix (Mfeatures x Mfeatures)
    Returns
    ----------
    covmats: {array-like}, (NWindows, Mfeatures, Mfeatures) SPD matrices
    """
    Nt, Nd = T.shape
    # Nd = Ne*(Ne-1)/2 strict-upper entries (the diagonal was discarded
    # during vectorization and is assumed to be zero here)
    Ne = int((1 + np.sqrt(1 + 8 * Nd)) / 2)
    C12 = sqrtm(Cref)
    idx = np.triu_indices(Ne, 1)
    covmats = np.zeros((Nt, Ne, Ne))
    covmats[:, idx[0], idx[1]] = T
    for i in range(Nt):
        covmats[i] = covmats[i] + covmats[i].T            # symmetrize
        covmats[i] = expm(covmats[i])                     # map back from the tangent space
        covmats[i] = np.dot(np.dot(C12, covmats[i]), C12)
    return covmats
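# Sketch: with Cref = I, projecting the all-zero tangent vector back onto the
# manifold should give expm(0) = the identity matrix (assumes pyriemann's
# sqrtm/expm imported above).
if __name__ == "__main__":
    _spd = untangent_space(np.zeros((1, 3)), Cref=np.eye(3))  # 3 = 3*(3-1)/2
    print(np.allclose(_spd[0], np.eye(3)))  # True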
def conn_highvariance(allcovdata):
    """
    Identify windows with high variance in connectivity for each subject:
    - calculate the average connectivity per window (average of all edges),
    - define a 95% confidence interval on this average,
    - select the windows whose average lies above the upper bound.
    Parameters
    ----------
    allcovdata: {array-like}, connectivity matrices of all subjects,
        shape = (Subjects, NWindows, Mfeatures, Mfeatures)
    Returns
    ----------
    mtd_allsubj_highvar: {array-like}, all high-variance windows pooled
        across subjects, shape = (Windows, Mfeatures, Mfeatures)
    """
    mtd_allsubj_highvar = []
    # average connectivity per window, for each subject
    var_mtd_allsubj = []
    for curcov in allcovdata:
        var_mtd_allsubj.append([a.mean() for a in curcov])
    # extract the windows above the 95% confidence interval
    for cur_i, curvarmtd in enumerate(var_mtd_allsubj):
        a = sms.DescrStatsW(curvarmtd)
        _, high = a.tconfint_mean()
        ind_highvar = np.flatnonzero(np.asarray(curvarmtd) > high)
        # keep the connectivity data for these windows only
        curcov = allcovdata[cur_i]
        mtd_allsubj_highvar.append(curcov[ind_highvar])
    return np.vstack(mtd_allsubj_highvar)
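# Sketch on synthetic data: 3 subjects x 40 windows of 5x5 matrices; windows
# whose mean connectivity exceeds the upper 95% CI bound are pooled.
if __name__ == "__main__":
    _hv = conn_highvariance(np.random.rand(3, 40, 5, 5))
    print(_hv.shape)  # (n_selected_windows, 5, 5)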
def dfc_slid_window(X, ws, ss=1):
    """
    Computes sliding-window time-series per subject per region.
    Applies a Tukey window for tapering.
    Parameters
    ----------
    X: {array-like}, shape = (n_subjects, n_volumes, n_regions)
        resting-state BOLD time-series for subjects
    ws : window size
    ss : sliding step size (default=1)
    Returns
    -------
    slwin_ts : array-like (n_subjects, n_windows, ws, n_regions)
    n_windows : number of sliding windows per subject
    """
    nsubj = X.shape[0]  # number of subjects
    nvolm = X.shape[1]  # number of volumes
    nfeat = X.shape[2]  # number of brain regions
    n_slwin = (nvolm - ws) // ss + 1  # number of sliding windows
    slwin_ts = np.ndarray((nsubj, n_slwin, ws, nfeat))
    # window indices are identical across subjects, so compute them once
    fulltimewin = np.arange(nvolm, dtype='int32')
    swins = sliding_window_1d(a=fulltimewin, ws=ws, ss=ss)
    for idx, s in enumerate(X):
        for n, curwin in enumerate(swins):
            cur_ts = s[curwin, :]
            for i in range(nfeat):
                slwin_ts[idx][n][:, i] = taper(cur_ts[:, i], ws)
    return slwin_ts, slwin_ts.shape[1]
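# Sketch: 2 subjects, 100 volumes, 8 regions, 30-sample windows with step 1.
if __name__ == "__main__":
    _slwin, _n = dfc_slid_window(np.random.rand(2, 100, 8), ws=30, ss=1)
    print(_slwin.shape, _n)  # (2, 71, 30, 8) 71, since (100-30)//1 + 1 = 71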
def labels_to_FC(FCmatrices, clusterlabels):
    """
    Averages FC matrices according to clustering labels.
    Returns an average state estimated for each subject.
    Parameters
    ----------
    FCmatrices: {array-like}, shape = (n_subjects, n_sl, n_regions, n_regions)
        Functional connectivity matrices obtained from n_sl sliding windows,
        for a group of n_subjects subjects
    clusterlabels: {array-like}, shape = (n_subjects, n_sl)
        Estimated labels for each FC matrix, per subject
    Returns
    -------
    dfcStates : array-like (n_states, n_subjects, n_regions, n_regions)
        Average state estimated for each subject (NaN if a subject has no
        window assigned to a given state)
    """
    nstates = np.unique(clusterlabels).shape[0]
    nroi = FCmatrices.shape[2]
    nsubj = FCmatrices.shape[0]
    dfcStates = np.ndarray((nstates, nsubj, nroi, nroi))
    for curstate in range(nstates):
        for s, cursubjFC in enumerate(FCmatrices):
            labels_list = clusterlabels[s] == curstate
            dfcStates[curstate, s] = np.mean(cursubjFC[labels_list], axis=0)
    return dfcStates
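# Sketch: average random FC matrices into per-subject state means, with labels
# cycling over 3 states so every (state, subject) pair is populated.
if __name__ == "__main__":
    _fc = np.random.rand(2, 20, 6, 6)
    _lab = np.resize(np.arange(3), (2, 20))
    print(labels_to_FC(_fc, _lab).shape)  # (3, 2, 6, 6)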
class DynamicConnectivityKmeans():
    """ Dynamic connectivity estimator using two passes of KMeans on FC matrices
    Parameters
    -----------
    cov_estimator : method of estimating covariance/connectivity (default: EmpiricalCovariance)
    n_states: number of dynamic connectivity states
    ws: sliding window width
    ss: sliding window step size
    kinds : list of connectivity measures
    saveas : path of the .npz file used to cache the fitted estimator
    Attributes
    -----------
    connectivity: {array-like}, windowed connectivity measures for subjects,
        shape = (n_subjects, n_windows, n_features, n_features)
    states : {array-like}, dynamic states, shape = (n_states, n_features, n_features)
    labels : {array-like}, KMeans labels, shape = (n_subjects, n_windows)
    inertia : KMeans inertia value
    mean_: {array-like}, geometric mean for the tangent kind, shape = (n_features, n_features)
    """
    def __init__(self, cov_estimator=EmpiricalCovariance(store_precision=True,
                 assume_centered=False), n_states=2, ws=60, ss=1,
                 kinds=['correlation'], saveas='file'):
        self.cov_estimator = cov_estimator
        self.n_states = n_states
        self.ws = ws
        self.ss = ss
        self.kinds = kinds
        # np.savez appends '.npz' when missing, so normalize the cache path
        # here to keep the os.path.exists() check in fit() consistent
        self.saveas = saveas if saveas.endswith('.npz') else saveas + '.npz'
    def fit(self, X):
        """
        Parameters
        -----------
        X: {array-like}, shape = (n_subjects, n_volumes, n_features)
            resting-state BOLD time-series for subjects
        Returns
        ----------
        self : object
        """
        if not os.path.exists(self.saveas):
            # check that the input time-series are 2-D for each subject
            subjects_types = [type(s) for s in X]
            if set(subjects_types) != set([np.ndarray]):
                raise ValueError("Each subject must be 2D numpy.ndarray.\n You "
                                 "provided {0}".format(str(subjects_types)))
            # initializing
            self.connectivity, self.states, self.labels, self.inertia = dict(), dict(), dict(), dict()
            connectivity = dict()
            nsubj = X.shape[0]  # number of subjects
            nfeat = X.shape[2]  # number of brain regions
            ws = self.ws
            ss = self.ss
            # estimating dynamic connectivity
            for kind in self.kinds:
                estimator = nilearn.connectome.ConnectivityMeasure(
                    kind=kind, cov_estimator=self.cov_estimator,
                    vectorize=True, discard_diagonal=True)
                # windowed BOLD time-series
                slwin_ts, n_slwin = dfc_slid_window(X, ws=ws, ss=ss)
                connectivity[kind] = estimator.fit_transform(
                    slwin_ts.reshape(nsubj * n_slwin, ws, nfeat))
                # extracting the windows with high variance in connectivity
                highvar_mats = conn_highvariance(
                    connectivity[kind].reshape(nsubj, n_slwin, (nfeat * (nfeat - 1)) // 2))
                # 2-level KMeans clustering
                clustering_lvl_1 = KMeans(n_clusters=self.n_states, init='k-means++',
                                          n_init=150, max_iter=300).fit(highvar_mats)  # 1st level
                clustcenters = clustering_lvl_1.cluster_centers_  # 1st-level cluster centers
                # 2nd level: a single run seeded with the 1st-level centers
                clustering_lvl_2 = KMeans(n_clusters=self.n_states, init=clustcenters,
                                          n_init=1, max_iter=300).fit(connectivity[kind])
                # finalizing
                self.connectivity[kind] = unupper_tri(connectivity[kind])
                self.states[kind] = unupper_tri(clustering_lvl_2.cluster_centers_)
                self.labels[kind] = clustering_lvl_2.labels_.reshape(nsubj, n_slwin)
                self.inertia[kind] = clustering_lvl_2.inertia_
                # projecting from the tangent space back to SPD for the tangent kind
                if kind == 'tangent':
                    # nilearn stores the geometric mean as a full matrix
                    self.mean_ = estimator.mean_
                    self.states[kind] = untangent_space(
                        clustering_lvl_2.cluster_centers_, Cref=self.mean_)
            np.savez(self.saveas, self)
        else:
            # reload a previously fitted estimator (object arrays need allow_pickle)
            data = np.load(self.saveas, allow_pickle=True)
            self = data['arr_0'].flatten()[0]
        return self
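# End-to-end sketch on synthetic BOLD data (assumes nilearn, pyriemann and
# statsmodels are installed; 'dfc_demo.npz' is a throwaway cache name - if it
# already exists, fit() reloads it instead of refitting).
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    bold = rng.rand(3, 120, 10)  # 3 subjects, 120 volumes, 10 regions
    dcm = DynamicConnectivityKmeans(n_states=2, ws=30, ss=1,
                                    kinds=['correlation'],
                                    saveas='dfc_demo.npz')
    dcm = dcm.fit(bold)  # fit returns the (possibly reloaded) estimator
    print(dcm.states['correlation'].shape)  # (2, 10, 10)
    print(dcm.labels['correlation'].shape)  # (3, 91)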