forked from neurolabusc/NiiStat
-
Notifications
You must be signed in to change notification settings - Fork 1
/
nii_stat_svr_core_xls.m~Updated upstream
303 lines (288 loc) · 10.8 KB
/
nii_stat_svr_core_xls.m~Updated upstream
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
function [r, z_map, labels, predicted_labels, p] = nii_stat_svr_core (xlsname, clipping, normRowCol, verbose, minUnique, islinear, nuSVR, deleteCols)
%Leave-one-out support vector regression on a spreadsheet of features.
% xlsname : file name to analyze (Excel or tab-delimited text)
% clipping: clip weights to positive (1), negative (-1), or no clipping (0 = default)
% normRowCol : normalize none [0, default], rows [1], or columns [2]
%              (negative values exclude the final column from normalization)
% verbose : text details none [0, default], extensive [1]
% minUnique : minimum number of unique features to use a feature (0 = default)
% islinear: use linear (1, default) or non-linear (0) fitting
% nuSVR : use nu-SVR (1) or epsilon-SVR (0, default)
% deleteCols : remove specified columns from analysis
%outputs:
% r : correlation between observed and LOO-predicted scores
% z_map : z-scored mean feature weight map (NaN for excluded features)
% labels : observed scores (normalized to range 0..1)
% predicted_labels : leave-one-out predicted scores (normalized to range 0..1)
% p : one-tailed probability for r
%example
% nii_stat_svr_core ('lesionacute_better_svr.tab')
if ~exist('xlsname','var') %if Excel file not specified, have user select one
   [file,pth] = uigetfile({'*.xls;*.xlsx;*.txt;*.tab','Excel/Text file';'*.txt;*.tab','Tab-delimited text (*.tab, *.txt)'},'Select the design file');
   if isequal(file,0), return; end;
   xlsname=[pth file];
end
if ~exist('normRowCol','var')
    normRowCol = 0; %[0, default], rows [1], or columns [2]
end
if ~exist('clipping','var')
    clipping = 0;
end
if ~exist('verbose','var')
    verbose = false;
end
if ~exist('minUnique','var')
    minUnique = 0;
end
svmdir = [fileparts(which(mfilename)) filesep 'libsvm' filesep];
if ~exist(svmdir,'dir') %check the folder exists ('dir' is the precise test for a folder)
    error('Unable to find utility scripts folder %s',svmdir);
end
if ~exist('islinear','var')
    islinear = true;
end
if ~islinear
    fprintf('Warning: please do not use map when conducting non-linear svr!\n');
end
if ~exist('nuSVR','var')
    nuSVR = false;
end
addpath(svmdir); %make sure we can find the utility scripts
%addpath c:/code/libsvm-3.11/matlab;
[~,~,x] = fileparts(xlsname);
if strcmpi(x,'.tab') || strcmpi(x,'.txt') || strcmpi(x,'.val')
    [num, txt] = tabreadSub (xlsname);
else
    [num, txt, ~] = xlsread (xlsname);
    %num(:,1) = []; % remove first column (subject name)
end
if exist ('deleteCols', 'var')
    txt(:,1) = [];
    deleteCols = deleteCols -1; %we already removed 1st column
    s = [];
    for i = 1:numel(deleteCols)
        s = [s, ' ', txt{1,deleteCols(i)}];
    end
    fprintf('Deleting columns: %s\n',s);
    txt(:,deleteCols) = [];
    num(:,deleteCols) = [];
end
className = txt(1,size(txt,2)); %header of final column names the dependent variable
n_subj = size (num, 1);
n_dim = size (num, 2) - 1; %final column is predictor
data = num (:, 1:n_dim);
if minUnique > 1 %
    %remove columns with fewer than N number of different values
    [data , good_idx] = columnUniqueThreshSub(data, minUnique);
else
    %else remove columns with NO variability
    [data, good_idx] = requireVarSub(data);
end
fprintf(' %s (''%s'', %g, %g, %g, %g, %g, %g);\n', mfilename, xlsname, clipping, normRowCol, verbose, minUnique, islinear, nuSVR);
if normRowCol == -2% rows [1], or columns [2], minus means exclude final column
    fprintf('Normalizing so each column has range 0..1 (last column excluded)\n');
    data(:,1:end-1) = normColSub(data(:,1:end-1));
elseif normRowCol == -1% rows [1], or columns [2], minus means exclude final column
    fprintf('Normalizing so each row has range 0..1 (last column excluded)\n');
    data(:,1:end-1) = normRowSub(data(:,1:end-1));
elseif normRowCol == 1% rows [1], or columns [2]
    fprintf('Normalizing so each row has range 0..1\n');
    data = normRowSub(data);
elseif normRowCol == 2% rows [1], or columns [2]
    %fprintf('Normalizing so each column has range 0..1\n');
    data = normColSub(data);
end
if clipping == 0
    fprintf ('Two-tailed analysis; keeping both positive and negative features\n');
elseif clipping == 1
    fprintf ('One-tailed analysis; keeping only positive features\n');
else
    fprintf ('One-tailed analysis; keeping only negative features\n');
end
labels = num (:, size (num, 2)); %dependent variable = final column
labels = normColSub(labels); %CR 20April2015
predicted_labels = zeros(n_subj,1); %pre-allocate memory
map = zeros(size(data)); %pre-allocate memory
%build the libsvm options string: kernel type then SVR flavor
if islinear
    cmd = '-t 0';
else
    cmd = '-t 2';
end
if nuSVR
    cmd = [cmd ' -s 4'];
else
    cmd = [cmd ' -s 3'];
end
%leave-one-out cross-validation: train on all but one subject, predict the left-out one
for subj = 1:n_subj
    train_idx = setdiff (1:n_subj, subj);
    train_data = data (train_idx, :);
    train_labels = labels (train_idx);
    if verbose
        SVM = svmtrain (train_labels, train_data, cmd);
        % added by Grigori Yourganov: scale correction
        % step 1: predict training labels
        pred_train_labels = svmpredict (train_labels, train_data, SVM);
    else %if verbose else silent
        %evalc captures libsvm's console chatter; fixed stray transpose after sprintf(...)
        [~, SVM] = evalc(sprintf('svmtrain (train_labels, train_data, ''%s'')',cmd));
        [out, pred_train_labels] = evalc ('svmpredict (train_labels, train_data, SVM);');
    end %if verbose else silent
    % step 2 of scale correction: estimate scale&offset via least squares
    y = train_labels; %regression line: y = a*x + b
    x = pred_train_labels;
    m = length (train_labels);
    % NOTE(review): standard least-squares uses m, not (m+1), in these terms
    % (a = (m*Sxy - Sx*Sy)/(m*Sxx - Sx^2)); left as-is to preserve published behavior — confirm with author
    c = (m+1)*sum(x.^2) - sum(x)*sum(x);
    a = ((m+1)*sum(x.*y) - sum(x)*sum(y)) / c;
    b = (sum(x.^2)*sum(y) - sum(x)*sum(x.*y)) / c;
    % predict the test labels
    ww = SVM.sv_coef' * SVM.SVs; % model weights
    bb = -SVM.rho; % model offset
    %     if numel(ww) == 6
    %         nx = numel(ww);
    %         if ww(nx-2) > 0, ww(nx-2) = 0; end;
    %         if ww(nx-1) > 0, ww(nx-1) = 0; end;
    %         if ww(nx) > 0, ww(nx) = 0; end;
    %
    %     end
    if (clipping == 1)
        ww (find (ww < 0)) = 0;
    elseif (clipping == -1)
        ww (find (ww > 0)) = 0;
    end
    predicted_labels(subj) = ww*data(subj, :)' + bb;
    % step 3 of scale correction: rescale using estimated scale&offset
    predicted_labels(subj) = a*predicted_labels(subj) + b;
    map (subj, :) = ww; % used to be SVM.sv_coef' * SVM.SVs;
end
predicted_labels = normColSub(predicted_labels); % GY
%accuracy = sum (predicted_labels' == labels) / n_subj;
%[r, p] = corr (predicted_labels', labels)
[r, p] = corrcoef (predicted_labels', labels);
r = r(1,2); %r = correlation coefficient
p = p(1,2) /2; %p = probability, divide by two to make one-tailed
if (r < 0.0) %one tailed: we predict predicted scores should positively correlate with observed scores
    p = 1.0-p; %http://www.mathworks.com/matlabcentral/newsreader/view_thread/142056
end
fprintf('r=%g r^2=%g, p=%g numObservations=%d numPredictors=%d DV=%s\n',r,r^2, p,size (data, 1),size (data, 2), className{1});
%weighted results
mean_map = mean (map, 1);
z_map = mean_map ./ std (mean_map); %z-score the mean weight map
if exist('good_idx','var') %insert NaN for unused features
    z_mapOK = zeros(n_dim,1);
    z_mapOK(:) = nan;
    z_mapOK(good_idx) = z_map;
    z_map = z_mapOK;
end
%plot results: observed versus predicted scores
figure;
plot (labels, predicted_labels, 'o');
axis ([min(labels(:)) max(labels(:)) min(labels(:)) max(labels(:))]);
%set (gca, 'XTick', [0 1 2 3 4]);
xlabel ('Actual score');
ylabel ('Predicted score');
plot_title = className{1};
plot_title (plot_title == '_') = '-'; %underscores render as subscripts in figure titles
title (sprintf ('%s; clipping = %d', plot_title, clipping));
%end nii_stat_svr_core()
function [num, txt] = tabreadSub(tabname)
%Read cells from a tab-delimited text file.
% tabname : path to a tab-separated file; lines beginning with '#' are comments
% num : numeric matrix (first row [header] and first column [subject name] skipped)
% txt : cell array of the text cells, one row per parsed line
fid = fopen(tabname);
if fid == -1
    error('Unable to open file %s', tabname);
end
num = [];
txt = [];
row = 0;
while(1)
    datline = fgetl(fid); % read next line
    if ~ischar(datline), break; end %end of file (fgetl returns -1)
    if isempty(datline), continue; end; %skip blank lines
    if datline(1)=='#', continue; end; %skip lines that begin with # (comments)
    tabLocs= strfind(datline,char(9)); % find the tabs
    if isempty(tabLocs), continue; end; %skip lines without any tab-separated data
    row = row + 1;
    dat=textscan(datline,'%s',(length(tabLocs)+1),'delimiter','\t');
    if isempty(txt) || numel(dat{1}) == size(txt,2)
        txt = [txt; dat{1}']; %#ok<AGROW>
    end
    if (row < 2) , %skip first row (header)
        %txt = dat{1}';
        continue;
    end;
    for col = 2: size(dat{1},1) %excel does not put tabs for empty cells (tabN+1)
        num(row-1, col-1) = str2double(dat{1}{col}); %#ok<AGROW>
    end
end %while: for whole file
fclose(fid);
%end tabreadSub()
function [good_dat, good_idx] = requireVarSub (dat)
%Keep only columns that are fully numeric and show some variability.
% dat : numeric matrix (rows = observations, columns = predictors)
% good_dat : dat restricted to the retained columns
% good_idx : indices of the retained columns
nCol = size(dat,2);
keep = false(1, nCol);
for c = 1:nCol
    colDat = dat(:,c);
    if any(isnan(colDat))
        continue; %reject columns containing non-numeric data
    end
    if min(colDat) ~= max(colDat) %retain only columns with variability
        keep(c) = true;
    end
end %for c: each column
good_idx = find(keep);
if any(isnan(dat(:)))
    fprintf('Some predictors have non-numeric values (e.g. not-a-number)\n');
end
if numel(good_idx) ~= nCol
    fprintf('Some predictors have no variability (analyzing %d of %d predictors)\n',numel(good_idx), size(dat,2));
end
good_dat = dat(:,good_idx);
%end requireVarSub()
function x = normColSub(x)
%Rescale every column to the range 0..1.
% x = [1 4 3 0; 2 6 2 5; 3 10 2 2] -> x = [0 0 1 0; 0.5 0.333 0 1; 1 1 0 0.4]
if size(x,1) < 2 %must have at least 2 rows
    fprintf('Error: normalizing columns requires multiple rows\n');
    return
end
colMin = min(x,[],1);
x = bsxfun(@minus, x, colMin); %shift each column so its minimum is 0
colMax = max(x,[],1); %maximum of the shifted data = column range
x = bsxfun(@rdivide, x, colMax); %scale each column so its maximum is 1
%end normColSub()
function x = normRowSub(x)
%Rescale every row to the range 0..1.
% x = [1 4 3 0; 2 6 2 5; 3 10 2 2] -> x = [0.25 1 0.75 0; 0 1 0 0.75; 0.125 1 0 0]
if size(x,2) < 2 %must have at least 2 columns
    fprintf('Error: normalizing rows requires multiple columns');
    return
end
rowRange = max(x,[],2) - min(x,[],2);
if any(rowRange == 0) %a constant row cannot be scaled to range 0..1
    fprintf('Error: unable to normalize rows: some have no variability');
    return;
end
x = bsxfun(@minus, x, min(x,[],2)); %shift each row so its minimum is 0
x = bsxfun(@rdivide, x, max(x,[],2)); %scale each row so its maximum is 1
%end normRowSub()
% function x = columnUniqueThreshSub(x, minUnique)
% %removes columns that have fewer than minUnique different values
% n_col = size(x,2);
% for c = n_col:-1:1
% if numel(unique(x(:,c))) < minUnique
% x(:,c) = [];
% end
% end
% if n_col ~= size(x,2)
% fprintf('%d columns of %d had at least %d unique values\n',size(x,2), n_col, minUnique);
% end
%end columnUniqueThreshSub()
function [good_dat, good_idx] = columnUniqueThreshSub(dat, minUnique)
%Keep only columns where at least minUnique items differ from the modal value.
% dat : numeric matrix (rows = observations, columns = predictors)
% minUnique : required count of observations outside the most frequent value
% good_dat : dat restricted to the retained columns
% good_idx : indices of the retained columns
n_col = size(dat,2);
n_row = size(dat,1);
keep = false(1, n_col);
for col = 1:n_col
    %count of items sharing the most frequent value in this column
    modalCount = sum(dat(:,col) == mode(dat(:,col)));
    keep(col) = (n_row - modalCount) >= minUnique;
end
good_idx = find(keep);
if numel(good_idx) ~= n_col
    fprintf('%d columns of %d had at least %d items in the smaller class\n',numel(good_idx), n_col, minUnique);
end
good_dat = dat(:,good_idx);
%end columnUniqueThreshSub()
% function x = columnUniqueThreshSubOld(x, minUnique)
% %removes columns that have fewer than minUnique different values
% % x = [ 1 2 3 3 5 6; 2 2 2 2 3 3]'; ->minUnique(3) -> [1 2 3 3 5 6]'
% n_col = size(x,2);
% for c = size(x,2):-1:1
% %how many items have the most frequent value
% s = sum(x(:,c) == mode(x(:,c)));
% if (size(x,1) - s) < minUnique
% x(:,c) = [];
% end
% end
% if n_col ~= size(x,2)
% fprintf('%d columns of %d had at least %d items in the smaller class\n',size(x,2), n_col, minUnique);
% end
% %end columnUniqueThreshSub()