Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# -*- coding: utf-8 -*-
"""
Created on Wed May 20 12:49 2020
This file is for calculating the calibration score of the experiments
@author: s161488
"""
import os
import numpy as np
import time
import pickle
import matplotlib.pyplot as plt
import shutil
import eval_calibration.calibration_lib as calib
from data_utils.prepare_data import collect_test_data, prepare_pool_data
def get_group_region(method_use):
    """Give the calibration score for region active learning.

    Args:
        method_use: str, one of "B", "C" or "D"; selects which pair of
            experiment folders is evaluated.

    Raises:
        ValueError: if ``method_use`` is not one of the supported letters.

    Ops:
        Calls ``calc_calibration_value(folder, 6)`` for each experiment folder
        that belongs to the requested method.
    """
    # Strings are compared by value through the dict lookup; the previous
    # ``is "B"`` identity checks only worked by interpreter accident.
    experiments_per_method = {
        "B": ["Method_B_Stage_1_Version_3", "Method_B_Stage_1_Version_4"],
        "C": ["Method_C_Stage_2_Version_1", "Method_C_Stage_2_Version_2"],
        "D": ["Method_D_Stage_3_Version_2", "Method_D_Stage_3_Version_3"],
    }
    if method_use not in experiments_per_method:
        raise ValueError("method_use must be one of 'B', 'C' or 'D', got %r" % (method_use,))
    for single_path in experiments_per_method[method_use]:
        calc_calibration_value(single_path, 6)
def get_group_full(version_use):
    """Give the calibration score for full image active learning.

    Args:
        version_use: int, selects the folder
            /scratch/blia/Act_Learn_Desperate_V<version_use>/.

    Ops:
        Every entry of that folder is passed, in sorted order, to
        ``calc_calibration_value`` together with ``version_use``.
    """
    experiment_root = "/scratch/blia/Act_Learn_Desperate_V%d/" % version_use
    experiment_names = os.listdir(experiment_root)
    experiment_names.sort()
    for experiment_name in experiment_names:
        calc_calibration_value(experiment_name, version_use)
def calculate_aggre_pixel(path_input, version, path2read=None):
    """Calculate the acquired number of pixels and images per acquisition step.

    The statistics are rebuilt from the ``updated_uncertain.txt`` pickles found
    under ``<path2read>/collect_data/`` and printed next to the previously
    stored ``num_of_pixel.npy`` / ``num_of_image.npy`` arrays for comparison.

    Args:
        path_input: str, experiment folder name; its last ``_``-separated token
            is parsed as the experiment (model) version.
        version: int, the ``Act_Learn_Desperate_V%d`` folder version.
        path2read: optional str, experiment directory to read from. Defaults
            to /scratch/blia/Act_Learn_Desperate_V<version>/<path_input>/.

    Ops:
        Saves an array of shape [num_steps, 2] (cumulative pixel count, image
        count) as ``query_stat_<path_input>`` in the calibration-score folder.
    """
    if not path2read:
        path2read = '/scratch/blia/Act_Learn_Desperate_V%d/%s/' % (version, path_input)
    path2save = '/home/blia/Exp_Data/calibration_score/Act_Learn_Desperate_V%d/' % version
    # Kept in its own name so that the ``version`` argument is not shadowed.
    exp_version = int(path_input.strip().split('_')[-1])
    num_of_pixel_old = np.load(path2read + '/num_of_pixel.npy')
    num_of_image_old = np.load(path2read + '/num_of_image.npy')
    acq_step = len(os.listdir(path2read + '/collect_data/'))
    new_stat = np.zeros([acq_step, 2])
    im_index = []
    unique_im = np.zeros([acq_step])
    for i in range(acq_step):
        # The first folder is named "FE_step_00_..."; the following folders are
        # numbered from 0, hence the i - 1.
        if i == 0:
            step_folder = "FE_step_00_version_%d" % exp_version
        else:
            step_folder = "FE_step_%d_version_%d" % (i - 1, exp_version)
        stat_file = path2read + '/collect_data/%s/updated_uncertain.txt' % step_folder
        # Context manager so the handle is closed after every step.
        with open(stat_file, 'rb') as f:
            stat = pickle.load(f)
        # stat[-2] is summed as the number of pixels acquired in this step
        # (presumably the binary selection mask -- TODO confirm).
        new_stat[i, 0] = np.sum(stat[-2])
        im_index.append(stat[-1])
        # Number of distinct image indices queried up to and including step i.
        _im_ind = np.unique([v for j in im_index for v in j])
        unique_im[i] = len(_im_ind)
    new_stat[:, 0] = np.cumsum(new_stat[:, 0])
    # NOTE(review): the + 10 presumably accounts for the initially labelled
    # images -- confirm against the training setup.
    new_stat[:, 1] = unique_im + 10
    print("number of acquired pixels old", num_of_pixel_old)
    print("number of acquired pixels new", new_stat[:, 0])
    print("number of acquired images old", num_of_image_old)
    print("number of acquired images new", new_stat[:, 1])
    np.save(path2save + 'query_stat_%s' % path_input, new_stat)
def transfer_numeric_index_back_to_imindex(numeric_index):
    """Map positions in a shrinking pool back to the original image indices.

    The pool starts as the indices 0..64. Each entry of ``numeric_index`` is a
    position in the pool as it looks at that moment; the image found there is
    recorded and then removed from the pool before the next entry is resolved.

    Args:
        numeric_index: numpy array of positions (cast to int32).

    Returns:
        int32 numpy array, same shape as ``numeric_index``, holding the
        original image indices.
    """
    positions = numeric_index.astype('int32')
    remaining_pool = np.arange(65)
    picked_images = np.zeros(np.shape(positions))
    for step, position in enumerate(positions):
        picked_images[step] = remaining_pool[position]
        # Drop the picked image so later positions refer to the reduced pool.
        remaining_pool = np.delete(remaining_pool, position)
    return picked_images.astype('int32')
def collect_pool_data_multi_acquisition_step(version, use_str):
    """Run ``collect_pool_data`` for every experiment folder matching a method.

    Args:
        version: the experiment version
        use_str: "Method_B_", or "Method_C_" or "Method_D_"; only folders whose
            name contains this string are processed.
    """
    experiment_root = "/scratch/blia/Act_Learn_Desperate_V%d/" % version
    # Only the second of the three returned values (the labels) is used.
    _unused_first, pool_labels, _unused_last = prepare_pool_data(
        "/home/blia/Exp_Data/Data/glanddata.npy", True)
    matching_folders = []
    for folder_name in os.listdir(experiment_root):
        if use_str in folder_name:
            matching_folders.append(folder_name)
    for folder_name in matching_folders:
        collect_pool_data(version, folder_name, pool_labels)
def collect_pool_data(version, path_use, pool_fb_label):
    """Collect the pool predictions of the image acquired after each step.

    For every acquisition step (except the last three) the stored pool
    prediction ``fbprob.npy`` is loaded and the entry of the image selected in
    the following step is kept, together with its ground-truth label. For
    experiments whose name contains "_D_" the matching ``fbbald.npy`` entry is
    kept as well.

    Args:
        version: the experiment version
        path_use: the experiment path, such as "Method_B_Stage_1_Version_0"
        pool_fb_label: the pixel-wise ground truth label for images in the pool set

    Ops:
        Saves ``<path_use>pool_label`` and ``<path_use>pool_stat`` in the
        calibration-score folder of this version.
    """
    exp_version = int(path_use.strip().split('_')[-1])
    experiment_dir = '/scratch/blia/Act_Learn_Desperate_V%d/%s/' % (version, path_use)
    save_dir = '/home/blia/Exp_Data/calibration_score/Act_Learn_Desperate_V%d/' % version
    numeric_index = np.load(experiment_dir + 'total_acqu_index.npy')
    selected_imindex = transfer_numeric_index_back_to_imindex(numeric_index)
    pool_data_dir = experiment_dir + 'Pool_Data/'
    has_bald = "_D_" in path_use
    prob_per_step, label_per_step, bald_per_step = [], [], []
    print(path_use)
    for step in range(len(numeric_index))[:-3]:
        print("acquisition step ", step)
        step_dir = pool_data_dir + "/FE_step_%d_version_%d/Test_Data_A/" % (step, exp_version)
        step_prob = np.load(step_dir + "fbprob.npy")
        # Image that gets acquired in the step following this prediction.
        next_image = selected_imindex[step + 1]
        prob_per_step.append(step_prob[next_image])
        if has_bald:
            step_bald = np.load(step_dir + "fbbald.npy")
            bald_per_step.append(step_bald[next_image])
        label_per_step.append(pool_fb_label[next_image])
    pool_stat = [prob_per_step, bald_per_step] if has_bald else prob_per_step
    np.save(save_dir + path_use + "pool_label", np.array(label_per_step))
    np.save(save_dir + path_use + "pool_stat", np.array(pool_stat))
def give_calibration_histogram(path_group, version, test_step, method):
    """Give the ECE histogram (expected calibration error) at a given step.

    The per-bin statistics of every experiment in ``path_group`` are computed,
    saved, and the mean per-bin accuracy (with a shaded band around it) is
    plotted against the bin centers of the first experiment.

    Args:
        path_group: list of experiment folder names; the last ``_``-separated
            token of each name is parsed as the experiment version.
        version: int, the ``Act_Learn_Desperate_V%d`` folder version.
        test_step: int, acquisition step whose test predictions are evaluated.
        method: str, only used in the names of the saved files.

    Ops:
        Writes ``ece_historgram_stat_<step>_<method>_<version>`` (npy) and
        ``<step>_<method>_<version>.jpg`` into /home/blia/Exp_Data/save_fig/.
    """
    # Only the labels are needed; the test images themselves are not used.
    _, y_label_test = collect_test_data(resize=False)
    y_label_binary = (y_label_test != 0).astype('int32')
    y_label_binary = np.reshape(y_label_binary, [-1])
    save_dir = "/home/blia/Exp_Data/save_fig/"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    path_ = "/scratch/blia/Act_Learn_Desperate_V%d/" % version
    stat_group = []
    for single_path in path_group:
        exp_version = int(single_path.strip().split("_")[-1])
        path_use = path_ + single_path + "/Test_Data/"
        _stat = _give_calibration_histogram(path_use, exp_version, test_step, y_label_binary)
        stat_group.append(_stat)
    np.save(save_dir + "/ece_historgram_stat_%d_%s_%d" % (test_step, method, version), stat_group)
    accuracies = [v[1] for v in stat_group]
    accu_mean = np.mean(accuracies, axis=0)
    # NOTE(review): 1.95 looks like an approximation of the 1.96 z-score for a
    # 95% interval; kept unchanged so the plotted band is not altered.
    accu_std = np.std(accuracies, axis=0) * 1.95 / np.sqrt(len(path_group))
    fig = plt.figure(figsize=(6, 3))
    try:
        ax = fig.add_subplot(111)
        ax.plot(stat_group[0][0], accu_mean, 'r')
        ax.fill_between(stat_group[0][0], accu_mean - accu_std, accu_mean + accu_std,
                        color='r', alpha=0.5)
        # Diagonal = perfect calibration.
        ax.plot([0.0, 1.0], [0.0, 1.0], color='b', ls=':')
        fig.savefig(save_dir + '/%d_%s_%d.jpg' % (test_step, method, version))
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
def _give_calibration_histogram(path2read, exp_version, test_step, y_label_binary):
    """Compute the calibration histogram of one experiment at one step.

    Args:
        path2read: str, the experiment's ``Test_Data/`` directory (with a
            trailing slash).
        exp_version: int, experiment version used in the step folder name.
        test_step: int, acquisition step used in the step folder name.
        y_label_binary: flattened binary ground-truth labels.

    Returns:
        Tuple ``(bin_centers, accuracies, counts)`` computed with 10 bins.
    """
    step_dir = path2read + "FE_step_%d_version_%d/" % (test_step, exp_version)
    # Predictions of both test subsets, each flattened to rows of two class
    # probabilities and stacked A first, then B.
    flattened = [
        np.reshape(np.load(step_dir + subset + "/fbprob.npy"), [-1, 2])
        for subset in ("Test_Data_A", "Test_Data_B")
    ]
    fb_prob = np.concatenate(flattened, axis=0)
    confidences, correctness = calib.get_multiclass_predictions_and_correctness(
        fb_prob, y_label_binary, None)
    confidences = confidences.flatten()
    correctness = correctness.flatten()
    bin_edges, accuracies, counts = calib.bin_predictions_and_accuracies(
        confidences, correctness, bins=10)
    bin_centers = calib.bin_centers_of_mass(confidences, bin_edges)
    return bin_centers, accuracies, counts
def collect_region_uncertainty(version, path_subset, step):
    """Collect the region uncertainty for the first ``step`` acquisition steps.

    Args:
        version: int, the ``Act_Learn_Desperate_V%d`` folder version.
        path_subset: str, experiment folder name; its second ``_``-separated
            token is the method and the last one the model version.
        step: int, number of acquisition steps to process (0 .. step - 1).
    """
    experiment_dir = '/scratch/blia/Act_Learn_Desperate_V%d/%s' % (version, path_subset)
    name_tokens = path_subset.strip().split('_')
    model_version = int(name_tokens[-1])
    method = str(name_tokens[1])
    collect_dir = experiment_dir + '/collect_data/'
    print("====loading experiment statistics from folder:", collect_dir)
    for step_index in range(step):
        print("===step %d=====" % step_index)
        step_dir = collect_dir + 'FE_step_%d_version_%d/' % (step_index, model_version)
        _region_uncertainty(step_dir, method)
def _region_uncertainty(path, method):
    """Calculate the uncertainty of the pixels selected by region acquisition.

    Args:
        path: str, step directory holding ``updated_uncertain.txt`` and
            ``fbprob.npy`` (plus ``fbbald.npy`` for method "D"). The save name
            is built from the 4th, 5th and 7th ``/``-separated components, so
            the path is expected to look like
            /scratch/blia/<Act_Learn..._V*>/<experiment>/collect_data/<FE_step_*>/.
        method: str, "B", "C" or "D"; forwarded to ``calc_uncertainty``.

    Ops:
        Saves the min-max normalised uncertainty values of the masked pixels
        to /home/blia/Exp_Data/calibration_score/region_uncertainty/.
    """
    savepath = '/home/blia/Exp_Data/calibration_score/region_uncertainty/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)
    path_split = path.strip().split('/')
    savename = (path_split[3].strip().split('_')[-1] + '_' + path_split[4]
                + '_step_' + path_split[6].strip().split('_')[2])
    # Context manager so the pickle file handle is closed deterministically.
    with open(path + 'updated_uncertain.txt', 'rb') as f:
        stat = pickle.load(f)
    selected_image, fb_label, ed_label, binary_mask, imindex = stat
    fbprob = np.load(path + 'fbprob.npy')
    fbprob_subset = fbprob[imindex]
    # Compare by value: ``is`` on a string literal tests object identity.
    if method == "D":
        fbbald = np.load(path + 'fbbald.npy')
        fbbald_subset = fbbald[imindex]
        fbprob_subset = [fbprob_subset, fbbald_subset]
    uncert = calc_uncertainty(fbprob_subset, method, False)
    print("-----maximum of uncertainty %.2f minimum of uncertainty %.2f------" % (np.max(uncert),
                                                                                  np.min(uncert)))
    # Min-max normalisation to [0, 1].
    uncert = (uncert - np.min(uncert)) / (np.max(uncert) - np.min(uncert))
    uncert = uncert * binary_mask[:, :, :, 0]
    # Keeps only non-zero entries: pixels outside the mask are dropped, and so
    # is any masked pixel whose normalised uncertainty is exactly 0.
    uncert_aggre = uncert[uncert != 0]
    print(np.shape(uncert_aggre), np.sum(binary_mask))
    np.save(savepath + '/' + savename, uncert_aggre)
def calc_uncertainty(prob, method, reshape=True):
    """Calculate the per-pixel uncertainty for an acquisition method.

    Args:
        prob: for "B" and "C" an array whose last axis holds the class
            probabilities; for "D" a pair ``(prob, bald)`` of such arrays.
        method: str, "B" (1 - max probability), "C" (entropy) or
            "D" (entropy plus the sum of ``bald`` over the last axis).
        reshape: bool, if True the result is flattened to 1-D.

    Returns:
        numpy array of uncertainties; flattened when ``reshape`` is True,
        otherwise shaped like ``prob`` without its last axis.

    Raises:
        ValueError: if ``method`` is not "B", "C" or "D".
    """
    # ``==`` instead of ``is``: identity checks on string literals only work
    # by interpreter accident and raise a SyntaxWarning on Python >= 3.8.
    if method == "B":
        uncert = 1 - np.max(prob, axis=-1)
    elif method == "C":
        # 1e-8 keeps log() finite for zero probabilities.
        uncert = np.sum(-prob * np.log(prob + 1e-8), axis=-1)
    elif method == "D":
        prob, bald = prob
        bald_first = -np.sum(prob * np.log(prob + 1e-8), axis=-1)
        bald_second = np.sum(bald, axis=-1)
        uncert = bald_first + bald_second
    else:
        raise ValueError("method must be one of 'B', 'C' or 'D', got %r" % (method,))
    if reshape:
        return np.reshape(uncert, [-1])
    return uncert
def aggregate_stat(version):
    """Aggregate the calibration scores of all experiments in one folder.

    Args:
        version: int, Act_Learn_Desperate_%d % version

    Ops:
        1. Every folder under the scratch directory whose name contains
           'Method_' is visited in sorted order.
        2. Its ``calibration_score.obj`` is copied to the home directory as
           ``<folder name>.obj``.
    """
    source_root = '/scratch/blia/Act_Learn_Desperate_V%d' % version
    target_root = '/home/blia/Exp_Data/calibration_score/' + 'Act_Learn_Desperate_V%d' % version
    if not os.path.exists(target_root):
        os.makedirs(target_root)
    model_names = [name for name in os.listdir(source_root) if 'Method_' in name]
    model_names.sort()
    for model_name in model_names:
        print(model_name)
        source_file = '%s/%s/calibration_score.obj' % (source_root, model_name)
        target_file = '%s/%s.obj' % (target_root, model_name)
        shutil.copy(source_file, target_file)
def calc_calibration_value(path_input, version_use):
    """The calculated calibration score here is used to create Figure 1, 5 and E2 in the paper.

    For every folder listed in ``total_select_folder.npy`` the NLL, ECE
    (10 bins), Brier score and Brier decomposition are computed three times:
    on Test_Data_A with the first 37 label maps ("benign"), on Test_Data_B
    with the remaining 43 ("mali"), and on both together.

    Args:
        path_input: str, experiment folder name.
        version_use: int, the ``Act_Learn_Desperate_V%d`` folder version.

    Ops:
        Pickles a dict with the keys "ece_score", "nll_score", "bri_score"
        (each [num_steps, 3]) and "bri_decompose_score" ([num_steps, 9]) to
        ``<experiment>/calibration_score.obj``.
    """
    _, y_label_test = collect_test_data(resize=False)
    y_label_binary = (y_label_test != 0).astype('int32')
    num_image, imh, imw = np.shape(y_label_test)
    path_mom = os.path.join("/scratch/blia/Act_Learn_Desperate_V%d/" % version_use, path_input)
    path_sub = np.load(os.path.join(path_mom, 'total_select_folder.npy'))
    test_data_path = path_mom + '/Test_Data/'
    num_class = 2
    num_benign, num_mali = 37, 43
    # Flattened labels in the order (benign, mali, all); the predictions below
    # are grouped in the same order.
    label_groups = (
        np.reshape(y_label_binary[:num_benign], [num_benign * imh * imw]),
        np.reshape(y_label_binary[num_benign:], [num_mali * imh * imw]),
        np.reshape(y_label_binary, [num_image * imh * imw]),
    )
    # Because it is only binary classification, the ECE is computed top-1.
    ece_score, brier_score, nll_score, brier_decompose_score = [], [], [], []
    for single_sub in path_sub:
        folder_name = single_sub.strip().split('/')[-2]
        tds_dir = test_data_path + folder_name + '/'
        pred = []
        for single_test in ("Test_Data_A/", "Test_Data_B/"):
            fb_prob = np.load(tds_dir + single_test + 'fbprob.npy')
            flat_shape = [len(fb_prob) * imh * imw, num_class]
            pred.append(np.reshape(np.squeeze(fb_prob, axis=(1, 2)), flat_shape))
        prob_groups = (pred[0], pred[1], np.concatenate(pred, axis=0))
        paired = list(zip(prob_groups, label_groups))
        nll_score.append([calib.nll(p) for p in prob_groups])
        ece_score.append([calib.expected_calibration_error_multiclass(p, y, 10)
                          for p, y in paired])
        brier_score.append([np.mean(calib.brier_scores(y, probs=p)) for p, y in paired])
        brier_decompose_score.append([calib.brier_decomp_npy(labels=y, probabilities=p)
                                      for p, y in paired])
    stat = {
        "ece_score": np.reshape(np.array(ece_score), [len(ece_score), 3]),
        "nll_score": np.reshape(np.array(nll_score), [len(nll_score), 3]),
        "bri_score": np.reshape(np.array(brier_score), [len(brier_score), 3]),
        "bri_decompose_score": np.reshape(np.array(brier_decompose_score),
                                          [len(brier_decompose_score), 9]),
    }
    print("ece score", stat["ece_score"][0], ece_score[0])
    print("nll score", stat["nll_score"][0], nll_score[0])
    print("bri score", stat["bri_score"][0], brier_score[0])
    print("brier decompose score", stat["bri_decompose_score"][0], brier_decompose_score[0])
    with open(path_mom + "/calibration_score.obj", 'wb') as f:
        pickle.dump(stat, f)
def get_time(time_init, opt):
    """Print the time elapsed since ``time_init`` and return the current time.

    Args:
        time_init: float, start timestamp as returned by ``time.time()``.
        opt: str, label printed in front of the elapsed time.

    Returns:
        float, the current timestamp, so it can be fed into the next call.
    """
    now = time.time()
    elapsed = now - time_init
    print("%s use time--------%.4f" % (opt, elapsed))
    return now