# -*- coding: utf-8 -*-
"""
Created on Wed May 20 12:49 2020
This file is for calculating the calibration score of the experiments
@author: s161488
"""
import os
import pickle
import shutil
import time

import matplotlib.pyplot as plt
import numpy as np

import eval_calibration.calibration_lib as calib
from data_utils.prepare_data import collect_test_data, prepare_pool_data


def get_group_region(method_use, version_use=6):
    """Give the calibration score for region active learning.

    Args:
        method_use: str, one of "B", "C" or "D"; selects the hard-coded pair
            of experiment folders that is evaluated.
        version_use: int, experiment version forwarded to
            `calc_calibration_value`. Defaults to 6, the value that used to be
            hard-coded in this function.

    Raises:
        ValueError: if `method_use` is not "B", "C" or "D".
    """
    experiment_folders = {
        "B": ["Method_B_Stage_1_Version_3", "Method_B_Stage_1_Version_4"],
        "C": ["Method_C_Stage_2_Version_1", "Method_C_Stage_2_Version_2"],
        "D": ["Method_D_Stage_3_Version_2", "Method_D_Stage_3_Version_3"],
    }
    # Compare by value (dict lookup), never with `is`: string identity is an
    # interpreter implementation detail.
    if method_use not in experiment_folders:
        raise ValueError("method_use must be one of 'B', 'C' or 'D', got %r" % (method_use,))
    for single_path in experiment_folders[method_use]:
        calc_calibration_value(single_path, version_use)


def get_group_full(version_use):
    """Give the calibration score for full image active learning.

    Every entry found in the experiment directory of `version_use` is passed
    to `calc_calibration_value`, in sorted order.

    Args:
        version_use: int, the experiment version
    """
    experiment_root = "/scratch/blia/Act_Learn_Desperate_V%d/" % version_use
    for single_path in sorted(os.listdir(experiment_root)):
        calc_calibration_value(single_path, version_use)


def calculate_aggre_pixel(path_input, version, path2read=None):
    """This function calculates the acquired number of pixels in per step
    based on the data in collect_data, and saves the result as
    `query_stat_<path_input>.npy`.

    The saved array has one row per acquisition step: column 0 is the
    cumulative sum of `stat[-2]` and column 1 is the cumulative number of
    unique entries of `stat[-1]` plus 10, where `stat` is the object pickled in
    `updated_uncertain.txt` of that step.

    Args:
        path_input: str, the experiment folder, such as
            "Method_B_Stage_1_Version_0"; the trailing integer is used to build
            the per-step folder names.
        version: int, the experiment version
        path2read: str, optional directory of the experiment. When empty it is
            derived from `version` and `path_input`.
    """
    if not path2read:
        path2read = '/scratch/blia/Act_Learn_Desperate_V%d/%s/' % (version, path_input)
    path2save = '/home/blia/Exp_Data/calibration_score/Act_Learn_Desperate_V%d/' % version
    # np.save does not create missing directories.
    os.makedirs(path2save, exist_ok=True)
    # Kept in its own name so that the `version` argument is not overwritten.
    exp_version = int(path_input.strip().split('_')[-1])
    num_of_pixel_old = np.load(os.path.join(path2read, 'num_of_pixel.npy'))
    num_of_image_old = np.load(os.path.join(path2read, 'num_of_image.npy'))
    collect_dir = os.path.join(path2read, 'collect_data')
    acq_step = len(os.listdir(collect_dir))
    new_stat = np.zeros([acq_step, 2])
    im_index = []
    unique_im = np.zeros([acq_step])
    for i in range(acq_step):
        # The first folder is named "FE_step_00_..."; the following ones are
        # numbered from 0, i.e. iteration i reads "FE_step_<i-1>_...".
        if i == 0:
            step_folder = "FE_step_00_version_%d" % exp_version
        else:
            step_folder = "FE_step_%d_version_%d" % (i - 1, exp_version)
        stat_file = os.path.join(collect_dir, step_folder, 'updated_uncertain.txt')
        with open(stat_file, 'rb') as f:
            stat = pickle.load(f)
        new_stat[i, 0] = np.sum(stat[-2])
        im_index.append(stat[-1])
        # Number of distinct image indices seen up to and including this step.
        unique_im[i] = len(np.unique([v for j in im_index for v in j]))
    new_stat[:, 0] = np.cumsum(new_stat[:, 0])
    # NOTE(review): the offset of 10 is presumably the number of initially
    # labelled images - confirm.
    new_stat[:, 1] = unique_im + 10
    print("number of acquired pixels old", num_of_pixel_old)
    print("number of acquired pixels new", new_stat[:, 0])
    print("number of acquired images old", num_of_image_old)
    print("number of acquired images new", new_stat[:, 1])
    np.save(path2save + 'query_stat_%s' % path_input, new_stat)


def transfer_numeric_index_back_to_imindex(numeric_index, pool_size=65):
    """Map per-step pool positions back to the original image indices.

    Each entry of `numeric_index` is a position in the pool that *remains* at
    that step: after every step the selected entry is removed from the pool.
    Replaying the selections against a shrinking pool recovers the index each
    selection had in the initial pool, e.g. [0, 0, 0] maps to [0, 1, 2].

    Args:
        numeric_index: numpy array of selected pool positions, one per step
        pool_size: int, number of images in the initial pool (default 65)

    Returns:
        int32 numpy array with the same shape as `numeric_index`
    """
    im_index_pool = np.arange(pool_size)
    numeric_index = numeric_index.astype('int32')
    selected_imindex = np.zeros(np.shape(numeric_index))
    for step, select_index in enumerate(numeric_index):
        selected_imindex[step] = im_index_pool[select_index]
        im_index_pool = np.delete(im_index_pool, select_index)
    return selected_imindex.astype('int32')


def collect_pool_data_multi_acquisition_step(version, use_str):
    """Run `collect_pool_data` for every experiment folder matching `use_str`.

    Args:
        version: the experiment version
        use_str: "Method_B_", or "Method_C_" or "Method_D_"
    """
    path = "/scratch/blia/Act_Learn_Desperate_V%d/" % version
    _, y_label_gt, _ = prepare_pool_data("/home/blia/Exp_Data/Data/glanddata.npy", True)
    path_group = [v for v in os.listdir(path) if use_str in v]
    for single_path in path_group:
        collect_pool_data(version, single_path, y_label_gt)


def collect_pool_data(version, path_use, pool_fb_label):
    """This function collects the predictions on the pool images at each
    acquisition step and saves them, together with the matching ground truth,
    as `<path_use>pool_stat.npy` and `<path_use>pool_label.npy`.

    For step i the prediction made at step i is read and indexed with the
    image index that is selected at step i + 1. For experiments whose folder
    name contains "_D_" the `fbbald.npy` output is collected as well and saved
    next to the probabilities as `[fb_group, bald_group]`.

    Args:
        version: the experiment version
        path_use: the experiment path, such as "Method_B_Stage_1_Version_0"
        pool_fb_label: the pixel-wise ground truth label for images in the pool set
    """
    exp_version = int(path_use.strip().split('_')[-1])
    path = '/scratch/blia/Act_Learn_Desperate_V%d/%s/' % (version, path_use)
    save_dir = '/home/blia/Exp_Data/calibration_score/Act_Learn_Desperate_V%d/' % version
    # np.save does not create missing directories.
    os.makedirs(save_dir, exist_ok=True)
    numeric_index = np.load(path + 'total_acqu_index.npy')
    selected_imindex = transfer_numeric_index_back_to_imindex(numeric_index)
    pool_data_dir = path + 'Pool_Data/'
    use_bald = "_D_" in path_use
    fb_group = []
    gt_group = []
    bald_group = []
    print(path_use)
    # NOTE(review): the last three acquisition steps are skipped, exactly as
    # in the original `range(len(numeric_index))[:-3]`; the reason is not
    # visible in this file - confirm.
    for i in range(len(numeric_index) - 3):
        print("acquisition step ", i)
        step_dir = os.path.join(pool_data_dir, "FE_step_%d_version_%d" % (i, exp_version), "Test_Data_A")
        next_selected = selected_imindex[i + 1]
        fb_ = np.load(os.path.join(step_dir, "fbprob.npy"))
        fb_group.append(fb_[next_selected])
        if use_bald:
            bald_ = np.load(os.path.join(step_dir, "fbbald.npy"))
            bald_group.append(bald_[next_selected])
        gt_group.append(pool_fb_label[next_selected])
    if use_bald:
        fb_group = [fb_group, bald_group]
    # No separator between the folder name and the suffix: the file names are
    # e.g. "Method_B_Stage_1_Version_0pool_label.npy".
    np.save(save_dir + path_use + "pool_label", np.array(gt_group))
    np.save(save_dir + path_use + "pool_stat", np.array(fb_group))


def give_calibration_histogram(path_group, version, test_step, method):
    """This function gives the ece histogram (expected calibration error) at
    different step for different acquisition functions.

    The per-experiment statistics are saved as an .npy file and the per-bin
    accuracy, averaged over the experiments, is plotted against the bin
    centers of the first experiment and saved as a .jpg figure.

    Args:
        path_group: non-empty list of experiment folders, such as
            ["Method_B_Stage_1_Version_0", ...]
        version: int, the experiment version
        test_step: int, the acquisition step that is evaluated
        method: str, only used in the names of the saved files

    Raises:
        ValueError: if `path_group` is empty.
    """
    if len(path_group) == 0:
        raise ValueError("path_group must contain at least one experiment folder")
    _, y_label_test = collect_test_data(resize=False)
    y_label_binary = (y_label_test != 0).astype('int32')
    y_label_binary = np.reshape(y_label_binary, [-1])
    save_dir = "/home/blia/Exp_Data/save_fig/"
    os.makedirs(save_dir, exist_ok=True)
    path_ = "/scratch/blia/Act_Learn_Desperate_V%d/" % version
    stat_group = []
    for single_path in path_group:
        exp_version = int(single_path.strip().split("_")[-1])
        path_use = path_ + single_path + "/Test_Data/"
        stat_group.append(_give_calibration_histogram(path_use, exp_version, test_step, y_label_binary))
    np.save(os.path.join(save_dir, "ece_historgram_stat_%d_%s_%d" % (test_step, method, version)),
            stat_group)
    accuracies = [v[1] for v in stat_group]
    accu_mean = np.mean(accuracies, axis=0)
    # Despite its name this is the standard error scaled by 1.95, i.e. the
    # half-width of the shaded band around the mean.
    accu_std = np.std(accuracies, axis=0) * 1.95 / np.sqrt(len(path_group))
    bin_centers = stat_group[0][0]
    fig = plt.figure(figsize=(6, 3))
    ax = fig.add_subplot(111)
    ax.plot(bin_centers, accu_mean, 'r')
    ax.fill_between(bin_centers, accu_mean - accu_std, accu_mean + accu_std, color='r', alpha=0.5)
    # Diagonal reference line.
    ax.plot([0.0, 1.0], [0.0, 1.0], color='b', ls=':')
    fig.savefig(os.path.join(save_dir, '%d_%s_%d.jpg' % (test_step, method, version)))
    # Release the figure; pyplot keeps every figure alive until it is closed.
    plt.close(fig)


def _give_calibration_histogram(path2read, exp_version, test_step, y_label_binary):
    """Bin the predictions of one experiment at one acquisition step.

    Args:
        path2read: str, the "Test_Data/" directory of the experiment (with a
            trailing separator)
        exp_version: int, trailing version number of the experiment folder
        test_step: int, the acquisition step that is evaluated
        y_label_binary: flattened binary ground truth labels

    Returns:
        tuple (bin_centers, accuracies, counts) as computed by `calib`
    """
    path2read = path2read + "FE_step_%d_version_%d/" % (test_step, exp_version)
    fb_prob = []
    for single_path in ("Test_Data_A", "Test_Data_B"):
        fb = np.load(path2read + single_path + "/fbprob.npy")
        # One row per prediction, two class probabilities per row.
        fb_prob.append(np.reshape(fb, [-1, 2]))
    fb_prob = np.concatenate(fb_prob, axis=0)
    top_k_probs, is_correct = calib.get_multiclass_predictions_and_correctness(fb_prob, y_label_binary, None)
    top_k_probs = top_k_probs.flatten()
    is_correct = is_correct.flatten()
    bin_edges, accuracies, counts = calib.bin_predictions_and_accuracies(top_k_probs, is_correct, bins=10)
    bin_centers = calib.bin_centers_of_mass(top_k_probs, bin_edges)
    return bin_centers, accuracies, counts


def collect_region_uncertainty(version, path_subset, step):
    """Run `_region_uncertainty` for the first `step` acquisition steps.

    Args:
        version: int, the experiment version
        path_subset: str, the experiment folder, such as
            "Method_B_Stage_1_Version_0". The second underscore-separated token
            is used as the method and the last one as the model version.
        step: int, number of acquisition steps to process (0 .. step - 1)
    """
    pathmom = '/scratch/blia/Act_Learn_Desperate_V%d/%s' % (version, path_subset)
    name_parts = path_subset.strip().split('_')
    model_version = int(name_parts[-1])
    method = str(name_parts[1])
    path2read = pathmom + '/collect_data/'
    print("====loading experiment statistics from folder:", path2read)
    for i in range(step):
        print("===step %d=====" % i)
        path = path2read + 'FE_step_%d_version_%d/' % (i, model_version)
        _region_uncertainty(path, method)


def _region_uncertainty(path, method):
    """Calculates the uncertainty from the region acquisition.

    The uncertainty of the selected images is min-max normalised, multiplied
    with the first channel of the binary mask, and its non-zero values are
    saved as a flat array.

    Args:
        path: str, step directory with a trailing separator, laid out like
            "/scratch/blia/Act_Learn_Desperate_V<v>/<experiment>/collect_data/
            FE_step_<i>_version_<m>/" (the name of the saved file is built from
            fixed positions in this path)
        method: str, "B", "C" or "D", see `calc_uncertainty`
    """
    savepath = '/home/blia/Exp_Data/calibration_score/region_uncertainty/'
    os.makedirs(savepath, exist_ok=True)
    path_split = path.strip().split('/')
    # Components 3, 4 and 6 of the path are the version folder, the
    # experiment folder and the step folder, respectively.
    version_tag = path_split[3].strip().split('_')[-1]
    step_tag = path_split[6].strip().split('_')[2]
    savename = version_tag + '_' + path_split[4] + '_step_' + step_tag
    with open(path + 'updated_uncertain.txt', 'rb') as f:
        stat = pickle.load(f)
    # stat holds (selected_image, fb_label, ed_label, binary_mask, imindex);
    # only the last two are needed here.
    _, _, _, binary_mask, imindex = stat
    fbprob = np.load(path + 'fbprob.npy')
    fbprob_subset = fbprob[imindex]
    if method == "D":
        fbbald = np.load(path + 'fbbald.npy')
        fbbald_subset = fbbald[imindex]
        fbprob_subset = [fbprob_subset, fbbald_subset]
    uncert = calc_uncertainty(fbprob_subset, method, False)
    uncert_min, uncert_max = np.min(uncert), np.max(uncert)
    print("-----maximum of uncertainty %.2f minimum of uncertainty %.2f------" % (uncert_max, uncert_min))
    # NOTE(review): a constant uncertainty map makes the denominator zero.
    uncert = (uncert - uncert_min) / (uncert_max - uncert_min)
    uncert = uncert * binary_mask[:, :, :, 0]
    # Drops everything outside the mask, and also masked pixels whose
    # normalised uncertainty is exactly zero.
    uncert_aggre = uncert[uncert != 0]
    print(np.shape(uncert_aggre), np.sum(binary_mask))
    np.save(savepath + '/' + savename, uncert_aggre)


def calc_uncertainty(prob, method, reshape=True):
    """Calculate the uncertainty of the predictions for one acquisition method.

    Args:
        prob: the predicted probabilities with the classes on the last axis.
            For method "D" a pair `(prob, bald)` is expected instead.
        method: str,
            "B": 1 - max(prob) over the last axis,
            "C": -sum(prob * log(prob + 1e-8)) over the last axis,
            "D": the value of "C" plus sum(bald) over the last axis
        reshape: bool, flatten the result to one dimension when True

    Returns:
        numpy array with the uncertainty values

    Raises:
        ValueError: if `method` is not "B", "C" or "D".
    """
    if method == "B":
        uncert = 1 - np.max(prob, axis=-1)
    elif method == "C":
        uncert = np.sum(-prob * np.log(prob + 1e-8), axis=-1)
    elif method == "D":
        prob, bald = prob
        bald_first = -np.sum(prob * np.log(prob + 1e-8), axis=-1)
        bald_second = np.sum(bald, axis=-1)
        uncert = bald_first + bald_second
    else:
        raise ValueError("method must be one of 'B', 'C' or 'D', got %r" % (method,))
    if reshape:
        return np.reshape(uncert, [-1])
    return uncert


def aggregate_stat(version):
    """This function aggregates all the calibration score in one folder

    Args:
        version: int, Act_Learn_Desperate_%d % version
    Ops:
        1. The calibration score needs to be read
        2. Then this calibration score file will be copied to a home directory
    """
    path2read = '/scratch/blia/Act_Learn_Desperate_V%d' % version
    path2save = '/home/blia/Exp_Data/calibration_score/' + 'Act_Learn_Desperate_V%d' % version
    os.makedirs(path2save, exist_ok=True)
    all_model = sorted(v for v in os.listdir(path2read) if 'Method_' in v)
    for single_model in all_model:
        print(single_model)
        orig_file_path = os.path.join(path2read, single_model, 'calibration_score.obj')
        new_file_path = os.path.join(path2save, '%s.obj' % single_model)
        shutil.copy(orig_file_path, new_file_path)


def _calibration_scores(probs, labels):
    """Return (nll, ece, mean brier score, brier decomposition) for one group of pixels."""
    # NOTE(review): calib.nll is called with the probabilities only, exactly
    # as in the original code - confirm that no labels are required.
    nll = calib.nll(probs)
    ece = calib.expected_calibration_error_multiclass(probs, labels, 10)
    brier = np.mean(calib.brier_scores(labels, probs=probs))
    brier_decomp = calib.brier_decomp_npy(labels=labels, probabilities=probs)
    return nll, ece, brier, brier_decomp


def calc_calibration_value(path_input, version_use):
    """The calculated calibration score here is used to create Figure 1, 5 and E2 in the paper.

    For every folder listed in `total_select_folder.npy` the nll, ece and
    brier scores (and the brier decomposition) are computed for the benign
    images (Test_Data_A), the malignant images (Test_Data_B) and all images
    together. The result is pickled to `calibration_score.obj` inside the
    experiment folder as a dict with the keys "ece_score", "nll_score",
    "bri_score" and "bri_decompose_score", one row per folder.

    Args:
        path_input: str, the experiment folder, such as "Method_B_Stage_1_Version_0"
        version_use: int, the experiment version
    """
    _, y_label_test = collect_test_data(resize=False)
    y_label_binary = (y_label_test != 0).astype('int32')
    num_image, imh, imw = np.shape(y_label_test)
    path_mom = os.path.join("/scratch/blia/Act_Learn_Desperate_V%d/" % version_use, path_input)
    path_sub = np.load(os.path.join(path_mom, 'total_select_folder.npy'))
    test_data_path = path_mom + '/Test_Data/'
    num_class = 2
    # The first 37 test images are treated as benign, the remaining 43 as malignant.
    num_benign, num_mali = 37, 43
    y_label_benign_binary = np.reshape(y_label_binary[:num_benign], [num_benign * imh * imw])
    y_label_mali_binary = np.reshape(y_label_binary[num_benign:], [num_mali * imh * imw])
    y_label_binary = np.reshape(y_label_binary, [num_image * imh * imw])
    # for each of them there will be a score for benign, and also for mali, and also overall
    # for the ece error, because it's only binary classification, so I will just do top-1
    ece_score = []
    brier_score = []
    nll_score = []
    brier_decompose_score = []
    for single_sub in path_sub:
        # The entries end with a separator, so the folder name is the
        # second-to-last path component.
        single_folder_name = single_sub.strip().split('/')[-2]
        tds_dir = test_data_path + single_folder_name + '/'
        pred = []
        for single_test in ("Test_Data_A/", "Test_Data_B/"):
            fb_prob = np.load(tds_dir + single_test + 'fbprob.npy')
            # One row of class probabilities per pixel.
            fb_reshape = np.reshape(np.squeeze(fb_prob, axis=(1, 2)),
                                    [len(fb_prob) * imh * imw, num_class])
            pred.append(fb_reshape)
        groups = [
            (pred[0], y_label_benign_binary),
            (pred[1], y_label_mali_binary),
            (np.concatenate(pred, axis=0), y_label_binary),
        ]
        # One (nll, ece, brier, brier_decomp) tuple for benign, mali and all.
        scores = [_calibration_scores(probs, labels) for probs, labels in groups]
        nll_score.append([s[0] for s in scores])
        ece_score.append([s[1] for s in scores])
        brier_score.append([s[2] for s in scores])
        brier_decompose_score.append([s[3] for s in scores])
    stat = {
        "ece_score": np.reshape(np.array(ece_score), [len(ece_score), 3]),
        "nll_score": np.reshape(np.array(nll_score), [len(nll_score), 3]),
        "bri_score": np.reshape(np.array(brier_score), [len(brier_score), 3]),
        "bri_decompose_score": np.reshape(np.array(brier_decompose_score),
                                          [len(brier_decompose_score), 9]),
    }
    print("ece score", stat["ece_score"][0], ece_score[0])
    print("nll score", stat["nll_score"][0], nll_score[0])
    print("bri score", stat["bri_score"][0], brier_score[0])
    print("brier decompose score", stat["bri_decompose_score"][0], brier_decompose_score[0])
    with open(path_mom + "/calibration_score.obj", 'wb') as f:
        pickle.dump(stat, f)


def get_time(time_init, opt):
    """Print the time elapsed since `time_init` and return the current time.

    Args:
        time_init: float, a timestamp as returned by `time.time()`
        opt: str, label printed in front of the elapsed time

    Returns:
        float, the current timestamp, to be used as the next `time_init`
    """
    time_end = time.time()
    print("%s use time--------%.4f" % (opt, time_end - time_init))
    return time_end