Skip to content
Snippets Groups Projects
Commit 7aafa768 authored by abda
Browse files

New tool for making data

parent 36a94d85
No related branches found
No related tags found
No related merge requests found
Chapter08/cases/case_1.png

1.83 KiB

Chapter08/cases/case_2.png

2.65 KiB

Chapter08/cases/case_3.png

2.65 KiB

Chapter08/cases/case_4.png

3.53 KiB

Chapter08/cases/case_5.png

4.51 KiB

Chapter08/cases/case_6.png

2.32 KiB

Chapter08/cases/case_7.png

2.56 KiB

#%%
import numpy as np
import matplotlib.pyplot as plt
import skimage.io
import os
def make_data(example_nr, n_pts = 200, noise = 1):
    '''Make data for the neural network. The data is read from a png file in the
    cases folder, which must be placed together with your code.

    Parameters:
        example_nr : int
            1-based index into the alphabetically sorted png files of the
            cases folder (1-7 for the seven case images).
        n_pts : int
            Maximal number of points in each of the two classes. A class gets
            fewer points when the image holds fewer pixels of that class.
        noise : float
            Standard deviation of the Gaussian noise added to the points.

    Returns:
        X : ndarray
            2 x N array of points, where N = n_white + n_gray (at most
            2 * n_pts). Row 0 holds column (x) and row 1 row (y) coordinates.
        T : ndarray
            2 x N array of boolean values. T[0] marks the points drawn from
            white pixels (value 255), T[1] those from gray pixels (value 127).
        x_grid : ndarray
            2 x 10000 array of points in a regular grid for visualization.
        dim : tuple of int
            Dimensions of the grid, (100, 100).

    Raises:
        IndexError: if example_nr does not select one of the png files.

    Example:
        example_nr = 1
        n_pts = 2000
        noise = 2
        X, T, x_grid, dim = make_data(example_nr, n_pts, noise)
        fig, ax = plt.subplots()
        ax.plot(X[0,T[0]], X[1,T[0]], '.r', alpha=0.3)
        ax.plot(X[0,T[1]], X[1,T[1]], '.g', alpha=0.3)
        ax.set_xlim(0, 100)
        ax.set_ylim(0, 100)
        ax.set_box_aspect(1)

    Authors: Vedrana Andersen Dahl and Anders Bjorholm Dahl - 20/3-2024
    vand@dtu.dk, abda@dtu.dk
    '''
    in_dir = 'cases/'
    file_names = sorted(f for f in os.listdir(in_dir) if f.endswith('.png'))

    # Without this check example_nr <= 0 would wrap around as a negative list
    # index and silently load the wrong image.
    if not 1 <= example_nr <= len(file_names):
        raise IndexError(
            f'example_nr must be between 1 and {len(file_names)}, got {example_nr}')

    im = skimage.io.imread(os.path.join(in_dir, file_names[example_nr - 1]))

    # Pixel positions of the two classes: white (255) and gray (127).
    # NOTE(review): unpacking into (row, col) assumes a 2D grayscale image.
    r_white, c_white = np.where(im == 255)
    r_gray, c_gray = np.where(im == 127)
    n_white = min(r_white.shape[0], n_pts)
    n_gray = min(r_gray.shape[0], n_pts)

    # Pick a random subset of pixels from each class.
    rid_white = np.random.permutation(r_white.shape[0])[:n_white]
    rid_gray = np.random.permutation(r_gray.shape[0])[:n_gray]
    pts_white = np.array([c_white[rid_white], r_white[rid_white]])
    pts_gray = np.array([c_gray[rid_gray], r_gray[rid_gray]])

    # Pixel coordinates are scaled by 1/5 (presumably mapping the image onto
    # the 100 x 100 grid - confirm against the image size) before adding noise.
    X = np.hstack((pts_white, pts_gray))/5 + np.random.randn(2, n_white + n_gray)*noise
    T = np.zeros((2, n_white + n_gray), dtype=bool)
    T[0, :n_white] = True
    T[1, n_white:] = True

    dim = (100, 100)
    QX, QY = np.meshgrid(range(0, dim[0]), range(0, dim[1]))
    x_grid = np.vstack((np.ravel(QX), np.ravel(QY)))

    return X, T, x_grid, dim
#%% Test of the data generation
if __name__ == "__main__":
    # Demo: generate one example and plot the two classes in red and green.
    example_nr = 1
    n_pts = 2000
    noise = 3
    X, T, x_grid, dim = make_data(example_nr, n_pts, noise)

    fig, ax = plt.subplots()
    ax.plot(X[0,T[0]], X[1,T[0]], '.r', alpha=0.3)
    ax.plot(X[0,T[1]], X[1,T[1]], '.g', alpha=0.3)
    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)
    ax.set_box_aspect(1)
    plt.show()
#%% Before training, you should make data zero mean
c = np.mean(X, axis=1, keepdims=True)
X_c = X - c
fig, ax = plt.subplots(1,1)
ax.scatter(X_c[0][T[0]], X_c[1][T[0]], c='r', alpha=0.3, s=15)
ax.scatter(X_c[0][T[1]], X_c[1][T[1]], c='g', alpha=0.3, s=15)
ax.set_aspect('equal', 'box')
plt.title('Zero-mean data')
plt.show()
# %%
#%%
import numpy as np
def make_data(example_nr, n = 200, noise = 1):
    '''
    Generate data for training a simple neural network.

    Arguments:
        example_nr: a number 1 to 3 for each example
            (1: two separated clusters, 2: concentric clusters,
            3: 2x2 checkerboard).
        n: number of points in each class set.
        noise: noise level, best between 0.5 and 2.
    Returns:
        X: 2 x 2n array of points (there are n points in each class).
        T: 2 x 2n target values.
        x: grid points for testing the neural network.
        dim: size of the area covered by the grid points.
    Raises:
        ValueError: if example_nr is not 1, 2, or 3.

    Authors: Vedrana Andersen Dahl and Anders Bjorholm Dahl - 25/3-2020
    vand@dtu.dk, abda@dtu.dk
    '''
    # Reject unknown examples up front; otherwise X would never be assigned
    # and the function would fail later with an unrelated UnboundLocalError.
    if example_nr not in (1, 2, 3):
        raise ValueError('No data returned - example_nr must be 1, 2, or 3')

    rg = np.random.default_rng()

    dim = (100, 100)
    QX, QY = np.meshgrid(range(0, dim[0]), range(0, dim[1]))
    x_grid = np.c_[np.ravel(QX), np.ravel(QY)]

    # Targets: first half class 0, second half class 1
    T = np.vstack((np.tile([True, False], (n, 1)),
                   np.tile([False, True], (n, 1))))

    if example_nr == 1:  # two separated clusters
        X = np.vstack((np.tile([30., 30.], (n, 1)),
                       np.tile([70., 70.], (n, 1))))
        X += rg.normal(size=X.shape, scale=10*noise)  # add noise
    elif example_nr == 2:  # concentric clusters
        rand_ang = 2 * np.pi * rg.uniform(size=n)
        X = np.vstack((30 * np.array([np.cos(rand_ang), np.sin(rand_ang)]).T,
                       np.tile([0., 0.], (n, 1))))
        X += [50, 50]  # center
        X += rg.normal(size=X.shape, scale=5*noise)  # add noise
    else:  # example_nr == 3, 2x2 checkerboard
        n1 = n//2
        n2 = n//2 + n%2  # if n is odd n2 will have 1 element more
        X = np.vstack((np.tile([30., 30.], (n1, 1)),
                       np.tile([70., 70.], (n2, 1)),
                       np.tile([30., 70.], (n1, 1)),
                       np.tile([70., 30.], (n2, 1))))
        X += rg.normal(size=X.shape, scale=10*noise)  # add noise

    # Shuffle points and targets with the same permutation so they stay paired.
    o = rg.permutation(range(2*n))

    return X[o].T, T[o].T, x_grid.T, dim
#%% Test of the data generation
if __name__ == "__main__":
    #%%
    import matplotlib.pyplot as plt

    n = 1000
    noise = 1

    # One subplot per example; class 0 in red, class 1 in green.
    fig, ax = plt.subplots(1, 3)
    for example_nr, a in enumerate(ax, start=1):
        X, T, x_grid, dim = make_data(example_nr, n, noise)
        a.scatter(X[0][T[0]], X[1][T[0]], c='r', alpha=0.3, s=15)
        a.scatter(X[0][T[1]], X[1][T[1]], c='g', alpha=0.3, s=15)
        a.set_aspect('equal', 'box')
        # Title uses the 1-based example number, matching make_data's argument.
        a.set_title(f'Example {example_nr} data')
    plt.show()

    #%% Before training, you should make data zero mean
    # Uses X and T of the last example generated in the loop above.
    c = np.mean(X, axis=1, keepdims=True)
    X_c = X - c
    fig, ax = plt.subplots(1,1)
    ax.scatter(X_c[0][T[0]], X_c[1][T[0]], c='r', alpha=0.3, s=15)
    ax.scatter(X_c[0][T[1]], X_c[1][T[1]], c='g', alpha=0.3, s=15)
    ax.set_aspect('equal', 'box')
    plt.title('Zero-mean data')
    plt.show()
# %%
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment