Skip to content
Snippets Groups Projects
Commit 7aafa768 authored by abda
Browse files

New tool for making data

parent 36a94d85
No related branches found
No related tags found
No related merge requests found
Chapter08/cases/case_1.png

1.83 KiB

Chapter08/cases/case_2.png

2.65 KiB

Chapter08/cases/case_3.png

2.65 KiB

Chapter08/cases/case_4.png

3.53 KiB

Chapter08/cases/case_5.png

4.51 KiB

Chapter08/cases/case_6.png

2.32 KiB

Chapter08/cases/case_7.png

2.56 KiB

#%%
import numpy as np import numpy as np
import matplotlib.pyplot as plt
import skimage.io
import os
def make_data(example_nr, n_pts = 200, noise = 1):
def make_data(example_nr, n = 200, noise = 1): '''Make data for the neural network. The data is read from a png file in the
''' cases folder, which must be placed together with your code.
Generate data for training a simple neural network.
Arguments:
example_nr: a number 1 to 3 for each example.
n: number of points in each class set.
noise: noise level, best between 0.5 and 2.
Returns:
X: 2 x 2n array of points (there are n points in each class).
T: 2 x 2n target values.
x: grid points for testing the neural network.
dim: size of the area covered by the grid points.
Authors: Vedrana Andersen Dahl and Anders Bjorholm Dahl - 25/3-2020
vand@dtu.dk, abda@dtu.dk
'''
rg = np.random.default_rng()
dim = (100, 100)
QX, QY = np.meshgrid(range(0, dim[0]), range(0, dim[1])) Parameters:
x_grid = np.c_[np.ravel(QX), np.ravel(QY)] example_nr : int
1-7
# Targets: first half class 0, second half class 1 n_pts : int
T = np.vstack((np.tile([True, False], (n, 1)), Number of points in each of the two classes
np.tile([False, True], (n, 1)))) noise : float
Standard deviation of the Gaussian noise
if example_nr == 1 : # two separated clusters
Returns:
X = np.vstack((np.tile([30., 30.], (n, 1)), X : ndarray
np.tile([70., 70.], (n, 1)))) 2 x n_pts array of points
X += rg.normal(size=X.shape, scale=10*noise) # add noise T : ndarray
2 x n_pts array of boolean values
elif example_nr == 2 : # concentric clusters x_grid : ndarray
2 x n_pts array of points in regular grid for visualization
rand_ang = 2 * np.pi * rg.uniform(size=n) dim : tuple of int
X = np.vstack((30 * np.array([np.cos(rand_ang), np.sin(rand_ang)]).T, Dimensions of the grid
np.tile([0., 0.], (n, 1))))
X += [50, 50] # center
X += rg.normal(size=X.shape, scale=5*noise)# add noise
elif example_nr == 3 : # 2x2 checkerboard Example:
n1 = n//2 example_nr = 1
n2 = n//2 + n%2 # if n is odd n2 will have 1 element more n_pts = 2000
noise = 2
X = np.vstack((np.tile([30., 30.], (n1, 1)), X, T, x_grid, dim = make_data(example_nr, n_pts, noise)
np.tile([70., 70.], (n2, 1)),
np.tile([30. ,70.], (n1, 1)), fig, ax = plt.subplots()
np.tile([70., 30.], (n2, 1)))) ax.plot(X[0,T[0]], X[1,T[0]], '.r', alpha=0.3)
X += rg.normal(size=X.shape, scale=10*noise) # add noise ax.plot(X[0,T[1]], X[1,T[1]], '.g', alpha=0.3)
ax.set_xlim(0, 100)
else: ax.set_ylim(0, 100)
print('No data returned - example_nr must be 1, 2, or 3') ax.set_box_aspect(1)
o = rg.permutation(range(2*n)) Authors: Vedrana Andersen Dahl and Anders Bjorholm Dahl - 20/3-2024
vand@dtu.dk, abda@dtu.dk
return X[o].T, T[o].T, x_grid.T, dim
'''
#%% Test of the data generation in_dir = 'cases/'
if __name__ == "__main__": file_names = sorted(os.listdir(in_dir))
#%% file_names = [f for f in file_names if f.endswith('.png')]
import matplotlib.pyplot as plt im = skimage.io.imread(in_dir + file_names[example_nr-1])
n = 1000
noise = 1
fig, ax = plt.subplots(1, 3)
for i, a in enumerate(ax):
example_nr = i + 1
X, T, x_grid, dim = make_data(example_nr, n, noise)
a.scatter(X[0][T[0]], X[1][T[0]], c='r', alpha=0.3, s=15)
a.scatter(X[0][T[1]], X[1][T[1]], c='g', alpha=0.3, s=15)
a.set_aspect('equal', 'box')
a.set_title(f'Example {i} data')
plt.show()
#%% Before training, you should make data zero mean [r_white, c_white] = np.where(im == 255)
[r_gray, c_gray] = np.where(im == 127)
c = np.mean(X, axis=1, keepdims=True) n_white = np.minimum(r_white.shape[0], n_pts)
X_c = X - c n_gray = np.minimum(r_gray.shape[0], n_pts)
fig, ax = plt.subplots(1,1)
ax.scatter(X_c[0][T[0]], X_c[1][T[0]], c='r', alpha=0.3, s=15)
ax.scatter(X_c[0][T[1]], X_c[1][T[1]], c='g', alpha=0.3, s=15)
ax.set_aspect('equal', 'box')
plt.title('Zero-mean data')
plt.show()
rid_white = np.random.permutation(r_white.shape[0])
rid_gray = np.random.permutation(r_gray.shape[0])
pts_white = np.array([c_white[rid_white[:n_white]], r_white[rid_white[:n_white]]])
pts_gray = np.array([c_gray[rid_gray[:n_gray]], r_gray[rid_gray[:n_gray]]])
X = np.hstack((pts_white, pts_gray))/5 + np.random.randn(2, n_white+n_gray)*noise
T = np.zeros((2, n_white+n_gray), dtype=bool)
T[0,:n_white] = True
T[1,n_white:] = True
dim = (100, 100)
QX, QY = np.meshgrid(range(0, dim[0]), range(0, dim[1]))
x_grid = np.vstack((np.ravel(QX), np.ravel(QY)))
return X, T, x_grid, dim
if __name__ == "__main__":
    # Demo: draw one data set and show the two classes in different colours.
    example_nr, n_pts, noise = 1, 2000, 3
    X, T, x_grid, dim = make_data(example_nr, n_pts, noise)

    fig, ax = plt.subplots()
    # Row 0 of T selects the points drawn in red, row 1 those drawn in green.
    for class_mask, marker in zip((T[0], T[1]), ('.r', '.g')):
        ax.plot(X[0, class_mask], X[1, class_mask], marker, alpha=0.3)
    ax.set_xlim(0, 100)
    ax.set_ylim(0, 100)
    ax.set_box_aspect(1)
# %%
#%%
import numpy as np
def make_data(example_nr, n=200, noise=1):
    '''
    Generate data for training a simple neural network.

    Arguments:
        example_nr: a number 1 to 3 for each example
            (1: two separated clusters, 2: concentric clusters,
            3: 2x2 checkerboard).
        n: number of points in each class set.
        noise: noise level, best between 0.5 and 2.
    Returns:
        X: 2 x 2n array of points (there are n points in each class).
        T: 2 x 2n boolean target values; exactly one row is True per column.
        x_grid: 2 x (dim[0]*dim[1]) array of grid points for testing the
            neural network.
        dim: size of the area covered by the grid points.
    Raises:
        ValueError: if example_nr is not 1, 2, or 3.

    Authors: Vedrana Andersen Dahl and Anders Bjorholm Dahl - 25/3-2020
    vand@dtu.dk, abda@dtu.dk
    '''
    rg = np.random.default_rng()

    # Regular grid covering the data area, one grid point per row.
    dim = (100, 100)
    QX, QY = np.meshgrid(range(0, dim[0]), range(0, dim[1]))
    x_grid = np.c_[np.ravel(QX), np.ravel(QY)]

    # Targets: first half class 0, second half class 1
    T = np.vstack((np.tile([True, False], (n, 1)),
                   np.tile([False, True], (n, 1))))

    if example_nr == 1:  # two separated clusters
        X = np.vstack((np.tile([30., 30.], (n, 1)),
                       np.tile([70., 70.], (n, 1))))
        X += rg.normal(size=X.shape, scale=10*noise)  # add noise
    elif example_nr == 2:  # concentric clusters
        rand_ang = 2 * np.pi * rg.uniform(size=n)
        X = np.vstack((30 * np.array([np.cos(rand_ang), np.sin(rand_ang)]).T,
                       np.tile([0., 0.], (n, 1))))
        X += [50, 50]  # center
        X += rg.normal(size=X.shape, scale=5*noise)  # add noise
    elif example_nr == 3:  # 2x2 checkerboard
        n1 = n//2
        n2 = n//2 + n % 2  # if n is odd n2 will have 1 element more
        X = np.vstack((np.tile([30., 30.], (n1, 1)),
                       np.tile([70., 70.], (n2, 1)),
                       np.tile([30., 70.], (n1, 1)),
                       np.tile([70., 30.], (n2, 1))))
        X += rg.normal(size=X.shape, scale=10*noise)  # add noise
    else:
        # Previously this only printed the message and then failed further
        # down because X was never assigned; fail explicitly instead.
        raise ValueError('No data returned - example_nr must be 1, 2, or 3')

    # Shuffle points and targets with the same permutation so they stay paired.
    o = rg.permutation(2*n)
    return X[o].T, T[o].T, x_grid.T, dim
#%% Test of the data generation
if __name__ == "__main__":
    #%%
    import matplotlib.pyplot as plt

    n = 1000
    noise = 1

    # One subplot per example; class 0 is drawn in red, class 1 in green.
    fig, ax = plt.subplots(1, 3)
    for i, a in enumerate(ax):
        example_nr = i + 1
        X, T, x_grid, dim = make_data(example_nr, n, noise)
        a.scatter(X[0][T[0]], X[1][T[0]], c='r', alpha=0.3, s=15)
        a.scatter(X[0][T[1]], X[1][T[1]], c='g', alpha=0.3, s=15)
        a.set_aspect('equal', 'box')
        # Label with the 1-based example number passed to make_data,
        # not the 0-based subplot index.
        a.set_title(f'Example {example_nr} data')
    plt.show()

    #%% Before training, you should make data zero mean
    # X and T still hold the data from the last loop iteration above.
    c = np.mean(X, axis=1, keepdims=True)
    X_c = X - c
    fig, ax = plt.subplots(1, 1)
    ax.scatter(X_c[0][T[0]], X_c[1][T[0]], c='r', alpha=0.3, s=15)
    ax.scatter(X_c[0][T[1]], X_c[1][T[1]], c='g', alpha=0.3, s=15)
    ax.set_aspect('equal', 'box')
    plt.title('Zero-mean data')
    plt.show()
# %%
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment