"""
Created on Tue Nov 21 10:09:39 2017
This file is utilized to denote different layers, there are conv_layer, conv_layer_enc, max_pool, up_sampling
@author: s161488
"""
import math

import numpy as np
import tensorflow as tf


def conv_layer_renew(bottom, name, shape, training_state, strides=(1, 1), activation_func=tf.nn.relu, padding='same',
                     dilation_rate=(1, 1), bias_state=True):
    """A simplified convolutional layer.

    Args:
        bottom: input tensor, dtype tf.float32, shape [Batch_Size, Height, Width, Num_Input_Channel]
        name: variable scope name for the layer
        shape: shape[0] is the kernel size, shape[1] is the number of output channels
        training_state: a boolean tensor. We first fix the "downsampling" path and only train the
            Gaussian filter; once that is roughly converged, we retrain the whole network. When True,
            the kernel is created as trainable; otherwise the same variables are reused as non-trainable.
        strides: convolution strides, defaults to (1, 1)
        activation_func: activation function, e.g. tf.nn.relu or None
        padding: padding mode, defaults to 'same'
        dilation_rate: dilation rate, defaults to (1, 1)
        bias_state: whether to add a bias term
    """
    with tf.variable_scope(name) as scope:
        w_init = tf.truncated_normal_initializer(stddev=1)
        b_init = tf.constant_initializer(0.0)
        # Both branches build the same conv op; the second branch reuses the variables
        # created by the first, so only the trainable flag differs.
        output = tf.cond(training_state,
                         lambda: tf.layers.conv2d(bottom, filters=shape[1], kernel_size=shape[0], strides=strides,
                                                  padding=padding,
                                                  dilation_rate=dilation_rate, activation=activation_func,
                                                  use_bias=bias_state, kernel_initializer=w_init,
                                                  bias_initializer=b_init, trainable=True, name=scope.name),
                         lambda: tf.layers.conv2d(bottom, filters=shape[1], kernel_size=shape[0], strides=strides,
                                                  padding=padding,
                                                  dilation_rate=dilation_rate, activation=activation_func,
                                                  use_bias=bias_state, kernel_initializer=w_init,
                                                  bias_initializer=b_init, trainable=False, name=scope.name,
                                                  reuse=True))
    return output
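

# A minimal usage sketch, not part of the original module: the tensor names and
# shapes below are made up for illustration. It wires conv_layer_renew to a
# placeholder batch; feeding the training_state placeholder selects either the
# trainable or the frozen (reused) branch of the tf.cond above.
def _demo_conv_layer_renew():
    images = tf.placeholder(tf.float32, [None, 128, 128, 3], name="demo_images")
    training_state = tf.placeholder(tf.bool, [], name="demo_training_state")
    # shape[0] is the kernel size, shape[1] the number of output channels
    features = conv_layer_renew(images, name="demo_conv", shape=[3, 64],
                                training_state=training_state)
    return features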


def get_deconv_layer_weight(shape):
    """
    Args:
        shape: 4d shape, [kernel_size, kernel_size, output_channel, input_channel]
    Returns:
        an initializer for the deconvolution filter that performs bilinear upsampling.
    Source:
        https://github.com/MarvinTeichmann/tensorflow-fcn/blob/master/fcn16_vgg.py#L245
    """
    width = shape[0]
    height = shape[0]  # assumes a square kernel
    f = math.ceil(width / 2.0)
    c = (2.0 * f - 1 - f % 2) / (2.0 * f)
    bilinear = np.zeros([shape[0], shape[1]])
    for x in range(width):
        for y in range(height):
            bilinear[x, y] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
    # Place the bilinear kernel on the channel diagonal so that every channel
    # is upsampled independently (identity mapping across channels).
    weights = np.zeros(shape)
    for i in range(shape[2]):
        weights[:, :, i, i] = bilinear
    init = tf.constant_initializer(value=weights, dtype=tf.float32)
    return init
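

# A quick inspection sketch (an assumed example, not from the original file): it
# materializes the bilinear initializer for a 4x4 single-channel kernel. For
# width 4, f = 2 and c = 0.75, so each row/column follows [0.25, 0.75, 0.75, 0.25]
# and the kernel is their outer product.
def _demo_bilinear_weight():
    init = get_deconv_layer_weight([4, 4, 1, 1])
    kernel = tf.get_variable("demo_bilinear_kernel", shape=[4, 4, 1, 1],
                             initializer=init)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(kernel)[:, :, 0, 0])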


def deconv_layer_renew(bottom, filter_shape, output_channel, name, strides, training_state, padding='same',
                       bilinear_initialization=False):
    """Transposed convolution (upsampling) layer, optionally initialized as bilinear upsampling.

    As in conv_layer_renew, training_state selects between a trainable branch and a
    frozen branch that reuses the same variables.
    """
    with tf.variable_scope(name) as scope:
        if bilinear_initialization:
            # conv2d_transpose kernels have shape [height, width, output_channel, input_channel]
            w_shape = [filter_shape, filter_shape, output_channel,
                       bottom.shape.as_list()[-1]]
            w_init = get_deconv_layer_weight(w_shape)
            print("The initialization of the deconvolution kernel is bilinear")
        else:
            w_init = tf.truncated_normal_initializer(stddev=0.1)
        b_init = tf.constant_initializer(0.0)
        output = tf.cond(training_state,
                         lambda: tf.layers.conv2d_transpose(bottom, output_channel, filter_shape, strides=strides,
                                                            padding=padding, activation=tf.nn.relu, use_bias=True,
                                                            kernel_initializer=w_init, bias_initializer=b_init,
                                                            trainable=True, name=scope.name),
                         lambda: tf.layers.conv2d_transpose(bottom, output_channel, filter_shape, strides=strides,
                                                            padding=padding, activation=tf.nn.relu, use_bias=True,
                                                            kernel_initializer=w_init, bias_initializer=b_init,
                                                            trainable=False, name=scope.name, reuse=True))
    return output
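

# A minimal end-to-end sketch with made-up shapes: 2x upsampling of a 32-channel
# feature map with a 4x4 kernel. For a clean identity bilinear upsampling,
# output_channel should match the input channel count, since
# get_deconv_layer_weight fills only the channel diagonal of the kernel.
def _demo_deconv_layer_renew():
    features = tf.placeholder(tf.float32, [None, 64, 64, 32], name="demo_features")
    training_state = tf.placeholder(tf.bool, [], name="demo_training_state")
    upsampled = deconv_layer_renew(features, filter_shape=4, output_channel=32,
                                   name="demo_deconv", strides=(2, 2),
                                   training_state=training_state,
                                   bilinear_initialization=True)
    return upsampled  # shape [None, 128, 128, 32]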