    """
    Created on Tue Nov 21 10:09:39 2017
    This file is utilized to denote different layers, there are conv_layer, conv_layer_enc, max_pool, up_sampling
    @author: s161488
    """
    import numpy as np
    import tensorflow as tf
    import math
    
    
def conv_layer_renew(bottom, name, shape, training_state, strides=(1, 1), activation_func=tf.nn.relu, padding='same',
                     dilation_rate=(1, 1), bias_state=True):
    """
    A simplified convolutional layer.

    Args:
        bottom: input tensor, dtype tf.float32, shape [Batch_Size, Height, Width, Num_Input_Channel]
        name: variable scope name for the layer
        shape: shape[0] is the spatial filter size, shape[1] is the number of output channels
        training_state: a boolean tensor. We first fix the "downsampling" path and only train the
            Gaussian filter; once that is roughly fixed, we retrain the whole network. When True,
            the kernel and bias are created as trainable variables; when False, the existing
            variables are reused and frozen.
        strides: convolution strides, defaults to (1, 1)
        activation_func: the activation function, e.g. tf.nn.relu or None
        padding: padding mode, defaults to 'same'
        dilation_rate: defaults to (1, 1)
        bias_state: whether to add a bias term
    """
    with tf.variable_scope(name) as scope:
        w_init = tf.truncated_normal_initializer(stddev=1)
        b_init = tf.constant_initializer(0.0)
        # tf.cond traces both branches at graph-construction time: the first branch creates the
        # variables, the second reuses them (trainable=False).
        output = tf.cond(training_state,
                         lambda: tf.layers.conv2d(bottom, filters=shape[1], kernel_size=shape[0], strides=strides,
                                                  padding=padding,
                                                  dilation_rate=dilation_rate, activation=activation_func,
                                                  use_bias=bias_state, kernel_initializer=w_init,
                                                  bias_initializer=b_init, trainable=True, name=scope.name),
                         lambda: tf.layers.conv2d(bottom, filters=shape[1], kernel_size=shape[0], strides=strides,
                                                  padding=padding,
                                                  dilation_rate=dilation_rate, activation=activation_func,
                                                  use_bias=bias_state, kernel_initializer=w_init,
                                                  bias_initializer=b_init, trainable=False, name=scope.name,
                                                  reuse=True))

    return output
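

# A minimal usage sketch for conv_layer_renew (not part of the original file). The placeholder
# shapes and the scope name "conv1" are illustrative assumptions, not upstream code.
def _example_conv_usage():
    images = tf.placeholder(tf.float32, [None, 128, 128, 3])
    training_state = tf.placeholder(tf.bool, [])
    # 3x3 kernel, 64 output channels; the layer is frozen and reused when training_state is False.
    return conv_layer_renew(images, name="conv1", shape=[3, 64], training_state=training_state)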
    
    
def get_deconv_layer_weight(shape):
    """
    Args:
        shape: 4d shape, [kernel_size, kernel_size, output_channel, input_channel]

    Returns:
        an initializer for the deconvolution filter that performs bilinear upsampling.

    Source:
        https://github.com/MarvinTeichmann/tensorflow-fcn/blob/master/fcn16_vgg.py#L245
    """
    width = shape[0]
    height = shape[1]  # was shape[0]; the second entry is the kernel height
    f = math.ceil(width / 2.0)
    c = (2.0 * f - 1 - f % 2) / (2.0 * f)
    bilinear = np.zeros([shape[0], shape[1]])
    for x in range(width):
        for y in range(height):
            bilinear[x, y] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
    weights = np.zeros(shape)
    # Place the bilinear kernel on the channel diagonal so each channel is upsampled independently.
    for i in range(shape[2]):
        weights[:, :, i, i] = bilinear
    init = tf.constant_initializer(value=weights, dtype=tf.float32)
    return init
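

# A small sanity check (not part of the original file), assuming the usual stride-2 setting with
# a 4x4 kernel: the 1-D profile computed by the loops above is [0.25, 0.75, 0.75, 0.25], the
# standard bilinear interpolation weights, and the 2-D kernel is the outer product of this
# profile with itself.
def _check_bilinear_profile(kernel_size=4):
    f = math.ceil(kernel_size / 2.0)
    c = (2.0 * f - 1 - f % 2) / (2.0 * f)
    return [1 - abs(x / f - c) for x in range(kernel_size)]  # -> [0.25, 0.75, 0.75, 0.25]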
    
    
def deconv_layer_renew(bottom, filter_shape, output_channel, name, strides, training_state, padding='same',
                       bilinear_initialization=False):
    """
    Transposed-convolution (upsampling) layer. As in conv_layer_renew, training_state selects
    between a trainable branch and a frozen, reused branch.
    """
    with tf.variable_scope(name) as scope:
        b_init = tf.constant_initializer(0.0)  # needed in both branches (was undefined in the bilinear case)
        if bilinear_initialization is True:
            w_shape = [filter_shape, filter_shape, output_channel,
                       bottom.shape.as_list()[-1]]  # change to be bilinear upsampling!
            w_init = get_deconv_layer_weight(w_shape)
            print("The initialization of the deconvolution kernel is bilinear")
        else:
            w_init = tf.truncated_normal_initializer(stddev=0.1)
        output = tf.cond(training_state,
                         lambda: tf.layers.conv2d_transpose(bottom, output_channel, filter_shape, strides=strides,
                                                            padding=padding, activation=tf.nn.relu, use_bias=True,
                                                            kernel_initializer=w_init, bias_initializer=b_init,
                                                            trainable=True, name=scope.name),
                         lambda: tf.layers.conv2d_transpose(bottom, output_channel, filter_shape, strides=strides,
                                                            padding=padding, activation=tf.nn.relu, use_bias=True,
                                                            kernel_initializer=w_init, bias_initializer=b_init,
                                                            trainable=False, name=scope.name, reuse=True))
    return output
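

# A minimal end-to-end sketch (not part of the original file): the shapes, scope names and the
# stride-2 bilinear upsampling below are illustrative assumptions about how the two builders
# fit together, not the upstream training graph.
def _example_encoder_decoder():
    images = tf.placeholder(tf.float32, [None, 64, 64, 3])
    training_state = tf.placeholder(tf.bool, [])
    feat = conv_layer_renew(images, name="conv1", shape=[3, 32], training_state=training_state)
    # Upsample back to the input resolution with a bilinearly initialized 4x4, stride-2 kernel.
    return deconv_layer_renew(feat, filter_shape=4, output_channel=32, name="up1", strides=2,
                              training_state=training_state, bilinear_initialization=True)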