# handson-ml/nets/alexnet.py

# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a model definition for AlexNet.

This work was first described in:
  ImageNet Classification with Deep Convolutional Neural Networks
  Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton

and later refined in:
  One weird trick for parallelizing convolutional neural networks
  Alex Krizhevsky, 2014

Here we provide the implementation proposed in "One weird trick" and not
"ImageNet Classification", as per the paper, the LRN layers have been removed.

Usage:
  with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):
    outputs, end_points = alexnet.alexnet_v2(inputs)

@@alexnet_v2
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

# Short alias for the TF-Slim library used by every definition in this module.
slim = tf.contrib.slim


def trunc_normal(stddev):
  """Returns a truncated normal initializer built from (0.0, stddev).

  Args:
    stddev: second positional argument forwarded to
      `tf.truncated_normal_initializer`.

  Returns:
    The object returned by `tf.truncated_normal_initializer(0.0, stddev)`.
  """
  return tf.truncated_normal_initializer(0.0, stddev)


def alexnet_v2_arg_scope(weight_decay=0.0005):
  """Builds the default slim arg scope used with `alexnet_v2`.

  The scope sets, for `slim.conv2d` and `slim.fully_connected`, a ReLU
  activation, biases initialized to the constant 0.1 and an L2 weights
  regularizer; it additionally sets padding to 'SAME' for `slim.conv2d` and
  to 'VALID' for `slim.max_pool2d`.

  Args:
    weight_decay: value passed to `slim.l2_regularizer` for the weights.

  Returns:
    The arg scope captured inside the innermost `slim.arg_scope`, suitable for
    `with slim.arg_scope(alexnet_v2_arg_scope()):`.
  """
  # Defaults shared by the convolutional and fully connected layers.
  shared_defaults = dict(
      activation_fn=tf.nn.relu,
      biases_initializer=tf.constant_initializer(0.1),
      weights_regularizer=slim.l2_regularizer(weight_decay))
  with slim.arg_scope([slim.conv2d, slim.fully_connected], **shared_defaults):
    # Entered left to right, exactly like two nested `with` statements.
    with slim.arg_scope([slim.conv2d], padding='SAME'), \
         slim.arg_scope([slim.max_pool2d], padding='VALID') as scope_defaults:
      return scope_defaults


def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
  """AlexNet version 2.

  Described in: http://arxiv.org/pdf/1404.5997v2.pdf
  Parameters from:
  github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
  layers-imagenet-1gpu.cfg

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224. To use in fully
        convolutional mode, set spatial_squeeze to false.
        The LRN layers have been removed and the initializers have been
        changed from random_normal_initializer to xavier_initializer.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes (output depth of `fc8`).
    is_training: whether or not the model is being trained; forwarded to the
      dropout layers.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    A `(net, end_points)` tuple. `net` is the output of the last layer, `fc8`,
    which is built without an activation function, so it holds unnormalized
    logits (not log predictions); when `spatial_squeeze` is true, axes 1 and 2
    are squeezed out of it. `end_points` is a dict built from the layer outputs
    collected under this scope; when `spatial_squeeze` is true its
    `<scope name>/fc8` entry is set to the squeezed tensor.
  """
  with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=[end_points_collection]):
      net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
                        scope='conv1')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
      net = slim.conv2d(net, 192, [5, 5], scope='conv2')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
      net = slim.conv2d(net, 384, [3, 3], scope='conv3')
      net = slim.conv2d(net, 384, [3, 3], scope='conv4')
      net = slim.conv2d(net, 256, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')

      # Use conv2d instead of fully_connected layers.
      with slim.arg_scope([slim.conv2d],
                          weights_initializer=trunc_normal(0.005),
                          biases_initializer=tf.constant_initializer(0.1)):
        net = slim.conv2d(net, 4096, [5, 5], padding='VALID',
                          scope='fc6')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout6')
        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7')
        # Zero-initialized biases for the logits layer. A constructed
        # initializer is passed (as for the 0.1 biases above) rather than the
        # bare `tf.zeros_initializer` symbol: that symbol is a function in
        # early TF releases but a class from TF 0.12/1.0 on, where passing it
        # uncalled can raise a TypeError. NOTE(review): confirm against the
        # TensorFlow version this project pins.
        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          biases_initializer=tf.constant_initializer(0.0),
                          scope='fc8')

      # Convert end_points_collection into a end_point dict.
      end_points = dict(tf.get_collection(end_points_collection))
      if spatial_squeeze:
        # Drop the two spatial axes and expose the squeezed tensor as fc8.
        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
# Input height/width the network is meant to be used with in classification
# mode (see the docstring note about resizing to 224x224).
alexnet_v2.default_image_size = 224
Add TF models slim nets 2016-09-25 18:42:01 +02:00			`# Copyright 2016 The TensorFlow Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`# ==============================================================================`
			`"""Contains a model definition for AlexNet.`

			`This work was first described in:`
			`ImageNet Classification with Deep Convolutional Neural Networks`
			`Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton`

			`and later refined in:`
			`One weird trick for parallelizing convolutional neural networks`
			`Alex Krizhevsky, 2014`

			`Here we provide the implementation proposed in "One weird trick" and not`
			`"ImageNet Classification", as per the paper, the LRN layers have been removed.`

			`Usage:`
			`with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):`
			`outputs, end_points = alexnet.alexnet_v2(inputs)`

			`@@alexnet_v2`
			`"""`

			`from __future__ import absolute_import`
			`from __future__ import division`
			`from __future__ import print_function`

			`import tensorflow as tf`

			`slim = tf.contrib.slim`
			`trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)`


			`def alexnet_v2_arg_scope(weight_decay=0.0005):`
			`with slim.arg_scope([slim.conv2d, slim.fully_connected],`
			`activation_fn=tf.nn.relu,`
			`biases_initializer=tf.constant_initializer(0.1),`
			`weights_regularizer=slim.l2_regularizer(weight_decay)):`
			`with slim.arg_scope([slim.conv2d], padding='SAME'):`
			`with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:`
			`return arg_sc`


			`def alexnet_v2(inputs,`
			`num_classes=1000,`
			`is_training=True,`
			`dropout_keep_prob=0.5,`
			`spatial_squeeze=True,`
			`scope='alexnet_v2'):`
			`"""AlexNet version 2.`

			`Described in: http://arxiv.org/pdf/1404.5997v2.pdf`
			`Parameters from:`
			`github.com/akrizhevsky/cuda-convnet2/blob/master/layers/`
			`layers-imagenet-1gpu.cfg`

			`Note: All the fully_connected layers have been transformed to conv2d layers.`
			`To use in classification mode, resize input to 224x224. To use in fully`
			`convolutional mode, set spatial_squeeze to false.`
			`The LRN layers have been removed and change the initializers from`
			`random_normal_initializer to xavier_initializer.`

			`Args:`
			`inputs: a tensor of size [batch_size, height, width, channels].`
			`num_classes: number of predicted classes.`
			`is_training: whether or not the model is being trained.`
			`dropout_keep_prob: the probability that activations are kept in the dropout`
			`layers during training.`
			`spatial_squeeze: whether or not should squeeze the spatial dimensions of the`
			`outputs. Useful to remove unnecessary dimensions for classification.`
			`scope: Optional scope for the variables.`

			`Returns:`
			`the last op containing the log predictions and end_points dict.`
			`"""`
			`with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:`
			`end_points_collection = sc.name + '_end_points'`
			`# Collect outputs for conv2d, fully_connected and max_pool2d.`
			`with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],`
			`outputs_collections=[end_points_collection]):`
			`net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',`
			`scope='conv1')`
			`net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')`
			`net = slim.conv2d(net, 192, [5, 5], scope='conv2')`
			`net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')`
			`net = slim.conv2d(net, 384, [3, 3], scope='conv3')`
			`net = slim.conv2d(net, 384, [3, 3], scope='conv4')`
			`net = slim.conv2d(net, 256, [3, 3], scope='conv5')`
			`net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')`

			`# Use conv2d instead of fully_connected layers.`
			`with slim.arg_scope([slim.conv2d],`
			`weights_initializer=trunc_normal(0.005),`
			`biases_initializer=tf.constant_initializer(0.1)):`
			`net = slim.conv2d(net, 4096, [5, 5], padding='VALID',`
			`scope='fc6')`
			`net = slim.dropout(net, dropout_keep_prob, is_training=is_training,`
			`scope='dropout6')`
			`net = slim.conv2d(net, 4096, [1, 1], scope='fc7')`
			`net = slim.dropout(net, dropout_keep_prob, is_training=is_training,`
			`scope='dropout7')`
			`net = slim.conv2d(net, num_classes, [1, 1],`
			`activation_fn=None,`
			`normalizer_fn=None,`
			`biases_initializer=tf.zeros_initializer,`
			`scope='fc8')`

			`# Convert end_points_collection into a end_point dict.`
			`end_points = dict(tf.get_collection(end_points_collection))`
			`if spatial_squeeze:`
			`net = tf.squeeze(net, [1, 2], name='fc8/squeezed')`
			`end_points[sc.name + '/fc8'] = net`
			`return net, end_points`
			`alexnet_v2.default_image_size = 224`