588 lines
27 KiB
Python
588 lines
27 KiB
Python
|
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
# ==============================================================================
|
||
|
"""Contains the definition for inception v3 classification network."""
|
||
|
|
||
|
from __future__ import absolute_import
|
||
|
from __future__ import division
|
||
|
from __future__ import print_function
|
||
|
|
||
|
import tensorflow as tf
|
||
|
|
||
|
# Short alias for the TF-Slim layers library used throughout this file.
slim = tf.contrib.slim


def trunc_normal(stddev):
  """Returns a truncated-normal initializer built with (0.0, stddev)."""
  return tf.truncated_normal_initializer(0.0, stddev)
|
||
|
|
||
|
|
||
|
def inception_v3_base(inputs,
                      final_endpoint='Mixed_7c',
                      min_depth=16,
                      depth_multiplier=1.0,
                      scope=None):
  """Inception model from http://arxiv.org/abs/1512.00567.

  Constructs an Inception v3 network from inputs to the given final endpoint.
  This method can construct the network up to the final inception block
  Mixed_7c.

  Note that the names of the layers in the paper do not correspond to the names
  of the endpoints registered by this function although they build the same
  network.

  Here is a mapping from the old_names to the new names:
  Old name          | New name
  =======================================
  conv0             | Conv2d_1a_3x3
  conv1             | Conv2d_2a_3x3
  conv2             | Conv2d_2b_3x3
  pool1             | MaxPool_3a_3x3
  conv3             | Conv2d_3b_1x1
  conv4             | Conv2d_4a_3x3
  pool2             | MaxPool_5a_3x3
  mixed_35x35x256a  | Mixed_5b
  mixed_35x35x288a  | Mixed_5c
  mixed_35x35x288b  | Mixed_5d
  mixed_17x17x768a  | Mixed_6a
  mixed_17x17x768b  | Mixed_6b
  mixed_17x17x768c  | Mixed_6c
  mixed_17x17x768d  | Mixed_6d
  mixed_17x17x768e  | Mixed_6e
  mixed_8x8x1280a   | Mixed_7a
  mixed_8x8x2048a   | Mixed_7b
  mixed_8x8x2048b   | Mixed_7c

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    final_endpoint: specifies the endpoint to construct the network up to. It
      can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c',
      'Mixed_6d', 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c'].
    min_depth: Minimum depth value (number of channels) for all convolution ops.
      Enforced when depth_multiplier < 1, and not an active constraint when
      depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    scope: Optional variable_scope.

  Returns:
    tensor_out: output tensor corresponding to the final_endpoint.
    end_points: a dict mapping each endpoint name built so far to its
      activation, for external use, for example summaries or losses.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values,
      or depth_multiplier <= 0. Both conditions are checked before any op is
      created.
  """
  # end_points will collect relevant activations for external use, for example
  # summaries or losses.
  end_points = {}

  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')

  # Reject an unknown endpoint before touching the graph; otherwise the whole
  # network would be constructed first and the error raised only at the end.
  valid_endpoints = (
      'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'MaxPool_3a_3x3',
      'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3',
      'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
      'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e',
      'Mixed_7a', 'Mixed_7b', 'Mixed_7c',
  )
  if final_endpoint not in valid_endpoints:
    raise ValueError('Unknown final endpoint %s' % final_endpoint)

  # Scales a nominal channel count by depth_multiplier, never going below
  # min_depth.
  depth = lambda d: max(int(d * depth_multiplier), min_depth)

  # NOTE(review): the scope strings below are not fully regular (for example
  # 'Conv_1_0c_5x5' in Mixed_5c and 'Conv2d_1a_1x1' on 3x3 convolutions in
  # Mixed_6a). They are kept verbatim because they presumably determine the
  # variable names that existing checkpoints rely on -- confirm before renaming.
  with tf.variable_scope(scope, 'InceptionV3', [inputs]):
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='VALID'):
      # 299 x 299 x 3
      end_point = 'Conv2d_1a_3x3'
      net = slim.conv2d(inputs, depth(32), [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 149 x 149 x 32
      end_point = 'Conv2d_2a_3x3'
      net = slim.conv2d(net, depth(32), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 147 x 147 x 32
      end_point = 'Conv2d_2b_3x3'
      net = slim.conv2d(net, depth(64), [3, 3], padding='SAME', scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 147 x 147 x 64
      end_point = 'MaxPool_3a_3x3'
      net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 73 x 73 x 64
      end_point = 'Conv2d_3b_1x1'
      net = slim.conv2d(net, depth(80), [1, 1], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 73 x 73 x 80.
      end_point = 'Conv2d_4a_3x3'
      net = slim.conv2d(net, depth(192), [3, 3], scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 71 x 71 x 192.
      end_point = 'MaxPool_5a_3x3'
      net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # 35 x 35 x 192.

    # Inception blocks
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='SAME'):
      # mixed: 35 x 35 x 256.
      end_point = 'Mixed_5b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
                                 scope='Conv2d_0b_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(32), [1, 1],
                                 scope='Conv2d_0b_1x1')
        # NOTE(review): every tf.concat call in this function passes the
        # dimension (3, the channel axis of the NHWC inputs) first and the
        # list of tensors second -- confirm this argument order matches the
        # TensorFlow version this file targets.
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_1: 35 x 35 x 288.
      end_point = 'Mixed_5c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0b_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
                                 scope='Conv_1_0c_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(64), [1, 1],
                                 scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_2: 35 x 35 x 288.
      end_point = 'Mixed_5d'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(48), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(64), [5, 5],
                                 scope='Conv2d_0b_5x5')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_2 = slim.conv2d(branch_2, depth(96), [3, 3],
                                 scope='Conv2d_0c_3x3')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_3: 17 x 17 x 768.
      end_point = 'Mixed_6a'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(384), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(64), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3],
                                 scope='Conv2d_0b_3x3')
          branch_1 = slim.conv2d(branch_1, depth(96), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_1x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(3, [branch_0, branch_1, branch_2])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed4: 17 x 17 x 768.
      end_point = 'Mixed_6b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(128), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(128), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(128), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(128), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_5: 17 x 17 x 768.
      end_point = 'Mixed_6c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_6: 17 x 17 x 768.
      end_point = 'Mixed_6d'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(160), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(160), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(160), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(160), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_7: 17 x 17 x 768.
      end_point = 'Mixed_6e'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
                                 scope='Conv2d_0b_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0c_1x7')
          branch_2 = slim.conv2d(branch_2, depth(192), [7, 1],
                                 scope='Conv2d_0d_7x1')
          branch_2 = slim.conv2d(branch_2, depth(192), [1, 7],
                                 scope='Conv2d_0e_1x7')
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                 scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_8: 8 x 8 x 1280.
      end_point = 'Mixed_7a'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, depth(320), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [1, 7],
                                 scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, depth(192), [7, 1],
                                 scope='Conv2d_0c_7x1')
          branch_1 = slim.conv2d(branch_1, depth(192), [3, 3], stride=2,
                                 padding='VALID', scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(3, [branch_0, branch_1, branch_2])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
      # mixed_9: 8 x 8 x 2048.
      end_point = 'Mixed_7b'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = tf.concat(3, [
              slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
              slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1')])
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(
              branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = tf.concat(3, [
              slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
              slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')])
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points

      # mixed_10: 8 x 8 x 2048.
      end_point = 'Mixed_7c'
      with tf.variable_scope(end_point):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = tf.concat(3, [
              slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
              slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1')])
        with tf.variable_scope('Branch_2'):
          branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
          branch_2 = slim.conv2d(
              branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
          branch_2 = tf.concat(3, [
              slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
              slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')])
        with tf.variable_scope('Branch_3'):
          branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
          branch_3 = slim.conv2d(
              branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
      end_points[end_point] = net
      if end_point == final_endpoint: return net, end_points
  # Safeguard only: the up-front check should make this unreachable unless
  # valid_endpoints and the construction code above drift apart.
  raise ValueError('Unknown final endpoint %s' % final_endpoint)
|
||
|
|
||
|
|
||
|
def inception_v3(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 min_depth=16,
                 depth_multiplier=1.0,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV3'):
  """Builds the Inception v3 classifier (http://arxiv.org/abs/1512.00567).

  "Rethinking the Inception Architecture for Computer Vision"

  Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
  Zbigniew Wojna.

  Called with its defaults, this function builds the model as defined in the
  paper. Variations of the network can be explored through the
  dropout_keep_prob, min_depth and depth_multiplier arguments.

  The default image size used to train this network is 299x299.

  The function runs inception_v3_base, attaches an auxiliary classification
  head to the 'Mixed_6e' endpoint, and then adds the final pooling, dropout
  and logits layers on top of the base network output.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether the network is being built for training; forwarded
      to slim.batch_norm and slim.dropout.
    dropout_keep_prob: the fraction of activation values that are retained.
    min_depth: Minimum depth value (number of channels) for all convolution
      ops. Enforced when depth_multiplier < 1, and not an active constraint
      when depth_multiplier >= 1.
    depth_multiplier: Float multiplier for the depth (number of channels)
      for all convolution ops. The value must be greater than zero. Typical
      usage will be to set this value in (0, 1) to reduce the number of
      parameters or computation cost of the model.
    prediction_fn: a function to get predictions out of logits.
    spatial_squeeze: if True, logits has shape [B, C]; if False, logits has
      shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To
      be able to reuse, 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    logits: the pre-softmax activations, a tensor of size
      [batch_size, num_classes]
    end_points: a dictionary from components of the network to the
      corresponding activation. In addition to the base network endpoints it
      holds 'AuxLogits', 'PreLogits', 'Logits' and 'Predictions'.

  Raises:
    ValueError: if 'depth_multiplier' is less than or equal to zero.
  """
  if depth_multiplier <= 0:
    raise ValueError('depth_multiplier is not greater than zero.')

  def depth(channels):
    # Scaled channel count, clamped from below by min_depth.
    return max(int(channels * depth_multiplier), min_depth)

  with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes],
                         reuse=reuse) as net_scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v3_base(
          inputs,
          scope=net_scope,
          min_depth=min_depth,
          depth_multiplier=depth_multiplier)

      # Auxiliary head: a second classifier branching off Mixed_6e.
      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                          stride=1, padding='SAME'):
        aux = end_points['Mixed_6e']
        with tf.variable_scope('AuxLogits'):
          aux = slim.avg_pool2d(aux, [5, 5], stride=3, padding='VALID',
                                scope='AvgPool_1a_5x5')
          aux = slim.conv2d(aux, depth(128), [1, 1], scope='Conv2d_1b_1x1')

          # The kernel of the next convolution is shrunk if the feature map
          # is smaller than 5x5; the scope name follows the kernel used.
          aux_kernel = _reduced_kernel_size_for_small_input(aux, [5, 5])
          aux_conv_scope = 'Conv2d_2a_{}x{}'.format(*aux_kernel)
          aux = slim.conv2d(aux, depth(768), aux_kernel,
                            weights_initializer=trunc_normal(0.01),
                            padding='VALID', scope=aux_conv_scope)
          aux = slim.conv2d(aux, num_classes, [1, 1],
                            activation_fn=None, normalizer_fn=None,
                            weights_initializer=trunc_normal(0.001),
                            scope='Conv2d_2b_1x1')
          if spatial_squeeze:
            aux = tf.squeeze(aux, [1, 2], name='SpatialSqueeze')
          end_points['AuxLogits'] = aux

      # Main head: pooling, dropout and the classification layer.
      with tf.variable_scope('Logits'):
        pool_kernel = _reduced_kernel_size_for_small_input(net, [8, 8])
        pool_scope = 'AvgPool_1a_{}x{}'.format(*pool_kernel)
        net = slim.avg_pool2d(net, pool_kernel, padding='VALID',
                              scope=pool_scope)
        # 1 x 1 x 2048
        net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')
        end_points['PreLogits'] = net
        # 2048
        logits = slim.conv2d(net, num_classes, [1, 1],
                             activation_fn=None, normalizer_fn=None,
                             scope='Conv2d_1c_1x1')
        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
        # 1000
      end_points['Logits'] = logits
      end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points
inception_v3.default_image_size = 299
|
||
|
|
||
|
|
||
|
def _reduced_kernel_size_for_small_input(input_tensor, kernel_size):
  """Shrinks the requested kernel so it never exceeds the input's spatial size.

  When the height or width of the input is not known at graph construction
  time, the input is assumed to be large enough and the requested kernel size
  is returned unchanged.

  Args:
    input_tensor: input tensor of size [batch_size, height, width, channels].
    kernel_size: desired kernel size of length 2: [kernel_height, kernel_width]

  Returns:
    The requested kernel_size itself when height or width is unknown,
    otherwise a new two-element list
    [min(height, kernel_height), min(width, kernel_width)].

  TODO(jrru): Make this function work with unknown shapes. Theoretically, this
  can be done with the code below. Problems are two-fold: (1) If the shape was
  known, it will be lost. (2) inception.slim.ops._two_element_tuple cannot
  handle tensors that define the kernel size.
      shape = tf.shape(input_tensor)
      return = tf.pack([tf.minimum(shape[1], kernel_size[0]),
                        tf.minimum(shape[2], kernel_size[1])])
  """
  dims = input_tensor.get_shape().as_list()
  # Only clamp when both spatial dimensions are statically known.
  if dims[1] is not None and dims[2] is not None:
    return [min(dims[1], kernel_size[0]), min(dims[2], kernel_size[1])]
  return kernel_size
|
||
|
|
||
|
|
||
|
def inception_v3_arg_scope(weight_decay=0.00004,
                           stddev=0.1):
  """Builds the default arg scope for the InceptionV3 model.

  The scope applies an L2 weights regularizer to slim.conv2d and
  slim.fully_connected, and additionally gives slim.conv2d a truncated-normal
  weights initializer, ReLU activation and batch normalization.

  Args:
    weight_decay: The weight decay to use for regularizing the model.
    stddev: The standard deviation of the truncated normal weight initializer.

  Returns:
    An `arg_scope` to use for the inception v3 model.
  """
  batch_norm_params = dict(
      # Decay for the moving averages.
      decay=0.9997,
      # epsilon to prevent 0s in variance.
      epsilon=0.001,
      # collection containing update_ops.
      updates_collections=tf.GraphKeys.UPDATE_OPS)

  # Weight decay is applied to the weights of Conv and FC layers.
  l2_penalty = slim.l2_regularizer(weight_decay)
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      weights_regularizer=l2_penalty):
    conv_initializer = tf.truncated_normal_initializer(stddev=stddev)
    with slim.arg_scope([slim.conv2d],
                        weights_initializer=conv_initializer,
                        activation_fn=tf.nn.relu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params) as conv_scope:
      return conv_scope
|