# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains the definition of the Inception V4 architecture.

As described in http://arxiv.org/abs/1602.07261.

  Inception-v4, Inception-ResNet and the Impact of Residual Connections
  on Learning
  Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from nets import inception_utils

slim = tf.contrib.slim


def block_inception_a(inputs, scope=None, reuse=None):
  """Builds Inception-A block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(scope, 'BlockInceptionA', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 96, [1, 1], scope='Conv2d_0a_1x1')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='Conv2d_0b_3x3')
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.conv2d(inputs, 64, [1, 1], scope='Conv2d_0a_1x1')
        branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
        branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0c_3x3')
      with tf.variable_scope('Branch_3'):
        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1')
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
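
# Note: each block in this file concatenates its parallel branches along the
# channel axis (axis=3, NHWC layout). For the 35 x 35 x 384 inputs it sees in
# inception_v4_base, Inception-A emits 96 + 96 + 96 + 96 = 384 channels at
# stride 1 with SAME padding, so its output shape equals its input shape and
# the block can be stacked repeatedly.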


def block_reduction_a(inputs, scope=None, reuse=None):
  """Builds Reduction-A block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(scope, 'BlockReductionA', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 384, [3, 3], stride=2, padding='VALID',
                               scope='Conv2d_1a_3x3')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')
        branch_1 = slim.conv2d(branch_1, 256, [3, 3], stride=2,
                               padding='VALID', scope='Conv2d_1a_3x3')
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
                                   scope='MaxPool_1a_3x3')
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2])


def block_inception_b(inputs, scope=None, reuse=None):
  """Builds Inception-B block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(scope, 'BlockInceptionB', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 224, [1, 7], scope='Conv2d_0b_1x7')
        branch_1 = slim.conv2d(branch_1, 256, [7, 1], scope='Conv2d_0c_7x1')
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_2 = slim.conv2d(branch_2, 192, [7, 1], scope='Conv2d_0b_7x1')
        branch_2 = slim.conv2d(branch_2, 224, [1, 7], scope='Conv2d_0c_1x7')
        branch_2 = slim.conv2d(branch_2, 224, [7, 1], scope='Conv2d_0d_7x1')
        branch_2 = slim.conv2d(branch_2, 256, [1, 7], scope='Conv2d_0e_1x7')
      with tf.variable_scope('Branch_3'):
        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])


def block_reduction_b(inputs, scope=None, reuse=None):
  """Builds Reduction-B block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(scope, 'BlockReductionB', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 192, [1, 1], scope='Conv2d_0a_1x1')
        branch_0 = slim.conv2d(branch_0, 192, [3, 3], stride=2,
                               padding='VALID', scope='Conv2d_1a_3x3')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = slim.conv2d(branch_1, 256, [1, 7], scope='Conv2d_0b_1x7')
        branch_1 = slim.conv2d(branch_1, 320, [7, 1], scope='Conv2d_0c_7x1')
        branch_1 = slim.conv2d(branch_1, 320, [3, 3], stride=2,
                               padding='VALID', scope='Conv2d_1a_3x3')
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
                                   scope='MaxPool_1a_3x3')
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2])


def block_inception_c(inputs, scope=None, reuse=None):
  """Builds Inception-C block for Inception v4 network."""
  # By default use stride=1 and SAME padding
  with slim.arg_scope([slim.conv2d, slim.avg_pool2d, slim.max_pool2d],
                      stride=1, padding='SAME'):
    with tf.variable_scope(scope, 'BlockInceptionC', [inputs], reuse=reuse):
      with tf.variable_scope('Branch_0'):
        branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
      with tf.variable_scope('Branch_1'):
        branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
        branch_1 = tf.concat(axis=3, values=[
            slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'),
            slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')])
      with tf.variable_scope('Branch_2'):
        branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
        branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1')
        branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3')
        branch_2 = tf.concat(axis=3, values=[
            slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'),
            slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')])
      with tf.variable_scope('Branch_3'):
        branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
        branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1')
      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
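
# Note: the two reduction blocks above use stride-2 VALID branches, so each
# halves the spatial grid while growing the channel depth (35 x 35 x 384 ->
# 17 x 17 x 1024 -> 8 x 8 x 1536 in inception_v4_base); the stride-2 max-pool
# branch carries the input forward without adding parameters.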


def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None):
  """Creates the Inception V4 network up to the given final endpoint.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
    final_endpoint: specifies the endpoint to construct the network up to.
      It can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
      'Mixed_3a', 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d',
      'Mixed_5e', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e',
      'Mixed_6f', 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c',
      'Mixed_7d']
    scope: Optional variable_scope.

  Returns:
    net: the output tensor of the network up to (and including) final_endpoint.
    end_points: the set of end_points from the inception model.

  Raises:
    ValueError: if final_endpoint is not set to one of the predefined values.
  """
  end_points = {}

  def add_and_check_final(name, net):
    end_points[name] = net
    return name == final_endpoint

  with tf.variable_scope(scope, 'InceptionV4', [inputs]):
    with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                        stride=1, padding='SAME'):
      # 299 x 299 x 3
      net = slim.conv2d(inputs, 32, [3, 3], stride=2,
                        padding='VALID', scope='Conv2d_1a_3x3')
      if add_and_check_final('Conv2d_1a_3x3', net): return net, end_points
      # 149 x 149 x 32
      net = slim.conv2d(net, 32, [3, 3], padding='VALID',
                        scope='Conv2d_2a_3x3')
      if add_and_check_final('Conv2d_2a_3x3', net): return net, end_points
      # 147 x 147 x 32
      net = slim.conv2d(net, 64, [3, 3], scope='Conv2d_2b_3x3')
      if add_and_check_final('Conv2d_2b_3x3', net): return net, end_points
      # 147 x 147 x 64
      with tf.variable_scope('Mixed_3a'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_0a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, padding='VALID',
                                 scope='Conv2d_0a_3x3')
        net = tf.concat(axis=3, values=[branch_0, branch_1])
        if add_and_check_final('Mixed_3a', net): return net, end_points

      # 73 x 73 x 160
      with tf.variable_scope('Mixed_4a'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          branch_0 = slim.conv2d(branch_0, 96, [3, 3], padding='VALID',
                                 scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          branch_1 = slim.conv2d(branch_1, 64, [1, 7], scope='Conv2d_0b_1x7')
          branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1')
          branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID',
                                 scope='Conv2d_1a_3x3')
        net = tf.concat(axis=3, values=[branch_0, branch_1])
        if add_and_check_final('Mixed_4a', net): return net, end_points

      # 71 x 71 x 192
      with tf.variable_scope('Mixed_5a'):
        with tf.variable_scope('Branch_0'):
          branch_0 = slim.conv2d(net, 192, [3, 3], stride=2, padding='VALID',
                                 scope='Conv2d_1a_3x3')
        with tf.variable_scope('Branch_1'):
          branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                     scope='MaxPool_1a_3x3')
        net = tf.concat(axis=3, values=[branch_0, branch_1])
        if add_and_check_final('Mixed_5a', net): return net, end_points

      # 35 x 35 x 384
      # 4 x Inception-A blocks
      for idx in range(4):
        block_scope = 'Mixed_5' + chr(ord('b') + idx)
        net = block_inception_a(net, block_scope)
        if add_and_check_final(block_scope, net): return net, end_points

      # 35 x 35 x 384
      # Reduction-A block
      net = block_reduction_a(net, 'Mixed_6a')
      if add_and_check_final('Mixed_6a', net): return net, end_points

      # 17 x 17 x 1024
      # 7 x Inception-B blocks
      for idx in range(7):
        block_scope = 'Mixed_6' + chr(ord('b') + idx)
        net = block_inception_b(net, block_scope)
        if add_and_check_final(block_scope, net): return net, end_points

      # 17 x 17 x 1024
      # Reduction-B block
      net = block_reduction_b(net, 'Mixed_7a')
      if add_and_check_final('Mixed_7a', net): return net, end_points

      # 8 x 8 x 1536
      # 3 x Inception-C blocks
      for idx in range(3):
        block_scope = 'Mixed_7' + chr(ord('b') + idx)
        net = block_inception_c(net, block_scope)
        if add_and_check_final(block_scope, net): return net, end_points
  raise ValueError('Unknown final endpoint %s' % final_endpoint)
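
# A minimal feature-extraction sketch, assuming a TF 1.x runtime with
# tf.contrib.slim available; the name `images` is illustrative:
#
#   images = tf.placeholder(tf.float32, [None, 299, 299, 3], name='images')
#   with slim.arg_scope(inception_v4_arg_scope()):
#     features, end_points = inception_v4_base(images,
#                                              final_endpoint='Mixed_6h')
#   # `features` is the 17 x 17 x 1024 activation map at Mixed_6h, and
#   # `end_points` holds every intermediate tensor built up to that point.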


def inception_v4(inputs, num_classes=1001, is_training=True,
                 dropout_keep_prob=0.8,
                 reuse=None,
                 scope='InceptionV4',
                 create_aux_logits=True):
  """Creates the Inception V4 model.

  Args:
    inputs: a 4-D tensor of size [batch_size, height, width, 3].
    num_classes: number of predicted classes. If 0 or None, the logits layer
      is omitted and the input features to the logits layer (before dropout)
      are returned instead.
    is_training: whether the network is being trained.
    dropout_keep_prob: float, the fraction to keep before the final layer.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse, 'scope' must be given.
    scope: Optional variable_scope.
    create_aux_logits: Whether to include the auxiliary logits.

  Returns:
    net: a Tensor with the logits (pre-softmax activations) if num_classes
      is a non-zero integer, or the non-dropped input to the logits layer
      if num_classes is 0 or None.
    end_points: the set of end_points from the inception model.
  """
  end_points = {}
  with tf.variable_scope(scope, 'InceptionV4', [inputs], reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v4_base(inputs, scope=scope)

      with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                          stride=1, padding='SAME'):
        # Auxiliary Head logits
        if create_aux_logits and num_classes:
          with tf.variable_scope('AuxLogits'):
            # 17 x 17 x 1024
            aux_logits = end_points['Mixed_6h']
            aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3,
                                         padding='VALID',
                                         scope='AvgPool_1a_5x5')
            aux_logits = slim.conv2d(aux_logits, 128, [1, 1],
                                     scope='Conv2d_1b_1x1')
            aux_logits = slim.conv2d(aux_logits, 768,
                                     aux_logits.get_shape()[1:3],
                                     padding='VALID', scope='Conv2d_2a')
            aux_logits = slim.flatten(aux_logits)
            aux_logits = slim.fully_connected(aux_logits, num_classes,
                                              activation_fn=None,
                                              scope='Aux_logits')
            end_points['AuxLogits'] = aux_logits

        # Final pooling and prediction
        # TODO(sguada,arnoegw): Consider adding a parameter global_pool which
        # can be set to False to disable pooling here (as in resnet_*()).
        with tf.variable_scope('Logits'):
          # 8 x 8 x 1536
          kernel_size = net.get_shape()[1:3]
          if kernel_size.is_fully_defined():
            net = slim.avg_pool2d(net, kernel_size, padding='VALID',
                                  scope='AvgPool_1a')
          else:
            net = tf.reduce_mean(net, [1, 2], keep_dims=True,
                                 name='global_pool')
          end_points['global_pool'] = net
          if not num_classes:
            return net, end_points
          # 1 x 1 x 1536
          net = slim.dropout(net, dropout_keep_prob, scope='Dropout_1b')
          net = slim.flatten(net, scope='PreLogitsFlatten')
          end_points['PreLogitsFlatten'] = net
          # 1536
          logits = slim.fully_connected(net, num_classes, activation_fn=None,
                                        scope='Logits')
          end_points['Logits'] = logits
          end_points['Predictions'] = tf.nn.softmax(logits, name='Predictions')
    return logits, end_points
inception_v4.default_image_size = 299


inception_v4_arg_scope = inception_utils.inception_arg_scope
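

# A minimal end-to-end usage sketch, assuming a TF 1.x runtime with
# tf.contrib.slim; the variable names, input batch, and checkpoint path below
# are illustrative, not part of this module:
#
#   size = inception_v4.default_image_size  # 299
#   images = tf.placeholder(tf.float32, [None, size, size, 3])
#   with slim.arg_scope(inception_v4_arg_scope()):
#     logits, end_points = inception_v4(images, num_classes=1001,
#                                       is_training=False)
#   probs = end_points['Predictions']  # softmax over the 1001 classes
#   saver = tf.train.Saver()
#   with tf.Session() as sess:
#     saver.restore(sess, '/path/to/inception_v4.ckpt')  # hypothetical path
#     batch_probs = sess.run(probs, feed_dict={images: my_batch})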