diff --git a/examples/resnet_50.py b/examples/resnet_50.py
new file mode 100644
index 00000000000..d865b078c6d
--- /dev/null
+++ b/examples/resnet_50.py
@@ -0,0 +1,216 @@
+'''This script demonstrates how to build a deep residual network
+using the Keras functional API.
+
+get_resnet50 returns the deep residual network model (50 layers).
+
+Please visit Kaiming He's GitHub homepage:
+https://github.com/KaimingHe
+for more information.
+
+The related paper is
+"Deep Residual Learning for Image Recognition"
+Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+http://arxiv.org/abs/1512.03385
+
+Pretrained weights were converted directly from Kaiming He's Caffe model.
+
+For now we provide weights for the TensorFlow backend; using 'tf' dim_ordering
+(e.g. input_shape=(224, 224, 3) for a 224x224 color image) accelerates the
+computation, but we also provide weights for 'th' dim_ordering for compatibility.
+Please download them at:
+http://pan.baidu.com/s/1o8pO2q2 ('th' dim ordering, for China)
+http://pan.baidu.com/s/1pLanuTt ('tf' dim ordering, for China)
+
+https://drive.google.com/open?id=0B4ChsjFJvew3NVQ2U041Q0xHRHM ('th' dim ordering, for other countries)
+https://drive.google.com/open?id=0B4ChsjFJvew3NWN5THdxcTdSWmc ('tf' dim ordering, for other countries)
+
+@author: BigMoyan, University of Electronic Science and Technology of China
+'''
+from keras.layers import merge
+from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D
+from keras.layers.core import Dense, Activation, Flatten
+from keras.layers.normalization import BatchNormalization
+from keras.models import Model
+from keras.layers import Input
+from keras.preprocessing.image import load_img, img_to_array
+import keras.backend as K
+import numpy as np
+
+# The names of layers in resnet50 are generated with the following format:
+# [type][stage][block]_branch[branch][layer]
+# type: 'res' for conv layer, 'bn' and 'scale' for BN layer
+# stage: from '2' to '5', current stage number
+# block: 'a', 'b', 'c'... for different blocks in a stage
+# branch: '1' for shortcut and '2' for main path
+# layer: 'a', 'b', 'c'... for different layers in a block
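+# For example, 'res2a_branch2a' is the first conv layer on the main path of
+# block 'a' in stage 2, and 'bn2a_branch1' is the BN layer on that block's shortcut.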
+
+
+def identity_block(input_tensor, kernel_size, filters, stage, block):
+    """The identity_block is the block that has no conv layer at shortcut.
+
+    params:
+        input_tensor: input tensor
+        kernel_size: default 3, the kernel size of the middle conv layer on the main path
+        filters: list of integers, the nb_filter of the 3 conv layers on the main path
+        stage: integer, current stage label, used for generating layer names
+        block: 'a', 'b'..., current block label, used for generating layer names
+    """
+    dim_ordering = K.image_dim_ordering()
+    nb_filter1, nb_filter2, nb_filter3 = filters
+    if dim_ordering == 'tf':
+        axis = 3
+    else:
+        axis = 1
+    conv_name_base = 'res' + str(stage) + block + '_branch'
+    bn_name_base = 'bn' + str(stage) + block + '_branch'
+
+    out = Convolution2D(nb_filter1, 1, 1, dim_ordering=dim_ordering, name=conv_name_base + '2a')(input_tensor)
+    out = BatchNormalization(axis=axis, name=bn_name_base + '2a')(out)
+    out = Activation('relu')(out)
+
+    out = Convolution2D(nb_filter2, kernel_size, kernel_size, border_mode='same',
+                        dim_ordering=dim_ordering, name=conv_name_base + '2b')(out)
+    out = BatchNormalization(axis=axis, name=bn_name_base + '2b')(out)
+    out = Activation('relu')(out)
+
+    out = Convolution2D(nb_filter3, 1, 1, dim_ordering=dim_ordering, name=conv_name_base + '2c')(out)
+    out = BatchNormalization(axis=axis, name=bn_name_base + '2c')(out)
+
+    out = merge([out, input_tensor], mode='sum')
+    out = Activation('relu')(out)
+    return out
+
+
+def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
+    """conv_block is the block that has a conv layer at shortcut.
+
+    params:
+        input_tensor: input tensor
+        kernel_size: default 3, the kernel size of the middle conv layer on the main path
+        filters: list of integers, the nb_filter of the 3 conv layers on the main path
+        stage: integer, current stage label, used for generating layer names
+        block: 'a', 'b'..., current block label, used for generating layer names
+
+    Note that from stage 3 on, the first conv layer on the main path has
+    subsample=(2, 2), and the shortcut has subsample=(2, 2) as well.
+    """
+    nb_filter1, nb_filter2, nb_filter3 = filters
+    dim_ordering = K.image_dim_ordering()
+    if dim_ordering == 'tf':
+        axis = 3
+    else:
+        axis = 1
+    conv_name_base = 'res' + str(stage) + block + '_branch'
+    bn_name_base = 'bn' + str(stage) + block + '_branch'
+
+    out = Convolution2D(nb_filter1, 1, 1, subsample=strides,
+                        dim_ordering=dim_ordering, name=conv_name_base + '2a')(input_tensor)
+    out = BatchNormalization(axis=axis, name=bn_name_base + '2a')(out)
+    out = Activation('relu')(out)
+
+    out = Convolution2D(nb_filter2, kernel_size, kernel_size, border_mode='same',
+                        dim_ordering=dim_ordering, name=conv_name_base + '2b')(out)
+    out = BatchNormalization(axis=axis, name=bn_name_base + '2b')(out)
+    out = Activation('relu')(out)
+
+    out = Convolution2D(nb_filter3, 1, 1, dim_ordering=dim_ordering, name=conv_name_base + '2c')(out)
+    out = BatchNormalization(axis=axis, name=bn_name_base + '2c')(out)
+
+    shortcut = Convolution2D(nb_filter3, 1, 1, subsample=strides,
+                             dim_ordering=dim_ordering, name=conv_name_base + '1')(input_tensor)
+    shortcut = BatchNormalization(axis=axis, name=bn_name_base + '1')(shortcut)
+
+    out = merge([out, shortcut], mode='sum')
+    out = Activation('relu')(out)
+    return out
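+
+
+# A quick shape sketch of the two block types (illustrative only, assuming
+# 'tf' dim_ordering and omitting the batch dimension): an identity block
+# preserves the shape of its input, while a conv block changes the channel
+# count and, with the default strides=(2, 2), halves the spatial resolution:
+#
+#   x = Input(shape=(56, 56, 256))
+#   identity_block(x, 3, [64, 64, 256], stage=2, block='b')  # -> (56, 56, 256)
+#   conv_block(x, 3, [128, 128, 512], stage=3, block='a')    # -> (28, 28, 512)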
+
+
+def read_img(img_path):
+    """This function reads an image and returns the preprocessed input array."""
+    dim_ordering = K.image_dim_ordering()
+    # per-channel mean of the ImageNet training set, in 'BGR' order (the Caffe convention)
+    mean = (103.939, 116.779, 123.68)
+    img = load_img(img_path, target_size=(224, 224))
+    img = img_to_array(img, dim_ordering=dim_ordering)
+
+    # 'RGB'->'BGR', then decenterize (subtract the per-channel mean)
+    if dim_ordering == 'th':
+        img = img[::-1, :, :]
+        for c in range(3):
+            img[c, :, :] -= mean[c]
+    else:
+        img = img[:, :, ::-1]
+        for c in range(3):
+            img[:, :, c] -= mean[c]
+
+    # expand dim for test (add the batch dimension)
+    img = np.expand_dims(img, axis=0)
+    return img
+
+
+def get_resnet50():
+    """This function returns the 50-layer residual network model.
+
+    You should load pretrained weights if you want to use it directly.
+    Note that since the pretrained weights were converted from a caffemodel,
+    the channel order of the input image should be 'BGR' (the channel order of Caffe).
+    """
+    if K.image_dim_ordering() == 'tf':
+        inp = Input(shape=(224, 224, 3))
+        axis = 3
+    else:
+        inp = Input(shape=(3, 224, 224))
+        axis = 1
+
+    dim_ordering = K.image_dim_ordering()
+    out = ZeroPadding2D((3, 3), dim_ordering=dim_ordering)(inp)
+    out = Convolution2D(64, 7, 7, subsample=(2, 2), dim_ordering=dim_ordering, name='conv1')(out)
+    out = BatchNormalization(axis=axis, name='bn_conv1')(out)
+    out = Activation('relu')(out)
+    out = MaxPooling2D((3, 3), strides=(2, 2), dim_ordering=dim_ordering)(out)
+
+    out = conv_block(out, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
+    out = identity_block(out, 3, [64, 64, 256], stage=2, block='b')
+    out = identity_block(out, 3, [64, 64, 256], stage=2, block='c')
+
+    out = conv_block(out, 3, [128, 128, 512], stage=3, block='a')
+    out = identity_block(out, 3, [128, 128, 512], stage=3, block='b')
+    out = identity_block(out, 3, [128, 128, 512], stage=3, block='c')
+    out = identity_block(out, 3, [128, 128, 512], stage=3, block='d')
+
+    out = conv_block(out, 3, [256, 256, 1024], stage=4, block='a')
+    out = identity_block(out, 3, [256, 256, 1024], stage=4, block='b')
+    out = identity_block(out, 3, [256, 256, 1024], stage=4, block='c')
+    out = identity_block(out, 3, [256, 256, 1024], stage=4, block='d')
+    out = identity_block(out, 3, [256, 256, 1024], stage=4, block='e')
+    out = identity_block(out, 3, [256, 256, 1024], stage=4, block='f')
+
+    out = conv_block(out, 3, [512, 512, 2048], stage=5, block='a')
+    out = identity_block(out, 3, [512, 512, 2048], stage=5, block='b')
+    out = identity_block(out, 3, [512, 512, 2048], stage=5, block='c')
+
+    out = AveragePooling2D((7, 7), dim_ordering=dim_ordering)(out)
+    out = Flatten()(out)
+    out = Dense(1000, activation='softmax', name='fc1000')(out)
+
+    model = Model(inp, out)
+
+    return model
+
+
+if __name__ == '__main__':
+    K.set_image_dim_ordering('tf')
+    weights_file = K.image_dim_ordering() + '_dim_ordering_resnet50.h5'
+    resnet_model = get_resnet50()
+    resnet_model.load_weights(weights_file)
+    test_img1 = read_img('cat.jpg')
+    test_img2 = read_img('airplane.jpg')
+    # you may download synset_words from the address given at the beginning of this file
+    class_table = open('synset_words', 'r')
+    lines = class_table.readlines()
+    print('result for test 1 is')
+    print(lines[np.argmax(resnet_model.predict(test_img1)[0])])
+    print('result for test 2 is')
+    print(lines[np.argmax(resnet_model.predict(test_img2)[0])])
+    class_table.close()