Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement SpatialPyramidPoolingLayer with the Split, Pooling, Flatten & Concat layers #560

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env sh
# Fine-tune the Caffe reference ImageNet model with the VOC2012
# spatial-pyramid-pooling solver configuration.
#
# All paths below are relative to the current working directory, so the
# script has to be started from the directory that holds the solver prototxt.

# Stop at the first failing command: without this, "Done." is printed and the
# script exits 0 even when finetune_net.bin crashes or cannot be found.
set -e

# Directory holding the compiled Caffe tools.
TOOLS=../../build/tools
# Pretrained weights that fine-tuning starts from.
MODEL=../imagenet/caffe_reference_imagenet_model

# GLOG_logtostderr=1 is set only for this command's environment.
GLOG_logtostderr=1 "$TOOLS/finetune_net.bin" voc2012_finetune_spatial_pyramid_pooling_solver.prototxt "$MODEL"

echo "Done."
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Solver configuration for fine-tuning the spatial-pyramid-pooling net on VOC2012.

# Training net definition; the training set has 5717 images.
train_net: "voc2012_finetune_spatial_pyramid_pooling_train.prototxt"
# Test net definition; the test set has 5823 images, read in batches of 100.
test_net: "voc2012_finetune_spatial_pyramid_pooling_test.prototxt"
# 59 test batches of 100 = 5900 samples, enough to cover all 5823 test images.
test_iter: 59
# Run a test pass every 500 training iterations.
test_interval: 500
# Small base learning rate, as fine-tuning starts from pretrained weights.
base_lr: 0.0001
# "step" schedule: per Caffe's solver definition the rate is multiplied by
# gamma once every stepsize iterations.
lr_policy: "step"
gamma: 0.1
stepsize: 20000
# Log training progress every 20 iterations.
display: 20
max_iter: 100000
momentum: 0.9
weight_decay: 0.0005
# Write a snapshot every 500 iterations (same cadence as test_interval).
snapshot: 500
snapshot_prefix: "voc2012_finetune_spatial_pyramid_pooling_train"
# solver mode: CPU or GPU
solver_mode: CPU
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
# Network definition: ImageNet-style convolutional stack whose conv5 output
# goes through a spatial pyramid pooling layer before the fully connected layers.
name: "ImagenetSpatialPyramidPoolingNet"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am wondering: VOC2012 classification images can have multiple labels, so how do you store them in leveldb?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HDF5DataLayer

# Input layer of the test net: reads image/label pairs from the VOC2012 test
# leveldb in batches of 100, using the ILSVRC12 mean file and 227x227 crops.
layers {
name: "data"
type: DATA
top: "data"
top: "label"
data_param {
source: "voc2012_test.leveldb"
mean_file: "../../data/ilsvrc12/imagenet_mean.binaryproto"
batch_size: 100
crop_size: 227
# Mirroring is a random augmentation meant for training. It is disabled here
# so that every test pass sees the same inputs and the reported accuracy is
# deterministic and comparable between snapshots.
mirror: false
}
}
# ---- Stage 1: conv1 -> relu1 -> pool1 -> norm1 ----
# conv1: 96 filters of size 11x11 applied with stride 4 to the input data.
layers {
name: "conv1"
type: CONVOLUTION
bottom: "data"
top: "conv1"
# Two learning-rate multipliers and two weight-decay multipliers, one per
# parameter blob (presumably weights first, then bias; confirm against Caffe).
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 96
kernel_size: 11
stride: 4
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# relu1: bottom and top are the same blob, so the activation is applied in place.
layers {
name: "relu1"
type: RELU
bottom: "conv1"
top: "conv1"
}
# pool1: max pooling with kernel size 3 and stride 2.
layers {
name: "pool1"
type: POOLING
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# norm1: LRN layer over pool1 with local_size 5.
layers {
name: "norm1"
type: LRN
bottom: "pool1"
top: "norm1"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
# ---- Stage 2: conv2 -> relu2 -> pool2 -> norm2 ----
# conv2: 256 filters of size 5x5, padding 2, split into 2 groups.
layers {
name: "conv2"
type: CONVOLUTION
bottom: "norm1"
top: "conv2"
# Same per-blob multipliers as conv1.
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 256
pad: 2
kernel_size: 5
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
# Unlike conv1, the bias starts at 1 rather than 0.
bias_filler {
type: "constant"
value: 1
}
}
}
# relu2: applied in place on conv2.
layers {
name: "relu2"
type: RELU
bottom: "conv2"
top: "conv2"
}
# pool2: max pooling with kernel size 3 and stride 2.
layers {
name: "pool2"
type: POOLING
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# norm2: LRN layer over pool2, same parameters as norm1.
layers {
name: "norm2"
type: LRN
bottom: "pool2"
top: "norm2"
lrn_param {
local_size: 5
alpha: 0.0001
beta: 0.75
}
}
# ---- Stage 3: conv3 -> conv4 -> conv5, each followed by an in-place ReLU ----
# No pooling or LRN layers appear between these three convolutions.
# conv3: 384 filters of size 3x3, padding 1, no grouping.
layers {
name: "conv3"
type: CONVOLUTION
bottom: "norm2"
top: "conv3"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
layers {
name: "relu3"
type: RELU
bottom: "conv3"
top: "conv3"
}
# conv4: 384 filters of size 3x3, padding 1, 2 groups, bias initialised to 1.
layers {
name: "conv4"
type: CONVOLUTION
bottom: "conv3"
top: "conv4"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 384
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu4"
type: RELU
bottom: "conv4"
top: "conv4"
}
# conv5: 256 filters of size 3x3, padding 1, 2 groups, bias initialised to 1.
# Its output feeds the spatial pyramid pooling layer.
layers {
name: "conv5"
type: CONVOLUTION
bottom: "conv4"
top: "conv5"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
convolution_param {
num_output: 256
pad: 1
kernel_size: 3
group: 2
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 1
}
}
}
layers {
name: "relu5"
type: RELU
bottom: "conv5"
top: "conv5"
}
# Spatial pyramid pooling over conv5, using max pooling and four pyramid
# levels given by the repeated spatial_bin field (1, 2, 3 and 6).
# NOTE(review): presumably each spatial_bin value n yields an n x n grid of
# pooled bins per channel and the levels are concatenated; the meaning of
# `scale` is not visible here. Confirm both against the layer implementation.
layers {
name: "spatial_pyramid_pooling"
type: SPATIAL_PYRAMID_POOLING
bottom: "conv5"
top: "spatial_pyramid_pooling"
spatial_pyramid_pooling_param {
pool: MAX
spatial_bin: 1
spatial_bin: 2
spatial_bin: 3
spatial_bin: 6
scale: 1
}
}
# ---- Fully connected head: ip6 -> fc7 -> fc8_voc2012 ----
# ip6: 4096-output inner product on the pyramid-pooled features.
# NOTE(review): named "ip6" rather than "fc6"; presumably so that pretrained
# fc6 weights are not copied into it by name. Confirm.
layers {
name: "ip6"
type: INNER_PRODUCT
bottom: "spatial_pyramid_pooling"
top: "ip6"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1
}
}
}
# relu6 and drop6 both operate in place on ip6.
layers {
name: "relu6"
type: RELU
bottom: "ip6"
top: "ip6"
}
layers {
name: "drop6"
type: DROPOUT
bottom: "ip6"
top: "ip6"
dropout_param {
dropout_ratio: 0.5
}
}
# fc7: second 4096-output inner product, same initialisation as ip6.
layers {
name: "fc7"
type: INNER_PRODUCT
bottom: "ip6"
top: "fc7"
blobs_lr: 1
blobs_lr: 2
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 4096
weight_filler {
type: "gaussian"
std: 0.005
}
bias_filler {
type: "constant"
value: 1
}
}
}
# relu7 and drop7 both operate in place on fc7.
layers {
name: "relu7"
type: RELU
bottom: "fc7"
top: "fc7"
}
layers {
name: "drop7"
type: DROPOUT
bottom: "fc7"
top: "fc7"
dropout_param {
dropout_ratio: 0.5
}
}
# fc8_voc2012: final inner product with 20 outputs (presumably one per VOC2012
# class). Its learning-rate multipliers (10 and 20) are ten times those of
# every other layer in this file.
layers {
name: "fc8_voc2012"
type: INNER_PRODUCT
bottom: "fc7"
top: "fc8_voc2012"
blobs_lr: 10
blobs_lr: 20
weight_decay: 1
weight_decay: 0
inner_product_param {
num_output: 20
weight_filler {
type: "gaussian"
std: 0.01
}
bias_filler {
type: "constant"
value: 0
}
}
}
# ---- Evaluation outputs, both computed from fc8_voc2012 and the labels ----
# accuracy: emits the "accuracy" blob.
layers {
name: "accuracy"
type: ACCURACY
bottom: "fc8_voc2012"
bottom: "label"
top: "accuracy"
}
# Despite its name "prob", this layer is of type SOFTMAX_LOSS and its output
# blob is "loss".
layers {
name: "prob"
type: SOFTMAX_LOSS
bottom: "fc8_voc2012"
bottom: "label"
top: "loss"
}

Loading