[caffe-contrib] 251/362: Imported Upstream version 1.0.0~rc3
Zhou Mo
cdluminate-guest at moszumanska.debian.org
Tue May 3 09:24:39 UTC 2016
This is an automated email from the git hooks/post-receive script.
cdluminate-guest pushed a commit to branch master
in repository caffe-contrib.
commit 6b43d5a3cc3a95abdf75f1bcb8b0dbf9634c937f
Author: Zhou Mo <cdluminate at gmail.com>
Date: Tue Mar 8 14:56:41 2016 +0000
Imported Upstream version 1.0.0~rc3
---
CMakeLists.txt | 5 +
Makefile | 36 +-
Makefile.config.example | 5 +
cmake/Dependencies.cmake | 4 +-
cmake/Summary.cmake | 2 +-
data/ilsvrc12/get_ilsvrc_aux.sh | 2 +-
docs/installation.md | 14 +-
examples/feature_extraction/readme.md | 2 +-
examples/finetune_flickr_style/readme.md | 4 +-
examples/mnist/readme.md | 4 +-
include/caffe/common.hpp | 4 +
include/caffe/layers/base_conv_layer.hpp | 22 +-
.../layers/{im2col_layer.hpp => bias_layer.hpp} | 45 +-
include/caffe/layers/conv_layer.hpp | 3 +
include/caffe/layers/elu_layer.hpp | 86 ++++
include/caffe/layers/im2col_layer.hpp | 2 +
include/caffe/layers/scale_layer.hpp | 83 ++++
include/caffe/net.hpp | 15 +
include/caffe/solver.hpp | 3 +
include/caffe/util/im2col.hpp | 20 +-
matlab/+caffe/private/caffe_.cpp | 8 +
matlab/+caffe/version.m | 7 +
python/caffe/__init__.py | 1 +
python/caffe/_caffe.cpp | 7 +
python/caffe/draw.py | 11 +-
python/caffe/io.py | 2 +-
python/caffe/pycaffe.py | 18 +
src/caffe/CMakeLists.txt | 4 +
src/caffe/layer_factory.cpp | 30 +-
src/caffe/layers/base_conv_layer.cpp | 20 +-
src/caffe/layers/bias_layer.cpp | 121 +++++
src/caffe/layers/bias_layer.cu | 59 +++
src/caffe/layers/conv_layer.cpp | 4 +-
src/caffe/layers/deconv_layer.cpp | 4 +-
src/caffe/layers/elu_layer.cpp | 47 ++
src/caffe/layers/elu_layer.cu | 62 +++
src/caffe/layers/flatten_layer.cpp | 2 +
src/caffe/layers/im2col_layer.cpp | 25 +-
src/caffe/layers/im2col_layer.cu | 6 +-
src/caffe/layers/reshape_layer.cpp | 2 +
src/caffe/layers/scale_layer.cpp | 219 +++++++++
src/caffe/layers/scale_layer.cu | 135 ++++++
src/caffe/proto/caffe.proto | 86 +++-
src/caffe/solver.cpp | 37 +-
src/caffe/solvers/adadelta_solver.cpp | 66 +--
src/caffe/solvers/adadelta_solver.cu | 30 ++
src/caffe/solvers/adagrad_solver.cpp | 37 +-
src/caffe/solvers/adagrad_solver.cu | 26 ++
src/caffe/solvers/adam_solver.cpp | 39 +-
src/caffe/solvers/adam_solver.cu | 29 ++
src/caffe/solvers/nesterov_solver.cpp | 29 +-
src/caffe/solvers/nesterov_solver.cu | 27 ++
src/caffe/solvers/rmsprop_solver.cpp | 35 +-
src/caffe/solvers/rmsprop_solver.cu | 28 ++
src/caffe/solvers/sgd_solver.cpp | 16 +-
src/caffe/solvers/sgd_solver.cu | 24 +
src/caffe/test/test_bias_layer.cpp | 467 +++++++++++++++++++
src/caffe/test/test_convolution_layer.cpp | 129 +++++-
src/caffe/test/test_data_transformer.cpp | 136 +++---
src/caffe/test/test_embed_layer.cpp | 4 -
src/caffe/test/test_im2col_kernel.cu | 26 +-
src/caffe/test/test_im2col_layer.cpp | 51 ++-
src/caffe/test/test_neuron_layer.cpp | 59 +++
src/caffe/test/test_scale_layer.cpp | 507 +++++++++++++++++++++
src/caffe/util/im2col.cpp | 124 +++--
src/caffe/util/im2col.cu | 246 ++++++----
tools/caffe.cpp | 5 +-
67 files changed, 2933 insertions(+), 485 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c446c60..32cc42a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,6 +9,11 @@ endif()
# ---[ Caffe project
project(Caffe C CXX)
+# ---[ Caffe version
+set(CAFFE_TARGET_VERSION "1.0.0-rc3")
+set(CAFFE_TARGET_SOVERSION "1.0.0-rc3")
+add_definitions(-DCAFFE_VERSION=${CAFFE_TARGET_VERSION})
+
# ---[ Using cmake scripts and modules
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
diff --git a/Makefile b/Makefile
index 985fffd..598d28d 100644
--- a/Makefile
+++ b/Makefile
@@ -29,9 +29,17 @@ SRC_DIRS := $(shell find * -type d -exec bash -c "find {} -maxdepth 1 \
\( -name '*.cpp' -o -name '*.proto' \) | grep -q ." \; -print)
# The target shared library name
+LIBRARY_NAME := $(PROJECT)
LIB_BUILD_DIR := $(BUILD_DIR)/lib
-STATIC_NAME := $(LIB_BUILD_DIR)/lib$(PROJECT).a
-DYNAMIC_NAME := $(LIB_BUILD_DIR)/lib$(PROJECT).so
+STATIC_NAME := $(LIB_BUILD_DIR)/lib$(LIBRARY_NAME).a
+DYNAMIC_VERSION_MAJOR := 1
+DYNAMIC_VERSION_MINOR := 0
+DYNAMIC_VERSION_REVISION := 0-rc3
+DYNAMIC_NAME_SHORT := lib$(LIBRARY_NAME).so
+#DYNAMIC_SONAME_SHORT := $(DYNAMIC_NAME_SHORT).$(DYNAMIC_VERSION_MAJOR)
+DYNAMIC_VERSIONED_NAME_SHORT := $(DYNAMIC_NAME_SHORT).$(DYNAMIC_VERSION_MAJOR).$(DYNAMIC_VERSION_MINOR).$(DYNAMIC_VERSION_REVISION)
+DYNAMIC_NAME := $(LIB_BUILD_DIR)/$(DYNAMIC_VERSIONED_NAME_SHORT)
+COMMON_FLAGS += -DCAFFE_VERSION=$(DYNAMIC_VERSION_MAJOR).$(DYNAMIC_VERSION_MINOR).$(DYNAMIC_VERSION_REVISION)
##############################
# Get all source files
@@ -191,7 +199,7 @@ ifeq ($(USE_OPENCV), 1)
endif
endif
-PYTHON_LIBRARIES := boost_python python2.7
+PYTHON_LIBRARIES ?= boost_python python2.7
WARNINGS := -Wall -Wno-sign-compare
##############################
@@ -253,6 +261,7 @@ ifeq ($(LINUX), 1)
# boost::thread is reasonably called boost_thread (compare OS X)
# We will also explicitly add stdc++ to the link target.
LIBRARIES += boost_thread stdc++
+ VERSIONFLAGS += -Wl,-soname,$(DYNAMIC_VERSIONED_NAME_SHORT) -Wl,-rpath,$(ORIGIN)/../lib
endif
# OS X:
@@ -276,6 +285,7 @@ ifeq ($(OSX), 1)
# we need to explicitly ask for the rpath to be obeyed
DYNAMIC_FLAGS := -install_name @rpath/libcaffe.so
ORIGIN := @loader_path
+ VERSIONFLAGS += -Wl,-install_name,$(DYNAMIC_VERSIONED_NAME_SHORT) -Wl,-rpath,$(ORIGIN)/../../build/lib
else
ORIGIN := \$$ORIGIN
endif
@@ -478,7 +488,7 @@ py: $(PY$(PROJECT)_SO) $(PROTO_GEN_PY)
$(PY$(PROJECT)_SO): $(PY$(PROJECT)_SRC) $(PY$(PROJECT)_HXX) | $(DYNAMIC_NAME)
@ echo CXX/LD -o $@ $<
$(Q)$(CXX) -shared -o $@ $(PY$(PROJECT)_SRC) \
- -o $@ $(LINKFLAGS) -l$(PROJECT) $(PYTHON_LDFLAGS) \
+ -o $@ $(LINKFLAGS) -l$(LIBRARY_NAME) $(PYTHON_LDFLAGS) \
-Wl,-rpath,$(ORIGIN)/../../build/lib
mat$(PROJECT): mat
@@ -542,7 +552,8 @@ $(ALL_BUILD_DIRS): | $(BUILD_DIR_LINK)
$(DYNAMIC_NAME): $(OBJS) | $(LIB_BUILD_DIR)
@ echo LD -o $@
- $(Q)$(CXX) -shared -o $@ $(OBJS) $(LINKFLAGS) $(LDFLAGS) $(DYNAMIC_FLAGS)
+ $(Q)$(CXX) -shared -o $@ $(OBJS) $(VERSIONFLAGS) $(LINKFLAGS) $(LDFLAGS) $(DYNAMIC_FLAGS)
+ @ cd $(BUILD_DIR)/lib; rm -f $(DYNAMIC_NAME_SHORT); ln -s $(DYNAMIC_VERSIONED_NAME_SHORT) $(DYNAMIC_NAME_SHORT)
$(STATIC_NAME): $(OBJS) | $(LIB_BUILD_DIR)
@ echo AR -o $@
@@ -573,33 +584,33 @@ $(TEST_ALL_BIN): $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) \
| $(DYNAMIC_NAME) $(TEST_BIN_DIR)
@ echo CXX/LD -o $@ $<
$(Q)$(CXX) $(TEST_MAIN_SRC) $(TEST_OBJS) $(GTEST_OBJ) \
- -o $@ $(LINKFLAGS) $(LDFLAGS) -l$(PROJECT) -Wl,-rpath,$(ORIGIN)/../lib
+ -o $@ $(LINKFLAGS) $(LDFLAGS) -l$(LIBRARY_NAME) -Wl,-rpath,$(ORIGIN)/../lib
$(TEST_CU_BINS): $(TEST_BIN_DIR)/%.testbin: $(TEST_CU_BUILD_DIR)/%.o \
$(GTEST_OBJ) | $(DYNAMIC_NAME) $(TEST_BIN_DIR)
@ echo LD $<
$(Q)$(CXX) $(TEST_MAIN_SRC) $< $(GTEST_OBJ) \
- -o $@ $(LINKFLAGS) $(LDFLAGS) -l$(PROJECT) -Wl,-rpath,$(ORIGIN)/../lib
+ -o $@ $(LINKFLAGS) $(LDFLAGS) -l$(LIBRARY_NAME) -Wl,-rpath,$(ORIGIN)/../lib
$(TEST_CXX_BINS): $(TEST_BIN_DIR)/%.testbin: $(TEST_CXX_BUILD_DIR)/%.o \
$(GTEST_OBJ) | $(DYNAMIC_NAME) $(TEST_BIN_DIR)
@ echo LD $<
$(Q)$(CXX) $(TEST_MAIN_SRC) $< $(GTEST_OBJ) \
- -o $@ $(LINKFLAGS) $(LDFLAGS) -l$(PROJECT) -Wl,-rpath,$(ORIGIN)/../lib
+ -o $@ $(LINKFLAGS) $(LDFLAGS) -l$(LIBRARY_NAME) -Wl,-rpath,$(ORIGIN)/../lib
# Target for extension-less symlinks to tool binaries with extension '*.bin'.
$(TOOL_BUILD_DIR)/%: $(TOOL_BUILD_DIR)/%.bin | $(TOOL_BUILD_DIR)
@ $(RM) $@
- @ ln -s $(abspath $<) $@
+ @ ln -s $(notdir $<) $@
$(TOOL_BINS): %.bin : %.o | $(DYNAMIC_NAME)
@ echo CXX/LD -o $@
- $(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(PROJECT) $(LDFLAGS) \
+ $(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(LIBRARY_NAME) $(LDFLAGS) \
-Wl,-rpath,$(ORIGIN)/../lib
$(EXAMPLE_BINS): %.bin : %.o | $(DYNAMIC_NAME)
@ echo CXX/LD -o $@
- $(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(PROJECT) $(LDFLAGS) \
+ $(Q)$(CXX) $< -o $@ $(LINKFLAGS) -l$(LIBRARY_NAME) $(LDFLAGS) \
-Wl,-rpath,$(ORIGIN)/../../lib
proto: $(PROTO_GEN_CC) $(PROTO_GEN_HEADER)
@@ -651,6 +662,8 @@ superclean: clean supercleanfiles
$(DIST_ALIASES): $(DISTRIBUTE_DIR)
$(DISTRIBUTE_DIR): all py | $(DISTRIBUTE_SUBDIRS)
+ # add proto
+ cp -r src/caffe/proto $(DISTRIBUTE_DIR)/
# add include
cp -r include $(DISTRIBUTE_DIR)/
mkdir -p $(DISTRIBUTE_DIR)/include/caffe/proto
@@ -661,6 +674,7 @@ $(DISTRIBUTE_DIR): all py | $(DISTRIBUTE_SUBDIRS)
# add libraries
cp $(STATIC_NAME) $(DISTRIBUTE_DIR)/lib
install -m 644 $(DYNAMIC_NAME) $(DISTRIBUTE_DIR)/lib
+ cd $(DISTRIBUTE_DIR)/lib; rm -f $(DYNAMIC_NAME_SHORT); ln -s $(DYNAMIC_VERSIONED_NAME_SHORT) $(DYNAMIC_NAME_SHORT)
# add python - it's not the standard way, indeed...
cp -r python $(DISTRIBUTE_DIR)/python
diff --git a/Makefile.config.example b/Makefile.config.example
index 1dd6a8f..8fd49c9 100644
--- a/Makefile.config.example
+++ b/Makefile.config.example
@@ -70,6 +70,11 @@ PYTHON_INCLUDE := /usr/include/python2.7 \
# $(ANACONDA_HOME)/include/python2.7 \
# $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include \
+# Uncomment to use Python 3 (default is Python 2)
+# PYTHON_LIBRARIES := boost_python3 python3.5m
+# PYTHON_INCLUDE := /usr/include/python3.5m \
+# /usr/lib/python3.5/dist-packages/numpy/core/include
+
# We need to be able to find libpythonX.X.so or .dylib.
PYTHON_LIB := /usr/lib
# PYTHON_LIB := $(ANACONDA_HOME)/lib
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 51a803c..c7b6a17 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -114,14 +114,14 @@ if(BUILD_python)
# Find the matching boost python implementation
set(version ${PYTHONLIBS_VERSION_STRING})
- STRING( REPLACE "." "" boost_py_version ${version} )
+ STRING( REGEX REPLACE "[^0-9]" "" boost_py_version ${version} )
find_package(Boost 1.46 COMPONENTS "python-py${boost_py_version}")
set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${boost_py_version}_FOUND})
while(NOT "${version}" STREQUAL "" AND NOT Boost_PYTHON_FOUND)
STRING( REGEX REPLACE "([0-9.]+).[0-9]+" "\\1" version ${version} )
- STRING( REPLACE "." "" boost_py_version ${version} )
+ STRING( REGEX REPLACE "[^0-9]" "" boost_py_version ${version} )
find_package(Boost 1.46 COMPONENTS "python-py${boost_py_version}")
set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${boost_py_version}_FOUND})
diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake
index 557a6f0..ba025cf 100644
--- a/cmake/Summary.cmake
+++ b/cmake/Summary.cmake
@@ -101,7 +101,7 @@ function(caffe_print_configuration_summary)
caffe_status("")
caffe_status("******************* Caffe Configuration Summary *******************")
caffe_status("General:")
- caffe_status(" Version : ${Caffe_VERSION}")
+ caffe_status(" Version : ${CAFFE_TARGET_VERSION}")
caffe_status(" Git : ${Caffe_GIT_VERSION}")
caffe_status(" System : ${CMAKE_SYSTEM_NAME}")
caffe_status(" C++ compiler : ${CMAKE_CXX_COMPILER}")
diff --git a/data/ilsvrc12/get_ilsvrc_aux.sh b/data/ilsvrc12/get_ilsvrc_aux.sh
index b9b85d2..90935f2 100755
--- a/data/ilsvrc12/get_ilsvrc_aux.sh
+++ b/data/ilsvrc12/get_ilsvrc_aux.sh
@@ -12,7 +12,7 @@ cd $DIR
echo "Downloading..."
-wget http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz
+wget -c http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz
echo "Unzipping..."
diff --git a/docs/installation.md b/docs/installation.md
index cce7ec3..ef781e8 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -87,15 +87,20 @@ There is an unofficial Windows port of Caffe at [niuzhiheng/caffe:windows](https
## Compilation
-Now that you have the prerequisites, edit your `Makefile.config` to change the paths for your setup The defaults should work, but uncomment the relevant lines if using Anaconda Python.
+Caffe can be compiled with either Make or CMake. Make is officially supported while CMake is supported by the community.
+
+### Compilation with Make
+
+Configure the build by copying and modifying the example `Makefile.config` for your setup. The defaults should work, but uncomment the relevant lines if using Anaconda Python.
cp Makefile.config.example Makefile.config
- # Adjust Makefile.config (for example, if using Anaconda Python)
+ # Adjust Makefile.config (for example, if using Anaconda Python, or if cuDNN is desired)
make all
make test
make runtest
-- For cuDNN acceleration, you should uncomment the `USE_CUDNN := 1` switch in `Makefile.config`.
+- For CPU & GPU accelerated Caffe, no changes are needed.
+- For cuDNN acceleration using NVIDIA's proprietary cuDNN software, uncomment the `USE_CUDNN := 1` switch in `Makefile.config`. cuDNN is sometimes but not always faster than Caffe's GPU acceleration.
- For CPU-only Caffe, uncomment `CPU_ONLY := 1` in `Makefile.config`.
To compile the Python and MATLAB wrappers do `make pycaffe` and `make matcaffe` respectively.
@@ -107,7 +112,7 @@ Be sure to set your MATLAB and Python paths in `Makefile.config` first!
Now that you have installed Caffe, check out the [MNIST tutorial](gathered/examples/mnist.html) and the [reference ImageNet model tutorial](gathered/examples/imagenet.html).
-### CMake Compilation
+### Compilation with CMake
In lieu of manually editing `Makefile.config` to configure the build, Caffe offers an unofficial CMake build thanks to @Nerei, @akosiorek, and other members of the community. It requires CMake version >= 2.8.7.
The basic steps are as follows:
@@ -116,6 +121,7 @@ The basic steps are as follows:
cd build
cmake ..
make all
+ make install
make runtest
See [PR #1667](https://github.com/BVLC/caffe/pull/1667) for options and details.
diff --git a/examples/feature_extraction/readme.md b/examples/feature_extraction/readme.md
index 2bc3dac..5612b02 100644
--- a/examples/feature_extraction/readme.md
+++ b/examples/feature_extraction/readme.md
@@ -51,7 +51,7 @@ Extract Features
Now everything necessary is in place.
- ./build/tools/extract_features.bin models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel examples/_temp/imagenet_val.prototxt fc7 examples/_temp/features 10 lmdb
+ ./build/tools/extract_features.bin models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel examples/_temp/imagenet_val.prototxt fc7 examples/_temp/features 10 leveldb
The name of feature blob that you extract is `fc7`, which represents the highest level feature of the reference model.
We can use any other layer, as well, such as `conv5` or `pool3`.
diff --git a/examples/finetune_flickr_style/readme.md b/examples/finetune_flickr_style/readme.md
index ecb9d3d..4e9d41f 100644
--- a/examples/finetune_flickr_style/readme.md
+++ b/examples/finetune_flickr_style/readme.md
@@ -22,10 +22,10 @@ Because we are predicting 20 classes instead of a 1,000, we do need to change th
Therefore, we change the name of the last layer from `fc8` to `fc8_flickr` in our prototxt.
Since there is no layer named that in the `bvlc_reference_caffenet`, that layer will begin training with random weights.
-We will also decrease the overall learning rate `base_lr` in the solver prototxt, but boost the `blobs_lr` on the newly introduced layer.
+We will also decrease the overall learning rate `base_lr` in the solver prototxt, but boost the `lr_mult` on the newly introduced layer.
The idea is to have the rest of the model change very slowly with new data, but let the new layer learn fast.
Additionally, we set `stepsize` in the solver to a lower value than if we were training from scratch, since we're virtually far along in training and therefore want the learning rate to go down faster.
-Note that we could also entirely prevent fine-tuning of all layers other than `fc8_flickr` by setting their `blobs_lr` to 0.
+Note that we could also entirely prevent fine-tuning of all layers other than `fc8_flickr` by setting their `lr_mult` to 0.
## Procedure
diff --git a/examples/mnist/readme.md b/examples/mnist/readme.md
index 413d4a1..b87a0f5 100644
--- a/examples/mnist/readme.md
+++ b/examples/mnist/readme.md
@@ -41,11 +41,13 @@ Currently, we will read the MNIST data from the lmdb we created earlier in the d
layer {
name: "mnist"
type: "Data"
+ transform_param {
+ scale: 0.00390625
+ }
data_param {
source: "mnist_train_lmdb"
backend: LMDB
batch_size: 64
- scale: 0.00390625
}
top: "data"
top: "label"
diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp
index 1df6b9a..6b902a4 100644
--- a/include/caffe/common.hpp
+++ b/include/caffe/common.hpp
@@ -18,6 +18,10 @@
#include "caffe/util/device_alternate.hpp"
+// Convert macro to string
+#define STRINGIFY(m) #m
+#define AS_STRING(m) STRINGIFY(m)
+
// gflags 2.1 issue: namespace google was changed to gflags without warning.
// Luckily we will be able to use GFLAGS_GFLAGS_H_ to detect if it is version
// 2.1. If yes, we will add a temporary solution to redirect the namespace.
diff --git a/include/caffe/layers/base_conv_layer.hpp b/include/caffe/layers/base_conv_layer.hpp
index f3def16..0160a83 100644
--- a/include/caffe/layers/base_conv_layer.hpp
+++ b/include/caffe/layers/base_conv_layer.hpp
@@ -68,6 +68,8 @@ class BaseConvolutionLayer : public Layer<Dtype> {
Blob<int> stride_;
/// @brief The spatial dimensions of the padding.
Blob<int> pad_;
+ /// @brief The spatial dimensions of the dilation.
+ Blob<int> dilation_;
/// @brief The spatial dimensions of the convolution input.
Blob<int> conv_input_shape_;
/// @brief The spatial dimensions of the col_buffer.
@@ -99,11 +101,12 @@ class BaseConvolutionLayer : public Layer<Dtype> {
conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
- stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff);
+ stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff);
} else {
im2col_nd_cpu(data, num_spatial_axes_, conv_input_shape_.cpu_data(),
col_buffer_shape_.data(), kernel_shape_.cpu_data(),
- pad_.cpu_data(), stride_.cpu_data(), col_buff);
+ pad_.cpu_data(), stride_.cpu_data(), dilation_.cpu_data(), col_buff);
}
}
inline void conv_col2im_cpu(const Dtype* col_buff, Dtype* data) {
@@ -112,11 +115,12 @@ class BaseConvolutionLayer : public Layer<Dtype> {
conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
- stride_.cpu_data()[0], stride_.cpu_data()[1], data);
+ stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1], data);
} else {
col2im_nd_cpu(col_buff, num_spatial_axes_, conv_input_shape_.cpu_data(),
col_buffer_shape_.data(), kernel_shape_.cpu_data(),
- pad_.cpu_data(), stride_.cpu_data(), data);
+ pad_.cpu_data(), stride_.cpu_data(), dilation_.cpu_data(), data);
}
}
#ifndef CPU_ONLY
@@ -126,12 +130,13 @@ class BaseConvolutionLayer : public Layer<Dtype> {
conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
- stride_.cpu_data()[0], stride_.cpu_data()[1], col_buff);
+ stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1], col_buff);
} else {
im2col_nd_gpu(data, num_spatial_axes_, num_kernels_im2col_,
conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
kernel_shape_.gpu_data(), pad_.gpu_data(),
- stride_.gpu_data(), col_buff);
+ stride_.gpu_data(), dilation_.gpu_data(), col_buff);
}
}
inline void conv_col2im_gpu(const Dtype* col_buff, Dtype* data) {
@@ -140,12 +145,13 @@ class BaseConvolutionLayer : public Layer<Dtype> {
conv_input_shape_.cpu_data()[1], conv_input_shape_.cpu_data()[2],
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
- stride_.cpu_data()[0], stride_.cpu_data()[1], data);
+ stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1], data);
} else {
col2im_nd_gpu(col_buff, num_spatial_axes_, num_kernels_col2im_,
conv_input_shape_.gpu_data(), col_buffer_.gpu_shape(),
kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(),
- data);
+ dilation_.gpu_data(), data);
}
}
#endif
diff --git a/include/caffe/layers/im2col_layer.hpp b/include/caffe/layers/bias_layer.hpp
similarity index 50%
copy from include/caffe/layers/im2col_layer.hpp
copy to include/caffe/layers/bias_layer.hpp
index 1d3b2eb..eedc3aa 100644
--- a/include/caffe/layers/im2col_layer.hpp
+++ b/include/caffe/layers/bias_layer.hpp
@@ -1,5 +1,5 @@
-#ifndef CAFFE_IM2COL_LAYER_HPP_
-#define CAFFE_IM2COL_LAYER_HPP_
+#ifndef CAFFE_BIAS_LAYER_HPP_
+#define CAFFE_BIAS_LAYER_HPP_
#include <vector>
@@ -10,27 +10,29 @@
namespace caffe {
/**
- * @brief A helper for image operations that rearranges image regions into
- * column vectors. Used by ConvolutionLayer to perform convolution
- * by matrix multiplication.
+ * @brief Computes a sum of two input Blobs, with the shape of the
+ * latter Blob "broadcast" to match the shape of the former.
+ * Equivalent to tiling the latter Blob, then computing the elementwise
+ * sum.
*
- * TODO(dox): thorough documentation for Forward, Backward, and proto params.
+ * The second input may be omitted, in which case it's learned as a parameter
+ * of the layer.
*/
template <typename Dtype>
-class Im2colLayer : public Layer<Dtype> {
+class BiasLayer : public Layer<Dtype> {
public:
- explicit Im2colLayer(const LayerParameter& param)
+ explicit BiasLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
- virtual inline const char* type() const { return "Im2col"; }
- virtual inline int ExactNumBottomBlobs() const { return 1; }
+ virtual inline const char* type() const { return "Bias"; }
+ virtual inline int MinBottomBlobs() const { return 1; }
+ virtual inline int MaxBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 1; }
- protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
@@ -40,24 +42,13 @@ class Im2colLayer : public Layer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
- /// @brief The spatial dimensions of a filter kernel.
- Blob<int> kernel_shape_;
- /// @brief The spatial dimensions of the stride.
- Blob<int> stride_;
- /// @brief The spatial dimensions of the padding.
- Blob<int> pad_;
-
- int num_spatial_axes_;
- int bottom_dim_;
- int top_dim_;
+ private:
+ Blob<Dtype> bias_multiplier_;
+ int outer_dim_, bias_dim_, inner_dim_, dim_;
+};
- int channel_axis_;
- int num_;
- int channels_;
- bool force_nd_im2col_;
-};
} // namespace caffe
-#endif // CAFFE_IM2COL_LAYER_HPP_
+#endif // CAFFE_BIAS_LAYER_HPP_
diff --git a/include/caffe/layers/conv_layer.hpp b/include/caffe/layers/conv_layer.hpp
index 1557476..93a618d 100644
--- a/include/caffe/layers/conv_layer.hpp
+++ b/include/caffe/layers/conv_layer.hpp
@@ -44,6 +44,9 @@ class ConvolutionLayer : public BaseConvolutionLayer<Dtype> {
* convolution, given by pad for equal dimensions or pad_h and pad_w for
* different padding. Input padding is computed implicitly instead of
* actually padding.
+ * - dilation (\b optional, default 1). The filter
+ * dilation, given by dilation_size for equal dimensions for different
+ * dilation. By default the convolution has dilation 1.
* - group (\b optional, default 1). The number of filter groups. Group
* convolution is a method for reducing parameterization by selectively
* connecting input and output channels. The input and output channel dimensions must be divisible
diff --git a/include/caffe/layers/elu_layer.hpp b/include/caffe/layers/elu_layer.hpp
new file mode 100644
index 0000000..0796e89
--- /dev/null
+++ b/include/caffe/layers/elu_layer.hpp
@@ -0,0 +1,86 @@
+#ifndef CAFFE_ELU_LAYER_HPP_
+#define CAFFE_ELU_LAYER_HPP_
+
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+#include "caffe/layers/neuron_layer.hpp"
+
+namespace caffe {
+
+/**
+ * @brief Exponential Linear Unit non-linearity @f$
+ * y = \left\{
+ * \begin{array}{lr}
+ * x & \mathrm{if} \; x > 0 \\
+ * \alpha (\exp(x)-1) & \mathrm{if} \; x \le 0
+ * \end{array} \right.
+ * @f$.
+ */
+template <typename Dtype>
+class ELULayer : public NeuronLayer<Dtype> {
+ public:
+ /**
+ * @param param provides ELUParameter elu_param,
+ * with ELULayer options:
+ * - alpha (\b optional, default 1).
+ * the value @f$ \alpha @f$ by which controls saturation for negative inputs.
+ */
+ explicit ELULayer(const LayerParameter& param)
+ : NeuronLayer<Dtype>(param) {}
+
+ virtual inline const char* type() const { return "ELU"; }
+
+ protected:
+ /**
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$
+ * @param top output Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the computed outputs @f$
+ * y = \left\{
+ * \begin{array}{lr}
+ * x & \mathrm{if} \; x > 0 \\
+ * \alpha (\exp(x)-1) & \mathrm{if} \; x \le 0
+ * \end{array} \right.
+ * @f$.
+ */
+ virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+
+ /**
+ * @brief Computes the error gradient w.r.t. the ELU inputs.
+ *
+ * @param top output Blob vector (length 1), providing the error gradient with
+ * respect to the outputs
+ * -# @f$ (N \times C \times H \times W) @f$
+ * containing error gradients @f$ \frac{\partial E}{\partial y} @f$
+ * with respect to computed outputs @f$ y @f$
+ * @param propagate_down see Layer::Backward.
+ * @param bottom input Blob vector (length 1)
+ * -# @f$ (N \times C \times H \times W) @f$
+ * the inputs @f$ x @f$; Backward fills their diff with
+ * gradients @f$
+ * \frac{\partial E}{\partial x} = \left\{
+ * \begin{array}{lr}
+ * 1 & \mathrm{if} \; x > 0 \\
+ * y + \alpha & \mathrm{if} \; x \le 0
+ * \end{array} \right.
+ * @f$ if propagate_down[0].
+ */
+ virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+ virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+};
+
+
+} // namespace caffe
+
+#endif // CAFFE_ELU_LAYER_HPP_
diff --git a/include/caffe/layers/im2col_layer.hpp b/include/caffe/layers/im2col_layer.hpp
index 1d3b2eb..71e32f7 100644
--- a/include/caffe/layers/im2col_layer.hpp
+++ b/include/caffe/layers/im2col_layer.hpp
@@ -46,6 +46,8 @@ class Im2colLayer : public Layer<Dtype> {
Blob<int> stride_;
/// @brief The spatial dimensions of the padding.
Blob<int> pad_;
+ /// @brief The spatial dimensions of the dilation.
+ Blob<int> dilation_;
int num_spatial_axes_;
int bottom_dim_;
diff --git a/include/caffe/layers/scale_layer.hpp b/include/caffe/layers/scale_layer.hpp
new file mode 100644
index 0000000..924df2e
--- /dev/null
+++ b/include/caffe/layers/scale_layer.hpp
@@ -0,0 +1,83 @@
+#ifndef CAFFE_SCALE_LAYER_HPP_
+#define CAFFE_SCALE_LAYER_HPP_
+
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+#include "caffe/layers/bias_layer.hpp"
+
+namespace caffe {
+
+/**
+ * @brief Computes a product of two input Blobs, with the shape of the
+ * latter Blob "broadcast" to match the shape of the former.
+ * Equivalent to tiling the latter Blob, then computing the elementwise
+ * product.
+ *
+ * The second input may be omitted, in which case it's learned as a parameter
+ * of the layer.
+ */
+template <typename Dtype>
+class ScaleLayer: public Layer<Dtype> {
+ public:
+ explicit ScaleLayer(const LayerParameter& param)
+ : Layer<Dtype>(param) {}
+ virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+
+ virtual inline const char* type() const { return "Scale"; }
+ // Scale
+ virtual inline int MinBottomBlobs() const { return 1; }
+ virtual inline int MaxBottomBlobs() const { return 2; }
+ virtual inline int ExactNumTopBlobs() const { return 1; }
+
+ protected:
+ /**
+ * In the below shape specifications, @f$ i @f$ denotes the value of the
+ * `axis` field given by `this->layer_param_.scale_param().axis()`, after
+ * canonicalization (i.e., conversion from negative to positive index,
+ * if applicable).
+ *
+ * @param bottom input Blob vector (length 2)
+ * -# @f$ (d_0 \times ... \times
+ * d_i \times ... \times d_j \times ... \times d_n) @f$
+ * the first factor @f$ x @f$
+ * -# @f$ (d_i \times ... \times d_j) @f$
+ * the second factor @f$ y @f$
+ * @param top output Blob vector (length 1)
+ * -# @f$ (d_0 \times ... \times
+ * d_i \times ... \times d_j \times ... \times d_n) @f$
+ * the product @f$ z = x y @f$ computed after "broadcasting" y.
+ * Equivalent to tiling @f$ y @f$ to have the same shape as @f$ x @f$,
+ * then computing the elementwise product.
+ */
+ virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
+ virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+ virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+ shared_ptr<Layer<Dtype> > bias_layer_;
+ vector<Blob<Dtype>*> bias_bottom_vec_;
+ vector<bool> bias_propagate_down_;
+ int bias_param_id_;
+
+ Blob<Dtype> sum_multiplier_;
+ Blob<Dtype> sum_result_;
+ Blob<Dtype> temp_;
+ int axis_;
+ int outer_dim_, scale_dim_, inner_dim_;
+};
+
+
+} // namespace caffe
+
+#endif // CAFFE_SCALE_LAYER_HPP_
diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index 1bf07d2..543133e 100644
--- a/include/caffe/net.hpp
+++ b/include/caffe/net.hpp
@@ -149,6 +149,18 @@ class Net {
inline const vector<vector<Blob<Dtype>*> >& top_vecs() const {
return top_vecs_;
}
+ /// @brief returns the ids of the top blobs of layer i
+ inline const vector<int> & top_ids(int i) const {
+ CHECK_GE(i, 0) << "Invalid layer id";
+ CHECK_LT(i, top_id_vecs_.size()) << "Invalid layer id";
+ return top_id_vecs_[i];
+ }
+ /// @brief returns the ids of the bottom blobs of layer i
+ inline const vector<int> & bottom_ids(int i) const {
+ CHECK_GE(i, 0) << "Invalid layer id";
+ CHECK_LT(i, bottom_id_vecs_.size()) << "Invalid layer id";
+ return bottom_id_vecs_[i];
+ }
inline const vector<vector<bool> >& bottom_need_backward() const {
return bottom_need_backward_;
}
@@ -179,6 +191,9 @@ class Net {
return param_names_index_;
}
inline const vector<int>& param_owners() const { return param_owners_; }
+ inline const vector<string>& param_display_names() const {
+ return param_display_names_;
+ }
/// @brief Input and output blob numbers
inline int num_inputs() const { return net_input_blobs_.size(); }
inline int num_outputs() const { return net_output_blobs_.size(); }
diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index 26b8e8e..38259ed 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -107,6 +107,7 @@ class Solver {
virtual void RestoreSolverStateFromHDF5(const string& state_file) = 0;
virtual void RestoreSolverStateFromBinaryProto(const string& state_file) = 0;
void DisplayOutputBlobs(const int net_id);
+ void UpdateSmoothedLoss(Dtype loss, int start_iter, int average_loss);
SolverParameter param_;
int iter_;
@@ -114,6 +115,8 @@ class Solver {
shared_ptr<Net<Dtype> > net_;
vector<shared_ptr<Net<Dtype> > > test_nets_;
vector<Callback*> callbacks_;
+ vector<Dtype> losses_;
+ Dtype smoothed_loss_;
// The root solver that holds root nets (actually containing shared layers)
// in data parallelism
diff --git a/include/caffe/util/im2col.hpp b/include/caffe/util/im2col.hpp
index d3eb6cc..a35bc6e 100644
--- a/include/caffe/util/im2col.hpp
+++ b/include/caffe/util/im2col.hpp
@@ -7,49 +7,53 @@ template <typename Dtype>
void im2col_nd_cpu(const Dtype* data_im, const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_col);
+ const int* dilation, Dtype* data_col);
template <typename Dtype>
void im2col_cpu(const Dtype* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, Dtype* data_col);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ Dtype* data_col);
template <typename Dtype>
void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_im);
+ const int* dilation, Dtype* data_im);
template <typename Dtype>
void col2im_cpu(const Dtype* data_col, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, Dtype* data_im);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ Dtype* data_im);
template <typename Dtype>
void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes,
const int col_size, const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_col);
+ const int* dilation, Dtype* data_col);
template <typename Dtype>
void im2col_gpu(const Dtype* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, Dtype* data_col);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ Dtype* data_col);
template <typename Dtype>
void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes,
const int im_size, const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_im);
+ const int* dilation, Dtype* data_im);
template <typename Dtype>
void col2im_gpu(const Dtype* data_col, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, Dtype* data_im);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ Dtype* data_im);
} // namespace caffe
diff --git a/matlab/+caffe/private/caffe_.cpp b/matlab/+caffe/private/caffe_.cpp
index 1641e14..1b1b2bf 100644
--- a/matlab/+caffe/private/caffe_.cpp
+++ b/matlab/+caffe/private/caffe_.cpp
@@ -504,6 +504,13 @@ static void write_mean(MEX_ARGS) {
mxFree(mean_proto_file);
}
+// Usage: caffe_('version')
+static void version(MEX_ARGS) {
+ mxCHECK(nrhs == 0, "Usage: caffe_('version')");
+ // Return version string
+ plhs[0] = mxCreateString(AS_STRING(CAFFE_VERSION));
+}
+
/** -----------------------------------------------------------------
** Available commands.
**/
@@ -542,6 +549,7 @@ static handler_registry handlers[] = {
{ "reset", reset },
{ "read_mean", read_mean },
{ "write_mean", write_mean },
+ { "version", version },
// The end.
{ "END", NULL },
};
diff --git a/matlab/+caffe/version.m b/matlab/+caffe/version.m
new file mode 100644
index 0000000..61cae4f
--- /dev/null
+++ b/matlab/+caffe/version.m
@@ -0,0 +1,7 @@
+function version_str = version()
+% version()
+% show Caffe's version.
+
+version_str = caffe_('version');
+
+end
diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py
index ccda1bc..e2881b8 100644
--- a/python/caffe/__init__.py
+++ b/python/caffe/__init__.py
@@ -1,5 +1,6 @@
from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver
from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list
+from ._caffe import __version__
from .proto.caffe_pb2 import TRAIN, TEST
from .classifier import Classifier
from .detector import Detector
diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp
index 69d5533..12a5745 100644
--- a/python/caffe/_caffe.cpp
+++ b/python/caffe/_caffe.cpp
@@ -212,6 +212,9 @@ BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1);
BOOST_PYTHON_MODULE(_caffe) {
// below, we prepend an underscore to methods that will be replaced
// in Python
+
+ bp::scope().attr("__version__") = AS_STRING(CAFFE_VERSION);
+
// Caffe utility functions
bp::def("set_mode_cpu", &set_mode_cpu);
bp::def("set_mode_gpu", &set_mode_gpu);
@@ -232,6 +235,10 @@ BOOST_PYTHON_MODULE(_caffe) {
.def("share_with", &Net<Dtype>::ShareTrainedLayersWith)
.add_property("_blob_loss_weights", bp::make_function(
&Net<Dtype>::blob_loss_weights, bp::return_internal_reference<>()))
+ .def("_bottom_ids", bp::make_function(&Net<Dtype>::bottom_ids,
+ bp::return_value_policy<bp::copy_const_reference>()))
+ .def("_top_ids", bp::make_function(&Net<Dtype>::top_ids,
+ bp::return_value_policy<bp::copy_const_reference>()))
.add_property("_blobs", bp::make_function(&Net<Dtype>::blobs,
bp::return_internal_reference<>()))
.add_property("layers", bp::make_function(&Net<Dtype>::layers,
diff --git a/python/caffe/draw.py b/python/caffe/draw.py
index f8bf572..cfa3fc5 100644
--- a/python/caffe/draw.py
+++ b/python/caffe/draw.py
@@ -10,7 +10,16 @@ Caffe network visualization: draw the NetParameter protobuffer.
"""
from caffe.proto import caffe_pb2
-import pydot
+
+"""
+pydot is not supported under python 3 and pydot2 doesn't work properly.
+pydotplus works nicely (pip install pydotplus)
+"""
+try:
+ # Try to load pydotplus
+ import pydotplus as pydot
+except ImportError:
+ import pydot
# Internal layer and blob styles.
LAYER_STYLE_DEFAULT = {'shape': 'record',
diff --git a/python/caffe/io.py b/python/caffe/io.py
index 14942be..7531058 100644
--- a/python/caffe/io.py
+++ b/python/caffe/io.py
@@ -292,7 +292,7 @@ def load_image(filename, color=True):
of size (H x W x 3) in RGB or
of size (H x W x 1) in grayscale.
"""
- img = skimage.img_as_float(skimage.io.imread(filename)).astype(np.float32)
+ img = skimage.img_as_float(skimage.io.imread(filename, as_grey=not color)).astype(np.float32)
if img.ndim == 2:
img = img[:, :, np.newaxis]
if color:
diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py
index 31dc702..3054110 100644
--- a/python/caffe/pycaffe.py
+++ b/python/caffe/pycaffe.py
@@ -276,6 +276,22 @@ def _Net_batch(self, blobs):
padding])
yield padded_batch
+
+class _Net_IdNameWrapper:
+ """
+ A simple wrapper that allows the ids property to be accessed as a dict
+ indexed by names. Used for top and bottom names
+ """
+ def __init__(self, net, func):
+ self.net, self.func = net, func
+
+ def __getitem__(self, name):
+ # Map the layer name to id
+ ids = self.func(self.net, list(self.net._layer_names).index(name))
+ # Map the blob id to name
+ id_to_name = list(self.net.blobs)
+ return [id_to_name[i] for i in ids]
+
# Attach methods to Net.
Net.blobs = _Net_blobs
Net.blob_loss_weights = _Net_blob_loss_weights
@@ -288,3 +304,5 @@ Net.set_input_arrays = _Net_set_input_arrays
Net._batch = _Net_batch
Net.inputs = _Net_inputs
Net.outputs = _Net_outputs
+Net.top_names = property(lambda n: _Net_IdNameWrapper(n, Net._top_ids))
+Net.bottom_names = property(lambda n: _Net_IdNameWrapper(n, Net._bottom_ids))
diff --git a/src/caffe/CMakeLists.txt b/src/caffe/CMakeLists.txt
index 40e6c11..8a80c94 100644
--- a/src/caffe/CMakeLists.txt
+++ b/src/caffe/CMakeLists.txt
@@ -20,6 +20,10 @@ endif()
add_library(caffe ${srcs})
target_link_libraries(caffe proto ${Caffe_LINKER_LIBS})
caffe_default_properties(caffe)
+set_target_properties(caffe PROPERTIES
+ VERSION ${CAFFE_TARGET_VERSION}
+ SOVERSION ${CAFFE_TARGET_SOVERSION}
+ )
# ---[ Tests
add_subdirectory(test)
diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp
index 76d851a..e967bd6 100644
--- a/src/caffe/layer_factory.cpp
+++ b/src/caffe/layer_factory.cpp
@@ -37,17 +37,32 @@ namespace caffe {
template <typename Dtype>
shared_ptr<Layer<Dtype> > GetConvolutionLayer(
const LayerParameter& param) {
- ConvolutionParameter_Engine engine = param.convolution_param().engine();
+ ConvolutionParameter conv_param = param.convolution_param();
+ ConvolutionParameter_Engine engine = conv_param.engine();
+#ifdef USE_CUDNN
+ bool use_dilation = false;
+ for (int i = 0; i < conv_param.dilation_size(); ++i) {
+ if (conv_param.dilation(i) > 1) {
+ use_dilation = true;
+ }
+ }
+#endif
if (engine == ConvolutionParameter_Engine_DEFAULT) {
engine = ConvolutionParameter_Engine_CAFFE;
#ifdef USE_CUDNN
- engine = ConvolutionParameter_Engine_CUDNN;
+ if (!use_dilation) {
+ engine = ConvolutionParameter_Engine_CUDNN;
+ }
#endif
}
if (engine == ConvolutionParameter_Engine_CAFFE) {
return shared_ptr<Layer<Dtype> >(new ConvolutionLayer<Dtype>(param));
#ifdef USE_CUDNN
} else if (engine == ConvolutionParameter_Engine_CUDNN) {
+ if (use_dilation) {
+ LOG(FATAL) << "CuDNN doesn't support the dilated convolution at Layer "
+ << param.name();
+ }
return shared_ptr<Layer<Dtype> >(new CuDNNConvolutionLayer<Dtype>(param));
#endif
} else {
@@ -76,7 +91,16 @@ shared_ptr<Layer<Dtype> > GetPoolingLayer(const LayerParameter& param) {
<< "Using Caffe's own pooling layer.";
return shared_ptr<Layer<Dtype> >(new PoolingLayer<Dtype>(param));
}
- return shared_ptr<Layer<Dtype> >(new CuDNNPoolingLayer<Dtype>(param));
+ // CuDNN assumes layers are not being modified in place, thus
+ // breaking our index tracking for updates in some cases in Caffe.
+ // Until there is a workaround in Caffe (index management) or
+ // cuDNN, use Caffe layer for max pooling, or don't use in place
+ // layers after max pooling layers
+ if (param.pooling_param().pool() == PoolingParameter_PoolMethod_MAX) {
+ return shared_ptr<Layer<Dtype> >(new PoolingLayer<Dtype>(param));
+ } else {
+ return shared_ptr<Layer<Dtype> >(new CuDNNPoolingLayer<Dtype>(param));
+ }
#endif
} else {
LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp
index f6f14cd..4a4c68e 100644
--- a/src/caffe/layers/base_conv_layer.cpp
+++ b/src/caffe/layers/base_conv_layer.cpp
@@ -36,7 +36,7 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
CHECK(num_kernel_dims == 1 || num_kernel_dims == num_spatial_axes_)
<< "kernel_size must be specified once, or once per spatial dimension "
<< "(kernel_size specified " << num_kernel_dims << " times; "
- << num_spatial_axes_ << " spatial dims);";
+ << num_spatial_axes_ << " spatial dims).";
for (int i = 0; i < num_spatial_axes_; ++i) {
kernel_shape_data[i] =
conv_param.kernel_size((num_kernel_dims == 1) ? 0 : i);
@@ -61,7 +61,7 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
num_stride_dims == num_spatial_axes_)
<< "stride must be specified once, or once per spatial dimension "
<< "(stride specified " << num_stride_dims << " times; "
- << num_spatial_axes_ << " spatial dims);";
+ << num_spatial_axes_ << " spatial dims).";
const int kDefaultStride = 1;
for (int i = 0; i < num_spatial_axes_; ++i) {
stride_data[i] = (num_stride_dims == 0) ? kDefaultStride :
@@ -85,13 +85,27 @@ void BaseConvolutionLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
num_pad_dims == num_spatial_axes_)
<< "pad must be specified once, or once per spatial dimension "
<< "(pad specified " << num_pad_dims << " times; "
- << num_spatial_axes_ << " spatial dims);";
+ << num_spatial_axes_ << " spatial dims).";
const int kDefaultPad = 0;
for (int i = 0; i < num_spatial_axes_; ++i) {
pad_data[i] = (num_pad_dims == 0) ? kDefaultPad :
conv_param.pad((num_pad_dims == 1) ? 0 : i);
}
}
+ // Setup dilation dimensions (dilation_).
+ dilation_.Reshape(spatial_dim_blob_shape);
+ int* dilation_data = dilation_.mutable_cpu_data();
+ const int num_dilation_dims = conv_param.dilation_size();
+ CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 ||
+ num_dilation_dims == num_spatial_axes_)
+ << "dilation must be specified once, or once per spatial dimension "
+ << "(dilation specified " << num_dilation_dims << " times; "
+ << num_spatial_axes_ << " spatial dims).";
+ const int kDefaultDilation = 1;
+ for (int i = 0; i < num_spatial_axes_; ++i) {
+ dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation :
+ conv_param.dilation((num_dilation_dims == 1) ? 0 : i);
+ }
// Special case: im2col is the identity for 1x1 convolution with stride 1
// and no padding, so flag for skipping the buffer and transformation.
is_1x1_ = true;
diff --git a/src/caffe/layers/bias_layer.cpp b/src/caffe/layers/bias_layer.cpp
new file mode 100644
index 0000000..4726a72
--- /dev/null
+++ b/src/caffe/layers/bias_layer.cpp
@@ -0,0 +1,121 @@
+#include <vector>
+
+#include "caffe/filler.hpp"
+#include "caffe/layers/bias_layer.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void BiasLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ if (bottom.size() == 1 && this->blobs_.size() > 0) {
+ LOG(INFO) << "Skipping parameter initialization";
+ } else if (bottom.size() == 1) {
+ // bias is a learned parameter; initialize it
+ const BiasParameter& param = this->layer_param_.bias_param();
+ const int axis = bottom[0]->CanonicalAxisIndex(param.axis());
+ const int num_axes = param.num_axes();
+ CHECK_GE(num_axes, -1) << "num_axes must be non-negative, "
+ << "or -1 to extend to the end of bottom[0]";
+ if (num_axes >= 0) {
+ CHECK_GE(bottom[0]->num_axes(), axis + num_axes)
+ << "bias blob's shape extends past bottom[0]'s shape when applied "
+ << "starting with bottom[0] axis = " << axis;
+ }
+ this->blobs_.resize(1);
+ const vector<int>::const_iterator& shape_start =
+ bottom[0]->shape().begin() + axis;
+ const vector<int>::const_iterator& shape_end =
+ (num_axes == -1) ? bottom[0]->shape().end() : (shape_start + num_axes);
+ vector<int> bias_shape(shape_start, shape_end);
+ this->blobs_[0].reset(new Blob<Dtype>(bias_shape));
+ shared_ptr<Filler<Dtype> > filler(GetFiller<Dtype>(param.filler()));
+ filler->Fill(this->blobs_[0].get());
+ }
+ this->param_propagate_down_.resize(this->blobs_.size(), true);
+}
+
+template <typename Dtype>
+void BiasLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ const BiasParameter& param = this->layer_param_.bias_param();
+ Blob<Dtype>* bias = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
+ // Always set axis == 0 in special case where bias is a scalar
+ // (num_axes == 0). Mathematically equivalent for any choice of axis, so the
+ // actual setting can be safely ignored; and computation is most efficient
+ // with axis == 0 and (therefore) outer_dim_ == 1.
+ const int axis = (bias->num_axes() == 0) ?
+ 0 : bottom[0]->CanonicalAxisIndex(param.axis());
+ CHECK_GE(bottom[0]->num_axes(), axis + bias->num_axes())
+ << "bias blob's shape extends past bottom[0]'s shape when applied "
+ << "starting with bottom[0] axis = " << axis;
+ for (int i = 0; i < bias->num_axes(); ++i) {
+ CHECK_EQ(bottom[0]->shape(axis + i), bias->shape(i))
+ << "dimension mismatch between bottom[0]->shape(" << axis + i
+ << ") and bias->shape(" << i << ")";
+ }
+ outer_dim_ = bottom[0]->count(0, axis);
+ bias_dim_ = bias->count();
+ inner_dim_ = bottom[0]->count(axis + bias->num_axes());
+ dim_ = bias_dim_ * inner_dim_;
+ if (bottom[0] != top[0]) {
+ top[0]->ReshapeLike(*bottom[0]);
+ }
+ bias_multiplier_.Reshape(vector<int>(1, inner_dim_));
+ if (bias_multiplier_.cpu_data()[inner_dim_ - 1] != Dtype(1)) {
+ caffe_set(inner_dim_, Dtype(1), bias_multiplier_.mutable_cpu_data());
+ }
+}
+
+template <typename Dtype>
+void BiasLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ const Dtype* bias_data =
+ ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data();
+ Dtype* top_data = top[0]->mutable_cpu_data();
+ if (bottom[0] != top[0]) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ caffe_copy(bottom[0]->count(), bottom_data, top_data);
+ }
+ for (int n = 0; n < outer_dim_; ++n) {
+ caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, bias_dim_,
+ inner_dim_, 1, Dtype(1), bias_data,
+ bias_multiplier_.cpu_data(), Dtype(1), top_data);
+ top_data += dim_;
+ }
+}
+
+template <typename Dtype>
+void BiasLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+ if (propagate_down[0] && bottom[0] != top[0]) {
+ const Dtype* top_diff = top[0]->cpu_diff();
+ Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
+ caffe_copy(bottom[0]->count(), top_diff, bottom_diff);
+ }
+ // in-place, we don't need to do anything with the data diff
+ const bool bias_param = (bottom.size() == 1);
+ if ((!bias_param && propagate_down[1]) ||
+ (bias_param && this->param_propagate_down_[0])) {
+ const Dtype* top_diff = top[0]->cpu_diff();
+ Dtype* bias_diff = (bias_param ? this->blobs_[0].get() : bottom[1])
+ ->mutable_cpu_diff();
+ bool accum = bias_param;
+ for (int n = 0; n < outer_dim_; ++n) {
+ caffe_cpu_gemv(CblasNoTrans, bias_dim_, inner_dim_, Dtype(1),
+ top_diff, bias_multiplier_.cpu_data(), Dtype(accum), bias_diff);
+ top_diff += dim_;
+ accum = true;
+ }
+ }
+}
+
+#ifdef CPU_ONLY
+STUB_GPU(BiasLayer);
+#endif
+
+INSTANTIATE_CLASS(BiasLayer);
+REGISTER_LAYER_CLASS(Bias);
+
+} // namespace caffe
diff --git a/src/caffe/layers/bias_layer.cu b/src/caffe/layers/bias_layer.cu
new file mode 100644
index 0000000..8ac913a
--- /dev/null
+++ b/src/caffe/layers/bias_layer.cu
@@ -0,0 +1,59 @@
+#include <vector>
+
+#include "caffe/filler.hpp"
+#include "caffe/layers/bias_layer.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+__global__ void BiasForward(const int n, const Dtype* in,
+ const Dtype* bias, const int bias_dim, const int inner_dim,
+ Dtype* out) {
+ CUDA_KERNEL_LOOP(index, n) {
+ const int bias_index = (index / inner_dim) % bias_dim;
+ out[index] = in[index] + bias[bias_index];
+ }
+}
+
+template <typename Dtype>
+void BiasLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ const int count = top[0]->count();
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ const Dtype* bias_data =
+ ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->gpu_data();
+ Dtype* top_data = top[0]->mutable_gpu_data();
+ BiasForward<Dtype> // NOLINT_NEXT_LINE(whitespace/operators)
+ <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+ count, bottom_data, bias_data, bias_dim_, inner_dim_, top_data);
+}
+
+template <typename Dtype>
+void BiasLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+ if (propagate_down[0] && bottom[0] != top[0]) {
+ const Dtype* top_diff = top[0]->gpu_diff();
+ Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+ caffe_copy(bottom[0]->count(), top_diff, bottom_diff);
+ }
+ // in-place, we don't need to do anything with the data diff
+ const bool bias_param = (bottom.size() == 1);
+ if ((!bias_param && propagate_down[1]) ||
+ (bias_param && this->param_propagate_down_[0])) {
+ const Dtype* top_diff = top[0]->gpu_diff();
+ Dtype* bias_diff = (bias_param ? this->blobs_[0].get() : bottom[1])
+ ->mutable_gpu_diff();
+ bool accum = bias_param;
+ for (int n = 0; n < outer_dim_; ++n) {
+ caffe_gpu_gemv(CblasNoTrans, bias_dim_, inner_dim_, Dtype(1),
+ top_diff, bias_multiplier_.gpu_data(), Dtype(accum), bias_diff);
+ top_diff += dim_;
+ accum = true;
+ }
+ }
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(BiasLayer);
+
+} // namespace caffe
diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp
index cff0978..5d522ab 100644
--- a/src/caffe/layers/conv_layer.cpp
+++ b/src/caffe/layers/conv_layer.cpp
@@ -9,11 +9,13 @@ void ConvolutionLayer<Dtype>::compute_output_shape() {
const int* kernel_shape_data = this->kernel_shape_.cpu_data();
const int* stride_data = this->stride_.cpu_data();
const int* pad_data = this->pad_.cpu_data();
+ const int* dilation_data = this->dilation_.cpu_data();
this->output_shape_.clear();
for (int i = 0; i < this->num_spatial_axes_; ++i) {
// i + 1 to skip channel axis
const int input_dim = this->input_shape(i + 1);
- const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i])
+ const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
+ const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent)
/ stride_data[i] + 1;
this->output_shape_.push_back(output_dim);
}
diff --git a/src/caffe/layers/deconv_layer.cpp b/src/caffe/layers/deconv_layer.cpp
index 275c056..20a460f 100644
--- a/src/caffe/layers/deconv_layer.cpp
+++ b/src/caffe/layers/deconv_layer.cpp
@@ -9,12 +9,14 @@ void DeconvolutionLayer<Dtype>::compute_output_shape() {
const int* kernel_shape_data = this->kernel_shape_.cpu_data();
const int* stride_data = this->stride_.cpu_data();
const int* pad_data = this->pad_.cpu_data();
+ const int* dilation_data = this->dilation_.cpu_data();
this->output_shape_.clear();
for (int i = 0; i < this->num_spatial_axes_; ++i) {
// i + 1 to skip channel axis
const int input_dim = this->input_shape(i + 1);
+ const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
const int output_dim = stride_data[i] * (input_dim - 1)
- + kernel_shape_data[i] - 2 * pad_data[i];
+ + kernel_extent - 2 * pad_data[i];
this->output_shape_.push_back(output_dim);
}
}
diff --git a/src/caffe/layers/elu_layer.cpp b/src/caffe/layers/elu_layer.cpp
new file mode 100644
index 0000000..a0f8763
--- /dev/null
+++ b/src/caffe/layers/elu_layer.cpp
@@ -0,0 +1,47 @@
+#include <algorithm>
+#include <vector>
+
+#include "caffe/layers/elu_layer.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void ELULayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ Dtype* top_data = top[0]->mutable_cpu_data();
+ const int count = bottom[0]->count();
+ Dtype alpha = this->layer_param_.elu_param().alpha();
+ for (int i = 0; i < count; ++i) {
+ top_data[i] = std::max(bottom_data[i], Dtype(0))
+ + alpha * (exp(std::min(bottom_data[i], Dtype(0))) - Dtype(1));
+ }
+}
+
+template <typename Dtype>
+void ELULayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down,
+ const vector<Blob<Dtype>*>& bottom) {
+ if (propagate_down[0]) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ const Dtype* top_data = top[0]->cpu_data();
+ const Dtype* top_diff = top[0]->cpu_diff();
+ Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
+ const int count = bottom[0]->count();
+ Dtype alpha = this->layer_param_.elu_param().alpha();
+ for (int i = 0; i < count; ++i) {
+ bottom_diff[i] = top_diff[i] * ((bottom_data[i] > 0)
+ + (alpha + top_data[i]) * (bottom_data[i] <= 0));
+ }
+ }
+}
+
+
+#ifdef CPU_ONLY
+STUB_GPU(ELULayer);
+#endif
+
+INSTANTIATE_CLASS(ELULayer);
+REGISTER_LAYER_CLASS(ELU);
+
+} // namespace caffe
diff --git a/src/caffe/layers/elu_layer.cu b/src/caffe/layers/elu_layer.cu
new file mode 100644
index 0000000..12545aa
--- /dev/null
+++ b/src/caffe/layers/elu_layer.cu
@@ -0,0 +1,62 @@
+#include <algorithm>
+#include <vector>
+
+#include "caffe/layers/elu_layer.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+__global__ void ELUForward(const int n, const Dtype* in, Dtype* out,
+ Dtype alpha) {
+ CUDA_KERNEL_LOOP(index, n) {
+ out[index] = in[index] > 0 ? in[index] :
+ alpha * (exp(in[index]) - 1);
+ }
+}
+
+template <typename Dtype>
+void ELULayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ Dtype* top_data = top[0]->mutable_gpu_data();
+ const int count = bottom[0]->count();
+ Dtype alpha = this->layer_param_.elu_param().alpha();
+ // NOLINT_NEXT_LINE(whitespace/operators)
+ ELUForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+ count, bottom_data, top_data, alpha);
+ CUDA_POST_KERNEL_CHECK;
+}
+
+template <typename Dtype>
+__global__ void ELUBackward(const int n, const Dtype* in_diff,
+ const Dtype* out_data, const Dtype* in_data,
+ Dtype* out_diff, Dtype alpha) {
+ CUDA_KERNEL_LOOP(index, n) {
+ out_diff[index] = in_data[index] > 0 ? in_diff[index] :
+ in_diff[index] * (out_data[index] + alpha);
+ }
+}
+
+template <typename Dtype>
+void ELULayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down,
+ const vector<Blob<Dtype>*>& bottom) {
+ if (propagate_down[0]) {
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ const Dtype* top_diff = top[0]->gpu_diff();
+ const Dtype* top_data = top[0]->gpu_data();
+ Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+ const int count = bottom[0]->count();
+ Dtype alpha = this->layer_param_.elu_param().alpha();
+ // NOLINT_NEXT_LINE(whitespace/operators)
+ ELUBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+ count, top_diff, top_data, bottom_data, bottom_diff, alpha);
+ CUDA_POST_KERNEL_CHECK;
+ }
+}
+
+
+INSTANTIATE_LAYER_GPU_FUNCS(ELULayer);
+
+
+} // namespace caffe
diff --git a/src/caffe/layers/flatten_layer.cpp b/src/caffe/layers/flatten_layer.cpp
index 651507e..d4ab393 100644
--- a/src/caffe/layers/flatten_layer.cpp
+++ b/src/caffe/layers/flatten_layer.cpp
@@ -7,6 +7,8 @@ namespace caffe {
template <typename Dtype>
void FlattenLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
+ CHECK_NE(top[0], bottom[0]) << this->type() << " Layer does not "
+ "allow in-place computation.";
const int start_axis = bottom[0]->CanonicalAxisIndex(
this->layer_param_.flatten_param().axis());
const int end_axis = bottom[0]->CanonicalAxisIndex(
diff --git a/src/caffe/layers/im2col_layer.cpp b/src/caffe/layers/im2col_layer.cpp
index c12e4f5..2fb9b3c 100644
--- a/src/caffe/layers/im2col_layer.cpp
+++ b/src/caffe/layers/im2col_layer.cpp
@@ -87,6 +87,20 @@ void Im2colLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
conv_param.pad((num_pad_dims == 1) ? 0 : i);
}
}
+ // Setup dilation dimensions (dilation_).
+ dilation_.Reshape(dim_blob_shape);
+ int* dilation_data = dilation_.mutable_cpu_data();
+ const int num_dilation_dims = conv_param.dilation_size();
+ CHECK(num_dilation_dims == 0 || num_dilation_dims == 1 ||
+ num_dilation_dims == num_spatial_axes_)
+ << "dilation must be specified once, or once per spatial dimension "
+ << "(dilation specified " << num_dilation_dims << " times; "
+ << num_spatial_axes_ << " spatial dims).";
+ const int kDefaultDilation = 1;
+ for (int i = 0; i < num_spatial_axes_; ++i) {
+ dilation_data[i] = (num_dilation_dims == 0) ? kDefaultDilation :
+ conv_param.dilation((num_dilation_dims == 1) ? 0 : i);
+ }
}
template <typename Dtype>
@@ -96,10 +110,12 @@ void Im2colLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const int* kernel_shape_data = kernel_shape_.cpu_data();
const int* stride_data = stride_.cpu_data();
const int* pad_data = pad_.cpu_data();
+ const int* dilation_data = dilation_.cpu_data();
for (int i = 0; i < num_spatial_axes_; ++i) {
top_shape[channel_axis_] *= kernel_shape_data[i];
const int input_dim = bottom[0]->shape(channel_axis_ + i + 1);
- const int output_dim = (input_dim + 2 * pad_data[i] - kernel_shape_data[i])
+ const int kernel_extent = dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
+ const int output_dim = (input_dim + 2 * pad_data[i] - kernel_extent)
/ stride_data[i] + 1;
top_shape[channel_axis_ + i + 1] = output_dim;
}
@@ -122,6 +138,7 @@ void Im2colLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
DCHECK_EQ(kernel_shape_.count(), num_spatial_axes_);
DCHECK_EQ(pad_.count(), num_spatial_axes_);
DCHECK_EQ(stride_.count(), num_spatial_axes_);
+ DCHECK_EQ(dilation_.count(), num_spatial_axes_);
if (!force_nd_im2col_ && num_spatial_axes_ == 2) {
im2col_cpu(bottom_data + n * bottom_dim_, channels_,
bottom[0]->shape(channel_axis_ + 1),
@@ -129,13 +146,14 @@ void Im2colLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1],
top_data + n * top_dim_);
} else {
im2col_nd_cpu(bottom_data + n * bottom_dim_, num_spatial_axes_,
bottom[0]->shape().data() + channel_axis_,
top[0]->shape().data() + channel_axis_,
kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(),
- top_data + n * top_dim_);
+ dilation_.cpu_data(), top_data + n * top_dim_);
}
}
}
@@ -153,13 +171,14 @@ void Im2colLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1],
bottom_diff + n * bottom_dim_);
} else {
col2im_nd_cpu(top_diff + n * top_dim_, num_spatial_axes_,
bottom[0]->shape().data() + channel_axis_,
top[0]->shape().data() + channel_axis_,
kernel_shape_.cpu_data(), pad_.cpu_data(), stride_.cpu_data(),
- bottom_diff + n * bottom_dim_);
+ dilation_.cpu_data(), bottom_diff + n * bottom_dim_);
}
}
}
diff --git a/src/caffe/layers/im2col_layer.cu b/src/caffe/layers/im2col_layer.cu
index 517b422..792c97f 100644
--- a/src/caffe/layers/im2col_layer.cu
+++ b/src/caffe/layers/im2col_layer.cu
@@ -19,13 +19,14 @@ void Im2colLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1],
top_data + n * top_dim_);
} else {
im2col_nd_gpu(bottom_data + n * bottom_dim_, num_spatial_axes_,
num_kernels, bottom[0]->gpu_shape() + channel_axis_,
top[0]->gpu_shape() + channel_axis_,
kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(),
- top_data + n * top_dim_);
+ dilation_.gpu_data(), top_data + n * top_dim_);
}
}
}
@@ -43,13 +44,14 @@ void Im2colLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
kernel_shape_.cpu_data()[0], kernel_shape_.cpu_data()[1],
pad_.cpu_data()[0], pad_.cpu_data()[1],
stride_.cpu_data()[0], stride_.cpu_data()[1],
+ dilation_.cpu_data()[0], dilation_.cpu_data()[1],
bottom_diff + n * bottom_dim_);
} else {
col2im_nd_gpu(top_diff + n * top_dim_, num_spatial_axes_, bottom_dim_,
bottom[0]->gpu_shape() + channel_axis_,
top[0]->gpu_shape() + channel_axis_,
kernel_shape_.gpu_data(), pad_.gpu_data(), stride_.gpu_data(),
- bottom_diff + n * bottom_dim_);
+ dilation_.gpu_data(), bottom_diff + n * bottom_dim_);
}
}
}
diff --git a/src/caffe/layers/reshape_layer.cpp b/src/caffe/layers/reshape_layer.cpp
index 82339f7..45dd090 100644
--- a/src/caffe/layers/reshape_layer.cpp
+++ b/src/caffe/layers/reshape_layer.cpp
@@ -7,6 +7,8 @@ namespace caffe {
template <typename Dtype>
void ReshapeLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
+ CHECK_NE(top[0], bottom[0]) << this->type() << " Layer does not "
+ "allow in-place computation.";
inferred_axis_ = -1;
copy_axes_.clear();
const BlobShape& top_blob_shape = this->layer_param_.reshape_param().shape();
diff --git a/src/caffe/layers/scale_layer.cpp b/src/caffe/layers/scale_layer.cpp
new file mode 100644
index 0000000..ecdbb12
--- /dev/null
+++ b/src/caffe/layers/scale_layer.cpp
@@ -0,0 +1,219 @@
+#include <algorithm>
+#include <vector>
+
+#include "caffe/filler.hpp"
+#include "caffe/layer_factory.hpp"
+#include "caffe/layers/scale_layer.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+void ScaleLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ const ScaleParameter& param = this->layer_param_.scale_param();
+ if (bottom.size() == 1 && this->blobs_.size() > 0) {
+ LOG(INFO) << "Skipping parameter initialization";
+ } else if (bottom.size() == 1) {
+ // scale is a learned parameter; initialize it
+ axis_ = bottom[0]->CanonicalAxisIndex(param.axis());
+ const int num_axes = param.num_axes();
+ CHECK_GE(num_axes, -1) << "num_axes must be non-negative, "
+ << "or -1 to extend to the end of bottom[0]";
+ if (num_axes >= 0) {
+ CHECK_GE(bottom[0]->num_axes(), axis_ + num_axes)
+ << "scale blob's shape extends past bottom[0]'s shape when applied "
+ << "starting with bottom[0] axis = " << axis_;
+ }
+ this->blobs_.resize(1);
+ const vector<int>::const_iterator& shape_start =
+ bottom[0]->shape().begin() + axis_;
+ const vector<int>::const_iterator& shape_end =
+ (num_axes == -1) ? bottom[0]->shape().end() : (shape_start + num_axes);
+ vector<int> scale_shape(shape_start, shape_end);
+ this->blobs_[0].reset(new Blob<Dtype>(scale_shape));
+ FillerParameter filler_param(param.filler());
+ if (!param.has_filler()) {
+ // Default to unit (1) filler for identity operation.
+ filler_param.set_type("constant");
+ filler_param.set_value(1);
+ }
+ shared_ptr<Filler<Dtype> > filler(GetFiller<Dtype>(filler_param));
+ filler->Fill(this->blobs_[0].get());
+ }
+ if (param.bias_term()) {
+ LayerParameter layer_param(this->layer_param_);
+ layer_param.set_type("Bias");
+ BiasParameter* bias_param = layer_param.mutable_bias_param();
+ bias_param->set_axis(param.axis());
+ if (bottom.size() > 1) {
+ bias_param->set_num_axes(bottom[1]->num_axes());
+ } else {
+ bias_param->set_num_axes(param.num_axes());
+ }
+ bias_param->mutable_filler()->CopyFrom(param.bias_filler());
+ bias_layer_ = LayerRegistry<Dtype>::CreateLayer(layer_param);
+ bias_bottom_vec_.resize(1);
+ bias_bottom_vec_[0] = bottom[0];
+ bias_layer_->SetUp(bias_bottom_vec_, top);
+ bias_param_id_ = this->blobs_.size();
+ this->blobs_.resize(bias_param_id_ + 1);
+ this->blobs_[bias_param_id_] = bias_layer_->blobs()[0];
+ bias_propagate_down_.resize(1, false);
+ }
+ this->param_propagate_down_.resize(this->blobs_.size(), true);
+}
+
+template <typename Dtype>
+void ScaleLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+ const ScaleParameter& param = this->layer_param_.scale_param();
+ Blob<Dtype>* scale = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
+ // Always set axis_ == 0 in special case where scale is a scalar
+ // (num_axes == 0). Mathematically equivalent for any choice of axis_, so the
+ // actual setting can be safely ignored; and computation is most efficient
+ // with axis_ == 0 and (therefore) outer_dim_ == 1. (Setting axis_ to
+ // bottom[0]->num_axes() - 1, giving inner_dim_ == 1, would be equally
+ // performant.)
+ axis_ = (scale->num_axes() == 0) ?
+ 0 : bottom[0]->CanonicalAxisIndex(param.axis());
+ CHECK_GE(bottom[0]->num_axes(), axis_ + scale->num_axes())
+ << "scale blob's shape extends past bottom[0]'s shape when applied "
+ << "starting with bottom[0] axis = " << axis_;
+ for (int i = 0; i < scale->num_axes(); ++i) {
+ CHECK_EQ(bottom[0]->shape(axis_ + i), scale->shape(i))
+ << "dimension mismatch between bottom[0]->shape(" << axis_ + i
+ << ") and scale->shape(" << i << ")";
+ }
+ outer_dim_ = bottom[0]->count(0, axis_);
+ scale_dim_ = scale->count();
+ inner_dim_ = bottom[0]->count(axis_ + scale->num_axes());
+ if (bottom[0] == top[0]) { // in-place computation
+ temp_.ReshapeLike(*bottom[0]);
+ } else {
+ top[0]->ReshapeLike(*bottom[0]);
+ }
+ sum_result_.Reshape(vector<int>(1, outer_dim_ * scale_dim_));
+ const int sum_mult_size = std::max(outer_dim_, inner_dim_);
+ sum_multiplier_.Reshape(vector<int>(1, sum_mult_size));
+ if (sum_multiplier_.cpu_data()[sum_mult_size - 1] != Dtype(1)) {
+ caffe_set(sum_mult_size, Dtype(1), sum_multiplier_.mutable_cpu_data());
+ }
+ if (bias_layer_) {
+ bias_bottom_vec_[0] = top[0];
+ bias_layer_->Reshape(bias_bottom_vec_, top);
+ }
+}
+
+template <typename Dtype>
+void ScaleLayer<Dtype>::Forward_cpu(
+ const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ if (bottom[0] == top[0]) {
+ // In-place computation; need to store bottom data before overwriting it.
+ // Note that this is only necessary for Backward; we could skip this if not
+ // doing Backward, but Caffe currently provides no way of knowing whether
+ // we'll need to do Backward at the time of the Forward call.
+ caffe_copy(bottom[0]->count(), bottom[0]->cpu_data(),
+ temp_.mutable_cpu_data());
+ }
+ const Dtype* scale_data =
+ ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data();
+ Dtype* top_data = top[0]->mutable_cpu_data();
+ for (int n = 0; n < outer_dim_; ++n) {
+ for (int d = 0; d < scale_dim_; ++d) {
+ const Dtype factor = scale_data[d];
+ caffe_cpu_scale(inner_dim_, factor, bottom_data, top_data);
+ bottom_data += inner_dim_;
+ top_data += inner_dim_;
+ }
+ }
+ if (bias_layer_) {
+ bias_layer_->Forward(bias_bottom_vec_, top);
+ }
+}
+
+template <typename Dtype>
+void ScaleLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+ if (bias_layer_ &&
+ this->param_propagate_down_[this->param_propagate_down_.size() - 1]) {
+ bias_layer_->Backward(top, bias_propagate_down_, bias_bottom_vec_);
+ }
+ const bool scale_param = (bottom.size() == 1);
+ Blob<Dtype>* scale = scale_param ? this->blobs_[0].get() : bottom[1];
+ if ((!scale_param && propagate_down[1]) ||
+ (scale_param && this->param_propagate_down_[0])) {
+ const Dtype* top_diff = top[0]->cpu_diff();
+ const bool in_place = (bottom[0] == top[0]);
+ const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->cpu_data();
+ // Hack: store big eltwise product in bottom[0] diff, except in the special
+ // case where this layer itself does the eltwise product, in which case we
+ // can store it directly in the scale diff, and we're done.
+ // If we're computing in-place (and not doing eltwise computation), this
+ // hack doesn't work and we store the product in temp_.
+ const bool is_eltwise = (bottom[0]->count() == scale->count());
+ Dtype* product = (is_eltwise ? scale->mutable_cpu_diff() :
+ (in_place ? temp_.mutable_cpu_data() : bottom[0]->mutable_cpu_diff()));
+ caffe_mul(top[0]->count(), top_diff, bottom_data, product);
+ if (!is_eltwise) {
+ Dtype* sum_result = NULL;
+ if (inner_dim_ == 1) {
+ sum_result = product;
+ } else if (sum_result_.count() == 1) {
+ const Dtype* sum_mult = sum_multiplier_.cpu_data();
+ Dtype* scale_diff = scale->mutable_cpu_diff();
+ if (scale_param) {
+ Dtype result = caffe_cpu_dot(inner_dim_, product, sum_mult);
+ *scale_diff += result;
+ } else {
+ *scale_diff = caffe_cpu_dot(inner_dim_, product, sum_mult);
+ }
+ } else {
+ const Dtype* sum_mult = sum_multiplier_.cpu_data();
+ sum_result = (outer_dim_ == 1) ?
+ scale->mutable_cpu_diff() : sum_result_.mutable_cpu_data();
+ caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
+ Dtype(1), product, sum_mult, Dtype(0), sum_result);
+ }
+ if (outer_dim_ != 1) {
+ const Dtype* sum_mult = sum_multiplier_.cpu_data();
+ Dtype* scale_diff = scale->mutable_cpu_diff();
+ if (scale_dim_ == 1) {
+ if (scale_param) {
+ Dtype result = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
+ *scale_diff += result;
+ } else {
+ *scale_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
+ }
+ } else {
+ caffe_cpu_gemv(CblasTrans, outer_dim_, scale_dim_,
+ Dtype(1), sum_result, sum_mult, Dtype(scale_param),
+ scale_diff);
+ }
+ }
+ }
+ }
+ if (propagate_down[0]) {
+ const Dtype* top_diff = top[0]->cpu_diff();
+ const Dtype* scale_data = scale->cpu_data();
+ Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
+ for (int n = 0; n < outer_dim_; ++n) {
+ for (int d = 0; d < scale_dim_; ++d) {
+ const Dtype factor = scale_data[d];
+ caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff);
+ bottom_diff += inner_dim_;
+ top_diff += inner_dim_;
+ }
+ }
+ }
+}
+
+#ifdef CPU_ONLY
+STUB_GPU(ScaleLayer);
+#endif
+
+INSTANTIATE_CLASS(ScaleLayer);
+REGISTER_LAYER_CLASS(Scale);
+
+} // namespace caffe
diff --git a/src/caffe/layers/scale_layer.cu b/src/caffe/layers/scale_layer.cu
new file mode 100644
index 0000000..fc9a806
--- /dev/null
+++ b/src/caffe/layers/scale_layer.cu
@@ -0,0 +1,135 @@
+#include <cfloat>
+#include <vector>
+
+#include "caffe/layers/scale_layer.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+template <typename Dtype>
+__global__ void ScaleForward(const int n, const Dtype* in,
+ const Dtype* scale, const int scale_dim, const int inner_dim,
+ Dtype* out) {
+ CUDA_KERNEL_LOOP(index, n) {
+ const int scale_index = (index / inner_dim) % scale_dim;
+ out[index] = in[index] * scale[scale_index];
+ }
+}
+
+template <typename Dtype>
+__global__ void ScaleBiasForward(const int n, const Dtype* in,
+ const Dtype* scale, const Dtype* bias,
+ const int scale_dim, const int inner_dim, Dtype* out) {
+ CUDA_KERNEL_LOOP(index, n) {
+ const int scale_index = (index / inner_dim) % scale_dim;
+ out[index] = in[index] * scale[scale_index] + bias[scale_index];
+ }
+}
+
+template <typename Dtype>
+void ScaleLayer<Dtype>::Forward_gpu(
+ const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
+ const int count = top[0]->count();
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ if (bottom[0] == top[0]) {
+ // in-place computation; need to store bottom data before overwriting it.
+ // Note that this is only necessary for Backward; we could skip this if not
+ // doing Backward, but Caffe currently provides no way of knowing whether
+ // we'll need to do Backward at the time of the Forward call.
+ caffe_copy(bottom[0]->count(), bottom[0]->gpu_data(),
+ temp_.mutable_gpu_data());
+ }
+ const Dtype* scale_data =
+ ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->gpu_data();
+ Dtype* top_data = top[0]->mutable_gpu_data();
+ if (bias_layer_) {
+ const Dtype* bias_data = this->blobs_[bias_param_id_]->gpu_data();
+ ScaleBiasForward<Dtype> // NOLINT_NEXT_LINE(whitespace/operators)
+ <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+ count, bottom_data, scale_data, bias_data, scale_dim_, inner_dim_,
+ top_data);
+ } else {
+ ScaleForward<Dtype> // NOLINT_NEXT_LINE(whitespace/operators)
+ <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+ count, bottom_data, scale_data, scale_dim_, inner_dim_, top_data);
+ }
+}
+
+template <typename Dtype>
+void ScaleLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+ if (bias_layer_ &&
+ this->param_propagate_down_[this->param_propagate_down_.size() - 1]) {
+ bias_layer_->Backward(top, bias_propagate_down_, bias_bottom_vec_);
+ }
+ const bool scale_param = (bottom.size() == 1);
+ Blob<Dtype>* scale = scale_param ? this->blobs_[0].get() : bottom[1];
+ if ((!scale_param && propagate_down[1]) ||
+ (scale_param && this->param_propagate_down_[0])) {
+ const Dtype* top_diff = top[0]->gpu_diff();
+ const bool in_place = (bottom[0] == top[0]);
+ const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->gpu_data();
+ // Hack: store big eltwise product in bottom[0] diff, except in the special
+ // case where this layer itself does the eltwise product, in which case we
+ // can store it directly in the scale diff, and we're done.
+ // If we're computing in-place (and not doing eltwise computation), this
+ // hack doesn't work and we store the product in temp_.
+ const bool is_eltwise = (bottom[0]->count() == scale->count());
+ Dtype* product = (is_eltwise ? scale->mutable_gpu_diff() :
+ (in_place ? temp_.mutable_gpu_data() : bottom[0]->mutable_gpu_diff()));
+ caffe_gpu_mul(top[0]->count(), top_diff, bottom_data, product);
+ if (!is_eltwise) {
+ Dtype* sum_result = NULL;
+ if (inner_dim_ == 1) {
+ sum_result = product;
+ } else if (sum_result_.count() == 1) {
+ const Dtype* sum_mult = sum_multiplier_.gpu_data();
+ Dtype* scale_diff = scale->mutable_cpu_diff();
+ if (scale_param) {
+ Dtype result;
+ caffe_gpu_dot(inner_dim_, product, sum_mult, &result);
+ *scale_diff += result;
+ } else {
+ caffe_gpu_dot(inner_dim_, product, sum_mult, scale_diff);
+ }
+ } else {
+ const Dtype* sum_mult = sum_multiplier_.gpu_data();
+ sum_result = (outer_dim_ == 1) ?
+ scale->mutable_gpu_diff() : sum_result_.mutable_gpu_data();
+ caffe_gpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
+ Dtype(1), product, sum_mult, Dtype(0), sum_result);
+ }
+ if (outer_dim_ != 1) {
+ const Dtype* sum_mult = sum_multiplier_.gpu_data();
+ if (scale_dim_ == 1) {
+ Dtype* scale_diff = scale->mutable_cpu_diff();
+ if (scale_param) {
+ Dtype result;
+ caffe_gpu_dot(outer_dim_, sum_mult, sum_result, &result);
+ *scale_diff += result;
+ } else {
+ caffe_gpu_dot(outer_dim_, sum_mult, sum_result, scale_diff);
+ }
+ } else {
+ Dtype* scale_diff = scale->mutable_gpu_diff();
+ caffe_gpu_gemv(CblasTrans, outer_dim_, scale_dim_,
+ Dtype(1), sum_result, sum_mult, Dtype(scale_param),
+ scale_diff);
+ }
+ }
+ }
+ }
+ if (propagate_down[0]) {
+ const int count = top[0]->count();
+ const Dtype* top_diff = top[0]->gpu_diff();
+ const Dtype* scale_data = scale->gpu_data();
+ Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+ ScaleForward<Dtype> // NOLINT_NEXT_LINE(whitespace/operators)
+ <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+ count, top_diff, scale_data, scale_dim_, inner_dim_, bottom_diff);
+ }
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(ScaleLayer);
+
+} // namespace caffe
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 787369f..6493a72 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -306,7 +306,7 @@ message ParamSpec {
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
-// LayerParameter next available layer-specific ID: 140 (last added: batch_norm_param)
+// LayerParameter next available layer-specific ID: 143 (last added: scale_param)
message LayerParameter {
optional string name = 1; // the layer name
optional string type = 2; // the layer type
@@ -356,6 +356,7 @@ message LayerParameter {
optional AccuracyParameter accuracy_param = 102;
optional ArgMaxParameter argmax_param = 103;
optional BatchNormParameter batch_norm_param = 139;
+ optional BiasParameter bias_param = 141;
optional ConcatParameter concat_param = 104;
optional ContrastiveLossParameter contrastive_loss_param = 105;
optional ConvolutionParameter convolution_param = 106;
@@ -363,6 +364,7 @@ message LayerParameter {
optional DropoutParameter dropout_param = 108;
optional DummyDataParameter dummy_data_param = 109;
optional EltwiseParameter eltwise_param = 110;
+ optional ELUParameter elu_param = 140;
optional EmbedParameter embed_param = 137;
optional ExpParameter exp_param = 111;
optional FlattenParameter flatten_param = 135;
@@ -383,6 +385,7 @@ message LayerParameter {
optional ReductionParameter reduction_param = 136;
optional ReLUParameter relu_param = 123;
optional ReshapeParameter reshape_param = 133;
+ optional ScaleParameter scale_param = 142;
optional SigmoidParameter sigmoid_param = 124;
optional SoftmaxParameter softmax_param = 125;
optional SPPParameter spp_param = 132;
@@ -497,6 +500,38 @@ message BatchNormParameter {
optional float eps = 3 [default = 1e-5];
}
+message BiasParameter {
+ // The first axis of bottom[0] (the first input Blob) along which to apply
+ // bottom[1] (the second input Blob). May be negative to index from the end
+ // (e.g., -1 for the last axis).
+ //
+ // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
+ // top[0] will have the same shape, and bottom[1] may have any of the
+ // following shapes (for the given value of axis):
+ // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
+ // (axis == 1 == -3) 3; 3x40; 3x40x60
+ // (axis == 2 == -2) 40; 40x60
+ // (axis == 3 == -1) 60
+ // Furthermore, bottom[1] may have the empty shape (regardless of the value of
+ // "axis") -- a scalar bias.
+ optional int32 axis = 1 [default = 1];
+
+ // (num_axes is ignored unless just one bottom is given and the bias is
+ // a learned parameter of the layer. Otherwise, num_axes is determined by the
+ // number of axes by the second bottom.)
+ // The number of axes of the input (bottom[0]) covered by the bias
+ // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
+ // Set num_axes := 0, to add a zero-axis Blob: a scalar.
+ optional int32 num_axes = 2 [default = 1];
+
+ // (filler is ignored unless just one bottom is given and the bias is
+ // a learned parameter of the layer.)
+ // The initialization for the learned bias parameter.
+ // Default is the zero (0) initialization, resulting in the BiasLayer
+ // initially performing the identity operation.
+ optional FillerParameter filler = 3;
+}
+
message ContrastiveLossParameter {
// margin for dissimilar pair
optional float margin = 1 [default = 1.0];
@@ -518,6 +553,10 @@ message ConvolutionParameter {
repeated uint32 pad = 3; // The padding size; defaults to 0
repeated uint32 kernel_size = 4; // The kernel size
repeated uint32 stride = 6; // The stride; defaults to 1
+ // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
+ // holes. (Kernel dilation is sometimes referred to by its use in the
+ // algorithme à trous from Holschneider et al. 1987.)
+ repeated uint32 dilation = 18; // The dilation; defaults to 1
// For 2D convolution only, the *_h and *_w versions may also be used to
// specify both spatial dimensions.
@@ -629,6 +668,14 @@ message EltwiseParameter {
optional bool stable_prod_grad = 3 [default = true];
}
+// Message that stores parameters used by ELULayer
+message ELUParameter {
+ // Described in:
+ // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate
+ // Deep Network Learning by Exponential Linear Units (ELUs). arXiv
+ optional float alpha = 1 [default = 1];
+}
+
// Message that stores parameters used by EmbedLayer
message EmbedParameter {
optional uint32 num_output = 1; // The number of outputs for the layer
@@ -947,6 +994,43 @@ message ReshapeParameter {
optional int32 num_axes = 3 [default = -1];
}
+message ScaleParameter {
+ // The first axis of bottom[0] (the first input Blob) along which to apply
+ // bottom[1] (the second input Blob). May be negative to index from the end
+ // (e.g., -1 for the last axis).
+ //
+ // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
+ // top[0] will have the same shape, and bottom[1] may have any of the
+ // following shapes (for the given value of axis):
+ // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
+ // (axis == 1 == -3) 3; 3x40; 3x40x60
+ // (axis == 2 == -2) 40; 40x60
+ // (axis == 3 == -1) 60
+ // Furthermore, bottom[1] may have the empty shape (regardless of the value of
+ // "axis") -- a scalar multiplier.
+ optional int32 axis = 1 [default = 1];
+
+ // (num_axes is ignored unless just one bottom is given and the scale is
+ // a learned parameter of the layer. Otherwise, num_axes is determined by the
+ // number of axes by the second bottom.)
+ // The number of axes of the input (bottom[0]) covered by the scale
+ // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
+ // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar.
+ optional int32 num_axes = 2 [default = 1];
+
+ // (filler is ignored unless just one bottom is given and the scale is
+ // a learned parameter of the layer.)
+ // The initialization for the learned scale parameter.
+ // Default is the unit (1) initialization, resulting in the ScaleLayer
+ // initially performing the identity operation.
+ optional FillerParameter filler = 3;
+
+ // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
+ // may be more efficient). Initialized with bias_filler (defaults to 0).
+ optional bool bias_term = 4 [default = false];
+ optional FillerParameter bias_filler = 5;
+}
+
message SigmoidParameter {
enum Engine {
DEFAULT = 0;
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 95d7506..a5ccf9c 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
@@ -196,8 +196,8 @@ void Solver<Dtype>::Step(int iters) {
const int start_iter = iter_;
const int stop_iter = iter_ + iters;
int average_loss = this->param_.average_loss();
- vector<Dtype> losses;
- Dtype smoothed_loss = 0;
+ losses_.clear();
+ smoothed_loss_ = 0;
while (iter_ < stop_iter) {
// zero-init the params
@@ -224,18 +224,10 @@ void Solver<Dtype>::Step(int iters) {
}
loss /= param_.iter_size();
// average the loss across iterations for smoothed reporting
- if (losses.size() < average_loss) {
- losses.push_back(loss);
- int size = losses.size();
- smoothed_loss = (smoothed_loss * (size - 1) + loss) / size;
- } else {
- int idx = (iter_ - start_iter) % average_loss;
- smoothed_loss += (loss - losses[idx]) / average_loss;
- losses[idx] = loss;
- }
+ UpdateSmoothedLoss(loss, start_iter, average_loss);
if (display) {
LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_
- << ", loss = " << smoothed_loss;
+ << ", loss = " << smoothed_loss_;
const vector<Blob<Dtype>*>& result = net_->output_blobs();
int score_index = 0;
for (int j = 0; j < result.size(); ++j) {
@@ -298,6 +290,7 @@ void Solver<Dtype>::Solve(const char* resume_file) {
// For a network that is trained by the solver, no bottom or top vecs
// should be given, and we will just provide dummy vecs.
+ int start_iter = iter_;
Step(param_.max_iter() - iter_);
// If we haven't already, save a snapshot after optimization, unless
// overridden by setting snapshot_after_train := false
@@ -316,9 +309,13 @@ void Solver<Dtype>::Solve(const char* resume_file) {
// updated the parameters "max_iter" times -- this final pass is only done to
// display the loss, which is computed in the forward pass.
if (param_.display() && iter_ % param_.display() == 0) {
+ int average_loss = this->param_.average_loss();
Dtype loss;
net_->ForwardPrefilled(&loss);
- LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss;
+
+ UpdateSmoothedLoss(loss, start_iter, average_loss);
+
+ LOG(INFO) << "Iteration " << iter_ << ", loss = " << smoothed_loss_;
}
if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
TestAll();
@@ -483,6 +480,20 @@ void Solver<Dtype>::Restore(const char* state_file) {
}
}
+template <typename Dtype>
+void Solver<Dtype>::UpdateSmoothedLoss(Dtype loss, int start_iter,
+ int average_loss) {
+ if (losses_.size() < average_loss) {
+ losses_.push_back(loss);
+ int size = losses_.size();
+ smoothed_loss_ = (smoothed_loss_ * (size - 1) + loss) / size;
+ } else {
+ int idx = (iter_ - start_iter) % average_loss;
+ smoothed_loss_ += (loss - losses_[idx]) / average_loss;
+ losses_[idx] = loss;
+ }
+}
+
INSTANTIATE_CLASS(Solver);
} // namespace caffe
diff --git a/src/caffe/solvers/adadelta_solver.cpp b/src/caffe/solvers/adadelta_solver.cpp
index a37899e..fd30f19 100644
--- a/src/caffe/solvers/adadelta_solver.cpp
+++ b/src/caffe/solvers/adadelta_solver.cpp
@@ -16,6 +16,12 @@ void AdaDeltaSolver<Dtype>::AdaDeltaPreSolve() {
}
}
+#ifndef CPU_ONLY
+template <typename Dtype>
+void adadelta_update_gpu(int N, Dtype* g, Dtype* h, Dtype* h2, Dtype momentum,
+ Dtype delta, Dtype local_rate);
+#endif
+
template <typename Dtype>
void AdaDeltaSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
@@ -85,61 +91,11 @@ void AdaDeltaSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
}
case Caffe::GPU: {
#ifndef CPU_ONLY
- // compute square of gradient in update
- caffe_gpu_powx(net_params[param_id]->count(),
- net_params[param_id]->gpu_diff(), Dtype(2),
- this->update_[param_id]->mutable_gpu_data());
-
- // update history of gradients
- caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
- this->update_[param_id]->gpu_data(), momentum,
- this->history_[param_id]->mutable_gpu_data());
-
- // add delta to history to guard against dividing by zero later
- caffe_gpu_set(net_params[param_id]->count(), delta,
- this->temp_[param_id]->mutable_gpu_data());
-
- caffe_gpu_add(net_params[param_id]->count(),
- this->temp_[param_id]->gpu_data(),
- this->history_[update_history_offset + param_id]->gpu_data(),
- this->update_[param_id]->mutable_gpu_data());
-
- caffe_gpu_add(net_params[param_id]->count(),
- this->temp_[param_id]->gpu_data(),
- this->history_[param_id]->gpu_data(),
- this->temp_[param_id]->mutable_gpu_data());
-
- // divide history of updates by history of gradients
- caffe_gpu_div(net_params[param_id]->count(),
- this->update_[param_id]->gpu_data(),
- this->temp_[param_id]->gpu_data(),
- this->update_[param_id]->mutable_gpu_data());
-
- // jointly compute the RMS of both for update and gradient history
- caffe_gpu_powx(net_params[param_id]->count(),
- this->update_[param_id]->gpu_data(), Dtype(0.5),
- this->update_[param_id]->mutable_gpu_data());
-
- // compute the update and copy to net_diff
- caffe_gpu_mul(net_params[param_id]->count(),
- net_params[param_id]->gpu_diff(),
- this->update_[param_id]->gpu_data(),
- net_params[param_id]->mutable_gpu_diff());
-
- // compute square of update
- caffe_gpu_powx(net_params[param_id]->count(),
- net_params[param_id]->gpu_diff(), Dtype(2),
- this->update_[param_id]->mutable_gpu_data());
-
- // update history of updates
- caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) - momentum,
- this->update_[param_id]->gpu_data(), momentum,
- this->history_[update_history_offset + param_id]->mutable_gpu_data());
-
- // apply learning rate
- caffe_gpu_scale(net_params[param_id]->count(), local_rate,
- net_params[param_id]->gpu_diff(),
- net_params[param_id]->mutable_gpu_diff());
+ adadelta_update_gpu(net_params[param_id]->count(),
+ net_params[param_id]->mutable_gpu_diff(),
+ this->history_[param_id]->mutable_gpu_data(),
+ this->history_[update_history_offset + param_id]->mutable_gpu_data(),
+ momentum, delta, local_rate);
#else
NO_GPU;
#endif
diff --git a/src/caffe/solvers/adadelta_solver.cu b/src/caffe/solvers/adadelta_solver.cu
new file mode 100644
index 0000000..6c94585
--- /dev/null
+++ b/src/caffe/solvers/adadelta_solver.cu
@@ -0,0 +1,30 @@
+#include "caffe/util/math_functions.hpp"
+
+
+namespace caffe {
+
+template <typename Dtype>
+__global__ void AdaDeltaUpdate(int N, Dtype* g, Dtype* h, Dtype* h2,
+ Dtype momentum, Dtype delta, Dtype local_rate) {
+ CUDA_KERNEL_LOOP(i, N) {
+ float gi = g[i];
+ float hi = h[i] = momentum * h[i] + (1-momentum) * gi * gi;
+ gi = gi * sqrt((h2[i] + delta) / (hi + delta));
+ h2[i] = momentum * h2[i] + (1-momentum) * gi * gi;
+ g[i] = local_rate * gi;
+ }
+}
+template <typename Dtype>
+void adadelta_update_gpu(int N, Dtype* g, Dtype* h, Dtype* h2, Dtype momentum,
+ Dtype delta, Dtype local_rate) {
+ AdaDeltaUpdate<Dtype> // NOLINT_NEXT_LINE(whitespace/operators)
+ <<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
+ N, g, h, h2, momentum, delta, local_rate);
+ CUDA_POST_KERNEL_CHECK;
+}
+template void adadelta_update_gpu<float>(int , float*, float*, float*,
+ float, float, float);
+template void adadelta_update_gpu<double>(int, double*, double*, double*,
+ double, double, double);
+
+} // namespace caffe
diff --git a/src/caffe/solvers/adagrad_solver.cpp b/src/caffe/solvers/adagrad_solver.cpp
index 5e40632..e78eadc 100644
--- a/src/caffe/solvers/adagrad_solver.cpp
+++ b/src/caffe/solvers/adagrad_solver.cpp
@@ -4,6 +4,12 @@
namespace caffe {
+#ifndef CPU_ONLY
+template <typename Dtype>
+void adagrad_update_gpu(int N, Dtype* g, Dtype* h, Dtype delta,
+ Dtype local_rate);
+#endif
+
template <typename Dtype>
void AdaGradSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
CHECK(Caffe::root_solver());
@@ -45,34 +51,9 @@ void AdaGradSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
}
case Caffe::GPU: {
#ifndef CPU_ONLY
- // compute square of gradient in update
- caffe_gpu_powx(net_params[param_id]->count(),
- net_params[param_id]->gpu_diff(), Dtype(2),
- this->update_[param_id]->mutable_gpu_data());
-
- // update history
- caffe_gpu_add(net_params[param_id]->count(),
- this->update_[param_id]->gpu_data(),
- this->history_[param_id]->gpu_data(),
- this->history_[param_id]->mutable_gpu_data());
-
- // prepare update
- caffe_gpu_powx(net_params[param_id]->count(),
- this->history_[param_id]->gpu_data(), Dtype(0.5),
- this->update_[param_id]->mutable_gpu_data());
-
- caffe_gpu_add_scalar(net_params[param_id]->count(),
- delta, this->update_[param_id]->mutable_gpu_data());
-
- caffe_gpu_div(net_params[param_id]->count(),
- net_params[param_id]->gpu_diff(),
- this->update_[param_id]->gpu_data(),
- this->update_[param_id]->mutable_gpu_data());
-
- // scale and copy
- caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
- this->update_[param_id]->gpu_data(), Dtype(0),
- net_params[param_id]->mutable_gpu_diff());
+ adagrad_update_gpu(net_params[param_id]->count(),
+ net_params[param_id]->mutable_gpu_diff(),
+ this->history_[param_id]->mutable_gpu_data(), delta, local_rate);
#else
NO_GPU;
#endif
diff --git a/src/caffe/solvers/adagrad_solver.cu b/src/caffe/solvers/adagrad_solver.cu
new file mode 100644
index 0000000..adefd55
--- /dev/null
+++ b/src/caffe/solvers/adagrad_solver.cu
@@ -0,0 +1,26 @@
+#include "caffe/util/math_functions.hpp"
+
+
+namespace caffe {
+
+template <typename Dtype>
+__global__ void AdaGradUpdate(int N, Dtype* g, Dtype* h, Dtype delta,
+ Dtype local_rate) {
+ CUDA_KERNEL_LOOP(i, N) {
+ float gi = g[i];
+ float hi = h[i] = h[i] + gi*gi;
+ g[i] = local_rate * gi / (sqrt(hi) + delta);
+ }
+}
+template <typename Dtype>
+void adagrad_update_gpu(int N, Dtype* g, Dtype* h, Dtype delta,
+ Dtype local_rate) {
+ AdaGradUpdate<Dtype> // NOLINT_NEXT_LINE(whitespace/operators)
+ <<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
+ N, g, h, delta, local_rate);
+ CUDA_POST_KERNEL_CHECK;
+}
+template void adagrad_update_gpu<float>(int, float*, float*, float, float);
+template void adagrad_update_gpu<double>(int, double*, double*, double, double);
+
+} // namespace caffe
diff --git a/src/caffe/solvers/adam_solver.cpp b/src/caffe/solvers/adam_solver.cpp
index cb0fbfe..4a91f00 100644
--- a/src/caffe/solvers/adam_solver.cpp
+++ b/src/caffe/solvers/adam_solver.cpp
@@ -16,6 +16,12 @@ void AdamSolver<Dtype>::AdamPreSolve() {
}
}
+#ifndef CPU_ONLY
+template <typename Dtype>
+void adam_update_gpu(int N, Dtype* g, Dtype* m, Dtype* v, Dtype beta1,
+ Dtype beta2, Dtype eps_hat, Dtype corrected_local_rate);
+#endif
+
template <typename Dtype>
void AdamSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
@@ -30,7 +36,7 @@ void AdamSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
Blob<Dtype>* val_v = this->history_[param_id + update_history_offset].get();
Blob<Dtype>* val_t = this->temp_[param_id].get();
- const int t = this->iter_ + 1;
+ const int t = this->iter_ + 1;
const Dtype correction = std::sqrt(Dtype(1) - pow(beta2, t)) /
(Dtype(1.) - pow(beta1, t));
const int N = net_params[param_id]->count();
@@ -69,34 +75,9 @@ void AdamSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
}
case Caffe::GPU: {
#ifndef CPU_ONLY
- // update m <- \beta_1 m_{t-1} + (1-\beta_1)g_t
- caffe_gpu_axpby(N, Dtype(1)-beta1,
- net_params[param_id]->gpu_diff(), beta1,
- val_m->mutable_gpu_data());
-
- // update v <- \beta_2 m_{t-1} + (1-\beta_2)g_t^2
- caffe_gpu_mul(N,
- net_params[param_id]->gpu_diff(),
- net_params[param_id]->gpu_diff(),
- val_t->mutable_gpu_data());
- caffe_gpu_axpby(N, Dtype(1)-beta2,
- val_t->gpu_data(), beta2,
- val_v->mutable_gpu_data());
-
- // set update
- caffe_gpu_powx(N,
- val_v->gpu_data(), Dtype(0.5),
- val_t->mutable_gpu_data());
- caffe_gpu_add_scalar(N, eps_hat,
- val_t->mutable_gpu_data());
- caffe_gpu_div(N,
- val_m->gpu_data(),
- val_t->gpu_data(),
- val_t->mutable_gpu_data());
-
- caffe_gpu_scale(N, local_rate*correction,
- val_t->gpu_data(),
- net_params[param_id]->mutable_gpu_diff());
+ adam_update_gpu(N, net_params[param_id]->mutable_gpu_diff(),
+ val_m->mutable_gpu_data(), val_v->mutable_gpu_data(), beta1, beta2,
+ eps_hat, local_rate*correction);
#else
NO_GPU;
#endif
diff --git a/src/caffe/solvers/adam_solver.cu b/src/caffe/solvers/adam_solver.cu
new file mode 100644
index 0000000..917ae10
--- /dev/null
+++ b/src/caffe/solvers/adam_solver.cu
@@ -0,0 +1,29 @@
+#include "caffe/util/math_functions.hpp"
+
+
+namespace caffe {
+
+template <typename Dtype>
+__global__ void AdamUpdate(int N, Dtype* g, Dtype* m, Dtype* v,
+ Dtype beta1, Dtype beta2, Dtype eps_hat, Dtype corrected_local_rate) {
+ CUDA_KERNEL_LOOP(i, N) {
+ float gi = g[i];
+ float mi = m[i] = m[i]*beta1 + gi*(1-beta1);
+ float vi = v[i] = v[i]*beta2 + gi*gi*(1-beta2);
+ g[i] = corrected_local_rate * mi / (sqrt(vi) + eps_hat);
+ }
+}
+template <typename Dtype>
+void adam_update_gpu(int N, Dtype* g, Dtype* m, Dtype* v, Dtype beta1,
+ Dtype beta2, Dtype eps_hat, Dtype corrected_local_rate) {
+ AdamUpdate<Dtype> // NOLINT_NEXT_LINE(whitespace/operators)
+ <<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
+ N, g, m, v, beta1, beta2, eps_hat, corrected_local_rate);
+ CUDA_POST_KERNEL_CHECK;
+}
+template void adam_update_gpu<float>(int, float*, float*, float*,
+ float, float, float, float);
+template void adam_update_gpu<double>(int, double*, double*, double*,
+ double, double, double, double);
+
+} // namespace caffe
diff --git a/src/caffe/solvers/nesterov_solver.cpp b/src/caffe/solvers/nesterov_solver.cpp
index 34bf01e..23ab2d4 100644
--- a/src/caffe/solvers/nesterov_solver.cpp
+++ b/src/caffe/solvers/nesterov_solver.cpp
@@ -4,6 +4,12 @@
namespace caffe {
+#ifndef CPU_ONLY
+template <typename Dtype>
+void nesterov_update_gpu(int N, Dtype* g, Dtype* h, Dtype momentum,
+ Dtype local_rate);
+#endif
+
template <typename Dtype>
void NesterovSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
CHECK(Caffe::root_solver());
@@ -36,25 +42,10 @@ void NesterovSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
}
case Caffe::GPU: {
#ifndef CPU_ONLY
- // save history momentum for stepping back
- caffe_copy(net_params[param_id]->count(),
- this->history_[param_id]->gpu_data(),
- this->update_[param_id]->mutable_gpu_data());
-
- // update history
- caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
- net_params[param_id]->gpu_diff(), momentum,
- this->history_[param_id]->mutable_gpu_data());
-
- // compute update: step back then over step
- caffe_gpu_axpby(net_params[param_id]->count(), Dtype(1) + momentum,
- this->history_[param_id]->gpu_data(), -momentum,
- this->update_[param_id]->mutable_gpu_data());
-
- // copy
- caffe_copy(net_params[param_id]->count(),
- this->update_[param_id]->gpu_data(),
- net_params[param_id]->mutable_gpu_diff());
+ nesterov_update_gpu(net_params[param_id]->count(),
+ net_params[param_id]->mutable_gpu_diff(),
+ this->history_[param_id]->mutable_gpu_data(),
+ momentum, local_rate);
#else
NO_GPU;
#endif
diff --git a/src/caffe/solvers/nesterov_solver.cu b/src/caffe/solvers/nesterov_solver.cu
new file mode 100644
index 0000000..57a456b
--- /dev/null
+++ b/src/caffe/solvers/nesterov_solver.cu
@@ -0,0 +1,27 @@
+#include "caffe/util/math_functions.hpp"
+
+
+namespace caffe {
+
+template <typename Dtype>
+__global__ void NesterovUpdate(int N, Dtype* g, Dtype* h,
+ Dtype momentum, Dtype local_rate) {
+ CUDA_KERNEL_LOOP(i, N) {
+ float hi = h[i];
+ float hi_new = h[i] = momentum * hi + local_rate * g[i];
+ g[i] = (1+momentum) * hi_new - momentum * hi;
+ }
+}
+template <typename Dtype>
+void nesterov_update_gpu(int N, Dtype* g, Dtype* h, Dtype momentum,
+ Dtype local_rate) {
+ NesterovUpdate<Dtype> // NOLINT_NEXT_LINE(whitespace/operators)
+ <<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
+ N, g, h, momentum, local_rate);
+ CUDA_POST_KERNEL_CHECK;
+}
+template void nesterov_update_gpu<float>(int, float*, float*, float, float);
+template void nesterov_update_gpu<double>(int, double*, double*, double,
+ double);
+
+} // namespace caffe
diff --git a/src/caffe/solvers/rmsprop_solver.cpp b/src/caffe/solvers/rmsprop_solver.cpp
index c624767..3251ee4 100644
--- a/src/caffe/solvers/rmsprop_solver.cpp
+++ b/src/caffe/solvers/rmsprop_solver.cpp
@@ -4,6 +4,12 @@
namespace caffe {
+#ifndef CPU_ONLY
+template <typename Dtype>
+void rmsprop_update_gpu(int N, Dtype* g, Dtype* h, Dtype rms_decay,
+ Dtype delta, Dtype local_rate);
+#endif
+
template <typename Dtype>
void RMSPropSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
@@ -45,31 +51,10 @@ void RMSPropSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
break;
case Caffe::GPU:
#ifndef CPU_ONLY
- // compute square of gradient in update
- caffe_gpu_powx(net_params[param_id]->count(),
- net_params[param_id]->gpu_diff(), Dtype(2),
- this->update_[param_id]->mutable_gpu_data());
-
- // update history
- caffe_gpu_axpby(net_params[param_id] -> count(),
- Dtype(1-rms_decay), this->update_[param_id]->gpu_data(),
- rms_decay, this->history_[param_id]-> mutable_gpu_data());
-
- // prepare update
- caffe_gpu_powx(net_params[param_id]->count(),
- this->history_[param_id]->gpu_data(), Dtype(0.5),
- this->update_[param_id]->mutable_gpu_data());
-
- caffe_gpu_add_scalar(net_params[param_id]->count(),
- delta, this->update_[param_id]->mutable_gpu_data());
-
- caffe_gpu_div(net_params[param_id]->count(),
- net_params[param_id]->gpu_diff(), this->update_[param_id]->gpu_data(),
- this->update_[param_id]->mutable_gpu_data());
-
- caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
- this->update_[param_id]->gpu_data(), Dtype(0),
- net_params[param_id]->mutable_gpu_diff());
+ rmsprop_update_gpu(net_params[param_id]->count(),
+ net_params[param_id]->mutable_gpu_diff(),
+ this->history_[param_id]->mutable_gpu_data(),
+ rms_decay, delta, local_rate);
#else
NO_GPU;
#endif
diff --git a/src/caffe/solvers/rmsprop_solver.cu b/src/caffe/solvers/rmsprop_solver.cu
new file mode 100644
index 0000000..c5ffd32
--- /dev/null
+++ b/src/caffe/solvers/rmsprop_solver.cu
@@ -0,0 +1,28 @@
+#include "caffe/util/math_functions.hpp"
+
+
+namespace caffe {
+
+template <typename Dtype>
+__global__ void RMSPropUpdate(int N, Dtype* g, Dtype* h,
+ Dtype rms_decay, Dtype delta, Dtype local_rate) {
+ CUDA_KERNEL_LOOP(i, N) {
+ float gi = g[i];
+ float hi = h[i] = rms_decay*h[i] + (1-rms_decay)*gi*gi;
+ g[i] = local_rate * g[i] / (sqrt(hi) + delta);
+ }
+}
+template <typename Dtype>
+void rmsprop_update_gpu(int N, Dtype* g, Dtype* h, Dtype rms_decay,
+ Dtype delta, Dtype local_rate) {
+ RMSPropUpdate<Dtype> // NOLINT_NEXT_LINE(whitespace/operators)
+ <<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
+ N, g, h, rms_decay, delta, local_rate);
+ CUDA_POST_KERNEL_CHECK;
+}
+template void rmsprop_update_gpu<float>(int, float*, float*, float, float,
+ float);
+template void rmsprop_update_gpu<double>(int, double*, double*, double, double,
+ double);
+
+} // namespace caffe
diff --git a/src/caffe/solvers/sgd_solver.cpp b/src/caffe/solvers/sgd_solver.cpp
index 32bf19b..f30f316 100644
--- a/src/caffe/solvers/sgd_solver.cpp
+++ b/src/caffe/solvers/sgd_solver.cpp
@@ -203,6 +203,12 @@ void SGDSolver<Dtype>::Regularize(int param_id) {
}
}
+#ifndef CPU_ONLY
+template <typename Dtype>
+void sgd_update_gpu(int N, Dtype* g, Dtype* h, Dtype momentum,
+ Dtype local_rate);
+#endif
+
template <typename Dtype>
void SGDSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
const vector<Blob<Dtype>*>& net_params = this->net_->learnable_params();
@@ -222,12 +228,10 @@ void SGDSolver<Dtype>::ComputeUpdateValue(int param_id, Dtype rate) {
}
case Caffe::GPU: {
#ifndef CPU_ONLY
- caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
- net_params[param_id]->gpu_diff(), momentum,
- history_[param_id]->mutable_gpu_data());
- caffe_copy(net_params[param_id]->count(),
- history_[param_id]->gpu_data(),
- net_params[param_id]->mutable_gpu_diff());
+ sgd_update_gpu(net_params[param_id]->count(),
+ net_params[param_id]->mutable_gpu_diff(),
+ history_[param_id]->mutable_gpu_data(),
+ momentum, local_rate);
#else
NO_GPU;
#endif
diff --git a/src/caffe/solvers/sgd_solver.cu b/src/caffe/solvers/sgd_solver.cu
new file mode 100644
index 0000000..e541035
--- /dev/null
+++ b/src/caffe/solvers/sgd_solver.cu
@@ -0,0 +1,24 @@
+#include "caffe/util/math_functions.hpp"
+
+
+namespace caffe {
+
+template <typename Dtype>
+__global__ void SGDUpdate(int N, Dtype* g, Dtype* h,
+ Dtype momentum, Dtype local_rate) {
+ CUDA_KERNEL_LOOP(i, N) {
+ g[i] = h[i] = momentum*h[i] + local_rate*g[i];
+ }
+}
+template <typename Dtype>
+void sgd_update_gpu(int N, Dtype* g, Dtype* h, Dtype momentum,
+ Dtype local_rate) {
+ SGDUpdate<Dtype> // NOLINT_NEXT_LINE(whitespace/operators)
+ <<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
+ N, g, h, momentum, local_rate);
+ CUDA_POST_KERNEL_CHECK;
+}
+template void sgd_update_gpu<float>(int, float*, float*, float, float);
+template void sgd_update_gpu<double>(int, double*, double*, double, double);
+
+} // namespace caffe
diff --git a/src/caffe/test/test_bias_layer.cpp b/src/caffe/test/test_bias_layer.cpp
new file mode 100644
index 0000000..3862e76
--- /dev/null
+++ b/src/caffe/test/test_bias_layer.cpp
@@ -0,0 +1,467 @@
+#include <algorithm>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/layers/bias_layer.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+namespace caffe {
+
+template <typename TypeParam>
+class BiasLayerTest : public MultiDeviceTest<TypeParam> {
+ typedef typename TypeParam::Dtype Dtype;
+
+ protected:
+ BiasLayerTest()
+ : blob_bottom_(new Blob<Dtype>(2, 3, 4, 5)),
+ blob_bottom_eltwise_(new Blob<Dtype>(2, 3, 4, 5)),
+ blob_bottom_broadcast_0_(new Blob<Dtype>()),
+ blob_bottom_broadcast_1_(new Blob<Dtype>()),
+ blob_bottom_broadcast_2_(new Blob<Dtype>()),
+ blob_bottom_bias_(new Blob<Dtype>(vector<int>())),
+ blob_top_(new Blob<Dtype>()) {
+ Caffe::set_random_seed(1701);
+ vector<int> broadcast_shape(2);
+ broadcast_shape[0] = 2; broadcast_shape[1] = 3;
+ this->blob_bottom_broadcast_0_->Reshape(broadcast_shape);
+ broadcast_shape[0] = 3; broadcast_shape[1] = 4;
+ this->blob_bottom_broadcast_1_->Reshape(broadcast_shape);
+ broadcast_shape[0] = 4; broadcast_shape[1] = 5;
+ this->blob_bottom_broadcast_2_->Reshape(broadcast_shape);
+ FillerParameter filler_param;
+ filler_param.set_min(1);
+ filler_param.set_max(10);
+ UniformFiller<Dtype> filler(filler_param);
+ filler.Fill(this->blob_bottom_);
+ filler.Fill(this->blob_bottom_eltwise_);
+ filler.Fill(this->blob_bottom_broadcast_0_);
+ filler.Fill(this->blob_bottom_broadcast_1_);
+ filler.Fill(this->blob_bottom_broadcast_2_);
+ filler.Fill(this->blob_bottom_bias_);
+ blob_bottom_vec_.push_back(blob_bottom_);
+ blob_top_vec_.push_back(blob_top_);
+ }
+ virtual ~BiasLayerTest() {
+ delete blob_bottom_;
+ delete blob_bottom_eltwise_;
+ delete blob_bottom_broadcast_0_;
+ delete blob_bottom_broadcast_1_;
+ delete blob_bottom_broadcast_2_;
+ delete blob_bottom_bias_;
+ delete blob_top_;
+ }
+ Blob<Dtype>* const blob_bottom_;
+ Blob<Dtype>* const blob_bottom_eltwise_;
+ Blob<Dtype>* const blob_bottom_broadcast_0_;
+ Blob<Dtype>* const blob_bottom_broadcast_1_;
+ Blob<Dtype>* const blob_bottom_broadcast_2_;
+ Blob<Dtype>* const blob_bottom_bias_;
+ Blob<Dtype>* const blob_top_;
+ vector<Blob<Dtype>*> blob_bottom_vec_;
+ vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+TYPED_TEST_CASE(BiasLayerTest, TestDtypesAndDevices);
+
+TYPED_TEST(BiasLayerTest, TestForwardEltwise) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(0);
+ shared_ptr<BiasLayer<Dtype> > layer(new BiasLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype* data = this->blob_top_->cpu_data();
+ const int count = this->blob_top_->count();
+ const Dtype* in_data_a = this->blob_bottom_->cpu_data();
+ const Dtype* in_data_b = this->blob_bottom_eltwise_->cpu_data();
+ for (int i = 0; i < count; ++i) {
+ EXPECT_NEAR(data[i], in_data_a[i] + in_data_b[i], 1e-5);
+ }
+}
+
+TYPED_TEST(BiasLayerTest, TestForwardEltwiseInPlace) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation
+ Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
+ orig_bottom.CopyFrom(*this->blob_bottom_);
+ this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(0);
+ shared_ptr<BiasLayer<Dtype> > layer(new BiasLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype* data = this->blob_bottom_->cpu_data();
+ const int count = this->blob_bottom_->count();
+ const Dtype* in_data_a = orig_bottom.cpu_data();
+ const Dtype* in_data_b = this->blob_bottom_eltwise_->cpu_data();
+ for (int i = 0; i < count; ++i) {
+ EXPECT_NEAR(data[i], in_data_a[i] + in_data_b[i], 1e-5);
+ }
+}
+
+TYPED_TEST(BiasLayerTest, TestBackwardEltwiseInPlace) {
+ typedef typename TypeParam::Dtype Dtype;
+ Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
+ orig_bottom.CopyFrom(*this->blob_bottom_);
+ this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(0);
+ shared_ptr<BiasLayer<Dtype> > layer(new BiasLayer<Dtype>(layer_param));
+ Blob<Dtype> top_diff(this->blob_bottom_->shape());
+ FillerParameter filler_param;
+ filler_param.set_type("gaussian");
+ filler_param.set_std(1);
+ GaussianFiller<Dtype> filler(filler_param);
+ filler.Fill(&top_diff);
+ vector<bool> propagate_down(2, true);
+ // Run forward + backward without in-place computation;
+ // save resulting bottom diffs.
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ caffe_copy(top_diff.count(), top_diff.cpu_data(),
+ this->blob_top_->mutable_cpu_diff());
+ layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
+ const bool kReshape = true;
+ const bool kCopyDiff = true;
+ Blob<Dtype> orig_bottom_diff;
+ orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape);
+ Blob<Dtype> orig_bias_diff;
+ orig_bias_diff.CopyFrom(*this->blob_bottom_eltwise_,
+ kCopyDiff, kReshape);
+ // Rerun forward + backward with in-place computation;
+ // check that resulting bottom diffs are the same.
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ caffe_copy(top_diff.count(), top_diff.cpu_data(),
+ this->blob_bottom_->mutable_cpu_diff());
+ layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
+ for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+ EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i],
+ this->blob_bottom_->cpu_diff()[i], 1e-5);
+ }
+ for (int i = 0; i < this->blob_bottom_eltwise_->count(); ++i) {
+ EXPECT_NEAR(orig_bias_diff.cpu_diff()[i],
+ this->blob_bottom_eltwise_->cpu_diff()[i], 1e-5);
+ }
+}
+
+TYPED_TEST(BiasLayerTest, TestForwardEltwiseWithParam) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ BiasParameter* bias_param = layer_param.mutable_bias_param();
+ bias_param->set_axis(0);
+ bias_param->set_num_axes(-1);
+ bias_param->mutable_filler()->set_type("gaussian");
+ shared_ptr<BiasLayer<Dtype> > layer(new BiasLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype* data = this->blob_top_->cpu_data();
+ const int count = this->blob_top_->count();
+ const Dtype* in_data_a = this->blob_bottom_->cpu_data();
+ const Dtype* in_data_b = layer->blobs()[0]->cpu_data();
+ for (int i = 0; i < count; ++i) {
+ EXPECT_NEAR(data[i], in_data_a[i] + in_data_b[i], 1e-5);
+ }
+}
+
+TYPED_TEST(BiasLayerTest, TestForwardBroadcastBegin) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_0_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(0);
+ shared_ptr<BiasLayer<Dtype> > layer(new BiasLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+ for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+ for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+ for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+ EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+ this->blob_bottom_->data_at(n, c, h, w) +
+ this->blob_bottom_broadcast_0_->data_at(n, c, 0, 0),
+ 1e-5);
+ }
+ }
+ }
+ }
+}
+
+TYPED_TEST(BiasLayerTest, TestForwardBroadcastMiddle) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(1);
+ shared_ptr<BiasLayer<Dtype> > layer(new BiasLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+ for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+ for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+ for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+ EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+ this->blob_bottom_->data_at(n, c, h, w) +
+ this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0),
+ 1e-5);
+ }
+ }
+ }
+ }
+}
+
+TYPED_TEST(BiasLayerTest, TestForwardBroadcastMiddleInPlace) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation
+ Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
+ orig_bottom.CopyFrom(*this->blob_bottom_);
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(1);
+ shared_ptr<BiasLayer<Dtype> > layer(new BiasLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+ for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+ for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+ for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+ EXPECT_NEAR(this->blob_bottom_->data_at(n, c, h, w),
+ orig_bottom.data_at(n, c, h, w) +
+ this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0),
+ 1e-5);
+ }
+ }
+ }
+ }
+}
+
+TYPED_TEST(BiasLayerTest, TestBackwardBroadcastMiddleInPlace) {
+ typedef typename TypeParam::Dtype Dtype;
+ Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
+ orig_bottom.CopyFrom(*this->blob_bottom_);
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(1);
+ shared_ptr<BiasLayer<Dtype> > layer(new BiasLayer<Dtype>(layer_param));
+ Blob<Dtype> top_diff(this->blob_bottom_->shape());
+ FillerParameter filler_param;
+ filler_param.set_type("gaussian");
+ filler_param.set_std(1);
+ GaussianFiller<Dtype> filler(filler_param);
+ filler.Fill(&top_diff);
+ vector<bool> propagate_down(2, true);
+ // Run forward + backward without in-place computation;
+ // save resulting bottom diffs.
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ caffe_copy(top_diff.count(), top_diff.cpu_data(),
+ this->blob_top_->mutable_cpu_diff());
+ layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
+ const bool kReshape = true;
+ const bool kCopyDiff = true;
+ Blob<Dtype> orig_bottom_diff;
+ orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape);
+ Blob<Dtype> orig_bias_diff;
+ orig_bias_diff.CopyFrom(*this->blob_bottom_broadcast_1_,
+ kCopyDiff, kReshape);
+ // Rerun forward + backward with in-place computation;
+ // check that resulting bottom diffs are the same.
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ caffe_copy(top_diff.count(), top_diff.cpu_data(),
+ this->blob_bottom_->mutable_cpu_diff());
+ layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
+ for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+ EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i],
+ this->blob_bottom_->cpu_diff()[i], 1e-5);
+ }
+ for (int i = 0; i < this->blob_bottom_broadcast_1_->count(); ++i) {
+ EXPECT_NEAR(orig_bias_diff.cpu_diff()[i],
+ this->blob_bottom_broadcast_1_->cpu_diff()[i], 1e-5);
+ }
+}
+
+TYPED_TEST(BiasLayerTest, TestForwardBroadcastMiddleWithParam) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ BiasParameter* bias_param = layer_param.mutable_bias_param();
+ bias_param->set_axis(1);
+ bias_param->set_num_axes(2);
+ bias_param->mutable_filler()->set_type("gaussian");
+ shared_ptr<BiasLayer<Dtype> > layer(new BiasLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+ for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+ for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+ for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+ EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+ this->blob_bottom_->data_at(n, c, h, w) +
+ layer->blobs()[0]->data_at(c, h, 0, 0), 1e-5);
+ }
+ }
+ }
+ }
+}
+
+TYPED_TEST(BiasLayerTest, TestForwardBroadcastEnd) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_2_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(2);
+ shared_ptr<BiasLayer<Dtype> > layer(new BiasLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+ for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+ for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+ for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+ EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+ this->blob_bottom_->data_at(n, c, h, w) +
+ this->blob_bottom_broadcast_2_->data_at(h, w, 0, 0),
+ 1e-5);
+ }
+ }
+ }
+ }
+}
+
+TYPED_TEST(BiasLayerTest, TestForwardBias) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_bias_);
+ LayerParameter layer_param;
+ shared_ptr<BiasLayer<Dtype> > layer(new BiasLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype* data = this->blob_top_->cpu_data();
+ const int count = this->blob_top_->count();
+ const Dtype* in_data = this->blob_bottom_->cpu_data();
+ const Dtype bias = *this->blob_bottom_bias_->cpu_data();
+ for (int i = 0; i < count; ++i) {
+ EXPECT_NEAR(data[i], in_data[i] + bias, 1e-5);
+ }
+}
+
+TYPED_TEST(BiasLayerTest, TestForwardBiasAxis2) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_bias_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(2);
+ shared_ptr<BiasLayer<Dtype> > layer(new BiasLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype* data = this->blob_top_->cpu_data();
+ const int count = this->blob_top_->count();
+ const Dtype* in_data = this->blob_bottom_->cpu_data();
+ const Dtype bias = *this->blob_bottom_bias_->cpu_data();
+ for (int i = 0; i < count; ++i) {
+ EXPECT_NEAR(data[i], in_data[i] + bias, 1e-5);
+ }
+}
+
+TYPED_TEST(BiasLayerTest, TestGradientEltwise) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(0);
+ BiasLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(BiasLayerTest, TestGradientEltwiseWithParam) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ BiasParameter* bias_param = layer_param.mutable_bias_param();
+ bias_param->set_axis(0);
+ bias_param->set_num_axes(-1);
+ bias_param->mutable_filler()->set_type("gaussian");
+ BiasLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(BiasLayerTest, TestGradientBroadcastBegin) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_0_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(0);
+ BiasLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(BiasLayerTest, TestGradientBroadcastMiddle) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(1);
+ BiasLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(BiasLayerTest, TestGradientBroadcastMiddleWithParam) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+ LayerParameter layer_param;
+ BiasParameter* bias_param = layer_param.mutable_bias_param();
+ bias_param->set_axis(1);
+ bias_param->set_num_axes(2);
+ bias_param->mutable_filler()->set_type("gaussian");
+ BiasLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(BiasLayerTest, TestGradientBroadcastEnd) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_2_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(2);
+ BiasLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(BiasLayerTest, TestGradientBias) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_bias_);
+ LayerParameter layer_param;
+ BiasLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(BiasLayerTest, TestGradientBiasAxis2) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_bias_);
+ LayerParameter layer_param;
+ layer_param.mutable_bias_param()->set_axis(2);
+ BiasLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+} // namespace caffe
diff --git a/src/caffe/test/test_convolution_layer.cpp b/src/caffe/test/test_convolution_layer.cpp
index e2d43f3..9bb19d1 100644
--- a/src/caffe/test/test_convolution_layer.cpp
+++ b/src/caffe/test/test_convolution_layer.cpp
@@ -46,13 +46,17 @@ void caffe_conv(const Blob<Dtype>* in, ConvolutionParameter* conv_param,
} else {
stride_h = stride_w = conv_param->stride_size() ? conv_param->stride(0) : 1;
}
- int kernel_d, pad_d, stride_d;
+ int dilation_h, dilation_w;
+ dilation_h = dilation_w = conv_param->dilation_size() ?
+ conv_param->dilation(0) : 1;
+ int kernel_d, pad_d, stride_d, dilation_d;
if (has_depth) {
kernel_d = kernel_h;
stride_d = stride_h;
pad_d = pad_h;
+ dilation_d = dilation_h;
} else {
- kernel_d = stride_d = 1;
+ kernel_d = stride_d = dilation_d = 1;
pad_d = 0;
}
// Groups
@@ -77,9 +81,9 @@ void caffe_conv(const Blob<Dtype>* in, ConvolutionParameter* conv_param,
for (int r = 0; r < kernel_d; r++) {
for (int p = 0; p < kernel_h; p++) {
for (int q = 0; q < kernel_w; q++) {
- int in_z = z * stride_d - pad_d + r;
- int in_y = y * stride_h - pad_h + p;
- int in_x = x * stride_w - pad_w + q;
+ int in_z = z * stride_d - pad_d + r * dilation_d;
+ int in_y = y * stride_h - pad_h + p * dilation_h;
+ int in_x = x * stride_w - pad_w + q * dilation_w;
if (in_z >= 0 && in_z < (has_depth ? in->shape(2) : 1)
&& in_y >= 0 && in_y < in->shape(2 + has_depth)
&& in_x >= 0 && in_x < in->shape(3 + has_depth)) {
@@ -260,6 +264,50 @@ TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolution) {
}
}
+TYPED_TEST(ConvolutionLayerTest, TestDilatedConvolution) {
+ typedef typename TypeParam::Dtype Dtype;
+ vector<int> bottom_shape;
+ bottom_shape.push_back(2);
+ bottom_shape.push_back(3);
+ bottom_shape.push_back(8);
+ bottom_shape.push_back(7);
+ this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+ this->blob_top_vec_.push_back(this->blob_top_2_);
+ for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) {
+ this->blob_bottom_vec_[i]->Reshape(bottom_shape);
+ }
+ LayerParameter layer_param;
+ ConvolutionParameter* convolution_param =
+ layer_param.mutable_convolution_param();
+ convolution_param->add_kernel_size(3);
+ convolution_param->add_dilation(2);
+ convolution_param->set_num_output(4);
+ convolution_param->mutable_weight_filler()->set_type("gaussian");
+ convolution_param->mutable_bias_filler()->set_type("constant");
+ convolution_param->mutable_bias_filler()->set_value(0.1);
+ shared_ptr<Layer<Dtype> > layer(
+ new ConvolutionLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ // Check against reference convolution.
+ const Dtype* top_data;
+ const Dtype* ref_top_data;
+ caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(),
+ this->MakeReferenceTop(this->blob_top_));
+ top_data = this->blob_top_->cpu_data();
+ ref_top_data = this->ref_blob_top_->cpu_data();
+ for (int i = 0; i < this->blob_top_->count(); ++i) {
+ EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4);
+ }
+ caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(),
+ this->MakeReferenceTop(this->blob_top_2_));
+ top_data = this->blob_top_2_->cpu_data();
+ ref_top_data = this->ref_blob_top_->cpu_data();
+ for (int i = 0; i < this->blob_top_->count(); ++i) {
+ EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4);
+ }
+}
+
TYPED_TEST(ConvolutionLayerTest, Test0DConvolution) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
@@ -345,6 +393,53 @@ TYPED_TEST(ConvolutionLayerTest, TestSimple3DConvolution) {
}
}
+TYPED_TEST(ConvolutionLayerTest, TestDilated3DConvolution) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_2_);
+ this->blob_top_vec_.push_back(this->blob_top_2_);
+ vector<int> bottom_shape(5);
+ bottom_shape[0] = this->blob_bottom_vec_[0]->shape(0);
+ bottom_shape[1] = this->blob_bottom_vec_[0]->shape(1);
+ bottom_shape[2] = 6;
+ bottom_shape[3] = 7;
+ bottom_shape[4] = 8;
+ FillerParameter filler_param;
+ GaussianFiller<Dtype> filler(filler_param);
+ for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) {
+ this->blob_bottom_vec_[i]->Reshape(bottom_shape);
+ filler.Fill(this->blob_bottom_vec_[i]);
+ }
+ LayerParameter layer_param;
+ ConvolutionParameter* convolution_param =
+ layer_param.mutable_convolution_param();
+ convolution_param->add_kernel_size(3);
+ convolution_param->add_dilation(2);
+ convolution_param->set_num_output(4);
+ convolution_param->mutable_weight_filler()->set_type("gaussian");
+ convolution_param->mutable_bias_filler()->set_type("gaussian");
+ shared_ptr<Layer<Dtype> > layer(
+ new ConvolutionLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ // Check against reference convolution.
+ const Dtype* top_data;
+ const Dtype* ref_top_data;
+ caffe_conv(this->blob_bottom_, convolution_param, layer->blobs(),
+ this->MakeReferenceTop(this->blob_top_));
+ top_data = this->blob_top_->cpu_data();
+ ref_top_data = this->ref_blob_top_->cpu_data();
+ for (int i = 0; i < this->blob_top_->count(); ++i) {
+ EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4);
+ }
+ caffe_conv(this->blob_bottom_2_, convolution_param, layer->blobs(),
+ this->MakeReferenceTop(this->blob_top_2_));
+ top_data = this->blob_top_2_->cpu_data();
+ ref_top_data = this->ref_blob_top_->cpu_data();
+ for (int i = 0; i < this->blob_top_->count(); ++i) {
+ EXPECT_NEAR(top_data[i], ref_top_data[i], 1e-4);
+ }
+}
+
TYPED_TEST(ConvolutionLayerTest, Test1x1Convolution) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
@@ -629,6 +724,30 @@ TYPED_TEST(ConvolutionLayerTest, TestGradient) {
this->blob_top_vec_);
}
+TYPED_TEST(ConvolutionLayerTest, TestDilatedGradient) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ ConvolutionParameter* convolution_param =
+ layer_param.mutable_convolution_param();
+ vector<int> bottom_shape;
+ bottom_shape.push_back(2);
+ bottom_shape.push_back(3);
+ bottom_shape.push_back(5);
+ bottom_shape.push_back(6);
+ for (int i = 0; i < this->blob_bottom_vec_.size(); ++i) {
+ this->blob_bottom_vec_[i]->Reshape(bottom_shape);
+ }
+ convolution_param->add_kernel_size(3);
+ convolution_param->add_dilation(2);
+ convolution_param->set_num_output(2);
+ convolution_param->mutable_weight_filler()->set_type("gaussian");
+ convolution_param->mutable_bias_filler()->set_type("gaussian");
+ ConvolutionLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
TYPED_TEST(ConvolutionLayerTest, TestGradient3D) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
diff --git a/src/caffe/test/test_data_transformer.cpp b/src/caffe/test/test_data_transformer.cpp
index 8a10137..6103918 100644
--- a/src/caffe/test/test_data_transformer.cpp
+++ b/src/caffe/test/test_data_transformer.cpp
@@ -40,23 +40,21 @@ class DataTransformTest : public ::testing::Test {
int NumSequenceMatches(const TransformationParameter transform_param,
const Datum& datum, Phase phase) {
// Get crop sequence with Caffe seed 1701.
- DataTransformer<Dtype>* transformer =
- new DataTransformer<Dtype>(transform_param, phase);
+ DataTransformer<Dtype> transformer(transform_param, phase);
const int crop_size = transform_param.crop_size();
Caffe::set_random_seed(seed_);
- transformer->InitRand();
- Blob<Dtype>* blob =
- new Blob<Dtype>(1, datum.channels(), datum.height(), datum.width());
+ transformer.InitRand();
+ Blob<Dtype> blob(1, datum.channels(), datum.height(), datum.width());
if (transform_param.crop_size() > 0) {
- blob->Reshape(1, datum.channels(), crop_size, crop_size);
+ blob.Reshape(1, datum.channels(), crop_size, crop_size);
}
vector<vector<Dtype> > crop_sequence;
for (int iter = 0; iter < this->num_iter_; ++iter) {
vector<Dtype> iter_crop_sequence;
- transformer->Transform(datum, blob);
- for (int j = 0; j < blob->count(); ++j) {
- iter_crop_sequence.push_back(blob->cpu_data()[j]);
+ transformer.Transform(datum, &blob);
+ for (int j = 0; j < blob.count(); ++j) {
+ iter_crop_sequence.push_back(blob.cpu_data()[j]);
}
crop_sequence.push_back(iter_crop_sequence);
}
@@ -64,17 +62,14 @@ class DataTransformTest : public ::testing::Test {
int num_sequence_matches = 0;
for (int iter = 0; iter < this->num_iter_; ++iter) {
vector<Dtype> iter_crop_sequence = crop_sequence[iter];
- transformer->Transform(datum, blob);
- for (int j = 0; j < blob->count(); ++j) {
- num_sequence_matches +=
- (crop_sequence[iter][j] == blob->cpu_data()[j]);
+ transformer.Transform(datum, &blob);
+ for (int j = 0; j < blob.count(); ++j) {
+ num_sequence_matches += (crop_sequence[iter][j] == blob.cpu_data()[j]);
}
}
return num_sequence_matches;
}
- virtual ~DataTransformTest() { }
-
int seed_;
int num_iter_;
};
@@ -91,17 +86,16 @@ TYPED_TEST(DataTransformTest, TestEmptyTransform) {
Datum datum;
FillDatum(label, channels, height, width, unique_pixels, &datum);
- Blob<TypeParam>* blob = new Blob<TypeParam>(1, channels, height, width);
- DataTransformer<TypeParam>* transformer =
- new DataTransformer<TypeParam>(transform_param, TEST);
- transformer->InitRand();
- transformer->Transform(datum, blob);
- EXPECT_EQ(blob->num(), 1);
- EXPECT_EQ(blob->channels(), datum.channels());
- EXPECT_EQ(blob->height(), datum.height());
- EXPECT_EQ(blob->width(), datum.width());
- for (int j = 0; j < blob->count(); ++j) {
- EXPECT_EQ(blob->cpu_data()[j], label);
+ Blob<TypeParam> blob(1, channels, height, width);
+ DataTransformer<TypeParam> transformer(transform_param, TEST);
+ transformer.InitRand();
+ transformer.Transform(datum, &blob);
+ EXPECT_EQ(blob.num(), 1);
+ EXPECT_EQ(blob.channels(), datum.channels());
+ EXPECT_EQ(blob.height(), datum.height());
+ EXPECT_EQ(blob.width(), datum.width());
+ for (int j = 0; j < blob.count(); ++j) {
+ EXPECT_EQ(blob.cpu_data()[j], label);
}
}
@@ -115,17 +109,16 @@ TYPED_TEST(DataTransformTest, TestEmptyTransformUniquePixels) {
Datum datum;
FillDatum(label, channels, height, width, unique_pixels, &datum);
- Blob<TypeParam>* blob = new Blob<TypeParam>(1, 3, 4, 5);
- DataTransformer<TypeParam>* transformer =
- new DataTransformer<TypeParam>(transform_param, TEST);
- transformer->InitRand();
- transformer->Transform(datum, blob);
- EXPECT_EQ(blob->num(), 1);
- EXPECT_EQ(blob->channels(), datum.channels());
- EXPECT_EQ(blob->height(), datum.height());
- EXPECT_EQ(blob->width(), datum.width());
- for (int j = 0; j < blob->count(); ++j) {
- EXPECT_EQ(blob->cpu_data()[j], j);
+ Blob<TypeParam> blob(1, 3, 4, 5);
+ DataTransformer<TypeParam> transformer(transform_param, TEST);
+ transformer.InitRand();
+ transformer.Transform(datum, &blob);
+ EXPECT_EQ(blob.num(), 1);
+ EXPECT_EQ(blob.channels(), datum.channels());
+ EXPECT_EQ(blob.height(), datum.height());
+ EXPECT_EQ(blob.width(), datum.width());
+ for (int j = 0; j < blob.count(); ++j) {
+ EXPECT_EQ(blob.cpu_data()[j], j);
}
}
@@ -141,19 +134,17 @@ TYPED_TEST(DataTransformTest, TestCropSize) {
transform_param.set_crop_size(crop_size);
Datum datum;
FillDatum(label, channels, height, width, unique_pixels, &datum);
- DataTransformer<TypeParam>* transformer =
- new DataTransformer<TypeParam>(transform_param, TEST);
- transformer->InitRand();
- Blob<TypeParam>* blob =
- new Blob<TypeParam>(1, channels, crop_size, crop_size);
+ DataTransformer<TypeParam> transformer(transform_param, TEST);
+ transformer.InitRand();
+ Blob<TypeParam> blob(1, channels, crop_size, crop_size);
for (int iter = 0; iter < this->num_iter_; ++iter) {
- transformer->Transform(datum, blob);
- EXPECT_EQ(blob->num(), 1);
- EXPECT_EQ(blob->channels(), datum.channels());
- EXPECT_EQ(blob->height(), crop_size);
- EXPECT_EQ(blob->width(), crop_size);
- for (int j = 0; j < blob->count(); ++j) {
- EXPECT_EQ(blob->cpu_data()[j], label);
+ transformer.Transform(datum, &blob);
+ EXPECT_EQ(blob.num(), 1);
+ EXPECT_EQ(blob.channels(), datum.channels());
+ EXPECT_EQ(blob.height(), crop_size);
+ EXPECT_EQ(blob.width(), crop_size);
+ for (int j = 0; j < blob.count(); ++j) {
+ EXPECT_EQ(blob.cpu_data()[j], label);
}
}
}
@@ -280,13 +271,12 @@ TYPED_TEST(DataTransformTest, TestMeanValue) {
transform_param.add_mean_value(mean_value);
Datum datum;
FillDatum(label, channels, height, width, unique_pixels, &datum);
- Blob<TypeParam>* blob = new Blob<TypeParam>(1, channels, height, width);
- DataTransformer<TypeParam>* transformer =
- new DataTransformer<TypeParam>(transform_param, TEST);
- transformer->InitRand();
- transformer->Transform(datum, blob);
- for (int j = 0; j < blob->count(); ++j) {
- EXPECT_EQ(blob->cpu_data()[j], label - mean_value);
+ Blob<TypeParam> blob(1, channels, height, width);
+ DataTransformer<TypeParam> transformer(transform_param, TEST);
+ transformer.InitRand();
+ transformer.Transform(datum, &blob);
+ for (int j = 0; j < blob.count(); ++j) {
+ EXPECT_EQ(blob.cpu_data()[j], label - mean_value);
}
}
@@ -303,14 +293,13 @@ TYPED_TEST(DataTransformTest, TestMeanValues) {
transform_param.add_mean_value(2);
Datum datum;
FillDatum(label, channels, height, width, unique_pixels, &datum);
- Blob<TypeParam>* blob = new Blob<TypeParam>(1, channels, height, width);
- DataTransformer<TypeParam>* transformer =
- new DataTransformer<TypeParam>(transform_param, TEST);
- transformer->InitRand();
- transformer->Transform(datum, blob);
+ Blob<TypeParam> blob(1, channels, height, width);
+ DataTransformer<TypeParam> transformer(transform_param, TEST);
+ transformer.InitRand();
+ transformer.Transform(datum, &blob);
for (int c = 0; c < channels; ++c) {
for (int j = 0; j < height * width; ++j) {
- EXPECT_EQ(blob->cpu_data()[blob->offset(0, c) + j], label - c);
+ EXPECT_EQ(blob.cpu_data()[blob.offset(0, c) + j], label - c);
}
}
}
@@ -325,8 +314,8 @@ TYPED_TEST(DataTransformTest, TestMeanFile) {
const int size = channels * height * width;
// Create a mean file
- string* mean_file = new string();
- MakeTempFilename(mean_file);
+ string mean_file;
+ MakeTempFilename(&mean_file);
BlobProto blob_mean;
blob_mean.set_num(1);
blob_mean.set_channels(channels);
@@ -337,19 +326,18 @@ TYPED_TEST(DataTransformTest, TestMeanFile) {
blob_mean.add_data(j);
}
- LOG(INFO) << "Using temporary mean_file " << *mean_file;
- WriteProtoToBinaryFile(blob_mean, *mean_file);
+ LOG(INFO) << "Using temporary mean_file " << mean_file;
+ WriteProtoToBinaryFile(blob_mean, mean_file);
- transform_param.set_mean_file(*mean_file);
+ transform_param.set_mean_file(mean_file);
Datum datum;
FillDatum(label, channels, height, width, unique_pixels, &datum);
- Blob<TypeParam>* blob = new Blob<TypeParam>(1, channels, height, width);
- DataTransformer<TypeParam>* transformer =
- new DataTransformer<TypeParam>(transform_param, TEST);
- transformer->InitRand();
- transformer->Transform(datum, blob);
- for (int j = 0; j < blob->count(); ++j) {
- EXPECT_EQ(blob->cpu_data()[j], 0);
+ Blob<TypeParam> blob(1, channels, height, width);
+ DataTransformer<TypeParam> transformer(transform_param, TEST);
+ transformer.InitRand();
+ transformer.Transform(datum, &blob);
+ for (int j = 0; j < blob.count(); ++j) {
+ EXPECT_EQ(blob.cpu_data()[j], 0);
}
}
diff --git a/src/caffe/test/test_embed_layer.cpp b/src/caffe/test/test_embed_layer.cpp
index acd4b0f..dc7f5c4 100644
--- a/src/caffe/test/test_embed_layer.cpp
+++ b/src/caffe/test/test_embed_layer.cpp
@@ -12,10 +12,6 @@
namespace caffe {
-#ifndef CPU_ONLY
-extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
-#endif
-
template <typename TypeParam>
class EmbedLayerTest : public MultiDeviceTest<TypeParam> {
typedef typename TypeParam::Dtype Dtype;
diff --git a/src/caffe/test/test_im2col_kernel.cu b/src/caffe/test/test_im2col_kernel.cu
index 3f97cf6..e3a9791 100644
--- a/src/caffe/test/test_im2col_kernel.cu
+++ b/src/caffe/test/test_im2col_kernel.cu
@@ -18,6 +18,7 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
const int height_col, const int width_col,
Dtype* data_col);
@@ -25,19 +26,18 @@ template <typename Dtype, int num_axes>
__global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_col);
-
-extern cudaDeviceProp CAFFE_TEST_CUDA_PROP;
+ const int* dilation, Dtype* data_col);
template <typename Dtype>
class Im2colKernelTest : public GPUDeviceTest<Dtype> {
protected:
Im2colKernelTest()
// big so launches > 1024 threads
- : blob_bottom_(new Blob<Dtype>(5, 500, 10, 10)),
+ : blob_bottom_(new Blob<Dtype>(5, 500, 15, 15)),
blob_kernel_shape_(new Blob<int>()),
blob_stride_(new Blob<int>()),
blob_pad_(new Blob<int>()),
+ blob_dilation_(new Blob<int>()),
blob_top_(new Blob<Dtype>()),
blob_top_cpu_(new Blob<Dtype>()) {
FillerParameter filler_param;
@@ -47,20 +47,25 @@ class Im2colKernelTest : public GPUDeviceTest<Dtype> {
blob_kernel_shape_->Reshape(dim_blob_shape);
blob_stride_->Reshape(dim_blob_shape);
blob_pad_->Reshape(dim_blob_shape);
+ blob_dilation_->Reshape(dim_blob_shape);
height_ = blob_bottom_->height();
width_ = blob_bottom_->width();
channels_ = blob_bottom_->channels();
pad_ = 0;
stride_ = 2;
+ dilation_ = 3;
kernel_size_ = 3;
- height_col_ = (height_ + 2 * pad_ - kernel_size_) / stride_ + 1;
- width_col_ = (width_ + 2 * pad_ - kernel_size_) / stride_ + 1;
+ height_col_ = (height_ + 2 * pad_ -
+ (dilation_ * (kernel_size_ - 1) + 1)) / stride_ + 1;
+ width_col_ = (width_ + 2 * pad_ -
+ (dilation_ * (kernel_size_ - 1) + 1)) / stride_ + 1;
for (int i = 0; i < 2; ++i) {
blob_kernel_shape_->mutable_cpu_data()[i] = kernel_size_;
blob_stride_->mutable_cpu_data()[i] = stride_;
blob_pad_->mutable_cpu_data()[i] = pad_;
+ blob_dilation_->mutable_cpu_data()[i] = dilation_;
}
}
@@ -71,11 +76,13 @@ class Im2colKernelTest : public GPUDeviceTest<Dtype> {
delete blob_kernel_shape_;
delete blob_stride_;
delete blob_pad_;
+ delete blob_dilation_;
}
Blob<int>* const blob_kernel_shape_;
Blob<int>* const blob_stride_;
Blob<int>* const blob_pad_;
+ Blob<int>* const blob_dilation_;
Blob<Dtype>* const blob_bottom_;
Blob<Dtype>* const blob_top_;
Blob<Dtype>* const blob_top_cpu_;
@@ -84,6 +91,7 @@ class Im2colKernelTest : public GPUDeviceTest<Dtype> {
int channels_;
int pad_;
int stride_;
+ int dilation_;
int kernel_size_;
int height_col_;
int width_col_;
@@ -112,7 +120,7 @@ TYPED_TEST(Im2colKernelTest, Test2D) {
im2col_cpu(this->blob_bottom_->cpu_data() + this->blob_bottom_->offset(n),
this->channels_, this->height_, this->width_,
this->kernel_size_, this->kernel_size_, this->pad_, this->pad_,
- this->stride_, this->stride_,
+ this->stride_, this->stride_, this->dilation_, this->dilation_,
cpu_data + this->blob_top_cpu_->offset(n));
}
@@ -129,6 +137,7 @@ TYPED_TEST(Im2colKernelTest, Test2D) {
num_kernels, bottom_data + this->blob_bottom_->offset(n),
this->height_, this->width_, this->kernel_size_, this->kernel_size_,
this->pad_, this->pad_, this->stride_, this->stride_,
+ this->dilation_, this->dilation_,
this->height_col_, this->width_col_,
top_data + this->blob_top_->offset(n));
CUDA_POST_KERNEL_CHECK;
@@ -165,6 +174,7 @@ TYPED_TEST(Im2colKernelTest, TestND) {
this->blob_top_cpu_->shape().data() + 1,
this->blob_kernel_shape_->cpu_data(),
this->blob_pad_->cpu_data(), this->blob_stride_->cpu_data(),
+ this->blob_dilation_->cpu_data(),
top_data_cpu + this->blob_top_cpu_->offset(n));
}
@@ -183,7 +193,7 @@ TYPED_TEST(Im2colKernelTest, TestND) {
num_kernels, bottom_data_gpu + this->blob_bottom_->offset(n),
this->blob_bottom_->gpu_shape() + 1, this->blob_top_->gpu_shape() + 1,
this->blob_kernel_shape_->gpu_data(), this->blob_pad_->gpu_data(),
- this->blob_stride_->gpu_data(),
+ this->blob_stride_->gpu_data(), this->blob_dilation_->gpu_data(),
top_data_gpu + this->blob_top_->offset(n));
CUDA_POST_KERNEL_CHECK;
}
diff --git a/src/caffe/test/test_im2col_layer.cpp b/src/caffe/test/test_im2col_layer.cpp
index 8274dd4..a7faf18 100644
--- a/src/caffe/test/test_im2col_layer.cpp
+++ b/src/caffe/test/test_im2col_layer.cpp
@@ -41,14 +41,21 @@ TYPED_TEST(Im2colLayerTest, TestSetup) {
LayerParameter layer_param;
ConvolutionParameter* convolution_param =
layer_param.mutable_convolution_param();
+ vector<int> bottom_shape;
+ bottom_shape.push_back(2);
+ bottom_shape.push_back(3);
+ bottom_shape.push_back(10);
+ bottom_shape.push_back(11);
+ this->blob_bottom_->Reshape(bottom_shape);
convolution_param->add_kernel_size(3);
convolution_param->add_stride(2);
+ convolution_param->add_dilation(3);
Im2colLayer<Dtype> layer(layer_param);
layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
EXPECT_EQ(this->blob_top_->num(), 2);
EXPECT_EQ(this->blob_top_->channels(), 27);
EXPECT_EQ(this->blob_top_->height(), 2);
- EXPECT_EQ(this->blob_top_->width(), 2);
+ EXPECT_EQ(this->blob_top_->width(), 3);
}
TYPED_TEST(Im2colLayerTest, TestForward) {
@@ -81,6 +88,26 @@ TYPED_TEST(Im2colLayerTest, TestGradient) {
this->blob_top_vec_);
}
+TYPED_TEST(Im2colLayerTest, TestDilatedGradient) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ ConvolutionParameter* convolution_param =
+ layer_param.mutable_convolution_param();
+ vector<int> bottom_shape;
+ bottom_shape.push_back(2);
+ bottom_shape.push_back(3);
+ bottom_shape.push_back(10);
+ bottom_shape.push_back(9);
+ this->blob_bottom_->Reshape(bottom_shape);
+ convolution_param->add_kernel_size(3);
+ convolution_param->add_stride(2);
+ convolution_param->add_dilation(3);
+ Im2colLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-2);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
TYPED_TEST(Im2colLayerTest, TestGradientForceND) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
@@ -95,6 +122,27 @@ TYPED_TEST(Im2colLayerTest, TestGradientForceND) {
this->blob_top_vec_);
}
+TYPED_TEST(Im2colLayerTest, TestDilatedGradientForceND) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ ConvolutionParameter* convolution_param =
+ layer_param.mutable_convolution_param();
+ vector<int> bottom_shape;
+ bottom_shape.push_back(2);
+ bottom_shape.push_back(3);
+ bottom_shape.push_back(10);
+ bottom_shape.push_back(9);
+ this->blob_bottom_->Reshape(bottom_shape);
+ convolution_param->add_kernel_size(3);
+ convolution_param->add_stride(2);
+ convolution_param->add_dilation(3);
+ convolution_param->set_force_nd_im2col(true);
+ Im2colLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-2);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
TYPED_TEST(Im2colLayerTest, TestRect) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
@@ -113,7 +161,6 @@ TYPED_TEST(Im2colLayerTest, TestRect) {
}
}
-
TYPED_TEST(Im2colLayerTest, TestRectGradient) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp
index 21441b4..dd591f7 100644
--- a/src/caffe/test/test_neuron_layer.cpp
+++ b/src/caffe/test/test_neuron_layer.cpp
@@ -11,6 +11,7 @@
#include "caffe/layers/absval_layer.hpp"
#include "caffe/layers/bnll_layer.hpp"
#include "caffe/layers/dropout_layer.hpp"
+#include "caffe/layers/elu_layer.hpp"
#include "caffe/layers/exp_layer.hpp"
#include "caffe/layers/inner_product_layer.hpp"
#include "caffe/layers/log_layer.hpp"
@@ -259,6 +260,64 @@ TYPED_TEST(NeuronLayerTest, TestReLUGradientWithNegativeSlope) {
this->blob_top_vec_);
}
+TYPED_TEST(NeuronLayerTest, TestELU) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ CHECK(google::protobuf::TextFormat::ParseFromString(
+ "elu_param { alpha: 0.5 }", &layer_param));
+ ELULayer<Dtype> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype kDelta = 2e-4;
+ // Now, check values
+ const Dtype* bottom_data = this->blob_bottom_->cpu_data();
+ const Dtype* top_data = this->blob_top_->cpu_data();
+ for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+ if (bottom_data[i] > 0) {
+ EXPECT_FLOAT_EQ(top_data[i], bottom_data[i]);
+ } else {
+ EXPECT_NEAR(top_data[i], 0.5 * (exp(bottom_data[i]) - 1), kDelta);
+ }
+ }
+}
+
+TYPED_TEST(NeuronLayerTest, TestELUasReLU) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ CHECK(google::protobuf::TextFormat::ParseFromString(
+ "elu_param { alpha: 0 }", &layer_param));
+ ELULayer<Dtype> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ // Now, check values
+ const Dtype* bottom_data = this->blob_bottom_->cpu_data();
+ const Dtype* top_data = this->blob_top_->cpu_data();
+ for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+ EXPECT_GE(top_data[i], 0.);
+ EXPECT_TRUE(top_data[i] == 0 || top_data[i] == bottom_data[i]);
+ }
+}
+
+TYPED_TEST(NeuronLayerTest, TestELUGradient) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ ELULayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3, 1701, 0., 0.01);
+ checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(NeuronLayerTest, TestELUasReLUGradient) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ CHECK(google::protobuf::TextFormat::ParseFromString(
+ "elu_param { alpha: 0 }", &layer_param));
+ ELULayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3, 1701, 0., 0.01);
+ checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
TYPED_TEST(NeuronLayerTest, TestSigmoid) {
typedef typename TypeParam::Dtype Dtype;
LayerParameter layer_param;
diff --git a/src/caffe/test/test_scale_layer.cpp b/src/caffe/test/test_scale_layer.cpp
new file mode 100644
index 0000000..ad11679
--- /dev/null
+++ b/src/caffe/test/test_scale_layer.cpp
@@ -0,0 +1,507 @@
+#include <algorithm>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/layers/scale_layer.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+namespace caffe {
+
+template <typename TypeParam>
+class ScaleLayerTest : public MultiDeviceTest<TypeParam> {
+ typedef typename TypeParam::Dtype Dtype;
+
+ protected:
+ ScaleLayerTest()
+ : blob_bottom_(new Blob<Dtype>(2, 3, 4, 5)),
+ blob_bottom_eltwise_(new Blob<Dtype>(2, 3, 4, 5)),
+ blob_bottom_broadcast_0_(new Blob<Dtype>()),
+ blob_bottom_broadcast_1_(new Blob<Dtype>()),
+ blob_bottom_broadcast_2_(new Blob<Dtype>()),
+ blob_bottom_scale_(new Blob<Dtype>(vector<int>())),
+ blob_top_(new Blob<Dtype>()) {
+ Caffe::set_random_seed(1701);
+ vector<int> broadcast_shape(2);
+ broadcast_shape[0] = 2; broadcast_shape[1] = 3;
+ this->blob_bottom_broadcast_0_->Reshape(broadcast_shape);
+ broadcast_shape[0] = 3; broadcast_shape[1] = 4;
+ this->blob_bottom_broadcast_1_->Reshape(broadcast_shape);
+ broadcast_shape[0] = 4; broadcast_shape[1] = 5;
+ this->blob_bottom_broadcast_2_->Reshape(broadcast_shape);
+ FillerParameter filler_param;
+ filler_param.set_min(1);
+ filler_param.set_max(10);
+ UniformFiller<Dtype> filler(filler_param);
+ filler.Fill(this->blob_bottom_);
+ filler.Fill(this->blob_bottom_eltwise_);
+ filler.Fill(this->blob_bottom_broadcast_0_);
+ filler.Fill(this->blob_bottom_broadcast_1_);
+ filler.Fill(this->blob_bottom_broadcast_2_);
+ filler.Fill(this->blob_bottom_scale_);
+ blob_bottom_vec_.push_back(blob_bottom_);
+ blob_top_vec_.push_back(blob_top_);
+ }
+ virtual ~ScaleLayerTest() {
+ delete blob_bottom_;
+ delete blob_bottom_eltwise_;
+ delete blob_bottom_broadcast_0_;
+ delete blob_bottom_broadcast_1_;
+ delete blob_bottom_broadcast_2_;
+ delete blob_bottom_scale_;
+ delete blob_top_;
+ }
+ Blob<Dtype>* const blob_bottom_;
+ Blob<Dtype>* const blob_bottom_eltwise_;
+ Blob<Dtype>* const blob_bottom_broadcast_0_;
+ Blob<Dtype>* const blob_bottom_broadcast_1_;
+ Blob<Dtype>* const blob_bottom_broadcast_2_;
+ Blob<Dtype>* const blob_bottom_scale_;
+ Blob<Dtype>* const blob_top_;
+ vector<Blob<Dtype>*> blob_bottom_vec_;
+ vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+TYPED_TEST_CASE(ScaleLayerTest, TestDtypesAndDevices);
+
+TYPED_TEST(ScaleLayerTest, TestForwardEltwise) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(0);
+ shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype* data = this->blob_top_->cpu_data();
+ const int count = this->blob_top_->count();
+ const Dtype* in_data_a = this->blob_bottom_->cpu_data();
+ const Dtype* in_data_b = this->blob_bottom_eltwise_->cpu_data();
+ for (int i = 0; i < count; ++i) {
+ EXPECT_NEAR(data[i], in_data_a[i] * in_data_b[i], 1e-5);
+ }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardEltwiseInPlace) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation
+ Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
+ orig_bottom.CopyFrom(*this->blob_bottom_);
+ this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(0);
+ shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype* data = this->blob_bottom_->cpu_data();
+ const int count = this->blob_bottom_->count();
+ const Dtype* in_data_a = orig_bottom.cpu_data();
+ const Dtype* in_data_b = this->blob_bottom_eltwise_->cpu_data();
+ for (int i = 0; i < count; ++i) {
+ EXPECT_NEAR(data[i], in_data_a[i] * in_data_b[i], 1e-5);
+ }
+}
+
+TYPED_TEST(ScaleLayerTest, TestBackwardEltwiseInPlace) {
+ typedef typename TypeParam::Dtype Dtype;
+ Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
+ orig_bottom.CopyFrom(*this->blob_bottom_);
+ this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(0);
+ shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+ Blob<Dtype> top_diff(this->blob_bottom_->shape());
+ FillerParameter filler_param;
+ filler_param.set_type("gaussian");
+ filler_param.set_std(1);
+ GaussianFiller<Dtype> filler(filler_param);
+ filler.Fill(&top_diff);
+ vector<bool> propagate_down(2, true);
+ // Run forward + backward without in-place computation;
+ // save resulting bottom diffs.
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ caffe_copy(top_diff.count(), top_diff.cpu_data(),
+ this->blob_top_->mutable_cpu_diff());
+ layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
+ const bool kReshape = true;
+ const bool kCopyDiff = true;
+ Blob<Dtype> orig_bottom_diff;
+ orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape);
+ Blob<Dtype> orig_scale_diff;
+ orig_scale_diff.CopyFrom(*this->blob_bottom_eltwise_,
+ kCopyDiff, kReshape);
+ // Rerun forward + backward with in-place computation;
+ // check that resulting bottom diffs are the same.
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ caffe_copy(top_diff.count(), top_diff.cpu_data(),
+ this->blob_bottom_->mutable_cpu_diff());
+ layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
+ for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+ EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i],
+ this->blob_bottom_->cpu_diff()[i], 1e-5);
+ }
+ for (int i = 0; i < this->blob_bottom_eltwise_->count(); ++i) {
+ EXPECT_NEAR(orig_scale_diff.cpu_diff()[i],
+ this->blob_bottom_eltwise_->cpu_diff()[i], 1e-5);
+ }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardEltwiseWithParam) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ ScaleParameter* scale_param = layer_param.mutable_scale_param();
+ scale_param->set_axis(0);
+ scale_param->set_num_axes(-1);
+ scale_param->mutable_filler()->set_type("gaussian");
+ shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype* data = this->blob_top_->cpu_data();
+ const int count = this->blob_top_->count();
+ const Dtype* in_data_a = this->blob_bottom_->cpu_data();
+ const Dtype* in_data_b = layer->blobs()[0]->cpu_data();
+ for (int i = 0; i < count; ++i) {
+ EXPECT_NEAR(data[i], in_data_a[i] * in_data_b[i], 1e-5);
+ }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardBroadcastBegin) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_0_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(0);
+ shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+ for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+ for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+ for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+ EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+ this->blob_bottom_->data_at(n, c, h, w) *
+ this->blob_bottom_broadcast_0_->data_at(n, c, 0, 0),
+ 1e-5);
+ }
+ }
+ }
+ }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardBroadcastMiddle) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(1);
+ shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+ for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+ for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+ for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+ EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+ this->blob_bottom_->data_at(n, c, h, w) *
+ this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0),
+ 1e-5);
+ }
+ }
+ }
+ }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardBroadcastMiddleInPlace) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation
+ Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
+ orig_bottom.CopyFrom(*this->blob_bottom_);
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(1);
+ shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+ for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+ for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+ for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+ EXPECT_NEAR(this->blob_bottom_->data_at(n, c, h, w),
+ orig_bottom.data_at(n, c, h, w) *
+ this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0),
+ 1e-5);
+ }
+ }
+ }
+ }
+}
+
+TYPED_TEST(ScaleLayerTest, TestBackwardBroadcastMiddleInPlace) {
+ typedef typename TypeParam::Dtype Dtype;
+ Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
+ orig_bottom.CopyFrom(*this->blob_bottom_);
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(1);
+ shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+ Blob<Dtype> top_diff(this->blob_bottom_->shape());
+ FillerParameter filler_param;
+ filler_param.set_type("gaussian");
+ filler_param.set_std(1);
+ GaussianFiller<Dtype> filler(filler_param);
+ filler.Fill(&top_diff);
+ vector<bool> propagate_down(2, true);
+ // Run forward + backward without in-place computation;
+ // save resulting bottom diffs.
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ caffe_copy(top_diff.count(), top_diff.cpu_data(),
+ this->blob_top_->mutable_cpu_diff());
+ layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
+ const bool kReshape = true;
+ const bool kCopyDiff = true;
+ Blob<Dtype> orig_bottom_diff;
+ orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape);
+ Blob<Dtype> orig_scale_diff;
+ orig_scale_diff.CopyFrom(*this->blob_bottom_broadcast_1_,
+ kCopyDiff, kReshape);
+ // Rerun forward + backward with in-place computation;
+ // check that resulting bottom diffs are the same.
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ caffe_copy(top_diff.count(), top_diff.cpu_data(),
+ this->blob_bottom_->mutable_cpu_diff());
+ layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
+ for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+ EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i],
+ this->blob_bottom_->cpu_diff()[i], 1e-5);
+ }
+ for (int i = 0; i < this->blob_bottom_broadcast_1_->count(); ++i) {
+ EXPECT_NEAR(orig_scale_diff.cpu_diff()[i],
+ this->blob_bottom_broadcast_1_->cpu_diff()[i], 1e-5);
+ }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardBroadcastMiddleWithParam) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ ScaleParameter* scale_param = layer_param.mutable_scale_param();
+ scale_param->set_axis(1);
+ scale_param->set_num_axes(2);
+ scale_param->mutable_filler()->set_type("gaussian");
+ shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+ for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+ for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+ for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+ EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+ this->blob_bottom_->data_at(n, c, h, w) *
+ layer->blobs()[0]->data_at(c, h, 0, 0), 1e-5);
+ }
+ }
+ }
+ }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardBroadcastMiddleWithParamAndBias) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ ScaleParameter* scale_param = layer_param.mutable_scale_param();
+ scale_param->set_axis(1);
+ scale_param->set_num_axes(2);
+ scale_param->mutable_filler()->set_type("gaussian");
+ scale_param->set_bias_term(true);
+ scale_param->mutable_bias_filler()->set_type("gaussian");
+ shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+ for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+ for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+ for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+ EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+ this->blob_bottom_->data_at(n, c, h, w) *
+ layer->blobs()[0]->data_at(c, h, 0, 0) +
+ layer->blobs()[1]->data_at(c, h, 0, 0), 1e-5);
+ }
+ }
+ }
+ }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardBroadcastEnd) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_2_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(2);
+ shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+ for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+ for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+ for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+ EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+ this->blob_bottom_->data_at(n, c, h, w) *
+ this->blob_bottom_broadcast_2_->data_at(h, w, 0, 0),
+ 1e-5);
+ }
+ }
+ }
+ }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardScale) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_scale_);
+ LayerParameter layer_param;
+ shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype* data = this->blob_top_->cpu_data();
+ const int count = this->blob_top_->count();
+ const Dtype* in_data = this->blob_bottom_->cpu_data();
+ const Dtype scale = *this->blob_bottom_scale_->cpu_data();
+ for (int i = 0; i < count; ++i) {
+ EXPECT_NEAR(data[i], in_data[i] * scale, 1e-5);
+ }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardScaleAxis2) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_scale_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(2);
+ shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+ layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ const Dtype* data = this->blob_top_->cpu_data();
+ const int count = this->blob_top_->count();
+ const Dtype* in_data = this->blob_bottom_->cpu_data();
+ const Dtype scale = *this->blob_bottom_scale_->cpu_data();
+ for (int i = 0; i < count; ++i) {
+ EXPECT_NEAR(data[i], in_data[i] * scale, 1e-5);
+ }
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientEltwise) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(0);
+ ScaleLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientEltwiseWithParam) {
+ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ ScaleParameter* scale_param = layer_param.mutable_scale_param();
+ scale_param->set_axis(0);
+ scale_param->set_num_axes(-1);
+ scale_param->mutable_filler()->set_type("gaussian");
+ ScaleLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientBroadcastBegin) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_0_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(0);
+ ScaleLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientBroadcastMiddle) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(1);
+ ScaleLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientBroadcastMiddleWithParam) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+ LayerParameter layer_param;
+ ScaleParameter* scale_param = layer_param.mutable_scale_param();
+ scale_param->set_axis(1);
+ scale_param->set_num_axes(2);
+ scale_param->mutable_filler()->set_type("gaussian");
+ ScaleLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientBroadcastEnd) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_2_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(2);
+ ScaleLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientScale) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_scale_);
+ LayerParameter layer_param;
+ ScaleLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientScaleAndBias) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_scale_);
+ LayerParameter layer_param;
+ ScaleParameter* scale_param = layer_param.mutable_scale_param();
+ scale_param->set_bias_term(true);
+ scale_param->mutable_bias_filler()->set_type("gaussian");
+ ScaleLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientScaleAxis2) {
+ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_vec_.push_back(this->blob_bottom_scale_);
+ LayerParameter layer_param;
+ layer_param.mutable_scale_param()->set_axis(2);
+ ScaleLayer<Dtype> layer(layer_param);
+ GradientChecker<Dtype> checker(1e-2, 1e-3);
+ checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+} // namespace caffe
diff --git a/src/caffe/util/im2col.cpp b/src/caffe/util/im2col.cpp
index 27e5b7c..114a86c 100644
--- a/src/caffe/util/im2col.cpp
+++ b/src/caffe/util/im2col.cpp
@@ -5,26 +5,50 @@
namespace caffe {
+// Function uses casting from int to unsigned to compare if value of
+// parameter a is greater or equal to zero and lower than value of
+// parameter b. The b parameter is of type signed and is always positive,
+// therefore its value is always lower than 0x800... where casting
+// negative value of a parameter converts it to value higher than 0x800...
+// The casting allows to use one condition instead of two.
+inline bool is_a_ge_zero_and_a_lt_b(int a, int b) {
+ return static_cast<unsigned>(a) < static_cast<unsigned>(b);
+}
+
template <typename Dtype>
void im2col_cpu(const Dtype* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
Dtype* data_col) {
- const int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
- const int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
- const int channels_col = channels * kernel_h * kernel_w;
- for (int c_col = 0; c_col < channels_col; ++c_col) {
- int w_offset = c_col % kernel_w;
- int h_offset = (c_col / kernel_w) % kernel_h;
- int c_im = c_col / kernel_h / kernel_w;
- for (int h_col = 0; h_col < height_col; ++h_col) {
- for (int w_col = 0; w_col < width_col; ++w_col) {
- int h_im = h_col * stride_h - pad_h + h_offset;
- int w_im = w_col * stride_w - pad_w + w_offset;
- data_col[(c_col * height_col + h_col) * width_col + w_col] =
- (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
- data_im[(c_im * height + h_im) * width + w_im] : 0;
+ const int output_h = (height + 2 * pad_h -
+ (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
+ const int output_w = (width + 2 * pad_w -
+ (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
+ const int channel_size = height * width;
+ for (int channel = channels; channel--; data_im += channel_size) {
+ for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
+ for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
+ int input_row = -pad_h + kernel_row * dilation_h;
+ for (int output_rows = output_h; output_rows; output_rows--) {
+ if (!is_a_ge_zero_and_a_lt_b(input_row, height)) {
+ for (int output_cols = output_w; output_cols; output_cols--) {
+ *(data_col++) = 0;
+ }
+ } else {
+ int input_col = -pad_w + kernel_col * dilation_w;
+ for (int output_col = output_w; output_col; output_col--) {
+ if (is_a_ge_zero_and_a_lt_b(input_col, width)) {
+ *(data_col++) = data_im[input_row * width + input_col];
+ } else {
+ *(data_col++) = 0;
+ }
+ input_col += stride_w;
+ }
+ }
+ input_row += stride_h;
+ }
}
}
}
@@ -34,17 +58,19 @@ void im2col_cpu(const Dtype* data_im, const int channels,
template void im2col_cpu<float>(const float* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, float* data_col);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ float* data_col);
template void im2col_cpu<double>(const double* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, double* data_col);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ double* data_col);
template <typename Dtype>
inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col,
const int num_spatial_axes, const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_output) {
+ const int* dilation, Dtype* data_output) {
if (!im2col) {
int im_size = im_shape[0];
for (int i = 0; i < num_spatial_axes; ++i) {
@@ -76,7 +102,8 @@ inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col,
bool is_padding = false;
for (int d_i = 0; d_i < num_spatial_axes; ++d_i) {
const int d = d_iter[d_i];
- const int d_im = d * stride[d_i] - pad[d_i] + d_offset[d_i];
+ const int d_im = d * stride[d_i] - pad[d_i] +
+ d_offset[d_i] * dilation[d_i];
is_padding |= d_im < 0 || d_im >= im_shape[d_i + 1];
index_col *= col_shape[d_i + 1];
index_col += d;
@@ -114,10 +141,10 @@ template <typename Dtype>
void im2col_nd_cpu(const Dtype* data_im, const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_col) {
+ const int* dilation, Dtype* data_col) {
const bool kIm2Col = true;
im2col_nd_core_cpu(data_im, kIm2Col, num_spatial_axes, im_shape, col_shape,
- kernel_shape, pad, stride, data_col);
+ kernel_shape, pad, stride, dilation, data_col);
}
// Explicit instantiation
@@ -125,34 +152,45 @@ template void im2col_nd_cpu<float>(const float* data_im,
const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- float* data_col);
+ const int* dilation, float* data_col);
template void im2col_nd_cpu<double>(const double* data_im,
const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- double* data_col);
+ const int* dilation, double* data_col);
template <typename Dtype>
void col2im_cpu(const Dtype* data_col, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
Dtype* data_im) {
caffe_set(height * width * channels, Dtype(0), data_im);
- const int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
- const int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
- const int channels_col = channels * kernel_h * kernel_w;
- for (int c_col = 0; c_col < channels_col; ++c_col) {
- int w_offset = c_col % kernel_w;
- int h_offset = (c_col / kernel_w) % kernel_h;
- int c_im = c_col / kernel_h / kernel_w;
- for (int h_col = 0; h_col < height_col; ++h_col) {
- for (int w_col = 0; w_col < width_col; ++w_col) {
- int h_im = h_col * stride_h - pad_h + h_offset;
- int w_im = w_col * stride_w - pad_w + w_offset;
- if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width)
- data_im[(c_im * height + h_im) * width + w_im] +=
- data_col[(c_col * height_col + h_col) * width_col + w_col];
+ const int output_h = (height + 2 * pad_h -
+ (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
+ const int output_w = (width + 2 * pad_w -
+ (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
+ const int channel_size = height * width;
+ for (int channel = channels; channel--; data_im += channel_size) {
+ for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) {
+ for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) {
+ int input_row = -pad_h + kernel_row * dilation_h;
+ for (int output_rows = output_h; output_rows; output_rows--) {
+ if (!is_a_ge_zero_and_a_lt_b(input_row, height)) {
+ data_col += output_w;
+ } else {
+ int input_col = -pad_w + kernel_col * dilation_w;
+ for (int output_col = output_w; output_col; output_col--) {
+ if (is_a_ge_zero_and_a_lt_b(input_col, width)) {
+ data_im[input_row * width + input_col] += *data_col;
+ }
+ data_col++;
+ input_col += stride_w;
+ }
+ }
+ input_row += stride_h;
+ }
}
}
}
@@ -162,20 +200,22 @@ void col2im_cpu(const Dtype* data_col, const int channels,
template void col2im_cpu<float>(const float* data_col, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, float* data_im);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ float* data_im);
template void col2im_cpu<double>(const double* data_col, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, double* data_im);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ double* data_im);
template <typename Dtype>
void col2im_nd_cpu(const Dtype* data_col, const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_im) {
+ const int* dilation, Dtype* data_im) {
const bool kIm2Col = false;
im2col_nd_core_cpu(data_col, kIm2Col, num_spatial_axes, im_shape, col_shape,
- kernel_shape, pad, stride, data_im);
+ kernel_shape, pad, stride, dilation, data_im);
}
// Explicit instantiation
@@ -183,12 +223,12 @@ template void col2im_nd_cpu<float>(const float* data_col,
const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- float* data_im);
+ const int* dilation, float* data_im);
template void col2im_nd_cpu<double>(const double* data_col,
const int num_spatial_axes,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- double* data_im);
+ const int* dilation, double* data_im);
} // namespace caffe
diff --git a/src/caffe/util/im2col.cu b/src/caffe/util/im2col.cu
index 49354ab..a8f30a0 100644
--- a/src/caffe/util/im2col.cu
+++ b/src/caffe/util/im2col.cu
@@ -10,6 +10,7 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
const int height_col, const int width_col,
Dtype* data_col) {
CUDA_KERNEL_LOOP(index, n) {
@@ -26,11 +27,11 @@ __global__ void im2col_gpu_kernel(const int n, const Dtype* data_im,
data_im_ptr += (c_im * height + h_offset) * width + w_offset;
for (int i = 0; i < kernel_h; ++i) {
for (int j = 0; j < kernel_w; ++j) {
- int h_im = h_offset + i;
- int w_im = w_offset + j;
+ int h_im = h_offset + i * dilation_h;
+ int w_im = w_offset + j * dilation_w;
*data_col_ptr =
(h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
- data_im_ptr[i * width + j] : 0;
+ data_im_ptr[i * dilation_h * width + j * dilation_w] : 0;
data_col_ptr += height_col * width_col;
}
}
@@ -42,17 +43,20 @@ void im2col_gpu(const Dtype* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
Dtype* data_col) {
// We are going to launch channels * height_col * width_col kernels, each
// kernel responsible for copying a single-channel grid.
- int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
- int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+ int height_col = (height + 2 * pad_h -
+ (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1;
+ int width_col = (width + 2 * pad_w -
+ (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
int num_kernels = channels * height_col * width_col;
// NOLINT_NEXT_LINE(whitespace/operators)
im2col_gpu_kernel<Dtype><<<CAFFE_GET_BLOCKS(num_kernels),
CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, height, width, kernel_h, kernel_w, pad_h,
- pad_w, stride_h, stride_w, height_col,
+ pad_w, stride_h, stride_w, dilation_h, dilation_w, height_col,
width_col, data_col);
CUDA_POST_KERNEL_CHECK;
}
@@ -61,19 +65,39 @@ void im2col_gpu(const Dtype* data_im, const int channels,
template void im2col_gpu<float>(const float* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
- float* data_col);
+ const int dilation_h, const int dilation_w, float* data_col);
template void im2col_gpu<double>(const double* data_im, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h, const int stride_w,
- double* data_col);
+ const int dilation_h, const int dilation_w, double* data_col);
template <typename Dtype, int num_axes>
__global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_col) {
+ const int* dilation, Dtype* data_col) {
int d_temp[num_axes]; // NOLINT(runtime/arrays)
int d_iter[num_axes]; // NOLINT(runtime/arrays)
+
+ __shared__ int shared_dilation[num_axes];
+ __shared__ int shared_kernel_shape[num_axes];
+ __shared__ int shared_pad[num_axes];
+ __shared__ int shared_stride[num_axes];
+ __shared__ int shared_col_shape[num_axes + 1];
+ __shared__ int shared_im_shape[num_axes + 1];
+
+ if (threadIdx.x < num_axes) {
+ shared_dilation[threadIdx.x] = dilation[threadIdx.x];
+ shared_kernel_shape[threadIdx.x] = kernel_shape[threadIdx.x];
+ shared_pad[threadIdx.x] = pad[threadIdx.x];
+ shared_stride[threadIdx.x] = stride[threadIdx.x];
+ }
+ if (threadIdx.x < num_axes + 1) {
+ shared_col_shape[threadIdx.x] = col_shape[threadIdx.x];
+ shared_im_shape[threadIdx.x] = im_shape[threadIdx.x];
+ }
+ __syncthreads();
+
int i;
CUDA_KERNEL_LOOP(index, n) {
// Initialize channel_in, computed in the loop below, with intermediate
@@ -81,19 +105,19 @@ __global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im,
int channel_in = index;
int channel_out = 1;
for (i = num_axes - 1; i >= 0; --i) {
- d_temp[i] = channel_in % col_shape[i + 1];
- channel_in /= col_shape[i + 1];
- channel_out *= kernel_shape[i];
+ d_temp[i] = channel_in % shared_col_shape[i + 1];
+ channel_in /= shared_col_shape[i + 1];
+ channel_out *= shared_kernel_shape[i];
}
channel_out *= channel_in;
int data_col_inc = 1;
for (i = 0; i < num_axes; ++i) {
- channel_out *= col_shape[i + 1];
+ channel_out *= shared_col_shape[i + 1];
channel_out += d_temp[i];
- d_temp[i] = d_temp[i] * stride[i] - pad[i];
- channel_in *= im_shape[i + 1];
+ d_temp[i] = d_temp[i] * shared_stride[i] - shared_pad[i];
+ channel_in *= shared_im_shape[i + 1];
channel_in += d_temp[i];
- data_col_inc *= col_shape[i + 1];
+ data_col_inc *= shared_col_shape[i + 1];
d_iter[i] = 0;
}
Dtype* data_col_ptr = data_col + channel_out;
@@ -102,15 +126,15 @@ __global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im,
do {
bool in_range = true;
for (i = 0; i < num_axes; ++i) {
- const int d_iter_im = d_iter[i] + d_temp[i];
- in_range &= d_iter_im >= 0 && d_iter_im < im_shape[i + 1];
+ const int d_iter_im = d_iter[i] * shared_dilation[i] + d_temp[i];
+ in_range &= d_iter_im >= 0 && d_iter_im < shared_im_shape[i + 1];
if (!in_range) { break; }
}
if (in_range) {
- int data_im_offset = d_iter[0];
+ int data_im_offset = d_iter[0] * shared_dilation[0];
for (i = 1; i < num_axes; ++i) {
- data_im_offset *= im_shape[i + 1];
- data_im_offset += d_iter[i];
+ data_im_offset *= shared_im_shape[i + 1];
+ data_im_offset += d_iter[i] * shared_dilation[i];
}
*data_col_ptr = data_im_ptr[data_im_offset];
} else {
@@ -119,7 +143,7 @@ __global__ void im2col_nd_gpu_kernel(const int n, const Dtype* data_im,
data_col_ptr += data_col_inc;
incremented = false;
for (i = num_axes - 1; i >= 0; --i) {
- const int d_max = kernel_shape[i];
+ const int d_max = shared_kernel_shape[i];
if (d_iter[i] == d_max - 1) {
d_iter[i] = 0;
} else { // d_iter[i] < d_max - 1
@@ -136,67 +160,69 @@ template <typename Dtype>
void im2col_nd_gpu(const Dtype* data_im, const int num_spatial_axes,
const int num_kernels, const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_col) {
+ const int* dilation, Dtype* data_col) {
+ // num_axes should be smaller than block size
+ DCHECK_LT(num_spatial_axes, CAFFE_CUDA_NUM_THREADS);
switch (num_spatial_axes) {
case 1:
im2col_nd_gpu_kernel<Dtype, 1> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
- kernel_shape, pad, stride, data_col);
+ kernel_shape, pad, stride, dilation, data_col);
break;
case 2:
im2col_nd_gpu_kernel<Dtype, 2> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
- kernel_shape, pad, stride, data_col);
+ kernel_shape, pad, stride, dilation, data_col);
break;
case 3:
im2col_nd_gpu_kernel<Dtype, 3> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
- kernel_shape, pad, stride, data_col);
+ kernel_shape, pad, stride, dilation, data_col);
break;
case 4:
im2col_nd_gpu_kernel<Dtype, 4> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
- kernel_shape, pad, stride, data_col);
+ kernel_shape, pad, stride, dilation, data_col);
break;
case 5:
im2col_nd_gpu_kernel<Dtype, 5> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
- kernel_shape, pad, stride, data_col);
+ kernel_shape, pad, stride, dilation, data_col);
break;
case 6:
im2col_nd_gpu_kernel<Dtype, 6> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
- kernel_shape, pad, stride, data_col);
+ kernel_shape, pad, stride, dilation, data_col);
break;
case 7:
im2col_nd_gpu_kernel<Dtype, 7> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
- kernel_shape, pad, stride, data_col);
+ kernel_shape, pad, stride, dilation, data_col);
break;
case 8:
im2col_nd_gpu_kernel<Dtype, 8> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
- kernel_shape, pad, stride, data_col);
+ kernel_shape, pad, stride, dilation, data_col);
break;
case 9:
im2col_nd_gpu_kernel<Dtype, 9> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
- kernel_shape, pad, stride, data_col);
+ kernel_shape, pad, stride, dilation, data_col);
break;
case 10:
im2col_nd_gpu_kernel<Dtype, 10> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(num_kernels), CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_im, im_shape, col_shape,
- kernel_shape, pad, stride, data_col);
+ kernel_shape, pad, stride, dilation, data_col);
break;
default:
LOG(FATAL) << "im2col_nd_gpu does not support computation with "
@@ -210,12 +236,12 @@ template void im2col_nd_gpu<float>(const float* data_im,
const int num_spatial_axes, const int col_size,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- float* data_col);
+ const int* dilation, float* data_col);
template void im2col_nd_gpu<double>(const double* data_im,
const int num_spatial_axes, const int col_size,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- double* data_col);
+ const int* dilation, double* data_col);
template <typename Dtype>
__global__ void col2im_gpu_kernel(const int n, const Dtype* data_col,
@@ -223,6 +249,7 @@ __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col,
const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w,
const int stride_h, const int stride_w,
+ const int dilation_h, const int dilation_w,
const int height_col, const int width_col,
Dtype* data_im) {
CUDA_KERNEL_LOOP(index, n) {
@@ -230,33 +257,27 @@ __global__ void col2im_gpu_kernel(const int n, const Dtype* data_col,
const int w_im = index % width + pad_w;
const int h_im = (index / width) % height + pad_h;
const int c_im = index / (width * height);
+ int kernel_extent_w = (kernel_w - 1) * dilation_w + 1;
+ int kernel_extent_h = (kernel_h - 1) * dilation_h + 1;
// compute the start and end of the output
const int w_col_start =
- (w_im < kernel_w) ? 0 : (w_im - kernel_w) / stride_w + 1;
- const int w_col_end =
- min(w_im / stride_w + 1, width_col);
+ (w_im < kernel_extent_w) ? 0 : (w_im - kernel_extent_w) / stride_w + 1;
+ const int w_col_end = min(w_im / stride_w + 1, width_col);
const int h_col_start =
- (h_im < kernel_h) ? 0 : (h_im - kernel_h) / stride_h + 1;
- const int h_col_end =
- min(h_im / stride_h + 1, height_col);
- /*
- for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
- for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
- // the col location: [c * width * height + h_out, w_out]
- int c_col = c_im * kernel_h * kernel_w
- + (h_im - h_col * stride_h) * kernel_w + (w_im - w_col * stride_w);
- val += data_col[(c_col * height_col + h_col) * width_col + w_col];
- }
- }
- */
- // equivalent implementation
- int offset = (c_im * kernel_h * kernel_w + h_im * kernel_w + w_im)
- * height_col * width_col;
- int coeff_h_col = (1 - stride_h * kernel_w * height_col) * width_col;
- int coeff_w_col = (1 - stride_w * height_col * width_col);
- for (int h_col = h_col_start; h_col < h_col_end; ++h_col) {
- for (int w_col = w_col_start; w_col < w_col_end; ++w_col) {
- val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col];
+ (h_im < kernel_extent_h) ? 0 : (h_im - kernel_extent_h) / stride_h + 1;
+ const int h_col_end = min(h_im / stride_h + 1, height_col);
+ // TODO: use LCM of stride and dilation to avoid unnecessary loops
+ for (int h_col = h_col_start; h_col < h_col_end; h_col += 1) {
+ for (int w_col = w_col_start; w_col < w_col_end; w_col += 1) {
+ int h_k = (h_im - h_col * stride_h);
+ int w_k = (w_im - w_col * stride_w);
+ if (h_k % dilation_h == 0 && w_k % dilation_w == 0) {
+ h_k /= dilation_h;
+ w_k /= dilation_w;
+ int data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) *
+ height_col + h_col) * width_col + w_col;
+ val += data_col[data_col_index];
+ }
}
}
data_im[index] = val;
@@ -267,9 +288,12 @@ template <typename Dtype>
void col2im_gpu(const Dtype* data_col, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, Dtype* data_im) {
- int height_col = (height + 2 * pad_h - kernel_h) / stride_h + 1;
- int width_col = (width + 2 * pad_w - kernel_w) / stride_w + 1;
+ const int stride_w, const int dilation_h, const int dilation_w,
+ Dtype* data_im) {
+ int height_col = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) /
+ stride_h + 1;
+ int width_col = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) /
+ stride_w + 1;
int num_kernels = channels * height * width;
// To avoid involving atomic operations, we will launch one kernel per
// bottom dimension, and then in the kernel add up the top dimensions.
@@ -277,7 +301,7 @@ void col2im_gpu(const Dtype* data_col, const int channels,
col2im_gpu_kernel<Dtype><<<CAFFE_GET_BLOCKS(num_kernels),
CAFFE_CUDA_NUM_THREADS>>>(
num_kernels, data_col, height, width, channels, kernel_h, kernel_w,
- pad_h, pad_w, stride_h, stride_w,
+ pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
height_col, width_col, data_im);
CUDA_POST_KERNEL_CHECK;
}
@@ -286,37 +310,62 @@ void col2im_gpu(const Dtype* data_col, const int channels,
template void col2im_gpu<float>(const float* data_col, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, float* data_im);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ float* data_im);
template void col2im_gpu<double>(const double* data_col, const int channels,
const int height, const int width, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w, const int stride_h,
- const int stride_w, double* data_im);
+ const int stride_w, const int dilation_h, const int dilation_w,
+ double* data_im);
template <typename Dtype, int num_axes>
__global__ void col2im_nd_gpu_kernel(const int n, const Dtype* data_col,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_im) {
+ const int* dilation, Dtype* data_im) {
int d_im[num_axes]; // NOLINT(runtime/arrays)
int d_col_iter[num_axes]; // NOLINT(runtime/arrays)
int d_col_start[num_axes]; // NOLINT(runtime/arrays)
int d_col_end[num_axes]; // NOLINT(runtime/arrays)
+
+ __shared__ int shared_dilation[num_axes];
+ __shared__ int shared_kernel_shape[num_axes];
+ __shared__ int shared_pad[num_axes];
+ __shared__ int shared_stride[num_axes];
+ __shared__ int shared_col_shape[num_axes + 1];
+ __shared__ int shared_im_shape[num_axes + 1];
+
+ if (threadIdx.x < num_axes) {
+ shared_dilation[threadIdx.x] = dilation[threadIdx.x];
+ shared_kernel_shape[threadIdx.x] = kernel_shape[threadIdx.x];
+ shared_pad[threadIdx.x] = pad[threadIdx.x];
+ shared_stride[threadIdx.x] = stride[threadIdx.x];
+ }
+ if (threadIdx.x < num_axes + 1) {
+ shared_col_shape[threadIdx.x] = col_shape[threadIdx.x];
+ shared_im_shape[threadIdx.x] = im_shape[threadIdx.x];
+ }
+ __syncthreads();
+
CUDA_KERNEL_LOOP(index, n) {
// Initialize channel_in, computed in the loop below, with intermediate
// computations used to compute the spatial indices.
int c_im = index;
// Calculate d_im (image dimensions).
for (int i = num_axes - 1; i >= 0; --i) {
- d_im[i] = c_im % im_shape[i + 1] + pad[i];
- c_im /= im_shape[i + 1];
+ d_im[i] = c_im % shared_im_shape[i + 1] + shared_pad[i];
+ c_im /= shared_im_shape[i + 1];
}
// Calculate col start/end indices.
bool done = false;
for (int i = 0; i < num_axes; ++i) {
+ const int kernel_extent =
+ shared_dilation[i] * (shared_kernel_shape[i] - 1) + 1;
d_col_start[i] = d_col_iter[i] =
- (d_im[i] < kernel_shape[i]) ?
- 0 : (d_im[i] - kernel_shape[i]) / stride[i] + 1;
- d_col_end[i] = min(d_im[i] / stride[i] + 1, col_shape[i + 1]);
+ (d_im[i] < kernel_extent) ? 0 :
+ (d_im[i] - kernel_extent) / shared_stride[i] + 1;
+ d_col_end[i] =
+ min(d_im[i] / shared_stride[i] + 1, shared_col_shape[i + 1]);
if (d_col_start[i] >= d_col_end[i]) {
// Skip computation if the dimension is 0 at any spatial axis --
// final val will be 0.
@@ -331,21 +380,32 @@ __global__ void col2im_nd_gpu_kernel(const int n, const Dtype* data_col,
// Loop over the col to compute the output val.
Dtype val = 0;
bool incremented = true;
+ bool skip = false;
do {
// Compute the final offset.
int final_offset = 0;
int kernel_shape_prod = 1;
+ int kernel_index;
for (int i = num_axes - 1; i >= 0; --i) {
- final_offset +=
- (d_im[i] - d_col_iter[i] * stride[i]) * kernel_shape_prod;
- kernel_shape_prod *= kernel_shape[i];
+ kernel_index = d_im[i] - d_col_iter[i] * shared_stride[i];
+ if (kernel_index % shared_dilation[i]) {
+ skip = true;
+ break;
+ } else {
+ kernel_index /= shared_dilation[i];
+ final_offset += kernel_index * kernel_shape_prod;
+ kernel_shape_prod *= shared_kernel_shape[i];
+ }
}
- final_offset += kernel_shape_prod * c_im;
- for (int i = 0; i < num_axes; ++i) {
- final_offset *= col_shape[i + 1];
- final_offset += d_col_iter[i];
+ if (!skip) {
+ final_offset += kernel_shape_prod * c_im;
+ for (int i = 0; i < num_axes; ++i) {
+ final_offset *= shared_col_shape[i + 1];
+ final_offset += d_col_iter[i];
+ }
+ val += data_col[final_offset];
}
- val += data_col[final_offset];
+ skip = false;
incremented = false;
for (int i = num_axes - 1; i >= 0; --i) {
const int d_max = d_col_end[i];
@@ -366,67 +426,69 @@ template <typename Dtype>
void col2im_nd_gpu(const Dtype* data_col, const int num_spatial_axes,
const int im_size, const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- Dtype* data_im) {
+ const int* dilation, Dtype* data_im) {
+ // num_axes should be smaller than block size
+ DCHECK_LT(num_spatial_axes, CAFFE_CUDA_NUM_THREADS);
switch (num_spatial_axes) {
case 1:
col2im_nd_gpu_kernel<Dtype, 1> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
- kernel_shape, pad, stride, data_im);
+ kernel_shape, pad, stride, dilation, data_im);
break;
case 2:
col2im_nd_gpu_kernel<Dtype, 2> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
- kernel_shape, pad, stride, data_im);
+ kernel_shape, pad, stride, dilation, data_im);
break;
case 3:
col2im_nd_gpu_kernel<Dtype, 3> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
- kernel_shape, pad, stride, data_im);
+ kernel_shape, pad, stride, dilation, data_im);
break;
case 4:
col2im_nd_gpu_kernel<Dtype, 4> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
- kernel_shape, pad, stride, data_im);
+ kernel_shape, pad, stride, dilation, data_im);
break;
case 5:
col2im_nd_gpu_kernel<Dtype, 5> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
- kernel_shape, pad, stride, data_im);
+ kernel_shape, pad, stride, dilation, data_im);
break;
case 6:
col2im_nd_gpu_kernel<Dtype, 6> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
- kernel_shape, pad, stride, data_im);
+ kernel_shape, pad, stride, dilation, data_im);
break;
case 7:
col2im_nd_gpu_kernel<Dtype, 7> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
- kernel_shape, pad, stride, data_im);
+ kernel_shape, pad, stride, dilation, data_im);
break;
case 8:
col2im_nd_gpu_kernel<Dtype, 8> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
- kernel_shape, pad, stride, data_im);
+ kernel_shape, pad, stride, dilation, data_im);
break;
case 9:
col2im_nd_gpu_kernel<Dtype, 9> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
- kernel_shape, pad, stride, data_im);
+ kernel_shape, pad, stride, dilation, data_im);
break;
case 10:
col2im_nd_gpu_kernel<Dtype, 10> // NOLINT_NEXT_LINE(whitespace/operators)
<<<CAFFE_GET_BLOCKS(im_size), CAFFE_CUDA_NUM_THREADS>>>(
im_size, data_col, im_shape, col_shape,
- kernel_shape, pad, stride, data_im);
+ kernel_shape, pad, stride, dilation, data_im);
break;
default:
LOG(FATAL) << "col2im_nd_gpu does not support computation with "
@@ -440,11 +502,11 @@ template void col2im_nd_gpu<float>(const float* data_col,
const int num_spatial_axes, const int im_size,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- float* data_im);
+ const int* dilation, float* data_im);
template void col2im_nd_gpu<double>(const double* data_col,
const int num_spatial_axes, const int im_size,
const int* im_shape, const int* col_shape,
const int* kernel_shape, const int* pad, const int* stride,
- double* data_im);
+ const int* dilation, double* data_im);
} // namespace caffe
diff --git a/tools/caffe.cpp b/tools/caffe.cpp
index 305cfc3..470165a 100644
--- a/tools/caffe.cpp
+++ b/tools/caffe.cpp
@@ -3,6 +3,7 @@
namespace bp = boost::python;
#endif
+#include <gflags/gflags.h>
#include <glog/logging.h>
#include <cstring>
@@ -164,7 +165,7 @@ int train() {
if (FLAGS_gpu.size() == 0
&& solver_param.solver_mode() == caffe::SolverParameter_SolverMode_GPU) {
if (solver_param.has_device_id()) {
- FLAGS_gpu = "" +
+ FLAGS_gpu = "" +
boost::lexical_cast<string>(solver_param.device_id());
} else { // Set default GPU if unspecified
FLAGS_gpu = "" + boost::lexical_cast<string>(0);
@@ -378,6 +379,8 @@ RegisterBrewFunction(time);
int main(int argc, char** argv) {
// Print output to stderr (while still logging).
FLAGS_alsologtostderr = 1;
+ // Set version
+ gflags::SetVersionString(AS_STRING(CAFFE_VERSION));
// Usage message.
gflags::SetUsageMessage("command line brew\n"
"usage: caffe <command> <args>\n\n"
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/caffe-contrib.git
More information about the debian-science-commits
mailing list