[caffe] 01/05: patches: Apply updates from upstream master branch. Remove merged patch cmake-change-static-lib-name.patch.
Zhou Mo
cdluminate-guest at moszumanska.debian.org
Tue Dec 19 02:48:13 UTC 2017
This is an automated email from the git hooks/post-receive script.
cdluminate-guest pushed a commit to branch master
in repository caffe.
commit 052fec4f55fc211e0209dd3d834e9035f5917671
Author: Mo Zhou <cdluminate at gmail.com>
Date: Tue Dec 19 02:28:35 2017 +0000
patches: Apply updates from upstream master branch. Remove merged patch cmake-change-static-lib-name.patch.
---
debian/patches/cmake-change-static-lib-name.patch | 46 -
debian/patches/series | 3 +-
debian/patches/upstream-master-1.0-to-head.patch | 1651 +++++++++++++++++++++
3 files changed, 1653 insertions(+), 47 deletions(-)
diff --git a/debian/patches/cmake-change-static-lib-name.patch b/debian/patches/cmake-change-static-lib-name.patch
deleted file mode 100644
index 1c892e5..0000000
--- a/debian/patches/cmake-change-static-lib-name.patch
+++ /dev/null
@@ -1,46 +0,0 @@
-Purpose: change the static library name so it would not be ambiguous.
-Forward: This is already merged upstream.
-diff --git a/src/caffe/CMakeLists.txt b/src/caffe/CMakeLists.txt
-index b9152e9..4a80556 100644
---- a/src/caffe/CMakeLists.txt
-+++ b/src/caffe/CMakeLists.txt
-@@ -3,12 +3,12 @@ file(GLOB proto_files proto/*.proto)
- caffe_protobuf_generate_cpp_py(${proto_gen_folder} proto_srcs proto_hdrs proto_python ${proto_files})
-
- # include python files either to force generation
--add_library(proto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python})
--caffe_default_properties(proto)
--target_link_libraries(proto PUBLIC ${PROTOBUF_LIBRARIES})
--target_include_directories(proto PUBLIC ${PROTOBUF_INCLUDE_DIR})
-+add_library(caffeproto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python})
-+caffe_default_properties(caffeproto)
-+target_link_libraries(caffeproto PUBLIC ${PROTOBUF_LIBRARIES})
-+target_include_directories(caffeproto PUBLIC ${PROTOBUF_INCLUDE_DIR})
-
--list(INSERT Caffe_LINKER_LIBS 0 PUBLIC proto) # note, crucial to prepend!
-+list(INSERT Caffe_LINKER_LIBS 0 PUBLIC caffeproto) # note, crucial to prepend!
-
- # --[ Caffe library
-
-@@ -42,7 +42,7 @@ set_target_properties(caffe PROPERTIES
- # ---[ Install
- install(DIRECTORY ${Caffe_INCLUDE_DIR}/caffe DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
- install(FILES ${proto_hdrs} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/caffe/proto)
--install(TARGETS caffe proto EXPORT CaffeTargets DESTINATION ${CMAKE_INSTALL_LIBDIR})
-+install(TARGETS caffe caffeproto EXPORT CaffeTargets DESTINATION ${CMAKE_INSTALL_LIBDIR})
-
- file(WRITE ${PROJECT_BINARY_DIR}/__init__.py)
- list(APPEND proto_python ${PROJECT_BINARY_DIR}/__init__.py)
-diff --git a/cmake/ConfigGen.cmake b/cmake/ConfigGen.cmake
-index ad91f54..09bb09b 100644
---- a/cmake/ConfigGen.cmake
-+++ b/cmake/ConfigGen.cmake
-@@ -33,7 +33,7 @@ function(caffe_generate_export_configs)
- configure_file("cmake/Templates/CaffeConfig.cmake.in" "${PROJECT_BINARY_DIR}/CaffeConfig.cmake" @ONLY)
-
- # Add targets to the build-tree export set
-- export(TARGETS caffe proto FILE "${PROJECT_BINARY_DIR}/CaffeTargets.cmake")
-+ export(TARGETS caffe caffeproto FILE "${PROJECT_BINARY_DIR}/CaffeTargets.cmake")
- export(PACKAGE Caffe)
-
- # ---[ Configure install-tree CaffeConfig.cmake file ]---
diff --git a/debian/patches/series b/debian/patches/series
index 10d6a91..3f0216c 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -1,5 +1,6 @@
+upstream-master-1.0-to-head.patch
+
cmake-using-basic-blas
cmake-fix-python-module-installdir
fix-more-float-comparison-precision-issue2
cmake-link-correct-python-lib.patch
-cmake-change-static-lib-name.patch
diff --git a/debian/patches/upstream-master-1.0-to-head.patch b/debian/patches/upstream-master-1.0-to-head.patch
new file mode 100644
index 0000000..d3fed86
--- /dev/null
+++ b/debian/patches/upstream-master-1.0-to-head.patch
@@ -0,0 +1,1651 @@
+diff --git a/Makefile b/Makefile
+index 4d324160..c6d5685b 100644
+--- a/Makefile
++++ b/Makefile
+@@ -694,6 +694,6 @@ $(DISTRIBUTE_DIR): all py | $(DISTRIBUTE_SUBDIRS)
+ install -m 644 $(DYNAMIC_NAME) $(DISTRIBUTE_DIR)/lib
+ cd $(DISTRIBUTE_DIR)/lib; rm -f $(DYNAMIC_NAME_SHORT); ln -s $(DYNAMIC_VERSIONED_NAME_SHORT) $(DYNAMIC_NAME_SHORT)
+ # add python - it's not the standard way, indeed...
+- cp -r python $(DISTRIBUTE_DIR)/python
++ cp -r python $(DISTRIBUTE_DIR)/
+
+ -include $(DEPS)
+diff --git a/Makefile.config.example b/Makefile.config.example
+index d552b38a..79905935 100644
+--- a/Makefile.config.example
++++ b/Makefile.config.example
+@@ -33,6 +33,7 @@ CUDA_DIR := /usr/local/cuda
+ # CUDA architecture setting: going with all of them.
+ # For CUDA < 6.0, comment the *_50 through *_61 lines for compatibility.
+ # For CUDA < 8.0, comment the *_60 and *_61 lines for compatibility.
++# For CUDA >= 9.0, comment the *_20 and *_21 lines for compatibility.
+ CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
+ -gencode arch=compute_20,code=sm_21 \
+ -gencode arch=compute_30,code=sm_30 \
+diff --git a/README.md b/README.md
+index 0ae3616b..fe259535 100644
+--- a/README.md
++++ b/README.md
+@@ -15,6 +15,14 @@ Check out the [project site](http://caffe.berkeleyvision.org) for all the detail
+
+ and step-by-step examples.
+
++## Custom distributions
++
++- [Intel Caffe](https://github.com/BVLC/caffe/tree/intel) (optimized for CPU, with multi-node support), in particular for Xeon processors (HSW, BDW, SKX, Xeon Phi).
++- [OpenCL Caffe](https://github.com/BVLC/caffe/tree/opencl) e.g. for AMD or Intel devices.
++- [Windows Caffe](https://github.com/BVLC/caffe/tree/windows)
++
++## Community
++
+ [![Join the chat at https://gitter.im/BVLC/caffe](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/BVLC/caffe?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+
+ Please join the [caffe-users group](https://groups.google.com/forum/#!forum/caffe-users) or [gitter chat](https://gitter.im/BVLC/caffe) to ask questions and talk about methods and models.
+diff --git a/cmake/ConfigGen.cmake b/cmake/ConfigGen.cmake
+index ad91f542..09bb09b4 100644
+--- a/cmake/ConfigGen.cmake
++++ b/cmake/ConfigGen.cmake
+@@ -33,7 +33,7 @@ function(caffe_generate_export_configs)
+ configure_file("cmake/Templates/CaffeConfig.cmake.in" "${PROJECT_BINARY_DIR}/CaffeConfig.cmake" @ONLY)
+
+ # Add targets to the build-tree export set
+- export(TARGETS caffe proto FILE "${PROJECT_BINARY_DIR}/CaffeTargets.cmake")
++ export(TARGETS caffe caffeproto FILE "${PROJECT_BINARY_DIR}/CaffeTargets.cmake")
+ export(PACKAGE Caffe)
+
+ # ---[ Configure install-tree CaffeConfig.cmake file ]---
+diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
+index 4a5bac47..c48255c8 100644
+--- a/cmake/Dependencies.cmake
++++ b/cmake/Dependencies.cmake
+@@ -5,7 +5,7 @@ set(Caffe_DEFINITIONS "")
+ set(Caffe_COMPILE_OPTIONS "")
+
+ # ---[ Boost
+-find_package(Boost 1.55 REQUIRED COMPONENTS system thread filesystem)
++find_package(Boost 1.54 REQUIRED COMPONENTS system thread filesystem)
+ list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${Boost_INCLUDE_DIRS})
+ list(APPEND Caffe_LINKER_LIBS PUBLIC ${Boost_LIBRARIES})
+
+diff --git a/cmake/Modules/FindvecLib.cmake b/cmake/Modules/FindvecLib.cmake
+index 8eaab594..4d44e613 100644
+--- a/cmake/Modules/FindvecLib.cmake
++++ b/cmake/Modules/FindvecLib.cmake
+@@ -12,11 +12,12 @@ endif()
+
+ set(__veclib_include_suffix "Frameworks/vecLib.framework/Versions/Current/Headers")
+
++exec_program(xcode-select ARGS -print-path OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR)
+ find_path(vecLib_INCLUDE_DIR vecLib.h
+ DOC "vecLib include directory"
+ PATHS /System/Library/Frameworks/Accelerate.framework/Versions/Current/${__veclib_include_suffix}
+ /System/Library/${__veclib_include_suffix}
+- /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/
++ ${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/
+ NO_DEFAULT_PATH)
+
+ include(FindPackageHandleStandardArgs)
+diff --git a/docs/development.md b/docs/development.md
+index ec05bbee..36cd3995 100644
+--- a/docs/development.md
++++ b/docs/development.md
+@@ -116,5 +116,5 @@ To get a list of all options `googletest` provides, simply pass the `--help` fla
+
+ - **Run `make lint` to check C++ code.**
+ - Wrap lines at 80 chars.
+-- Follow [Google C++ style](http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml) and [Google python style](http://google-styleguide.googlecode.com/svn/trunk/pyguide.html) + [PEP 8](http://legacy.python.org/dev/peps/pep-0008/).
++- Follow [Google C++ style](https://google.github.io/styleguide/cppguide.html) and [Google python style](https://google.github.io/styleguide/pyguide.html) + [PEP 8](http://legacy.python.org/dev/peps/pep-0008/).
+ - Remember that “a foolish consistency is the hobgoblin of little minds,” so use your best judgement to write the clearest code for your particular case.
+diff --git a/docs/install_apt.md b/docs/install_apt.md
+index ee2cd287..e361a92d 100644
+--- a/docs/install_apt.md
++++ b/docs/install_apt.md
+@@ -4,10 +4,43 @@ title: "Installation: Ubuntu"
+
+ # Ubuntu Installation
+
++### For Ubuntu (>= 17.04)
++
++**Installing pre-compiled Caffe**
++
++Everything, including Caffe itself, is packaged in 17.04 and higher versions.
++To install the pre-compiled Caffe package, simply run
++
++ sudo apt install caffe-cpu
++
++for CPU-only version, or
++
++ sudo apt install caffe-cuda
++
++for the CUDA version. Note that the CUDA version may break if your NVIDIA
++driver and CUDA toolkit are not installed by APT.
++
++[Package status of CPU-only version](https://launchpad.net/ubuntu/+source/caffe)
++
++[Package status of CUDA version](https://launchpad.net/ubuntu/+source/caffe-contrib)
++
++**Installing Caffe from source**
++
++All of the dependencies can be installed with a single command
++
++ sudo apt build-dep caffe-cpu # dependencies for CPU-only version
++ sudo apt build-dep caffe-cuda # dependencies for CUDA version
++
++It requires a `deb-src` line in your `sources.list`.
++Continue with [compilation](installation.html#compilation).
++
++### For Ubuntu (\< 17.04)
++
+ **General dependencies**
+
+ sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler
+ sudo apt-get install --no-install-recommends libboost-all-dev
++ sudo apt-get install libgflags-dev libgoogle-glog-dev liblmdb-dev
+
+ **CUDA**: Install by `apt-get` or the NVIDIA `.run` package.
+ The NVIDIA package tends to follow more recent library and driver versions, but the installation is more manual.
+@@ -22,12 +55,6 @@ This can be skipped for CPU-only installation.
+
+ CUDA 8 is required on Ubuntu 16.04.
+
+-**Remaining dependencies, 14.04**
+-
+-Everything is packaged in 14.04.
+-
+- sudo apt-get install libgflags-dev libgoogle-glog-dev liblmdb-dev
+-
+ **Remaining dependencies, 12.04**
+
+ These dependencies need manual installation in 12.04.
+diff --git a/docs/install_apt_debian.md b/docs/install_apt_debian.md
+index 65fe7092..0a6a3b96 100644
+--- a/docs/install_apt_debian.md
++++ b/docs/install_apt_debian.md
+@@ -8,24 +8,28 @@ Caffe packages are available for several Debian versions, as shown in the
+ following chart:
+
+ ```
+-Your Distro | CPU_ONLY | CUDA | Alias
++Your Distro | CPU_ONLY | CUDA | Codename
+ ----------------+------------+--------+-------------------
+-Debian/stable | ✘ | ✘ | Debian Jessie
+-Debian/testing | ✔ | ✔ | Debian Stretch/Sid
+-Debian/unstable | ✔ | ✔ | Debian Sid
++Debian/oldstable| ✘ | ✘ | Jessie (8.0)
++Debian/stable | ✔ | ✔ | Stretch (9.0)
++Debian/testing | ✔ | ✔ | Buster
++Debian/unstable | ✔ | ✔ | Buster
+ ```
+
+ * `✘ ` You should take a look at [Ubuntu installation instruction](install_apt.html).
+
+ * `✔ ` You can install caffe with a single command line following this guide.
+
+-Last update: 2017-02-01
++* [Package status of CPU-only version](https://tracker.debian.org/pkg/caffe)
++
++* [Package status of CUDA version](https://tracker.debian.org/pkg/caffe-contrib)
++
++Last update: 2017-07-08
+
+ ## Binary installation with APT
+
+-Apart from the installation methods based on source, Debian/unstable
+-and Debian/testing users can install pre-compiled Caffe packages from
+-the official archive.
++Apart from the installation methods based on source, Debian users can install
++pre-compiled Caffe packages from the official archive with APT.
+
+ Make sure that your `/etc/apt/sources.list` contains `contrib` and `non-free`
+ sections if you want to install the CUDA version, for instance:
+@@ -44,7 +48,8 @@ $ caffe # command line interface wo
+ $ python3 -c 'import caffe; print(caffe.__path__)' # python3 interface working
+ ```
+
+-These Caffe packages should work for you out of box.
++These Caffe packages should work for you out of the box. However, the CUDA
++version may break if your NVIDIA driver and CUDA toolkit are not installed
++with APT.
+
+ #### Customizing caffe packages
+
+@@ -96,18 +101,22 @@ Note, this requires a `deb-src` entry in your `/etc/apt/sources.list`.
+ Some users may find their favorite compiler doesn't work with CUDA.
+
+ ```
+-CXX compiler | CUDA 7.5 | CUDA 8.0 |
+--------------+------------+------------+-
+-GCC-7 | ? | ? |
+-GCC-6 | ✘ | ✘ |
+-GCC-5 | ✔ [1] | ✔ |
+-CLANG-4.0 | ? | ? |
+-CLANG-3.9 | ✘ | ✘ |
+-CLANG-3.8 | ? | ✔ |
++CXX compiler | CUDA 7.5 | CUDA 8.0 | CUDA 9.0 |
++-------------+------------+------------+------------+
++GCC-8 | ? | ? | ? |
++GCC-7 | ? | ? | ? |
++GCC-6 | ✘ | ✘ | ✔ |
++GCC-5 | ✔ [1] | ✔ | ✔ |
++-------------+------------+------------+------------+
++CLANG-4.0 | ? | ? | ? |
++CLANG-3.9 | ✘ | ✘ | ✔ |
++CLANG-3.8 | ? | ✔ | ✔ |
+ ```
+
+ `[1]` CUDA 7.5's `host_config.h` must be patched before working with GCC-5.
+
++`[2]` CUDA 9.0: https://devblogs.nvidia.com/parallelforall/cuda-9-features-revealed/
++
+ BTW, please forget the GCC-4.X series, since its `libstdc++` ABI is not compatible with GCC-5's.
+ You may encounter failure linking GCC-4.X object files against GCC-5 libraries.
+ (See https://wiki.debian.org/GCC5 )
+@@ -152,10 +161,3 @@ and hack the packaging scripts, then build your customized package.
+ $ sudo apt install caffe-doc
+ $ dpkg -L caffe-doc
+ ```
+-
+-* Where can I find the Debian package status?
+-
+-```
+-https://tracker.debian.org/pkg/caffe (for the CPU_ONLY version)
+-https://tracker.debian.org/pkg/caffe-contrib (for the CUDA version)
+-```
+diff --git a/docs/installation.md b/docs/installation.md
+index 42f1d0ce..6104cc24 100644
+--- a/docs/installation.md
++++ b/docs/installation.md
+@@ -42,14 +42,14 @@ Optional dependencies:
+
+ * [OpenCV](http://opencv.org/) >= 2.4 including 3.0
+ * IO libraries: `lmdb`, `leveldb` (note: leveldb requires `snappy`)
+-* cuDNN for GPU acceleration (v6)
++* cuDNN for GPU acceleration (v7)
+
+ Pycaffe and Matcaffe interfaces have their own natural needs.
+
+ * For Python Caffe: `Python 2.7` or `Python 3.3+`, `numpy (>= 1.7)`, boost-provided `boost.python`
+ * For MATLAB Caffe: MATLAB with the `mex` compiler.
+
+-**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v6; older versions are supported in older Caffe.
++**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v7; older versions are supported in older Caffe.
+
+ **CPU-only Caffe**: for cold-brewed CPU-only Caffe uncomment the `CPU_ONLY := 1` flag in `Makefile.config` to configure and build Caffe without CUDA. This is helpful for cloud or cluster deployment.
+
+diff --git a/docs/tutorial/layers.md b/docs/tutorial/layers.md
+index 2faacc58..78a46f3a 100644
+--- a/docs/tutorial/layers.md
++++ b/docs/tutorial/layers.md
+@@ -87,7 +87,7 @@ Layers:
+ * [ELU](layers/elu.html) - exponential linear rectification.
+ * [Sigmoid](layers/sigmoid.html)
+ * [TanH](layers/tanh.html)
+-* [Absolute Value](layers/abs.html)
++* [Absolute Value](layers/absval.html)
+ * [Power](layers/power.html) - f(x) = (shift + scale * x) ^ power.
+ * [Exp](layers/exp.html) - f(x) = base ^ (shift + scale * x).
+ * [Log](layers/log.html) - f(x) = log(x).
+diff --git a/docs/tutorial/layers/lrn.md b/docs/tutorial/layers/lrn.md
+index 2fbef734..f5e48292 100644
+--- a/docs/tutorial/layers/lrn.md
++++ b/docs/tutorial/layers/lrn.md
+@@ -14,7 +14,7 @@ title: Local Response Normalization (LRN)
+ - `local_size` [default 5]: the number of channels to sum over (for cross channel LRN) or the side length of the square region to sum over (for within channel LRN)
+ - `alpha` [default 1]: the scaling parameter (see below)
+ - `beta` [default 5]: the exponent (see below)
+- - `norm_region` [default `ACROSS_CHANNELS`]: whether to sum over adjacent channels (`ACROSS_CHANNELS`) or nearby spatial locaitons (`WITHIN_CHANNEL`)
++ - `norm_region` [default `ACROSS_CHANNELS`]: whether to sum over adjacent channels (`ACROSS_CHANNELS`) or nearby spatial locations (`WITHIN_CHANNEL`)
+
+ The local response normalization layer performs a kind of "lateral inhibition" by normalizing over local input regions. In `ACROSS_CHANNELS` mode, the local regions extend across nearby channels, but have no spatial extent (i.e., they have shape `local_size x 1 x 1`). In `WITHIN_CHANNEL` mode, the local regions extend spatially, but are in separate channels (i.e., they have shape `1 x local_size x local_size`). Each input value is divided by $$(1 + (\alpha/n) \sum_i x_i^2)^\beta$$, wher [...]
+
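
The LRN formula quoted above is simple enough to sketch standalone. A minimal C++ illustration of the ACROSS_CHANNELS case for a single spatial position (hypothetical helper, not Caffe code; local_size, alpha and beta are the layer options listed above):

    #include <algorithm>
    #include <cmath>
    #include <vector>

    // Across-channel LRN as described above: each value is divided by
    // (1 + (alpha/n) * sum_i x_i^2)^beta, with the sum taken over a
    // local_size window of channels around the current one (n = local_size).
    std::vector<float> lrn_across_channels(const std::vector<float>& x,
                                           int local_size, float alpha,
                                           float beta) {
      const int C = static_cast<int>(x.size());
      std::vector<float> y(C);
      for (int c = 0; c < C; ++c) {
        float sum_sq = 0.f;
        // local_size channels centered on c, clipped at the edges
        for (int i = std::max(0, c - local_size / 2);
             i <= std::min(C - 1, c + local_size / 2); ++i) {
          sum_sq += x[i] * x[i];
        }
        y[c] = x[c] / std::pow(1.f + (alpha / local_size) * sum_sq, beta);
      }
      return y;
    }
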
+diff --git a/examples/brewing-logreg.ipynb b/examples/brewing-logreg.ipynb
+index c053b73b..0f87185a 100644
+--- a/examples/brewing-logreg.ipynb
++++ b/examples/brewing-logreg.ipynb
+@@ -73,12 +73,12 @@
+ ")\n",
+ "\n",
+ "# Split into train and test\n",
+- "X, Xt, y, yt = sklearn.cross_validation.train_test_split(X, y)\n",
++ "X, Xt, y, yt = sklearn.model_selection.train_test_split(X, y)\n",
+ "\n",
+ "# Visualize sample of the data\n",
+ "ind = np.random.permutation(X.shape[0])[:1000]\n",
+ "df = pd.DataFrame(X[ind])\n",
+- "_ = pd.scatter_matrix(df, figsize=(9, 9), diagonal='kde', marker='o', s=40, alpha=.4, c=y[ind])"
++ "_ = pd.plotting.scatter_matrix(df, figsize=(9, 9), diagonal='kde', marker='o', s=40, alpha=.4, c=y[ind])"
+ ]
+ },
+ {
+@@ -111,7 +111,7 @@
+ "%%timeit\n",
+ "# Train and test the scikit-learn SGD logistic regression.\n",
+ "clf = sklearn.linear_model.SGDClassifier(\n",
+- " loss='log', n_iter=1000, penalty='l2', alpha=5e-4, class_weight='auto')\n",
++ " loss='log', n_iter=1000, penalty='l2', alpha=5e-4, class_weight='balanced')\n",
+ "\n",
+ "clf.fit(X, y)\n",
+ "yt_pred = clf.predict(Xt)\n",
+diff --git a/examples/web_demo/readme.md b/examples/web_demo/readme.md
+index fe74b9ef..e50c4f10 100644
+--- a/examples/web_demo/readme.md
++++ b/examples/web_demo/readme.md
+@@ -11,7 +11,7 @@ priority: 10
+ ## Requirements
+
+ The demo server requires Python with some dependencies.
+-To make sure you have the dependencies, please run `pip install -r examples/web_demo/requirements.txt`, and also make sure that you've compiled the Python Caffe interface and that it is on your `PYTHONPATH` (see [installation instructions](/installation.html)).
++To make sure you have the dependencies, please run `pip install -r examples/web_demo/requirements.txt`, and also make sure that you've compiled the Python Caffe interface and that it is on your `PYTHONPATH` (see [installation instructions](http://caffe.berkeleyvision.org/installation.html)).
+
+ Make sure that you have obtained the Reference CaffeNet Model and the ImageNet Auxiliary Data:
+
+diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp
+index dad9ad46..bb92ded7 100644
+--- a/include/caffe/filler.hpp
++++ b/include/caffe/filler.hpp
+@@ -250,10 +250,10 @@ class BilinearFiller : public Filler<Dtype> {
+ CHECK_EQ(blob->width(), blob->height()) << "Filter must be square";
+ Dtype* data = blob->mutable_cpu_data();
+ int f = ceil(blob->width() / 2.);
+- float c = (2 * f - 1 - f % 2) / (2. * f);
++ Dtype c = (blob->width() - 1) / (2. * f);
+ for (int i = 0; i < blob->count(); ++i) {
+- float x = i % blob->width();
+- float y = (i / blob->width()) % blob->height();
++ Dtype x = i % blob->width();
++ Dtype y = (i / blob->width()) % blob->height();
+ data[i] = (1 - fabs(x / f - c)) * (1 - fabs(y / f - c));
+ }
+ CHECK_EQ(this->filler_param_.sparse(), -1)
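
The BilinearFiller hunk above changes the temporaries from float to Dtype and simplifies the kernel center to (w - 1) / (2f). A standalone sketch of the same weight computation (hypothetical helper; w is the square kernel width, as the CHECK above enforces):

    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Weights of a w x w bilinear upsampling kernel, mirroring the
    // loop in the filler.hpp hunk above.
    std::vector<double> bilinear_weights(int w) {
      const int f = static_cast<int>(std::ceil(w / 2.));
      const double c = (w - 1) / (2. * f);  // kernel center, as in the patch
      std::vector<double> data(w * w);
      for (int i = 0; i < w * w; ++i) {
        const double x = i % w;
        const double y = (i / w) % w;
        data[i] = (1 - std::fabs(x / f - c)) * (1 - std::fabs(y / f - c));
      }
      return data;
    }

    int main() {
      const int w = 4;  // a 4x4 kernel, i.e. 2x upsampling
      std::vector<double> k = bilinear_weights(w);
      for (int i = 0; i < w * w; ++i)
        std::printf("%.4f%c", k[i], (i % w == w - 1) ? '\n' : ' ');
    }

For w = 4 this prints the outer product of (0.25, 0.75, 0.75, 0.25), the classic bilinear kernel for 2x upsampling.
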
+diff --git a/include/caffe/layers/accuracy_layer.hpp b/include/caffe/layers/accuracy_layer.hpp
+index a9ad3225..dd2247b9 100644
+--- a/include/caffe/layers/accuracy_layer.hpp
++++ b/include/caffe/layers/accuracy_layer.hpp
+@@ -68,6 +68,8 @@ class AccuracyLayer : public Layer<Dtype> {
+ */
+ virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
++ virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
++ const vector<Blob<Dtype>*>& top);
+
+
+ /// @brief Not implemented -- AccuracyLayer cannot be used as a loss.
+@@ -77,6 +79,8 @@ class AccuracyLayer : public Layer<Dtype> {
+ if (propagate_down[i]) { NOT_IMPLEMENTED; }
+ }
+ }
++ virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
++ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+ int label_axis_, outer_num_, inner_num_;
+
+diff --git a/include/caffe/layers/crop_layer.hpp b/include/caffe/layers/crop_layer.hpp
+index c4fda122..5219fa5c 100644
+--- a/include/caffe/layers/crop_layer.hpp
++++ b/include/caffe/layers/crop_layer.hpp
+@@ -41,13 +41,15 @@ class CropLayer : public Layer<Dtype> {
+ virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+- vector<int> offsets;
++ Blob<int> offsets;
++ Blob<int> src_strides_;
++ Blob<int> dest_strides_;
+
+ private:
+ // Recursive copy function.
+ void crop_copy(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top,
+- const vector<int>& offsets,
++ const int* offsets,
+ vector<int> indices,
+ int cur_dim,
+ const Dtype* src_data,
+diff --git a/include/caffe/layers/infogain_loss_layer.hpp b/include/caffe/layers/infogain_loss_layer.hpp
+index edecde82..3b3caa27 100644
+--- a/include/caffe/layers/infogain_loss_layer.hpp
++++ b/include/caffe/layers/infogain_loss_layer.hpp
+@@ -13,20 +13,21 @@
+ namespace caffe {
+
+ /**
+- * @brief A generalization of MultinomialLogisticLossLayer that takes an
++ * @brief A generalization of SoftmaxWithLossLayer that takes an
+ * "information gain" (infogain) matrix specifying the "value" of all label
+ * pairs.
+ *
+- * Equivalent to the MultinomialLogisticLossLayer if the infogain matrix is the
++ * Equivalent to the SoftmaxWithLossLayer if the infogain matrix is the
+ * identity.
+ *
+ * @param bottom input Blob vector (length 2-3)
+ * -# @f$ (N \times C \times H \times W) @f$
+- * the predictions @f$ \hat{p} @f$, a Blob with values in
+- * @f$ [0, 1] @f$ indicating the predicted probability of each of the
+- * @f$ K = CHW @f$ classes. Each prediction vector @f$ \hat{p}_n @f$
+- * should sum to 1 as in a probability distribution: @f$
+- * \forall n \sum\limits_{k=1}^K \hat{p}_{nk} = 1 @f$.
++ * the predictions @f$ x @f$, a Blob with values in
++ * @f$ [-\infty, +\infty] @f$ indicating the predicted score for each of
++ * the @f$ K = CHW @f$ classes. This layer maps these scores to a
++ * probability distribution over classes using the softmax function
++ * @f$ \hat{p}_{nk} = \exp(x_{nk}) /
++ * \left[\sum_{k'} \exp(x_{nk'})\right] @f$ (see SoftmaxLayer).
+ * -# @f$ (N \times 1 \times 1 \times 1) @f$
+ * the labels @f$ l @f$, an integer-valued Blob with values
+ * @f$ l_n \in [0, 1, 2, ..., K - 1] @f$
+@@ -35,7 +36,7 @@ namespace caffe {
+ * (\b optional) the infogain matrix @f$ H @f$. This must be provided as
+ * the third bottom blob input if not provided as the infogain_mat in the
+ * InfogainLossParameter. If @f$ H = I @f$, this layer is equivalent to the
+- * MultinomialLogisticLossLayer.
++ * SoftmaxWithLossLayer.
+ * @param top output Blob vector (length 1)
+ * -# @f$ (1 \times 1 \times 1 \times 1) @f$
+ * the computed infogain multinomial logistic loss: @f$ E =
+@@ -98,8 +99,8 @@ class InfogainLossLayer : public LossLayer<Dtype> {
+ * infogain matrix, if provided as bottom[2])
+ * @param bottom input Blob vector (length 2-3)
+ * -# @f$ (N \times C \times H \times W) @f$
+- * the predictions @f$ \hat{p} @f$; Backward computes diff
+- * @f$ \frac{\partial E}{\partial \hat{p}} @f$
++ * the predictions @f$ x @f$; Backward computes diff
++ * @f$ \frac{\partial E}{\partial x} @f$
+ * -# @f$ (N \times 1 \times 1 \times 1) @f$
+ * the labels -- ignored as we can't compute their error gradients
+ * -# @f$ (1 \times 1 \times K \times K) @f$
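
The rewritten documentation above hinges on the softmax mapping it quotes: unbounded scores x become a probability distribution. A minimal numerically stable sketch of that mapping (generic C++, not the Caffe implementation):

    #include <algorithm>
    #include <cmath>
    #include <vector>

    // p_k = exp(x_k) / sum_k' exp(x_k'), shifted by max(x) so the
    // exponentials cannot overflow, as in the formula quoted above.
    std::vector<double> softmax(const std::vector<double>& x) {
      const double m = *std::max_element(x.begin(), x.end());
      std::vector<double> p;
      p.reserve(x.size());
      double sum = 0.0;
      for (double v : x) { p.push_back(std::exp(v - m)); sum += p.back(); }
      for (double& v : p) v /= sum;
      return p;
    }
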
+diff --git a/include/caffe/util/cudnn.hpp b/include/caffe/util/cudnn.hpp
+index 498cfe38..cd3f93f6 100644
+--- a/include/caffe/util/cudnn.hpp
++++ b/include/caffe/util/cudnn.hpp
+@@ -44,6 +44,12 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) {
+ #if CUDNN_VERSION_MIN(6, 0, 0)
+ case CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING:
+ return "CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING";
++#endif
++#if CUDNN_VERSION_MIN(7, 0, 0)
++ case CUDNN_STATUS_RUNTIME_IN_PROGRESS:
++ return "CUDNN_STATUS_RUNTIME_IN_PROGRESS";
++ case CUDNN_STATUS_RUNTIME_FP_OVERFLOW:
++ return "CUDNN_STATUS_RUNTIME_FP_OVERFLOW";
+ #endif
+ }
+ return "Unknown cudnn status";
+diff --git a/matlab/+caffe/Net.m b/matlab/+caffe/Net.m
+index 349e060e..bb99ec89 100644
+--- a/matlab/+caffe/Net.m
++++ b/matlab/+caffe/Net.m
+@@ -69,7 +69,9 @@ classdef Net < handle
+ self.blob_names = self.attributes.blob_names;
+ end
+ function delete (self)
+- caffe_('delete_net', self.hNet_self);
++ if ~isempty(self.hNet_self)
++ caffe_('delete_net', self.hNet_self);
++ end
+ end
+ function layer = layers(self, layer_name)
+ CHECK(ischar(layer_name), 'layer_name must be a string');
+diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp
+index d7f43fff..72659a4f 100644
+--- a/python/caffe/_caffe.cpp
++++ b/python/caffe/_caffe.cpp
+@@ -464,6 +464,14 @@ BOOST_PYTHON_MODULE(_caffe) {
+ .add_property("count", static_cast<int (Blob<Dtype>::*)() const>(
+ &Blob<Dtype>::count))
+ .def("reshape", bp::raw_function(&Blob_Reshape))
++#ifndef CPU_ONLY
++ .add_property("_gpu_data_ptr",
++ reinterpret_cast<uintptr_t (Blob<Dtype>::*)()>(
++ &Blob<Dtype>::mutable_gpu_data))
++ .add_property("_gpu_diff_ptr",
++ reinterpret_cast<uintptr_t (Blob<Dtype>::*)()>(
++ &Blob<Dtype>::mutable_gpu_diff))
++#endif
+ .add_property("data", bp::make_function(&Blob<Dtype>::mutable_cpu_data,
+ NdarrayCallPolicies()))
+ .add_property("diff", bp::make_function(&Blob<Dtype>::mutable_cpu_diff,
+diff --git a/python/caffe/classifier.py b/python/caffe/classifier.py
+index ea29fed8..983760a7 100644
+--- a/python/caffe/classifier.py
++++ b/python/caffe/classifier.py
+@@ -92,7 +92,7 @@ class Classifier(caffe.Net):
+
+ # For oversampling, average predictions across crops.
+ if oversample:
+- predictions = predictions.reshape((len(predictions) / 10, 10, -1))
++ predictions = predictions.reshape((len(predictions) // 10, 10, -1))
+ predictions = predictions.mean(1)
+
+ return predictions
+diff --git a/python/caffe/io.py b/python/caffe/io.py
+index 966c164c..ed4b3bef 100644
+--- a/python/caffe/io.py
++++ b/python/caffe/io.py
+@@ -256,7 +256,12 @@ class Transformer:
+ if len(ms) != 3:
+ raise ValueError('Mean shape invalid')
+ if ms != self.inputs[in_][1:]:
+- raise ValueError('Mean shape incompatible with input shape.')
++ in_shape = self.inputs[in_][1:]
++ m_min, m_max = mean.min(), mean.max()
++ normal_mean = (mean - m_min) / (m_max - m_min)
++ mean = resize_image(normal_mean.transpose((1,2,0)),
++ in_shape[1:]).transpose((2,0,1)) * \
++ (m_max - m_min) + m_min
+ self.mean[in_] = mean
+
+ def set_input_scale(self, in_, scale):
+@@ -323,7 +328,7 @@ def resize_image(im, new_dims, interp_order=1):
+ # skimage is fast but only understands {1,3} channel images
+ # in [0, 1].
+ im_std = (im - im_min) / (im_max - im_min)
+- resized_std = resize(im_std, new_dims, order=interp_order)
++ resized_std = resize(im_std, new_dims, order=interp_order, mode='constant')
+ resized_im = resized_std * (im_max - im_min) + im_min
+ else:
+ # the image is a constant -- avoid divide by 0
+diff --git a/python/caffe/test/test_net.py b/python/caffe/test/test_net.py
+index afd27690..ee1d38c3 100644
+--- a/python/caffe/test/test_net.py
++++ b/python/caffe/test/test_net.py
+@@ -72,41 +72,41 @@ class TestNet(unittest.TestCase):
+ self.net.backward()
+
+ def test_forward_start_end(self):
+- conv_blob=self.net.blobs['conv'];
+- ip_blob=self.net.blobs['ip_blob'];
+- sample_data=np.random.uniform(size=conv_blob.data.shape);
+- sample_data=sample_data.astype(np.float32);
+- conv_blob.data[:]=sample_data;
+- forward_blob=self.net.forward(start='ip',end='ip');
+- self.assertIn('ip_blob',forward_blob);
+-
+- manual_forward=[];
++ conv_blob=self.net.blobs['conv']
++ ip_blob=self.net.blobs['ip_blob']
++ sample_data=np.random.uniform(size=conv_blob.data.shape)
++ sample_data=sample_data.astype(np.float32)
++ conv_blob.data[:]=sample_data
++ forward_blob=self.net.forward(start='ip',end='ip')
++ self.assertIn('ip_blob',forward_blob)
++
++ manual_forward=[]
+ for i in range(0,conv_blob.data.shape[0]):
+ dot=np.dot(self.net.params['ip'][0].data,
+- conv_blob.data[i].reshape(-1));
+- manual_forward.append(dot+self.net.params['ip'][1].data);
+- manual_forward=np.array(manual_forward);
++ conv_blob.data[i].reshape(-1))
++ manual_forward.append(dot+self.net.params['ip'][1].data)
++ manual_forward=np.array(manual_forward)
+
+- np.testing.assert_allclose(ip_blob.data,manual_forward,rtol=1e-3);
++ np.testing.assert_allclose(ip_blob.data,manual_forward,rtol=1e-3,atol=1e-5)
+
+ def test_backward_start_end(self):
+- conv_blob=self.net.blobs['conv'];
+- ip_blob=self.net.blobs['ip_blob'];
++ conv_blob=self.net.blobs['conv']
++ ip_blob=self.net.blobs['ip_blob']
+ sample_data=np.random.uniform(size=ip_blob.data.shape)
+- sample_data=sample_data.astype(np.float32);
+- ip_blob.diff[:]=sample_data;
+- backward_blob=self.net.backward(start='ip',end='ip');
+- self.assertIn('conv',backward_blob);
++ sample_data=sample_data.astype(np.float32)
++ ip_blob.diff[:]=sample_data
++ backward_blob=self.net.backward(start='ip',end='ip')
++ self.assertIn('conv',backward_blob)
+
+- manual_backward=[];
++ manual_backward=[]
+ for i in range(0,conv_blob.data.shape[0]):
+ dot=np.dot(self.net.params['ip'][0].data.transpose(),
+- sample_data[i].reshape(-1));
+- manual_backward.append(dot);
+- manual_backward=np.array(manual_backward);
+- manual_backward=manual_backward.reshape(conv_blob.data.shape);
++ sample_data[i].reshape(-1))
++ manual_backward.append(dot)
++ manual_backward=np.array(manual_backward)
++ manual_backward=manual_backward.reshape(conv_blob.data.shape)
+
+- np.testing.assert_allclose(conv_blob.diff,manual_backward,rtol=1e-3);
++ np.testing.assert_allclose(conv_blob.diff,manual_backward,rtol=1e-3,atol=1e-5)
+
+ def test_clear_param_diffs(self):
+ # Run a forward/backward step to have non-zero diffs
+diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh
+index dac5d2f9..abf9cf1c 100755
+--- a/scripts/travis/install-deps.sh
++++ b/scripts/travis/install-deps.sh
+@@ -9,10 +9,10 @@ apt-get -y update
+ apt-get install -y --no-install-recommends \
+ build-essential \
+ graphviz \
+- libboost-filesystem1.55-dev \
+- libboost-python1.55-dev \
+- libboost-system1.55-dev \
+- libboost-thread1.55-dev \
++ libboost-filesystem-dev \
++ libboost-python-dev \
++ libboost-system-dev \
++ libboost-thread-dev \
+ libgflags-dev \
+ libgoogle-glog-dev \
+ libhdf5-serial-dev \
+@@ -106,7 +106,7 @@ if $WITH_CUDA ; then
+ ln -s /usr/local/cuda-$CUDA_VERSION /usr/local/cuda
+
+ if $WITH_CUDNN ; then
+- apt-get install -y --no-install-recommends libcudnn6-dev
++ apt-get install -y --no-install-recommends libcudnn7-dev
+ fi
+ fi
+
+diff --git a/src/caffe/CMakeLists.txt b/src/caffe/CMakeLists.txt
+index b9152e92..4a805568 100644
+--- a/src/caffe/CMakeLists.txt
++++ b/src/caffe/CMakeLists.txt
+@@ -3,12 +3,12 @@ file(GLOB proto_files proto/*.proto)
+ caffe_protobuf_generate_cpp_py(${proto_gen_folder} proto_srcs proto_hdrs proto_python ${proto_files})
+
+ # include python files either to force generation
+-add_library(proto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python})
+-caffe_default_properties(proto)
+-target_link_libraries(proto PUBLIC ${PROTOBUF_LIBRARIES})
+-target_include_directories(proto PUBLIC ${PROTOBUF_INCLUDE_DIR})
++add_library(caffeproto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python})
++caffe_default_properties(caffeproto)
++target_link_libraries(caffeproto PUBLIC ${PROTOBUF_LIBRARIES})
++target_include_directories(caffeproto PUBLIC ${PROTOBUF_INCLUDE_DIR})
+
+-list(INSERT Caffe_LINKER_LIBS 0 PUBLIC proto) # note, crucial to prepend!
++list(INSERT Caffe_LINKER_LIBS 0 PUBLIC caffeproto) # note, crucial to prepend!
+
+ # --[ Caffe library
+
+@@ -42,7 +42,7 @@ set_target_properties(caffe PROPERTIES
+ # ---[ Install
+ install(DIRECTORY ${Caffe_INCLUDE_DIR}/caffe DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+ install(FILES ${proto_hdrs} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/caffe/proto)
+-install(TARGETS caffe proto EXPORT CaffeTargets DESTINATION ${CMAKE_INSTALL_LIBDIR})
++install(TARGETS caffe caffeproto EXPORT CaffeTargets DESTINATION ${CMAKE_INSTALL_LIBDIR})
+
+ file(WRITE ${PROJECT_BINARY_DIR}/__init__.py)
+ list(APPEND proto_python ${PROJECT_BINARY_DIR}/__init__.py)
+diff --git a/src/caffe/layers/accuracy_layer.cpp b/src/caffe/layers/accuracy_layer.cpp
+index 4eddbb5c..b6d95b54 100644
+--- a/src/caffe/layers/accuracy_layer.cpp
++++ b/src/caffe/layers/accuracy_layer.cpp
+@@ -52,8 +52,6 @@ void AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ const Dtype* bottom_label = bottom[1]->cpu_data();
+ const int dim = bottom[0]->count() / outer_num_;
+ const int num_labels = bottom[0]->shape(label_axis_);
+- vector<Dtype> maxval(top_k_+1);
+- vector<int> max_id(top_k_+1);
+ if (top.size() > 1) {
+ caffe_set(nums_buffer_.count(), Dtype(0), nums_buffer_.mutable_cpu_data());
+ caffe_set(top[1]->count(), Dtype(0), top[1]->mutable_cpu_data());
+@@ -66,32 +64,29 @@ void AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ if (has_ignore_label_ && label_value == ignore_label_) {
+ continue;
+ }
+- if (top.size() > 1) ++nums_buffer_.mutable_cpu_data()[label_value];
+ DCHECK_GE(label_value, 0);
+ DCHECK_LT(label_value, num_labels);
++ if (top.size() > 1) ++nums_buffer_.mutable_cpu_data()[label_value];
++ const Dtype prob_of_true_class = bottom_data[i * dim
++ + label_value * inner_num_
++ + j];
++ int num_better_predictions = -1; // true_class also counts as "better"
+ // Top-k accuracy
+- std::vector<std::pair<Dtype, int> > bottom_data_vector;
+- for (int k = 0; k < num_labels; ++k) {
+- bottom_data_vector.push_back(std::make_pair(
+- bottom_data[i * dim + k * inner_num_ + j], k));
++ for (int k = 0; k < num_labels && num_better_predictions < top_k_; ++k) {
++ num_better_predictions +=
++ (bottom_data[i * dim + k * inner_num_ + j] >= prob_of_true_class);
+ }
+- std::partial_sort(
+- bottom_data_vector.begin(), bottom_data_vector.begin() + top_k_,
+- bottom_data_vector.end(), std::greater<std::pair<Dtype, int> >());
+- // check if true label is in top k predictions
+- for (int k = 0; k < top_k_; k++) {
+- if (bottom_data_vector[k].second == label_value) {
+- ++accuracy;
+- if (top.size() > 1) ++top[1]->mutable_cpu_data()[label_value];
+- break;
+- }
++ // check if there are less than top_k_ predictions
++ if (num_better_predictions < top_k_) {
++ ++accuracy;
++ if (top.size() > 1) ++top[1]->mutable_cpu_data()[label_value];
+ }
+ ++count;
+ }
+ }
+
+ // LOG(INFO) << "Accuracy: " << accuracy;
+- top[0]->mutable_cpu_data()[0] = accuracy / count;
++ top[0]->mutable_cpu_data()[0] = (count == 0) ? 0 : (accuracy / count);
+ if (top.size() > 1) {
+ for (int i = 0; i < top[1]->count(); ++i) {
+ top[1]->mutable_cpu_data()[i] =
+@@ -102,6 +97,10 @@ void AccuracyLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ // Accuracy layer should not be used as a loss function.
+ }
+
++#ifdef CPU_ONLY
++STUB_GPU(AccuracyLayer);
++#endif
++
+ INSTANTIATE_CLASS(AccuracyLayer);
+ REGISTER_LAYER_CLASS(Accuracy);
+
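
The rewritten Forward_cpu above drops the partial sort: a sample is a top-k hit when fewer than top_k classes score at least as high as the true class, with ties counting against it. A self-contained sketch of that counting rule (hypothetical function mirroring the hunk):

    #include <cstddef>
    #include <vector>

    // Top-k test from the accuracy_layer.cpp hunk above. The counter
    // starts at -1 because the true class always satisfies >= itself.
    bool is_top_k_hit(const std::vector<float>& scores, int label, int top_k) {
      const float true_score = scores[label];
      int num_better = -1;
      for (std::size_t k = 0;
           k < scores.size() && num_better < top_k; ++k) {
        num_better += (scores[k] >= true_score);
      }
      return num_better < top_k;
    }

This runs in O(num_labels) per sample with no allocation, which is also what makes the GPU port in the next file straightforward.
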
+diff --git a/src/caffe/layers/accuracy_layer.cu b/src/caffe/layers/accuracy_layer.cu
+new file mode 100644
+index 00000000..a8cff936
+--- /dev/null
++++ b/src/caffe/layers/accuracy_layer.cu
+@@ -0,0 +1,147 @@
++#include <vector>
++
++#include "caffe/layers/accuracy_layer.hpp"
++#include "caffe/util/math_functions.hpp"
++
++
++namespace caffe {
++
++template <typename Dtype>
++__global__ void AccuracyForwardGPU(const int nthreads,
++ const Dtype* bottom_data, const Dtype* label, Dtype* acc,
++ const int num, const int dim, const int spatial_dim,
++ const int num_labels, const int top_k,
++ const bool has_ignore_label_, const int ignore_label_,
++ Dtype* counts) {
++ CUDA_KERNEL_LOOP(index, nthreads) {
++ const int n = index / spatial_dim;
++ const int s = index % spatial_dim;
++ const int label_value = static_cast<int>(label[n * spatial_dim + s]);
++ const Dtype prob_of_true_class = bottom_data[n * dim
++ + label_value * spatial_dim
++ + s];
++ int num_better_predictions = -1; // true_class also counts as "better"
++ if (has_ignore_label_ && label_value == ignore_label_) {
++ acc[index] = 0;
++ counts[index] = 0;
++ } else {
++ for (int k = 0; k < num_labels & num_better_predictions < top_k; k++) {
++ num_better_predictions +=
++ (bottom_data[n * dim + k * spatial_dim + s] >= prob_of_true_class);
++ }
++ acc[index] = (num_better_predictions < top_k);
++ counts[index] = 1;
++ }
++ }
++}
++
++template <typename Dtype>
++__global__ void AccuracyForwardWithPerClassGPU(const int nthreads,
++ const Dtype* bottom_data, const Dtype* label,
++ Dtype* acc, Dtype* counts,
++ const int num, const int dim, const int spatial_dim,
++ const int num_labels, const int top_k,
++ const bool has_ignore_label_, const int ignore_label_) {
++ CUDA_KERNEL_LOOP(index, nthreads) {
++ const int n = index / spatial_dim;
++ const int s = index % spatial_dim;
++ const int label_value = static_cast<int>(label[n * spatial_dim + s]);
++ const Dtype prob_of_true_class = bottom_data[n * dim
++ + label_value * spatial_dim
++ + s];
++ if (has_ignore_label_ && label_value == ignore_label_) {
++ // nothing to be done.
++ } else {
++ int num_better_predictions = -1; // true_class also counts as "better"
++ for (int k = 0; k < num_labels & num_better_predictions < top_k; k++) {
++ num_better_predictions +=
++ (bottom_data[n * dim + k * spatial_dim + s] >= prob_of_true_class);
++ }
++ acc[label_value*nthreads + index] += (num_better_predictions < top_k);
++ counts[label_value*nthreads + index] = 1;
++ }
++ }
++}
++
++template <typename Dtype>
++void AccuracyLayer<Dtype>::Forward_gpu(
++ const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
++ const Dtype* bottom_data = bottom[0]->gpu_data();
++ const Dtype* bottom_label = bottom[1]->gpu_data();
++ const int dim = bottom[0]->count() / outer_num_;
++ const int num_labels = bottom[0]->shape(label_axis_);
++ const int nthreads = outer_num_ * inner_num_;
++ // Since this memory is not used for anything,
++ // we use it here to avoid having to allocate new GPU
++ // memory to accumulate intermediate results in the kernel.
++ Dtype* acc_data = bottom[0]->mutable_gpu_diff();
++ if (top.size() == 1) {
++ // simple case - report only global accuracy.
++
++ // Similarly, this memory is never used elsewhere, and thus we can use it
++ // to avoid having to allocate additional GPU memory.
++ Dtype* counts = bottom[1]->mutable_gpu_diff();
++ // NOLINT_NEXT_LINE(whitespace/operators)
++ AccuracyForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
++ CAFFE_CUDA_NUM_THREADS>>>(nthreads, bottom_data, bottom_label,
++ acc_data, outer_num_, dim, inner_num_, num_labels, top_k_,
++ has_ignore_label_, ignore_label_, counts);
++ Dtype acc;
++ caffe_gpu_asum(nthreads, acc_data, &acc);
++ Dtype valid_count;
++ caffe_gpu_asum(nthreads, counts, &valid_count);
++ if (valid_count > 0) {
++ top[0]->mutable_cpu_data()[0] = acc / valid_count;
++ } else {
++ top[0]->mutable_cpu_data()[0] = 0;
++ }
++ } else {
++ // need to report per-class accuracy as well
++
++ // allocate space for more detailed "counts"
++ nums_buffer_.ReshapeLike(*bottom[0]);
++ Dtype* counts = nums_buffer_.mutable_gpu_data();
++
++ caffe_gpu_set(bottom[0]->count(), Dtype(0), acc_data);
++ caffe_gpu_set(nums_buffer_.count(), Dtype(0), counts);
++
++ // NOLINT_NEXT_LINE(whitespace/operators)
++ AccuracyForwardWithPerClassGPU<Dtype><<<CAFFE_GET_BLOCKS(nthreads),
++ CAFFE_CUDA_NUM_THREADS>>>(nthreads, bottom_data, bottom_label,
++ acc_data, counts, outer_num_, dim, inner_num_, num_labels, top_k_,
++ has_ignore_label_, ignore_label_);
++
++ // get the overall accuracy
++ Dtype acc;
++ caffe_gpu_asum(bottom[0]->count(), acc_data, &acc);
++ Dtype valid_count;
++ caffe_gpu_asum(nums_buffer_.count(), counts, &valid_count);
++ if (valid_count > 0) {
++ top[0]->mutable_cpu_data()[0] = acc / valid_count;
++ } else {
++ top[0]->mutable_cpu_data()[0] = 0;
++ }
++
++ // get per-class accuracy
++ Dtype* per_class_acc = top[1]->mutable_cpu_data();
++ for (int l = 0; l < num_labels; l++) {
++ caffe_gpu_asum(nthreads, acc_data + l*nthreads, per_class_acc+l);
++ caffe_gpu_asum(nthreads, counts + l*nthreads, &valid_count);
++ if (valid_count > 0) {
++ per_class_acc[l] /= valid_count;
++ } else {
++ per_class_acc[l] = 0;
++ }
++ }
++ }
++}
++
++
++template <typename Dtype>
++void AccuracyLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
++ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
++ if (propagate_down[1]) { NOT_IMPLEMENTED; }
++}
++
++INSTANTIATE_LAYER_GPU_FUNCS(AccuracyLayer);
++} // namespace caffe
+diff --git a/src/caffe/layers/crop_layer.cpp b/src/caffe/layers/crop_layer.cpp
+index ef8c177c..65ea8f8b 100644
+--- a/src/caffe/layers/crop_layer.cpp
++++ b/src/caffe/layers/crop_layer.cpp
+@@ -40,8 +40,10 @@ void CropLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+ const int start_axis = bottom[0]->CanonicalAxisIndex(param.axis());
+
+ // Initialize offsets to 0 and the new shape to the current shape of the data.
+- offsets = vector<int>(input_dim, 0);
+ vector<int> new_shape(bottom[0]->shape());
++ vector<int> offsets_shape(1, input_dim);
++ offsets.Reshape(offsets_shape);
++ int* offset_data = offsets.mutable_cpu_data();
+
+ // Determine crop offsets and the new shape post-crop.
+ for (int i = 0; i < input_dim; ++i) {
+@@ -63,15 +65,22 @@ void CropLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+ << "size " << bottom[1]->shape(i) << " and offset " << crop_offset;
+ }
+ new_shape[i] = new_size;
+- offsets[i] = crop_offset;
++ offset_data[i] = crop_offset;
+ }
+ top[0]->Reshape(new_shape);
++ // Compute strides
++ src_strides_.Reshape(offsets_shape);
++ dest_strides_.Reshape(offsets_shape);
++ for (int i = 0; i < input_dim; ++i) {
++ src_strides_.mutable_cpu_data()[i] = bottom[0]->count(i + 1, input_dim);
++ dest_strides_.mutable_cpu_data()[i] = top[0]->count(i + 1, input_dim);
++ }
+ }
+
+ template <typename Dtype>
+ void CropLayer<Dtype>::crop_copy(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top,
+- const vector<int>& offsets,
++ const int* offsets,
+ vector<int> indices,
+ int cur_dim,
+ const Dtype* src_data,
+@@ -115,7 +124,8 @@ void CropLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ std::vector<int> indices(top[0]->num_axes(), 0);
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ Dtype* top_data = top[0]->mutable_cpu_data();
+- crop_copy(bottom, top, offsets, indices, 0, bottom_data, top_data, true);
++ crop_copy(bottom, top, offsets.cpu_data(), indices, 0, bottom_data, top_data,
++ true);
+ }
+
+ template <typename Dtype>
+@@ -127,7 +137,8 @@ void CropLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ if (propagate_down[0]) {
+ caffe_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff);
+ std::vector<int> indices(top[0]->num_axes(), 0);
+- crop_copy(bottom, top, offsets, indices, 0, top_diff, bottom_diff, false);
++ crop_copy(bottom, top, offsets.cpu_data(), indices, 0, top_diff,
++ bottom_diff, false);
+ }
+ }
+
+diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu
+index 677077cd..4ece9cd1 100644
+--- a/src/caffe/layers/crop_layer.cu
++++ b/src/caffe/layers/crop_layer.cu
+@@ -4,90 +4,63 @@
+
+ namespace caffe {
+
+-// Copy (one line per thread) from one array to another, with arbitrary
+-// strides in the last two dimensions.
++__device__ int compute_uncropped_index(
++ int index,
++ const int ndims,
++ const int* src_strides,
++ const int* dest_strides,
++ const int* offsets) {
++ int dest_index = index;
++ int src_index = 0;
++ for (int i = 0; i < ndims; ++i) {
++ int coord = dest_index / dest_strides[i];
++ dest_index -= coord * dest_strides[i];
++ src_index += src_strides[i] * (coord + offsets[i]);
++ }
++ return src_index;
++}
++
+ template <typename Dtype>
+-__global__ void copy_kernel(const int n, const int height, const int width,
+- const int src_inner_stride,
+- const int dest_inner_stride,
++__global__ void crop_kernel_forward(const int nthreads,
++ const int ndims,
++ const int* src_strides,
++ const int* dest_strides,
++ const int* offsets,
+ const Dtype* src, Dtype* dest) {
+- CUDA_KERNEL_LOOP(index, n) {
+- int src_start = index * src_inner_stride;
+- int dest_start = index * dest_inner_stride;
+- for (int i = 0; i < width; ++i) {
+- dest[dest_start + i] = src[src_start + i];
+- }
++ CUDA_KERNEL_LOOP(index, nthreads) {
++ int src_index = compute_uncropped_index(
++ index, ndims, src_strides, dest_strides, offsets);
++ dest[index] = src[src_index];
+ }
+ }
+
+ template <typename Dtype>
+-void CropLayer<Dtype>::crop_copy_gpu(const vector<Blob<Dtype>*>& bottom,
+- const vector<Blob<Dtype>*>& top,
+- const vector<int>& offsets,
+- vector<int> indices,
+- int cur_dim,
+- const Dtype* src_data,
+- Dtype* dest_data,
+- bool is_forward) {
+- if (cur_dim + 2 < top[0]->num_axes()) {
+- // We are not yet at the final dimension, call copy recursively
+- for (int i = 0; i < top[0]->shape(cur_dim); ++i) {
+- indices[cur_dim] = i;
+- crop_copy_gpu(bottom, top, offsets, indices, cur_dim+1,
+- src_data, dest_data, is_forward);
+- }
+- } else {
+- // We are at the last two dimensions, which are stored contiguously in
+- // memory. With (N,C,H,W)
+- // (0,1,2,3) cur_dim -> H
+- // cur_dim+1 -> W
+- const int lines = top[0]->shape(cur_dim);
+- const int height = top[0]->shape(cur_dim);
+- const int width = top[0]->shape(cur_dim+1);
+- std::vector<int> ind_off(cur_dim+2, 0);
+- for (int j = 0; j < cur_dim; ++j) {
+- ind_off[j] = indices[j] + offsets[j];
+- }
+- ind_off[cur_dim] = offsets[cur_dim];
+- ind_off[cur_dim+1] = offsets[cur_dim+1];
+- // Compute copy strides
+- const int src_inner_stride = bottom[0]->shape(cur_dim+1);
+- const int dest_inner_stride = top[0]->shape(cur_dim+1);
+-
+- if (is_forward) {
+- const Dtype* bottom_data = bottom[0]->gpu_data() +
+- bottom[0]->offset(ind_off);
+- Dtype* top_data = top[0]->mutable_gpu_data() +
+- top[0]->offset(indices);
+- // NOLINT_NEXT_LINE(whitespace/operators)
+- copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
+- lines, height, width,
+- src_inner_stride,
+- dest_inner_stride,
+- bottom_data, top_data);
+-
+- } else {
+- const Dtype* top_diff = top[0]->gpu_diff() +
+- top[0]->offset(indices);
+- Dtype* bottom_diff = bottom[0]->mutable_gpu_diff() +
+- bottom[0]->offset(ind_off);
+- // NOLINT_NEXT_LINE(whitespace/operators)
+- copy_kernel<<<CAFFE_GET_BLOCKS(lines), CAFFE_CUDA_NUM_THREADS>>>(
+- lines, height, width,
+- dest_inner_stride,
+- src_inner_stride,
+- top_diff, bottom_diff);
+- }
++__global__ void crop_kernel_backward(const int nthreads,
++ const int ndims,
++ const int* src_strides,
++ const int* dest_strides,
++ const int* offsets,
++ Dtype* src, const Dtype* dest) {
++ CUDA_KERNEL_LOOP(index, nthreads) {
++ int src_index = compute_uncropped_index(
++ index, ndims, src_strides, dest_strides, offsets);
++ src[src_index] = dest[index];
+ }
+ }
+
+ template <typename Dtype>
+ void CropLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top) {
+- std::vector<int> indices(top[0]->num_axes(), 0);
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ Dtype* top_data = top[0]->mutable_gpu_data();
+- crop_copy_gpu(bottom, top, offsets, indices, 0, bottom_data, top_data, true);
++ int n = top[0]->count();
++ // NOLINT_NEXT_LINE(whitespace/operators)
++ crop_kernel_forward<<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(n,
++ bottom[0]->num_axes(),
++ src_strides_.gpu_data(),
++ dest_strides_.gpu_data(),
++ offsets.gpu_data(),
++ bottom_data, top_data);
+ }
+
+ template <typename Dtype>
+@@ -95,12 +68,17 @@ void CropLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+ const Dtype* top_diff = top[0]->gpu_diff();
+ Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
++ int n = top[0]->count();
+
+ if (propagate_down[0]) {
+ caffe_gpu_set(bottom[0]->count(), static_cast<Dtype>(0), bottom_diff);
+- std::vector<int> indices(top[0]->num_axes(), 0);
+- crop_copy_gpu(bottom, top, offsets, indices, 0, top_diff, bottom_diff,
+- false);
++ // NOLINT_NEXT_LINE(whitespace/operators)
++ crop_kernel_backward<<<CAFFE_GET_BLOCKS(n), CAFFE_CUDA_NUM_THREADS>>>(n,
++ bottom[0]->num_axes(),
++ src_strides_.gpu_data(),
++ dest_strides_.gpu_data(),
++ offsets.gpu_data(),
++ bottom_diff, top_diff);
+ }
+ }
+
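
The new crop kernels above replace the recursive, row-by-row copies with one flat kernel: each output index is decomposed along the destination strides and recomposed along the source strides plus the per-axis offsets. A host-side sketch of that index arithmetic (hypothetical standalone function; the strides are the row-major counts computed in crop_layer.cpp above):

    #include <cstddef>
    #include <vector>

    // CPU version of compute_uncropped_index from the hunk above.
    int uncropped_index(int dest_index,
                        const std::vector<int>& src_strides,
                        const std::vector<int>& dest_strides,
                        const std::vector<int>& offsets) {
      int src_index = 0;
      for (std::size_t i = 0; i < dest_strides.size(); ++i) {
        const int coord = dest_index / dest_strides[i];  // coordinate on axis i
        dest_index -= coord * dest_strides[i];
        src_index += src_strides[i] * (coord + offsets[i]);
      }
      return src_index;
    }

For example, cropping a 4x4 image to 2x2 with offset 1 on both axes gives src_strides {4, 1}, dest_strides {2, 1}, offsets {1, 1}; destination index 3 (the bottom-right output pixel) maps to source index 4*(1+1) + 1*(1+1) = 10, i.e. row 2, column 2 of the input.
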
+diff --git a/src/caffe/test/test_accuracy_layer.cpp b/src/caffe/test/test_accuracy_layer.cpp
+index 6fe808bd..e5cc9d5e 100644
+--- a/src/caffe/test/test_accuracy_layer.cpp
++++ b/src/caffe/test/test_accuracy_layer.cpp
+@@ -13,8 +13,10 @@
+
+ namespace caffe {
+
+-template <typename Dtype>
+-class AccuracyLayerTest : public CPUDeviceTest<Dtype> {
++template <typename TypeParam>
++class AccuracyLayerTest : public MultiDeviceTest<TypeParam> {
++ typedef typename TypeParam::Dtype Dtype;
++
+ protected:
+ AccuracyLayerTest()
+ : blob_bottom_data_(new Blob<Dtype>()),
+@@ -69,11 +71,12 @@ class AccuracyLayerTest : public CPUDeviceTest<Dtype> {
+ int top_k_;
+ };
+
+-TYPED_TEST_CASE(AccuracyLayerTest, TestDtypes);
++TYPED_TEST_CASE(AccuracyLayerTest, TestDtypesAndDevices);
+
+ TYPED_TEST(AccuracyLayerTest, TestSetup) {
++ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+- AccuracyLayer<TypeParam> layer(layer_param);
++ AccuracyLayer<Dtype> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ EXPECT_EQ(this->blob_top_->num(), 1);
+ EXPECT_EQ(this->blob_top_->channels(), 1);
+@@ -82,11 +85,12 @@ TYPED_TEST(AccuracyLayerTest, TestSetup) {
+ }
+
+ TYPED_TEST(AccuracyLayerTest, TestSetupTopK) {
++ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ AccuracyParameter* accuracy_param =
+ layer_param.mutable_accuracy_param();
+ accuracy_param->set_top_k(5);
+- AccuracyLayer<TypeParam> layer(layer_param);
++ AccuracyLayer<Dtype> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ EXPECT_EQ(this->blob_top_->num(), 1);
+ EXPECT_EQ(this->blob_top_->channels(), 1);
+@@ -95,8 +99,9 @@ TYPED_TEST(AccuracyLayerTest, TestSetupTopK) {
+ }
+
+ TYPED_TEST(AccuracyLayerTest, TestSetupOutputPerClass) {
++ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+- AccuracyLayer<TypeParam> layer(layer_param);
++ AccuracyLayer<Dtype> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
+ EXPECT_EQ(this->blob_top_->num(), 1);
+ EXPECT_EQ(this->blob_top_->channels(), 1);
+@@ -108,33 +113,39 @@ TYPED_TEST(AccuracyLayerTest, TestSetupOutputPerClass) {
+ EXPECT_EQ(this->blob_top_per_class_->width(), 1);
+ }
+
+-TYPED_TEST(AccuracyLayerTest, TestForwardCPU) {
++TYPED_TEST(AccuracyLayerTest, TestForward) {
++ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+- AccuracyLayer<TypeParam> layer(layer_param);
++ AccuracyLayer<Dtype> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+- layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+-
+- TypeParam max_value;
+- int max_id;
+- int num_correct_labels = 0;
+- for (int i = 0; i < 100; ++i) {
+- max_value = -FLT_MAX;
+- max_id = 0;
+- for (int j = 0; j < 10; ++j) {
+- if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
+- max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
+- max_id = j;
++
++ // repeat the forward
++ for (int iter = 0; iter < 3; iter++) {
++ layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
++
++ Dtype max_value;
++ int max_id;
++ int num_correct_labels = 0;
++ for (int i = 0; i < 100; ++i) {
++ max_value = -FLT_MAX;
++ max_id = 0;
++ for (int j = 0; j < 10; ++j) {
++ if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
++ max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
++ max_id = j;
++ }
++ }
++ if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
++ ++num_correct_labels;
+ }
+ }
+- if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+- ++num_correct_labels;
+- }
++ EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
++ num_correct_labels / Dtype(100.0), 1e-4);
+ }
+- EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
+- num_correct_labels / 100.0, 1e-4);
+ }
+
+ TYPED_TEST(AccuracyLayerTest, TestForwardWithSpatialAxes) {
++ typedef typename TypeParam::Dtype Dtype;
+ this->blob_bottom_data_->Reshape(2, 10, 4, 5);
+ vector<int> label_shape(3);
+ label_shape[0] = 2; label_shape[1] = 4; label_shape[2] = 5;
+@@ -142,195 +153,218 @@ TYPED_TEST(AccuracyLayerTest, TestForwardWithSpatialAxes) {
+ this->FillBottoms();
+ LayerParameter layer_param;
+ layer_param.mutable_accuracy_param()->set_axis(1);
+- AccuracyLayer<TypeParam> layer(layer_param);
++ AccuracyLayer<Dtype> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+- layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+-
+- TypeParam max_value;
+- const int num_labels = this->blob_bottom_label_->count();
+- int max_id;
+- int num_correct_labels = 0;
+- vector<int> label_offset(3);
+- for (int n = 0; n < this->blob_bottom_data_->num(); ++n) {
+- for (int h = 0; h < this->blob_bottom_data_->height(); ++h) {
+- for (int w = 0; w < this->blob_bottom_data_->width(); ++w) {
+- max_value = -FLT_MAX;
+- max_id = 0;
+- for (int c = 0; c < this->blob_bottom_data_->channels(); ++c) {
+- const TypeParam pred_value =
+- this->blob_bottom_data_->data_at(n, c, h, w);
+- if (pred_value > max_value) {
+- max_value = pred_value;
+- max_id = c;
++
++ // repeat the forward
++ for (int iter = 0; iter < 3; iter++) {
++ layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
++
++ Dtype max_value;
++ const int num_labels = this->blob_bottom_label_->count();
++ int max_id;
++ int num_correct_labels = 0;
++ vector<int> label_offset(3);
++ for (int n = 0; n < this->blob_bottom_data_->num(); ++n) {
++ for (int h = 0; h < this->blob_bottom_data_->height(); ++h) {
++ for (int w = 0; w < this->blob_bottom_data_->width(); ++w) {
++ max_value = -FLT_MAX;
++ max_id = 0;
++ for (int c = 0; c < this->blob_bottom_data_->channels(); ++c) {
++ const Dtype pred_value =
++ this->blob_bottom_data_->data_at(n, c, h, w);
++ if (pred_value > max_value) {
++ max_value = pred_value;
++ max_id = c;
++ }
++ }
++ label_offset[0] = n; label_offset[1] = h; label_offset[2] = w;
++ const int correct_label =
++ static_cast<int>(this->blob_bottom_label_->data_at(label_offset));
++ if (max_id == correct_label) {
++ ++num_correct_labels;
+ }
+- }
+- label_offset[0] = n; label_offset[1] = h; label_offset[2] = w;
+- const int correct_label =
+- static_cast<int>(this->blob_bottom_label_->data_at(label_offset));
+- if (max_id == correct_label) {
+- ++num_correct_labels;
+ }
+ }
+ }
++ EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
++ num_correct_labels / Dtype(num_labels), 1e-4);
+ }
+- EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
+- num_correct_labels / TypeParam(num_labels), 1e-4);
+ }
+
+ TYPED_TEST(AccuracyLayerTest, TestForwardIgnoreLabel) {
++ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+- const TypeParam kIgnoreLabelValue = -1;
++ const Dtype kIgnoreLabelValue = -1;
+ layer_param.mutable_accuracy_param()->set_ignore_label(kIgnoreLabelValue);
+- AccuracyLayer<TypeParam> layer(layer_param);
++ AccuracyLayer<Dtype> layer(layer_param);
+ // Manually set some labels to the ignore label value (-1).
+ this->blob_bottom_label_->mutable_cpu_data()[2] = kIgnoreLabelValue;
+ this->blob_bottom_label_->mutable_cpu_data()[5] = kIgnoreLabelValue;
+ this->blob_bottom_label_->mutable_cpu_data()[32] = kIgnoreLabelValue;
+ layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+- layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+-
+- TypeParam max_value;
+- int max_id;
+- int num_correct_labels = 0;
+- int count = 0;
+- for (int i = 0; i < 100; ++i) {
+- if (kIgnoreLabelValue == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+- continue;
+- }
+- ++count;
+- max_value = -FLT_MAX;
+- max_id = 0;
+- for (int j = 0; j < 10; ++j) {
+- if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
+- max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
+- max_id = j;
++
++ // Repeat the forward pass; every run should produce the same result.
++ for (int iter = 0; iter < 3; iter++) {
++ layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
++
++ Dtype max_value;
++ int max_id;
++ int num_correct_labels = 0;
++ int count = 0;
++ for (int i = 0; i < 100; ++i) {
++ if (kIgnoreLabelValue == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
++ continue;
++ }
++ ++count;
++ max_value = -FLT_MAX;
++ max_id = 0;
++ for (int j = 0; j < 10; ++j) {
++ if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
++ max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
++ max_id = j;
++ }
++ }
++ if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
++ ++num_correct_labels;
+ }
+ }
+- if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+- ++num_correct_labels;
+- }
++ EXPECT_EQ(count, 97); // We set 3 out of 100 labels to kIgnoreLabelValue.
++ EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
++ num_correct_labels / Dtype(count), 1e-4);
+ }
+- EXPECT_EQ(count, 97); // We set 3 out of 100 labels to kIgnoreLabelValue.
+- EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
+- num_correct_labels / TypeParam(count), 1e-4);
+ }
+
+-TYPED_TEST(AccuracyLayerTest, TestForwardCPUTopK) {
++TYPED_TEST(AccuracyLayerTest, TestForwardTopK) {
++ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+ AccuracyParameter* accuracy_param = layer_param.mutable_accuracy_param();
+ accuracy_param->set_top_k(this->top_k_);
+- AccuracyLayer<TypeParam> layer(layer_param);
++ AccuracyLayer<Dtype> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+- layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+-
+- TypeParam current_value;
+- int current_rank;
+- int num_correct_labels = 0;
+- for (int i = 0; i < 100; ++i) {
+- for (int j = 0; j < 10; ++j) {
+- current_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
+- current_rank = 0;
+- for (int k = 0; k < 10; ++k) {
+- if (this->blob_bottom_data_->data_at(i, k, 0, 0) > current_value) {
+- ++current_rank;
++
++ // Repeat the forward pass; every run should produce the same result.
++ for (int iter = 0; iter < 3; iter++) {
++ layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
++
++ Dtype current_value;
++ int current_rank;
++ int num_correct_labels = 0;
++ for (int i = 0; i < 100; ++i) {
++ for (int j = 0; j < 10; ++j) {
++ current_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
++ current_rank = 0;
++ for (int k = 0; k < 10; ++k) {
++ if (this->blob_bottom_data_->data_at(i, k, 0, 0) > current_value) {
++ ++current_rank;
++ }
++ }
++ if (current_rank < this->top_k_ &&
++ j == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
++ ++num_correct_labels;
+ }
+- }
+- if (current_rank < this->top_k_ &&
+- j == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+- ++num_correct_labels;
+ }
+ }
+- }
+
+- EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
+- num_correct_labels / 100.0, 1e-4);
++ EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
++ num_correct_labels / Dtype(100.0), 1e-4);
++ }
+ }
+
+-TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClass) {
++TYPED_TEST(AccuracyLayerTest, TestForwardPerClass) {
++ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+- AccuracyLayer<TypeParam> layer(layer_param);
++ AccuracyLayer<Dtype> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
+- layer.Forward(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
+-
+- TypeParam max_value;
+- int max_id;
+- int num_correct_labels = 0;
+- const int num_class = this->blob_top_per_class_->num();
+- vector<int> correct_per_class(num_class, 0);
+- vector<int> num_per_class(num_class, 0);
+- for (int i = 0; i < 100; ++i) {
+- max_value = -FLT_MAX;
+- max_id = 0;
+- for (int j = 0; j < 10; ++j) {
+- if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
+- max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
+- max_id = j;
++ // Repeat the forward pass; every run should produce the same result.
++ for (int iter = 0; iter < 3; iter++) {
++ layer.Forward(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
++
++ Dtype max_value;
++ int max_id;
++ int num_correct_labels = 0;
++ const int num_class = this->blob_top_per_class_->num();
++ vector<int> correct_per_class(num_class, 0);
++ vector<int> num_per_class(num_class, 0);
++ for (int i = 0; i < 100; ++i) {
++ max_value = -FLT_MAX;
++ max_id = 0;
++ for (int j = 0; j < 10; ++j) {
++ if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
++ max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
++ max_id = j;
++ }
++ }
++ ++num_per_class[this->blob_bottom_label_->data_at(i, 0, 0, 0)];
++ if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
++ ++num_correct_labels;
++ ++correct_per_class[max_id];
+ }
+ }
+- ++num_per_class[this->blob_bottom_label_->data_at(i, 0, 0, 0)];
+- if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+- ++num_correct_labels;
+- ++correct_per_class[max_id];
++ EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
++ num_correct_labels / 100.0, 1e-4);
++ for (int i = 0; i < num_class; ++i) {
++ Dtype accuracy_per_class = (num_per_class[i] > 0 ?
++ static_cast<Dtype>(correct_per_class[i]) / num_per_class[i] : 0);
++ EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0),
++ accuracy_per_class, 1e-4);
+ }
+ }
+- EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
+- num_correct_labels / 100.0, 1e-4);
+- for (int i = 0; i < num_class; ++i) {
+- TypeParam accuracy_per_class = (num_per_class[i] > 0 ?
+- static_cast<TypeParam>(correct_per_class[i]) / num_per_class[i] : 0);
+- EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0),
+- accuracy_per_class, 1e-4);
+- }
+ }
+
+
+-TYPED_TEST(AccuracyLayerTest, TestForwardCPUPerClassWithIgnoreLabel) {
++TYPED_TEST(AccuracyLayerTest, TestForwardPerClassWithIgnoreLabel) {
++ typedef typename TypeParam::Dtype Dtype;
+ LayerParameter layer_param;
+- const TypeParam kIgnoreLabelValue = -1;
++ const Dtype kIgnoreLabelValue = -1;
+ layer_param.mutable_accuracy_param()->set_ignore_label(kIgnoreLabelValue);
+- AccuracyLayer<TypeParam> layer(layer_param);
++ AccuracyLayer<Dtype> layer(layer_param);
+ // Manually set some labels to the ignore label value (-1).
+ this->blob_bottom_label_->mutable_cpu_data()[2] = kIgnoreLabelValue;
+ this->blob_bottom_label_->mutable_cpu_data()[5] = kIgnoreLabelValue;
+ this->blob_bottom_label_->mutable_cpu_data()[32] = kIgnoreLabelValue;
+ layer.SetUp(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
+- layer.Forward(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
+-
+- TypeParam max_value;
+- int max_id;
+- int num_correct_labels = 0;
+- const int num_class = this->blob_top_per_class_->num();
+- vector<int> correct_per_class(num_class, 0);
+- vector<int> num_per_class(num_class, 0);
+- int count = 0;
+- for (int i = 0; i < 100; ++i) {
+- if (kIgnoreLabelValue == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+- continue;
+- }
+- ++count;
+- max_value = -FLT_MAX;
+- max_id = 0;
+- for (int j = 0; j < 10; ++j) {
+- if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
+- max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
+- max_id = j;
++
++ // Repeat the forward pass; every run should produce the same result.
++ for (int iter = 0; iter < 3; iter++) {
++ layer.Forward(this->blob_bottom_vec_, this->blob_top_per_class_vec_);
++
++ Dtype max_value;
++ int max_id;
++ int num_correct_labels = 0;
++ const int num_class = this->blob_top_per_class_->num();
++ vector<int> correct_per_class(num_class, 0);
++ vector<int> num_per_class(num_class, 0);
++ int count = 0;
++ for (int i = 0; i < 100; ++i) {
++ if (kIgnoreLabelValue == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
++ continue;
++ }
++ ++count;
++ max_value = -FLT_MAX;
++ max_id = 0;
++ for (int j = 0; j < 10; ++j) {
++ if (this->blob_bottom_data_->data_at(i, j, 0, 0) > max_value) {
++ max_value = this->blob_bottom_data_->data_at(i, j, 0, 0);
++ max_id = j;
++ }
++ }
++ ++num_per_class[this->blob_bottom_label_->data_at(i, 0, 0, 0)];
++ if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
++ ++num_correct_labels;
++ ++correct_per_class[max_id];
+ }
+ }
+- ++num_per_class[this->blob_bottom_label_->data_at(i, 0, 0, 0)];
+- if (max_id == this->blob_bottom_label_->data_at(i, 0, 0, 0)) {
+- ++num_correct_labels;
+- ++correct_per_class[max_id];
++ EXPECT_EQ(count, 97);
++ EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
++ num_correct_labels / Dtype(count), 1e-4);
++ for (int i = 0; i < 10; ++i) {
++ Dtype accuracy_per_class = (num_per_class[i] > 0 ?
++ static_cast<Dtype>(correct_per_class[i]) / num_per_class[i] : 0);
++ EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0),
++ accuracy_per_class, 1e-4);
+ }
+ }
+- EXPECT_EQ(count, 97);
+- EXPECT_NEAR(this->blob_top_->data_at(0, 0, 0, 0),
+- num_correct_labels / TypeParam(count), 1e-4);
+- for (int i = 0; i < 10; ++i) {
+- TypeParam accuracy_per_class = (num_per_class[i] > 0 ?
+- static_cast<TypeParam>(correct_per_class[i]) / num_per_class[i] : 0);
+- EXPECT_NEAR(this->blob_top_per_class_->data_at(i, 0, 0, 0),
+- accuracy_per_class, 1e-4);
+- }
+ }
+
+ } // namespace caffe
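The recurring edit in the hunks above is mechanical but easy to miss: each test now derives Dtype from the multi-device TypeParam (so the suite exercises every CPU/GPU test parameter) and wraps Forward() in a three-iteration loop, so a layer that leaked state between calls would fail. A minimal standalone sketch of that looping pattern, with no Caffe or gtest dependency (FakeAccuracyLayer and the sample values are illustrative only, not Caffe API):

    // Run the same forward computation repeatedly and require identical
    // output each time, mirroring the "for (int iter = 0; iter < 3; ...)"
    // loops the patch adds around layer.Forward().
    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct FakeAccuracyLayer {
      // Stateless forward: accuracy depends only on the current inputs.
      double Forward(const std::vector<int>& pred,
                     const std::vector<int>& label) {
        int correct = 0;
        for (std::size_t i = 0; i < pred.size(); ++i) {
          if (pred[i] == label[i]) ++correct;
        }
        return static_cast<double>(correct) / pred.size();
      }
    };

    int main() {
      FakeAccuracyLayer layer;
      const std::vector<int> pred = {1, 2, 3, 4};
      const std::vector<int> label = {1, 2, 0, 4};
      const double first = layer.Forward(pred, label);  // 0.75
      for (int iter = 0; iter < 3; ++iter) {
        // A second or third call must not drift from the first result.
        assert(layer.Forward(pred, label) == first);
      }
      return 0;
    }
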
+diff --git a/src/caffe/test/test_filler.cpp b/src/caffe/test/test_filler.cpp
+index 26e9b217..f84d707b 100644
+--- a/src/caffe/test/test_filler.cpp
++++ b/src/caffe/test/test_filler.cpp
+@@ -29,7 +29,7 @@ TYPED_TEST(ConstantFillerTest, TestFill) {
+ const int count = this->blob_->count();
+ const TypeParam* data = this->blob_->cpu_data();
+ for (int i = 0; i < count; ++i) {
+- EXPECT_GE(data[i], this->filler_param_.value());
++ EXPECT_EQ(data[i], this->filler_param_.value());
+ }
+ }
+
+@@ -238,4 +238,45 @@ TYPED_TEST(MSRAFillerTest, TestFillAverage) {
+ this->test_params(FillerParameter_VarianceNorm_AVERAGE, n);
+ }
+
++template <typename Dtype>
++class BilinearFillerTest : public ::testing::Test {
++ protected:
++ BilinearFillerTest() : filler_param_() {}
++ virtual void test_params(const int n) {
++ this->blob_ = new Blob<Dtype>(1000, 2, n, n);
++ this->filler_.reset(new BilinearFiller<Dtype>(this->filler_param_));
++ this->filler_->Fill(blob_);
++ EXPECT_TRUE(this->blob_);
++ const int outer_num = this->blob_->count(0, 2);
++ const int inner_num = this->blob_->count(2, 4);
++ const Dtype* data = this->blob_->cpu_data();
++ int f = ceil(this->blob_->width() / 2.);
++ Dtype c = (this->blob_->width() - 1) / (2. * f);
++ for (int i = 0; i < outer_num; ++i) {
++ for (int j = 0; j < inner_num; ++j) {
++ Dtype x = j % this->blob_->width();
++ Dtype y = (j / this->blob_->width()) % this->blob_->height();
++ Dtype expected_value = (1 - fabs(x / f - c)) * (1 - fabs(y / f - c));
++ const Dtype actual_value = data[i * inner_num + j];
++ EXPECT_NEAR(expected_value, actual_value, 0.01);
++ }
++ }
++ }
++ virtual ~BilinearFillerTest() { delete blob_; }
++ Blob<Dtype>* blob_;
++ FillerParameter filler_param_;
++ shared_ptr<BilinearFiller<Dtype> > filler_;
++};
++
++TYPED_TEST_CASE(BilinearFillerTest, TestDtypes);
++
++TYPED_TEST(BilinearFillerTest, TestFillOdd) {
++ const int n = 7;
++ this->test_params(n);
++}
++TYPED_TEST(BilinearFillerTest, TestFillEven) {
++ const int n = 6;
++ this->test_params(n);
++}
++
+ } // namespace caffe
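The new BilinearFillerTest pins down the closed-form weights of a separable bilinear-upsampling kernel: with f = ceil(n / 2) and c = (n - 1) / (2 * f), the weight at position (x, y) is (1 - |x/f - c|) * (1 - |y/f - c|), which peaks at 1 in the kernel centre and falls off linearly towards the borders. A standalone sketch (plain C++, no Caffe dependency; n = 7 matches the TestFillOdd case) that prints the kernel the test expects:

    // Print the n x n bilinear kernel checked by BilinearFillerTest.
    #include <cmath>
    #include <cstdio>

    int main() {
      const int n = 7;                       // kernel width/height (odd case)
      const double f = std::ceil(n / 2.0);   // f = ceil(n/2) = 4
      const double c = (n - 1) / (2.0 * f);  // kernel centre, c = 0.75 here
      for (int y = 0; y < n; ++y) {
        for (int x = 0; x < n; ++x) {
          // Separable weight: linear falloff in x times linear falloff in y.
          const double w =
              (1 - std::fabs(x / f - c)) * (1 - std::fabs(y / f - c));
          std::printf("%5.3f ", w);
        }
        std::printf("\n");
      }
      return 0;
    }

For n = 7 the centre entry is exactly 1.0 (x = y = 3 gives |3/4 - 0.75| = 0), which is why deconvolution weights initialised this way start out as plain bilinear upsampling.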
--