[caffe] 01/06: New upstream version 1.0.0~rc3+20161127-g24d2f67
Zhou Mo
cdluminate-guest at moszumanska.debian.org
Mon Nov 28 11:08:22 UTC 2016
This is an automated email from the git hooks/post-receive script.
cdluminate-guest pushed a commit to branch master
in repository caffe.
commit 3031d92ffd7893b7eac5d96a1731f9c287683858
Author: Zhou Mo <cdluminate at gmail.com>
Date: Mon Nov 28 10:26:02 2016 +0000
New upstream version 1.0.0~rc3+20161127-g24d2f67
---
 .github/ISSUE_TEMPLATE.md                          | 19 +++++
 Makefile                                           |  6 +-
 cmake/Targets.cmake                                |  2 +-
 docs/install_apt.md                                |  2 +-
 docs/install_osx.md                                |  2 +-
 docs/install_yum.md                                |  4 +-
 docs/installation.md                               |  3 +-
 examples/02-fine-tuning.ipynb                      |  2 +-
 examples/mnist/train_lenet_docker.sh               |  2 +-
 .../pycaffe/layers/pascal_multilabel_datalayers.py |  6 +-
 examples/pycaffe/tools.py                          |  4 +-
 include/caffe/layers/accuracy_layer.hpp            |  2 +-
 .../layers/sigmoid_cross_entropy_loss_layer.hpp    | 18 +++++
 matlab/+caffe/private/caffe_.cpp                   |  2 +-
 matlab/CMakeLists.txt                              |  2 +-
 scripts/cpp_lint.py                                |  6 +-
 scripts/travis/install-deps.sh                     |  2 +-
 src/caffe/layer_factory.cpp                        |  7 ++
 src/caffe/layers/batch_norm_layer.cpp              |  4 +-
 src/caffe/layers/crop_layer.cpp                    |  2 +-
 src/caffe/layers/crop_layer.cu                     |  8 +--
 src/caffe/layers/hdf5_data_layer.cpp               |  4 +-
 src/caffe/layers/rnn_layer.cpp                     |  2 +-
 .../layers/sigmoid_cross_entropy_loss_layer.cpp    | 79 ++++++++++++++++++---
 .../layers/sigmoid_cross_entropy_loss_layer.cu     | 80 ++++++++++++++++++++--
 src/caffe/proto/caffe.proto                        |  8 ++-
 src/caffe/test/CMakeLists.txt                      |  2 +-
 src/caffe/test/test_euclidean_loss_layer.cpp       |  2 +-
 .../test/test_sigmoid_cross_entropy_loss_layer.cpp | 28 ++++++++
 src/gtest/gtest-all.cpp                            |  4 +-
 src/gtest/gtest.h                                  |  2 +-
 tools/extra/parse_log.py                           |  9 ++-
 tools/extra/plot_log.gnuplot.example               |  2 +-
 33 files changed, 270 insertions(+), 57 deletions(-)
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
new file mode 100644
index 0000000..d78a3dc
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,19 @@
+Please use the [caffe-users list](https://groups.google.com/forum/#!forum/caffe-users) for usage, installation, or modeling questions, or other requests for help.
+_Do not post such requests to Issues._ Doing so interferes with the development of Caffe.
+
+Please read the [guidelines for contributing](https://github.com/BVLC/caffe/blob/master/CONTRIBUTING.md) before submitting this issue.
+
+### Issue summary
+
+
+### Steps to reproduce
+
+If you are having difficulty building Caffe or training a model, please ask the caffe-users mailing list. If you are reporting a build error that seems to be due to a bug in Caffe, please attach your build configuration (either Makefile.config or CMakeCache.txt) and the output of the make (or cmake) command.
+
+### Your system configuration
+Operating system:
+Compiler:
+CUDA version (if applicable):
+CUDNN version (if applicable):
+BLAS:
+Python or MATLAB version (for pycaffe and matcaffe respectively):
diff --git a/Makefile b/Makefile
index 2489406..ccc4d8b 100644
--- a/Makefile
+++ b/Makefile
@@ -192,12 +192,12 @@ ifeq ($(USE_LMDB), 1)
LIBRARIES += lmdb
endif
ifeq ($(USE_OPENCV), 1)
- LIBRARIES += opencv_core opencv_highgui opencv_imgproc
+ LIBRARIES += opencv_core opencv_highgui opencv_imgproc
ifeq ($(OPENCV_VERSION), 3)
LIBRARIES += opencv_imgcodecs
endif
-
+
endif
PYTHON_LIBRARIES ?= boost_python python2.7
WARNINGS := -Wall -Wno-sign-compare
@@ -385,7 +385,7 @@ else
XCODE_CLT_GEQ_7 := $(shell [ $(XCODE_CLT_VER) -gt 6 ] && echo 1)
XCODE_CLT_GEQ_6 := $(shell [ $(XCODE_CLT_VER) -gt 5 ] && echo 1)
ifeq ($(XCODE_CLT_GEQ_7), 1)
- BLAS_INCLUDE ?= /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers
+ BLAS_INCLUDE ?= /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/$(shell ls /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/ | sort | tail -1)/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers
else ifeq ($(XCODE_CLT_GEQ_6), 1)
BLAS_INCLUDE ?= /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/
LDFLAGS += -framework Accelerate
diff --git a/cmake/Targets.cmake b/cmake/Targets.cmake
index a796d00..2cb1158 100644
--- a/cmake/Targets.cmake
+++ b/cmake/Targets.cmake
@@ -94,7 +94,7 @@ function(caffe_pickup_caffe_sources root)
caffe_convert_absolute_paths(test_srcs)
caffe_convert_absolute_paths(test_cuda)
- # propogate to parent scope
+ # propagate to parent scope
set(srcs ${srcs} PARENT_SCOPE)
set(cuda ${cuda} PARENT_SCOPE)
set(test_srcs ${test_srcs} PARENT_SCOPE)
diff --git a/docs/install_apt.md b/docs/install_apt.md
index 3de5a49..e95b022 100644
--- a/docs/install_apt.md
+++ b/docs/install_apt.md
@@ -1,5 +1,5 @@
---
-title: Installation: Ubuntu
+title: "Installation: Ubuntu"
---
# Ubuntu Installation
diff --git a/docs/install_osx.md b/docs/install_osx.md
index 6405d8a..a2da82f 100644
--- a/docs/install_osx.md
+++ b/docs/install_osx.md
@@ -1,5 +1,5 @@
---
-title: Installation: OS X
+title: "Installation: OS X"
---
# OS X Installation
diff --git a/docs/install_yum.md b/docs/install_yum.md
index 2104912..842fbd6 100644
--- a/docs/install_yum.md
+++ b/docs/install_yum.md
@@ -1,5 +1,5 @@
---
-title: Installation: RHEL / Fedora / CentOS
+title: "Installation: RHEL / Fedora / CentOS"
---
# RHEL / Fedora / CentOS Installation
@@ -15,7 +15,7 @@ title: Installation: RHEL / Fedora / CentOS
**Remaining dependencies, if not found**
# glog
- wget https://google-glog.googlecode.com/files/glog-0.3.3.tar.gz
+ wget https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/google-glog/glog-0.3.3.tar.gz
tar zxvf glog-0.3.3.tar.gz
cd glog-0.3.3
./configure
diff --git a/docs/installation.md b/docs/installation.md
index 4aac7c4..3254be3 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -14,8 +14,9 @@ The official Makefile and `Makefile.config` build are complemented by a [communi
- [Ubuntu installation](install_apt.html) *the standard platform*
- [OS X installation](install_osx.html)
- [RHEL / CentOS / Fedora installation](install_yum.html)
-- [Windows](https://github.com/BVLC/caffe/tree/windows) *see the Windows branch led by Microsoft*
+- [Windows](https://github.com/BVLC/caffe/tree/windows) *see the Windows branch led by Guillaume Dumont*
- [OpenCL](https://github.com/BVLC/caffe/tree/opencl) *see the OpenCL branch led by Fabian Tschopp*
+- [AWS AMI](https://github.com/bitfusionio/amis/tree/master/awsmrkt-bfboost-ubuntu14-cuda75-caffe) *pre-configured for AWS*
**Overview**:
diff --git a/examples/02-fine-tuning.ipynb b/examples/02-fine-tuning.ipynb
index 07ca8df..f44eaf9 100644
--- a/examples/02-fine-tuning.ipynb
+++ b/examples/02-fine-tuning.ipynb
@@ -1141,7 +1141,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "So we did finetuning and it is awesome. Let's take a look at what kind of results we are able to get with a longer, more complete run of the style recognition dataset. Note: the below URL might be occassionally down because it is run on a research machine.\n",
+ "So we did finetuning and it is awesome. Let's take a look at what kind of results we are able to get with a longer, more complete run of the style recognition dataset. Note: the below URL might be occasionally down because it is run on a research machine.\n",
"\n",
"http://demo.vislab.berkeleyvision.org/"
]
diff --git a/examples/mnist/train_lenet_docker.sh b/examples/mnist/train_lenet_docker.sh
index 32cf1c8..e946ba0 100755
--- a/examples/mnist/train_lenet_docker.sh
+++ b/examples/mnist/train_lenet_docker.sh
@@ -25,7 +25,7 @@ set -e
# executed.
#
# In order to provide additional flexibility, the following shell (environment)
-# variables can be used to controll the execution of each of the phases:
+# variables can be used to control the execution of each of the phases:
#
# DOWNLOAD_DATA: Enable (1) or disable (0) the downloading of the MNIST dataset
# CREATE_LMDB: Enable (1) or disable (0) the creation of the LMDB database
diff --git a/examples/pycaffe/layers/pascal_multilabel_datalayers.py b/examples/pycaffe/layers/pascal_multilabel_datalayers.py
index 68e4fa7..9420cb3 100644
--- a/examples/pycaffe/layers/pascal_multilabel_datalayers.py
+++ b/examples/pycaffe/layers/pascal_multilabel_datalayers.py
@@ -20,7 +20,7 @@ from tools import SimpleTransformer
class PascalMultilabelDataLayerSync(caffe.Layer):
"""
- This is a simple syncronous datalayer for training a multilabel model on
+ This is a simple synchronous datalayer for training a multilabel model on
PASCAL.
"""
@@ -33,7 +33,7 @@ class PascalMultilabelDataLayerSync(caffe.Layer):
# params is a python dictionary with layer parameters.
params = eval(self.param_str)
- # Check the paramameters for validity.
+ # Check the parameters for validity.
check_params(params)
# store input as class variables
@@ -207,7 +207,7 @@ def check_params(params):
def print_info(name, params):
"""
- Ouput some info regarding the class
+ Output some info regarding the class
"""
print "{} initialized for split: {}, with bs: {}, im_shape: {}.".format(
name,
diff --git a/examples/pycaffe/tools.py b/examples/pycaffe/tools.py
index 88b1834..7f6c2d8 100644
--- a/examples/pycaffe/tools.py
+++ b/examples/pycaffe/tools.py
@@ -26,7 +26,7 @@ class SimpleTransformer:
def preprocess(self, im):
"""
- preprocess() emulate the pre-processing occuring in the vgg16 caffe
+ preprocess() emulate the pre-processing occurring in the vgg16 caffe
prototxt.
"""
@@ -75,7 +75,7 @@ class CaffeSolver:
# looks:
self.sp['display'] = '25'
self.sp['snapshot'] = '2500'
- self.sp['snapshot_prefix'] = '"snapshot"' # string withing a string!
+ self.sp['snapshot_prefix'] = '"snapshot"' # string within a string!
# learning rate policy
self.sp['lr_policy'] = '"fixed"'
diff --git a/include/caffe/layers/accuracy_layer.hpp b/include/caffe/layers/accuracy_layer.hpp
index fe2adb9..a9ad322 100644
--- a/include/caffe/layers/accuracy_layer.hpp
+++ b/include/caffe/layers/accuracy_layer.hpp
@@ -39,7 +39,7 @@ class AccuracyLayer : public Layer<Dtype> {
// If there are two top blobs, then the second blob will contain
// accuracies per class.
virtual inline int MinTopBlobs() const { return 1; }
- virtual inline int MaxTopBlos() const { return 2; }
+ virtual inline int MaxTopBlobs() const { return 2; }
protected:
/**
diff --git a/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp b/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp
index 598dca5..3d92524 100644
--- a/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp
+++ b/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp
@@ -59,6 +59,8 @@ class SigmoidCrossEntropyLossLayer : public LossLayer<Dtype> {
/// @copydoc SigmoidCrossEntropyLossLayer
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
+ virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ const vector<Blob<Dtype>*>& top);
/**
* @brief Computes the sigmoid cross-entropy loss error gradient w.r.t. the
@@ -95,6 +97,13 @@ class SigmoidCrossEntropyLossLayer : public LossLayer<Dtype> {
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+ /// Read the normalization mode parameter and compute the normalizer based
+ /// on the blob size. If normalization_mode is VALID, the count of valid
+ /// outputs will be read from valid_count, unless it is -1 in which case
+ /// all outputs are assumed to be valid.
+ virtual Dtype get_normalizer(
+ LossParameter_NormalizationMode normalization_mode, int valid_count);
+
/// The internal SigmoidLayer used to map predictions to probabilities.
shared_ptr<SigmoidLayer<Dtype> > sigmoid_layer_;
/// sigmoid_output stores the output of the SigmoidLayer.
@@ -103,6 +112,15 @@ class SigmoidCrossEntropyLossLayer : public LossLayer<Dtype> {
vector<Blob<Dtype>*> sigmoid_bottom_vec_;
/// top vector holder to call the underlying SigmoidLayer::Forward
vector<Blob<Dtype>*> sigmoid_top_vec_;
+
+ /// Whether to ignore instances with a certain label.
+ bool has_ignore_label_;
+ /// The label indicating that an instance should be ignored.
+ int ignore_label_;
+ /// How to normalize the loss.
+ LossParameter_NormalizationMode normalization_;
+ Dtype normalizer_;
+ int outer_num_, inner_num_;
};
} // namespace caffe
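(A quick aside on the get_normalizer() declaration above, with numbers chosen purely for illustration: take outer_num_ = 8, inner_num_ = 1, and 3 targets carrying the ignore label, so valid_count = 5. The normalization modes defined in LossParameter then yield

    N_{\mathrm{FULL}} = 8 \times 1 = 8, \qquad
    N_{\mathrm{VALID}} = 8 - 3 = 5, \qquad
    N_{\mathrm{BATCH\_SIZE}} = 8, \qquad
    N_{\mathrm{NONE}} = 1,

and the implementation later in this patch clamps the result to max(1, N), guarding against batches in which every target is ignored.)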
diff --git a/matlab/+caffe/private/caffe_.cpp b/matlab/+caffe/private/caffe_.cpp
index 1b1b2bf..4e466e6 100644
--- a/matlab/+caffe/private/caffe_.cpp
+++ b/matlab/+caffe/private/caffe_.cpp
@@ -44,7 +44,7 @@ void mxCHECK_FILE_EXIST(const char* file) {
// The pointers to caffe::Solver and caffe::Net instances
static vector<shared_ptr<Solver<float> > > solvers_;
static vector<shared_ptr<Net<float> > > nets_;
-// init_key is generated at the beginning and everytime you call reset
+// init_key is generated at the beginning and every time you call reset
static double init_key = static_cast<double>(caffe_rng_rand());
/** -----------------------------------------------------------------
diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt
index f420df8..987730d 100644
--- a/matlab/CMakeLists.txt
+++ b/matlab/CMakeLists.txt
@@ -20,7 +20,7 @@ if(NOT BUILD_SHARED_LIBS AND build_using MATCHES Matlab)
message(FATAL_ERROR "Matlab MEX interface (with default mex options file) can only be built if caffe is compiled as shared library. Please enable 'BUILD_SHARED_LIBS' in CMake. Aternativelly you can switch to Octave compiler.")
endif()
-# helper function to set proper mex file extention
+# helper function to set proper mex file extension
function(caffe_fetch_and_set_proper_mexext mexfile_variable)
execute_process(COMMAND ${Matlab_mexext} OUTPUT_STRIP_TRAILING_WHITESPACE RESULT_VARIABLE res OUTPUT_VARIABLE ext)
if(res MATCHES 0)
diff --git a/scripts/cpp_lint.py b/scripts/cpp_lint.py
index 14c76ec..6ec4fb7 100755
--- a/scripts/cpp_lint.py
+++ b/scripts/cpp_lint.py
@@ -4460,7 +4460,7 @@ def UpdateIncludeState(filename, include_state, io=codecs):
io: The io factory to use to read the file. Provided for testability.
Returns:
- True if a header was succesfully added. False otherwise.
+ True if a header was successfully added. False otherwise.
"""
headerfile = None
try:
@@ -4532,7 +4532,7 @@ def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error,
# Let's copy the include_state so it is only messed up within this function.
include_state = include_state.copy()
- # Did we find the header for this file (if any) and succesfully load it?
+ # Did we find the header for this file (if any) and successfully load it?
header_found = False
# Use the absolute path so that matching works properly.
@@ -4833,7 +4833,7 @@ def ParseArguments(args):
try:
_valid_extensions = set(val.split(','))
except ValueError:
- PrintUsage('Extensions must be comma seperated list.')
+ PrintUsage('Extensions must be comma separated list.')
if not filenames:
PrintUsage('No files were specified.')
diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh
index daef5c4..1900b16 100755
--- a/scripts/travis/install-deps.sh
+++ b/scripts/travis/install-deps.sh
@@ -84,7 +84,7 @@ if $WITH_CUDA ; then
rm $CUDA_REPO_PKG
if $WITH_CUDNN ; then
- ML_REPO_PKG=nvidia-machine-learning-repo_4.0-2_amd64.deb
+ ML_REPO_PKG=nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb
wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/$ML_REPO_PKG
dpkg -i $ML_REPO_PKG
fi
diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp
index e967bd6..f14253a 100644
--- a/src/caffe/layer_factory.cpp
+++ b/src/caffe/layer_factory.cpp
@@ -67,6 +67,7 @@ shared_ptr<Layer<Dtype> > GetConvolutionLayer(
#endif
} else {
LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
+ throw; // Avoids missing return warning
}
}
@@ -104,6 +105,7 @@ shared_ptr<Layer<Dtype> > GetPoolingLayer(const LayerParameter& param) {
#endif
} else {
LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
+ throw; // Avoids missing return warning
}
}
@@ -141,6 +143,7 @@ shared_ptr<Layer<Dtype> > GetLRNLayer(const LayerParameter& param) {
#endif
} else {
LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
+ throw; // Avoids missing return warning
}
}
@@ -164,6 +167,7 @@ shared_ptr<Layer<Dtype> > GetReLULayer(const LayerParameter& param) {
#endif
} else {
LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
+ throw; // Avoids missing return warning
}
}
@@ -187,6 +191,7 @@ shared_ptr<Layer<Dtype> > GetSigmoidLayer(const LayerParameter& param) {
#endif
} else {
LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
+ throw; // Avoids missing return warning
}
}
@@ -210,6 +215,7 @@ shared_ptr<Layer<Dtype> > GetSoftmaxLayer(const LayerParameter& param) {
#endif
} else {
LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
+ throw; // Avoids missing return warning
}
}
@@ -233,6 +239,7 @@ shared_ptr<Layer<Dtype> > GetTanHLayer(const LayerParameter& param) {
#endif
} else {
LOG(FATAL) << "Layer " << param.name() << " has unknown engine.";
+ throw; // Avoids missing return warning
}
}
diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp
index e661abb..0a08ed4 100644
--- a/src/caffe/layers/batch_norm_layer.cpp
+++ b/src/caffe/layers/batch_norm_layer.cpp
@@ -27,7 +27,7 @@ void BatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
sz.push_back(channels_);
this->blobs_[0].reset(new Blob<Dtype>(sz));
this->blobs_[1].reset(new Blob<Dtype>(sz));
- sz[0]=1;
+ sz[0] = 1;
this->blobs_[2].reset(new Blob<Dtype>(sz));
for (int i = 0; i < 3; ++i) {
caffe_set(this->blobs_[i]->count(), Dtype(0),
@@ -61,7 +61,7 @@ void BatchNormLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
variance_.Reshape(sz);
temp_.ReshapeLike(*bottom[0]);
x_norm_.ReshapeLike(*bottom[0]);
- sz[0]=bottom[0]->shape(0);
+ sz[0] = bottom[0]->shape(0);
batch_sum_multiplier_.Reshape(sz);
int spatial_dim = bottom[0]->count()/(channels_*bottom[0]->shape(0));
diff --git a/src/caffe/layers/crop_layer.cpp b/src/caffe/layers/crop_layer.cpp
index aecdcd6..d36b61c 100644
--- a/src/caffe/layers/crop_layer.cpp
+++ b/src/caffe/layers/crop_layer.cpp
@@ -85,7 +85,7 @@ void CropLayer<Dtype>::crop_copy(const vector<Blob<Dtype>*>& bottom,
src_data, dest_data, is_forward);
}
} else {
- // We are at the last dimensions, which is stored continously in memory
+ // We are at the last dimensions, which is stored continuously in memory
for (int i = 0; i < top[0]->shape(cur_dim); ++i) {
// prepare index vector reduced(red) and with offsets(off)
std::vector<int> ind_red(cur_dim, 0);
diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu
index f78cecb..1ea1325 100644
--- a/src/caffe/layers/crop_layer.cu
+++ b/src/caffe/layers/crop_layer.cu
@@ -39,10 +39,10 @@ void CropLayer<Dtype>::crop_copy_gpu(const vector<Blob<Dtype>*>& bottom,
src_data, dest_data, is_forward);
}
} else {
- // We are at the last two dimensions, which are stored continously in memory
- // With (N,C,H,W)
- // (0,1,2,3) cur_dim -> H
- // cur_dim+1 -> W
+ // We are at the last two dimensions, which are stored continuously in
+ // memory. With (N,C,H,W)
+ // (0,1,2,3) cur_dim -> H
+ // cur_dim+1 -> W
const int lines = top[0]->shape(cur_dim);
const int height = top[0]->shape(cur_dim);
const int width = top[0]->shape(cur_dim+1);
diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp
index 2f13dc6..c957451 100644
--- a/src/caffe/layers/hdf5_data_layer.cpp
+++ b/src/caffe/layers/hdf5_data_layer.cpp
@@ -61,10 +61,10 @@ void HDF5DataLayer<Dtype>::LoadHDF5FileData(const char* filename) {
// Shuffle if needed.
if (this->layer_param_.hdf5_data_param().shuffle()) {
std::random_shuffle(data_permutation_.begin(), data_permutation_.end());
- DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0)
+ DLOG(INFO) << "Successfully loaded " << hdf_blobs_[0]->shape(0)
<< " rows (shuffled)";
} else {
- DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0) << " rows";
+ DLOG(INFO) << "Successfully loaded " << hdf_blobs_[0]->shape(0) << " rows";
}
}
diff --git a/src/caffe/layers/rnn_layer.cpp b/src/caffe/layers/rnn_layer.cpp
index f62ae8c..8c2fa22 100644
--- a/src/caffe/layers/rnn_layer.cpp
+++ b/src/caffe/layers/rnn_layer.cpp
@@ -215,7 +215,7 @@ void RNNLayer<Dtype>::FillUnrolledNet(NetParameter* net_param) const {
}
// Add layers to compute
- // o_t := \tanh( W_ho h_t + b_o)
+ // o_t := \tanh( W_ho * h_t + b_o)
// = \tanh( W_ho_h_t )
{
LayerParameter* o_neuron_param = net_param->add_layer();
diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp
index 10ac947..99fa3eb 100644
--- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp
+++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp
@@ -1,3 +1,4 @@
+#include <algorithm>
#include <vector>
#include "caffe/layers/sigmoid_cross_entropy_loss_layer.hpp"
@@ -14,17 +15,66 @@ void SigmoidCrossEntropyLossLayer<Dtype>::LayerSetUp(
sigmoid_top_vec_.clear();
sigmoid_top_vec_.push_back(sigmoid_output_.get());
sigmoid_layer_->SetUp(sigmoid_bottom_vec_, sigmoid_top_vec_);
+
+ has_ignore_label_ =
+ this->layer_param_.loss_param().has_ignore_label();
+ if (has_ignore_label_) {
+ ignore_label_ = this->layer_param_.loss_param().ignore_label();
+ }
+ if (this->layer_param_.loss_param().has_normalization()) {
+ normalization_ = this->layer_param_.loss_param().normalization();
+ } else if (this->layer_param_.loss_param().has_normalize()) {
+ normalization_ = this->layer_param_.loss_param().normalize() ?
+ LossParameter_NormalizationMode_VALID :
+ LossParameter_NormalizationMode_BATCH_SIZE;
+ } else {
+ normalization_ = LossParameter_NormalizationMode_BATCH_SIZE;
+ }
}
template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Reshape(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
LossLayer<Dtype>::Reshape(bottom, top);
+ outer_num_ = bottom[0]->shape(0); // batch size
+ inner_num_ = bottom[0]->count(1); // instance size: |output| == |target|
CHECK_EQ(bottom[0]->count(), bottom[1]->count()) <<
"SIGMOID_CROSS_ENTROPY_LOSS layer inputs must have the same count.";
sigmoid_layer_->Reshape(sigmoid_bottom_vec_, sigmoid_top_vec_);
}
+// TODO(shelhamer) loss normalization should be pulled up into LossLayer,
+// instead of duplicated here and in SoftMaxWithLossLayer
+template <typename Dtype>
+Dtype SigmoidCrossEntropyLossLayer<Dtype>::get_normalizer(
+ LossParameter_NormalizationMode normalization_mode, int valid_count) {
+ Dtype normalizer;
+ switch (normalization_mode) {
+ case LossParameter_NormalizationMode_FULL:
+ normalizer = Dtype(outer_num_ * inner_num_);
+ break;
+ case LossParameter_NormalizationMode_VALID:
+ if (valid_count == -1) {
+ normalizer = Dtype(outer_num_ * inner_num_);
+ } else {
+ normalizer = Dtype(valid_count);
+ }
+ break;
+ case LossParameter_NormalizationMode_BATCH_SIZE:
+ normalizer = Dtype(outer_num_);
+ break;
+ case LossParameter_NormalizationMode_NONE:
+ normalizer = Dtype(1);
+ break;
+ default:
+ LOG(FATAL) << "Unknown normalization mode: "
+ << LossParameter_NormalizationMode_Name(normalization_mode);
+ }
+ // Some users will have no labels for some examples in order to 'turn off' a
+ // particular loss in a multi-task setup. The max prevents NaNs in that case.
+ return std::max(Dtype(1.0), normalizer);
+}
+
template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Forward_cpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
@@ -32,17 +82,22 @@ void SigmoidCrossEntropyLossLayer<Dtype>::Forward_cpu(
sigmoid_bottom_vec_[0] = bottom[0];
sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_);
// Compute the loss (negative log likelihood)
- const int count = bottom[0]->count();
- const int num = bottom[0]->num();
// Stable version of loss computation from input data
const Dtype* input_data = bottom[0]->cpu_data();
const Dtype* target = bottom[1]->cpu_data();
+ int valid_count = 0;
Dtype loss = 0;
- for (int i = 0; i < count; ++i) {
+ for (int i = 0; i < bottom[0]->count(); ++i) {
+ const int target_value = static_cast<int>(target[i]);
+ if (has_ignore_label_ && target_value == ignore_label_) {
+ continue;
+ }
loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) -
log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0)));
+ ++valid_count;
}
- top[0]->mutable_cpu_data()[0] = loss / num;
+ normalizer_ = get_normalizer(normalization_, valid_count);
+ top[0]->mutable_cpu_data()[0] = loss / normalizer_;
}
template <typename Dtype>
@@ -56,19 +111,27 @@ void SigmoidCrossEntropyLossLayer<Dtype>::Backward_cpu(
if (propagate_down[0]) {
// First, compute the diff
const int count = bottom[0]->count();
- const int num = bottom[0]->num();
const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data();
const Dtype* target = bottom[1]->cpu_data();
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
caffe_sub(count, sigmoid_output_data, target, bottom_diff);
+ // Zero out gradient of ignored targets.
+ if (has_ignore_label_) {
+ for (int i = 0; i < count; ++i) {
+ const int target_value = static_cast<int>(target[i]);
+ if (target_value == ignore_label_) {
+ bottom_diff[i] = 0;
+ }
+ }
+ }
// Scale down gradient
- const Dtype loss_weight = top[0]->cpu_diff()[0];
- caffe_scal(count, loss_weight / num, bottom_diff);
+ Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer_;
+ caffe_scal(count, loss_weight, bottom_diff);
}
}
#ifdef CPU_ONLY
-STUB_GPU_BACKWARD(SigmoidCrossEntropyLossLayer, Backward);
+STUB_GPU(SigmoidCrossEntropyLossLayer);
#endif
INSTANTIATE_CLASS(SigmoidCrossEntropyLossLayer);
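(A note on the "stable version of loss computation" in Forward_cpu above: it is the standard overflow-safe rewrite of the per-element sigmoid cross-entropy. Writing \sigma(x) = 1/(1 + e^{-x}) for the prediction and t \in [0, 1] for the target, the expression guarded by the (input_data[i] >= 0) indicator is algebraically equal to

    \ell(x, t) = -t \log \sigma(x) - (1 - t) \log(1 - \sigma(x))
               = \max(x, 0) - t x + \log(1 + e^{-|x|}),
    \qquad
    \frac{\partial \ell}{\partial x} = \sigma(x) - t,

so exp() is only ever applied to a non-positive argument. The gradient on the right is also why Backward_cpu needs nothing beyond caffe_sub(count, sigmoid_output_data, target, bottom_diff) followed by scaling with loss_weight / normalizer_.)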
diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu
index 046cb9d..b9877e6 100644
--- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu
+++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu
@@ -5,6 +5,72 @@
namespace caffe {
+
+template <typename Dtype>
+__global__ void SigmoidCrossEntropyLossForwardGPU(const int nthreads,
+ const Dtype* input_data, const Dtype* target, Dtype* loss,
+ const bool has_ignore_label_, const int ignore_label_,
+ Dtype* counts) {
+ CUDA_KERNEL_LOOP(i, nthreads) {
+ const int target_value = static_cast<int>(target[i]);
+ if (has_ignore_label_ && target_value == ignore_label_) {
+ loss[i] = 0;
+ counts[i] = 0;
+ } else {
+ loss[i] = input_data[i] * (target[i] - (input_data[i] >= 0)) -
+ log(1 + exp(input_data[i] - 2 * input_data[i] *
+ (input_data[i] >= 0)));
+ counts[i] = 1;
+ }
+ }
+}
+
+template <typename Dtype>
+__global__ void SigmoidCrossEntropyLossIgnoreDiffGPU(const int count,
+ const int ignore_label, const Dtype* target, Dtype* diff) {
+ CUDA_KERNEL_LOOP(i, count) {
+ const int target_value = static_cast<int>(target[i]);
+ if (target_value == ignore_label) {
+ diff[i] = 0;
+ }
+ }
+}
+
+
+template <typename Dtype>
+void SigmoidCrossEntropyLossLayer<Dtype>::Forward_gpu(
+ const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
+ // The forward pass computes the sigmoid outputs.
+ sigmoid_bottom_vec_[0] = bottom[0];
+ sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_);
+ // Compute the loss (negative log likelihood)
+ const int count = bottom[0]->count();
+ // Stable version of loss computation from input data
+ const Dtype* input_data = bottom[0]->gpu_data();
+ const Dtype* target = bottom[1]->gpu_data();
+ // Since this memory is not used for anything until it is overwritten
+ // on the backward pass, we use it here to avoid having to allocate new GPU
+ // memory to accumulate intermediate results in the kernel.
+ Dtype* loss_data = bottom[0]->mutable_gpu_diff();
+ Dtype* count_data = bottom[1]->mutable_gpu_diff();
+ Dtype valid_count;
+ // NOLINT_NEXT_LINE(whitespace/operators)
+ SigmoidCrossEntropyLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(count),
+ CAFFE_CUDA_NUM_THREADS>>>(count, input_data, target, loss_data,
+ has_ignore_label_, ignore_label_, count_data);
+ // Only launch another CUDA kernel if we actually need the valid count.
+ if (normalization_ == LossParameter_NormalizationMode_VALID &&
+ has_ignore_label_) {
+ caffe_gpu_asum(count, count_data, &valid_count);
+ } else {
+ valid_count = count;
+ }
+ Dtype loss;
+ caffe_gpu_asum(count, loss_data, &loss);
+ normalizer_ = get_normalizer(normalization_, valid_count);
+ top[0]->mutable_cpu_data()[0] = loss / normalizer_;
+}
+
template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Backward_gpu(
const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
@@ -16,19 +82,23 @@ void SigmoidCrossEntropyLossLayer<Dtype>::Backward_gpu(
if (propagate_down[0]) {
// First, compute the diff
const int count = bottom[0]->count();
- const int num = bottom[0]->num();
const Dtype* sigmoid_output_data = sigmoid_output_->gpu_data();
const Dtype* target = bottom[1]->gpu_data();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
caffe_copy(count, sigmoid_output_data, bottom_diff);
caffe_gpu_axpy(count, Dtype(-1), target, bottom_diff);
+ // Zero out gradient of ignored targets.
+ if (has_ignore_label_) {
+ // NOLINT_NEXT_LINE(whitespace/operators)
+ SigmoidCrossEntropyLossIgnoreDiffGPU<Dtype><<<CAFFE_GET_BLOCKS(count),
+ CAFFE_CUDA_NUM_THREADS>>>(count, ignore_label_, target, bottom_diff);
+ }
// Scale down gradient
- const Dtype loss_weight = top[0]->cpu_diff()[0];
- caffe_gpu_scal(count, loss_weight / num, bottom_diff);
+ Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer_;
+ caffe_gpu_scal(count, loss_weight, bottom_diff);
}
}
-INSTANTIATE_LAYER_GPU_BACKWARD(SigmoidCrossEntropyLossLayer);
-
+INSTANTIATE_LAYER_GPU_FUNCS(SigmoidCrossEntropyLossLayer);
} // namespace caffe
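(One subtlety in Forward_gpu above: the kernel stores the negated per-element loss, so every loss[i] is non-positive when targets lie in [0, 1], and caffe_gpu_asum reduces by absolute value. The two reductions therefore recover the non-negative totals

    \mathrm{valid\_count} = \sum_i c_i, \qquad
    \mathrm{loss} = \sum_i |\mathrm{loss}_i| = \sum_i \ell_i,

after which top[0] is set to loss / get_normalizer(normalization_, valid_count), matching the CPU path.)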
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 6940a70..430a0de 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -418,7 +418,7 @@ message TransformationParameter {
optional uint32 crop_size = 3 [default = 0];
// mean_file and mean_value cannot be specified at the same time
optional string mean_file = 4;
- // if specified can be repeated once (would substract it from all the channels)
+ // if specified can be repeated once (would subtract it from all the channels)
// or can be repeated the same number of times as channels
// (would subtract them from the corresponding channel)
repeated float mean_value = 5;
@@ -434,7 +434,7 @@ message LossParameter {
optional int32 ignore_label = 1;
// How to normalize the loss for loss layers that aggregate across batches,
// spatial dimensions, or other dimensions. Currently only implemented in
- // SoftmaxWithLoss layer.
+ // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers.
enum NormalizationMode {
// Divide by the number of examples in the batch times spatial dimensions.
// Outputs that receive the ignore label will NOT be ignored in computing
@@ -448,6 +448,8 @@ message LossParameter {
// Do not normalize the loss.
NONE = 3;
}
+ // For historical reasons, the default normalization for
+ // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID.
optional NormalizationMode normalization = 3 [default = VALID];
// Deprecated. Ignored if normalization is specified. If normalization
// is not specified, then setting this to false will be equivalent to
@@ -1394,6 +1396,6 @@ message PReLUParameter {
// Initial value of a_i. Default is a_i=0.25 for all i.
optional FillerParameter filler = 1;
- // Whether or not slope paramters are shared across channels.
+ // Whether or not slope parameters are shared across channels.
optional bool channel_shared = 2 [default = false];
}
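(As a usage sketch of the extended LossParameter; the layer and blob names "loss", "score", and "label" below are invented for illustration:

    layer {
      name: "loss"
      type: "SigmoidCrossEntropyLoss"
      bottom: "score"
      bottom: "label"
      top: "loss"
      loss_param {
        ignore_label: -1      # targets equal to -1 add no loss and get zero gradient
        normalization: VALID  # divide by the number of non-ignored targets
      }
    }

Per the comment added above, leaving normalization unset keeps the historical BATCH_SIZE behavior for SigmoidCrossEntropyLoss, even though the field itself declares [default = VALID]; see the has_normalization() branch in LayerSetUp earlier in this patch.)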
diff --git a/src/caffe/test/CMakeLists.txt b/src/caffe/test/CMakeLists.txt
index 35a803f..d8afc30 100644
--- a/src/caffe/test/CMakeLists.txt
+++ b/src/caffe/test/CMakeLists.txt
@@ -1,7 +1,7 @@
# The option allows to include in build only selected test files and exclude all others
# Usage example:
# cmake -DBUILD_only_tests="common,net,blob,im2col_kernel"
-set(BUILD_only_tests "" CACHE STRING "Blank or comma-separated list of test files to build without 'test_' prefix and extention")
+set(BUILD_only_tests "" CACHE STRING "Blank or comma-separated list of test files to build without 'test_' prefix and extension")
caffe_leave_only_selected_tests(test_srcs ${BUILD_only_tests})
caffe_leave_only_selected_tests(test_cuda ${BUILD_only_tests})
diff --git a/src/caffe/test/test_euclidean_loss_layer.cpp b/src/caffe/test/test_euclidean_loss_layer.cpp
index f253f9f..b026f5b 100644
--- a/src/caffe/test/test_euclidean_loss_layer.cpp
+++ b/src/caffe/test/test_euclidean_loss_layer.cpp
@@ -39,7 +39,7 @@ class EuclideanLossLayerTest : public MultiDeviceTest<TypeParam> {
void TestForward() {
// Get the loss without a specified objective weight -- should be
- // equivalent to explicitly specifiying a weight of 1.
+ // equivalent to explicitly specifying a weight of 1.
LayerParameter layer_param;
EuclideanLossLayer<Dtype> layer_weight_1(layer_param);
layer_weight_1.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
diff --git a/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp
index 5dfd765..1bd5f93 100644
--- a/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp
+++ b/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp
@@ -116,5 +116,33 @@ TYPED_TEST(SigmoidCrossEntropyLossLayerTest, TestGradient) {
this->blob_top_vec_, 0);
}
+TYPED_TEST(SigmoidCrossEntropyLossLayerTest, TestIgnoreGradient) {
+ typedef typename TypeParam::Dtype Dtype;
+ FillerParameter data_filler_param;
+ data_filler_param.set_std(1);
+ GaussianFiller<Dtype> data_filler(data_filler_param);
+ data_filler.Fill(this->blob_bottom_data_);
+ LayerParameter layer_param;
+ LossParameter* loss_param = layer_param.mutable_loss_param();
+ loss_param->set_ignore_label(-1);
+ Dtype* target = this->blob_bottom_targets_->mutable_cpu_data();
+ const int count = this->blob_bottom_targets_->count();
+ // Ignore half of targets, then check that diff of this half is zero,
+ // while the other half is nonzero.
+ caffe_set(count / 2, Dtype(-1), target);
+ SigmoidCrossEntropyLossLayer<Dtype> layer(layer_param);
+ layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+ layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+ vector<bool> propagate_down(2);
+ propagate_down[0] = true;
+ propagate_down[1] = false;
+ layer.Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
+ const Dtype* diff = this->blob_bottom_data_->cpu_diff();
+ for (int i = 0; i < count / 2; ++i) {
+ EXPECT_FLOAT_EQ(diff[i], 0.);
+ EXPECT_NE(diff[i + count / 2], 0.);
+ }
+}
+
} // namespace caffe
diff --git a/src/gtest/gtest-all.cpp b/src/gtest/gtest-all.cpp
index 9261974..81cdb57 100644
--- a/src/gtest/gtest-all.cpp
+++ b/src/gtest/gtest-all.cpp
@@ -2697,7 +2697,7 @@ AssertionResult IsHRESULTFailure(const char* expr, long hr) { // NOLINT
// Utility functions for encoding Unicode text (wide strings) in
// UTF-8.
-// A Unicode code-point can have upto 21 bits, and is encoded in UTF-8
+// A Unicode code-point can have up to 21 bits, and is encoded in UTF-8
// like this:
//
// Code-point length Encoding
@@ -7550,7 +7550,7 @@ FilePath FilePath::RemoveExtension(const char* extension) const {
return *this;
}
-// Returns a pointer to the last occurence of a valid path separator in
+// Returns a pointer to the last occurrence of a valid path separator in
// the FilePath. On Windows, for example, both '/' and '\' are valid path
// separators. Returns NULL if no path separator was found.
const char* FilePath::FindLastPathSeparator() const {
diff --git a/src/gtest/gtest.h b/src/gtest/gtest.h
index 3143bd6..124fb23 100644
--- a/src/gtest/gtest.h
+++ b/src/gtest/gtest.h
@@ -3395,7 +3395,7 @@ class GTEST_API_ FilePath {
void Normalize();
- // Returns a pointer to the last occurence of a valid path separator in
+ // Returns a pointer to the last occurrence of a valid path separator in
// the FilePath. On Windows, for example, both '/' and '\' are valid path
// separators. Returns NULL if no path separator was found.
const char* FindLastPathSeparator() const;
diff --git a/tools/extra/parse_log.py b/tools/extra/parse_log.py
index 375b0db..017306b 100755
--- a/tools/extra/parse_log.py
+++ b/tools/extra/parse_log.py
@@ -48,8 +48,13 @@ def parse_log(path_to_log):
# iteration
continue
- time = extract_seconds.extract_datetime_from_line(line,
- logfile_year)
+ try:
+ time = extract_seconds.extract_datetime_from_line(line,
+ logfile_year)
+ except ValueError:
+ # Skip lines with bad formatting, for example when resuming solver
+ continue
+
seconds = (time - start_time).total_seconds()
learning_rate_match = regex_learning_rate.search(line)
diff --git a/tools/extra/plot_log.gnuplot.example b/tools/extra/plot_log.gnuplot.example
index 748b96e..02c68e1 100644
--- a/tools/extra/plot_log.gnuplot.example
+++ b/tools/extra/plot_log.gnuplot.example
@@ -4,7 +4,7 @@
# Be warned that the fields in the training log may change in the future.
# You had better check the data files before designing your own plots.
-# Please generate the neccessary data files with
+# Please generate the necessary data files with
# /path/to/caffe/tools/extra/parse_log.sh before plotting.
# Example usage:
# ./parse_log.sh mnist.log