[lua-torch-nn] 01/05: Imported Upstream version 0~20160812-g461701f+dfsg
Zhou Mo
cdluminate-guest at moszumanska.debian.org
Sat Aug 13 00:44:11 UTC 2016
This is an automated email from the git hooks/post-receive script.
cdluminate-guest pushed a commit to branch master
in repository lua-torch-nn.
commit 2b6ee42f780f5795cc1a0bf679dd3e29d0adb88f
Author: Zhou Mo <cdluminate at gmail.com>
Date: Sat Aug 13 00:28:15 2016 +0000
Imported Upstream version 0~20160812-g461701f+dfsg
---
BatchNormalization.lua | 23 +-
Bottle.lua | 65 +++
GPU.lua | 273 +++++++++++++
HardTanh.lua | 12 +-
Index.lua | 9 +-
LookupTable.lua | 10 +-
MarginRankingCriterion.lua | 28 +-
Narrow.lua | 10 +-
ReLU6.lua | 32 ++
Select.lua | 10 +-
SpatialDilatedConvolution.lua | 10 +-
...lMaxPooling.lua => SpatialDilatedMaxPooling.lua | 53 +--
SpatialFullConvolution.lua | 23 +-
SpatialMaxPooling.lua | 2 +
SpatialUpSamplingBilinear.lua | 111 ++++++
VolumetricDilatedConvolution.lua | 103 +++++
VolumetricReplicationPadding.lua | 58 +++
doc/containers.md | 32 ++
doc/convolution.md | 121 +++++-
doc/criterion.md | 2 +-
doc/image/relu6.png | Bin 0 -> 20076 bytes
doc/module.md | 4 +-
doc/simple.md | 48 +++
doc/transfer.md | 27 +-
hessian.lua | 2 +-
init.lua | 8 +
lib/THNN/generic/ClassNLLCriterion.c | 15 +-
lib/THNN/generic/HardTanh.c | 113 ++++--
lib/THNN/generic/LookupTable.c | 16 +-
lib/THNN/generic/MultiLabelMarginCriterion.c | 6 +-
lib/THNN/generic/MultiMarginCriterion.c | 6 +-
lib/THNN/generic/SpatialAdaptiveMaxPooling.c | 18 +-
lib/THNN/generic/SpatialClassNLLCriterion.c | 12 +-
lib/THNN/generic/SpatialConvolutionMM.c | 4 +
lib/THNN/generic/SpatialConvolutionMap.c | 18 +-
lib/THNN/generic/SpatialDilatedConvolution.c | 2 +
lib/THNN/generic/SpatialFractionalMaxPooling.c | 6 +-
lib/THNN/generic/SpatialFullConvolution.c | 43 +-
lib/THNN/generic/SpatialFullConvolutionMap.c | 18 +-
lib/THNN/generic/SpatialMaxPooling.c | 52 ++-
lib/THNN/generic/SpatialMaxUnpooling.c | 16 +-
lib/THNN/generic/SpatialUpSamplingBilinear.c | 127 ++++++
lib/THNN/generic/THNN.h | 223 +++++++----
lib/THNN/generic/VolumetricConvolutionMM.c | 4 +
lib/THNN/generic/VolumetricDilatedConvolution.c | 356 +++++++++++++++++
lib/THNN/generic/VolumetricFullConvolution.c | 39 +-
lib/THNN/generic/VolumetricReplicationPadding.c | 301 ++++++++++++++
lib/THNN/init.c | 9 +
test.lua | 442 ++++++++++++++++++---
49 files changed, 2535 insertions(+), 387 deletions(-)
diff --git a/BatchNormalization.lua b/BatchNormalization.lua
index ac42749..578f441 100644
--- a/BatchNormalization.lua
+++ b/BatchNormalization.lua
@@ -74,12 +74,15 @@ function BN:reset()
end
function BN:checkInputDim(input)
- assert(input:dim() == self.nDim, string.format(
+ local iDim = input:dim()
+ assert(iDim == self.nDim or
+ (iDim == self.nDim - 1 and self.train == false), string.format(
'only mini-batch supported (%dD tensor), got %dD tensor instead',
- self.nDim, input:dim()))
- assert(input:size(2) == self.running_mean:nElement(), string.format(
+ self.nDim, iDim))
+ local featDim = (iDim == self.nDim - 1) and 1 or 2
+ assert(input:size(featDim) == self.running_mean:nElement(), string.format(
'got %d-feature tensor, expected %d',
- input:size(2), self.running_mean:nElement()))
+ input:size(featDim), self.running_mean:nElement()))
end
local function makeContiguous(self, input, gradOutput)
@@ -98,10 +101,20 @@ local function makeContiguous(self, input, gradOutput)
return input, gradOutput
end
+local function makeBatch(self, input)
+ local iDim = input:dim()
+ if self.train == false and iDim == self.nDim - 1 then
+ return nn.utils.addSingletonDimension(input, input, 1)
+ else
+ return input
+ end
+end
+
function BN:updateOutput(input)
self:checkInputDim(input)
input = makeContiguous(self, input)
+ input = makeBatch(self, input)
self.output:resizeAs(input)
self.save_mean = self.save_mean or input.new()
@@ -131,6 +144,8 @@ local function backward(self, input, gradOutput, scale, gradInput, gradWeight, g
assert(self.save_mean and self.save_std, 'must call :updateOutput() first')
input, gradOutput = makeContiguous(self, input, gradOutput)
+ input = makeBatch(self, input)
+ gradOutput = makeBatch(self, gradOutput)
scale = scale or 1
if gradInput then
diff --git a/Bottle.lua b/Bottle.lua
new file mode 100644
index 0000000..6934bff
--- /dev/null
+++ b/Bottle.lua
@@ -0,0 +1,65 @@
+local Bottle, parent = torch.class("nn.Bottle", "nn.Container")
+local unpack = unpack or table.unpack
+
+function Bottle:__init(module, nInputDim, nOutputDim)
+ parent.__init(self)
+ self.nInputDim = nInputDim or 2
+ self.nOutputDim = nOutputDim or self.nInputDim
+ self.dimDelta = self.nInputDim - self.nOutputDim
+ -- Used to reshape the gradients
+ self.inShape = torch.Tensor(self.nInputDim)
+ self.outShape = torch.Tensor(self.nOutputDim)
+ -- add module to modules
+ self.modules[1] = module
+end
+
+function Bottle:updateOutput(input)
+ -- first batchDims dimensions will be fused
+ local batchDims = input:dim() - self.nInputDim + 1
+ -- see if bottle is required
+ if batchDims > 1 then
+ -- bottle the first dims
+ local inSize = torch.LongTensor(input:size())
+ local squeezeSize = inSize[{{1, batchDims - 1}}]:prod()
+ self.inShape:copy(inSize[{{batchDims, input:dim()}}])
+ self.inShape[{{1}}]:mul(squeezeSize)
+ -- Forward with the module's dimension
+ local newInput = input:view(unpack(self.inShape:totable()))
+ local output = self.modules[1]:updateOutput(newInput)
+ assert(output:dim() == self.nOutputDim,
+ "Wrong number of output dims on module. Expected: " ..
+ self.nOutputDim .. ' but got ' ..
+ tostring(output and output:dim()))
+ self.outShape:copy(torch.LongTensor(output:size()))
+ if math.abs(self.dimDelta) > 0 then
+ inSize:resize(inSize:size(1) - self.dimDelta)
+ end
+ inSize[{{batchDims, inSize:size(1)}}]:copy(self.outShape)
+ inSize[{{batchDims}}]:div(squeezeSize)
+ -- unbottle
+ self.output:set(output:view(unpack(torch.totable(inSize))))
+ else
+ self.output:set(self.modules[1]:updateOutput(input))
+ end
+ return self.output
+end
+
+function Bottle:updateGradInput(input, gradOutput)
+ if input:dim() > self.nInputDim then
+ local input_ = input:view(unpack(self.inShape:totable()))
+ local gradOutput_ = gradOutput:view(unpack(self.outShape:totable()))
+ self.modules[1]:updateGradInput(input_, gradOutput_)
+ self.gradInput:set(self.modules[1].gradInput:viewAs(input))
+ else
+ self.gradInput:set(self.modules[1]:updateGradInput(input))
+ end
+ return self.gradInput
+end
+
+function Bottle:accGradParameters(input, gradOutput, scale)
+ if input:dim() > self.nInputDim then
+ input = input:view(unpack(self.inShape:totable()))
+ gradOutput = gradOutput:view(unpack(self.outShape:totable()))
+ end
+ self.modules[1]:accGradParameters(input, gradOutput, scale)
+end
diff --git a/GPU.lua b/GPU.lua
new file mode 100644
index 0000000..3150236
--- /dev/null
+++ b/GPU.lua
@@ -0,0 +1,273 @@
+------------------------------------------------------------------------
+--[[ GPU ]]--
+-- Decorates a module such that its parameters are
+-- hosted on a specified GPU device.
+-- The operations are also executed on that device.
+-- Arguments input and gradOutput are converted to the specified device
+-- before being fed to the decorated module.
+-- Returned output is on the specified outdevice (defaults to device).
+-- Returned gradInput is allocated on the same device as the input.
+-- The unit test is located in cunn.
+------------------------------------------------------------------------
+local GPU, parent = torch.class("nn.GPU", "nn.Container")
+
+function GPU:__init(module, device, outdevice)
+ parent.__init(self)
+ assert(torch.type(device) == 'number')
+ self.device = device
+ self.outdevice = outdevice or device
+
+ assert(torch.isTypeOf(module, 'nn.Module'))
+ self.modules[1] = module
+
+ if module:type() == 'torch.CudaTensor' then
+ self:cuda()
+ end
+end
+
+function GPU.recursiveModuleDevice(obj, device)
+ if type(obj) == 'table' and not torch.isTypeOf(obj, 'nn.GPU') and not obj.__noGPU__ then
+ for k,v in pairs(obj) do
+ obj[k] = GPU.recursiveModuleDevice(v, device)
+ end
+ elseif torch.type(obj):match('torch.Cuda.*Tensor') then
+ if obj:getDevice() ~= device then
+ obj = obj:clone() -- this will reallocate it to device
+ local newdevice = obj:getDevice()
+ -- when nElement() == 0 newdevice is 0
+ assert(newdevice == device or newdevice == 0)
+ end
+ end
+ assert(obj ~= nil)
+ return obj
+end
+
+-- set the device of the decorated module
+function GPU:setDevice(device)
+ self.device = device or self.device
+
+ assert(self.modules[1])
+ self.modules[1] = cutorch.withDevice(self.device, function()
+ return self.recursiveModuleDevice(self.modules[1], self.device)
+ end)
+ return self
+end
+
+-- when proto is a device number, returns a dst that has device device for each element in src
+-- otherwise, if proto is a table/tensor, makes sure dst is a identical to src, yet on the same device as proto
+function GPU.recursiveSetDevice(dst, src, proto)
+ local device, prototable
+ if torch.isTensor(proto) then
+ device = proto:getDevice()
+ elseif torch.type(proto) == 'number' then
+ device = proto
+ elseif torch.type(proto) == 'table' then
+ prototable = true
+ else
+ error"Expecting number, table or tensor for arg 3 (proto)"
+ end
+ if torch.type(src) == 'table' then
+ dst = torch.type(dst) == 'table' and dst or {}
+ for k,v in ipairs(src) do
+ dst[k] = GPU.recursiveSetDevice(dst[k], v, prototable and proto[k] or device)
+ end
+ for k=#src+1,#dst do
+ dst[k] = nil
+ end
+ elseif torch.type(src):match('torch.Cuda.*Tensor') and src:getDevice() ~= device and src:getDevice() ~= 0 then
+ if not (torch.type(dst):match('torch.Cuda.*Tensor') and dst:getDevice() == device) then
+ dst = src.new()
+ end
+ cutorch.withDevice(device, function() dst:resizeAs(src):copy(src) end)
+ else
+ dst = src
+ end
+ return dst
+end
+
+function GPU:updateOutput(input)
+ if self._type == 'torch.CudaTensor' then
+ self._input = self.recursiveSetDevice(self._input, input, self.device)
+
+ local output = cutorch.withDevice(self.device, function()
+ return self.modules[1]:updateOutput(self._input)
+ end)
+
+ if self.device ~= self.outdevice then
+ self.output = self.recursiveSetDevice(self.output, output, self.outdevice)
+ else
+ self.output = output
+ end
+ else
+ self.output = self.modules[1]:updateOutput(input)
+ end
+
+ return self.output
+end
+
+function GPU:updateGradInput(input, gradOutput)
+ if self._type == 'torch.CudaTensor' then
+ self._gradOutput = self.recursiveSetDevice(self._gradOutput, gradOutput, self.device)
+
+ local gradInput = cutorch.withDevice(self.device, function()
+ return self.modules[1]:updateGradInput(self._input, self._gradOutput)
+ end)
+
+ self.gradInput = self.recursiveSetDevice(self.gradInput, gradInput, input)
+ else
+ self.gradInput = self.modules[1]:updateGradInput(input, gradOutput)
+ end
+
+ return self.gradInput
+end
+
+function GPU:accGradParameters(input, gradOutput, scale)
+ if self._type == 'torch.CudaTensor' then
+ cutorch.withDevice(self.device, function()
+ self.modules[1]:accGradParameters(self._input, self._gradOutput, scale)
+ end)
+ else
+ self.modules[1]:accGradParameters(input, gradOutput, scale)
+ end
+end
+
+function GPU:apply(callback)
+ if self._type == 'torch.CudaTensor' then
+ cutorch.withDevice(self.device, function() parent.apply(self, callback) end)
+ else
+ parent.apply(self, callback)
+ end
+end
+
+function GPU:type(type, typecache)
+ if type and type == 'torch.CudaTensor' then
+ cutorch.withDevice(self.device, function() parent.type(self, type, typecache) end)
+ self:setDevice()
+ else
+ self.output = nil
+ self.gradInput = nil
+ self._input = nil
+ self._gradOutput = nil
+ parent.type(self, type, typecache)
+ end
+ return self
+end
+
+function GPU:clearState()
+ nn.utils.clear(self, 'output', 'gradInput')
+ self._input = nil
+ self._gradOutput = nil
+ if self._type == 'torch.CudaTensor' then
+ cutorch.withDevice(self.device, function() parent.clearState(self) end)
+ else
+ parent.clearState(self)
+ end
+end
+
+function GPU:zeroGradParameters()
+ if self._type == 'torch.CudaTensor' then
+ cutorch.withDevice(self.device, function() parent.zeroGradParameters(self) end)
+ else
+ parent.zeroGradParameters(self)
+ end
+end
+
+function GPU:updateParameters(lr)
+ if self._type == 'torch.CudaTensor' then
+ cutorch.withDevice(self.device, function() parent.updateParameters(self, lr) end)
+ else
+ parent.updateParameters(self, lr)
+ end
+end
+
+function GPU:training()
+ if self._type == 'torch.CudaTensor' then
+ cutorch.withDevice(self.device, function() parent.training(self) end)
+ else
+ parent.training(self)
+ end
+end
+
+function GPU:evaluate()
+ if self._type == 'torch.CudaTensor' then
+ cutorch.withDevice(self.device, function() parent.evaluate(self) end)
+ else
+ parent.evaluate(self)
+ end
+end
+
+function GPU:share(mlp, ...)
+ local args = {...}
+ if self._type == 'torch.CudaTensor' then
+ cutorch.withDevice(self.device, function() parent.share(self, mlp, unpack(args)) end)
+ else
+ parent.share(self, mlp, unpack(args))
+ end
+ return self
+end
+
+function GPU:reset(...)
+ local args = {...}
+ if self._type == 'torch.CudaTensor' then
+ cutorch.withDevice(self.device, function() parent.reset(self, unpack(args)) end)
+ else
+ parent.reset(self, unpack(args))
+ end
+ return self
+end
+
+function GPU:clone(...)
+ local args = {...}
+ if self._type == 'torch.CudaTensor' then
+ return cutorch.withDevice(self.device, function() parent.clone(self, unpack(args)) end)
+ else
+ return parent.clone(self, unpack(args))
+ end
+end
+
+function GPU:write(file)
+ -- Write all values in the object as a table.
+ local object = {}
+ for k, v in pairs(self) do
+ object[k] = v
+ end
+ local header = {self._type, self.device}
+ file:writeObject(header)
+ file:writeObject(object)
+end
+
+function GPU:read(file)
+ local header = file:readObject()
+ local object
+ if header[1] == 'torch.CudaTensor' then
+ local device = header[2]
+ if device > cutorch.getDeviceCount() then
+ print"Warning : model was saved with more devices than available on current host."
+ print"Attempting to load module onto device 1"
+ device = 1
+ end
+ object = cutorch.withDevice(device, function() return file:readObject() end)
+ else
+ object = file:readObject()
+ end
+
+ for k, v in pairs(object) do
+ self[k] = v
+ end
+end
+
+function GPU:__tostring__()
+ if self.modules[1].__tostring__ then
+ return torch.type(self) .. '(' .. self.device ..') @ ' .. self.modules[1]:__tostring__()
+ else
+ return torch.type(self) .. '(' .. self.device ..') @ ' .. torch.type(self.modules[1])
+ end
+end
+
+function GPU:accUpdateGradParameters(input, gradOutput, lr)
+ error("Not Implemented for "..torch.type(self))
+end
+
+function GPU:sharedAccUpdateGradParameters(input, gradOutput, lr)
+ error("Not Implemented for "..torch.type(self))
+end
diff --git a/HardTanh.lua b/HardTanh.lua
index d3449a1..07cfc62 100644
--- a/HardTanh.lua
+++ b/HardTanh.lua
@@ -1,9 +1,13 @@
local HardTanh, parent = torch.class('nn.HardTanh', 'nn.Module')
-function HardTanh:__init(min_value, max_value)
+function HardTanh:__init(min_value, max_value, inplace)
parent.__init(self)
self.min_val = min_value or -1
self.max_val = max_value or 1
+ self.inplace = inplace or false
+ if (inplace and type(inplace) ~= 'boolean') then
+ error('in-place flag must be boolean')
+ end
assert(self.max_val>self.min_val, 'max_value must be larger than min_value')
end
@@ -14,7 +18,8 @@ function HardTanh:updateOutput(input)
input:cdata(),
self.output:cdata(),
self.min_val,
- self.max_val
+ self.max_val,
+ self.inplace or false
)
return self.output
end
@@ -25,7 +30,8 @@ function HardTanh:updateGradInput(input, gradOutput)
gradOutput:cdata(),
self.gradInput:cdata(),
self.min_val,
- self.max_val
+ self.max_val,
+ self.inplace or false
)
return self.gradInput
end
diff --git a/Index.lua b/Index.lua
index 8ae6063..6aa4297 100644
--- a/Index.lua
+++ b/Index.lua
@@ -3,7 +3,7 @@ local Index, parent = torch.class('nn.Index', 'nn.Module')
function Index:__init(dimension)
parent.__init(self)
self.dimension = dimension
- self.gradInput = {self.gradInput}
+ self.gradInput = {self.gradInput, self.gradInput.new()}
end
function Index:updateOutput(input)
@@ -17,9 +17,16 @@ function Index:updateGradInput(input, gradOutput)
local t = input[1]
local index = input[2]
+ self.gradInput[2]:resize(index:size()):zero()
local gradInput = self.gradInput[1] -- no gradient for the index variable
gradInput:resizeAs(t):zero()
gradInput:indexAdd(self.dimension, index, gradOutput)
return self.gradInput
end
+function Index:clearState()
+ self.gradInput[1]:set()
+ self.gradInput[2]:set()
+ self.output:set()
+ return self
+end
diff --git a/LookupTable.lua b/LookupTable.lua
index 8ec2b34..8a60354 100644
--- a/LookupTable.lua
+++ b/LookupTable.lua
@@ -81,7 +81,7 @@ function LookupTable:updateOutput(input)
end
function LookupTable:updateGradInput(input, gradOutput)
- -- the input can be of any type (as in the forward it's
+ -- the input can be of any type (as in the forward it's
-- converted anyway to LongTensor) thus, need to allocate
-- new memory each time the user changes the input type
if torch.type(self.gradInput) ~= torch.type(input) then
@@ -148,10 +148,10 @@ function LookupTable:type(type, tensorCache)
if type == 'torch.CudaTensor' then
-- CUDA uses _sorted and _indices temporary tensors
- self._sorted = self.weight.new()
- self._indices = self.weight.new()
- self._count = self.weight.new()
- self._input = self.weight.new()
+ self._sorted = torch.CudaLongTensor.new()
+ self._indices = torch.CudaLongTensor.new()
+ self._count = torch.CudaLongTensor.new()
+ self._input = torch.CudaLongTensor.new()
else
-- self._count and self._input should only be converted if using Cuda
self._count = torch.IntTensor()
diff --git a/MarginRankingCriterion.lua b/MarginRankingCriterion.lua
index 2c1f4c2..844d905 100644
--- a/MarginRankingCriterion.lua
+++ b/MarginRankingCriterion.lua
@@ -3,14 +3,14 @@ local MarginRankingCriterion, parent = torch.class('nn.MarginRankingCriterion',
function MarginRankingCriterion:__init(margin)
parent.__init(self)
margin=margin or 1
- self.margin = margin
+ self.margin = margin
self.gradInput = {torch.Tensor(1), torch.Tensor(1)}
self.sizeAverage = true
-end
-
-function MarginRankingCriterion:updateOutput(input,y)
- if input[1]:size(1) == 1 then
- self.output=math.max(0, -y*(input[1][1]-input[2][1]) + self.margin )
+end
+
+function MarginRankingCriterion:updateOutput(input, y)
+ if torch.type(y) == 'number' then -- non-batch mode
+ self.output = math.max(0, -y * (input[1][1] - input[2][1]) + self.margin)
else
self._output = self._output or input[1]:clone()
self._output:resizeAs(input[1])
@@ -33,14 +33,14 @@ function MarginRankingCriterion:updateOutput(input,y)
end
function MarginRankingCriterion:updateGradInput(input, y)
- if input[1]:size(1) == 1 then
- local dist = -y*(input[1][1]-input[2][1]) + self.margin
+ if torch.type(y) == 'number' then -- non-batch mode
+ local dist = -y * (input[1][1] - input[2][1]) + self.margin
if dist < 0 then
- self.gradInput[1][1]=0;
- self.gradInput[2][1]=0;
- else
- self.gradInput[1][1]=-y
- self.gradInput[2][1]=y
+ self.gradInput[1][1] = 0;
+ self.gradInput[2][1] = 0;
+ else
+ self.gradInput[1][1] = -y
+ self.gradInput[2][1] = y
end
else
self.dist = self.dist or input[1].new()
@@ -71,5 +71,5 @@ function MarginRankingCriterion:updateGradInput(input, y)
end
end
- return self.gradInput
+ return self.gradInput
end
diff --git a/Narrow.lua b/Narrow.lua
index 07322d8..0754d45 100644
--- a/Narrow.lua
+++ b/Narrow.lua
@@ -11,23 +11,25 @@ function Narrow:__init(dimension,offset,length)
end
function Narrow:updateOutput(input)
+ local dim = self.dimension < 0 and input:dim() + self.dimension + 1 or self.dimension
local length = self.length
if length < 0 then
- length = input:size(self.dimension) - self.index + self.length + 2
+ length = input:size(dim) - self.index + self.length + 2
end
- local output=input:narrow(self.dimension,self.index,length)
+ local output=input:narrow(dim,self.index,length)
self.output = self.output:typeAs(output)
self.output:resizeAs(output):copy(output)
return self.output
end
function Narrow:updateGradInput(input, gradOutput)
+ local dim = self.dimension < 0 and input:dim() + self.dimension + 1 or self.dimension
local length = self.length
if length < 0 then
- length = input:size(self.dimension) - self.index + self.length + 2
+ length = input:size(dim) - self.index + self.length + 2
end
self.gradInput = self.gradInput:typeAs(input)
self.gradInput:resizeAs(input):zero()
- self.gradInput:narrow(self.dimension,self.index,length):copy(gradOutput)
+ self.gradInput:narrow(dim,self.index,length):copy(gradOutput)
return self.gradInput
end
diff --git a/ReLU6.lua b/ReLU6.lua
new file mode 100644
index 0000000..be8985b
--- /dev/null
+++ b/ReLU6.lua
@@ -0,0 +1,32 @@
+local ReLU6, parent = torch.class('nn.ReLU6', 'nn.Module')
+
+function ReLU6:__init(inplace)
+ parent.__init(self)
+
+ if inplace == nil then
+ self.inplace = false
+ else
+ self.inplace = inplace
+ end
+
+ if (inplace and type(inplace) ~= 'boolean') then
+ error('in-place flag must be boolean')
+ end
+end
+
+function ReLU6:updateOutput(input)
+ input.THNN.HardTanh_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ 0, 6, self.inplace)
+ return self.output
+end
+
+function ReLU6:updateGradInput(input, gradOutput)
+ input.THNN.HardTanh_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ 0, 6, self.inplace)
+ return self.gradInput
+end
diff --git a/Select.lua b/Select.lua
index fccdf32..6dc5a04 100644
--- a/Select.lua
+++ b/Select.lua
@@ -7,16 +7,18 @@ function Select:__init(dimension,index)
end
function Select:updateOutput(input)
- local index = self.index < 0 and input:size(self.dimension) + self.index + 1 or self.index
- local output = input:select(self.dimension, index);
+ local dim = self.dimension < 0 and input:dim() + self.dimension + 1 or self.dimension
+ local index = self.index < 0 and input:size(dim) + self.index + 1 or self.index
+ local output = input:select(dim, index);
self.output:resizeAs(output)
return self.output:copy(output)
end
function Select:updateGradInput(input, gradOutput)
- local index = self.index < 0 and input:size(self.dimension) + self.index + 1 or self.index
+ local dim = self.dimension < 0 and input:dim() + self.dimension + 1 or self.dimension
+ local index = self.index < 0 and input:size(dim) + self.index + 1 or self.index
self.gradInput:resizeAs(input)
self.gradInput:zero()
- self.gradInput:select(self.dimension,index):copy(gradOutput)
+ self.gradInput:select(dim,index):copy(gradOutput)
return self.gradInput
end
diff --git a/SpatialDilatedConvolution.lua b/SpatialDilatedConvolution.lua
index 8611ee9..0ae914e 100644
--- a/SpatialDilatedConvolution.lua
+++ b/SpatialDilatedConvolution.lua
@@ -1,11 +1,11 @@
local THNN = require 'nn.THNN'
local SpatialDilatedConvolution, parent = torch.class('nn.SpatialDilatedConvolution', 'nn.SpatialConvolution')
-function SpatialDilatedConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH, dilationH, dilationW)
+function SpatialDilatedConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH, dilationW, dilationH)
parent.__init(self, nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH)
- self.dilationH = dilationH or 1
self.dilationW = dilationW or 1
+ self.dilationH = dilationH or 1
end
local function makeContiguous(self, input, gradOutput)
@@ -38,7 +38,7 @@ function SpatialDilatedConvolution:updateOutput(input)
self.kW, self.kH,
self.dW, self.dH,
self.padW, self.padH,
- self.dilationH, self.dilationW
+ self.dilationW, self.dilationH
)
return self.output
end
@@ -56,7 +56,7 @@ function SpatialDilatedConvolution:updateGradInput(input, gradOutput)
self.kW, self.kH,
self.dW, self.dH,
self.padW, self.padH,
- self.dilationH, self.dilationW
+ self.dilationW, self.dilationH
)
return self.gradInput
end
@@ -76,7 +76,7 @@ function SpatialDilatedConvolution:accGradParameters(input, gradOutput, scale)
self.kW, self.kH,
self.dW, self.dH,
self.padW, self.padH,
- self.dilationH, self.dilationW,
+ self.dilationW, self.dilationH,
scale
)
end
diff --git a/SpatialMaxPooling.lua b/SpatialDilatedMaxPooling.lua
similarity index 53%
copy from SpatialMaxPooling.lua
copy to SpatialDilatedMaxPooling.lua
index 8475b13..929459c 100644
--- a/SpatialMaxPooling.lua
+++ b/SpatialDilatedMaxPooling.lua
@@ -1,44 +1,20 @@
-local SpatialMaxPooling, parent = torch.class('nn.SpatialMaxPooling', 'nn.Module')
+local THNN = require 'nn.THNN'
+local SpatialDilatedMaxPooling, parent = torch.class('nn.SpatialDilatedMaxPooling', 'nn.SpatialMaxPooling')
-function SpatialMaxPooling:__init(kW, kH, dW, dH, padW, padH)
- parent.__init(self)
+function SpatialDilatedMaxPooling:__init(kW, kH, dW, dH, padW, padH, dilationW, dilationH)
+ parent.__init(self, kW, kH, dW, dH, padW, padH)
- dW = dW or kW
- dH = dH or kH
-
- self.kW = kW
- self.kH = kH
- self.dW = dW
- self.dH = dH
-
- self.padW = padW or 0
- self.padH = padH or 0
-
- self.ceil_mode = false
- self.indices = torch.Tensor()
-end
-
-function SpatialMaxPooling:ceil()
- self.ceil_mode = true
- return self
+ self.dilationW = dilationW or 1
+ self.dilationH = dilationH or 1
end
-function SpatialMaxPooling:floor()
- self.ceil_mode = false
- return self
-end
-
-function SpatialMaxPooling:updateOutput(input)
+function SpatialDilatedMaxPooling:updateOutput(input)
self.indices = self.indices or input.new()
local dims = input:dim()
self.iheight = input:size(dims-1)
self.iwidth = input:size(dims)
- -- backward compatibility
- self.ceil_mode = self.ceil_mode or false
- self.padW = self.padW or 0
- self.padH = self.padH or 0
input.THNN.SpatialMaxPooling_updateOutput(
input:cdata(),
self.output:cdata(),
@@ -46,12 +22,13 @@ function SpatialMaxPooling:updateOutput(input)
self.kW, self.kH,
self.dW, self.dH,
self.padW, self.padH,
+ self.dilationW, self.dilationH,
self.ceil_mode
)
return self.output
end
-function SpatialMaxPooling:updateGradInput(input, gradOutput)
+function SpatialDilatedMaxPooling:updateGradInput(input, gradOutput)
input.THNN.SpatialMaxPooling_updateGradInput(
input:cdata(),
gradOutput:cdata(),
@@ -60,28 +37,24 @@ function SpatialMaxPooling:updateGradInput(input, gradOutput)
self.kW, self.kH,
self.dW, self.dH,
self.padW, self.padH,
+ self.dilationW, self.dilationH,
self.ceil_mode
)
return self.gradInput
end
--- for backward compat
-function SpatialMaxPooling:empty()
- self:clearState()
-end
-
-function SpatialMaxPooling:__tostring__()
+function SpatialDilatedMaxPooling:__tostring__()
local s = string.format('%s(%dx%d, %d,%d', torch.type(self),
self.kW, self.kH, self.dW, self.dH)
if (self.padW or self.padH) and (self.padW ~= 0 or self.padH ~= 0) then
s = s .. ', ' .. self.padW .. ','.. self.padH
end
+ s = s .. ', ' .. self.dilationW .. ',' .. self.dilationH
s = s .. ')'
-
return s
end
-function SpatialMaxPooling:clearState()
+function SpatialDilatedMaxPooling:clearState()
if self.indices then
self.indices:set()
end
diff --git a/SpatialFullConvolution.lua b/SpatialFullConvolution.lua
index 40fcd3d..a234769 100644
--- a/SpatialFullConvolution.lua
+++ b/SpatialFullConvolution.lua
@@ -1,3 +1,4 @@
+local THNN = require 'nn.THNN'
local SpatialFullConvolution, parent = torch.class('nn.SpatialFullConvolution','nn.Module')
function SpatialFullConvolution:__init(nInputPlane, nOutputPlane,
@@ -33,6 +34,12 @@ function SpatialFullConvolution:__init(nInputPlane, nOutputPlane,
self:reset()
end
+function SpatialFullConvolution:noBias()
+ self.bias = nil
+ self.gradBias = nil
+ return self
+end
+
function SpatialFullConvolution:reset(stdv)
if stdv then
stdv = stdv * math.sqrt(3)
@@ -43,7 +50,9 @@ function SpatialFullConvolution:reset(stdv)
stdv = 1/math.sqrt(kW*kH*nInputPlane)
end
self.weight:uniform(-stdv, stdv)
- self.bias:uniform(-stdv, stdv)
+ if self.bias then
+ self.bias:uniform(-stdv, stdv)
+ end
end
local function makeContiguous(self, input, gradOutput)
@@ -99,7 +108,7 @@ function SpatialFullConvolution:updateOutput(input)
inputTensor:cdata(),
self.output:cdata(),
self.weight:cdata(),
- self.bias:cdata(),
+ THNN.optionalTensor(self.bias),
self.finput:cdata(),
self.fgradInput:cdata(),
self.kW, self.kH,
@@ -131,7 +140,7 @@ function SpatialFullConvolution:updateGradInput(input, gradOutput)
adjH = calculateAdj(tH, self.kH, self.padH, self.dH)
-- Momentarily extract the gradInput tensor
if type(self.gradInput) == 'table' then
- self.gradInput = self.gradInput[1]
+ self.gradInput = self.gradInput[1] or inputTensor.new()
end
end
@@ -186,7 +195,7 @@ function SpatialFullConvolution:accGradParameters(input, gradOutput, scale)
inputTensor:cdata(),
gradOutput:cdata(),
self.gradWeight:cdata(),
- self.gradBias:cdata(),
+ THNN.optionalTensor(self.gradBias),
self.finput:cdata(),
self.fgradInput:cdata(),
self.kW, self.kH,
@@ -215,7 +224,11 @@ function SpatialFullConvolution:__tostring__()
if (self.adjW or self.adjH) and (self.adjW ~= 0 or self.adjH ~= 0) then
s = s .. ', ' .. self.adjW .. ',' .. self.adjH
end
- return s .. ')'
+ if self.bias then
+ return s .. ')'
+ else
+ return s .. ') without bias'
+ end
end
function SpatialFullConvolution:clearState()
diff --git a/SpatialMaxPooling.lua b/SpatialMaxPooling.lua
index 8475b13..c05a876 100644
--- a/SpatialMaxPooling.lua
+++ b/SpatialMaxPooling.lua
@@ -46,6 +46,7 @@ function SpatialMaxPooling:updateOutput(input)
self.kW, self.kH,
self.dW, self.dH,
self.padW, self.padH,
+ 1, 1,
self.ceil_mode
)
return self.output
@@ -60,6 +61,7 @@ function SpatialMaxPooling:updateGradInput(input, gradOutput)
self.kW, self.kH,
self.dW, self.dH,
self.padW, self.padH,
+ 1, 1,
self.ceil_mode
)
return self.gradInput
diff --git a/SpatialUpSamplingBilinear.lua b/SpatialUpSamplingBilinear.lua
new file mode 100644
index 0000000..d911eae
--- /dev/null
+++ b/SpatialUpSamplingBilinear.lua
@@ -0,0 +1,111 @@
+require 'nn.THNN'
+local SpatialUpSamplingBilinear, parent =
+ torch.class('nn.SpatialUpSamplingBilinear', 'nn.Module')
+
+--[[
+Applies a 2D bilinear up-sampling over an input image composed of several
+input planes.
+
+The Y and X dimensions are assumed to be the last 2 tensor dimensions. For
+instance, if the tensor is 4D, then dim 3 is the y dimension and dim 4 is the x.
+scale_factor is assumed to be a positive integer.
+
+owidth = (width-1)*(scale_factor-1) + width
+oheight = (height-1)*(scale_factor-1) + height
+--]]
+
+function SpatialUpSamplingBilinear:__init(scale_factor)
+ parent.__init(self)
+
+ self.scale_factor = scale_factor
+ if self.scale_factor < 1 then
+ error('scale_factor must be greater than 1')
+ end
+ if math.floor(self.scale_factor) ~= self.scale_factor then
+ error('scale_factor must be integer')
+ end
+ self.inputSize = torch.LongStorage(4)
+ self.outputSize = torch.LongStorage(4)
+end
+
+local function makeContiguous(self, input, gradOutput)
+ if not input:isContiguous() then
+ self._input = self._input or input.new()
+ self._input:resizeAs(input):copy(input)
+ input = self._input
+ end
+ if gradOutput then
+ if not gradOutput:isContiguous() then
+ self._gradOutput = self._gradOutput or gradOutput.new()
+ self._gradOutput:resizeAs(gradOutput):copy(gradOutput)
+ gradOutput = self._gradOutput
+ end
+ end
+ return input, gradOutput
+end
+
+function SpatialUpSamplingBilinear:updateOutput(input)
+ assert(input:dim() == 4 or input:dim()==3,
+ 'SpatialUpSamplingBilinear only support 3D or 4D tensors' )
+ local inputwas3D = false
+ if input:dim() == 3 then
+ input=input:view(-1, input:size(1), input:size(2), input:size(3))
+ inputwas3D = true
+ end
+ input = makeContiguous(self, input)
+ assert(input:dim() == 4)
+ -- Copy the input size
+ local xdim = input:dim()
+ local ydim = input:dim() - 1
+ for i = 1, input:dim() do
+ self.inputSize[i] = input:size(i)
+ self.outputSize[i] = input:size(i)
+ end
+ self.outputSize[ydim] = (self.outputSize[ydim]-1) * (self.scale_factor-1)
+ + self.outputSize[ydim]
+ self.outputSize[xdim] = (self.outputSize[xdim]-1) * (self.scale_factor -1)
+ + self.outputSize[xdim]
+ -- Resize the output if needed
+ self.output:resize(self.outputSize)
+ input.THNN.SpatialUpSamplingBilinear_updateOutput(
+ input:cdata(),
+ self.output:cdata()
+ )
+ if inputwas3D then
+ input = input:squeeze(1)
+ self.output = self.output:squeeze(1)
+ end
+ return self.output
+end
+
+function SpatialUpSamplingBilinear:updateGradInput(input, gradOutput)
+ assert(input:dim() == 4 or input:dim()==3,
+ 'SpatialUpSamplingBilinear only support 3D or 4D tensors' )
+ assert(input:dim() == gradOutput:dim(),
+ 'Input and gradOutput should be of same dimension' )
+ local inputwas3D = false
+ if input:dim() == 3 then
+ input=input:view(-1, input:size(1), input:size(2), input:size(3))
+ gradOutput=gradOutput:view(-1, gradOutput:size(1), gradOutput:size(2),
+ gradOutput:size(3))
+ inputwas3D = true
+ end
+ assert(input:dim() == 4 and gradOutput:dim() == 4)
+ self.gradInput:resizeAs(input)
+ input.THNN.SpatialUpSamplingBilinear_updateGradInput(
+ gradOutput:cdata(),
+ self.gradInput:cdata()
+ )
+ if inputwas3D then
+ input = input:squeeze(1)
+ gradOutput = gradOutput:squeeze(1)
+ self.gradInput = self.gradInput:squeeze(1)
+ end
+ return self.gradInput
+end
+
+
+function SpatialUpSamplingBilinear:__tostring__()
+ local s = string.format('%s(%d)', torch.type(self), self.scale_factor)
+ return s
+end
diff --git a/VolumetricDilatedConvolution.lua b/VolumetricDilatedConvolution.lua
new file mode 100644
index 0000000..fc7f037
--- /dev/null
+++ b/VolumetricDilatedConvolution.lua
@@ -0,0 +1,103 @@
+local THNN = require 'nn.THNN'
+local VolumetricDilatedConvolution, parent = torch.class('nn.VolumetricDilatedConvolution', 'nn.VolumetricConvolution')
+
+function VolumetricDilatedConvolution:__init(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH, dilationT, dilationW, dilationH)
+ parent.__init(self, nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH)
+
+ self.dilationT = dilationT or 1
+ self.dilationW = dilationW or 1
+ self.dilationH = dilationH or 1
+end
+
+local function makeContiguous(self, input, gradOutput)
+ if not input:isContiguous() then
+ self._input = self._input or input.new()
+ self._input:resizeAs(input):copy(input)
+ input = self._input
+ end
+ if gradOutput then
+ if not gradOutput:isContiguous() then
+ self._gradOutput = self._gradOutput or gradOutput.new()
+ self._gradOutput:resizeAs(gradOutput):copy(gradOutput)
+ gradOutput = self._gradOutput
+ end
+ end
+ return input, gradOutput
+end
+
+function VolumetricDilatedConvolution:updateOutput(input)
+ self.finput = self.finput or self.weight.new()
+ self.fgradInput = self.fgradInput or self.weight.new()
+ input = makeContiguous(self, input)
+ input.THNN.VolumetricDilatedConvolution_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ THNN.optionalTensor(self.bias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ self.dilationT, self.dilationW, self.dilationH
+ )
+ return self.output
+end
+
+function VolumetricDilatedConvolution:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ input, gradOutput = makeContiguous(self, input, gradOutput)
+ self.fgradInput = self.fgradInput or self.weight.new()
+ input.THNN.VolumetricDilatedConvolution_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.finput:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ self.dilationT, self.dilationW, self.dilationH
+ )
+ return self.gradInput
+ end
+end
+
+function VolumetricDilatedConvolution:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ input, gradOutput = makeContiguous(self, input, gradOutput)
+ self.fgradInput = self.fgradInput or self.weight.new()
+ input.THNN.VolumetricDilatedConvolution_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ THNN.optionalTensor(self.gradBias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kT, self.kW, self.kH,
+ self.dT, self.dW, self.dH,
+ self.padT, self.padW, self.padH,
+ self.dilationT, self.dilationW, self.dilationH,
+ scale
+ )
+end
+
+function VolumetricDilatedConvolution:__tostring__()
+ local s = string.format('%s(%d -> %d, %dx%dx%d', torch.type(self),
+ self.nInputPlane, self.nOutputPlane, self.kT, self.kW, self.kH)
+ if self.dT ~= 1 or self.dW ~= 1 or self.dH ~= 1
+ or self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0 then
+ s = s .. string.format(', %d,%d,%d', self.dT, self.dW, self.dH)
+ end
+ if (self.padT or self.padW or self.padH)
+ and (self.padT ~= 0 or self.padW ~= 0 or self.padH ~= 0) then
+ s = s .. ', ' .. self.padT .. ',' .. self.padW .. ',' .. self.padH
+ end
+ s = s .. ', ' .. self.dilationT .. ','
+ .. self.dilationW .. ',' .. self.dilationH
+ if self.bias then
+ return s .. ')'
+ else
+ return s .. ') without bias'
+ end
+end
diff --git a/VolumetricReplicationPadding.lua b/VolumetricReplicationPadding.lua
new file mode 100644
index 0000000..31a9503
--- /dev/null
+++ b/VolumetricReplicationPadding.lua
@@ -0,0 +1,58 @@
+local VolumetricReplicationPadding, parent =
+ torch.class('nn.VolumetricReplicationPadding', 'nn.Module')
+
+function VolumetricReplicationPadding:__init(pleft, pright, ptop, pbottom,
+ pfront, pback)
+ parent.__init(self)
+ self.pleft = pleft
+ self.pright = pright or self.pleft
+ self.ptop = ptop or self.pleft
+ self.pbottom = pbottom or self.pleft
+ self.pfront = pfront or self.pleft
+ self.pback = pback or self.pleft
+end
+
+function VolumetricReplicationPadding:updateOutput(input)
+ if input:dim() == 4 or input:dim() == 5 then
+ input.THNN.VolumetricReplicationPadding_updateOutput(
+ input:cdata(), self.output:cdata(),
+ self.pleft, self.pright, self.ptop, self.pbottom, self.pfront,
+ self.pback)
+ else
+ error('input must be 4 or 5-dimensional')
+ end
+ return self.output
+end
+
+function VolumetricReplicationPadding:updateGradInput(input, gradOutput)
+ if input:dim() == 4 and gradOutput:dim() == 4 then
+ assert(input:size(1) == gradOutput:size(1)
+ and input:size(2) + self.pfront + self.pback == gradOutput:size(2)
+ and input:size(3) + self.ptop + self.pbottom == gradOutput:size(3)
+ and input:size(4) + self.pleft + self.pright == gradOutput:size(4),
+ 'input and gradOutput must be compatible in size')
+ elseif input:dim() == 5 and gradOutput:dim() == 5 then
+ assert(input:size(1) == gradOutput:size(1)
+ and input:size(2) == gradOutput:size(2)
+ and input:size(3) + self.pfront + self.pback == gradOutput:size(3)
+ and input:size(4) + self.ptop + self.pbottom == gradOutput:size(4)
+ and input:size(5) + self.pleft + self.pright == gradOutput:size(5),
+ 'input and gradOutput must be compatible in size')
+ else
+ error(
+ [[input and gradOutput must be 4 or 5-dimensional
+ and have equal number of dimensions]]
+ )
+ end
+ input.THNN.VolumetricReplicationPadding_updateGradInput(
+ input:cdata(), gradOutput:cdata(), self.gradInput:cdata(),
+ self.pleft, self.pright, self.ptop, self.pbottom, self.pfront, self.pback)
+ return self.gradInput
+end
+
+function VolumetricReplicationPadding:__tostring__()
+ return torch.type(self) ..
+ string.format('(left=%d, right=%d, top=%d, bottom=%d, front=%d, back=%d)',
+ self.pleft, self.pright, self.ptop, self.pbottom,
+ self.pfront, self.pback)
+end
diff --git a/doc/containers.md b/doc/containers.md
index 9a83607..44060e8 100644
--- a/doc/containers.md
+++ b/doc/containers.md
@@ -7,6 +7,7 @@ Complex neural networks are easily built using container classes:
* [Parallel](#nn.Parallel) : applies its `ith` child module to the `ith` slice of the input Tensor ;
* [Concat](#nn.Concat) : concatenates in one layer several modules along dimension `dim` ;
* [DepthConcat](#nn.DepthConcat) : like Concat, but adds zero-padding when non-`dim` sizes don't match;
+ * [Bottle](#nn.Bottle) : allows any dimensionality input to be forwarded through a module ;
See also the [Table Containers](#nn.TableContainers) for manipulating tables of [Tensors](https://github.com/torch/torch7/blob/master/doc/tensor.md).
@@ -274,6 +275,37 @@ module output tensors non-`dim` sizes aren't all odd or even.
Such that in order to keep the mappings aligned, one need
only ensure that these be all odd (or even).
+<a name="nn.Bottle"></a>
+## Bottle
+
+
+```lua
+module = nn.Bottle(module, [nInputDim], [nOutputDim])
+```
+Bottle allows varying dimensionality input to be forwarded through any module that accepts input of `nInputDim` dimensions, and generates output of `nOutputDim` dimensions.
+
+Bottle can be used to forward a 4D input of varying sizes through a 2D module `b x n`. The module `Bottle(module, 2)` will accept input of shape `p x q x r x n` and outputs with the shape `p x q x r x m`. Internally Bottle will view the input of `module` as `p*q*r x n`, and view the output as `p x q x r x m`. The numbers `p x q x r` are inferred from the input and can change for every forward/backward pass.
+
+```lua
+input=torch.Tensor(4, 5, 3, 10)
+mlp=nn.Bottle(nn.Linear(10, 2))
+print(input:size())
+print(mlp:forward(input):size())
+```
+which gives the output:
+```lua
+ 4
+ 5
+ 3
+ 10
+[torch.LongStorage of size 4]
+ 4
+ 5
+ 3
+ 2
+[torch.LongStorage of size 4]
+```
+
<a name="nn.TableContainers"></a>
## Table Containers ##
While the above containers are used for manipulating input [Tensors](https://github.com/torch/torch7/blob/master/doc/tensor.md), table containers are used for manipulating tables :
diff --git a/doc/convolution.md b/doc/convolution.md
index 4e2bb6f..96d92d9 100644
--- a/doc/convolution.md
+++ b/doc/convolution.md
@@ -16,26 +16,30 @@ A convolution is an integral that expresses the amount of overlap of one functio
* [SpatialConvolutionLocal](#nn.SpatialConvolutionLocal) : a 2D locally-connected layer over an input image ;
* [SpatialSubSampling](#nn.SpatialSubSampling) : a 2D sub-sampling over an input image ;
* [SpatialMaxPooling](#nn.SpatialMaxPooling) : a 2D max-pooling operation over an input image ;
+ * [SpatialDilatedMaxPooling](#nn.SpatialDilatedMaxPooling) : a 2D dilated max-pooling operation over an input image ;
* [SpatialFractionalMaxPooling](#nn.SpatialFractionalMaxPooling) : a 2D fractional max-pooling operation over an input image ;
* [SpatialAveragePooling](#nn.SpatialAveragePooling) : a 2D average-pooling operation over an input image ;
* [SpatialAdaptiveMaxPooling](#nn.SpatialAdaptiveMaxPooling) : a 2D max-pooling operation which adapts its parameters dynamically such that the output is of fixed size ;
* [SpatialMaxUnpooling](#nn.SpatialMaxUnpooling) : a 2D max-unpooling operation ;
* [SpatialLPPooling](#nn.SpatialLPPooling) : computes the `p` norm in a convolutional manner on a set of input images ;
* [SpatialConvolutionMap](#nn.SpatialConvolutionMap) : a 2D convolution that uses a generic connection table ;
- * [SpatialZeroPadding](#nn.SpatialZeroPadding) : padds a feature map with specified number of zeros ;
- * [SpatialReflectionPadding](#nn.SpatialReflectionPadding) : padds a feature map with the reflection of the input ;
- * [SpatialReplicationPadding](#nn.SpatialReplicationPadding) : padds a feature map with the value at the edge of the input borders ;
+ * [SpatialZeroPadding](#nn.SpatialZeroPadding) : pads a feature map with specified number of zeros ;
+ * [SpatialReflectionPadding](#nn.SpatialReflectionPadding) : pads a feature map with the reflection of the input ;
+ * [SpatialReplicationPadding](#nn.SpatialReplicationPadding) : pads a feature map with the value at the edge of the input borders ;
* [SpatialSubtractiveNormalization](#nn.SpatialSubtractiveNormalization) : a spatial subtraction operation on a series of 2D inputs using
* [SpatialCrossMapLRN](#nn.SpatialCrossMapLRN) : a spatial local response normalization between feature maps ;
* [SpatialBatchNormalization](#nn.SpatialBatchNormalization): mean/std normalization over the mini-batch inputs and pixels, with an optional affine transform that follows
a kernel for computing the weighted average in a neighborhood ;
- * [SpatialUpsamplingNearest](#nn.SpatialUpSamplingNearest): A simple upsampler applied to every channel of the feature map.
+ * [SpatialUpsamplingNearest](#nn.SpatialUpSamplingNearest): A simple nearest neighbor upsampler applied to every channel of the feature map.
+ * [SpatialUpsamplingBilinear](#nn.SpatialUpSamplingBilinear): A simple bilinear upsampler applied to every channel of the feature map.
* [Volumetric Modules](#nn.VolumetricModules) apply to inputs with three-dimensional relationships (e.g. videos) :
* [VolumetricConvolution](#nn.VolumetricConvolution) : a 3D convolution over an input video (a sequence of images) ;
* [VolumetricFullConvolution](#nn.VolumetricFullConvolution) : a 3D full convolution over an input video (a sequence of images) ;
+ * [VolumetricDilatedConvolution](#nn.VolumetricDilatedConvolution) : a 3D dilated convolution over an input video (a sequence of images) ;
* [VolumetricMaxPooling](#nn.VolumetricMaxPooling) : a 3D max-pooling operation over an input video.
* [VolumetricAveragePooling](#nn.VolumetricAveragePooling) : a 3D average-pooling operation over an input video.
- * [VolumetricMaxUnpooling](#nn.VolumetricMaxUnpooling) : a 3D max-unpooling operation ;
+ * [VolumetricMaxUnpooling](#nn.VolumetricMaxUnpooling) : a 3D max-unpooling operation.
+ * [VolumetricReplicationPadding](#nn.VolumetricReplicationPadding) : pads a volumetric feature map with the value at the edge of the input borders ;
<a name="nn.TemporalModules"></a>
@@ -210,10 +214,10 @@ is the size of a 1D `input` tensor.
Again with a 1D input, when only `size1` is provided, the `forward(input)` is equivalent to
performing the following matrix-matrix multiplication in an efficient manner:
```lua
-M P
+P M
```
-where `M` is a 2D matrix `size x nIndex` containing the parameters of the lookup-table and
-`P` is a 2D matrix, where each column vector `i` is a zero vector except at index `input[i]` where it is `1`.
+where `M` is a 2D matrix of size `nIndex x size1` containing the parameters of the lookup-table and
+`P` is a 2D matrix of size `n x nIndex`, where for each `i`th row vector, every element is zero except the one at index `input[i]` where it is `1`.
1D example:
```lua
@@ -422,7 +426,7 @@ module = nn.SpatialFullConvolution(nInputPlane, nOutputPlane, kW, kH, [dW], [dH]
Applies a 2D full convolution over an input image composed of several input planes. The `input` tensor in
`forward(input)` is expected to be a 3D or 4D tensor. Note that instead of setting `adjW` and `adjH`, SpatialFullConvolution also accepts a table input with two tensors: `{convInput, sizeTensor}` where `convInput` is the standard input on which the full convolution
is applied, and the size of `sizeTensor` is used to set the size of the output. Using the two-input version of forward
-will ignore the `adjW` and `adjH` values used to construct the module.
+will ignore the `adjW` and `adjH` values used to construct the module. The layer can be used without a bias by calling `module:noBias()`.
Other frameworks call this operation "In-network Upsampling", "Fractionally-strided convolution", "Backwards Convolution," "Deconvolution", or "Upconvolution."
@@ -454,6 +458,7 @@ Further information about the full convolution can be found in the following pap
module = nn.SpatialDilatedConvolution(nInputPlane, nOutputPlane, kW, kH, [dW], [dH], [padW], [padH], [dilationW], [dilationH])
```
+Also sometimes referred to as **atrous convolution**.
Applies a 2D dilated convolution over an input image composed of several input planes. The `input` tensor in
`forward(input)` is expected to be a 3D or 4D tensor.
@@ -472,8 +477,8 @@ The parameters are the following:
If the input image is a 3D tensor `nInputPlane x height x width`, the output image size
will be `nOutputPlane x oheight x owidth` where
```lua
-owidth = width + 2 * padW - dilationW * (kW-1) + 1 / dW + 1
-oheight = height + 2 * padH - dilationH * (kH-1) + 1 / dH + 1
+owidth  = floor((width  + 2 * padW - dilationW * (kW-1) - 1) / dW) + 1
+oheight = floor((height + 2 * padH - dilationH * (kH-1) - 1) / dH) + 1
```
Further information about the dilated convolution can be found in the following paper: [Multi-Scale Context Aggregation by Dilated Convolutions](http://arxiv.org/abs/1511.07122).
@@ -540,6 +545,29 @@ oheight = op((height + 2*padH - kH) / dH + 1)
`op` is a rounding operator. By default, it is `floor`. It can be changed
by calling `:ceil()` or `:floor()` methods.
+<a name="nn.SpatialDilatedMaxPooling"></a>
+### SpatialDilatedMaxPooling ###
+
+```lua
+module = nn.SpatialDilatedMaxPooling(kW, kH [, dW, dH, padW, padH, dilationW, dilationH])
+```
+
+Also sometimes referred to as **atrous pooling**.
+Applies 2D dilated max-pooling operation in `kWxkH` regions by step size
+`dWxdH` steps. The number of output features is equal to the number of
+input planes. If `dilationW` and `dilationH` are not provided, this is equivalent to performing normal `nn.SpatialMaxPooling`.
+
+If the input image is a 3D tensor `nInputPlane x height x width`, the output
+image size will be `nOutputPlane x oheight x owidth` where
+
+```lua
+owidth = op((width - (dilationW * (kW - 1) + 1) + 2*padW) / dW + 1)
+oheight = op((height - (dilationH * (kH - 1) + 1) + 2*padH) / dH + 1)
+```
+
+`op` is a rounding operator. By default, it is `floor`. It can be changed
+by calling `:ceil()` or `:floor()` methods.
+
<a name="nn.SpatialFractionalMaxPooling"></a>
### SpatialFractionalMaxPooling ###
@@ -716,6 +744,27 @@ output(u,v) = input(floor((u-1)/scale)+1, floor((v-1)/scale)+1)
Where `u` and `v` are index from 1 (as per lua convention). There are no learnable parameters.
+<a name="nn.SpatialUpSamplingBilinear"></a>
+### SpatialUpSamplingBilinear ###
+
+```lua
+module = nn.SpatialUpSamplingBilinear(scale)
+```
+
+Applies a 2D up-sampling over an input image composed of several input planes. The `input` tensor in
+`forward(input)` is expected to be a 3D or 4D tensor (i.e. for 4D: `nBatchPlane x nInputPlane x height x width`). The number of output planes will be the same. The v dimension is assumed to be the second last dimension (i.e. for 4D it will be the 3rd dim), and the u dimension is assumed to be the last dimension.
+
+The parameters are the following:
+ * `scale`: The upscale ratio. Must be a positive integer
+
+The up-scaling method is bilinear, and given an input of height iH and width iW, output height and width will be:
+```lua
+oH = (iH - 1)*(scale - 1) + iH
+oW = (iW - 1)*(scale - 1) + iW
+```
+
+There are no learnable parameters.
+
<a name="nn.SpatialZeroPadding"></a>
### SpatialZeroPadding ###
@@ -880,7 +929,7 @@ columns or rows of the input image might be lost. It is up to the user to
add proper padding in images.
If the input image is a 4D tensor `nInputPlane x time x height x width`, the output image size
-will be `nOutputPlane x otime x owidth x oheight` where
+will be `nOutputPlane x otime x oheight x owidth` where
```lua
otime = floor((time + 2*padT - kT) / dT + 1)
owidth = floor((width + 2*padW - kW) / dW + 1)
@@ -903,6 +952,8 @@ Applies a 3D full convolution over an input image composed of several input plan
`forward(input)` is expected to be a 4D or 5D tensor. Note that instead of setting `adjT`, `adjW` and `adjH`, VolumetricFullConvolution also accepts a table input with two tensors: `{convInput, sizeTensor}` where `convInput` is the standard input on which the full convolution is applied, and the size of `sizeTensor` is used to set the size of the output. Using the two-input version of forward
will ignore the `adjT`, `adjW` and `adjH` values used to construct the module.
+This can be used as 3D deconvolution, or 3D upsampling, so that a 3D fully convolutional network (FCN) can be easily implemented.
+
The parameters are the following:
* `nInputPlane`: The number of expected input planes in the image given into `forward()`.
* `nOutputPlane`: The number of output planes the convolution layer will produce.
@@ -924,6 +975,42 @@ owidth = (width - 1) * dW - 2*padW + kW
oheight = (height - 1) * dH - 2*padH + kH
```
+<a name="nn.VolumetricDilatedConvolution"></a>
+### VolumetricDilatedConvolution ###
+
+```lua
+module = nn.VolumetricDilatedConvolution(nInputPlane, nOutputPlane, kT, kW, kH, [dT], [dW], [dH], [padT], [padW], [padH], [dilationT], [dilationW], [dilationH])
+```
+
+Applies a 3D dilated convolution over an input image composed of several input planes. The `input` tensor in
+`forward(input)` is expected to be a 4D or 5D tensor.
+
+The parameters are the following:
+ * `nInputPlane`: The number of expected input planes in the image given into `forward()`.
+ * `nOutputPlane`: The number of output planes the convolution layer will produce.
+ * `kT`: The kernel depth of the convolution
+ * `kW`: The kernel width of the convolution
+ * `kH`: The kernel height of the convolution
+ * `dT`: The step of the convolution in the depth dimension. Default is `1`.
+ * `dW`: The step of the convolution in the width dimension. Default is `1`.
+ * `dH`: The step of the convolution in the height dimension. Default is `1`.
+ * `padT`: The additional zeros added per depth to the input planes. Default is `0`, a good number is `(kT-1)/2`.
+ * `padW`: The additional zeros added per width to the input planes. Default is `0`, a good number is `(kW-1)/2`.
+ * `padH`: The additional zeros added per height to the input planes. Default is `0`, a good number is `(kH-1)/2`.
+ * `dilationT`: The number of pixels to skip. Default is `1`. `1` makes it a VolumetricConvolution
+ * `dilationW`: The number of pixels to skip. Default is `1`. `1` makes it a VolumetricConvolution
+ * `dilationH`: The number of pixels to skip. Default is `1`. `1` makes it a VolumetricConvolution
+
+If the input image is a 4D tensor `nInputPlane x depth x height x width`, the output image size
+will be `nOutputPlane x odepth x oheight x owidth` where
+```lua
+odepth  = floor((depth  + 2 * padT - dilationT * (kT-1) - 1) / dT) + 1
+owidth  = floor((width  + 2 * padW - dilationW * (kW-1) - 1) / dW) + 1
+oheight = floor((height + 2 * padH - dilationH * (kH-1) - 1) / dH) + 1
+```
+
+Further information about the dilated convolution can be found in the following paper: [Multi-Scale Context Aggregation by Dilated Convolutions](http://arxiv.org/abs/1511.07122).
+
<a name="nn.VolumetricMaxPooling"></a>
### VolumetricMaxPooling ###
@@ -962,3 +1049,13 @@ values (corresponding to their position within each map) are stored:
If `C` is a tensor of same size as `B`, `module:updateOutput(C)` outputs a
tensor `D` of same size as `A` such that:
`D[{n,k,indices[{n,k,t}],indices[{n,k,i}],indices[{n,k,j}]}] = C[{n,k,t,i,j}]`.
+
+<a name="nn.VolumetricReplicationPadding"></a>
+### VolumetricReplicationPadding ###
+
+```lua
+module = nn.VolumetricReplicationPadding(padLeft, padRight, padTop, padBottom,
+ padFront, padBack)
+```
+
+Each feature map of a given input is padded with the replication of the input boundary.
diff --git a/doc/criterion.md b/doc/criterion.md
index 6e25f72..270edb9 100644
--- a/doc/criterion.md
+++ b/doc/criterion.md
@@ -332,7 +332,7 @@ By default, the losses are averaged over observations for each minibatch. Howeve
criterion = nn.SoftMarginCriterion()
```
-Creates a criterion that optimizes a two-class classification logisitic loss between input `x` (a `Tensor` of dimension `1`) and output `y` (which is a tensor containing either `1`s or `-1`s).
+Creates a criterion that optimizes a two-class classification logistic loss between input `x` (a `Tensor` of dimension `1`) and output `y` (which is a tensor containing either `1`s or `-1`s).
```lua
loss(x, y) = sum_i (log(1 + exp(-y[i]*x[i]))) / x:nElement()
diff --git a/doc/image/relu6.png b/doc/image/relu6.png
new file mode 100644
index 0000000..0a88563
Binary files /dev/null and b/doc/image/relu6.png differ
diff --git a/doc/module.md b/doc/module.md
index ce8c7b4..02b0603 100644
--- a/doc/module.md
+++ b/doc/module.md
@@ -300,11 +300,11 @@ This function will go over all the weights and gradWeights and make them view in
<a name="nn.Module.training"></a>
### training() ###
-This sets the mode of the Module (or sub-modules) to `train=true`. This is useful for modules like [Dropout](simple.md#nn.Dropout) that have a different behaviour during training vs evaluation.
+This sets the mode of the Module (or sub-modules) to `train=true`. This is useful for modules like [Dropout](simple.md#nn.Dropout) or [BatchNormalization](simple.md#nn.BatchNormalization) that have a different behaviour during training vs evaluation.
<a name="nn.Module.evaluate"></a>
### evaluate() ###
-This sets the mode of the Module (or sub-modules) to `train=false`. This is useful for modules like [Dropout](simple.md#nn.Dropout) that have a different behaviour during training vs evaluation.
+This sets the mode of the Module (or sub-modules) to `train=false`. This is useful for modules like [Dropout](simple.md#nn.Dropout) or [BatchNormalization](simple.md#nn.BatchNormalization) that have a different behaviour during training vs evaluation.
<a name="nn.Module.findModules"></a>
### findModules(typename) ###
diff --git a/doc/simple.md b/doc/simple.md
index 50e5c9f..6f01a56 100644
--- a/doc/simple.md
+++ b/doc/simple.md
@@ -51,6 +51,7 @@ Simple Modules are used for various tasks like adapting Tensor methods and provi
* [Padding](#nn.Padding) : adds padding to a dimension ;
* [L1Penalty](#nn.L1Penalty) : adds an L1 penalty to an input (for sparsity) ;
* [GradientReversal](#nn.GradientReversal) : reverses the gradient (to maximize an objective function) ;
+ * [GPU](#nn.GPU) : decorates a module so that it can be executed on a specific GPU device.
<a name="nn.Linear"></a>
## Linear ##
@@ -1404,3 +1405,50 @@ One can also call:
module:setLambda(lambda)
```
to set the hyper-parameter `lambda` dynamically during training.
+
+<a name="nn.GPU"></a>
+## GPU ##
+
+```lua
+gpu = nn.GPU(module, device, [outdevice])
+require 'cunn'
+gpu:cuda()
+```
+
+Decorates an encapsulated `module` so that it can be executed on a specific GPU `device`.
+The decorated module's `parameters` are thus hosted on the specified GPU `device`.
+All operations on the `gpu` module are executed on that device.
+Calls to `forward`/`backward` will transfer arguments `input` and `gradOutput` to the specified `device`,
+which are then fed as arguments to the decorated `module`.
+Returned `output` is located on the specified `outdevice` (defaults to `device`).
+Returned `gradInput` is allocated on the same device as the `input`.
+
+When serialized/deserialized, the `gpu` module will be run on the same `device` that it was serialized with.
+To prevent this from happening, the module can be converted to float/double before serialization:
+
+```lua
+gpu:float()
+gpustr = torch.serialize(gpu)
+```
+
+The module is located in the __nn__ package instead of __cunn__ as this allows
+it to be used in CPU-only environments, which are common for production models.
+
+The module supports nested table `input` and `gradOutput` tensors originating from multiple devices.
+Each nested tensor in the returned `gradInput` will be transferred to the same device as its commensurate tensor in the `input`.
+
+The intended use-case is not for model-parallelism where the models are executed in parallel on multiple devices, but
+for sequential models where a single GPU doesn't have enough memory.
+
+Example using 4 GPUs:
+
+```lua
+mlp = nn.Sequential()
+ :add(nn.GPU(nn.Linear(10000,10000), 1))
+ :add(nn.GPU(nn.Linear(10000,10000), 2))
+ :add(nn.GPU(nn.Linear(10000,10000), 3))
+ :add(nn.GPU(nn.Linear(10000,10000), 4, cutorch.getDevice()))
+```
+
+Note how the last `GPU` instance will return an `output` tensor on the same device as the current device (`cutorch.getDevice`).
+
diff --git a/doc/transfer.md b/doc/transfer.md
index c1dfc80..358ea7e 100644
--- a/doc/transfer.md
+++ b/doc/transfer.md
@@ -15,7 +15,7 @@ thus outputting a Tensor of the same dimension.
* `f(x)` = `x,` `otherwise.`
The range of the linear region `[-1 1]` can be adjusted by specifying arguments in declaration, for example `nn.HardTanh(min_value, max_value)`.
-Otherwise, `[min_value max_value]` is set to `[-1 1]` by default.
+Otherwise, `[min_value max_value]` is set to `[-1 1]` by default. An optional third boolean argument enables in-place operation, e.g. `nn.HardTanh(min_value, max_value, true)`.
```lua
@@ -183,7 +183,7 @@ gnuplot.grid(true)
<a name="nn.LogSoftMax"></a>
## LogSoftMax ##
-Applies the `LogSoftmax` function to an n-dimensional input Tensor.
+Applies the `LogSoftMax` function to an n-dimensional input Tensor.
`LogSoftmax` is defined as `f_i(x)` = `log(1/a exp(x_i))`,
where `a` = `sum_j exp(x_j)`.
@@ -261,6 +261,29 @@ gnuplot.grid(true)
```

+<a name="nn.ReLU6"></a>
+## ReLU6 ##
+
+Same as `ReLU` except that the rectifying function `f(x)` saturates at `x = 6`. This layer is useful for training networks that do not lose precision (due to FP saturation) when implemented as FP16.
+
+`ReLU6` is defined as `f(x)` = `min(max(0, x), 6)`
+
+Can optionally do its operation in-place without using extra state memory:
+```lua
+m=nn.ReLU6(true) -- true = in-place, false = keeping separate state.
+```
+
+```lua
+ii=torch.linspace(-3, 9)
+m=nn.ReLU6()
+oo=m:forward(ii)
+go=torch.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+```
+
+
<a name="nn.PReLU"></a>
## PReLU ##
diff --git a/hessian.lua b/hessian.lua
index 4d3afa3..33ef2b0 100644
--- a/hessian.lua
+++ b/hessian.lua
@@ -53,7 +53,7 @@ function nn.hessian.enable()
error('Number of weights is not equal to number of weights squares')
end
module.diagHessianInput = module.diagHessianInput or input.new()
- module.diagHessianInput:resizeAs(input)
+ module.diagHessianInput:resizeAs(input):zero()
local gi = module.gradInput
module.gradInput = module.diagHessianInput
diff --git a/init.lua b/init.lua
index 516f29b..a9c68da 100644
--- a/init.lua
+++ b/init.lua
@@ -15,6 +15,7 @@ require('nn.Concat')
require('nn.Parallel')
require('nn.Sequential')
require('nn.DepthConcat')
+require('nn.Bottle')
require('nn.Linear')
require('nn.Bilinear')
@@ -83,6 +84,7 @@ require('nn.HardShrink')
require('nn.SoftShrink')
require('nn.Threshold')
require('nn.ReLU')
+require('nn.ReLU6')
require('nn.PReLU')
require('nn.LeakyReLU')
require('nn.SpatialSoftMax')
@@ -99,6 +101,7 @@ require('nn.SpatialConvolutionMap')
require('nn.SpatialDilatedConvolution')
require('nn.SpatialSubSampling')
require('nn.SpatialMaxPooling')
+require('nn.SpatialDilatedMaxPooling')
require('nn.SpatialMaxUnpooling')
require('nn.SpatialFractionalMaxPooling')
require('nn.SpatialLPPooling')
@@ -115,14 +118,19 @@ require('nn.SpatialZeroPadding')
require('nn.SpatialReflectionPadding')
require('nn.SpatialReplicationPadding')
require('nn.SpatialUpSamplingNearest')
+require('nn.SpatialUpSamplingBilinear')
require('nn.SpatialBatchNormalization')
require('nn.VolumetricConvolution')
require('nn.VolumetricFullConvolution')
+require('nn.VolumetricDilatedConvolution')
require('nn.VolumetricMaxPooling')
require('nn.VolumetricMaxUnpooling')
require('nn.VolumetricAveragePooling')
require('nn.VolumetricBatchNormalization')
+require('nn.VolumetricReplicationPadding')
+
+require('nn.GPU')
require('nn.ParallelTable')
require('nn.Identity')
diff --git a/lib/THNN/generic/ClassNLLCriterion.c b/lib/THNN/generic/ClassNLLCriterion.c
index eb02f7c..aea726c 100644
--- a/lib/THNN/generic/ClassNLLCriterion.c
+++ b/lib/THNN/generic/ClassNLLCriterion.c
@@ -20,6 +20,9 @@ void THNN_(ClassNLLCriterion_updateOutput)(
if (THTensor_(nDimension)(input) > 2) {
THError("input tensor should be 1D or 2D");
}
+ if (weights && THTensor_(nElement)(weights) != n_classes) {
+ THError("weight tensor should be defined either for all or no classes");
+ }
input = THTensor_(newContiguous)(input);
target = THIndexTensor_(newContiguous)(target);
@@ -34,7 +37,7 @@ void THNN_(ClassNLLCriterion_updateOutput)(
output_data[0] = total_weight_data[0] = 0.0;
if (THTensor_(nDimension)(input) == 1) {
- int cur_target = target_data[0] - 1;
+ int cur_target = target_data[0] - TH_INDEX_BASE;
THAssert(cur_target >= 0 && cur_target < n_classes);
total_weight_data[0] = weights ? weights_data[cur_target] : 1.0f;
output_data[0] = -input_data[cur_target] * total_weight_data[0];
@@ -46,7 +49,7 @@ void THNN_(ClassNLLCriterion_updateOutput)(
int i;
for (i = 0; i < batch_size; i++) {
- int cur_target = target_data[i] - 1;
+ int cur_target = target_data[i] - TH_INDEX_BASE;
THAssert(cur_target >= 0 && cur_target < n_classes);
real cur_weight = weights ? weights_data[cur_target] : 1.0f;
@@ -95,6 +98,10 @@ void THNN_(ClassNLLCriterion_updateGradInput)(
if (THTensor_(nDimension)(input) > 2) {
THError("input tensor should be 1D or 2D");
}
+
+ if (weights && THTensor_(nElement)(weights) != n_classes) {
+ THError("weight tensor should be defined either for all or no classes");
+ }
target = THIndexTensor_(newContiguous)(target);
weights = weights ? THTensor_(newContiguous)(weights) : NULL;
@@ -104,7 +111,7 @@ void THNN_(ClassNLLCriterion_updateGradInput)(
real *gradInput_data = THTensor_(data)(gradInput);
if (THTensor_(nDimension)(input) == 1) {
- int cur_target = target_data[0] - 1;
+ int cur_target = target_data[0] - TH_INDEX_BASE;
THAssert(cur_target >= 0 && cur_target < n_classes);
gradInput_data[cur_target] =
@@ -118,7 +125,7 @@ void THNN_(ClassNLLCriterion_updateGradInput)(
int i;
for (i = 0; i < batch_size; i++){
- int cur_target = target_data[i] - 1;
+ int cur_target = target_data[i] - TH_INDEX_BASE;
THAssert(cur_target >= 0 && cur_target < n_classes);
diff --git a/lib/THNN/generic/HardTanh.c b/lib/THNN/generic/HardTanh.c
index 9764ec0..3b7ba3d 100644
--- a/lib/THNN/generic/HardTanh.c
+++ b/lib/THNN/generic/HardTanh.c
@@ -7,37 +7,59 @@ void THNN_(HardTanh_updateOutput)(
THTensor *input,
THTensor *output,
real min_val,
- real max_val)
+ real max_val,
+ bool inplace)
{
- THTensor_(resizeAs)(output, input);
+ if (inplace)
+ THTensor_(set)(output, input);
+ else
+ THTensor_(resizeAs)(output, input);
if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output))
{
- TH_TENSOR_APPLY2(real, output, real, input,
- if (*input_data < min_val)
- *output_data = min_val;
- else if (*input_data <= max_val)
- *output_data = *input_data;
- else
- *output_data = max_val;
- );
+ if (inplace)
+ TH_TENSOR_APPLY(real, input,
+ if (*input_data < min_val)
+ *input_data = min_val;
+ else if (*input_data > max_val)
+ *input_data = max_val;
+ );
+ TH_TENSOR_APPLY2(real, output, real, input,
+ if (*input_data < min_val)
+ *output_data = min_val;
+ else if (*input_data <= max_val)
+ *output_data = *input_data;
+ else
+ *output_data = max_val;
+ );
}
else
{
- real* ptr_output = THTensor_(data)(output);
real* ptr_input = THTensor_(data)(input);
+ real* ptr_output = THTensor_(data)(output);
long i;
+ long n = THTensor_(nElement)(input);
+ if (inplace)
#pragma omp parallel for private(i)
- for (i = 0; i < THTensor_(nElement)(input); i++)
- {
- if (ptr_input[i] < min_val)
- ptr_output[i] = min_val;
- else if (ptr_input[i] <= max_val)
- ptr_output[i] = ptr_input[i];
- else
- ptr_output[i] = max_val;
- }
+ for (i = 0; i < n; i++)
+ {
+ if (ptr_input[i] < min_val)
+ ptr_input[i] = min_val;
+ else if (ptr_input[i] > max_val)
+ ptr_input[i] = max_val;
+ }
+ else
+#pragma omp parallel for private(i)
+ for (i = 0; i < n; i++)
+ {
+ if (ptr_input[i] < min_val)
+ ptr_output[i] = min_val;
+ else if (ptr_input[i] <= max_val)
+ ptr_output[i] = ptr_input[i];
+ else
+ ptr_output[i] = max_val;
+ }
}
}
@@ -47,21 +69,33 @@ void THNN_(HardTanh_updateGradInput)(
THTensor *gradOutput,
THTensor *gradInput,
real min_val,
- real max_val)
+ real max_val,
+ bool inplace)
{
- THTensor_(resizeAs)(gradInput, input);
+ if (inplace)
+ THTensor_(set)(gradInput, gradOutput);
+ else
+ THTensor_(resizeAs)(gradInput, input);
if (input->nDimension == 1 ||
!THTensor_(isContiguous)(input) ||
!THTensor_(isContiguous)(gradOutput) ||
!THTensor_(isContiguous)(gradInput))
{
- TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
- if (*input_data < min_val || *input_data > max_val)
- *gradInput_data = 0;
- else
- *gradInput_data = *gradOutput_data;
- );
+ if (inplace)
+ {
+ TH_TENSOR_APPLY2(real, gradOutput, real, input,
+ if (*input_data < min_val || *input_data > max_val)
+ *gradOutput_data = 0;
+ );
+ }
+ else
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input,
+ if (*input_data < min_val || *input_data > max_val)
+ *gradInput_data = 0;
+ else
+ *gradInput_data = *gradOutput_data;
+ );
}
else
{
@@ -69,15 +103,24 @@ void THNN_(HardTanh_updateGradInput)(
real* ptr_gradInput = THTensor_(data)(gradInput);
real* ptr_input = THTensor_(data)(input);
long i;
+ long n = THTensor_(nElement)(input);
+ if (inplace)
#pragma omp parallel for private(i)
- for (i = 0; i < THTensor_(nElement)(input); i++)
- {
- if (ptr_input[i] < min_val || ptr_input[i] > max_val)
- ptr_gradInput[i] = 0;
- else
- ptr_gradInput[i] = ptr_gradOutput[i];
- }
+ for (i = 0; i < n; i++)
+ {
+ if (ptr_input[i] <= min_val || ptr_input[i] >= max_val)
+ ptr_gradInput[i] = 0;
+ }
+ else
+#pragma omp parallel for private(i)
+ for (i = 0; i < n; i++)
+ {
+ if (ptr_input[i] < min_val || ptr_input[i] > max_val)
+ ptr_gradInput[i] = 0;
+ else
+ ptr_gradInput[i] = ptr_gradOutput[i];
+ }
}
}
diff --git a/lib/THNN/generic/LookupTable.c b/lib/THNN/generic/LookupTable.c
index a35ff84..378d1c3 100644
--- a/lib/THNN/generic/LookupTable.c
+++ b/lib/THNN/generic/LookupTable.c
@@ -12,12 +12,12 @@ static void THNN_(LookupTable_resetCount)(
for (i = 0; i<numel; i++)
{
- long k = input_data[i] - 1;
+ long k = input_data[i] - TH_INDEX_BASE;
count_data[k] = 0;
}
for (i = 0; i<numel; i++)
{
- long k = input_data[i] - 1;
+ long k = input_data[i] - TH_INDEX_BASE;
count_data[k]++;
}
}
@@ -56,7 +56,7 @@ void THNN_(LookupTable_accGradParameters)(
// check that inputs are all within range
for (i=0; i<numel; i++)
- if (input_data[i] < 1 || input_data[i] > numw)
+ if (input_data[i] < TH_INDEX_BASE || input_data[i] >= numw + TH_INDEX_BASE)
THError("input out of range");
gradOutput = THTensor_(newContiguous)(gradOutput);
@@ -86,7 +86,7 @@ void THNN_(LookupTable_accGradParameters)(
{
if (input_data[i] != paddingValue)
{
- long k = input_data[i] - 1;
+ long k = input_data[i] - TH_INDEX_BASE;
if (k >= start && k < end)
{
real scale_ = scale;
@@ -106,7 +106,7 @@ void THNN_(LookupTable_accGradParameters)(
{
if (input_data[i] != paddingValue)
{
- long k = input_data[i] - 1;
+ long k = input_data[i] - TH_INDEX_BASE;
real scale_ = scale;
if (count_data) scale_ /= count_data[k];
THBlas_(axpy)(stride, scale_, go + i*stride, 1, gw + k*stride, 1);
@@ -178,7 +178,7 @@ void THNN_(LookupTable_renorm)(
long stride = THTensor_(stride)(weight, 0);
real *gw = THTensor_(data)(weight);
for (i=0; i<numel; i++)
- if (row_idx[i] < 1 || row_idx[i] > numw)
+ if (row_idx[i] < TH_INDEX_BASE || row_idx[i] >= numw + TH_INDEX_BASE)
THError("input out of range");
// get unique indices
qsort(row_idx, numel, sizeof(THIndex_t), THNN_(compare_THIndex));
@@ -197,7 +197,7 @@ void THNN_(LookupTable_renorm)(
#pragma omp parallel for private(i)
for (i=0; i<numel; i++)
{
- long k = row_idx[i] - 1;
+ long k = row_idx[i] - TH_INDEX_BASE;
THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
}
return;
@@ -205,7 +205,7 @@ void THNN_(LookupTable_renorm)(
#endif
for (i=0; i<numel; i++)
{
- long k = row_idx[i] - 1;
+ long k = row_idx[i] - TH_INDEX_BASE;
THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
}
}
diff --git a/lib/THNN/generic/MultiLabelMarginCriterion.c b/lib/THNN/generic/MultiLabelMarginCriterion.c
index 4cbb000..9cfc5fe 100644
--- a/lib/THNN/generic/MultiLabelMarginCriterion.c
+++ b/lib/THNN/generic/MultiLabelMarginCriterion.c
@@ -47,14 +47,14 @@ void THNN_(MultiLabelMarginCriterion_updateOutput)(
{
for (ddt = 0; ddt < dim; ddt++)
{
- long target_idx = (long)target_data[ddt]-1;
+ long target_idx = (long)target_data[ddt] - TH_INDEX_BASE;
if (target_idx < 0)
break;
isTarget_data[target_idx] = 1;
}
for (dt = 0; dt < dim; dt++)
{
- long target_idx = (long)target_data[dt]-1;
+ long target_idx = (long)target_data[dt] - TH_INDEX_BASE;
real input_target;
if (target_idx < 0)
break;
@@ -141,7 +141,7 @@ void THNN_(MultiLabelMarginCriterion_updateGradInput)(
{
for (dt = 0; dt < dim; dt++)
{
- long target_idx = (long)target_data[dt]-1;
+ long target_idx = (long)target_data[dt] - TH_INDEX_BASE;
real input_target;
if (target_idx < 0)
break;
diff --git a/lib/THNN/generic/MultiMarginCriterion.c b/lib/THNN/generic/MultiMarginCriterion.c
index 2463da1..455cf5e 100644
--- a/lib/THNN/generic/MultiMarginCriterion.c
+++ b/lib/THNN/generic/MultiMarginCriterion.c
@@ -34,7 +34,7 @@ void THNN_(MultiMarginCriterion_updateOutput)(
for (t = 0; t < nframe; t++)
{
real idx = THTensor_(get1d)(target, t);
- THArgCheck((idx >= 1) && (idx <= dim), 3, "target out of range");
+ THArgCheck((idx >= TH_INDEX_BASE) && (idx < dim + TH_INDEX_BASE), 3, "target out of range");
}
input = THTensor_(newContiguous)(input);
@@ -47,7 +47,7 @@ void THNN_(MultiMarginCriterion_updateOutput)(
sum = 0;
for (t = 0; t < nframe; t++)
{
- long target_idx = (long)(target_data[t]-1);
+ long target_idx = (long)(target_data[t] - TH_INDEX_BASE);
real input_target = input_data[target_idx];
for (d = 0; d < dim; d++)
{
@@ -124,7 +124,7 @@ void THNN_(MultiMarginCriterion_updateGradInput)(
for (t = 0; t < nframe; t++)
{
- long target_idx = (long)(target_data[t])-1;
+ long target_idx = (long)(target_data[t]) - TH_INDEX_BASE;
real input_target = input_data[target_idx];
real gradInput_target = 0;
for (d = 0; d < dim; d++)
diff --git a/lib/THNN/generic/SpatialAdaptiveMaxPooling.c b/lib/THNN/generic/SpatialAdaptiveMaxPooling.c
index 61afc40..5d6d995 100644
--- a/lib/THNN/generic/SpatialAdaptiveMaxPooling.c
+++ b/lib/THNN/generic/SpatialAdaptiveMaxPooling.c
@@ -30,7 +30,7 @@ static void THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(
for(j = 0; j < owidth; j++)
{
-
+
int x_start = (int)floor((float)j / owidth * iwidth);
int x_end = (int)ceil((float)(j + 1) / owidth * iwidth);
int kW = x_end-x_start;
@@ -64,8 +64,8 @@ static void THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(
*op = maxval;
/* store location of max (x,y) */
- *indyp = (int)(maxindex / kW)+1;
- *indxp = (maxindex % kW) +1;
+ *indyp = (int)(maxindex / kW) + TH_INDEX_BASE;
+ *indxp = (maxindex % kW) + TH_INDEX_BASE;
}
}
}
@@ -85,7 +85,7 @@ void THNN_(SpatialAdaptiveMaxPooling_updateOutput)(
long nslices;
long iheight;
long iwidth;
-
+
long istride_d;
long istride_h;
long istride_w;
@@ -98,7 +98,7 @@ void THNN_(SpatialAdaptiveMaxPooling_updateOutput)(
THArgCheck(input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");
- if (input->nDimension == 4)
+ if (input->nDimension == 4)
{
istride_b = input->stride[0];
nbatch = input->size[0];
@@ -179,7 +179,7 @@ static void THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(
real *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
real *indx_p_k = indx_p + k*owidth*oheight;
real *indy_p_k = indy_p + k*owidth*oheight;
-
+
/* calculate max points */
long i, j;
for(i = 0; i < oheight; i++)
@@ -189,9 +189,9 @@ static void THNN_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(
{
int x_start = (int)floor((float) j / owidth * iwidth);
/* retrieve position of max */
- long maxi = indy_p_k[i*owidth + j] - 1 + y_start;
- long maxj = indx_p_k[i*owidth + j] - 1 + x_start;
-
+ long maxi = indy_p_k[i*owidth + j] - TH_INDEX_BASE + y_start;
+ long maxj = indx_p_k[i*owidth + j] - TH_INDEX_BASE + x_start;
+
/* update gradient */
gradInput_p_k[maxi*iwidth + maxj] += gradOutput_p_k[i*owidth + j];
}
diff --git a/lib/THNN/generic/SpatialClassNLLCriterion.c b/lib/THNN/generic/SpatialClassNLLCriterion.c
index 3121c30..cbb4cea 100644
--- a/lib/THNN/generic/SpatialClassNLLCriterion.c
+++ b/lib/THNN/generic/SpatialClassNLLCriterion.c
@@ -7,6 +7,9 @@
"only batches of spatial targets supported (3D tensors)"); \
THArgCheck(THTensor_(nDimension)(input) == 4, 2, \
"only batches of spatial inputs supported (4D tensors)"); \
+ if (weights && THTensor_(nElement)(weights) != THTensor_(size)(input, 1)) { \
+ THError("weight tensor should be defined either for all or no classes"); \
+ } \
\
{ \
long input0 = THTensor_(size)(input, 0); \
@@ -51,7 +54,7 @@ void THNN_(SpatialClassNLLCriterion_updateOutput)(
real output_acc = 0;
for (int b = 0; b < batch_size; b++) {
for (int elem = 0; elem < map_size; elem++) {
- int cur_target = target_data[b * map_size + elem] - 1;
+ int cur_target = target_data[b * map_size + elem] - TH_INDEX_BASE;
THAssert(cur_target >= 0 && cur_target < n_classes);
real cur_weight = weights ? weights_data[cur_target] : 1.0f;
@@ -102,11 +105,12 @@ void THNN_(SpatialClassNLLCriterion_updateGradInput)(
real normalize = sizeAverage ? *total_weight_data : 1.0f;
- int b,elem;
-#pragma omp parallel for
+ int b;
+ #pragma omp parallel for
for (b = 0; b < batch_size; b++) {
+ int elem;
for (elem = 0; elem < map_size; elem++) {
- int cur_target = target_data[b * map_size + elem] - 1;
+ int cur_target = target_data[b * map_size + elem] - TH_INDEX_BASE;
THAssert(cur_target >= 0 && cur_target < n_classes);
gradInput_data[b * sample_size + cur_target * map_size + elem] =
diff --git a/lib/THNN/generic/SpatialConvolutionMM.c b/lib/THNN/generic/SpatialConvolutionMM.c
index a549a37..e7460c8 100644
--- a/lib/THNN/generic/SpatialConvolutionMM.c
+++ b/lib/THNN/generic/SpatialConvolutionMM.c
@@ -174,6 +174,10 @@ void THNN_(SpatialConvolutionMM_updateGradInput)(
THTensor_(resizeAs)(gradInput, input);
THTensor_(resizeAs)(fgradInput, finput);
+ // depending on the BLAS library, fgradInput (result tensor) might
+ // be left uninitialized on zero alpha, which might lead to weird behavior
+ // hence, to be safe, zero it
+ THTensor_(zero)(fgradInput);
THTensor_(transpose)(weight, weight, 0, 1);
if(input->nDimension == 3)
diff --git a/lib/THNN/generic/SpatialConvolutionMap.c b/lib/THNN/generic/SpatialConvolutionMap.c
index aef0b1e..82886c2 100644
--- a/lib/THNN/generic/SpatialConvolutionMap.c
+++ b/lib/THNN/generic/SpatialConvolutionMap.c
@@ -10,7 +10,7 @@ void THNN_(SpatialConvolutionMap_updateOutput)(
THArgCheck(
weight != NULL && weight->nDimension == 3
&& connTable != NULL && connTable->size[0] == weight->size[0], 4,
- "3D weight tensor expected (connTable:size(1) x kH x kW)"
+ "3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
);
real *weight_data = THTensor_(data)(weight);
@@ -75,8 +75,8 @@ void THNN_(SpatialConvolutionMap_updateOutput)(
for (k = 0; k < nweight; k++)
{
/* get offsets for input/output */
- int o = (int)connTable_data[k*2+1]-1;
- int i = (int)connTable_data[k*2+0]-1;
+ int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
+ int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
if (o == p)
{
@@ -106,7 +106,7 @@ void THNN_(SpatialConvolutionMap_updateGradInput)(
THArgCheck(
weight != NULL && weight->nDimension == 3
&& connTable != NULL && connTable->size[0] == weight->size[0], 5,
- "3D weight tensor expected (connTable:size(1) x kH x kW)"
+ "3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
);
real *weight_data = THTensor_(data)(weight);
@@ -154,8 +154,8 @@ void THNN_(SpatialConvolutionMap_updateGradInput)(
int nkernel = connTable->size[0];
for (k = 0; k < nkernel; k++)
{
- int o = (int)connTable_data[k*2+1]-1;
- int i = (int)connTable_data[k*2+0]-1;
+ int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
+ int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
if (i == p)
{
/* gradient to input */
@@ -182,7 +182,7 @@ void THNN_(SpatialConvolutionMap_accGradParameters)(
THArgCheck(
gradWeight != NULL && gradWeight->nDimension == 3
&& connTable != NULL && connTable->size[0] == gradWeight->size[0], 5,
- "3D gradWeight tensor expected (connTable:size(1) x kH x kW)"
+ "3D gradWeight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
);
real *gradWeight_data = THTensor_(data)(gradWeight);
@@ -237,8 +237,8 @@ void THNN_(SpatialConvolutionMap_accGradParameters)(
long m;
for (m = 0; m < nbatch; m++)
{
- int o = (int)THTensor_(get2d)(connTable,k,1)-1;
- int i = (int)THTensor_(get2d)(connTable,k,0)-1;
+ int o = (int)THTensor_(get2d)(connTable,k,1) - TH_INDEX_BASE;
+ int i = (int)THTensor_(get2d)(connTable,k,0) - TH_INDEX_BASE;
/* gradient to kernel */
THTensor_(validXCorr2DRevptr)(
diff --git a/lib/THNN/generic/SpatialDilatedConvolution.c b/lib/THNN/generic/SpatialDilatedConvolution.c
index 3f75016..3928af0 100644
--- a/lib/THNN/generic/SpatialDilatedConvolution.c
+++ b/lib/THNN/generic/SpatialDilatedConvolution.c
@@ -49,6 +49,7 @@ void THNN_(SpatialDilatedConvolution_updateOutput)(
// Resize output
THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);
+ THTensor_(zero)(output);
// Resize temporary columns
THTensor_(resize2d)(columns, nInputPlane*kW*kH, outputHeight*outputWidth);
@@ -171,6 +172,7 @@ void THNN_(SpatialDilatedConvolution_updateGradInput)(
// Resize temporary columns
THTensor_(resize2d)(gradColumns, nInputPlane*kW*kH, outputHeight*outputWidth);
+ THTensor_(zero)(gradColumns);
// Helpers
THTensor *gradInput_n = THTensor_(new)();
diff --git a/lib/THNN/generic/SpatialFractionalMaxPooling.c b/lib/THNN/generic/SpatialFractionalMaxPooling.c
index 1c2b6ab..c0a9384 100644
--- a/lib/THNN/generic/SpatialFractionalMaxPooling.c
+++ b/lib/THNN/generic/SpatialFractionalMaxPooling.c
@@ -79,7 +79,7 @@ static void THNN_(SpatialFractionalMaxPooling_updateOutput_frame)(
outputForPlane[h * outputW + w] = maxVal;
/* +1 to lua index */
- indicesForPlane[h * outputW + w] = (real) maxIndex + 1;
+ indicesForPlane[h * outputW + w] = (real) maxIndex + TH_INDEX_BASE;
}
}
@@ -96,7 +96,7 @@ void THNN_(SpatialFractionalMaxPooling_updateOutput)(
int poolSizeW, int poolSizeH,
THTensor *indices,
THTensor *randomSamples) {
-
+
long numBatch = 1;
int planeDim = 0;
int heightDim = 1;
@@ -177,7 +177,7 @@ static void THNN_(SpatialFractionalMaxPooling_updateGradInput_frame)(
for (h = 0; h < outputH; ++h) {
for (w = 0; w < outputW; ++w) {
long outputIndex = h * outputW + w;
- long index = indicesForPlane[outputIndex] - 1;
+ long index = indicesForPlane[outputIndex] - TH_INDEX_BASE;
THAssert(index >= 0 && index < inputW * inputH);
gradInputForPlane[index] += gradOutputForPlane[outputIndex];
diff --git a/lib/THNN/generic/SpatialFullConvolution.c b/lib/THNN/generic/SpatialFullConvolution.c
index 20dd126..a82477d 100644
--- a/lib/THNN/generic/SpatialFullConvolution.c
+++ b/lib/THNN/generic/SpatialFullConvolution.c
@@ -98,6 +98,7 @@ void THNN_(SpatialFullConvolution_updateOutput)(
// Resize temporary columns
THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);
+ THTensor_(zero)(columns);
// Define a buffer of ones, for bias accumulation
// Note: this buffer can be shared with other modules, it only ever gets increased,
@@ -152,16 +153,17 @@ void THNN_(SpatialFullConvolution_updateOutput)(
long k_ = 1;
// Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
- THBlas_(gemm)(
- 't', 'n',
- n_, m_, k_,
- 1,
- THTensor_(data)(ones), k_,
- THTensor_(data)(bias), k_,
- 1,
- THTensor_(data)(output_n), n_
- );
-
+ if (bias) {
+ THBlas_(gemm)(
+ 't', 'n',
+ n_, m_, k_,
+ 1,
+ THTensor_(data)(ones), k_,
+ THTensor_(data)(bias), k_,
+ 1,
+ THTensor_(data)(output_n), n_
+ );
+ }
}
// Free
@@ -210,6 +212,7 @@ void THNN_(SpatialFullConvolution_updateGradInput)(
// Resize output
THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth);
+ THTensor_(zero)(gradInput);
// Resize temporary columns
THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH, inputHeight*inputWidth);
@@ -355,15 +358,17 @@ void THNN_(SpatialFullConvolution_accGradParameters)(
long k_ = outputHeight * outputWidth;
// Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
- THBlas_(gemv)(
- 't',
- k_, m_,
- scale,
- THTensor_(data)(gradOutput_n), k_,
- THTensor_(data)(ones), 1,
- 1,
- THTensor_(data)(gradBias), 1
- );
+ if (gradBias) {
+ THBlas_(gemv)(
+ 't',
+ k_, m_,
+ scale,
+ THTensor_(data)(gradOutput_n), k_,
+ THTensor_(data)(ones), 1,
+ 1,
+ THTensor_(data)(gradBias), 1
+ );
+ }
}
// Free
diff --git a/lib/THNN/generic/SpatialFullConvolutionMap.c b/lib/THNN/generic/SpatialFullConvolutionMap.c
index bbb0282..1bd3455 100644
--- a/lib/THNN/generic/SpatialFullConvolutionMap.c
+++ b/lib/THNN/generic/SpatialFullConvolutionMap.c
@@ -10,7 +10,7 @@ void THNN_(SpatialFullConvolutionMap_updateOutput)(
THArgCheck(
weight != NULL && weight->nDimension == 3
&& connTable != NULL && connTable->size[0] == weight->size[0], 4,
- "3D weight tensor expected (connTable:size(1) x kH x kW)"
+ "3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
);
const int kH = (int)weight->size[1];
@@ -62,8 +62,8 @@ void THNN_(SpatialFullConvolutionMap_updateOutput)(
for (k = 0; k < nweight; k++)
{
/* get offsets for input/output */
- int o = (int)connTable_data[k*2+1]-1;
- int i = (int)connTable_data[k*2+0]-1;
+ int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
+ int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
if (o == p)
{
@@ -91,7 +91,7 @@ void THNN_(SpatialFullConvolutionMap_updateGradInput)(
THArgCheck(
weight != NULL && weight->nDimension == 3
&& connTable != NULL && connTable->size[0] == weight->size[0], 5,
- "3D weight tensor expected (connTable:size(1) x kH x kW)"
+ "3D weight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
);
/* contiguous */
@@ -125,8 +125,8 @@ void THNN_(SpatialFullConvolutionMap_updateGradInput)(
int nkernel = connTable->size[0];
for (k = 0; k < nkernel; k++)
{
- int o = (int)connTable_data[k*2+1]-1;
- int i = (int)connTable_data[k*2+0]-1;
+ int o = (int)connTable_data[k*2+1] - TH_INDEX_BASE;
+ int i = (int)connTable_data[k*2+0] - TH_INDEX_BASE;
if (i == p)
{
/* gradient to input */
@@ -154,7 +154,7 @@ void THNN_(SpatialFullConvolutionMap_accGradParameters)(
THArgCheck(
gradWeight != NULL && gradWeight->nDimension == 3
&& connTable != NULL && connTable->size[0] == gradWeight->size[0], 5,
- "3D gradWeight tensor expected (connTable:size(1) x kH x kW)"
+ "3D gradWeight tensor expected (connTable:size(%d) x kH x kW)", TH_INDEX_BASE
);
/* contiguous */
@@ -191,8 +191,8 @@ void THNN_(SpatialFullConvolutionMap_accGradParameters)(
#pragma omp parallel for private(k)
for (k = 0; k < nkernel; k++)
{
- int o = (int)THTensor_(get2d)(connTable,k,1)-1;
- int i = (int)THTensor_(get2d)(connTable,k,0)-1;
+ int o = (int)THTensor_(get2d)(connTable,k,1) - TH_INDEX_BASE;
+ int i = (int)THTensor_(get2d)(connTable,k,0) - TH_INDEX_BASE;
/* gradient to kernel */
THTensor_(validXCorr2DRevptr)(
diff --git a/lib/THNN/generic/SpatialMaxPooling.c b/lib/THNN/generic/SpatialMaxPooling.c
index d28fe85..3daef1d 100644
--- a/lib/THNN/generic/SpatialMaxPooling.c
+++ b/lib/THNN/generic/SpatialMaxPooling.c
@@ -16,7 +16,10 @@ static void THNN_(SpatialMaxPooling_updateOutput_frame)(
int dW,
int dH,
int padW,
- int padH)
+ int padH,
+ int dilationW,
+ int dilationH
+ )
{
long k;
#pragma omp parallel for private(k)
@@ -31,10 +34,12 @@ static void THNN_(SpatialMaxPooling_updateOutput_frame)(
{
long hstart = i * dH - padH;
long wstart = j * dW - padW;
- long hend = fminf(hstart + kH, iheight);
- long wend = fminf(wstart + kW, iwidth);
- hstart = fmaxf(hstart, 0);
- wstart = fmaxf(wstart, 0);
+ long hend = fminf(hstart + (kH - 1) * dilationH + 1, iheight);
+ long wend = fminf(wstart + (kW - 1) * dilationW + 1, iwidth);
+ while(hstart < 0)
+ hstart += dilationH;
+ while(wstart < 0)
+ wstart += dilationW;
/* local pointers */
real *op = output_p + k*owidth*oheight + i*owidth + j;
@@ -45,9 +50,9 @@ static void THNN_(SpatialMaxPooling_updateOutput_frame)(
real maxval = -THInf;
long tcntr = 0;
long x,y;
- for(y = hstart; y < hend; y++)
+ for(y = hstart; y < hend; y += dilationH)
{
- for(x = wstart; x < wend; x++)
+ for(x = wstart; x < wend; x += dilationW)
{
tcntr = y*iwidth + x;
real val = *(ip + tcntr);
@@ -63,7 +68,7 @@ static void THNN_(SpatialMaxPooling_updateOutput_frame)(
*op = maxval;
/* store location of max */
- *indp = maxindex + 1;
+ *indp = maxindex + TH_INDEX_BASE;
}
}
}
@@ -80,6 +85,8 @@ void THNN_(SpatialMaxPooling_updateOutput)(
int dH,
int padW,
int padH,
+ int dilationW,
+ int dilationH,
bool ceil_mode)
{
int dimw = 2;
@@ -97,31 +104,34 @@ void THNN_(SpatialMaxPooling_updateOutput)(
THArgCheck(input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");
- if (input->nDimension == 4)
+ if (input->nDimension == 4)
{
nbatch = input->size[0];
dimw++;
dimh++;
}
THArgCheck(input->size[dimw] >= kW - padW && input->size[dimh] >= kH - padH, 2, "input image smaller than kernel size");
-
THArgCheck(kW/2 >= padW && kH/2 >= padH, 2, "pad should be smaller than half of kernel size");
-
+
/* sizes */
nslices = input->size[dimh-1];
iheight = input->size[dimh];
iwidth = input->size[dimw];
if (ceil_mode)
{
- oheight = (long)(ceil((float)(iheight - kH + 2*padH) / dH)) + 1;
- owidth = (long)(ceil((float)(iwidth - kW + 2*padW) / dW)) + 1;
+ oheight = (long)(ceil((float)(iheight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
+ owidth = (long)(ceil((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
}
else
{
- oheight = (long)(floor((float)(iheight - kH + 2*padH) / dH)) + 1;
- owidth = (long)(floor((float)(iwidth - kW + 2*padW) / dW)) + 1;
+ oheight = (long)(floor((float)(iheight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
+ owidth = (long)(floor((float)(iwidth - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
}
+ if (owidth < 1 || oheight < 1)
+ THError("Given input size: (%dx%dx%d). Calculated output size: (%dx%dx%d). Output size is too small",
+ nslices,iheight,iwidth,nslices,oheight,owidth);
+
if (padW || padH)
{
// ensure that the last pooling starts inside the image
@@ -151,7 +161,9 @@ void THNN_(SpatialMaxPooling_updateOutput)(
iwidth, iheight,
owidth, oheight,
kW, kH, dW, dH,
- padW, padH);
+ padW, padH,
+ dilationW, dilationH
+ );
}
else
{
@@ -174,7 +186,9 @@ void THNN_(SpatialMaxPooling_updateOutput)(
iwidth, iheight,
owidth, oheight,
kW, kH, dW, dH,
- padW, padH);
+ padW, padH,
+ dilationW, dilationH
+ );
}
}
@@ -209,7 +223,7 @@ static void THNN_(SpatialMaxPooling_updateGradInput_frame)(
for(j = 0; j < owidth; j++)
{
/* retrieve position of max */
- long maxp = ind_p_k[i*owidth + j] - 1;
+ long maxp = ind_p_k[i*owidth + j] - TH_INDEX_BASE;
/* update gradient */
gradInput_p_k[maxp] += gradOutput_p_k[i*owidth + j];
}
@@ -229,6 +243,8 @@ void THNN_(SpatialMaxPooling_updateGradInput)(
int dH,
int padW,
int padH,
+ int dilationW,
+ int dilationH,
bool ceil_mode)
{
int dimw = 2;
diff --git a/lib/THNN/generic/SpatialMaxUnpooling.c b/lib/THNN/generic/SpatialMaxUnpooling.c
index 6e7a76e..cd1739b 100644
--- a/lib/THNN/generic/SpatialMaxUnpooling.c
+++ b/lib/THNN/generic/SpatialMaxUnpooling.c
@@ -11,7 +11,7 @@ static void THNN_(SpatialMaxUnpooling_updateOutput_frame)(real *input_p, real *o
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
- {
+ {
real *output_p_k = output_p + k*owidth*oheight;
real *input_p_k = input_p + k*iwidth*iheight;
real *ind_p_k = ind_p + k*iwidth*iheight;
@@ -21,7 +21,7 @@ static void THNN_(SpatialMaxUnpooling_updateOutput_frame)(real *input_p, real *o
{
for(j = 0; j < iwidth; j++)
{
- maxp = ind_p_k[i*iwidth + j] - 1; /* retrieve position of max */
+ maxp = ind_p_k[i*iwidth + j] - TH_INDEX_BASE; /* retrieve position of max */
if(maxp<0 || maxp>=owidth*oheight){
THError("invalid max index %d, owidth= %d, oheight= %d",maxp,owidth,oheight);
}
@@ -52,9 +52,9 @@ void THNN_(SpatialMaxUnpooling_updateOutput)(
THArgCheck(input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");
if (!THTensor_(isSameSizeAs)(input, indices)){
THError("Invalid input size w.r.t current indices size");
- }
+ }
- if (input->nDimension == 4)
+ if (input->nDimension == 4)
{
nbatch = input->size[0];
dimw++;
@@ -131,11 +131,11 @@ static void THNN_(SpatialMaxUnpooling_updateGradInput_frame)(real *gradInput_p,
for(i = 0; i < iheight; i++)
{
for(j = 0; j < iwidth; j++)
- {
- maxp = ind_p_k[i*iwidth + j] - 1; /* retrieve position of max */
+ {
+ maxp = ind_p_k[i*iwidth + j] - TH_INDEX_BASE; /* retrieve position of max */
if(maxp<0 || maxp>=owidth*oheight){
THError("invalid max index %d, owidth= %d, oheight= %d",maxp,owidth,oheight);
- }
+ }
gradInput_p_k[i*iwidth + j] = gradOutput_p_k[maxp]; /* update gradient */
}
}
@@ -162,7 +162,7 @@ void THNN_(SpatialMaxUnpooling_updateGradInput)(
if (!THTensor_(isSameSizeAs)(input, indices)){
THError("Invalid input size w.r.t current indices size");
- }
+ }
/* get contiguous gradOutput and indices */
gradOutput = THTensor_(newContiguous)(gradOutput);
diff --git a/lib/THNN/generic/SpatialUpSamplingBilinear.c b/lib/THNN/generic/SpatialUpSamplingBilinear.c
new file mode 100644
index 0000000..78290b6
--- /dev/null
+++ b/lib/THNN/generic/SpatialUpSamplingBilinear.c
@@ -0,0 +1,127 @@
+// Adapted from interp.cpp from Caffe util by Pauline Luc
+// Originally developed by George Papandreou
+
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialUpSamplingBilinear.c"
+#else
+
+void THNN_(SpatialUpSamplingBilinear_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output){
+ input = THTensor_(newContiguous)(input);
+ output = THTensor_(newContiguous)(output);
+ THTensor_(zero)(output);
+ real *idata = THTensor_(data)(input);
+ real *odata = THTensor_(data)(output);
+ int channels = THTensor_(size)(input, 0) * THTensor_(size)(input, 1);
+ int height1 = THTensor_(size)(input, 2);
+ int width1 = THTensor_(size)(input, 3);
+ int height2 = THTensor_(size)(output, 2);
+ int width2 = THTensor_(size)(output, 3);
+ THAssert(height1 > 0 && width1 > 0 && height2 > 0 && width2 > 0);
+ // special case: just copy
+ if (height1 == height2 && width1 == width2) {
+ for (int h2 = 0; h2 < height2; ++h2) {
+ const int h1 = h2;
+ for (int w2 = 0; w2 < width2; ++w2) {
+ const int w1 = w2;
+ const real* pos1 = &idata[h1 * width1 + w1];
+ real* pos2 = &odata[h2 * width2 + w2];
+ for (int c = 0; c < channels; ++c) {
+ pos2[0] = pos1[0];
+ pos1 += width1 * height1;
+ pos2 += width2 * height2;
+ }
+ }
+ }
+ return;
+ }
+ const float rheight =(height2 > 1) ? (float)(height1 - 1)/(height2 - 1) : 0.f;
+ const float rwidth = (width2 > 1) ? (float)(width1 - 1) / (width2 - 1) : 0.f;
+ for (int h2 = 0; h2 < height2; ++h2) {
+ const float h1r = rheight * h2;
+ const int h1 = h1r;
+ const int h1p = (h1 < height1 - 1) ? 1 : 0;
+ const real h1lambda = h1r - h1;
+ const real h0lambda = (real)1. - h1lambda;
+ for (int w2 = 0; w2 < width2; ++w2) {
+ const float w1r = rwidth * w2;
+ const int w1 = w1r;
+ const int w1p = (w1 < width1 - 1) ? 1 : 0;
+ const real w1lambda = w1r - w1;
+ const real w0lambda = (real)1. - w1lambda;
+ const real* pos1 = &idata[h1 * width1 + w1];
+ real* pos2 = &odata[h2 * width2 + w2];
+ for (int c = 0; c < channels; ++c) {
+ pos2[0] = h0lambda * (w0lambda * pos1[0]+ w1lambda * pos1[w1p])
+ + h1lambda * (w0lambda * pos1[h1p * width1]
+ + w1lambda * pos1[h1p * width1 + w1p]);
+ pos1 += width1 * height1;
+ pos2 += width2 * height2;
+ }
+ }
+ }
+}
+
+void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
+ THNNState *state,
+ THTensor *gradOutput,
+ THTensor *gradInput){
+ gradInput = THTensor_(newContiguous)(gradInput);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ THTensor_(zero)(gradInput);
+ real *data1 = THTensor_(data)(gradInput);
+ real *data2 = THTensor_(data)(gradOutput);
+ int channels = THTensor_(size)(gradInput, 0) * THTensor_(size)(gradInput, 1);
+ int height1 = THTensor_(size)(gradInput, 2);
+ int width1 = THTensor_(size)(gradInput, 3);
+ int height2 = THTensor_(size)(gradOutput, 2);
+ int width2 = THTensor_(size)(gradOutput, 3);
+ THAssert(height1 > 0 && width1 > 0 && height2 > 0 && width2 > 0);
+ // special case: same-size matching grids
+ if (height1 == height2 && width1 == width2) {
+ for (int h2 = 0; h2 < height2; ++h2) {
+ const int h1 = h2;
+ for (int w2 = 0; w2 < width2; ++w2) {
+ const int w1 = w2;
+ real* pos1 = &data1[h1 * width1 + w1];
+ const real* pos2 = &data2[h2 * width2 + w2];
+ for (int c = 0; c < channels; ++c) {
+ pos1[0] += pos2[0];
+ pos1 += width1 * height1;
+ pos2 += width2 * height2;
+ }
+ }
+ }
+ return;
+ }
+ const float rheight =(height2 > 1) ? (float)(height1 - 1)/(height2 - 1) : 0.f;
+ const float rwidth = (width2 > 1) ? (float)(width1 - 1)/(width2 - 1) : 0.f;
+ for (int h2 = 0; h2 < height2; ++h2) {
+ const float h1r = rheight * h2;
+ const int h1 = h1r;
+ const int h1p = (h1 < height1 - 1) ? 1 : 0;
+ const real h1lambda = h1r - h1;
+ const real h0lambda = (real)1. - h1lambda;
+ for (int w2 = 0; w2 < width2; ++w2) {
+ const float w1r = rwidth * w2;
+ const int w1 = w1r;
+ const int w1p = (w1 < width1 - 1) ? 1 : 0;
+ const real w1lambda = w1r - w1;
+ const real w0lambda = (real)1. - w1lambda;
+ real* pos1 = &data1[h1 * width1 + w1];
+ const real* pos2 = &data2[h2 * width2 + w2];
+ for (int c = 0; c < channels; ++c) {
+ pos1[0] += h0lambda * w0lambda * pos2[0];
+ pos1[w1p] += h0lambda * w1lambda * pos2[0];
+ pos1[h1p * width1] += h1lambda * w0lambda * pos2[0];
+ pos1[h1p * width1 + w1p] += h1lambda * w1lambda * pos2[0];
+ pos1 += width1 * height1;
+ pos2 += width2 * height2;
+ }
+ }
+ }
+}
+
+#endif
diff --git a/lib/THNN/generic/THNN.h b/lib/THNN/generic/THNN.h
index 1600fb1..7ad6f70 100644
--- a/lib/THNN/generic/THNN.h
+++ b/lib/THNN/generic/THNN.h
@@ -106,14 +106,16 @@ TH_API void THNN_(HardTanh_updateOutput)(
THTensor *input, // input tensor
THTensor *output, // [OUT] output tensor
real min_val, // lower threshold
- real max_val); // upper threshold
+ real max_val, // upper threshold
+ bool inplace); // if true, compute the result in-place on the input tensor
TH_API void THNN_(HardTanh_updateGradInput)(
THNNState *state, // library's state
THTensor *input, // input tensor
THTensor *gradOutput, // gradient w.r.t. module's output
THTensor *gradInput, // [OUT] gradient w.r.t. the input
real min_val, // lower threshold
- real max_val); // upper threshold
+ real max_val, // upper threshold
+ bool inplace); // if true, compute the gradient in-place on gradOutput
TH_API void THNN_(L1Cost_updateOutput)(
THNNState *state, // library's state
@@ -122,7 +124,7 @@ TH_API void THNN_(L1Cost_updateOutput)(
TH_API void THNN_(L1Cost_updateGradInput)(
THNNState *state, // library's state
THTensor *input, // input tensor
- THTensor *gradOutput, // gradient w.r.t module's output
+ THTensor *gradOutput, // [OPTIONAL] gradient w.r.t module's output
THTensor *gradInput); // [OUT] gradient w.r.t the input
TH_API void THNN_(LeakyReLU_updateOutput)(
@@ -168,8 +170,8 @@ TH_API void THNN_(LookupTable_accGradParameters)(
THTensor *gradOutput,
THTensor *gradWeight,
THIntegerTensor *count,
- THTensor *sorted,
- THTensor *indices,
+ THTensor *sorted, // [OPTIONAL]
+ THTensor *indices, // [OPTIONAL]
bool scaleGradByFreq,
int paddingValue,
real scale);
@@ -245,7 +247,7 @@ TH_API void THNN_(MultiMarginCriterion_updateOutput)(
THTensor *output,
bool sizeAverage,
int p,
- THTensor* weights,
+ THTensor* weights, // [OPTIONAL]
real margin);
TH_API void THNN_(MultiMarginCriterion_updateGradInput)(
THNNState *state,
@@ -254,7 +256,7 @@ TH_API void THNN_(MultiMarginCriterion_updateGradInput)(
THTensor *gradInput,
bool sizeAverage,
int p,
- THTensor *weights,
+ THTensor *weights, // [OPTIONAL]
real margin);
TH_API void THNN_(PReLU_updateOutput)(
@@ -537,8 +539,8 @@ TH_API void THNN_(BatchNormalization_updateOutput)(
THNNState *state,
THTensor *input,
THTensor *output,
- THTensor *weight,
- THTensor *bias,
+ THTensor *weight, // [OPTIONAL]
+ THTensor *bias, // [OPTIONAL]
THTensor *running_mean,
THTensor *running_var,
THTensor *save_mean,
@@ -550,10 +552,10 @@ TH_API void THNN_(BatchNormalization_backward)(
THNNState *state,
THTensor *input,
THTensor *gradOutput,
- THTensor *gradInput,
- THTensor *gradWeight,
- THTensor *gradBias,
- THTensor *weight,
+ THTensor *gradInput, // [OPTIONAL]
+ THTensor *gradWeight, // [OPTIONAL]
+ THTensor *gradBias, // [OPTIONAL]
+ THTensor *weight, // [OPTIONAL]
THTensor *running_mean,
THTensor *running_var,
THTensor *save_mean,
@@ -600,7 +602,7 @@ TH_API void THNN_(SpatialConvolutionMM_updateOutput)(
THTensor *input,
THTensor *output,
THTensor *weight,
- THTensor *bias,
+ THTensor *bias, // [OPTIONAL]
THTensor *finput,
THTensor *fgradInput,
int kW, int kH,
@@ -622,7 +624,7 @@ TH_API void THNN_(SpatialConvolutionMM_accGradParameters)(
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
- THTensor *gradBias,
+ THTensor *gradBias, // [OPTIONAL]
THTensor *finput,
THTensor *fgradInput,
int kW, int kH,
@@ -726,7 +728,7 @@ TH_API void THNN_(SpatialFullConvolution_updateOutput)(
THTensor *input,
THTensor *output,
THTensor *weight,
- THTensor *bias,
+ THTensor *bias, // [OPTIONAL]
THTensor *columns,
THTensor *ones,
int kW, int kH,
@@ -749,7 +751,7 @@ TH_API void THNN_(SpatialFullConvolution_accGradParameters)(
THTensor *input,
THTensor *gradOutput,
THTensor *gradWeight,
- THTensor *gradBias,
+ THTensor *gradBias, // [OPTIONAL]
THTensor *columns,
THTensor *ones,
int kW, int kH,
@@ -792,43 +794,43 @@ TH_API void THNN_(SpatialFullConvolutionMap_accGradParameters)(
real scale); // scaling factor
TH_API void THNN_(SpatialDilatedConvolution_updateOutput)(
- THNNState *state,
- THTensor *input,
- THTensor *output,
- THTensor *weight,
- THTensor *bias,
- THTensor *columns,
- THTensor *ones,
- int kW, int kH,
- int dW, int dH,
- int padW, int padH,
- int dilationW, int dilationH);
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias, // [OPTIONAL]
+ THTensor *columns,
+ THTensor *ones,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int dilationW, int dilationH);
TH_API void THNN_(SpatialDilatedConvolution_updateGradInput)(
- THNNState *state,
- THTensor *input,
- THTensor *gradOutput,
- THTensor *gradInput,
- THTensor *weight,
- THTensor *gradColumns,
- int kW, int kH,
- int dW, int dH,
- int padW, int padH,
- int dilationW, int dilationH);
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *gradColumns,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int dilationW, int dilationH);
TH_API void THNN_(SpatialDilatedConvolution_accGradParameters)(
- THNNState *state,
- THTensor *input,
- THTensor *gradOutput,
- THTensor *gradWeight,
- THTensor *gradBias,
- THTensor *columns,
- THTensor *ones,
- int kW, int kH,
- int dW, int dH,
- int padW, int padH,
- int dilationW, int dilationH,
- real scale);
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias, // [OPTIONAL]
+ THTensor *columns,
+ THTensor *ones,
+ int kW, int kH,
+ int dW, int dH,
+ int padW, int padH,
+ int dilationW, int dilationH,
+ real scale);
TH_API void THNN_(SpatialMaxPooling_updateOutput)(
THNNState *state,
@@ -838,6 +840,7 @@ TH_API void THNN_(SpatialMaxPooling_updateOutput)(
int kW, int kH,
int dW, int dH,
int padW, int padH,
+ int dilationW, int dilationH,
bool ceil_mode);
TH_API void THNN_(SpatialMaxPooling_updateGradInput)(
THNNState *state,
@@ -848,6 +851,7 @@ TH_API void THNN_(SpatialMaxPooling_updateGradInput)(
int kW, int kH,
int dW, int dH,
int padW, int padH,
+ int dilationW, int dilationH,
bool ceil_mode);
TH_API void THNN_(SpatialMaxUnpooling_updateOutput)(
@@ -902,6 +906,15 @@ TH_API void THNN_(SpatialUpSamplingNearest_updateGradInput)(
THTensor *gradInput,
int scale_factor);
+TH_API void THNN_(SpatialUpSamplingBilinear_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output);
+TH_API void THNN_(SpatialUpSamplingBilinear_updateGradInput)(
+ THNNState *state,
+ THTensor *gradOutput,
+ THTensor *gradInput);
+
TH_API void THNN_(unfolded_acc)(
THTensor *finput,
THTensor *input,
@@ -1031,6 +1044,45 @@ TH_API void THNN_(VolumetricFullConvolution_accGradParameters)(
int aT, int aW, int aH, // extra output adjustment
real scale); // scaling factor
+TH_API void THNN_(VolumetricDilatedConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *columns,
+ THTensor *ones,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int padT, int padW, int padH,
+ int dilationT, int dilationW, int dilationH);
+
+TH_API void THNN_(VolumetricDilatedConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *gradColumns,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int padT, int padW, int padH,
+ int dilationT, int dilationW, int dilationH);
+
+TH_API void THNN_(VolumetricDilatedConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *columns,
+ THTensor *ones,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int padT, int padW, int padH,
+ int dilationT, int dilationW, int dilationH,
+ real scale);
+
TH_API void THNN_(VolumetricMaxPooling_updateOutput)(
THNNState *state,
THTensor *input,
@@ -1067,30 +1119,51 @@ TH_API void THNN_(VolumetricMaxUnpooling_updateGradInput)(
int dT, int dW, int dH,
int pT, int pW, int pH);
-TH_API void THNN_(SpatialReflectionPadding_updateOutput)(THNNState *state,
- THTensor *input,
- THTensor *output,
- int pad_l, int pad_r,
- int pad_t, int pad_b);
-
-TH_API void THNN_(SpatialReflectionPadding_updateGradInput)(THNNState *state,
- THTensor *input,
- THTensor *gradOutput,
- THTensor *gradInput,
- int pad_l, int pad_r,
- int pad_t, int pad_b);
-
-TH_API void THNN_(SpatialReplicationPadding_updateOutput)(THNNState *state,
- THTensor *input,
- THTensor *output,
- int pad_l, int pad_r,
- int pad_t, int pad_b);
-
-TH_API void THNN_(SpatialReplicationPadding_updateGradInput)(THNNState *state,
- THTensor *input,
- THTensor *gradOutput,
- THTensor *gradInput,
- int pad_l, int pad_r,
- int pad_t, int pad_b);
+TH_API void THNN_(SpatialReflectionPadding_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b);
+
+TH_API void THNN_(SpatialReflectionPadding_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b);
+
+TH_API void THNN_(SpatialReplicationPadding_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b);
+
+TH_API void THNN_(SpatialReplicationPadding_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int pad_l, int pad_r,
+ int pad_t, int pad_b);
+
+TH_API void THNN_(VolumetricReplicationPadding_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int pleft, int pright,
+ int ptop, int pbottom,
+ int pfront, int pback);
+
+TH_API void THNN_(VolumetricReplicationPadding_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int pleft, int pright,
+ int ptop, int pbottom,
+ int pfront, int pback);
#endif
diff --git a/lib/THNN/generic/VolumetricConvolutionMM.c b/lib/THNN/generic/VolumetricConvolutionMM.c
index a226350..8fef1cf 100644
--- a/lib/THNN/generic/VolumetricConvolutionMM.c
+++ b/lib/THNN/generic/VolumetricConvolutionMM.c
@@ -395,6 +395,10 @@ void THNN_(VolumetricConvolutionMM_updateGradInput)(
THTensor_(resizeAs)(gradInput, input);
THTensor_(resizeAs)(fgradInput, finput);
+ // depending on the BLAS library, fgradInput (result tensor) might
+ // be left uninitialized on zero alpha, which might lead to weird behavior
+ // hence, to be safe, zero it
+ THTensor_(zero)(fgradInput);
THTensor_(transpose)(weight, weight, 0, 1);
if (input->nDimension == 4)
diff --git a/lib/THNN/generic/VolumetricDilatedConvolution.c b/lib/THNN/generic/VolumetricDilatedConvolution.c
new file mode 100644
index 0000000..1a9cc93
--- /dev/null
+++ b/lib/THNN/generic/VolumetricDilatedConvolution.c
@@ -0,0 +1,356 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricDilatedConvolution.c"
+#else
+
+void THNN_(VolumetricDilatedConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *columns,
+ THTensor *ones,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int padT, int padW, int padH,
+ int dilationT, int dilationW, int dilationH)
+{
+ THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch mode) tensor is expected, but got: %d", input->nDimension);
+ THArgCheck(weight->nDimension == 5, 4, "weight tensor must be 5D (nOutputPlane,nInputPlane,kT,kH,kW)");
+ THArgCheck(!bias || weight->size[0] == bias->size[0], 4, "nOutputPlane mismatch in weight and bias");
+ THArgCheck(kT > 0 && kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
+ THArgCheck(dT > 0 && dW > 0 && dH > 0, 10, "stride should be greater than zero");
+
+ // Params:
+ int nInputPlane = weight->size[1];
+ int nOutputPlane = weight->size[0];
+
+ int batch = 1;
+ if (input->nDimension == 4) {
+ THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane dont match. Expected: %d, got %d", nInputPlane, input->size[0]);
+ // Force batch
+ batch = 0;
+ THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
+ } else {
+ THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane dont match. Expected: %d, got %d", nInputPlane, input->size[1]);
+ }
+
+ long inputDepth = input->size[2];
+ long inputHeight = input->size[3];
+ long inputWidth = input->size[4];
+ long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
+ long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+ long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+
+ if (outputDepth < 1 || outputWidth < 1 || outputHeight < 1)
+ THError("Given input size: (%dx%dx%dx%d). Calculated output size: (%dx%dx%dx%d). Output size is too small",
+ nInputPlane,inputDepth,inputHeight,inputWidth,nOutputPlane,outputDepth,outputHeight,outputWidth);
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Resize output
+ THTensor_(resize5d)(output, batchSize, nOutputPlane, outputDepth, outputHeight, outputWidth);
+ THTensor_(zero)(output);
+
+ // Resize temporary columns
+ THTensor_(resize2d)(columns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth);
+
+ // Define a buffer of ones, for bias accumulation
+ // Note: this buffer can be shared with other modules, it only ever gets increased,
+ // and always contains ones.
+ if (ones->nDimension != 3 ||
+ ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth) {
+ // Resize plane and fill with ones...
+ THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
+ THTensor_(fill)(ones, 1);
+ }
+
+ // Helpers
+ THTensor *input_n = THTensor_(new)();
+ THTensor *output_n = THTensor_(new)();
+
+ // For each elt in batch, do:
+ for (int elt = 0; elt < batchSize; elt ++) {
+ // Matrix multiply per output:
+ THTensor_(select)(input_n, input, 0, elt);
+ THTensor_(select)(output_n, output, 0, elt);
+
+ // Do Bias first:
+ // M,N,K are dims of matrix A and B
+ long m_ = nOutputPlane;
+ long n_ = outputDepth * outputHeight * outputWidth;
+ long k_ = 1;
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ if (bias) {
+ THBlas_(gemm)(
+ 't', 'n',
+ n_, m_, k_,
+ 1,
+ THTensor_(data)(ones), k_,
+ THTensor_(data)(bias), k_,
+ 0,
+ THTensor_(data)(output_n), n_
+ );
+ } else {
+ THTensor_(zero)(output_n);
+ }
+
+ // Extract columns:
+ THNN_(vol2col)(
+ THTensor_(data)(input_n),
+ nInputPlane, inputDepth, inputHeight, inputWidth,
+ kT, kH, kW, padT, padH, padW, dT, dH, dW,
+ dilationT, dilationH, dilationW,
+ THTensor_(data)(columns)
+ );
+
+ // M,N,K are dims of matrix A and B
+ long m = nOutputPlane;
+ long n = columns->size[1];
+ long k = nInputPlane*kT*kH*kW;
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 'n', 'n',
+ n, m, k,
+ 1,
+ THTensor_(data)(columns), n,
+ THTensor_(data)(weight), k,
+ 1,
+ THTensor_(data)(output_n), n
+ );
+ }
+
+ // Free
+ THTensor_(free)(input_n);
+ THTensor_(free)(output_n);
+
+ // Resize output
+ if (batch == 0) {
+ THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
+ THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
+ }
+}
+
+void THNN_(VolumetricDilatedConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *gradColumns,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int padT, int padW, int padH,
+ int dilationT, int dilationW, int dilationH)
+{
+ THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch mode) tensor is expected");
+ THArgCheck(gradOutput->nDimension == 4 || gradOutput->nDimension == 5, 3, "4D or 5D (batch mode) tensor is expected");
+ THArgCheck(weight->nDimension == 5, 4, "weight tensor must be 5D (nOutputPlane,nInputPlane,kT,kH,kW)");
+ THArgCheck(kT > 0 && kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
+ THArgCheck(dT > 0 && dW > 0 && dH > 0, 10, "stride should be greater than zero");
+
+ // Params
+ int nInputPlane = weight->size[1];
+ int nOutputPlane = weight->size[0];
+
+ int batch = 1;
+ if (input->nDimension == 4) {
+ THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane dont match");
+ // Force batch
+ batch = 0;
+ THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
+ THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
+ } else {
+ THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane dont match");
+ }
+
+ long inputDepth = input->size[2];
+ long inputWidth = input->size[4];
+ long inputHeight = input->size[3];
+ long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
+ long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+ long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Resize output
+ THTensor_(resize5d)(gradInput, batchSize, nInputPlane, inputDepth, inputHeight, inputWidth);
+
+ // Resize temporary columns
+ THTensor_(resize2d)(gradColumns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth);
+ THTensor_(zero)(gradColumns);
+
+ // Helpers
+ THTensor *gradInput_n = THTensor_(new)();
+ THTensor *gradOutput_n = THTensor_(new)();
+
+ // For each elt in batch, do:
+ for (int elt = 0; elt < batchSize; elt ++) {
+ // Matrix multiply per sample:
+ THTensor_(select)(gradInput_n, gradInput, 0, elt);
+ THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
+
+ // M,N,K are dims of matrix A and B
+ long m = nInputPlane*kT*kW*kH;
+ long n = gradColumns->size[1];
+ long k = nOutputPlane;
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 'n', 't',
+ n, m, k,
+ 1,
+ THTensor_(data)(gradOutput_n), n,
+ THTensor_(data)(weight), m,
+ 0,
+ THTensor_(data)(gradColumns), n
+ );
+
+ // Unpack columns back into input:
+ THNN_(col2vol)(
+ THTensor_(data)(gradColumns),
+ nInputPlane, inputDepth, inputHeight, inputWidth,
+ kT, kH, kW, padT, padH, padW, dT, dH, dW,
+ dilationT, dilationH, dilationW,
+ THTensor_(data)(gradInput_n)
+ );
+ }
+
+ // Free
+ THTensor_(free)(gradInput_n);
+ THTensor_(free)(gradOutput_n);
+
+ // Resize output
+ if (batch == 0) {
+ THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
+ THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
+ THTensor_(resize4d)(gradInput, nInputPlane, inputDepth, inputHeight, inputWidth);
+ }
+}
+
+void THNN_(VolumetricDilatedConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *columns,
+ THTensor *ones,
+ int kT, int kW, int kH,
+ int dT, int dW, int dH,
+ int padT, int padW, int padH,
+ int dilationT, int dilationW, int dilationH,
+ real scale)
+{
+ THArgCheck(input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch mode) tensor is expected");
+ THArgCheck(gradOutput->nDimension == 4 || gradOutput->nDimension == 5, 3, "4D or 5D (batch mode) tensor is expected");
+ THArgCheck(gradWeight->nDimension == 5, 4, "gradWeight tensor must be 5D (nOutputPlane,nInputPlane,kT,kH,kW)");
+ THArgCheck(kT > 0 && kW > 0 && kH > 0, 8, "kernel size should be greater than zero");
+ THArgCheck(dT > 0 && dW > 0 && dH > 0, 10, "stride should be greater than zero");
+ THArgCheck(!gradBias || gradWeight->size[0] == gradBias->size[0], 4, "nOutputPlane mismatch in gradWeight and gradBias");
+
+ // Params
+ int nInputPlane = gradWeight->size[1];
+ int nOutputPlane = gradWeight->size[0];
+
+ int batch = 1;
+ if (input->nDimension == 4) {
+ THArgCheck(input->size[0] == nInputPlane, 2, "input channels and nInputPlane dont match");
+ // Force batch
+ batch = 0;
+ THTensor_(resize5d)(input, 1, input->size[0], input->size[1], input->size[2], input->size[3]);
+ THTensor_(resize5d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2], gradOutput->size[3]);
+ } else {
+ THArgCheck(input->size[1] == nInputPlane, 2, "input channels and nInputPlane dont match");
+ }
+
+ long inputDepth = input->size[2];
+ long inputWidth = input->size[4];
+ long inputHeight = input->size[3];
+ long outputDepth = (inputDepth + 2*padT - (dilationT * (kT - 1) + 1)) / dT + 1;
+ long outputWidth = (inputWidth + 2*padW - (dilationW * (kW - 1) + 1)) / dW + 1;
+ long outputHeight = (inputHeight + 2*padH - (dilationH * (kH - 1) + 1)) / dH + 1;
+
+ // Batch size + input planes
+ long batchSize = input->size[0];
+
+ // Define a buffer of ones, for bias accumulation
+ if (ones->nDimension != 3 || ones->size[0]*ones->size[1]*ones->size[2] < outputDepth*outputHeight*outputWidth) {
+ // Resize plane and fill with ones...
+ THTensor_(resize3d)(ones, outputDepth, outputHeight, outputWidth);
+ THTensor_(fill)(ones, 1);
+ }
+
+ // Resize temporary columns
+ THTensor_(resize2d)(columns, nInputPlane*kT*kW*kH, outputDepth*outputHeight*outputWidth);
+
+ // Helpers
+ THTensor *input_n = THTensor_(new)();
+ THTensor *gradOutput_n = THTensor_(new)();
+
+ // For each elt in batch, do:
+ for (int elt = 0; elt < batchSize; elt ++) {
+ // Matrix multiply per output:
+ THTensor_(select)(input_n, input, 0, elt);
+ THTensor_(select)(gradOutput_n, gradOutput, 0, elt);
+
+ // Extract columns:
+ THNN_(vol2col)(
+ THTensor_(data)(input_n),
+ nInputPlane, inputDepth, inputHeight, inputWidth,
+ kT, kH, kW, padT, padH, padW, dT, dH, dW,
+ dilationT, dilationH, dilationW,
+ THTensor_(data)(columns)
+ );
+
+ // M,N,K are dims of matrix A and B
+ long m = nOutputPlane;
+ long n = nInputPlane*kT*kW*kH;
+ long k = columns->size[1];
+
+ // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
+ THBlas_(gemm)(
+ 't', 'n',
+ n, m, k,
+ scale,
+ THTensor_(data)(columns), k,
+ THTensor_(data)(gradOutput_n), k,
+ 1,
+ THTensor_(data)(gradWeight), n
+ );
+
+ // Do Bias:
+ // M,N,K are dims of matrix A and B
+ long m_ = nOutputPlane;
+ long k_ = outputDepth * outputHeight * outputWidth;
+
+ // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
+ if (gradBias) {
+ THBlas_(gemv)(
+ 't',
+ k_, m_,
+ scale,
+ THTensor_(data)(gradOutput_n), k_,
+ THTensor_(data)(ones), 1,
+ 1,
+ THTensor_(data)(gradBias), 1
+ );
+ }
+ }
+
+ // Free
+ THTensor_(free)(input_n);
+ THTensor_(free)(gradOutput_n);
+
+ // Resize
+ if (batch == 0) {
+ THTensor_(resize4d)(gradOutput, nOutputPlane, outputDepth, outputHeight, outputWidth);
+ THTensor_(resize4d)(input, nInputPlane, inputDepth, inputHeight, inputWidth);
+ }
+}
+
+#endif
diff --git a/lib/THNN/generic/VolumetricFullConvolution.c b/lib/THNN/generic/VolumetricFullConvolution.c
index 5a6a1a7..4eb36c4 100644
--- a/lib/THNN/generic/VolumetricFullConvolution.c
+++ b/lib/THNN/generic/VolumetricFullConvolution.c
@@ -8,12 +8,13 @@ static void THNN_(vol2col)(
const int kT, const int kH, const int kW,
const int pT, const int pH, const int pW,
const int dT, const int dH, const int dW,
+ const int dilationT, const int dilationH, const int dilationW,
real *data_col)
{
int c, t, h, w;
- int depth_col = (depth + 2 * pT - kT) / dT + 1;
- int height_col = (height + 2 * pH - kH) / dH + 1;
- int width_col = (width + 2 * pW - kW) / dW + 1;
+ int depth_col = (depth + 2 * pT - (dilationT * (kT - 1) + 1)) / dT + 1;
+ int height_col = (height + 2 * pH - (dilationH * (kH - 1) + 1)) / dH + 1;
+ int width_col = (width + 2 * pW - (dilationW * (kW - 1) + 1)) / dW + 1;
int channels_col = channels * kT * kH * kW;
for (c = 0; c < channels_col; ++c)
{
@@ -27,10 +28,12 @@ static void THNN_(vol2col)(
{
for (w = 0; w < width_col; ++w)
{
- int t_pad = t * dT - pT + t_offset;
- int h_pad = h * dH - pH + h_offset;
- int w_pad = w * dW - pW + w_offset;
- if (t_pad >= 0 && t_pad < depth && h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
+ int t_pad = t * dT - pT + t_offset * dilationT;
+ int h_pad = h * dH - pH + h_offset * dilationH;
+ int w_pad = w * dW - pW + w_offset * dilationW;
+ if (t_pad >= 0 && t_pad < depth &&
+ h_pad >= 0 && h_pad < height &&
+ w_pad >= 0 && w_pad < width)
data_col[((c * depth_col + t) * height_col + h) * width_col + w] =
data_vol[((c_vol * depth + t_pad) * height + h_pad) * width + w_pad];
else
@@ -47,13 +50,14 @@ static void THNN_(col2vol)(
const int kT, const int kH, const int kW,
const int pT, const int pH, const int pW,
const int dT, const int dH, const int dW,
+ const int dilationT, const int dilationH, const int dilationW,
real* data_vol)
{
int c, t, h, w;
memset(data_vol, 0, sizeof(real) * depth * height * width * channels);
- int depth_col = (depth + 2 * pT - kT) / dT + 1;
- int height_col = (height + 2 * pH - kH) / dH + 1;
- int width_col = (width + 2 * pW - kW) / dW + 1;
+ int depth_col = (depth + 2 * pT - (dilationT * (kT - 1) + 1)) / dT + 1;
+ int height_col = (height + 2 * pH - (dilationH * (kH - 1) + 1)) / dH + 1;
+ int width_col = (width + 2 * pW - (dilationW * (kW - 1) + 1)) / dW + 1;
int channels_col = channels * kT * kH * kW;
for (c = 0; c < channels_col; ++c)
{
@@ -67,10 +71,12 @@ static void THNN_(col2vol)(
{
for (w = 0; w < width_col; ++w)
{
- int t_pad = t * dT - pT + t_offset;
- int h_pad = h * dH - pH + h_offset;
- int w_pad = w * dW - pW + w_offset;
- if (t_pad >= 0 && t_pad < depth && h_pad >= 0 && h_pad < height && w_pad >= 0 && w_pad < width)
+ int t_pad = t * dT - pT + t_offset * dilationT;
+ int h_pad = h * dH - pH + h_offset * dilationH;
+ int w_pad = w * dW - pW + w_offset * dilationW;
+ if (t_pad >= 0 && t_pad < depth &&
+ h_pad >= 0 && h_pad < height &&
+ w_pad >= 0 && w_pad < width)
data_vol[((c_vol * depth + t_pad) * height + h_pad) * width + w_pad] +=
data_col[((c * depth_col + t) * height_col + h) * width_col + w];
}
@@ -137,6 +143,7 @@ void THNN_(VolumetricFullConvolution_updateOutput)(
// Resize temporary columns
THTensor_(resize2d)(columns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth);
+ THTensor_(zero)(columns);
// Define a buffer of ones, for bias accumulation
// Note: this buffer can be shared with other modules, it only ever gets increased,
@@ -184,6 +191,7 @@ void THNN_(VolumetricFullConvolution_updateOutput)(
kT, kH, kW,
pT, pH, pW,
dT, dH, dW,
+ 1, 1, 1,
THTensor_(data)(output_n)
);
@@ -268,6 +276,7 @@ void THNN_(VolumetricFullConvolution_updateGradInput)(
// Resize output
THTensor_(resize5d)(gradInput, batchSize, nInputPlane, inputDepth, inputHeight, inputWidth);
+ THTensor_(zero)(gradInput);
// Resize temporary columns
THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH*kT, inputDepth*inputHeight*inputWidth);
@@ -291,6 +300,7 @@ void THNN_(VolumetricFullConvolution_updateGradInput)(
kT, kH, kW,
pT, pH, pW,
dT, dH, dW,
+ 1, 1, 1,
THTensor_(data)(gradColumns)
);
@@ -405,6 +415,7 @@ void THNN_(VolumetricFullConvolution_accGradParameters)(
kT, kH, kW,
pT, pH, pW,
dT, dH, dW,
+ 1, 1, 1,
THTensor_(data)(columns)
);
diff --git a/lib/THNN/generic/VolumetricReplicationPadding.c b/lib/THNN/generic/VolumetricReplicationPadding.c
new file mode 100644
index 0000000..c4ab02e
--- /dev/null
+++ b/lib/THNN/generic/VolumetricReplicationPadding.c
@@ -0,0 +1,301 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricReplicationPadding.c"
+#else
+
+static void THNN_(VolumetricReplicationPadding_updateOutput_frame)(
+ real *input_p, real *output_p,
+ long nslices,
+ long iwidth, long iheight, long idepth,
+ long owidth, long oheight, long odepth,
+ int pleft, int pright,
+ int ptop, int pbottom,
+ int pfront, int pback)
+{
+ int iStartX = fmax(0, -pleft);
+ int iStartY = fmax(0, -ptop);
+ int iStartZ = fmax(0, -pfront);
+ int oStartX = fmax(0, pleft);
+ int oStartY = fmax(0, ptop);
+ int oStartZ = fmax(0, pfront);
+
+ long k, ip_x, ip_y, ip_z;
+#pragma omp parallel for private(k, ip_x, ip_y, ip_z)
+ for (k = 0; k < nslices; k++) {
+ long i, j, z;
+ for (z = 0; z < odepth; z++) {
+ for (i = 0; i < oheight; i++) {
+ for (j = 0; j < owidth; j++) {
+ if (j < pleft) {
+ ip_x = pleft;
+ } else if (j >= pleft && j < iwidth + pleft) {
+ ip_x = j;
+ } else {
+ ip_x = iwidth + pleft - 1;
+ }
+ ip_x = ip_x - oStartX + iStartX;
+
+ if (i < ptop) {
+ ip_y = ptop;
+ } else if (i >= ptop && i < iheight + ptop) {
+ ip_y = i;
+ } else {
+ ip_y = iheight + ptop - 1;
+ }
+ ip_y = ip_y - oStartY + iStartY;
+
+ if (z < pfront) {
+ ip_z = pfront;
+ } else if (z >= pfront && z < idepth + pfront) {
+ ip_z = z;
+ } else {
+ ip_z = idepth + pfront - 1;
+ }
+ ip_z = ip_z - oStartZ + iStartZ;
+
+ real *dest_p = output_p + k * owidth * oheight * odepth +
+ z * owidth * oheight + i * owidth + j;
+ real *src_p = input_p + k * iwidth * iheight * idepth +
+ ip_z * iwidth * iheight + ip_y * iwidth + ip_x;
+ *dest_p = *src_p;
+ }
+ }
+ }
+ }
+}
+
+void THNN_(VolumetricReplicationPadding_updateOutput)(THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int pleft, int pright,
+ int ptop, int pbottom,
+ int pfront, int pback)
+{
+ int dimw = 3;
+ int dimh = 2;
+ int dimd = 1;
+ int dimslices = 0;
+ long nbatch = 1;
+ long nslices;
+ long idepth;
+ long iheight;
+ long iwidth;
+ long odepth;
+ long oheight;
+ long owidth;
+ real *input_data;
+ real *output_data;
+
+ THArgCheck(input->nDimension == 4 || input->nDimension == 5,
+ 2, "input must be 4 or 5-dimensional");
+
+ if (input->nDimension == 5)
+ {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ dimd++;
+ dimslices++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimslices];
+ idepth = input->size[dimd];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ odepth = idepth + pfront + pback;
+ oheight = iheight + ptop + pbottom;
+ owidth = iwidth + pleft + pright;
+
+ THArgCheck(owidth >= 1 || oheight >= 1 || odepth >= 1 , 2,
+ "input is too small");
+
+ /* get contiguous input */
+ input = THTensor_(newContiguous)(input);
+
+ /* resize output */
+ if (input->nDimension == 4)
+ {
+ THTensor_(resize4d)(output, nslices, odepth, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+ THNN_(VolumetricReplicationPadding_updateOutput_frame)(
+ input_data, output_data, nslices, iwidth, iheight, idepth,
+ owidth, oheight, odepth, pleft, pright, ptop, pbottom, pfront,
+ pback);
+ }
+ else
+ {
+ long p;
+
+ THTensor_(resize5d)(output, nbatch, nslices, odepth, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++)
+ {
+ THNN_(VolumetricReplicationPadding_updateOutput_frame)(
+ input_data + p * nslices * iwidth * iheight * idepth,
+ output_data + p * nslices * owidth * oheight * odepth,
+ nslices,
+ iwidth, iheight, idepth,
+ owidth, oheight, odepth,
+ pleft, pright,
+ ptop, pbottom,
+ pfront, pback);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(input);
+}
+
+static void THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
+ real *ginput_p, real *goutput_p,
+ long nslices,
+ long iwidth, long iheight, long idepth,
+ long owidth, long oheight, long odepth,
+ int pleft, int pright,
+ int ptop, int pbottom,
+ int pfront, int pback)
+{
+ int iStartX = fmax(0, -pleft);
+ int iStartY = fmax(0, -ptop);
+ int iStartZ = fmax(0, -pfront);
+ int oStartX = fmax(0, pleft);
+ int oStartY = fmax(0, ptop);
+ int oStartZ = fmax(0, pfront);
+
+ long k, ip_x, ip_y, ip_z;
+#pragma omp parallel for private(k, ip_x, ip_y, ip_z)
+ for (k = 0; k < nslices; k++) {
+ long i, j, z;
+ for (z = 0; z < odepth; z++) {
+ for (i = 0; i < oheight; i++) {
+ for (j = 0; j < owidth; j++) {
+ if (j < pleft) {
+ ip_x = pleft;
+ } else if (j >= pleft && j < iwidth + pleft) {
+ ip_x = j;
+ } else {
+ ip_x = iwidth + pleft - 1;
+ }
+ ip_x = ip_x - oStartX + iStartX;
+
+ if (i < ptop) {
+ ip_y = ptop;
+ } else if (i >= ptop && i < iheight + ptop) {
+ ip_y = i;
+ } else {
+ ip_y = iheight + ptop - 1;
+ }
+ ip_y = ip_y - oStartY + iStartY;
+
+ if (z < pfront) {
+ ip_z = pfront;
+ } else if (z >= pfront && z < idepth + pfront) {
+ ip_z = z;
+ } else {
+ ip_z = idepth + pfront - 1;
+ }
+ ip_z = ip_z - oStartZ + iStartZ;
+
+ real *src_p = goutput_p + k * owidth * oheight * odepth +
+ z * owidth * oheight + i * owidth + j;
+ real *dest_p = ginput_p + k * iwidth * iheight * idepth +
+ ip_z * iwidth * iheight + ip_y * iwidth + ip_x;
+ *dest_p += *src_p;
+ }
+ }
+ }
+ }
+}
+
+void THNN_(VolumetricReplicationPadding_updateGradInput)(THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int pleft, int pright,
+ int ptop, int pbottom,
+ int pfront, int pback)
+{
+ int dimw = 3;
+ int dimh = 2;
+ int dimd = 1;
+ int dimslices = 0;
+ long nbatch = 1;
+ long nslices;
+ long idepth;
+ long iheight;
+ long iwidth;
+ long odepth;
+ long oheight;
+ long owidth;
+
+ if (input->nDimension == 5)
+ {
+ nbatch = input->size[0];
+ dimw++;
+ dimh++;
+ dimd++;
+ dimslices++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimslices];
+ idepth = input->size[dimd];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ odepth = idepth + pfront + pback;
+ oheight = iheight + ptop + pbottom;
+ owidth = iwidth + pleft + pright;
+
+ THArgCheck(owidth == THTensor_(size)(gradOutput, dimw), 3,
+ "gradOutput width unexpected");
+ THArgCheck(oheight == THTensor_(size)(gradOutput, dimh), 3,
+ "gradOutput height unexpected");
+ THArgCheck(odepth == THTensor_(size)(gradOutput, dimd), 3,
+ "gradOutput depth unexpected");
+
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ /* backprop */
+ if (input->nDimension == 4) {
+ THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
+ THTensor_(data)(gradInput),
+ THTensor_(data)(gradOutput),
+ nslices,
+ iwidth, iheight, idepth,
+ owidth, oheight, odepth,
+ pleft, pright,
+ ptop, pbottom,
+ pfront, pback);
+ } else {
+ long p;
+#pragma omp parallel for private(p)
+ for (p = 0; p < nbatch; p++) {
+ THNN_(VolumetricReplicationPadding_updateGradInput_frame)(
+ THTensor_(data)(gradInput) + p * nslices * idepth * iheight * iwidth,
+ THTensor_(data)(gradOutput) + p * nslices * odepth * oheight * owidth,
+ nslices,
+ iwidth, iheight, idepth,
+ owidth, oheight, odepth,
+ pleft, pright,
+ ptop, pbottom,
+ pfront, pback);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+}
+
+#endif
diff --git a/lib/THNN/init.c b/lib/THNN/init.c
index 7c0de94..739706c 100644
--- a/lib/THNN/init.c
+++ b/lib/THNN/init.c
@@ -148,6 +148,9 @@
#include "generic/SpatialUpSamplingNearest.c"
#include "THGenerateFloatTypes.h"
+#include "generic/SpatialUpSamplingBilinear.c"
+#include "THGenerateFloatTypes.h"
+
#include "generic/VolumetricAveragePooling.c"
#include "THGenerateFloatTypes.h"
@@ -160,6 +163,9 @@
#include "generic/VolumetricFullConvolution.c"
#include "THGenerateFloatTypes.h"
+#include "generic/VolumetricDilatedConvolution.c"
+#include "THGenerateFloatTypes.h"
+
#include "generic/VolumetricMaxPooling.c"
#include "THGenerateFloatTypes.h"
@@ -171,3 +177,6 @@
#include "generic/SpatialReplicationPadding.c"
#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricReplicationPadding.c"
+#include "THGenerateFloatTypes.h"
diff --git a/test.lua b/test.lua
index 8bf98ec..e288e25 100644
--- a/test.lua
+++ b/test.lua
@@ -85,6 +85,42 @@ function nntest.Add()
end
end
+function nntest.Bottle()
+ local ini = 2
+ local inj = 3
+ local ink = 4
+ local out = 5
+ local input = torch.Tensor(ini,inj,ink):normal()
+ local linear = nn.Linear(ink, out)
+ local module1 = nn.Bottle(linear)
+ local module2 = nn.Sequential()
+ module2:add(nn.View(ini*inj, ink))
+ module2:add(linear)
+ module2:add(nn.View(ini, inj, out))
+ local output1 = module1:forward(input)
+ local output2 = module2:forward(input)
+ mytester:eq(output1, output2, 0.0001, 'Bottle output not the same as Module')
+
+ local shape = {4, 5, 6, 7, 8, 1, 3}
+ local input = torch.Tensor(table.unpack(shape)):normal()
+ local module = nn.Sequential()
+ module:add(nn.Squeeze(2))
+ module:add(nn.Linear(3, 3))
+ local module1 = nn.Bottle(module, 3, 2)
+ local outShape = {4, 5, 6, 7, 8, 3}
+ local module2 = nn.Sequential()
+ module2:add(nn.View(4*5*6*7*8, 1, 3))
+ module2:add(module)
+ module2:add(nn.View(table.unpack(outShape)))
+ local output1 = module1:forward(input)
+ local grad = torch.Tensor(output1:size()):normal()
+ local gradOutput1 = module1:backward(input, grad):clone()
+ local output2 = module2:forward(input)
+ local gradOutput2 = module2:backward(input, grad):clone()
+ mytester:eq(output1, output2, 0.0001, 'Bottle output not the same as Module')
+ mytester:eq(gradOutput1, gradOutput2, 0.0001, 'Bottle gradOutput not the same as Module')
+end
+
function nntest.CMul()
local ini = math.random(3,5)
local inj = math.random(3,5)
@@ -263,6 +299,23 @@ function nntest.ReLU()
mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'ReLU gradInput')
end
+function nntest.ReLU6()
+ for inplace = 0, 1 do
+ local input = torch.randn(3, 4):mul(6)
+ local gradOutput = torch.randn(3,4)
+ local module = nn.ReLU6(inplace == 1)
+ local output = module:forward(input:clone())
+ local gt = input:clone():gt(input, 0)
+ local lt = input:clone():lt(input, 6)
+ local output2 = gt:clone():cmul(lt):cmul(input)
+ output2:add(6, input:clone():gt(input, 6))
+ mytester:assertTensorEq(output, output2, 0.000001, 'ReLU6 output '..(inplace == 1 and '(inplace)' or '') )
+ local gradInput = module:backward(input, gradOutput:clone())
+ local gradInput2 = gt:clone():cmul(lt):cmul(gradOutput)
+ mytester:assertTensorEq(gradInput, gradInput2, 0.000001, 'ReLU6 gradInput '..(inplace == 1 and '(inplace)' or '') )
+ end
+end
+
function nntest.Exp()
local ini = math.random(3,5)
local inj = math.random(3,5)
@@ -1321,19 +1374,19 @@ function nntest.MarginRankingCriterion()
mytester:assert(torch.type(gradInput2[2]) == 'torch.FloatTensor', "MRC:type() error 2")
-- batch, sizeAverage true, jacobian
- local margin = math.random()*2-1
- local batch_size = math.random(2,10)
+ local margin = math.random() * 2 - 1
+ local batch_size = math.random(1,10)
local crit = nn.MarginRankingCriterion(margin)
crit.sizeAverage = true
- local v = torch.rand(2,batch_size)
+ local v = torch.rand(2, batch_size)
local t = torch.Tensor(batch_size):random(0,1):mul(2):add(-1)
criterionJacobianTest1DTable(crit,v,t)
-- batch, sizeAverage false, jacobian
- local margin = math.random()*2-1
+ local margin = math.random() * 2 - 1
local crit = nn.MarginRankingCriterion(margin)
crit.sizeAverage = false
- local v = torch.rand(2,batch_size)
+ local v = torch.rand(2, batch_size)
local t = torch.Tensor(batch_size):random(0,1):mul(2):add(-1)
criterionJacobianTest1DTable(crit,v,t)
@@ -1609,7 +1662,7 @@ function nntest.LogSoftmax()
local ferr,berr = jac.testIO(module,input)
mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-
+
-- test logsoftmax when gradOutput is non-contiguous
local layer = nn.LogSoftMax()
layer:zeroGradParameters()
@@ -1622,13 +1675,13 @@ function nntest.LogSoftmax()
gradOutput = gradOutput:clone()
local gradInput2 = layer:backward(input, gradOutput):clone()
- mytester:assertlt(gradInput1:add(-1, gradInput2):abs():max(),
- 1e-10,
- torch.typename(layer)
- .. ' non-contiguous gradOutput check')
-
-
-
+ mytester:assertlt(gradInput1:add(-1, gradInput2):abs():max(),
+ 1e-10,
+ torch.typename(layer)
+ .. ' non-contiguous gradOutput check')
+
+
+
end
@@ -2471,76 +2524,99 @@ function nntest.SpatialFullConvolution()
local module = nn.SpatialFullConvolution(from, to, ki, kj, di, dj, padW, padH, adjW, adjH)
local input = torch.Tensor(from, inj, ini):zero()
- -- stochastic
+ local function jacTests(module)
+ -- stochastic
- local err = jac.testJacobian(module, input)
- mytester:assertlt(err, precision, 'error on state ')
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
- local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
- mytester:assertlt(err , precision, 'error on weight ')
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
- local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
- mytester:assertlt(err , precision, 'error on bias ')
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+ end
- local err = jac.testJacobianUpdateParameters(module, input, module.weight)
- mytester:assertlt(err , precision, 'error on weight [direct update] ')
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
- local err = jac.testJacobianUpdateParameters(module, input, module.bias)
- mytester:assertlt(err , precision, 'error on bias [direct update] ')
+ if module.bias then
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+ end
- for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
- mytester:assertlt(err, precision, string.format(
- 'error on weight [%s]', t))
- end
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
- for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
- mytester:assertlt(err, precision, string.format(
- 'error on bias [%s]', t))
- end
+ if module.bias then
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+ end
- -- batch
+ -- batch
- --verbose = true
- local batch = math.random(2,5)
+ --verbose = true
+ local batch = math.random(2,5)
- module = nn.SpatialFullConvolution(from, to, ki, kj, di, dj, padW, padH, adjW, adjH)
- input = torch.Tensor(batch,from,inj,ini):zero()
+ module = nn.SpatialFullConvolution(from, to, ki, kj, di, dj, padW, padH, adjW, adjH)
+ input = torch.Tensor(batch,from,inj,ini):zero()
- -- Check that the required output size matches the actual output size
- local output = module:forward(input)
- mytester:asserteq(output:size(3), outj, 'output height error')
- mytester:asserteq(output:size(4), outi, 'output width error')
+ -- Check that the required output size matches the actual output size
+ local output = module:forward(input)
+ mytester:asserteq(output:size(3), outj, 'output height error')
+ mytester:asserteq(output:size(4), outi, 'output width error')
- local err = jac.testJacobian(module, input)
- mytester:assertlt(err, precision, 'batch error on state ')
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
- local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
- mytester:assertlt(err , precision, 'batch error on weight ')
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
- local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
- mytester:assertlt(err , precision, 'batch error on bias ')
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+ end
- local err = jac.testJacobianUpdateParameters(module, input, module.weight)
- mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
- local err = jac.testJacobianUpdateParameters(module, input, module.bias)
- mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+ if module.bias then
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+ end
- for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
- mytester:assertlt(err, precision, string.format(
- 'error on weight [%s]', t))
- end
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
- for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
- mytester:assertlt(err, precision, string.format(
- 'batch error on bias [%s]', t))
+ if module.bias then
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
end
- local ferr, berr = jac.testIO(module, input)
- mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
- mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+ jacTests(module)
+ module:noBias()
+ jacTests(module)
+ module.bias = torch.Tensor(module.nOutputPlane):zero()
+ module.gradBias = torch.Tensor(module.nOutputPlane):zero()
+ module:reset()
+ jacTests(module)
-- non-contiguous
+ local batch = math.random(2,5)
local input = torch.randn(batch,from,ini,inj):transpose(3,4) -- non-contiguous
local inputc = input:contiguous() -- contiguous
local output = module:forward(input)
@@ -2604,8 +2680,8 @@ function nntest.SpatialDilatedConvolution()
local padH = math.random(0,2)
local outi = math.random(5,9)
local outj = math.random(5,9)
- local dilationW = math.random(0,10)
- local dilationH = math.random(0,10)
+ local dilationW = math.random(1,10)
+ local dilationH = math.random(1,10)
local ini = (outi - 1) * di - 2 * padW + dilationW * (ki-1) + 1
local inj = (outj - 1) * dj - 2 * padH + dilationH * (kj-1) + 1
@@ -3134,6 +3210,49 @@ function nntest.SpatialMaxUnpooling()
end
end
+function nntest.SpatialDilatedMaxPooling()
+ for _,ceil_mode in pairs({true,false}) do
+ local from = math.random(1,5)
+ local ki = math.random(1,4)
+ local kj = math.random(1,4)
+ local si = math.random(1,3)
+ local sj = math.random(1,3)
+ local outi = math.random(4,5)
+ local outj = math.random(4,5)
+ local padW = math.min(math.random(0,1),math.floor(ki/2))
+ local padH = math.min(math.random(0,1),math.floor(kj/2))
+ local dilationW = math.random(1,5)
+ local dilationH = math.random(1,5)
+ local ini = (outi-1)*si+(dilationW*(ki-1)+1)-2*padW
+ local inj = (outj-1)*sj+(dilationH*(kj-1)+1)-2*padH
+
+ local ceil_string = ceil_mode and 'ceil' or 'floor'
+ local module = nn.SpatialDilatedMaxPooling(ki,kj,si,sj,padW,padH,dilationW, dilationH)
+ if ceil_mode then module:ceil() else module:floor() end
+ local input = torch.rand(from,inj,ini)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+
+ -- batch
+ local nbatch = math.random(2,5)
+ input = torch.rand(nbatch,from,inj,ini)
+ module = nn.SpatialDilatedMaxPooling(ki,kj,si,sj,padW,padH,dilationW,dilationH)
+ if ceil_mode then module:ceil() else module:floor() end
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error '..ceil_string..' mode on state (Batch)')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ')
+ end
+end
+
function nntest.SpatialFractionalMaxPooling()
local batch = math.random(1, 3)
local plane = math.random(1, 3)
@@ -3819,6 +3938,112 @@ function nntest.VolumetricConvolution()
mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
end
+function nntest.VolumetricDilatedConvolution()
+ local from = math.random(1,5)
+ local to = math.random(1,5)
+ local ki = math.random(1,5)
+ local kj = math.random(1,5)
+ local kk = math.random(1,5)
+ local di = math.random(1,4)
+ local dj = math.random(1,4)
+ local dk = math.random(1,4)
+ local padW = 0 -- math.random(0,2)
+ local padH = 0 -- math.random(0,2)
+ local padT = 0 -- math.random(0,2)
+ local outi = math.random(2,3)
+ local outj = math.random(2,5)
+ local outk = math.random(2,5)
+ local dilationW = math.random(1,3)
+ local dilationH = math.random(1,3)
+ local dilationT = math.random(1,3)
+ local ini = (outi - 1) * di - 2 * padW + dilationW * (ki-1) + 1
+ local inj = (outj - 1) * dj - 2 * padH + dilationH * (kj-1) + 1
+ local ink = (outk - 1) * dk - 2 * padT + dilationT * (kk-1) + 1
+
+ local module = nn.VolumetricDilatedConvolution(from, to, kk, ki, kj, dk, di, dj, padT, padW, padH, dilationT, dilationW, dilationH)
+ local input = torch.Tensor(from, ink, inj, ini):zero()
+
+ -- stochastic
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ -- batch
+
+ --verbose = true
+ local batch = math.random(2,5)
+
+ module = nn.VolumetricDilatedConvolution(from, to, kk, ki, kj, dk, di, dj, padT, padW, padH, dilationT, dilationW, dilationH)
+ input = torch.Tensor(batch,from,ink,inj,ini):zero()
+
+ -- Check that the required output size matches the actual output size
+ local output = module:forward(input)
+ mytester:asserteq(output:size(3), outk, 'output depth error')
+ mytester:asserteq(output:size(4), outj, 'output height error')
+ mytester:asserteq(output:size(5), outi, 'output width error')
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+
+ -- non-contiguous
+ local input = torch.randn(batch,from,ink,ini,inj):transpose(4,5) -- non-contiguous
+ local inputc = input:contiguous() -- contiguous
+ local output = module:forward(input)
+ local outputc = module:forward(inputc)
+ mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - contiguous err ')
+ local gradInput = module:backward(input, output)
+ local gradInputc = module:backward(inputc, outputc)
+ mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - contiguous err ')
+end
+
function nntest.VolumetricConvolutionBatchCompare()
local from = math.random(2,3)
local to = math.random(2,3)
@@ -4339,6 +4564,16 @@ function nntest.Index()
local gradOutput = torch.Tensor{{1, 2}, {1, 2}}
local gradInput = net:backward(input, gradOutput)
equal(gradInput[1], torch.Tensor{{2, 4}, {0, 0}}, "error in 2D backward pass")
+
+ -- test clearState
+ local m = nn.Index(1)
+ local tensor = torch.Tensor(10, 3)
+ local indices = torch.LongTensor{ 2,3,4}
+
+ m:clearState()
+ m:forward({tensor, indices})
+ m:backward({tensor,indices}, torch.rand(3,3))
+
end
function nntest.Squeeze()
@@ -4739,6 +4974,8 @@ function nntest.Select()
mytester:asserteq(nn.Select(1,-1):forward(input)[1], 8, "negative index")
mytester:asserteq(nn.Select(1,-1):forward(input)[2], 0, "negative index")
mytester:asserteq(nn.Select(1,-2):forward(input)[2], 6, "negative index")
+ mytester:asserteq(nn.Select(-1,-1):forward(input)[1], 7, "negative dim + negative index")
+ mytester:asserteq(nn.Select(-1,-1):forward(input)[2], 1, "negative dim + negative index")
end
function nntest.SelectTable()
@@ -5018,6 +5255,23 @@ function nntest.Narrow()
mytester:assertTensorEq(gradInput, gradInput1, 0.00001, "Narrow #3 gradInput err")
mytester:assertTensorEq(output, output2, 0.0000001, "Narrow #3 negative output err")
mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "Narrow #3 negative gradInput err")
+
+ -- check basic narrow functionality #4
+ local input = torch.rand(3, 10, 4)
+ local output = input:narrow(2, 5, 3)
+ local gradOutput = torch.rand(3, 3, 4)
+ local gradInput = torch.zeros(3, 10, 4)
+ gradInput:narrow(2, 5, 3):copy(gradOutput)
+ local module1 = nn.Narrow(-2, 5, 3)
+ local output1 = module1:forward(input)
+ local gradInput1 = module1:backward(input, gradOutput)
+ local module2 = nn.Narrow(-2, 5, -4)
+ local output2 = module2:forward(input)
+ local gradInput2 = module2:backward(input, gradOutput)
+ mytester:assertTensorEq(output, output1, 0.0000001, "Narrow #4 output err")
+ mytester:assertTensorEq(gradInput, gradInput1, 0.00001, "Narrow #4 gradInput err")
+ mytester:assertTensorEq(output, output2, 0.0000001, "Narrow #4 negative output err")
+ mytester:assertTensorEq(gradInput, gradInput2, 0.00001, "Narrow #4 negative gradInput err")
end
function nntest.NarrowTable()
@@ -5158,6 +5412,30 @@ function nntest.SpatialUpSamplingNearest()
end
end
+function nntest.SpatialUpSamplingBilinear()
+ for scale=2,4 do
+ for dim = 3,4 do
+ local m = nn.SpatialUpSamplingBilinear(scale)
+
+ -- Create a randomly sized dimD vector
+ local shape = {}
+ for i = 1, dim do
+ table.insert(shape, torch.random(2, 2+dim-1))
+ end
+
+ -- Check that the gradient is correct by using finite elements
+ local input = torch.DoubleTensor(table.unpack(shape)):normal()
+
+ local err = jac.testJacobian(m, input)
+ mytester:assertlt(err, precision, ' error on state ')
+
+ local ferr, berr = jac.testIO(m, input)
+ mytester:asserteq(ferr, 0, torch.typename(m)..' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(m)..' - i/o backward err ')
+ end
+ end
+end
+
function nntest.Concat()
local input = torch.randn(4, 2)
local num_modules = math.random(2, 5)
@@ -5939,6 +6217,7 @@ local function testBatchNormalization(moduleName, dim, k)
jacTests(module, input, true)
module:evaluate()
jacTests(module, input, true)
+ jacTests(module, input[1], true)
-- batch norm without affine transform
module = nn[moduleName](planes, 1e-5, 0.1, false)
@@ -5946,6 +6225,7 @@ local function testBatchNormalization(moduleName, dim, k)
jacTests(module, input, false)
module:evaluate()
jacTests(module, input, false)
+ jacTests(module, input[1], false)
end
function nntest.BatchNormalization()
@@ -6067,6 +6347,37 @@ function nntest.SpatialReplicationPadding()
mytester:assertalmosteq(err, 0.0, 1e-7)
end
+function nntest.VolumetricReplicationPadding()
+ for batch = 0, 1 do
+ local nbatch
+ if batch == 1 then
+ nbatch = math.random(1,3)
+ end
+ local plane = math.random(1,3)
+ local sizeZ = math.random(1,4)
+ local sizeY = math.random(7,11)
+ local sizeX = math.random(7,11)
+ local padLeft = math.random(-3,3)
+ local padRight = math.random(-3,3)
+ local padTop = math.random(-3,3)
+ local padBottom = math.random(-3,3)
+ local padFront = math.random(-3,3)
+ local padBack = math.random(-3,3)
+ local jac = nn.Jacobian
+ local layer =
+ nn.VolumetricReplicationPadding(padLeft, padRight, padTop,
+ padBottom, padFront, padBack)
+ local input
+ if batch == 1 then
+ input = torch.rand(nbatch, plane, sizeZ, sizeY, sizeX)
+ else
+ input = torch.rand(plane, sizeZ, sizeY, sizeX)
+ end
+ local err = jac.testJacobian(layer, input)
+ mytester:assertalmosteq(err, 0.0, 1e-7)
+ end
+end
+
function nntest.Typecast()
local function make_network()
local seq = nn.Sequential()
@@ -6288,6 +6599,11 @@ function nntest.ErrorHandling()
)
end
+function nntest.GPU()
+ -- this is a placeholder to let you know that the nn.GPU unit test
+ -- is located in cunn package.
+end
+
mytester:add(nntest)
jac = nn.Jacobian
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/lua-torch-nn.git
More information about the debian-science-commits
mailing list