[lua-torch-nn] 01/09: New upstream version 0~20170204-g3a1b725
Zhou Mo
cdluminate-guest at moszumanska.debian.org
Mon Feb 6 04:27:11 UTC 2017
This is an automated email from the git hooks/post-receive script.
cdluminate-guest pushed a commit to branch master
in repository lua-torch-nn.
commit 872bb5408e37b5ec2340c10930e9dbdd760986a0
Author: Zhou Mo <cdluminate at gmail.com>
Date: Mon Feb 6 03:59:19 2017 +0000
New upstream version 0~20170204-g3a1b725
---
CMaxTable.lua | 25 +-
CMinTable.lua | 25 +-
Criterion.lua | 8 +
CrossEntropyCriterion.lua | 18 +-
GatedLinearUnit.lua | 45 +--
Max.lua | 2 +-
Min.lua | 2 +-
Normalize.lua | 2 +-
TemporalRowConvolution.lua | 120 ++++++++
doc/convolution.md | 60 ++++
doc/image/lena.jpg | Bin 0 -> 39706 bytes
doc/overview.md | 4 +-
init.lua | 1 +
lib/THNN/CMakeLists.txt | 3 +
lib/THNN/generic/GatedLinearUnit.c | 71 +++++
lib/THNN/generic/THNN.h | 50 ++++
lib/THNN/generic/TemporalRowConvolution.c | 465 ++++++++++++++++++++++++++++++
lib/THNN/init.c | 6 +
test.lua | 137 +++++++++
19 files changed, 995 insertions(+), 49 deletions(-)
diff --git a/CMaxTable.lua b/CMaxTable.lua
index 62cede9..845e38d 100644
--- a/CMaxTable.lua
+++ b/CMaxTable.lua
@@ -4,25 +4,38 @@ function CMaxTable:__init()
parent.__init(self)
self.gradInput = {}
self.maxIdx = torch.Tensor()
+ self.mask = torch.Tensor()
+ self.maxVals = torch.Tensor()
+ self.gradMaxVals = torch.Tensor()
end
function CMaxTable:updateOutput(input)
self.output:resizeAs(input[1]):copy(input[1])
self.maxIdx:resizeAs(input[1]):fill(1)
for i=2,#input do
- local mask = torch.gt(input[i], self.output)
- self.maxIdx:maskedFill(mask, i)
- self.output:maskedCopy(mask, input[i][mask])
+ self.maskByteTensor = self.maskByteTensor or
+ (torch.type(self.output) == 'torch.CudaTensor' and
+ torch.CudaByteTensor() or torch.ByteTensor())
+ self.mask:gt(input[i], self.output)
+ self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+ self.maxIdx:maskedFill(self.maskByteTensor, i)
+ self.maxVals:maskedSelect(input[i], self.maskByteTensor)
+ self.output:maskedCopy(self.maskByteTensor, self.maxVals)
end
return self.output
end
function CMaxTable:updateGradInput(input, gradOutput)
for i=1,#input do
- self.gradInput[i] = input[i].new()
+ self.gradInput[i] = self.gradInput[i] or input[i].new()
self.gradInput[i]:resizeAs(input[i]):fill(0.0)
- local mask = torch.eq(self.maxIdx, i)
- self.gradInput[i]:maskedCopy(mask, gradOutput[mask])
+ self.maskByteTensor = self.maskByteTensor or
+ (torch.type(self.output) == 'torch.CudaTensor' and
+ torch.CudaByteTensor() or torch.ByteTensor())
+ self.mask:eq(self.maxIdx, i)
+ self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+ self.gradMaxVals:maskedSelect(gradOutput, self.maskByteTensor)
+ self.gradInput[i]:maskedCopy(self.maskByteTensor, self.gradMaxVals)
end
for i=#input+1, #self.gradInput do
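The CMaxTable change above (and the matching CMinTable change below) replaces per-call temporaries with persistent mask/value buffers plus an explicit byte-tensor mask, so the same code path also works for CUDA tensors. A minimal usage sketch, assuming only that torch and nn are loaded:

```lua
require 'nn'

-- element-wise max over a table of equally-sized tensors
local m = nn.CMaxTable()
local a = torch.Tensor{1, 5, 3}
local b = torch.Tensor{4, 2, 6}

local out  = m:forward({a, b})                       -- 4 5 6
local grad = m:backward({a, b}, torch.Tensor{1, 1, 1})
-- the gradient is routed to whichever input held the max at each position
print(grad[1])  -- 0 1 0
print(grad[2])  -- 1 0 1
```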
diff --git a/CMinTable.lua b/CMinTable.lua
index a8385e8..25b9a19 100644
--- a/CMinTable.lua
+++ b/CMinTable.lua
@@ -4,25 +4,38 @@ function CMinTable:__init()
parent.__init(self)
self.gradInput = {}
self.minIdx = torch.Tensor()
+ self.mask = torch.Tensor()
+ self.minVals = torch.Tensor()
+ self.gradMaxVals = torch.Tensor()
end
function CMinTable:updateOutput(input)
self.output:resizeAs(input[1]):copy(input[1])
self.minIdx:resizeAs(input[1]):fill(1)
for i=2,#input do
- local mask = torch.lt(input[i], self.output)
- self.minIdx:maskedFill(mask, i)
- self.output:maskedCopy(mask, input[i][mask])
+ self.maskByteTensor = self.maskByteTensor or
+ (torch.type(self.output) == 'torch.CudaTensor' and
+ torch.CudaByteTensor() or torch.ByteTensor())
+ self.mask:lt(input[i], self.output)
+ self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+ self.minIdx:maskedFill(self.maskByteTensor, i)
+ self.minVals:maskedSelect(input[i], self.maskByteTensor)
+ self.output:maskedCopy(self.maskByteTensor, self.minVals)
end
return self.output
end
function CMinTable:updateGradInput(input, gradOutput)
for i=1,#input do
- self.gradInput[i] = torch.Tensor()
+ self.gradInput[i] = self.gradInput[i] or input[i].new()
self.gradInput[i]:resizeAs(input[i]):fill(0.0)
- local mask = torch.eq(self.minIdx, i)
- self.gradInput[i]:maskedCopy(mask, gradOutput[mask])
+ self.maskByteTensor = self.maskByteTensor or
+ (torch.type(self.output) == 'torch.CudaTensor' and
+ torch.CudaByteTensor() or torch.ByteTensor())
+ self.mask:eq(self.minIdx, i)
+ self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+ self.gradMaxVals:maskedSelect(gradOutput, self.maskByteTensor)
+ self.gradInput[i]:maskedCopy(self.maskByteTensor, self.gradMaxVals)
end
for i=#input+1, #self.gradInput do
diff --git a/Criterion.lua b/Criterion.lua
index 4efb279..e48f068 100644
--- a/Criterion.lua
+++ b/Criterion.lua
@@ -49,6 +49,14 @@ function Criterion:cuda()
return self:type('torch.CudaTensor')
end
+function Criterion:cudaHalf()
+ return self:type('torch.CudaHalfTensor')
+end
+
+function Criterion:cudaDouble()
+ return self:type('torch.CudaDoubleTensor')
+end
+
function Criterion:__call__(input, target)
self.output = self:forward(input, target)
self.gradInput = self:backward(input, target)
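The new `cudaHalf` and `cudaDouble` helpers mirror the existing `Criterion:cuda()`/`:float()`/`:double()` conveniences. A hedged sketch (assumes cutorch/cunn are installed and the GPU supports half and double precision):

```lua
require 'cunn'  -- pulls in cutorch, which provides the Cuda* tensor types

local crit = nn.MSECriterion()
crit:cuda()        -- torch.CudaTensor (already available)
crit:cudaDouble()  -- torch.CudaDoubleTensor (new helper)
crit:cudaHalf()    -- torch.CudaHalfTensor (new helper)
```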
diff --git a/CrossEntropyCriterion.lua b/CrossEntropyCriterion.lua
index d4d19e5..2f72cf8 100644
--- a/CrossEntropyCriterion.lua
+++ b/CrossEntropyCriterion.lua
@@ -1,17 +1,25 @@
local CrossEntropyCriterion, Criterion = torch.class('nn.CrossEntropyCriterion', 'nn.Criterion')
-function CrossEntropyCriterion:__init(weights)
+function CrossEntropyCriterion:__init(weights, sizeAverage)
Criterion.__init(self)
self.lsm = nn.LogSoftMax()
- self.nll = nn.ClassNLLCriterion(weights)
+ self.nll = nn.ClassNLLCriterion(weights, sizeAverage)
+ self.sizeAverage = self.nll.sizeAverage
+ self.oldSizeAverage = self.sizeAverage
end
function CrossEntropyCriterion:updateOutput(input, target)
input = input:squeeze()
target = type(target) == 'number' and target or target:squeeze()
+ -- only propagate if value has changed to preserve old behavior
+ -- of setting nll.sizeAverage directly
+ if self.sizeAverage ~= self.oldSizeAverage then
+ self.nll.sizeAverage = self.sizeAverage
+ end
self.lsm:updateOutput(input)
self.nll:updateOutput(self.lsm.output, target)
self.output = self.nll.output
+ self.oldSizeAverage = self.sizeAverage
return self.output
end
@@ -19,9 +27,15 @@ function CrossEntropyCriterion:updateGradInput(input, target)
local size = input:size()
input = input:squeeze()
target = type(target) == 'number' and target or target:squeeze()
+ -- only propagate if value has changed to preserve old behavior
+ -- of setting nll.sizeAverage directly
+ if self.sizeAverage ~= self.oldSizeAverage then
+ self.nll.sizeAverage = self.sizeAverage
+ end
self.nll:updateGradInput(self.lsm.output, target)
self.lsm:updateGradInput(input, self.nll.gradInput)
self.gradInput:view(self.lsm.gradInput, size)
+ self.oldSizeAverage = self.sizeAverage
return self.gradInput
end
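With this change `sizeAverage` can either be passed to the constructor or set on the wrapper afterwards; in both cases it reaches the inner ClassNLLCriterion, while the old habit of writing `cri.nll.sizeAverage` directly keeps working because the wrapper only propagates the flag when its own copy changes. A small sketch (input and target values are illustrative only):

```lua
require 'nn'

-- pass it through the constructor
local c1 = nn.CrossEntropyCriterion(nil, false)
assert(c1.nll.sizeAverage == false)

-- or flip it on the wrapper; it is pushed down on the next forward/backward
local c2 = nn.CrossEntropyCriterion()
c2.sizeAverage = false
c2:forward(torch.randn(4, 3), torch.LongTensor{1, 2, 3, 1})
assert(c2.nll.sizeAverage == false)
```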
diff --git a/GatedLinearUnit.lua b/GatedLinearUnit.lua
index 5f215ca..5273abf 100644
--- a/GatedLinearUnit.lua
+++ b/GatedLinearUnit.lua
@@ -2,41 +2,26 @@ local GatedLinearUnit, parent = torch.class('nn.GatedLinearUnit', 'nn.Module')
function GatedLinearUnit:__init(dim)
parent.__init(self)
- self.sigmoid = nn.Sigmoid()
self.dim = dim
end
function GatedLinearUnit:updateOutput(input)
- local dim = self.dim or input:dim()
- local inputSize = input:size(dim)
-
- assert(inputSize % 2 == 0, "halving dimension needs to be even")
-
- self.fHalf = input:narrow(dim, 1, inputSize/2)
- self.sHalf = input:narrow(dim, inputSize/2 + 1, inputSize/2)
-
- self.sHalfOut = self.sigmoid:forward(self.sHalf)
- self.output:resizeAs(self.fHalf):copy(self.fHalf):cmul(self.sHalfOut)
-
- return self.output
+ local dim = self.dim or input:dim()
+ input.THNN.GatedLinear_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ dim
+ )
+ return self.output
end
function GatedLinearUnit:updateGradInput(input, gradOutput)
- local dim = self.dim or input:dim()
- local inputSize = input:size(dim)
-
- assert(inputSize % 2 == 0, "halving dimension needs to be even")
-
- local fGradInput = self.sHalfOut
- local sGradInput = self.sigmoid:backward(self.sHalf, gradOutput)
- :cmul(self.fHalf)
-
- self.gradInput:resizeAs(input)
- self.gradInput:narrow(dim, 1, inputSize/2)
- :copy(fGradInput)
- :cmul(gradOutput)
- self.gradInput:narrow(dim, inputSize/2+1, inputSize/2)
- :copy(sGradInput)
-
- return self.gradInput
+ local dim = self.dim or input:dim()
+ input.THNN.GatedLinear_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ dim
+ )
+ return self.gradInput
end
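The Lua module now only dispatches into the new THNN kernels; the behaviour is unchanged: the chosen dimension is halved and the first half is gated by a sigmoid of the second half. A minimal sketch of that contract:

```lua
require 'nn'

local glu = nn.GatedLinearUnit()   -- dim defaults to input:dim()
local x = torch.randn(2, 10)
local y = glu:forward(x)           -- 2x5

-- the same computation written out by hand, for comparison
local a = x:narrow(2, 1, 5)
local b = x:narrow(2, 6, 5)
local ref = torch.cmul(a, torch.sigmoid(b))
print((y - ref):abs():max())       -- ~0
```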
diff --git a/Max.lua b/Max.lua
index 2aa67d3..8273e80 100644
--- a/Max.lua
+++ b/Max.lua
@@ -21,7 +21,7 @@ end
function Max:_lazyInit()
self._output = self._output or self.output.new()
if not self._indices then
- if torch.type(self.output) == 'torch.CudaTensor' then
+ if torch.typename(self.output):find('torch%.Cuda.*Tensor') then
self._indices = torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor()
else
self._indices = torch.LongTensor()
diff --git a/Min.lua b/Min.lua
index 252f52e..3a3e4a8 100644
--- a/Min.lua
+++ b/Min.lua
@@ -21,7 +21,7 @@ end
function Min:_lazyInit()
self._output = self._output or self.output.new()
if not self._indices then
- if torch.type(self.output) == 'torch.CudaTensor' then
+ if torch.typename(self.output):find('torch%.Cuda.*Tensor') then
self._indices = torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor()
else
self._indices = torch.LongTensor()
diff --git a/Normalize.lua b/Normalize.lua
index b6d1298..0937ebb 100644
--- a/Normalize.lua
+++ b/Normalize.lua
@@ -24,7 +24,7 @@ function Normalize:updateOutput(input)
if self.p == math.huge then
-- specialization for the infinity norm
if not self._indices then
- if torch.type(self.output) == 'torch.CudaTensor' then
+ if torch.typename(self.output):find('torch%.Cuda.*Tensor') then
self._indices = torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor()
else
self._indices = torch.LongTensor()
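Max, Min and Normalize previously special-cased only `torch.CudaTensor`; the pattern match now also catches the other CUDA tensor types. What the pattern accepts, as a quick check:

```lua
local pat = 'torch%.Cuda.*Tensor'
for _, name in ipairs{'torch.CudaTensor', 'torch.CudaHalfTensor',
                      'torch.CudaDoubleTensor', 'torch.FloatTensor'} do
  print(name, name:find(pat) ~= nil)
end
-- true, true, true, false
```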
diff --git a/TemporalRowConvolution.lua b/TemporalRowConvolution.lua
new file mode 100644
index 0000000..7c9d6a2
--- /dev/null
+++ b/TemporalRowConvolution.lua
@@ -0,0 +1,120 @@
+local THNN = require "nn.THNN"
+
+local TemporalRowConvolution, parent = torch.class("nn.TemporalRowConvolution", "nn.Module")
+
+function TemporalRowConvolution:__init(inputFrameSize, kW, dW, featFirst)
+ parent.__init(self)
+
+ self.inputFrameSize = inputFrameSize
+ self.kW = kW
+ self.dW = dW or 1
+
+ self.weight = torch.Tensor(inputFrameSize, 1, kW)
+ self.bias = torch.Tensor(inputFrameSize)
+ self.gradWeight = torch.Tensor(inputFrameSize, 1, kW)
+ self.gradBias = torch.Tensor(inputFrameSize)
+
+ -- Set to true for batch x inputFrameSize x nInputFrame
+ self.featFirst = featFirst and true or false
+ self:reset()
+end
+
+function TemporalRowConvolution:noBias()
+ self.bias = nil
+ self.gradBias = nil
+ return self
+end
+
+function TemporalRowConvolution:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1 / math.sqrt(self.kW * self.inputFrameSize)
+ end
+ self.weight:uniform(-stdv, stdv)
+ self.bias:uniform(-stdv, stdv)
+end
+
+function TemporalRowConvolution:updateOutput(input)
+ assert(input.THNN, torch.type(input)..".THNN backend not imported")
+ self.finput = self.finput or input.new()
+ self.fgradInput = self.fgradInput or input.new()
+
+ input.THNN.TemporalRowConvolution_updateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ THNN.optionalTensor(self.bias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW,
+ self.dW,
+ 0, -- would be self.padW
+ self.featFirst
+ )
+
+ return self.output
+end
+
+function TemporalRowConvolution:updateGradInput(input, gradOutput)
+ assert(input.THNN, torch.type(input)..".THNN backend not imported")
+
+ if self.gradInput then
+ input.THNN.TemporalRowConvolution_updateGradInput(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradInput:cdata(),
+ self.weight:cdata(),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW,
+ self.dW,
+ 0, -- would be self.padW
+ self.featFirst
+ )
+ return self.gradInput
+ end
+end
+
+function TemporalRowConvolution:accGradParameters(input, gradOutput, scale)
+ assert(input.THNN, torch.type(input)..".THNN backend not imported")
+
+ input.THNN.TemporalRowConvolution_accGradParameters(
+ input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ THNN.optionalTensor(self.gradBias),
+ self.finput:cdata(),
+ self.fgradInput:cdata(),
+ self.kW,
+ self.dW,
+ 0, -- would be self.padW
+ self.featFirst,
+ scale or 1)
+end
+
+function TemporalRowConvolution:type(type, tensorCache)
+ if self.finput then self.finput:set() end
+ if self.fgradInput then self.fgradInput:set() end
+ return parent.type(self, type, tensorCache)
+end
+
+function TemporalRowConvolution:__tostring__()
+ local s = string.format("%s(%d, %d", torch.type(self), self.inputFrameSize, self.kW)
+ if self.dW ~= 1 then
+ s = s .. string.format(", %d", self.dW)
+ end
+ if self.padW and self.padW ~= 0 then -- currently padding is not supported
+ s = s .. ", " .. self.padW
+ end
+ if self.bias then
+ return s .. ")"
+ else
+ return s .. ") without bias"
+ end
+end
+
+function TemporalRowConvolution:clearState()
+ nn.utils.clear(self, "finput", "fgradInput", "_input", "_gradOutput")
+ return parent.clearState(self)
+end
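A minimal sketch of the construction-time API of the new module, following the `__init` and `__tostring__` above (shapes only, no training):

```lua
require 'nn'

-- 5 input features, kernel width 3, stride 1, time-first layout (default)
local conv = nn.TemporalRowConvolution(5, 3, 1)
print(conv.weight:size())  -- 5x1x3: one 1-D filter per feature row
print(conv.bias:size())    -- 5

conv:noBias()              -- drop the bias term entirely
print(tostring(conv))      -- nn.TemporalRowConvolution(5, 3) without bias
```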
diff --git a/doc/convolution.md b/doc/convolution.md
index 21cfa57..d87a749 100644
--- a/doc/convolution.md
+++ b/doc/convolution.md
@@ -9,6 +9,7 @@ A convolution is an integral that expresses the amount of overlap of one functio
* [TemporalSubSampling](#nn.TemporalSubSampling) : a 1D sub-sampling over an input sequence ;
* [TemporalMaxPooling](#nn.TemporalMaxPooling) : a 1D max-pooling operation over an input sequence ;
* [LookupTable](#nn.LookupTable) : a convolution of width `1`, commonly used for word embeddings ;
+ * [TemporalRowConvolution](#nn.TemporalRowConvolution) : a row-oriented 1D convolution over an input sequence ;
* [Spatial Modules](#nn.SpatialModules) apply to inputs with two-dimensional relationships (e.g. images):
* [SpatialConvolution](#nn.SpatialConvolution) : a 2D convolution over an input image ;
* [SpatialFullConvolution](#nn.SpatialFullConvolution) : a 2D full convolution over an input image ;
@@ -322,6 +323,65 @@ Outputs something like:
Note that the 1st, 2nd and 10th rows of the module.weight are updated to
obey the max-norm constraint, since their indices appear in the "input".
+<a name="nn.TemporalRowConvolution"></a>
+### TemporalRowConvolution ###
+
+```lua
+module = nn.TemporalRowConvolution(inputFrameSize, kW, [dW], [featFirst])
+```
+
+Applies a 1D row-oriented convolution over an input sequence composed of `nInputFrame` frames. The input tensor in `forward(input)` is expected to be a 2D tensor (`nInputFrame x inputFrameSize`) or a 3D tensor (`nBatchFrame x nInputFrame x inputFrameSize`). The layer can be used without a bias by `module:noBias()`.
+
+The parameters are the following:
+ * `inputFrameSize`: The input frame size expected in sequences given into `forward()`.
+ * `kW`: The kernel width of the convolution.
+ * `dW`: The step of the convolution. Default is `1`.
+ * `featFirst`: If `true`, expects input to be in the form `nBatchFrame x inputFrameSize x nInputFrame`. Default is `false`.
+
+ If the input sequence is a 2D tensor of dimension `nInputFrame x inputFrameSize`, the output sequence will be `nOutputFrame x inputFrameSize` where
+
+ ```lua
+ nOutputFrame = (nInputFrame - kW) / dW + 1
+ ```
+
+ If the input sequence is a 3D tensor of dimension `nBatchFrame x nInputFrame x inputFrameSize`, the output sequence will be `nBatchFrame x nOutputFrame x inputFrameSize`.
+
+ The parameters of the convolution can be found in `self.weight` (Tensor of size `inputFrameSize x 1 x kW`) and `self.bias` (Tensor of size `inputFrameSize`). The corresponding gradients can be found in `self.gradWeight` and `self.gradBias`.
+
+ For a 2D input, the output value of the layer can be precisely described as:
+
+ ```lua
+ output[t][i] = bias[i] + sum_{k=1}^kW weight[i][k] * input[dW(t-1)+k][i]
+ ```
+
+ Here is a simple example:
+ ```lua
+ inp = 5;
+ kw = 3;
+ dw = 1;
+
+ -- row convolution with a kernel width of 3 (future context of 2)
+ module = nn.TemporalRowConvolution(inp, kw, dw)
+
+ x = torch.rand(8, inp)
+ print(module:forward(x))
+ ```
+
+ which gives
+
+ ```lua
+ 0.1188 0.1945 0.1065 -0.0077 -0.3433
+ 0.0630 0.4354 0.1954 -0.2103 -0.3506
+ 0.0340 0.2222 0.3039 -0.2012 -0.3814
+ 0.0820 0.3489 0.2533 -0.0940 -0.3298
+ 0.1964 0.1533 0.1750 -0.1493 -0.3059
+ 0.2651 0.2474 0.0521 -0.1134 -0.4024
+ [torch.Tensor of dimension 6x5]
+ ```
+
+ More information about the layer can be found [here](http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf).
+
+
<a name="nn.SpatialModules"></a>
## Spatial Modules ##
Excluding an optional batch dimension, spatial layers expect a 3D Tensor as input. The
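The `featFirst` flag documented above only changes the expected layout; with the same weights the two layouts should produce the same result up to a transpose. A hedged sketch (not part of the patch, values illustrative only):

```lua
require 'nn'

local inp, kw, nFrame = 5, 3, 8
local conv  = nn.TemporalRowConvolution(inp, kw)           -- nInputFrame x inputFrameSize
local convF = nn.TemporalRowConvolution(inp, kw, 1, true)  -- inputFrameSize x nInputFrame
convF.weight:copy(conv.weight)
convF.bias:copy(conv.bias)

local x  = torch.rand(nFrame, inp)
local y  = conv:forward(x)                      -- 6x5
local yF = convF:forward(x:t():contiguous())    -- 5x6
print((y - yF:t()):abs():max())                 -- ~0
```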
diff --git a/doc/image/lena.jpg b/doc/image/lena.jpg
new file mode 100644
index 0000000..d4a8c36
Binary files /dev/null and b/doc/image/lena.jpg differ
diff --git a/doc/overview.md b/doc/overview.md
index 6db8008..f8f4f3e 100644
--- a/doc/overview.md
+++ b/doc/overview.md
@@ -106,10 +106,10 @@ criterion). The input is usually a Tensor (except if you use special
kind of gradient modules, like [table layers](table.md#nn.TableLayers)). The
label type depends on the criterion. For example, the
[MSECriterion](criterion.md#nn.MSECriterion) expect a Tensor, but the
-[ClassNLLCriterion](criterion.md#nn.ClassNLLCriterion) except a integer number (the
+[ClassNLLCriterion](criterion.md#nn.ClassNLLCriterion) expect an integer number (the
class).
-Such a dataset is easily constructed by using Lua tables, but it could
+Such a dataset is easily constructed by using Lua tables, but it could be
any `C` object for example, as long as required operators/methods
are implemented. [See an example](containers.md#nn.DoItStochasticGradient).
diff --git a/init.lua b/init.lua
index cad1c3c..66ef8f5 100644
--- a/init.lua
+++ b/init.lua
@@ -117,6 +117,7 @@ require('nn.TemporalConvolution')
require('nn.TemporalSubSampling')
require('nn.TemporalMaxPooling')
require('nn.TemporalDynamicKMaxPooling')
+require('nn.TemporalRowConvolution')
require('nn.SpatialSubtractiveNormalization')
require('nn.SpatialDivisiveNormalization')
require('nn.SpatialContrastiveNormalization')
diff --git a/lib/THNN/CMakeLists.txt b/lib/THNN/CMakeLists.txt
index 33eaf56..f9859fa 100644
--- a/lib/THNN/CMakeLists.txt
+++ b/lib/THNN/CMakeLists.txt
@@ -12,6 +12,7 @@ MESSAGE(STATUS "TH_LIBRARIES: ${TH_LIBRARIES}")
IF(NOT THNN_INSTALL_LIB_SUBDIR)
SET(THNN_INSTALL_LIB_SUBDIR "lib" CACHE PATH "THNN install library directory")
+ SET(THNN_INSTALL_INCLUDE_SUBDIR "include" CACHE PATH "THNN install include subdirectory")
ENDIF()
# Flags
@@ -78,3 +79,5 @@ IF(THNN_SO_VERSION)
ENDIF(THNN_SO_VERSION)
INSTALL(TARGETS THNN LIBRARY DESTINATION ${THNN_INSTALL_LIB_SUBDIR})
+INSTALL(FILES THNN.h DESTINATION "${THNN_INSTALL_INCLUDE_SUBDIR}/THNN")
+INSTALL(FILES generic/THNN.h DESTINATION "${THNN_INSTALL_INCLUDE_SUBDIR}/THNN/generic")
diff --git a/lib/THNN/generic/GatedLinearUnit.c b/lib/THNN/generic/GatedLinearUnit.c
new file mode 100644
index 0000000..d412a7b
--- /dev/null
+++ b/lib/THNN/generic/GatedLinearUnit.c
@@ -0,0 +1,71 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/GatedLinearUnit.c"
+#else
+
+void THNN_(GatedLinear_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ int dim)
+{
+ // size output to half of input
+ dim = dim - 1;
+ const long nIn = THTensor_(size)(input, dim);
+ THArgCheck(nIn % 2 == 0, 2, "Halving dimension must be even. Dim %d is size %ld", dim+1, nIn);
+
+ const long inputSize = THTensor_(size)(input, dim) / 2;
+ THLongStorage *newSizes = THTensor_(newSizeOf)(input);
+ THLongStorage_set(newSizes, dim, inputSize);
+ THTensor_(resize)(output, newSizes, NULL);
+
+ // halve tensor
+ THTensor *firstHalf = THTensor_(newNarrow)(input, dim, 0, inputSize);
+ THTensor *secondHalf = THTensor_(newNarrow)(input, dim, inputSize, inputSize);
+
+ // x = x1:cmul( sigmoid(x2) )
+ THTensor_(sigmoid)(output, secondHalf);
+ THTensor_(cmul)(output, output, firstHalf);
+
+ THLongStorage_free(newSizes);
+ THTensor_(free)(firstHalf);
+ THTensor_(free)(secondHalf);
+}
+
+void THNN_(GatedLinear_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ int dim)
+{
+ // set up tensors
+ dim = dim - 1;
+ const long nIn = THTensor_(size)(input, dim);
+ THArgCheck(nIn % 2 == 0, 2, "Halving dimension must be even. Dim %d is size %ld", dim+1, nIn);
+
+ THTensor_(resizeAs)(gradInput, input);
+ const long inputSize = THTensor_(size)(input, dim) / 2;
+ THTensor *firstHalf = THTensor_(newNarrow)(input, dim, 0, inputSize);
+ THTensor *secondHalf = THTensor_(newNarrow)(input, dim, inputSize, inputSize);
+ THTensor *gradInputfirstHalf = THTensor_(newNarrow)(gradInput, dim, 0, inputSize);
+ THTensor *gradInputsecondHalf = THTensor_(newNarrow)(gradInput, dim, inputSize, inputSize);
+
+ THTensor_(sigmoid)(gradInputfirstHalf, secondHalf);
+
+ TH_TENSOR_APPLY2(real, gradInputsecondHalf, real, gradInputfirstHalf,
+ real z = *gradInputfirstHalf_data;
+ *gradInputsecondHalf_data = (1. - z) * z;
+ );
+
+ THTensor_(cmul)(gradInputfirstHalf, gradInputfirstHalf, gradOutput);
+
+ THTensor_(cmul)(gradInputsecondHalf, gradInputsecondHalf, gradOutput);
+ THTensor_(cmul)(gradInputsecondHalf, gradInputsecondHalf, firstHalf);
+
+ THTensor_(free)(firstHalf);
+ THTensor_(free)(secondHalf);
+ THTensor_(free)(gradInputfirstHalf);
+ THTensor_(free)(gradInputsecondHalf);
+}
+
+#endif
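For reference, the TH_TENSOR_APPLY2 loop above reuses the sigmoid already stored in the first-half slot of gradInput: with y = a * sigmoid(b), where a and b are the first and second halves of the input along dim, the standard GLU gradients are

    dL/da = dL/dy * sigmoid(b)
    dL/db = dL/dy * a * sigmoid(b) * (1 - sigmoid(b))

so the code first writes sigmoid(b) into the first-half slot, forms sigmoid(b) * (1 - sigmoid(b)) in the second-half slot, and then multiplies each slot element-wise by gradOutput (and the second slot additionally by a).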
diff --git a/lib/THNN/generic/THNN.h b/lib/THNN/generic/THNN.h
index 8fd50f5..4420962 100644
--- a/lib/THNN/generic/THNN.h
+++ b/lib/THNN/generic/THNN.h
@@ -102,6 +102,18 @@ TH_API void THNN_(DistKLDivCriterion_updateGradInput)(
THTensor *gradInput, // [OUT] gradient w.r.t. input
bool sizeAverage); // if true, the loss will be normalized **by total number of elements**
+TH_API void THNN_(GatedLinear_updateOutput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *output, // [OUT] output tensor, half size of input along dimension dim
+ int dim); // dimension for halving operation
+TH_API void THNN_(GatedLinear_updateGradInput)(
+ THNNState *state, // library's state
+ THTensor *input, // input tensor
+ THTensor *gradOutput, // gradient w.r.t module's output
+ THTensor *gradInput, // [OUT] gradient w.r.t input
+ int dim); // dimension for halving operation
+
// HardShink outputs 0 on interval of (-lambda; lambda) or original value otherwise.
TH_API void THNN_(HardShrink_updateOutput)(
THNNState *state, // library's state
@@ -576,6 +588,44 @@ TH_API void THNN_(TemporalSubSampling_accGradParameters)(
int kW, int dW,
real scale);
+TH_API void THNN_(TemporalRowConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int dW,
+ int padW,
+ bool featFirst);
+TH_API void THNN_(TemporalRowConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int dW,
+ int padW,
+ bool featFirst);
+TH_API void THNN_(TemporalRowConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int dW,
+ int padW,
+ bool featFirst,
+ real scale);
+
TH_API void THNN_(BatchNormalization_updateOutput)(
THNNState *state,
THTensor *input,
diff --git a/lib/THNN/generic/TemporalRowConvolution.c b/lib/THNN/generic/TemporalRowConvolution.c
new file mode 100644
index 0000000..9e62939
--- /dev/null
+++ b/lib/THNN/generic/TemporalRowConvolution.c
@@ -0,0 +1,465 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/TemporalRowConvolution.c"
+#else
+
+static inline void THNN_(TemporalRowConvolution_shapeCheck)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *weight,
+ THTensor *bias,
+ int kW,
+ int dW,
+ int padW) {
+
+ THArgCheck(kW > 0, 5,
+ "kernel size should be greater than zero, but got kW: %d", kW);
+ THArgCheck(dW > 0, 6,
+ "stride should be greater than zero, but got dW: %d", dW);
+ THNN_ARGCHECK(weight->nDimension == 3, 3, weight,
+ "3D weight tensor expected, but got: %s");
+
+ if (bias != NULL) {
+ THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
+ }
+
+ // we're always looking at (possibly batch) x feats x seq
+ int ndim = input->nDimension;
+ int dimF = 0;
+ int dimS = 1;
+
+ if (ndim == 3) {
+ ++dimS;
+ ++dimF;
+ }
+
+ THNN_ARGCHECK(ndim == 2 || ndim == 3, 1, input,
+ "2D or 3D (batch mode) input tensor expected, but got :%s");
+
+ long inputFrameSize = weight->size[0];
+ long nInputFrame = input->size[dimS];
+ long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+ if (nOutputFrame < 1) {
+ THError("Given input size: (%d x %d). "
+ "Calculated output size: (%d x %d). Output size is too small",
+ inputFrameSize, nInputFrame, inputFrameSize, nOutputFrame);
+ }
+
+ THNN_CHECK_DIM_SIZE(input, ndim, dimF, inputFrameSize);
+
+ if (gradOutput != NULL) {
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimF, inputFrameSize);
+ THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimS, nOutputFrame);
+ }
+}
+
+static void THNN_(unfolded_acc_row)(
+ THTensor *finput,
+ THTensor *input,
+ int kW,
+ int dW,
+ int padW,
+ long inputFrameSize,
+ long nInputFrame,
+ long nOutputFrame) {
+
+ size_t c;
+ real *input_data = THTensor_(data)(input);
+ real *finput_data = THTensor_(data)(finput);
+
+// #pragma omp parallel for private(c)
+ for (c = 0; c < inputFrameSize; c++) {
+ size_t kw, x;
+ long long ix = 0;
+
+ for (kw = 0; kw < kW; kw++) {
+ real *src = finput_data
+ + c * (kW * nOutputFrame)
+ + kw * (nOutputFrame);
+ real *dst = input_data + c * (nInputFrame);
+
+ ix = (long long)(kw);
+ if (dW == 1) {
+ THVector_(add)(dst + (size_t)(ix), src, 1, nOutputFrame);
+ } else {
+ for (x = 0; x < nOutputFrame; x++) {
+ THVector_(add)(dst + (size_t)(ix + x * dW),
+ src + (size_t)(x), 1, 1);
+ }
+ }
+ }
+ }
+}
+
+static void THNN_(unfolded_copy_row)(
+ THTensor *finput,
+ THTensor *input,
+ int kW,
+ int dW,
+ int padW,
+ long inputFrameSize,
+ long nInputFrame,
+ long nOutputFrame) {
+
+ long k;
+ real *input_data = THTensor_(data)(input);
+ real *finput_data = THTensor_(data)(finput);
+
+// #pragma omp parallel for private(k)
+ for (k = 0; k < inputFrameSize * kW; k++) {
+ size_t c = k / kW;
+ size_t rest = k % kW;
+ size_t kw = rest % kW;
+ size_t x;
+ long long ix;
+ real *dst = finput_data + c * (kW * nOutputFrame) + kw * (nOutputFrame);
+ real *src = input_data + c * (nInputFrame);
+
+ ix = (long long)(kw);
+ if (dW == 1) {
+ memcpy(dst, src+(size_t)(ix), sizeof(real) * (nOutputFrame));
+ } else {
+ for (x = 0; x < nOutputFrame; x++) {
+ memcpy(dst + (size_t)(x), src + (size_t)(ix + x * dW),
+ sizeof(real) * 1);
+ }
+ }
+ }
+}
+
+static void THNN_(TemporalRowConvolution_updateOutput_frame)(
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ int kW,
+ int dW,
+ int padW,
+ long inputFrameSize,
+ long nInputFrame,
+ long nOutputFrame) {
+
+ long i;
+
+ THTensor *output3d = THTensor_(newWithStorage3d)(
+ output->storage, output->storageOffset,
+ inputFrameSize, -1,
+ 1, -1,
+ nOutputFrame, -1);
+
+ THNN_(unfolded_copy_row)(finput, input, kW, dW, padW,
+ inputFrameSize, nInputFrame, nOutputFrame);
+
+ THTensor_(zero)(output);
+
+ if (bias != NULL) {
+ for (i = 0; i < inputFrameSize; i++)
+ THVector_(fill)
+ (output->storage->data + output->storageOffset
+ + output->stride[0] * i,
+ THTensor_(get1d)(bias, i), nOutputFrame);
+ }
+
+ THTensor_(baddbmm)(output3d, 1, output3d, 1, weight, finput);
+
+ THTensor_(free)(output3d);
+}
+
+void THNN_(TemporalRowConvolution_updateOutput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *output,
+ THTensor *weight,
+ THTensor *bias,
+ THTensor *finput,
+ THTensor *fgradInput, // unused here but needed for Cuda
+ int kW,
+ int dW,
+ int padW,
+ bool featFirst) {
+
+ int ndim = input->nDimension;
+
+ THTensor *tinput;
+ if (!featFirst) {
+ tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
+ input = THTensor_(newContiguous)(tinput);
+ } else {
+ input = THTensor_(newContiguous)(input);
+ }
+
+ THNN_(TemporalRowConvolution_shapeCheck)(
+ state, input, NULL, weight, bias, kW, dW, padW);
+
+ long inputFrameSize = weight->size[0];
+ long nInputFrame = input->size[ndim - 1];
+ long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+ if (ndim == 2) { /* non-batch mode */
+
+ THTensor_(resize3d)(finput, inputFrameSize, kW, nOutputFrame);
+ THTensor_(resize2d)(output, inputFrameSize, nOutputFrame);
+
+ THTensor_(zero)(finput);
+ THTensor_(zero)(output);
+
+ THNN_(TemporalRowConvolution_updateOutput_frame)
+ (input, output, weight, bias, finput,
+ kW, dW, padW,
+ inputFrameSize, nInputFrame, nOutputFrame);
+
+ } else {
+ long T = input->size[0];
+ long t;
+
+ THTensor_(resize4d)(finput, T, inputFrameSize, kW, nOutputFrame);
+ THTensor_(resize3d)(output, T, inputFrameSize, nOutputFrame);
+
+ THTensor_(zero)(finput);
+ THTensor_(zero)(output);
+
+#pragma omp parallel for private(t)
+ for (t = 0; t < T; t++) {
+ THTensor *input_t = THTensor_(newSelect)(input, 0, t);
+ THTensor *output_t = THTensor_(newSelect)(output, 0, t);
+ THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+ THNN_(TemporalRowConvolution_updateOutput_frame)
+ (input_t, output_t, weight, bias, finput_t,
+ kW, dW, padW, inputFrameSize, nInputFrame, nOutputFrame);
+
+ THTensor_(free)(input_t);
+ THTensor_(free)(output_t);
+ THTensor_(free)(finput_t);
+ }
+ }
+
+ if (!featFirst) { // NOTE: output will NOT be contiguous in this case
+ THTensor_(transpose)(output, output, ndim - 1, ndim - 2);
+ THTensor_(free)(tinput);
+ }
+
+ THTensor_(free)(input);
+}
+
+static void THNN_(TemporalRowConvolution_updateGradInput_frame)(
+ THTensor *gradInput,
+ THTensor *gradOutput,
+ THTensor *weight,
+ THTensor *fgradInput,
+ int kW,
+ int dW,
+ int padW,
+ long inputFrameSize,
+ long nInputFrame,
+ long nOutputFrame) {
+
+ THTensor *gradOutput3d = THTensor_(newWithStorage3d)(
+ gradOutput->storage, gradOutput->storageOffset,
+ inputFrameSize, -1,
+ 1, -1,
+ nOutputFrame, -1);
+
+ // weight: inputFrameSize x kW x 1
+ // gradOutput3d: inputFrameSize x 1 x nOutputFrame
+ THTensor_(baddbmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput3d);
+ // fgradInput: inputFrameSize x kW x nOutputFrame
+ THTensor_(free)(gradOutput3d);
+
+ THTensor_(zero)(gradInput);
+
+ THNN_(unfolded_acc_row)(fgradInput, gradInput,
+ kW, dW, padW,
+ inputFrameSize, nInputFrame, nOutputFrame);
+}
+
+void THNN_(TemporalRowConvolution_updateGradInput)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradInput,
+ THTensor *weight,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int dW,
+ int padW,
+ bool featFirst) {
+
+ int ndim = input->nDimension;
+
+ THTensor *tinput, *tgradOutput;
+
+ if (!featFirst) {
+ tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
+ tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2);
+
+ input = THTensor_(newContiguous)(tinput);
+ gradOutput = THTensor_(newContiguous)(tgradOutput);
+
+ } else {
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ }
+
+ THNN_(TemporalRowConvolution_shapeCheck)(state, input, gradOutput, weight,
+ NULL, kW, dW, padW);
+
+ long inputFrameSize = weight->size[0];
+ long nInputFrame = input->size[ndim - 1];
+ long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+ THTensor_(resizeAs)(fgradInput, finput);
+ THTensor_(resizeAs)(gradInput, input);
+
+ THTensor_(zero)(fgradInput);
+ THTensor_(zero)(gradInput);
+
+ THTensor_(transpose)(weight, weight, 1, 2);
+
+ if (ndim == 2) {
+ THNN_(TemporalRowConvolution_updateGradInput_frame)
+ (gradInput, gradOutput, weight, fgradInput,
+ kW, dW, padW,
+ inputFrameSize, nInputFrame, nOutputFrame);
+ } else {
+ long T = input->size[0];
+ long t;
+
+#pragma omp parallel for private(t)
+ for (t = 0; t < T; t++) {
+
+ THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
+ THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+ THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
+
+ THNN_(TemporalRowConvolution_updateGradInput_frame)
+ (gradInput_t, gradOutput_t, weight, fgradInput_t,
+ kW, dW, padW,
+ inputFrameSize, nInputFrame, nOutputFrame);
+
+ THTensor_(free)(gradInput_t);
+ THTensor_(free)(gradOutput_t);
+ THTensor_(free)(fgradInput_t);
+ }
+ }
+
+ THTensor_(transpose)(weight, weight, 1, 2);
+
+ if (!featFirst) { // NOTE: gradInput will NOT be contiguous in this case
+
+ THTensor_(free)(tinput);
+ THTensor_(free)(tgradOutput);
+
+ THTensor_(transpose)(gradInput, gradInput, ndim - 1, ndim - 2);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+
+}
+
+static void THNN_(TemporalRowConvolution_accGradParameters_frame)(
+ THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias,
+ THTensor *finput, real scale) {
+
+ long i;
+ THTensor *gradOutput3d = THTensor_(newWithStorage3d)(
+ gradOutput->storage, gradOutput->storageOffset,
+ gradOutput->size[0], -1,
+ 1, -1,
+ gradOutput->size[1], -1);
+
+ THTensor_(transpose)(finput, finput, 1, 2);
+ // gradOutput3d: inputFrameSize x 1 x nOutputFrame
+ // finput: inputFrameSize x nOutputFrame x kW
+ THTensor_(baddbmm)(gradWeight, 1, gradWeight, scale, gradOutput3d, finput);
+ // gradWeight: inputFrameSize x 1 x kW
+ THTensor_(transpose)(finput, finput, 1, 2);
+
+ if (gradBias != NULL) {
+ for (i = 0; i < gradBias->size[0]; i++) {
+ long k;
+ real sum = 0;
+ real *data = gradOutput3d->storage->data
+ + gradOutput3d->storageOffset
+ + i * gradOutput3d->stride[0];
+ for (k = 0; k < gradOutput3d->size[2]; k++) {
+ sum += data[k];
+ }
+ (gradBias->storage->data + gradBias->storageOffset)[i]
+ += scale * sum;
+ }
+ }
+
+ THTensor_(free)(gradOutput3d);
+
+}
+
+void THNN_(TemporalRowConvolution_accGradParameters)(
+ THNNState *state,
+ THTensor *input,
+ THTensor *gradOutput,
+ THTensor *gradWeight,
+ THTensor *gradBias,
+ THTensor *finput,
+ THTensor *fgradInput,
+ int kW,
+ int dW,
+ int padW,
+ bool featFirst,
+ real scale) {
+
+ int ndim = input->nDimension;
+
+ THTensor *tinput, *tgradOutput;
+
+ if (!featFirst) {
+ tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
+ tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2);
+
+ input = THTensor_(newContiguous)(tinput);
+ gradOutput = THTensor_(newContiguous)(tgradOutput);
+ } else {
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+ }
+
+ THNN_(TemporalRowConvolution_shapeCheck)
+ (state, input, gradOutput, gradWeight, gradBias, kW, dW, padW);
+
+ long inputFrameSize = gradWeight->size[0];
+ long nInputFrame = input->size[ndim - 1];
+ long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+ if (ndim == 2) {
+ THNN_(TemporalRowConvolution_accGradParameters_frame)(
+ gradOutput, gradWeight, gradBias, finput, scale);
+ } else {
+ long T = input->size[0];
+ long t;
+
+ for (t = 0; t < T; t++) {
+ THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+ THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+ THNN_(TemporalRowConvolution_accGradParameters_frame)(
+ gradOutput_t, gradWeight, gradBias, finput_t, scale);
+
+ THTensor_(free)(gradOutput_t);
+ THTensor_(free)(finput_t);
+ }
+ }
+
+ if (!featFirst) {
+ THTensor_(free)(tinput);
+ THTensor_(free)(tgradOutput);
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+}
+
+#endif
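Both the shape check and the two update paths rely on the same output-length formula; a small worked example (numbers purely illustrative, and padW is always 0 in this version):

```lua
-- nOutputFrame = (nInputFrame + 2*padW - kW) / dW + 1  (integer division)
local nInputFrame, kW, dW, padW = 8, 3, 1, 0
local nOutputFrame = math.floor((nInputFrame + 2*padW - kW) / dW) + 1
print(nOutputFrame)  -- 6, matching the 6x5 output in doc/convolution.md
```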
diff --git a/lib/THNN/init.c b/lib/THNN/init.c
index 3a7806d..990775d 100644
--- a/lib/THNN/init.c
+++ b/lib/THNN/init.c
@@ -89,6 +89,9 @@
#include "generic/HardTanh.c"
#include "THGenerateFloatTypes.h"
+#include "generic/GatedLinearUnit.c"
+#include "THGenerateFloatTypes.h"
+
#include "generic/L1Cost.c"
#include "THGenerateFloatTypes.h"
@@ -167,6 +170,9 @@
#include "generic/TemporalMaxPooling.c"
#include "THGenerateFloatTypes.h"
+#include "generic/TemporalRowConvolution.c"
+#include "THGenerateFloatTypes.h"
+
#include "generic/BatchNormalization.c"
#include "THGenerateFloatTypes.h"
diff --git a/test.lua b/test.lua
index b3e1d16..e5c92ab 100644
--- a/test.lua
+++ b/test.lua
@@ -2116,6 +2116,20 @@ function nntest.CrossEntropyCriterion()
weights = weights / weights:sum()
cri = nn.CrossEntropyCriterion(weights)
criterionJacobianTest(cri, input, target)
+
+ -- verify nll.sizeAverage preservation
+ cri = nn.CrossEntropyCriterion(weights)
+ cri.nll.sizeAverage = false
+ criterionJacobianTest(cri, input, target)
+ mytester:eq(cri.nll.sizeAverage, false,
+ "ClassNLLCriterion.sizeAverage overwritten")
+
+ -- verify nll.sizeAverage propagation
+ cri = nn.CrossEntropyCriterion(weights)
+ cri.sizeAverage = false
+ criterionJacobianTest(cri, input, target)
+ mytester:eq(cri.nll.sizeAverage, false,
+ "ClassNLLCriterion.sizeAverage not propagated")
end
function nntest.LogSigmoid()
@@ -4265,6 +4279,129 @@ function nntest.TemporalSubSampling()
mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
end
+
+function nntest.TemporalRowConvolution()
+
+ local from = math.random(1,5)
+ local ki = math.random(1,5)
+ local si = math.random(1,2)
+ local outi = math.random(5,7)
+ local ini = (outi-1)*si+ki
+
+ local function jacTest(module)
+
+ local input
+ if module.featFirst then
+ input = torch.Tensor(from, ini):zero()
+ else
+ input = torch.Tensor(ini, from):zero()
+ end
+
+ -- 1D
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, "error on state" )
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err, precision, "error on weight ")
+
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err, precision, "error on bias ")
+ end
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err, precision, "error on weight [direct update] ")
+
+ if module.bias then
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err, precision, "error on bias [direct update] ")
+ end
+
+ for t, err in pairs(jac.testAllUpdate(module, input, "weight", "gradWeight")) do
+ mytester:assertlt(err, precision, string.format(
+ "error on weight [%s] ", t))
+ end
+
+ if module.bias then
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ "error on bias [%s] ", t))
+ end
+ end
+
+ -- 2D
+ local nBatchFrame = 4
+ if module.featFirst then
+ input = torch.Tensor(nBatchFrame, from, ini):zero()
+ else
+ input = torch.Tensor(nBatchFrame, ini, from):zero()
+ end
+
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, "error on state" )
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err, precision, "error on weight ")
+
+ if module.bias then
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err, precision, "error on bias ")
+ end
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err, precision, "error on weight [direct update] ")
+
+ if module.bias then
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err, precision, "error on bias [direct update] ")
+ end
+
+ for t, err in pairs(jac.testAllUpdate(module, input, "weight", "gradWeight")) do
+ mytester:assertlt(err, precision, string.format(
+ "error on weight [%s] ", t))
+ end
+
+ if module.bias then
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ "error on bias [%s] ", t))
+ end
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:eq(0, ferr, torch.typename(module) .. " - i/o forward err ", precision)
+ mytester:eq(0, berr, torch.typename(module) .. " - i/o backward err ", precision)
+
+ -- 2D matches 1D
+ local output = module:forward(input):clone()
+ local outputGrad = torch.randn(output:size())
+ local inputGrad = module:backward(input, outputGrad):clone()
+
+ local input1D = input:select(1, 2)
+ local output1D = module:forward(input1D)
+ local outputGrad1D = outputGrad:select(1, 2)
+ local inputGrad1D = module:backward(input1D, outputGrad1D)
+
+ mytester:assertTensorEq(output:select(1,2), output1D, 0.000001,
+ "error on 2D vs 1D forward")
+ mytester:assertTensorEq(inputGrad:select(1,2), inputGrad1D, 0.000001,
+ "error on 2D vs 1D backward")
+ end
+
+ local module = nn.TemporalRowConvolution(from, ki, si)
+ jacTest(module)
+ module:noBias()
+ jacTest(module)
+ module.bias = torch.Tensor(module.inputFrameSize):zero()
+ module.gradBias = torch.Tensor(module.inputFrameSize):zero()
+ module:reset()
+ module.featFirst = true
+ jacTest(module)
+ module:noBias()
+ jacTest(module, true)
+end
+
function nntest.TemporalMaxPooling()
local from = math.random(2,4)
local ki = math.random(5,7)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/lua-torch-nn.git