[lua-torch-nn] 01/09: New upstream version 0~20170204-g3a1b725

Zhou Mo cdluminate-guest at moszumanska.debian.org
Mon Feb 6 04:27:11 UTC 2017


This is an automated email from the git hooks/post-receive script.

cdluminate-guest pushed a commit to branch master
in repository lua-torch-nn.

commit 872bb5408e37b5ec2340c10930e9dbdd760986a0
Author: Zhou Mo <cdluminate at gmail.com>
Date:   Mon Feb 6 03:59:19 2017 +0000

    New upstream version 0~20170204-g3a1b725
---
 CMaxTable.lua                             |  25 +-
 CMinTable.lua                             |  25 +-
 Criterion.lua                             |   8 +
 CrossEntropyCriterion.lua                 |  18 +-
 GatedLinearUnit.lua                       |  45 +--
 Max.lua                                   |   2 +-
 Min.lua                                   |   2 +-
 Normalize.lua                             |   2 +-
 TemporalRowConvolution.lua                | 120 ++++++++
 doc/convolution.md                        |  60 ++++
 doc/image/lena.jpg                        | Bin 0 -> 39706 bytes
 doc/overview.md                           |   4 +-
 init.lua                                  |   1 +
 lib/THNN/CMakeLists.txt                   |   3 +
 lib/THNN/generic/GatedLinearUnit.c        |  71 +++++
 lib/THNN/generic/THNN.h                   |  50 ++++
 lib/THNN/generic/TemporalRowConvolution.c | 465 ++++++++++++++++++++++++++++++
 lib/THNN/init.c                           |   6 +
 test.lua                                  | 137 +++++++++
 19 files changed, 995 insertions(+), 49 deletions(-)

diff --git a/CMaxTable.lua b/CMaxTable.lua
index 62cede9..845e38d 100644
--- a/CMaxTable.lua
+++ b/CMaxTable.lua
@@ -4,25 +4,38 @@ function CMaxTable:__init()
    parent.__init(self)
    self.gradInput = {}
    self.maxIdx = torch.Tensor()
+   self.mask = torch.Tensor()
+   self.maxVals = torch.Tensor()
+   self.gradMaxVals = torch.Tensor()
 end
 
 function CMaxTable:updateOutput(input)
    self.output:resizeAs(input[1]):copy(input[1])
    self.maxIdx:resizeAs(input[1]):fill(1)
    for i=2,#input do
-      local mask = torch.gt(input[i], self.output)
-      self.maxIdx:maskedFill(mask, i)
-      self.output:maskedCopy(mask, input[i][mask])
+      self.maskByteTensor = self.maskByteTensor or
+         (torch.type(self.output) == 'torch.CudaTensor' and
+         torch.CudaByteTensor() or torch.ByteTensor())
+      self.mask:gt(input[i], self.output)
+      self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+      self.maxIdx:maskedFill(self.maskByteTensor, i)
+      self.maxVals:maskedSelect(input[i], self.maskByteTensor)
+      self.output:maskedCopy(self.maskByteTensor, self.maxVals)
    end
    return self.output
 end
 
 function CMaxTable:updateGradInput(input, gradOutput)
    for i=1,#input do
-      self.gradInput[i] = input[i].new()
+      self.gradInput[i] = self.gradInput[i] or input[i].new()
       self.gradInput[i]:resizeAs(input[i]):fill(0.0)
-      local mask = torch.eq(self.maxIdx, i)
-      self.gradInput[i]:maskedCopy(mask, gradOutput[mask])
+      self.maskByteTensor = self.maskByteTensor or
+         (torch.type(self.output) == 'torch.CudaTensor' and
+         torch.CudaByteTensor() or torch.ByteTensor())
+      self.mask:eq(self.maxIdx, i)
+      self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+      self.gradMaxVals:maskedSelect(gradOutput, self.maskByteTensor)
+      self.gradInput[i]:maskedCopy(self.maskByteTensor, self.gradMaxVals)
    end
 
    for i=#input+1, #self.gradInput do
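The rewrite above keeps the comparison mask and the selected values in buffers on the module instead of allocating temporaries on every call, and converts the mask to a ByteTensor (or CudaByteTensor on the GPU) before the masked* operations. A minimal standalone sketch of that pattern; the variable names are illustrative and not part of the patch, and the CUDA branch assumes cutorch is loaded:

```lua
-- Sketch: reuse a comparison mask and its ByteTensor copy across calls,
-- choosing CudaByteTensor when the data lives on the GPU (requires cutorch).
local a, b = torch.rand(4), torch.rand(4)
local mask = torch.Tensor()                    -- same type as the data, holds the 0/1 comparison
local byteMask = torch.type(a) == 'torch.CudaTensor'
   and torch.CudaByteTensor() or torch.ByteTensor()

mask:gt(b, a)                                  -- elementwise b > a
byteMask:resize(mask:size()):copy(mask)        -- masked* operations expect a byte mask
local picked = b:maskedSelect(byteMask)        -- values of b where the mask is set
a:maskedCopy(byteMask, picked)                 -- a now holds max(a, b) elementwise
```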
diff --git a/CMinTable.lua b/CMinTable.lua
index a8385e8..25b9a19 100644
--- a/CMinTable.lua
+++ b/CMinTable.lua
@@ -4,25 +4,38 @@ function CMinTable:__init()
    parent.__init(self)
    self.gradInput = {}
    self.minIdx = torch.Tensor()
+   self.mask = torch.Tensor()
+   self.minVals = torch.Tensor()
+   self.gradMaxVals = torch.Tensor()
 end
 
 function CMinTable:updateOutput(input)
    self.output:resizeAs(input[1]):copy(input[1])
    self.minIdx:resizeAs(input[1]):fill(1)
    for i=2,#input do
-      local mask = torch.lt(input[i], self.output)
-      self.minIdx:maskedFill(mask, i)
-      self.output:maskedCopy(mask, input[i][mask])
+      self.maskByteTensor = self.maskByteTensor or
+         (torch.type(self.output) == 'torch.CudaTensor' and
+         torch.CudaByteTensor() or torch.ByteTensor())
+      self.mask:lt(input[i], self.output)
+      self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+      self.minIdx:maskedFill(self.maskByteTensor, i)
+      self.minVals:maskedSelect(input[i], self.maskByteTensor)
+      self.output:maskedCopy(self.maskByteTensor, self.minVals)
    end
    return self.output
 end
 
 function CMinTable:updateGradInput(input, gradOutput)
    for i=1,#input do
-      self.gradInput[i] = torch.Tensor()
+      self.gradInput[i] = self.gradInput[i] or input[i].new()
       self.gradInput[i]:resizeAs(input[i]):fill(0.0)
-      local mask = torch.eq(self.minIdx, i)
-      self.gradInput[i]:maskedCopy(mask, gradOutput[mask])
+      self.maskByteTensor = self.maskByteTensor or
+         (torch.type(self.output) == 'torch.CudaTensor' and
+         torch.CudaByteTensor() or torch.ByteTensor())
+      self.mask:eq(self.minIdx, i)
+      self.maskByteTensor:resize(self.mask:size()):copy(self.mask)
+      self.gradMaxVals:maskedSelect(gradOutput, self.maskByteTensor)
+      self.gradInput[i]:maskedCopy(self.maskByteTensor, self.gradMaxVals)
    end
 
    for i=#input+1, #self.gradInput do
diff --git a/Criterion.lua b/Criterion.lua
index 4efb279..e48f068 100644
--- a/Criterion.lua
+++ b/Criterion.lua
@@ -49,6 +49,14 @@ function Criterion:cuda()
    return self:type('torch.CudaTensor')
 end
 
+function Criterion:cudaHalf()
+   return self:type('torch.CudaHalfTensor')
+end
+
+function Criterion:cudaDouble()
+   return self:type('torch.CudaDoubleTensor')
+end
+
 function Criterion:__call__(input, target)
    self.output = self:forward(input, target)
    self.gradInput = self:backward(input, target)
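The two new conversion helpers mirror the existing Criterion:cuda()/:float()/:double() shorthands and simply delegate to :type(). A small usage sketch, assuming cutorch/cunn are installed and the GPU build provides the half- and double-precision tensor types:

```lua
-- Sketch: the new conversions are thin shorthands for :type(); running them
-- assumes cutorch/cunn are installed and the GPU build provides these types.
local crit = nn.MSECriterion()
crit:cuda()        -- equivalent to crit:type('torch.CudaTensor')
crit:cudaDouble()  -- equivalent to crit:type('torch.CudaDoubleTensor')
crit:cudaHalf()    -- equivalent to crit:type('torch.CudaHalfTensor')
```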
diff --git a/CrossEntropyCriterion.lua b/CrossEntropyCriterion.lua
index d4d19e5..2f72cf8 100644
--- a/CrossEntropyCriterion.lua
+++ b/CrossEntropyCriterion.lua
@@ -1,17 +1,25 @@
 local CrossEntropyCriterion, Criterion = torch.class('nn.CrossEntropyCriterion', 'nn.Criterion')
 
-function CrossEntropyCriterion:__init(weights)
+function CrossEntropyCriterion:__init(weights, sizeAverage)
    Criterion.__init(self)
    self.lsm = nn.LogSoftMax()
-   self.nll = nn.ClassNLLCriterion(weights)
+   self.nll = nn.ClassNLLCriterion(weights, sizeAverage)
+   self.sizeAverage = self.nll.sizeAverage
+   self.oldSizeAverage = self.sizeAverage
 end
 
 function CrossEntropyCriterion:updateOutput(input, target)
    input = input:squeeze()
    target = type(target) == 'number' and target or target:squeeze()
+   -- only propagate if value has changed to preserve old behavior
+   -- of setting nll.sizeAverage directly
+   if self.sizeAverage ~= self.oldSizeAverage then
+      self.nll.sizeAverage = self.sizeAverage
+   end
    self.lsm:updateOutput(input)
    self.nll:updateOutput(self.lsm.output, target)
    self.output = self.nll.output
+   self.oldSizeAverage = self.sizeAverage
    return self.output
 end
 
@@ -19,9 +27,15 @@ function CrossEntropyCriterion:updateGradInput(input, target)
    local size = input:size()
    input = input:squeeze()
    target = type(target) == 'number' and target or target:squeeze()
+   -- only propagate if value has changed to preserve old behavior
+   -- of setting nll.sizeAverage directly
+   if self.sizeAverage ~= self.oldSizeAverage then
+      self.nll.sizeAverage = self.sizeAverage
+   end
    self.nll:updateGradInput(self.lsm.output, target)
    self.lsm:updateGradInput(input, self.nll.gradInput)
    self.gradInput:view(self.lsm.gradInput, size)
+   self.oldSizeAverage = self.sizeAverage
    return self.gradInput
 end
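With this change the constructor forwards sizeAverage to the wrapped ClassNLLCriterion, and the update methods only push self.sizeAverage down when it has actually changed, so older code that set cri.nll.sizeAverage directly keeps its behavior. A short illustrative sketch (sizes and class indices are made up):

```lua
-- Sketch: both ways of disabling size averaging now end up in the same place.
local input  = torch.randn(4, 10)              -- 4 samples, 10 classes (illustrative)
local target = torch.LongTensor{1, 3, 5, 7}

-- new style: set it on the wrapper; it is propagated to the inner ClassNLLCriterion
local a = nn.CrossEntropyCriterion()
a.sizeAverage = false
print(a:forward(input, target))                -- summed (not averaged) loss

-- old style: poke the inner criterion directly; still honoured
local b = nn.CrossEntropyCriterion()
b.nll.sizeAverage = false
print(b:forward(input, target))
```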
 
diff --git a/GatedLinearUnit.lua b/GatedLinearUnit.lua
index 5f215ca..5273abf 100644
--- a/GatedLinearUnit.lua
+++ b/GatedLinearUnit.lua
@@ -2,41 +2,26 @@ local GatedLinearUnit, parent = torch.class('nn.GatedLinearUnit', 'nn.Module')
 
 function GatedLinearUnit:__init(dim)
    parent.__init(self)
-   self.sigmoid = nn.Sigmoid()
    self.dim = dim
 end
 
 function GatedLinearUnit:updateOutput(input)
-    local dim = self.dim or input:dim()
-    local inputSize = input:size(dim)
-
-    assert(inputSize % 2 == 0, "halving dimension needs to be even")
-
-    self.fHalf = input:narrow(dim, 1, inputSize/2)
-    self.sHalf = input:narrow(dim, inputSize/2 + 1, inputSize/2)
-
-    self.sHalfOut = self.sigmoid:forward(self.sHalf)
-    self.output:resizeAs(self.fHalf):copy(self.fHalf):cmul(self.sHalfOut)
-
-    return self.output
+   local dim = self.dim or input:dim()
+   input.THNN.GatedLinear_updateOutput(
+      input:cdata(),
+      self.output:cdata(),
+      dim
+   )
+   return self.output
 end
 
 function GatedLinearUnit:updateGradInput(input, gradOutput)
-    local dim = self.dim or input:dim()
-    local inputSize = input:size(dim)
-
-    assert(inputSize % 2 == 0, "halving dimension needs to be even")
-
-    local fGradInput = self.sHalfOut
-    local sGradInput = self.sigmoid:backward(self.sHalf, gradOutput)
-                                   :cmul(self.fHalf)
-
-    self.gradInput:resizeAs(input)
-    self.gradInput:narrow(dim, 1, inputSize/2)
-                    :copy(fGradInput)
-                    :cmul(gradOutput)
-    self.gradInput:narrow(dim, inputSize/2+1, inputSize/2)
-                    :copy(sGradInput)
-
-    return self.gradInput
+   local dim = self.dim or input:dim()
+   input.THNN.GatedLinear_updateGradInput(
+      input:cdata(),
+      gradOutput:cdata(),
+      self.gradInput:cdata(),
+      dim
+   )
+   return self.gradInput
 end
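The module now delegates to the THNN GatedLinear kernels instead of composing narrow/Sigmoid in Lua, but the computed function is unchanged: the chosen dimension is split in half and the output is the first half times the sigmoid of the second half. A sketch comparing the module against that Lua-level reference (sizes are illustrative):

```lua
-- Sketch: the module (now backed by THNN) against a plain Lua reference.
local glu = nn.GatedLinearUnit()     -- dim defaults to the input's last dimension
local x = torch.randn(3, 8)          -- the halved dimension (size 8) must be even

local out = glu:forward(x)           -- 3x4

-- Lua-level reference of the same computation
local a = x:narrow(2, 1, 4)
local b = x:narrow(2, 5, 4)
local ref = torch.cmul(a, torch.sigmoid(b))
print((out - ref):abs():max())       -- should be ~0
```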
diff --git a/Max.lua b/Max.lua
index 2aa67d3..8273e80 100644
--- a/Max.lua
+++ b/Max.lua
@@ -21,7 +21,7 @@ end
 function Max:_lazyInit()
    self._output = self._output or self.output.new()
    if not self._indices then
-      if torch.type(self.output) == 'torch.CudaTensor' then
+      if torch.typename(self.output):find('torch%.Cuda.*Tensor') then
          self._indices = torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor()
       else
          self._indices = torch.LongTensor()
diff --git a/Min.lua b/Min.lua
index 252f52e..3a3e4a8 100644
--- a/Min.lua
+++ b/Min.lua
@@ -21,7 +21,7 @@ end
 function Min:_lazyInit()
    self._output = self._output or self.output.new()
    if not self._indices then
-      if torch.type(self.output) == 'torch.CudaTensor' then
+      if torch.typename(self.output):find('torch%.Cuda.*Tensor') then
          self._indices = torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor()
       else
          self._indices = torch.LongTensor()
diff --git a/Normalize.lua b/Normalize.lua
index b6d1298..0937ebb 100644
--- a/Normalize.lua
+++ b/Normalize.lua
@@ -24,7 +24,7 @@ function Normalize:updateOutput(input)
   if self.p == math.huge then
     -- specialization for the infinity norm
     if not self._indices then
-      if torch.type(self.output) == 'torch.CudaTensor' then
+      if torch.typename(self.output):find('torch%.Cuda.*Tensor') then
         self._indices = torch.CudaLongTensor and torch.CudaLongTensor() or torch.CudaTensor()
       else
         self._indices = torch.LongTensor()
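Max, Min and Normalize previously special-cased only torch.CudaTensor; the pattern match above accepts any CUDA tensor type (CudaDoubleTensor, CudaHalfTensor, ...) when deciding whether the index buffer should be a CudaLongTensor. A standalone sketch of the check; the helper name is made up for illustration, and the CUDA cases assume cutorch:

```lua
-- Sketch: the old equality test misses non-default CUDA tensor types; the
-- pattern match accepts them all. Helper name is illustrative only.
local function isCudaTensor(t)
   return torch.typename(t):find('torch%.Cuda.*Tensor') ~= nil
end

print(isCudaTensor(torch.DoubleTensor()))   -- false
-- with cutorch loaded, each of these would be true:
--   isCudaTensor(torch.CudaTensor())
--   isCudaTensor(torch.CudaDoubleTensor())
--   isCudaTensor(torch.CudaHalfTensor())
```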
diff --git a/TemporalRowConvolution.lua b/TemporalRowConvolution.lua
new file mode 100644
index 0000000..7c9d6a2
--- /dev/null
+++ b/TemporalRowConvolution.lua
@@ -0,0 +1,120 @@
+local THNN = require "nn.THNN"
+
+local TemporalRowConvolution, parent = torch.class("nn.TemporalRowConvolution", "nn.Module")
+
+function TemporalRowConvolution:__init(inputFrameSize, kW, dW, featFirst)
+  parent.__init(self)
+
+  self.inputFrameSize = inputFrameSize
+  self.kW = kW
+  self.dW = dW or 1
+
+  self.weight = torch.Tensor(inputFrameSize, 1, kW)
+  self.bias = torch.Tensor(inputFrameSize)
+  self.gradWeight = torch.Tensor(inputFrameSize, 1, kW)
+  self.gradBias = torch.Tensor(inputFrameSize)
+
+  -- Set to true for batch x inputFrameSize x nInputFrame
+  self.featFirst = featFirst and true or false
+  self:reset()
+end
+
+function TemporalRowConvolution:noBias()
+  self.bias = nil
+  self.gradBias = nil
+  return self
+end
+
+function TemporalRowConvolution:reset(stdv)
+  if stdv then
+    stdv = stdv * math.sqrt(3)
+  else
+    stdv = 1 / math.sqrt(self.kW * self.inputFrameSize)
+  end
+  self.weight:uniform(-stdv, stdv)
+  self.bias:uniform(-stdv, stdv)
+end
+
+function TemporalRowConvolution:updateOutput(input)
+  assert(input.THNN, torch.type(input)..".THNN backend not imported")
+  self.finput = self.finput or input.new()
+  self.fgradInput = self.fgradInput or input.new()
+
+  input.THNN.TemporalRowConvolution_updateOutput(
+    input:cdata(),
+    self.output:cdata(),
+    self.weight:cdata(),
+    THNN.optionalTensor(self.bias),
+    self.finput:cdata(),
+    self.fgradInput:cdata(),
+    self.kW,
+    self.dW,
+    0, -- would be self.padW
+    self.featFirst
+  )
+
+  return self.output
+end
+
+function TemporalRowConvolution:updateGradInput(input, gradOutput)
+  assert(input.THNN, torch.type(input)..".THNN backend not imported")
+
+  if self.gradInput then
+    input.THNN.TemporalRowConvolution_updateGradInput(
+      input:cdata(),
+      gradOutput:cdata(),
+      self.gradInput:cdata(),
+      self.weight:cdata(),
+      self.finput:cdata(),
+      self.fgradInput:cdata(),
+      self.kW,
+      self.dW,
+      0, -- would be self.padW
+      self.featFirst
+    )
+    return self.gradInput
+  end
+end
+
+function TemporalRowConvolution:accGradParameters(input, gradOutput, scale)
+  assert(input.THNN, torch.type(input)..".THNN backend not imported")
+
+  input.THNN.TemporalRowConvolution_accGradParameters(
+    input:cdata(),
+    gradOutput:cdata(),
+    self.gradWeight:cdata(),
+    THNN.optionalTensor(self.gradBias),
+    self.finput:cdata(),
+    self.fgradInput:cdata(),
+    self.kW,
+    self.dW,
+    0, -- would be self.padW
+    self.featFirst,
+    scale or 1)
+end
+
+function TemporalRowConvolution:type(type, tensorCache)
+  if self.finput then self.finput:set() end
+  if self.fgradInput then self.fgradInput:set() end
+  return parent.type(self, type, tensorCache)
+end
+
+function TemporalRowConvolution:__tostring__()
+  local s = string.format("%s(%d, %d", torch.type(self), self.inputFrameSize, self.kW)
+  if self.dW ~= 1 then
+    s = s .. string.format(", %d", self.dW)
+  end
+  if self.padW and self.padW ~= 0 then -- currently padding is not supported
+    s = s .. ", " .. self.padW
+  end
+  if self.bias then
+    return s .. ")"
+  else
+    return s .. ") without bias"
+  end
+end
+
+function TemporalRowConvolution:clearState()
+  nn.utils.clear(self, "finput", "fgradInput", "_input", "_gradOutput")
+  return parent.clearState(self)
+end
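The new module gives every input row (feature channel) its own length-kW kernel, so the frame size is preserved while the number of frames shrinks according to kW and dW. A hedged usage sketch with illustrative sizes, mirroring the shapes documented in doc/convolution.md below:

```lua
-- Sketch: row convolution with illustrative sizes; each of the 5 feature rows
-- gets its own 1D kernel of width 3, stride 1.
local inputFrameSize, kW, dW = 5, 3, 1
local rowconv = nn.TemporalRowConvolution(inputFrameSize, kW, dW)

local x = torch.randn(8, inputFrameSize)      -- nInputFrame x inputFrameSize
local y = rowconv:forward(x)                  -- (8 - 3)/1 + 1 = 6 output frames
print(y:size())                               -- 6 x 5

-- batch mode with the features-first layout
local rowconvF = nn.TemporalRowConvolution(inputFrameSize, kW, dW, true)
local xb = torch.randn(4, inputFrameSize, 8)  -- nBatchFrame x inputFrameSize x nInputFrame
print(rowconvF:forward(xb):size())            -- 4 x 5 x 6
```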
diff --git a/doc/convolution.md b/doc/convolution.md
index 21cfa57..d87a749 100644
--- a/doc/convolution.md
+++ b/doc/convolution.md
@@ -9,6 +9,7 @@ A convolution is an integral that expresses the amount of overlap of one functio
     * [TemporalSubSampling](#nn.TemporalSubSampling) : a 1D sub-sampling over an input sequence ;
     * [TemporalMaxPooling](#nn.TemporalMaxPooling) : a 1D max-pooling operation over an input sequence ;
     * [LookupTable](#nn.LookupTable) : a convolution of width `1`, commonly used for word embeddings ;
+    * [TemporalRowConvolution](#nn.TemporalRowConvolution) : a row-oriented 1D convolution over an input sequence ;
   * [Spatial Modules](#nn.SpatialModules) apply to inputs with two-dimensional relationships (e.g. images):
     * [SpatialConvolution](#nn.SpatialConvolution) : a 2D convolution over an input image ;
     * [SpatialFullConvolution](#nn.SpatialFullConvolution) : a 2D full convolution over an input image ;
@@ -322,6 +323,65 @@ Outputs something like:
 Note that the 1st, 2nd and 10th rows of the module.weight are updated to
 obey the max-norm constraint, since their indices appear in the "input".
 
+<a name="nn.TemporalRowConvolution"></a>
+### TemporalRowConvolution ###
+
+```lua
+module = nn.TemporalRowConvolution(inputFrameSize, kW, [dW], [featFirst])
+```
+
+Applies a 1D row-oriented convolution over an input sequence composed of `nInputFrame` frames. The input tensor in `forward(input)` is expected to be a 2D tensor (`nInputFrame x inputFrameSize`) or a 3D tensor (`nBatchFrame x nInputFrame x inputFrameSize`). The layer can be used without a bias by calling `module:noBias()`.
+
+The parameters are the following:
+  * `inputFrameSize`: The input frame size expected in sequences given into `forward()`.
+  * `kW`: The kernel width of the convolution.
+  * `dW`: The step of the convolution. Default is `1`.
+  * `featFirst`: If `true`, expects input in the form `nBatchFrame x inputFrameSize x nInputFrame`. Default is `false`.
+
+  If the input sequence is a 2D tensor of dimension `nInputFrame x inputFrameSize`, the output sequence will be `nOutputFrame x inputFrameSize` where
+
+  ```lua
+  nOutputFrame = (nInputFrame - kW) / dW + 1
+  ```
+
+  If the input sequence is a 3D tensor of dimension `nBatchFrame x nInputFrame x inputFrameSize`, the output sequence will be `nBatchFrame x nOutputFrame x inputFrameSize`.
+
+  The parameters of the convolution can be found in `self.weight` (Tensor of size `inputFrameSize x 1 x kW`) and `self.bias` (Tensor of size `inputFrameSize`). The corresponding gradients can be found in `self.gradWeight` and `self.gradBias`.
+
+  For a 2D input, the output value of the layer can be precisely described as:
+
+  ```lua
+  output[t][i] = bias[i] + sum_{k=1}^kW weight[i][k] * input[dW(t-1)+k][i]
+  ```
+
+  Here is a simple example:
+  ```lua
+  inp = 5;
+  kw = 3;
+  dw = 1;
+
+  -- row convolution with a kernel width of 3 (future context of 2)
+  module = nn.TemporalRowConvolution(inp, kw, dw)
+
+  x = torch.rand(8, inp)
+  print(module:forward(x))
+  ```
+
+  which gives
+
+  ```lua
+  0.1188  0.1945  0.1065 -0.0077 -0.3433
+  0.0630  0.4354  0.1954 -0.2103 -0.3506
+  0.0340  0.2222  0.3039 -0.2012 -0.3814
+  0.0820  0.3489  0.2533 -0.0940 -0.3298
+  0.1964  0.1533  0.1750 -0.1493 -0.3059
+  0.2651  0.2474  0.0521 -0.1134 -0.4024
+  [torch.Tensor of dimension 6x5]
+  ```
+
+  More information about the layer can be found [here](http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf).
+
+
 <a name="nn.SpatialModules"></a>
 ## Spatial Modules ##
 Excluding an optional batch dimension, spatial layers expect a 3D Tensor as input. The
diff --git a/doc/image/lena.jpg b/doc/image/lena.jpg
new file mode 100644
index 0000000..d4a8c36
Binary files /dev/null and b/doc/image/lena.jpg differ
diff --git a/doc/overview.md b/doc/overview.md
index 6db8008..f8f4f3e 100644
--- a/doc/overview.md
+++ b/doc/overview.md
@@ -106,10 +106,10 @@ criterion).  The input is usually a Tensor (except if you use special
 kind of gradient modules, like [table layers](table.md#nn.TableLayers)). The
 label type depends on the criterion.  For example, the
 [MSECriterion](criterion.md#nn.MSECriterion) expect a Tensor, but the
-[ClassNLLCriterion](criterion.md#nn.ClassNLLCriterion) except a integer number (the
+[ClassNLLCriterion](criterion.md#nn.ClassNLLCriterion) expects an integer number (the
 class).
 
-Such a dataset is easily constructed by using Lua tables, but it could
+Such a dataset is easily constructed by using Lua tables, but it could be
 any `C` object for example, as long as required operators/methods
 are implemented.  [See an example](containers.md#nn.DoItStochasticGradient).
 
diff --git a/init.lua b/init.lua
index cad1c3c..66ef8f5 100644
--- a/init.lua
+++ b/init.lua
@@ -117,6 +117,7 @@ require('nn.TemporalConvolution')
 require('nn.TemporalSubSampling')
 require('nn.TemporalMaxPooling')
 require('nn.TemporalDynamicKMaxPooling')
+require('nn.TemporalRowConvolution')
 require('nn.SpatialSubtractiveNormalization')
 require('nn.SpatialDivisiveNormalization')
 require('nn.SpatialContrastiveNormalization')
diff --git a/lib/THNN/CMakeLists.txt b/lib/THNN/CMakeLists.txt
index 33eaf56..f9859fa 100644
--- a/lib/THNN/CMakeLists.txt
+++ b/lib/THNN/CMakeLists.txt
@@ -12,6 +12,7 @@ MESSAGE(STATUS "TH_LIBRARIES: ${TH_LIBRARIES}")
 
 IF(NOT THNN_INSTALL_LIB_SUBDIR)
   SET(THNN_INSTALL_LIB_SUBDIR "lib" CACHE PATH "THNN install library directory")
+  SET(THNN_INSTALL_INCLUDE_SUBDIR "include" CACHE PATH "THNN install include subdirectory")
 ENDIF()
 
 # Flags
@@ -78,3 +79,5 @@ IF(THNN_SO_VERSION)
 ENDIF(THNN_SO_VERSION)
 
 INSTALL(TARGETS THNN LIBRARY DESTINATION ${THNN_INSTALL_LIB_SUBDIR})
+INSTALL(FILES THNN.h DESTINATION "${THNN_INSTALL_INCLUDE_SUBDIR}/THNN")
+INSTALL(FILES generic/THNN.h DESTINATION "${THNN_INSTALL_INCLUDE_SUBDIR}/THNN/generic")
diff --git a/lib/THNN/generic/GatedLinearUnit.c b/lib/THNN/generic/GatedLinearUnit.c
new file mode 100644
index 0000000..d412a7b
--- /dev/null
+++ b/lib/THNN/generic/GatedLinearUnit.c
@@ -0,0 +1,71 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/GatedLinearUnit.c"
+#else
+
+void THNN_(GatedLinear_updateOutput)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *output,
+          int dim)
+{
+  // size output to half of input
+  dim = dim - 1;
+  const long nIn = THTensor_(size)(input, dim);
+  THArgCheck(nIn % 2 == 0, 2, "Halving dimension must be even. Dim %d is size %ld", dim+1, nIn);
+
+  const long inputSize = THTensor_(size)(input, dim) / 2;
+  THLongStorage *newSizes = THTensor_(newSizeOf)(input);
+  THLongStorage_set(newSizes, dim, inputSize);
+  THTensor_(resize)(output, newSizes, NULL);
+
+  // halve tensor
+  THTensor *firstHalf = THTensor_(newNarrow)(input, dim, 0, inputSize);
+  THTensor *secondHalf = THTensor_(newNarrow)(input, dim, inputSize, inputSize);
+
+  // x = x1:cmul( sigmoid(x2) )
+  THTensor_(sigmoid)(output, secondHalf);
+  THTensor_(cmul)(output, output, firstHalf);
+
+  THLongStorage_free(newSizes);
+  THTensor_(free)(firstHalf);
+  THTensor_(free)(secondHalf);
+}
+
+void THNN_(GatedLinear_updateGradInput)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *gradOutput,
+          THTensor *gradInput,
+          int dim)
+{
+  // set up tensors
+  dim = dim - 1;
+  const long nIn = THTensor_(size)(input, dim);
+  THArgCheck(nIn % 2 == 0, 2, "Halving dimension must be even. Dim %d is size %ld", dim+1, nIn);
+
+  THTensor_(resizeAs)(gradInput, input);
+  const long inputSize = THTensor_(size)(input, dim) / 2;
+  THTensor *firstHalf = THTensor_(newNarrow)(input, dim, 0, inputSize);
+  THTensor *secondHalf = THTensor_(newNarrow)(input, dim, inputSize, inputSize);
+  THTensor *gradInputfirstHalf = THTensor_(newNarrow)(gradInput, dim, 0, inputSize);
+  THTensor *gradInputsecondHalf = THTensor_(newNarrow)(gradInput, dim, inputSize, inputSize);
+
+  THTensor_(sigmoid)(gradInputfirstHalf, secondHalf);
+
+  TH_TENSOR_APPLY2(real, gradInputsecondHalf, real, gradInputfirstHalf,
+    real z = *gradInputfirstHalf_data;
+    *gradInputsecondHalf_data = (1. - z) * z;
+  );
+
+  THTensor_(cmul)(gradInputfirstHalf, gradInputfirstHalf, gradOutput);
+
+  THTensor_(cmul)(gradInputsecondHalf, gradInputsecondHalf, gradOutput);
+  THTensor_(cmul)(gradInputsecondHalf, gradInputsecondHalf, firstHalf);
+
+  THTensor_(free)(firstHalf);
+  THTensor_(free)(secondHalf);
+  THTensor_(free)(gradInputfirstHalf);
+  THTensor_(free)(gradInputsecondHalf);
+}
+
+#endif
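For reference, the backward code above follows directly from differentiating the forward definition; writing the two halves of the input as x1 and x2 and the upstream gradient as g (a reference note, not part of the patch):

```latex
y = x_1 \odot \sigma(x_2)
\frac{\partial L}{\partial x_1} = g \odot \sigma(x_2)
\frac{\partial L}{\partial x_2} = g \odot x_1 \odot \sigma(x_2) \, (1 - \sigma(x_2))
```

The TH_TENSOR_APPLY2 loop computes the sigma(x2) * (1 - sigma(x2)) factor in place, reusing the first-half gradient buffer that already holds sigma(x2).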
diff --git a/lib/THNN/generic/THNN.h b/lib/THNN/generic/THNN.h
index 8fd50f5..4420962 100644
--- a/lib/THNN/generic/THNN.h
+++ b/lib/THNN/generic/THNN.h
@@ -102,6 +102,18 @@ TH_API void THNN_(DistKLDivCriterion_updateGradInput)(
           THTensor *gradInput,         // [OUT] gradient w.r.t. input
           bool sizeAverage);           // if true, the loss will be normalized **by total number of elements**
 
+TH_API void THNN_(GatedLinear_updateOutput)(
+          THNNState *state,            // library's state
+          THTensor *input,             // input tensor
+          THTensor *output,            // [OUT] output tensor, half size of input along dimension dim
+          int dim);                    // dimension for halving operation
+TH_API void THNN_(GatedLinear_updateGradInput)(
+          THNNState *state,            // library's state
+          THTensor *input,             // input tensor
+          THTensor *gradOutput,        // gradient w.r.t module's output
+          THTensor *gradInput,         // [OUT] gradient w.r.t input
+          int dim);                    // dimension for halving operation
+
 // HardShrink outputs 0 on interval of (-lambda; lambda) or original value otherwise.
 TH_API void THNN_(HardShrink_updateOutput)(
           THNNState *state,            // library's state
@@ -576,6 +588,44 @@ TH_API void THNN_(TemporalSubSampling_accGradParameters)(
           int kW, int dW,
           real scale);
 
+TH_API void THNN_(TemporalRowConvolution_updateOutput)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *output,
+          THTensor *weight,
+          THTensor *bias,
+          THTensor *finput,
+          THTensor *fgradInput,
+          int kW,
+          int dW,
+          int padW,
+          bool featFirst);
+TH_API void THNN_(TemporalRowConvolution_updateGradInput)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *gradOutput,
+          THTensor *gradInput,
+          THTensor *weight,
+          THTensor *finput,
+          THTensor *fgradInput,
+          int kW,
+          int dW,
+          int padW,
+          bool featFirst);
+TH_API void THNN_(TemporalRowConvolution_accGradParameters)(
+          THNNState *state,
+          THTensor *input,
+          THTensor *gradOutput,
+          THTensor *gradWeight,
+          THTensor *gradBias,
+          THTensor *finput,
+          THTensor *fgradInput,
+          int kW,
+          int dW,
+          int padW,
+          bool featFirst,
+          real scale);
+
 TH_API void THNN_(BatchNormalization_updateOutput)(
           THNNState *state,
           THTensor *input,
diff --git a/lib/THNN/generic/TemporalRowConvolution.c b/lib/THNN/generic/TemporalRowConvolution.c
new file mode 100644
index 0000000..9e62939
--- /dev/null
+++ b/lib/THNN/generic/TemporalRowConvolution.c
@@ -0,0 +1,465 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/TemporalRowConvolution.c"
+#else
+
+static inline void THNN_(TemporalRowConvolution_shapeCheck)(
+	THNNState *state,
+	THTensor *input,
+	THTensor *gradOutput,
+	THTensor *weight,
+	THTensor *bias,
+	int kW,
+	int dW,
+	int padW) {
+
+	THArgCheck(kW > 0, 5,
+	           "kernel size should be greater than zero, but got kW: %d", kW);
+	THArgCheck(dW > 0, 6,
+	           "stride should be greater than zero, but got dW: %d", dW);
+	THNN_ARGCHECK(weight->nDimension == 3, 3, weight,
+	              "3D weight tensor expected, but got: %s");
+
+	if (bias != NULL) {
+		THNN_CHECK_DIM_SIZE(bias, 1, 0, weight->size[0]);
+	}
+
+	// we're always looking at (possibly batch) x feats x seq
+	int ndim = input->nDimension;
+	int dimF = 0;
+	int dimS = 1;
+
+	if (ndim == 3) {
+		++dimS;
+		++dimF;
+	}
+
+	THNN_ARGCHECK(ndim == 2 || ndim == 3, 1, input,
+	              "2D or 3D (batch mode) input tensor expected, but got :%s");
+
+	long inputFrameSize = weight->size[0];
+	long nInputFrame = input->size[dimS];
+	long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+	if (nOutputFrame < 1) {
+		THError("Given input size: (%d x %d). "
+		        "Calculated output size: (%d x %d). Output size is too small",
+		        inputFrameSize, nInputFrame, inputFrameSize, nOutputFrame);
+	}
+
+	THNN_CHECK_DIM_SIZE(input, ndim, dimF, inputFrameSize);
+
+	if (gradOutput != NULL) {
+		THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimF, inputFrameSize);
+		THNN_CHECK_DIM_SIZE(gradOutput, ndim, dimS, nOutputFrame);
+	}
+}
+
+static void THNN_(unfolded_acc_row)(
+	THTensor *finput,
+	THTensor *input,
+	int kW,
+	int dW,
+	int padW,
+	long inputFrameSize,
+	long nInputFrame,
+	long nOutputFrame) {
+
+	size_t c;
+	real *input_data = THTensor_(data)(input);
+	real *finput_data = THTensor_(data)(finput);
+
+// #pragma omp parallel for private(c)
+	for (c = 0; c < inputFrameSize; c++) {
+		size_t kw, x;
+		long long ix = 0;
+
+		for (kw = 0; kw < kW; kw++) {
+			real *src = finput_data
+			            + c * (kW * nOutputFrame)
+			            + kw * (nOutputFrame);
+			real *dst = input_data + c * (nInputFrame);
+
+			ix = (long long)(kw);
+			if (dW == 1) {
+				THVector_(add)(dst + (size_t)(ix), src, 1, nOutputFrame);
+			} else {
+				for (x = 0; x < nOutputFrame; x++) {
+					THVector_(add)(dst + (size_t)(ix + x * dW),
+					               src + (size_t)(x), 1, 1);
+				}
+			}
+		}
+	}
+}
+
+static void THNN_(unfolded_copy_row)(
+	THTensor *finput,
+	THTensor *input,
+	int kW,
+	int dW,
+	int padW,
+	long inputFrameSize,
+	long nInputFrame,
+	long nOutputFrame) {
+
+	long k;
+	real *input_data = THTensor_(data)(input);
+	real *finput_data = THTensor_(data)(finput);
+
+// #pragma omp parallel for private(k)
+	for (k = 0; k < inputFrameSize * kW; k++) {
+		size_t c = k / kW;
+		size_t rest = k % kW;
+		size_t kw = rest % kW;
+		size_t x;
+		long long ix;
+		real *dst = finput_data + c * (kW * nOutputFrame) + kw * (nOutputFrame);
+		real *src = input_data + c * (nInputFrame);
+
+		ix = (long long)(kw);
+		if (dW == 1) {
+			memcpy(dst, src+(size_t)(ix), sizeof(real) * (nOutputFrame));
+		} else {
+			for (x = 0; x < nOutputFrame; x++) {
+				memcpy(dst + (size_t)(x), src + (size_t)(ix + x * dW),
+				       sizeof(real) * 1);
+			}
+		}
+	}
+}
+
+static void THNN_(TemporalRowConvolution_updateOutput_frame)(
+	THTensor *input,
+	THTensor *output,
+	THTensor *weight,
+	THTensor *bias,
+	THTensor *finput,
+	int kW,
+	int dW,
+	int padW,
+	long inputFrameSize,
+	long nInputFrame,
+	long nOutputFrame) {
+
+	long i;
+
+	THTensor *output3d = THTensor_(newWithStorage3d)(
+		output->storage, output->storageOffset,
+		inputFrameSize, -1,
+		1, -1,
+		nOutputFrame, -1);
+
+	THNN_(unfolded_copy_row)(finput, input, kW, dW, padW,
+	                         inputFrameSize, nInputFrame, nOutputFrame);
+
+	THTensor_(zero)(output);
+
+	if (bias != NULL) {
+		for (i = 0; i < inputFrameSize; i++)
+			THVector_(fill)
+			        (output->storage->data + output->storageOffset
+			        + output->stride[0] * i,
+			        THTensor_(get1d)(bias, i), nOutputFrame);
+	}
+
+	THTensor_(baddbmm)(output3d, 1, output3d, 1, weight, finput);
+
+	THTensor_(free)(output3d);
+}
+
+void THNN_(TemporalRowConvolution_updateOutput)(
+	THNNState *state,
+	THTensor *input,
+	THTensor *output,
+	THTensor *weight,
+	THTensor *bias,
+	THTensor *finput,
+	THTensor *fgradInput,     // unused here but needed for Cuda
+	int kW,
+	int dW,
+	int padW,
+	bool featFirst) {
+
+	int ndim = input->nDimension;
+
+	THTensor *tinput;
+	if (!featFirst) {
+		tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
+		input = THTensor_(newContiguous)(tinput);
+	} else {
+		input = THTensor_(newContiguous)(input);
+	}
+
+	THNN_(TemporalRowConvolution_shapeCheck)(
+		state, input, NULL, weight, bias, kW, dW, padW);
+
+	long inputFrameSize = weight->size[0];
+	long nInputFrame = input->size[ndim - 1];
+	long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+	if (ndim == 2) { /* non-batch mode */
+
+		THTensor_(resize3d)(finput, inputFrameSize, kW, nOutputFrame);
+		THTensor_(resize2d)(output, inputFrameSize, nOutputFrame);
+
+		THTensor_(zero)(finput);
+		THTensor_(zero)(output);
+
+		THNN_(TemporalRowConvolution_updateOutput_frame)
+		        (input, output, weight, bias, finput,
+		        kW, dW, padW,
+		        inputFrameSize, nInputFrame, nOutputFrame);
+
+	} else {
+		long T = input->size[0];
+		long t;
+
+		THTensor_(resize4d)(finput, T, inputFrameSize, kW, nOutputFrame);
+		THTensor_(resize3d)(output, T, inputFrameSize, nOutputFrame);
+
+		THTensor_(zero)(finput);
+		THTensor_(zero)(output);
+
+#pragma omp parallel for private(t)
+		for (t = 0; t < T; t++) {
+			THTensor *input_t = THTensor_(newSelect)(input, 0, t);
+			THTensor *output_t = THTensor_(newSelect)(output, 0, t);
+			THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+			THNN_(TemporalRowConvolution_updateOutput_frame)
+			        (input_t, output_t, weight, bias, finput_t,
+			        kW, dW, padW, inputFrameSize, nInputFrame, nOutputFrame);
+
+			THTensor_(free)(input_t);
+			THTensor_(free)(output_t);
+			THTensor_(free)(finput_t);
+		}
+	}
+
+	if (!featFirst) { // NOTE: output will NOT be contiguous in this case
+		THTensor_(transpose)(output, output, ndim - 1, ndim - 2);
+		THTensor_(free)(tinput);
+	}
+
+	THTensor_(free)(input);
+}
+
+static void THNN_(TemporalRowConvolution_updateGradInput_frame)(
+	THTensor *gradInput,
+	THTensor *gradOutput,
+	THTensor *weight,
+	THTensor *fgradInput,
+	int kW,
+	int dW,
+	int padW,
+	long inputFrameSize,
+	long nInputFrame,
+	long nOutputFrame) {
+
+	THTensor *gradOutput3d = THTensor_(newWithStorage3d)(
+		gradOutput->storage, gradOutput->storageOffset,
+		inputFrameSize, -1,
+		1, -1,
+		nOutputFrame, -1);
+
+	// weight:			inputFrameSize x kW x 1
+	// gradOutput3d:	inputFrameSize x 1 x nOutputFrame
+	THTensor_(baddbmm)(fgradInput, 0, fgradInput, 1, weight, gradOutput3d);
+	// fgradInput:		inputFrameSize x kW x nOutputFrame
+	THTensor_(free)(gradOutput3d);
+
+	THTensor_(zero)(gradInput);
+
+	THNN_(unfolded_acc_row)(fgradInput, gradInput,
+	                        kW, dW, padW,
+	                        inputFrameSize, nInputFrame, nOutputFrame);
+}
+
+void THNN_(TemporalRowConvolution_updateGradInput)(
+	THNNState *state,
+	THTensor *input,
+	THTensor *gradOutput,
+	THTensor *gradInput,
+	THTensor *weight,
+	THTensor *finput,
+	THTensor *fgradInput,
+	int kW,
+	int dW,
+	int padW,
+	bool featFirst) {
+
+	int ndim = input->nDimension;
+
+	THTensor *tinput, *tgradOutput;
+
+	if (!featFirst) {
+		tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
+		tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2);
+
+		input = THTensor_(newContiguous)(tinput);
+		gradOutput = THTensor_(newContiguous)(tgradOutput);
+
+	} else {
+		input = THTensor_(newContiguous)(input);
+		gradOutput = THTensor_(newContiguous)(gradOutput);
+	}
+
+	THNN_(TemporalRowConvolution_shapeCheck)(state, input, gradOutput, weight,
+	                                         NULL, kW, dW, padW);
+
+	long inputFrameSize = weight->size[0];
+	long nInputFrame = input->size[ndim - 1];
+	long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+	THTensor_(resizeAs)(fgradInput, finput);
+	THTensor_(resizeAs)(gradInput, input);
+
+	THTensor_(zero)(fgradInput);
+	THTensor_(zero)(gradInput);
+
+	THTensor_(transpose)(weight, weight, 1, 2);
+
+	if (ndim == 2) {
+		THNN_(TemporalRowConvolution_updateGradInput_frame)
+		        (gradInput, gradOutput, weight, fgradInput,
+		        kW, dW, padW,
+		        inputFrameSize, nInputFrame, nOutputFrame);
+	} else {
+		long T = input->size[0];
+		long t;
+
+#pragma omp parallel for private(t)
+		for (t = 0; t < T; t++) {
+
+			THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
+			THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+			THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);
+
+			THNN_(TemporalRowConvolution_updateGradInput_frame)
+			        (gradInput_t, gradOutput_t, weight, fgradInput_t,
+			        kW, dW, padW,
+			        inputFrameSize, nInputFrame, nOutputFrame);
+
+			THTensor_(free)(gradInput_t);
+			THTensor_(free)(gradOutput_t);
+			THTensor_(free)(fgradInput_t);
+		}
+	}
+
+	THTensor_(transpose)(weight, weight, 1, 2);
+
+	if (!featFirst) { // NOTE: gradInput will NOT be contiguous in this case
+
+		THTensor_(free)(tinput);
+		THTensor_(free)(tgradOutput);
+
+		THTensor_(transpose)(gradInput, gradInput, ndim - 1, ndim - 2);
+	}
+
+	THTensor_(free)(input);
+	THTensor_(free)(gradOutput);
+
+}
+
+static void THNN_(TemporalRowConvolution_accGradParameters_frame)(
+	THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias,
+	THTensor *finput, real scale) {
+
+	long i;
+	THTensor *gradOutput3d = THTensor_(newWithStorage3d)(
+		gradOutput->storage, gradOutput->storageOffset,
+		gradOutput->size[0], -1,
+		1, -1,
+		gradOutput->size[1], -1);
+
+	THTensor_(transpose)(finput, finput, 1, 2);
+	// gradOutput3d:	inputFrameSize x 1 x nOutputFrame
+	// finput:			inputFrameSize x nOutputFrame x kW
+	THTensor_(baddbmm)(gradWeight, 1, gradWeight, scale, gradOutput3d, finput);
+	// gradWeight:		inputFrameSize x 1 x kW
+	THTensor_(transpose)(finput, finput, 1, 2);
+
+	if (gradBias != NULL) {
+		for (i = 0; i < gradBias->size[0]; i++) {
+			long k;
+			real sum = 0;
+			real *data = gradOutput3d->storage->data
+			             + gradOutput3d->storageOffset
+			             + i * gradOutput3d->stride[0];
+			for (k = 0; k < gradOutput3d->size[2]; k++) {
+				sum += data[k];
+			}
+			(gradBias->storage->data + gradBias->storageOffset)[i]
+			        += scale * sum;
+		}
+	}
+
+	THTensor_(free)(gradOutput3d);
+
+}
+
+void THNN_(TemporalRowConvolution_accGradParameters)(
+	THNNState *state,
+	THTensor *input,
+	THTensor *gradOutput,
+	THTensor *gradWeight,
+	THTensor *gradBias,
+	THTensor *finput,
+	THTensor *fgradInput,
+	int kW,
+	int dW,
+	int padW,
+	bool featFirst,
+	real scale) {
+
+	int ndim = input->nDimension;
+
+	THTensor *tinput, *tgradOutput;
+
+	if (!featFirst) {
+		tinput = THTensor_(newTranspose)(input, ndim - 1, ndim - 2);
+		tgradOutput = THTensor_(newTranspose)(gradOutput, ndim - 1, ndim - 2);
+
+		input = THTensor_(newContiguous)(tinput);
+		gradOutput = THTensor_(newContiguous)(tgradOutput);
+	} else {
+		input = THTensor_(newContiguous)(input);
+		gradOutput = THTensor_(newContiguous)(gradOutput);
+	}
+
+	THNN_(TemporalRowConvolution_shapeCheck)
+	        (state, input, gradOutput, gradWeight, gradBias, kW, dW, padW);
+
+	long inputFrameSize = gradWeight->size[0];
+	long nInputFrame = input->size[ndim - 1];
+	long nOutputFrame = (nInputFrame + 2 * padW - kW) / dW + 1;
+
+	if (ndim == 2) {
+		THNN_(TemporalRowConvolution_accGradParameters_frame)(
+			gradOutput, gradWeight, gradBias, finput, scale);
+	} else {
+		long T = input->size[0];
+		long t;
+
+		for (t = 0; t < T; t++) {
+			THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
+			THTensor *finput_t = THTensor_(newSelect)(finput, 0, t);
+
+			THNN_(TemporalRowConvolution_accGradParameters_frame)(
+				gradOutput_t, gradWeight, gradBias, finput_t, scale);
+
+			THTensor_(free)(gradOutput_t);
+			THTensor_(free)(finput_t);
+		}
+	}
+
+	if (!featFirst) {
+		THTensor_(free)(tinput);
+		THTensor_(free)(tgradOutput);
+	}
+
+	THTensor_(free)(input);
+	THTensor_(free)(gradOutput);
+}
+
+#endif
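The C kernel unfolds the input and drives the convolution through baddbmm, but its output should agree with the per-element formula documented in doc/convolution.md. A deliberately slow Lua cross-check, offered only as a sketch and not part of the patch (sizes are illustrative):

```lua
-- Sketch: naive reference for TemporalRowConvolution on a 2D input
-- (nInputFrame x inputFrameSize), matching the formula in doc/convolution.md.
local function rowConvReference(input, weight, bias, kW, dW)
   local nInputFrame, inputFrameSize = input:size(1), input:size(2)
   local nOutputFrame = math.floor((nInputFrame - kW) / dW) + 1
   local output = torch.zeros(nOutputFrame, inputFrameSize)
   for t = 1, nOutputFrame do
      for i = 1, inputFrameSize do
         local s = bias and bias[i] or 0
         for k = 1, kW do
            s = s + weight[i][1][k] * input[dW * (t - 1) + k][i]
         end
         output[t][i] = s
      end
   end
   return output
end

-- Example comparison against the module (illustrative sizes):
local m = nn.TemporalRowConvolution(5, 3, 1)
local x = torch.randn(8, 5)
local ref = rowConvReference(x, m.weight, m.bias, m.kW, m.dW)
print((m:forward(x) - ref):abs():max())   -- should be ~0
```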
diff --git a/lib/THNN/init.c b/lib/THNN/init.c
index 3a7806d..990775d 100644
--- a/lib/THNN/init.c
+++ b/lib/THNN/init.c
@@ -89,6 +89,9 @@
 #include "generic/HardTanh.c"
 #include "THGenerateFloatTypes.h"
 
+#include "generic/GatedLinearUnit.c"
+#include "THGenerateFloatTypes.h"
+
 #include "generic/L1Cost.c"
 #include "THGenerateFloatTypes.h"
 
@@ -167,6 +170,9 @@
 #include "generic/TemporalMaxPooling.c"
 #include "THGenerateFloatTypes.h"
 
+#include "generic/TemporalRowConvolution.c"
+#include "THGenerateFloatTypes.h"
+
 #include "generic/BatchNormalization.c"
 #include "THGenerateFloatTypes.h"
 
diff --git a/test.lua b/test.lua
index b3e1d16..e5c92ab 100644
--- a/test.lua
+++ b/test.lua
@@ -2116,6 +2116,20 @@ function nntest.CrossEntropyCriterion()
    weights = weights / weights:sum()
    cri = nn.CrossEntropyCriterion(weights)
    criterionJacobianTest(cri, input, target)
+
+   -- verify nll.sizeAverage preservation
+   cri = nn.CrossEntropyCriterion(weights)
+   cri.nll.sizeAverage = false
+   criterionJacobianTest(cri, input, target)
+   mytester:eq(cri.nll.sizeAverage, false,
+      "ClassNLLCriterion.sizeAverage overwritten")
+
+   -- verify nll.sizeAverage propagation
+   cri = nn.CrossEntropyCriterion(weights)
+   cri.sizeAverage = false
+   criterionJacobianTest(cri, input, target)
+   mytester:eq(cri.nll.sizeAverage, false,
+      "ClassNLLCriterion.sizeAverage not propagated")
 end
 
 function nntest.LogSigmoid()
@@ -4265,6 +4279,129 @@ function nntest.TemporalSubSampling()
    mytester:eq(0, berr, torch.typename(module) .. ' - i/o backward err ', precision)
 end
 
+
+function nntest.TemporalRowConvolution()
+
+  local from = math.random(1,5)
+  local ki = math.random(1,5)
+  local si = math.random(1,2)
+  local outi = math.random(5,7)
+  local ini = (outi-1)*si+ki
+
+  local function jacTest(module)
+
+    local input
+    if module.featFirst then
+      input = torch.Tensor(from, ini):zero()
+    else
+      input = torch.Tensor(ini, from):zero()
+    end
+
+    -- 1D
+    local err = jac.testJacobian(module, input)
+    mytester:assertlt(err, precision, "error on state" )
+
+    local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+    mytester:assertlt(err, precision, "error on weight ")
+
+    if module.bias then
+      local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+      mytester:assertlt(err, precision, "error on bias ")
+    end
+
+    local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+    mytester:assertlt(err, precision, "error on weight [direct update] ")
+
+    if module.bias then
+      local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+      mytester:assertlt(err, precision, "error on bias [direct update] ")
+    end
+
+    for t, err in pairs(jac.testAllUpdate(module, input, "weight", "gradWeight")) do
+      mytester:assertlt(err, precision, string.format(
+          "error on weight [%s] ", t))
+    end
+
+    if module.bias then
+      for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+        mytester:assertlt(err, precision, string.format(
+            "error on bias [%s] ", t))
+      end
+    end
+
+    -- 2D
+    local nBatchFrame = 4
+    if module.featFirst then
+      input = torch.Tensor(nBatchFrame, from, ini):zero()
+    else
+      input = torch.Tensor(nBatchFrame, ini, from):zero()
+    end
+
+
+    local err = jac.testJacobian(module, input)
+    mytester:assertlt(err, precision, "error on state" )
+
+    local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+    mytester:assertlt(err, precision, "error on weight ")
+
+    if module.bias then
+      local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+      mytester:assertlt(err, precision, "error on bias ")
+    end
+
+    local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+    mytester:assertlt(err, precision, "error on weight [direct update] ")
+
+    if module.bias then
+      local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+      mytester:assertlt(err, precision, "error on bias [direct update] ")
+    end
+
+    for t, err in pairs(jac.testAllUpdate(module, input, "weight", "gradWeight")) do
+      mytester:assertlt(err, precision, string.format(
+          "error on weight [%s] ", t))
+    end
+
+    if module.bias then
+      for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+        mytester:assertlt(err, precision, string.format(
+            "error on bias [%s] ", t))
+      end
+    end
+
+    local ferr, berr = jac.testIO(module, input)
+    mytester:eq(0, ferr, torch.typename(module) .. " - i/o forward err ", precision)
+    mytester:eq(0, berr, torch.typename(module) .. " - i/o backward err ", precision)
+
+    -- 2D matches 1D
+    local output = module:forward(input):clone()
+    local outputGrad = torch.randn(output:size())
+    local inputGrad = module:backward(input, outputGrad):clone()
+
+    local input1D = input:select(1, 2)
+    local output1D = module:forward(input1D)
+    local outputGrad1D = outputGrad:select(1, 2)
+    local inputGrad1D = module:backward(input1D, outputGrad1D)
+
+    mytester:assertTensorEq(output:select(1,2), output1D, 0.000001,
+    "error on 2D vs 1D forward")
+    mytester:assertTensorEq(inputGrad:select(1,2), inputGrad1D, 0.000001,
+    "error on 2D vs 1D backward")
+  end
+
+  local module = nn.TemporalRowConvolution(from, ki, si)
+  jacTest(module)
+  module:noBias()
+  jacTest(module)
+  module.bias = torch.Tensor(module.inputFrameSize):zero()
+  module.gradBias = torch.Tensor(module.inputFrameSize):zero()
+  module:reset()
+  module.featFirst = true
+  jacTest(module)
+  module:noBias()
+  jacTest(module, true)
+end
+
 function nntest.TemporalMaxPooling()
    local from = math.random(2,4)
    local ki = math.random(5,7)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/lua-torch-nn.git


