[lua-torch-optim] 01/05: New upstream version 0~20161016-g89ef52a

Zhou Mo cdluminate-guest at moszumanska.debian.org
Thu Nov 17 09:04:27 UTC 2016


This is an automated email from the git hooks/post-receive script.

cdluminate-guest pushed a commit to branch master
in repository lua-torch-optim.

commit 4d25dd4748c6ff46585299612a4d0d28f0d446b8
Author: Zhou Mo <cdluminate at gmail.com>
Date:   Thu Nov 17 08:55:07 2016 +0000

    New upstream version 0~20161016-g89ef52a
---
 CMakeLists.txt                        |   3 +
 ConfusionMatrix.lua                   |   2 +-
 Logger.lua                            |   8 +-
 checkgrad.lua                         |  15 +-
 doc/algos.md                          |   2 +-
 doc/image/parameterflattening.png     | Bin 0 -> 74658 bytes
 doc/image/parameterflattening.svg     | 338 ++++++++++++++++++++++++++++++++++
 doc/image/parameterflattening.svg.png | Bin 0 -> 74546 bytes
 doc/intro.md                          | 211 ++++++++++++++++++++-
 doc/logger.md                         |   6 +
 polyinterp.lua                        |  50 ++---
 rmsprop.lua                           |   3 +-
 12 files changed, 594 insertions(+), 44 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 26ec4de..a5e686a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,3 +12,6 @@ SET(src)
 FILE(GLOB luasrc *.lua)
 ADD_TORCH_PACKAGE(optim "${src}" "${luasrc}")
 #ADD_TORCH_DOK(dok optim "Machine Learning" "Optimization" 3.2)
+
+INSTALL(DIRECTORY "doc" DESTINATION "${Torch_INSTALL_LUA_PATH_SUBDIR}/optim")
+INSTALL(FILES "README.md" DESTINATION "${Torch_INSTALL_LUA_PATH_SUBDIR}/optim")
diff --git a/ConfusionMatrix.lua b/ConfusionMatrix.lua
index 8659a4e..ec5302c 100644
--- a/ConfusionMatrix.lua
+++ b/ConfusionMatrix.lua
@@ -256,7 +256,7 @@ function ConfusionMatrix:render(sortmode, display, block, legendwidth)
    -- legends
    local legend = {
       ['score'] = 'Confusion matrix [sorted by scores, global accuracy = %0.3f%%, per-class accuracy = %0.3f%%]',
-      ['occurrence'] = 'Confusiong matrix [sorted by occurences, accuracy = %0.3f%%, per-class accuracy = %0.3f%%]'
+      ['occurrence'] = 'Confusion matrix [sorted by occurrences, accuracy = %0.3f%%, per-class accuracy = %0.3f%%]'
    }
 
    -- parse matrix / normalize / count scores
diff --git a/Logger.lua b/Logger.lua
index 48915ef..31928ec 100644
--- a/Logger.lua
+++ b/Logger.lua
@@ -124,8 +124,12 @@ function Logger:style(symbols)
    return self
 end
 
-function Logger:setlogscale(value)
-   self.logscale = value
+function Logger:setlogscale(state)
+   self.logscale = state
+end
+
+function Logger:display(state)
+   self.showPlot = state
 end
 
 function Logger:plot(...)
diff --git a/checkgrad.lua b/checkgrad.lua
index aecb969..908a1a2 100644
--- a/checkgrad.lua
+++ b/checkgrad.lua
@@ -20,18 +20,29 @@ RETURN:
 function optim.checkgrad(opfunc, x, eps)
     
     -- compute true gradient:
-    local _,dC = opfunc(x)
+    local Corg,dC = opfunc(x)
     dC:resize(x:size())
     
+    local Ctmp -- temporary value
+    local isTensor = torch.isTensor(Corg)
+    if isTensor then
+          Ctmp = Corg.new(Corg:size())
+    end
+    
     -- compute numeric approximations to gradient:
     local eps = eps or 1e-7
     local dC_est = torch.Tensor():typeAs(dC):resizeAs(dC)
     for i = 1,dC:size(1) do
+      local tmp = x[i]
       x[i] = x[i] + eps
       local C1 = opfunc(x)
+      if isTensor then
+          Ctmp:copy(C1)
+          C1 = Ctmp
+      end
       x[i] = x[i] - 2 * eps
       local C2 = opfunc(x)
-      x[i] = x[i] + eps
+      x[i] = tmp
       dC_est[i] = (C1 - C2) / (2 * eps)
     end
 
diff --git a/doc/algos.md b/doc/algos.md
index a3ce681..a8dba9f 100644
--- a/doc/algos.md
+++ b/doc/algos.md
@@ -269,7 +269,7 @@ Algorithm is published in http://epubs.siam.org/doi/abs/10.1137/080716542
 <a name='optim.nag'></a>
 ## nag(opfunc, x[, config][, state])
 
-An implementation of *SGD* adapted with features of *Nesterov's Accelerated Gradient method*, based on the paper "On the Importance of Initialization and Momentum in Deep Learning" (Sutsveker et. al., ICML 2013) http://www.cs.toronto.edu/~fritz/absps/momentum.pdf.
+An implementation of *SGD* adapted with features of *Nesterov's Accelerated Gradient method*, based on the paper "On the Importance of Initialization and Momentum in Deep Learning" (Sutskever et. al., ICML 2013) http://www.cs.toronto.edu/~fritz/absps/momentum.pdf.
 
 Arguments:
 
diff --git a/doc/image/parameterflattening.png b/doc/image/parameterflattening.png
new file mode 100644
index 0000000..efab4de
Binary files /dev/null and b/doc/image/parameterflattening.png differ
diff --git a/doc/image/parameterflattening.svg b/doc/image/parameterflattening.svg
new file mode 100644
index 0000000..d58d62f
--- /dev/null
+++ b/doc/image/parameterflattening.svg
@@ -0,0 +1,338 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="275.54715mm"
+   height="214.99242mm"
+   viewBox="0 0 976.34814 761.78413"
+   id="svg2"
+   version="1.1"
+   inkscape:version="0.91 r13725"
+   sodipodi:docname="parameterflattening.svg"
+   inkscape:export-filename="/home/ubuntu/git/nn/doc/image/parameterflattening.svg.png"
+   inkscape:export-xdpi="90"
+   inkscape:export-ydpi="90">
+  <defs
+     id="defs4" />
+  <sodipodi:namedview
+     id="base"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageopacity="0.0"
+     inkscape:pageshadow="2"
+     inkscape:zoom="0.7"
+     inkscape:cx="165.78568"
+     inkscape:cy="360.0347"
+     inkscape:document-units="px"
+     inkscape:current-layer="layer1"
+     showgrid="false"
+     inkscape:window-width="1920"
+     inkscape:window-height="1024"
+     inkscape:window-x="0"
+     inkscape:window-y="0"
+     inkscape:window-maximized="1"
+     fit-margin-top="0"
+     fit-margin-left="0"
+     fit-margin-right="0"
+     fit-margin-bottom="0" />
+  <metadata
+     id="metadata7">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <g
+     inkscape:label="Layer 1"
+     inkscape:groupmode="layer"
+     id="layer1"
+     transform="translate(-145.10191,-140.95261)">
+    <rect
+       id="rect3336"
+       width="264.20071"
+       height="127.05788"
+       x="498.61389"
+       y="212.40469"
+       style="fill:none;stroke:#000000;stroke-width:1.08497822;stroke-opacity:1" />
+    <rect
+       id="rect3336-7"
+       width="264.20071"
+       height="127.05788"
+       x="499.32819"
+       y="384.54752"
+       style="fill:none;stroke:#000000;stroke-width:1.08497822;stroke-opacity:1" />
+    <rect
+       id="rect3336-7-1"
+       width="264.20071"
+       height="127.05788"
+       x="502.18533"
+       y="554.54755"
+       style="fill:none;stroke:#000000;stroke-width:1.08497822;stroke-opacity:1" />
+    <rect
+       id="rect3336-7-1-4"
+       width="264.20071"
+       height="127.05788"
+       x="499.32816"
+       y="705.97614"
+       style="fill:none;stroke:#000000;stroke-width:1.08497822;stroke-opacity:1" />
+    <rect
+       style="fill:#aafff8;fill-opacity:1;stroke:#000000;stroke-opacity:1"
+       id="rect4183"
+       width="18.571428"
+       height="631.42859"
+       x="170.00005"
+       y="206.64792" />
+    <rect
+       style="fill:#fcf2cd;fill-opacity:1;stroke:#000000;stroke-opacity:1"
+       id="rect4185"
+       width="18.571428"
+       height="631.42859"
+       x="207.14287"
+       y="207.50507" />
+    <rect
+       style="fill:#aafff8;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:8, 8;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect4187"
+       width="84.285713"
+       height="41.42857"
+       x="518.57141"
+       y="229.50507" />
+    <rect
+       style="fill:#fcf2cd;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:8, 8;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect4187-3"
+       width="84.285713"
+       height="41.42857"
+       x="518.42853"
+       y="283.07651" />
+    <rect
+       style="fill:#aafff8;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:8, 8;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect4187-8"
+       width="84.285713"
+       height="41.42857"
+       x="519.35712"
+       y="400.57651" />
+    <rect
+       style="fill:#fcf2cd;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:8, 8;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect4187-3-3"
+       width="84.285713"
+       height="41.42857"
+       x="519.21423"
+       y="454.14792" />
+    <rect
+       style="fill:#aafff8;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:8, 8;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect4187-8-7"
+       width="84.285713"
+       height="41.42857"
+       x="526.5"
+       y="572.00507" />
+    <rect
+       style="fill:#fcf2cd;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:8, 8;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect4187-3-3-8"
+       width="84.285713"
+       height="41.42857"
+       x="526.35712"
+       y="625.57648" />
+    <rect
+       style="fill:#aafff8;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:8, 8;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect4187-8-7-8"
+       width="84.285713"
+       height="41.42857"
+       x="529.35718"
+       y="722.00513" />
+    <rect
+       style="fill:#fcf2cd;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:8, 8;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect4187-3-3-8-3"
+       width="84.285713"
+       height="41.42857"
+       x="529.21429"
+       y="775.57648" />
+    <text
+       xml:space="preserve"
+       style="font-size:20px;fill:none;stroke:#000000;stroke-opacity:1"
+       x="1515.7142"
+       y="190.93362"
+       id="text4278"><tspan
+         sodipodi:role="line"
+         id="tspan4280"
+         x="1515.7142"
+         y="190.93362"></tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:20px;fill:#000000;stroke:#000000;stroke-opacity:1;fill-opacity:1;"
+       x="635.71429"
+       y="768.07654"
+       id="text4290"><tspan
+         sodipodi:role="line"
+         id="tspan4292"
+         x="635.71429"
+         y="768.07654">conv1</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:20px;fill:#000000;stroke:#000000;stroke-opacity:1;fill-opacity:1;"
+       x="627.14288"
+       y="613.79077"
+       id="text4294"><tspan
+         sodipodi:role="line"
+         id="tspan4296"
+         x="627.14288"
+         y="613.79077">conv2</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:20px;fill:#000000;stroke:#000000;stroke-opacity:1;fill-opacity:1;"
+       x="632.85718"
+       y="443.79074"
+       id="text4298"><tspan
+         sodipodi:role="line"
+         id="tspan4300"
+         x="632.85718"
+         y="443.79074">conv3</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:20px;fill:#000000;stroke:#000000;stroke-opacity:1;fill-opacity:1;"
+       x="631.42865"
+       y="259.50507"
+       id="text4302"><tspan
+         sodipodi:role="line"
+         id="tspan4304"
+         x="631.42865"
+         y="259.50507">conv4</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:20px;fill:#000000;stroke:#000000;stroke-opacity:1;fill-opacity:1;"
+       x="528.57141"
+       y="156.64792"
+       id="text4306"><tspan
+         sodipodi:role="line"
+         id="tspan4308"
+         x="528.57141"
+         y="156.64792">Network layers:</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:20px;fill:#000000;stroke:#000000;stroke-opacity:1;fill-opacity:1;"
+       x="145.14287"
+       y="159.79077"
+       id="text4310"><tspan
+         sodipodi:role="line"
+         x="145.14287"
+         y="159.79077"
+         id="tspan4314">flattened tensors:</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:20px;fill:#000000;fill-opacity:1;stroke:#000000;stroke-opacity:1;"
+       x="175.71434"
+       y="898.0766"
+       id="text4337"><tspan
+         sodipodi:role="line"
+         id="tspan4339"
+         x="175.71434"
+         y="898.0766">params tensor</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-size:20px;fill:#000000;fill-opacity:1;stroke:#000000;stroke-opacity:1;"
+       x="288.57147"
+       y="815.21936"
+       id="text4341"><tspan
+         sodipodi:role="line"
+         id="tspan4343"
+         x="288.57147"
+         y="815.21936">gradParams</tspan><tspan
+         sodipodi:role="line"
+         x="288.57147"
+         y="840.21936"
+         id="tspan4345">tensor</tspan></text>
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       d="M 284.28571,810.93366 228.57143,793.79078"
+       id="path4347"
+       inkscape:connector-curvature="0" />
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       d="M 191.42857,872.36216 180,843.79076"
+       id="path4349"
+       inkscape:connector-curvature="0" />
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       d="M 522.85714,230.93364 185.71429,205.21935"
+       id="path4351"
+       inkscape:connector-curvature="0" />
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       d="M 517.14285,269.50506 187.14286,342.36221"
+       id="path4353"
+       inkscape:connector-curvature="0" />
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       d="M 521.42857,396.64792 187.14286,340.93364"
+       id="path4355"
+       inkscape:connector-curvature="0" />
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       d="M 521.42857,440.93364 185.71429,483.79078"
+       id="path4357"
+       inkscape:connector-curvature="0" />
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       d="M 527.14285,625.21935 225.71428,506.64792"
+       id="path4359"
+       inkscape:connector-curvature="0" />
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       d="M 522.85714,666.64792 225.71428,659.50506"
+       id="path4361"
+       inkscape:connector-curvature="0" />
+    <text
+       xml:space="preserve"
+       style="font-size:20px;fill:#000000;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;"
+       x="801.42853"
+       y="649.50513"
+       id="text4363"><tspan
+         sodipodi:role="line"
+         id="tspan4365"
+         x="801.42853"
+         y="649.50513">conv2 grad weight:</tspan><tspan
+         sodipodi:role="line"
+         x="801.42853"
+         y="674.50513"
+         id="tspan4367">view onto flattened gradParams</tspan></text>
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       d="m 612.85708,640.9336 180,14.2857"
+       id="path4375"
+       inkscape:connector-curvature="0" />
+    <text
+       xml:space="preserve"
+       style="font-size:20px;fill:#000000;fill-opacity:1;stroke:#000000;stroke-width:1;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;"
+       x="791.42853"
+       y="400.93353"
+       id="text4377"><tspan
+         sodipodi:role="line"
+         id="tspan4379"
+         x="791.42853"
+         y="400.93353">conv3 weight:</tspan><tspan
+         sodipodi:role="line"
+         x="791.42853"
+         y="425.93353"
+         id="tspan4381">view onto flattened params</tspan><tspan
+         sodipodi:role="line"
+         x="791.42853"
+         y="450.93353"
+         id="tspan4383">tensor</tspan></text>
+    <path
+       style="fill:none;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
+       d="m 782.85708,403.7907 -180,11.4286"
+       id="path4387"
+       inkscape:connector-curvature="0" />
+  </g>
+</svg>
diff --git a/doc/image/parameterflattening.svg.png b/doc/image/parameterflattening.svg.png
new file mode 100644
index 0000000..ecf4068
Binary files /dev/null and b/doc/image/parameterflattening.svg.png differ
diff --git a/doc/intro.md b/doc/intro.md
index b387235..d0025aa 100644
--- a/doc/intro.md
+++ b/doc/intro.md
@@ -20,7 +20,7 @@ where:
 <a name='optim.example'></a>
 ## Example
 
-The state table is used to hold the state of the algorihtm.
+The state table is used to hold the state of the algorithm.
 It's usually initialized once, by the user, and then passed to the optim function as a black box.
 Example:
 
@@ -39,3 +39,212 @@ for i, sample in ipairs(training_samples) do
 end
 ```
 
+
+<a name="optim.training"></a>
+## Training using optim ##
+
+`optim` is a quite general optimizer, for minimizing any function with respect to a set of parameters.
+In our case, our function will be the loss of our network, given an input, and a set of weights. 
+The goal of training a neural net is to optimize the weights to give the lowest loss over our validation set, by using the training set as a proxy. 
+So, we are going to use optim to minimize the loss with respect to the weights, over our training set.
+
+To illustrate all the steps required, we will go over a simple example, where we will train a neural network on the classical XOR problem.
+We will feed the data to `optim` in minibatches (we will use here just one minibatch), breaking your training set into chunks, and feed each minibatch to `optim`, one by one.
+
+We need to give `optim` a function that will output the loss and the derivative of the loss with respect to the
+weights, given the current weights, as a function parameter.
+The function will have access to our training minibatch, and use this to calculate the loss, for this minibatch.
+Typically, the function would be defined inside our loop over batches, and therefore have access to the current minibatch data.
+
+
+### Neural Network ###
+
+We create a simple neural network with one hidden layer.
+
+```lua
+require 'nn'
+
+model = nn.Sequential()  -- make a multi-layer perceptron
+inputs = 2; outputs = 1; HUs = 20 -- parameters
+model:add(nn.Linear(inputs, HUs))
+model:add(nn.Tanh())
+model:add(nn.Linear(HUs, outputs))
+```
+
+> If we would like to train on GPU, then we need to ship the model to *device memory* by typing `model:cuda()` after having issued `require 'cunn'`.
+
+
+### Criterion ###
+
+We choose the *Mean Squared Error* loss `Criterion`:
+
+```lua
+criterion = nn.MSECriterion()
+```
+
+We are using an `nn.MSECriterion` because we are training on a regression task, predicting a continuous (real) target value, from `-1` to `+1`.
+For a classification task, with more than two classes, we would add an `nn.LogSoftMax` layer to the end of our network, and use a `nn.ClassNLLCriterion` loss criterion.
+Nevertheless, the XOR problem could be seen as a two-class classification task, associated to the `-1` and `+1` discrete outputs.
+
+> If we would like to train on GPU, then we need to ship the `Criterion` to *device memory* by typing `criterion:cuda()`.
+
+
+### Data set ###
+
+We will just create one minibatch of `128` examples.
+In your own training, you'd want to break down your rather larger data set into multiple minibatches, of around `32` to `512` examples each.
+
+```lua
+batchSize = 128
+batchInputs = torch.DoubleTensor(batchSize, inputs) -- or CudaTensor for GPU training
+batchLabels = torch.DoubleTensor(batchSize)         -- or CudaTensor for GPU training
+
+for i = 1, batchSize do
+   local input = torch.randn(2)     -- normally distributed example in 2d
+   local label
+   if input[1] * input[2] > 0 then  -- calculate label for XOR function
+      label = -1
+   else
+      label = 1
+   end
+   batchInputs[i]:copy(input)
+   batchLabels[i] = label
+end
+```
+
+
+### Flatten parameters ###
+
+`optim` expects the parameters that are to be optimized, and their gradients, to be one-dimensional `Tensor`s.
+But, our network model contains probably multiple modules, typically multiple convolutional layers, and each of these layers has their own `weight` and `bias` `Tensor`s.
+How to handle this?
+
+It is simple: we can call a standard method `:getParameters()`, that is defined for any network module.
+When we call this method, the following magic will happen:
+
+ - a new `Tensor` will be created, large enough to hold all the `weight`s and `bias`es of the entire network model
+ - the model `weight` and `bias` `Tensor`s are replaced with views onto the new contiguous parameter `Tensor`
+ - and the exact same thing will happen for all the gradient `Tensor`s: replaced with views onto one single contiguous gradient `Tensor`
+
+We can call this method as follows:
+
+```lua
+params, gradParams = model:getParameters()
+```
+
+These flattened `Tensor`s have the following characteristics:
+
+ - to `optim`, the parameters it needs to optimize are all contained in one single one-dimensional `Tensor`
+ - when `optim` optimizes the parameters in this large one-dimensional `Tensor`, it is implicitly optimizing the `weight`s and `bias`es in our network model, since those are now simply views onto this large one-dimensional parameter `Tensor`
+
+It will look something like this:
+
+![Parameter flattening](image/parameterflattening.png?raw=true "Parameter Flattening")
+
+> Note that flattening the parameters redefines the `weight` and `bias` `Tensor`s for all the network modules in our network model.
+> Therefore, any pre-existing references to the original model layer `weight` and `bias` `Tensor`s will no longer point to the model `weight` and `bias` `Tensor`s, after flattening.
+
+
+### Training ###
+
+Now that we have created our model, our training set, and prepared the flattened network parameters, we can train using `optim`.
+`optim` provides [various training algorithms](doc/index.md).
+We will use the stochastic gradient descent algorithm [SGD](doc/index.md#x-sgdopfunc-x-state).
+We need to provide the learning rate, via an optimization state table:
+
+```lua
+local optimState = {learningRate = 0.01}
+```
+
+We define an evaluation function, inside our training loop, and use `optim.sgd` to train the system:
+
+```lua
+require 'optim'
+
+for epoch = 1, 50 do
+   -- local function we give to optim
+   -- it takes current weights as input, and outputs the loss
+   -- and the gradient of the loss with respect to the weights
+   -- gradParams is calculated implicitly by calling 'backward',
+   -- because the model's weight and bias gradient tensors
+   -- are simply views onto gradParams
+   function feval(params)
+      gradParams:zero()
+
+      local outputs = model:forward(batchInputs)
+      local loss = criterion:forward(outputs, batchLabels)
+      local dloss_doutputs = criterion:backward(outputs, batchLabels)
+      model:backward(batchInputs, dloss_doutputs)
+
+      return loss, gradParams
+   end
+   optim.sgd(feval, params, optimState)
+end
+```
+
+
+### Test the network ###
+
+For the prediction task, we will also typically use minibatches, although we can run prediction sample by sample too.
+In this example, we will predict sample by sample.
+To run prediction on a minibatch, simply pass in a tensor with one additional dimension, which represents the sample index.
+
+```lua
+x = torch.Tensor(2)
+x[1] =  0.5; x[2] =  0.5; print(model:forward(x))
+x[1] =  0.5; x[2] = -0.5; print(model:forward(x))
+x[1] = -0.5; x[2] =  0.5; print(model:forward(x))
+x[1] = -0.5; x[2] = -0.5; print(model:forward(x))
+```
+
+You should see something like:
+
+```lua
+> x = torch.Tensor(2)
+> x[1] =  0.5; x[2] =  0.5; print(model:forward(x))
+
+-0.3490
+[torch.DoubleTensor of dimension 1]
+
+> x[1] =  0.5; x[2] = -0.5; print(model:forward(x))
+
+ 1.0561
+[torch.DoubleTensor of dimension 1]
+
+> x[1] = -0.5; x[2] =  0.5; print(model:forward(x))
+
+ 0.8640
+[torch.DoubleTensor of dimension 1]
+
+> x[1] = -0.5; x[2] = -0.5; print(model:forward(x))
+
+-0.2941
+[torch.DoubleTensor of dimension 1]
+```
+
+If we were running on a GPU, we would probably want to predict using minibatches, because this will hide the latencies involved in transferring data from main memory to the GPU.
+To predict on a minibatch, we could do something like:
+
+```lua
+x = torch.CudaTensor({
+   { 0.5,  0.5},
+   { 0.5, -0.5},
+   {-0.5,  0.5},
+   {-0.5, -0.5}
+})
+print(model:forward(x))
+```
+
+You should see something like:
+
+```lua
+> print(model:forward(x))
+ -0.3490
+  1.0561
+  0.8640
+ -0.2941
+[torch.CudaTensor of size 4]
+```
+
+That's it!
+For minibatched prediction, the output tensor contains one value for each of our input data samples.
diff --git a/doc/logger.md b/doc/logger.md
index b7797d2..bef8ba3 100644
--- a/doc/logger.md
+++ b/doc/logger.md
@@ -71,3 +71,9 @@ logger:plot()
 ![Logging plot](logger_plot.png)
 
 If we'd like an interactive visualisation, we can put the `logger:plot()` instruction within the `for` loop, and the chart will be updated at every iteration.
+
+In case we'd like to prevent `gnuplot` to display the plots, we can set the option `logger:display(false)`.
+In this way, plots will be saved but not displayed.
+To restore the normal behaviour, use `logger:display(true)`.
+
+We can set a logarithmic *y* axis with `logger:setlogscale(true)` and reset it with `logger:setlogscale(false)`.
diff --git a/polyinterp.lua b/polyinterp.lua
index 5975981..35317ac 100644
--- a/polyinterp.lua
+++ b/polyinterp.lua
@@ -32,23 +32,23 @@ local function roots(c)
 
    local n = c:size(1)-1
    if n == 1 then
-      local e = torch.Tensor({{-c[2]/c[1], 0}})
+      local e = c.new({{-c[2]/c[1], 0}})
       if nz > 0 then
-         return torch.cat(e, torch.zeros(nz, 2), 1)
+         return torch.cat(e, c.new(nz, 2):zero(), 1)
       else
          return e
       end
    elseif n > 1 then
-      local A = torch.diag(torch.ones(n-1),-1)
+      local A = torch.diag(c.new(n-1):fill(1),-1)
       A[1] = -c[{ {2,n+1} }]/c[1];
       local e = torch.eig(A,'N')
       if nz > 0 then
-         return torch.cat(e, torch.zeros(nz,2), 1)
+         return torch.cat(e, c.new(nz,2):zero(), 1)
       else
          return e
       end
    else
-      return torch.zeros(nz,2)
+      return c.new(nz,2):zero()
    end
 end
 
@@ -60,7 +60,7 @@ end
 local function imag(x)
    if type(x) == 'number' then return 0 end
    if x:nDimension() == 1 then
-      return torch.zeros(x:size(1))
+      return x.new(x:size(1)):zero()
    else
       return x[{ {},  2}]
    end
@@ -95,8 +95,6 @@ function optim.polyinterp(points,xminBound,xmaxBound)
    -- locals
    local sqrt = torch.sqrt
    local mean = torch.mean
-   local Tensor = torch.Tensor
-   local zeros = torch.zeros
    local max = math.max
    local min = math.min
 
@@ -147,10 +145,10 @@ function optim.polyinterp(points,xminBound,xmaxBound)
    xmaxBound = xmaxBound or xmax
 
    -- Add constraints on function values
-   local A = zeros(nPoints*2,order+1)
-   local b = zeros(nPoints*2,1)
+   local A = points.new(nPoints*2,order+1):zero()
+   local b = points.new(nPoints*2,1):zero()
    for i = 1,nPoints do
-      local constraint = zeros(order+1)
+      local constraint = points.new(order+1):zero()
       for j = order,0,-1 do
          constraint[order-j+1] = points[{i,1}]^j
       end
@@ -160,7 +158,7 @@ function optim.polyinterp(points,xminBound,xmaxBound)
 
    -- Add constraints based on derivatives
    for i = 1,nPoints do
-      local constraint = zeros(order+1)
+      local constraint = points.new(order+1):zero()
       for j = 1,order do
          constraint[j] = (order-j+1)*points[{i,1}]^(order-j)
       end
@@ -172,13 +170,10 @@ function optim.polyinterp(points,xminBound,xmaxBound)
    local res = torch.gels(b,A)
    local params = res[{ {1,nPoints*2} }]:squeeze()
 
-   --print(A)
-   --print(b)
-   --print(params)
    params[torch.le(torch.abs(params),1e-12)]=0
 
    -- Compute Critical Points
-   local dParams = zeros(order);
+   local dParams = points.new(order):zero();
    for i = 1,params:size(1)-1 do
       dParams[i] = params[i]*(order-i+1)
    end
@@ -188,46 +183,29 @@ function optim.polyinterp(points,xminBound,xmaxBound)
    if torch.ne(dParams,dParams):max() > 0 or torch.eq(dParams,math.huge):max() > 0 then
       nans = true
    end
-   -- for i = 1,dParams:size(1) do
-   --    if dParams[i] ~= dParams[i] or dParams[i] == math.huge then
-   --       nans = true
-   --       break
-   --    end
-   -- end
-   local cp = torch.cat(Tensor{xminBound,xmaxBound},points[{{},1}])
+
+   local cp = torch.cat(points.new{xminBound,xmaxBound},points[{{},1}])
    if not nans then
       local cproots = roots(dParams)
-      local cpi = zeros(cp:size(1),2)
+      local cpi = points.new(cp:size(1),2):zero()
       cpi[{ {1,cp:size(1)} , 1 }] = cp
       cp = torch.cat(cpi,cproots,1)
    end
 
-   --print(dParams)
-   --print(cp)
-
    -- Test Critical Points
    local fmin = math.huge
    -- Default to Bisection if no critical points valid:
    minPos = (xminBound+xmaxBound)/2
-   --print(minPos,fmin)
-   --print(xminBound,xmaxBound)
    for i = 1,cp:size(1) do
       local xCP = cp[{ {i,i} , {} }]
-      --print('xcp=')
-      --print(xCP)
       local ixCP = imag(xCP)[1]
       local rxCP = real(xCP)[1]
       if ixCP == 0 and rxCP >= xminBound and rxCP <= xmaxBound then
          local fCP = polyval(params,rxCP)
-	 --print('fcp=')
-	 --print(fCP)
-	 --print(fCP < fmin)
          if fCP < fmin then
             minPos = rxCP
             fmin = fCP
-	    --print('u',minPos,fmin)
          end
-	 --print('v',minPos,fmin)
       end
    end
    return minPos,fmin
diff --git a/rmsprop.lua b/rmsprop.lua
index 1eb526d..aa56200 100644
--- a/rmsprop.lua
+++ b/rmsprop.lua
@@ -29,6 +29,7 @@ function optim.rmsprop(opfunc, x, config, state)
    local alpha = config.alpha or 0.99
    local epsilon = config.epsilon or 1e-8
    local wd = config.weightDecay or 0
+   local mfill = config.initialMean or 0
 
    -- (1) evaluate f(x) and df/dx
    local fx, dfdx = opfunc(x)
@@ -40,7 +41,7 @@ function optim.rmsprop(opfunc, x, config, state)
 
    -- (3) initialize mean square values and square gradient storage
    if not state.m then
-      state.m = torch.Tensor():typeAs(x):resizeAs(dfdx):fill(1)
+      state.m = torch.Tensor():typeAs(x):resizeAs(dfdx):fill(mfill)
       state.tmp = torch.Tensor():typeAs(x):resizeAs(dfdx)
    end
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/lua-torch-optim.git



More information about the debian-science-commits mailing list