[r-cran-vgam] 45/63: Import Upstream version 0.9-8

Andreas Tille tille at debian.org
Tue Jan 24 13:54:38 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-cran-vgam.

commit d92069d779416147bd551950d84e0421cbd8f9f5
Author: Andreas Tille <tille at debian.org>
Date:   Tue Jan 24 14:17:03 2017 +0100

    Import Upstream version 0.9-8
---
 DESCRIPTION                      |   34 +-
 MD5                              |  249 ++-
 NAMESPACE                        |   13 +-
 NEWS                             |   38 +-
 R/family.actuary.R               | 4333 +++++++++++++++++++++++++-------------
 R/family.basics.R                |   16 +-
 R/family.binomial.R              |  178 --
 R/family.categorical.R           |  470 ++---
 R/family.censored.R              |    6 +-
 R/family.extremes.R              |  266 +--
 R/family.genetic.R               |  326 ---
 R/family.glmgam.R                |  325 +--
 R/family.nonlinear.R             |    4 +-
 R/family.normal.R                |  543 +----
 R/family.positive.R              |   12 +-
 R/family.quantal.R               |  575 -----
 R/family.rcim.R                  |    2 +-
 R/family.rcqo.R                  |   46 +-
 R/family.ts.R                    |  497 +++++
 R/family.univariate.R            |  322 +--
 R/family.zeroinf.R               |   38 -
 R/vgam.R                         |   51 +
 build/vignette.rds               |  Bin 0 -> 479 bytes
 data/Huggins89.t1.rda            |  Bin 443 -> 442 bytes
 data/Huggins89table1.rda         |  Bin 445 -> 444 bytes
 data/alclevels.rda               |  Bin 550 -> 549 bytes
 data/alcoff.rda                  |  Bin 547 -> 546 bytes
 data/auuc.rda                    |  Bin 246 -> 245 bytes
 data/backPain.rda                |  Bin 488 -> 484 bytes
 data/beggs.rda                   |  Bin 198 -> 196 bytes
 data/car.all.rda                 |  Bin 6972 -> 6969 bytes
 data/cfibrosis.rda               |  Bin 265 -> 264 bytes
 data/corbet.rda                  |  Bin 244 -> 237 bytes
 data/crashbc.rda                 |  Bin 374 -> 373 bytes
 data/crashf.rda                  |  Bin 340 -> 340 bytes
 data/crashi.rda                  |  Bin 491 -> 490 bytes
 data/crashmc.rda                 |  Bin 385 -> 345 bytes
 data/crashp.rda                  |  Bin 376 -> 375 bytes
 data/crashtr.rda                 |  Bin 361 -> 360 bytes
 data/deermice.rda                |  Bin 394 -> 394 bytes
 data/ducklings.rda               |  Bin 561 -> 560 bytes
 data/finney44.rda                |  Bin 210 -> 209 bytes
 data/flourbeetle.rda             |  Bin 344 -> 343 bytes
 data/hspider.rda                 |  Bin 1345 -> 1343 bytes
 data/lakeO.rda                   |  Bin 336 -> 334 bytes
 data/leukemia.rda                |  Bin 329 -> 328 bytes
 data/marital.nz.rda              |  Bin 10452 -> 10440 bytes
 data/melbmaxtemp.rda             |  Bin 4265 -> 4262 bytes
 data/pneumo.rda                  |  Bin 267 -> 266 bytes
 data/prinia.rda                  |  Bin 1229 -> 1228 bytes
 data/ruge.rda                    |  Bin 258 -> 254 bytes
 data/toxop.rda                   |  Bin 474 -> 473 bytes
 data/venice.rda                  |  Bin 976 -> 982 bytes
 data/venice90.rda                |  Bin 8000 -> 8004 bytes
 data/wine.rda                    |  Bin 270 -> 270 bytes
 inst/CITATION                    |   23 +
 inst/doc/categoricalVGAM.R       |  278 +++
 inst/doc/categoricalVGAM.Rnw     | 2325 ++++++++++++++++++++
 inst/doc/categoricalVGAM.pdf     |  Bin 0 -> 735468 bytes
 inst/doc/crVGAM.R                |  480 +++++
 inst/doc/crVGAM.Rnw              | 2247 ++++++++++++++++++++
 inst/doc/crVGAM.pdf              |  Bin 0 -> 496617 bytes
 man/AR1.Rd                       |  198 ++
 man/AR1UC.Rd                     |  101 +
 man/VGAM-package.Rd              |   21 +-
 man/acat.Rd                      |    9 +-
 man/betaII.Rd                    |   28 +-
 man/betaR.Rd                     |    8 +-
 man/betaff.Rd                    |    6 +-
 man/betaprime.Rd                 |    6 +-
 man/binom2.or.Rd                 |    6 +-
 man/brat.Rd                      |    4 +-
 man/bratt.Rd                     |    4 +-
 man/cao.Rd                       |   10 +-
 man/cdf.lmscreg.Rd               |    8 +-
 man/constraints.Rd               |    4 +-
 man/cqo.Rd                       |    7 +-
 man/cumulative.Rd                |    6 +-
 man/dagum.Rd                     |   36 +-
 man/deermice.Rd                  |    3 +-
 man/dirmul.old.Rd                |   11 +-
 man/exponential.Rd               |    2 +-
 man/fill.Rd                      |   31 +-
 man/fisk.Rd                      |   40 +-
 man/genbetaII.Rd                 |   73 +-
 man/genbetaIIUC.Rd               |   97 +
 man/genpoisUC.Rd                 |   81 +
 man/genpoisson.Rd                |   76 +-
 man/iam.Rd                       |   12 +-
 man/inv.lomax.Rd                 |   45 +-
 man/inv.paralogistic.Rd          |   41 +-
 man/is.buggy.Rd                  |  110 +
 man/lms.bcg.Rd                   |    8 +-
 man/lms.bcn.Rd                   |   19 +-
 man/lms.yjn.Rd                   |    8 +-
 man/loglinb3.Rd                  |    6 +-
 man/lomax.Rd                     |   19 +-
 man/micmen.Rd                    |    6 +-
 man/multinomial.Rd               |    6 +-
 man/negbinomial.Rd               |    4 +-
 man/notdocumentedyet.Rd          |   10 +-
 man/paralogistic.Rd              |   42 +-
 man/plotdeplot.lmscreg.Rd        |    6 +-
 man/plotqtplot.lmscreg.Rd        |    6 +-
 man/posbernoulli.t.Rd            |   11 +-
 man/posbernoulli.tb.Rd           |    1 +
 man/posnormal.Rd                 |   16 +-
 man/pospoisson.Rd                |    6 +-
 man/propodds.Rd                  |    6 +-
 man/qtplot.lmscreg.Rd            |    6 +-
 man/rhobit.Rd                    |   14 +-
 man/rrvglm-class.Rd              |    2 +-
 man/rrvglm.Rd                    |    9 +-
 man/s.Rd                         |    1 +
 man/sinmad.Rd                    |   35 +-
 man/sinmadUC.Rd                  |    5 +-
 man/smartpred.Rd                 |    6 +-
 man/sratio.Rd                    |    9 +-
 man/tobit.Rd                     |   23 +-
 man/undocumented-methods.Rd      |   13 +-
 man/uninormal.Rd                 |    1 +
 man/vgam-class.Rd                |    2 +-
 man/vgam.Rd                      |   10 +-
 man/vglm-class.Rd                |    2 +-
 man/vglm.Rd                      |    6 +-
 man/vglm.control.Rd              |    2 +-
 man/vglmff-class.Rd              |    6 +-
 man/wrapup.smart.Rd              |   14 +-
 man/zapoisson.Rd                 |    6 +-
 man/zero.Rd                      |    2 +-
 vignettes/categoricalVGAM.Rnw    | 2325 ++++++++++++++++++++
 vignettes/categoricalVGAMbib.bib |  653 ++++++
 vignettes/crVGAM.Rnw             | 2247 ++++++++++++++++++++
 vignettes/crVGAM.bib             |  516 +++++
 134 files changed, 16315 insertions(+), 4561 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 70dc0e0..51ba3d1 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: VGAM
-Version: 0.9-7
-Date: 2015-03-06
+Version: 0.9-8
+Date: 2015-05-11
 Title: Vector Generalized Linear and Additive Models
 Author: Thomas W. Yee <t.yee at auckland.ac.nz>
 Maintainer: Thomas Yee <t.yee at auckland.ac.nz>
@@ -9,23 +9,25 @@ Suggests: VGAMdata, MASS
 Description: An implementation of about 6 major classes of
     statistical regression models. At the heart of it are the
     vector generalized linear and additive model (VGLM/VGAM)
-    classes.  Many (150+) models and distributions are estimated
-    by maximum likelihood estimation (MLE) or penalized MLE, using
-    Fisher scoring. VGLMs can be loosely thought of as multivariate
-    GLMs. VGAMs are data-driven VGLMs (i.e., with smoothing). The
-    other classes are RR-VGLMs (reduced-rank VGLMs), quadratic
-    RR-VGLMs, reduced-rank VGAMs, RCIMs (row-column interaction
-    models)---these classes perform constrained and unconstrained
-    quadratic ordination (CQO/UQO) models in ecology, as well
-    as constrained additive ordination (CAO). Note that these
-    functions are subject to change, especially before version
-    1.0.0 is released; see the NEWS file for latest changes.
+    classes. Currently only fixed-effects models are implemented,
+    i.e., no random-effects models. Many (150+) models and
+    distributions are estimated by maximum likelihood estimation
+    (MLE) or penalized MLE, using Fisher scoring. VGLMs can be
+    loosely thought of as multivariate GLMs. VGAMs are data-driven
+    VGLMs (i.e., with smoothing). The other classes are RR-VGLMs
+    (reduced-rank VGLMs), quadratic RR-VGLMs, reduced-rank VGAMs,
+    RCIMs (row-column interaction models)---these classes perform
+    constrained and unconstrained quadratic ordination (CQO/UQO)
+    models in ecology, as well as constrained additive ordination
+    (CAO). Note that these functions are subject to change,
+    especially before version 1.0.0 is released; see the NEWS
+    file for latest changes.
 License: GPL-2
-URL: http://www.stat.auckland.ac.nz/~yee/VGAM
+URL: https://www.stat.auckland.ac.nz/~yee/VGAM
 NeedsCompilation: yes
 BuildVignettes: yes
 LazyLoad: yes
 LazyData: yes
-Packaged: 2015-03-06 09:11:02 UTC; tyee001
+Packaged: 2015-05-11 03:49:53 UTC; tyee001
 Repository: CRAN
-Date/Publication: 2015-03-06 10:38:52
+Date/Publication: 2015-05-11 11:14:54
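
As a quick illustration of the VGLM class described in the Description field above, here is a minimal sketch using the pneumo data set shipped with the package. It follows the proportional-odds example from the package's own documentation and is not part of this commit:

    library(VGAM)
    # Proportional odds model: a VGLM whose three ordered response
    # categories give two linear predictors.
    pneumo <- transform(pneumo, let = log(exposure.time))
    fit <- vglm(cbind(normal, mild, severe) ~ let, propodds, data = pneumo)
    coef(fit, matrix = TRUE)  # one column per linear predictor
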
diff --git a/MD5 b/MD5
index 069a649..6d4fd77 100644
--- a/MD5
+++ b/MD5
@@ -1,7 +1,7 @@
 66414b6ed296192426033f4ac29a6af2 *BUGS
-d7c7e332462be2810169b8dd45f28660 *DESCRIPTION
-58a9357fcae44be200423c8bd8f156b6 *NAMESPACE
-0fa7e63737588c9dcbc71f7d3dc964a1 *NEWS
+6506758e0e7e02c861ae3b7ca8e214b7 *DESCRIPTION
+febc599169d12ea006018ae570d5918b *NAMESPACE
+a7c3248f29ee5bcb29fd6559ba66325c *NEWS
 46f97e789f77839767fc3d3511d2f990 *R/Links.R
 b6b017bdea768a643afc8171516d193b *R/aamethods.q
 4ffc1530ca8113d2f2d8b0d5cc1db282 *R/add1.vglm.q
@@ -16,38 +16,37 @@ ce3d85bf00ad08175321e2098ae87462 *R/cao.fit.q
 9a4e3479392194fbe0c6e55cacb03f62 *R/cqo.fit.q
 d411a1bf3bfbe7057b4211255c33ba53 *R/deviance.vlm.q
 54b928344dc9efab031bf3e83d04f21f *R/effects.vglm.q
-4147f1613b1576d3ecefb7ab744a10d7 *R/family.actuary.R
+73607c9675d480649795248cf79816db *R/family.actuary.R
 0a2e88e04cab2be01307a0a27ed659f7 *R/family.aunivariate.R
-f92ab9ae022219eebb512a18fec30f9f *R/family.basics.R
-e2ff7c9d8835dd42e6dd608c7acf40cc *R/family.binomial.R
+267ab38b53898ea5bd37c5a8e090d3e2 *R/family.basics.R
+7006c9efcd3263c79ca7c13366cfa238 *R/family.binomial.R
 be15ddf47d35b2ce3de5d69b9acbfbe9 *R/family.bivariate.R
-55b5b0aa2ad15e68615b495b38dfa616 *R/family.categorical.R
-2a1cc1122243a38c4ee678d7692293e6 *R/family.censored.R
+9e76ec4bc461e0c06e6069054a060f54 *R/family.categorical.R
+6cf1f264328d97878e0d069ea7fa48b5 *R/family.censored.R
 0c13f3a38a685b978dde42ace40e55e8 *R/family.circular.R
 b9fedec9d690c434a69d89c9e6845bb7 *R/family.exp.R
-e74415fd99041e7645255d20ffd58ee2 *R/family.extremes.R
+54d3654ea2ec5f5b11428c672ad11c03 *R/family.extremes.R
 251b551aaf906c754d9e75342e7ea1af *R/family.functions.R
-5586e8d6e71cb830f2c6b852a10b3beb *R/family.genetic.R
-842bd10d4a8da3b0d367517c272eda56 *R/family.glmgam.R
+8a51bab8f687ffa92c116663af075ba3 *R/family.genetic.R
+281657d44d72ef287c94653979aa5aa5 *R/family.glmgam.R
 688abce2e4312da5c9e03a6fb2cdf935 *R/family.loglin.R
 5679a8a30b54ac8f66dd945b2d1ccd2a *R/family.math.R
 f3a38cecabef0846e4f0c7bdb5c9ee81 *R/family.mixture.R
-0d5e4c835d8bab66576a0b66830afbf9 *R/family.nonlinear.R
-22b5c53812a48db4711e61da40f5aba4 *R/family.normal.R
+f1a35abe66015033d743f525578179d1 *R/family.nonlinear.R
+1c0d93ee5dc8d6a3aa044c449ae6e33e *R/family.normal.R
 a3ea06d6b5e0c41585333addbe62ffe0 *R/family.others.R
-1663b0fa1af37724fd51607db5699d01 *R/family.positive.R
+88039b35018615a28e631b670d2e971b *R/family.positive.R
 9041d6d34c26ffff3f40e816b3263e29 *R/family.qreg.R
-2cf55e062652004b40b734273199f35d *R/family.quantal.R
-03397070df548d5a86ca5930b156073c *R/family.rcim.R
-990f65032a38762d9cebec6f66794235 *R/family.rcqo.R
+c86c5bac3606eb3ba284b4b6854a7684 *R/family.rcim.R
+eaf63cac3cffe7fd0bd9352fe8223a60 *R/family.rcqo.R
 ffd541fd4b00179e6595b9e6926f3291 *R/family.robust.R
 d8845f8b8bf363485bcf7f530617358f *R/family.rrr.R
 943ff0caa6e0cf7294b32a0a8dc1ad98 *R/family.sur.R
 ebb9d8fde5a537c58b25f47482cad8d2 *R/family.survival.R
-d66b2f805fff817efd32ba5c5c7a56aa *R/family.ts.R
-bca7c0c7874d0c5601dfdfc25d475fee *R/family.univariate.R
+fe810bd8c50c5b6102e67afc15e13b68 *R/family.ts.R
+fcf0a36d2fe04e88d2dd74de20e7a0bc *R/family.univariate.R
 8d7d5df3e005750453f8ed0977c0c4f6 *R/family.vglm.R
-9a0ddbb47ed714430e122b85e8e254b9 *R/family.zeroinf.R
+be19cb2f6dba872ae43dd12a5d21d620 *R/family.zeroinf.R
 e5a738b6ba3f59a3962eb089e56e5786 *R/fittedvlm.R
 d706ca44527adda488800e2834d5d278 *R/formula.vlm.q
 1c7d28893d43c88a934731219098fd5c *R/generic.q
@@ -77,7 +76,7 @@ cf62bdb183fe009cd47316eacbe3b14e *R/s.q
 ea860d4429fbcfb1c8e494a198e72adc *R/summary.vgam.q
 24bddb3d9264aa2079dc0bc1949dac08 *R/summary.vglm.q
 8233ae7e692d6254ac739541a4774109 *R/summary.vlm.q
-3d8dbd2d0163b95334a891c45b66b468 *R/vgam.R
+f53cc75eb61ade505b6ee2d55f0ac377 *R/vgam.R
 3a7ea81a3f0c6509e71466cfae4c108c *R/vgam.control.q
 f6da05ed223f0cac5b7731c8f5da2095 *R/vgam.fit.q
 c7836fc6514f090c9852ef7427b68a95 *R/vgam.match.q
@@ -87,57 +86,58 @@ c7836fc6514f090c9852ef7427b68a95 *R/vgam.match.q
 d3c11b3c2876d98a37ea6f4a5658a4a6 *R/vlm.R
 50d41f729a51f21ac03d717a33d708fb *R/vlm.wfit.q
 9c9d0afc47501544ea1da2703e60b4e9 *R/vsmooth.spline.q
-2cdabbff91d4f47a58705b2fff199298 *data/Huggins89.t1.rda
-3faa9073b7ae52defc01fde39527c39a *data/Huggins89table1.rda
+d00804c5f46ec827f32f4eb64029553e *build/vignette.rds
+3870f8993c4bea0c5192b50640e10c84 *data/Huggins89.t1.rda
+a613e4fafb1a688f5aa446073014b01f *data/Huggins89table1.rda
 d89f69ab78bc3c7a526960c8bdb9454b *data/V1.txt.gz
-941fce2d969b7deed268fe512bc1bf64 *data/alclevels.rda
-5d2d8a0d2e992a2772e31175a9647b64 *data/alcoff.rda
-1a1c61f5e5286fb5dd35e317d1bef268 *data/auuc.rda
-912079f646e2b98ef4055a5c043e04c3 *data/backPain.rda
+02b718733148b399711af0138fabfb99 *data/alclevels.rda
+c6b2edafb26be2f1f0e55be907faa271 *data/alcoff.rda
+3acc92474abbbfe3dc30752e370913ff *data/auuc.rda
+fb8c3190d2030185534ec3d67d1048da *data/backPain.rda
 4fa3eac69a59ea5ed0123d54528e5595 *data/backPain.txt.gz
-027577fedb0d18b412e1148516500a53 *data/beggs.rda
+26dafe8e06fbca22ca07861091270805 *data/beggs.rda
 e039fd36c33b359830b2ac811ca7fc49 *data/bmi.nz.txt.xz
-c10d000ab6edd6d7b8fffde46371a8af *data/car.all.rda
-e521415c5ee7141dbf97fd5101c20047 *data/cfibrosis.rda
+ceaf646e909dcc16b9bd89edcb058e8e *data/car.all.rda
+642479ab63be9250e1950a2b7499d876 *data/cfibrosis.rda
 b29c1a4125f0898885b0a723442d6a92 *data/chest.nz.txt.bz2
 4df5fd8b5db905c4c19071e1e6a698a4 *data/chinese.nz.txt.gz
 3cb8bc8e1fc615416f0c8838a50b3f51 *data/coalminers.txt.gz
-eed6cd50d7aaef10522b1085fec41c11 *data/corbet.rda
-890f464c7e30620440ac65c265ac9b40 *data/crashbc.rda
-7cf569633b8dafb9a4f86b134bd7947a *data/crashf.rda
-145640d0af2b70d71607392c2767c53e *data/crashi.rda
-4774a3d5c2f62eb0beb038462196c53a *data/crashmc.rda
-1d3c4a6ebff20d079a6a3ed3c6fbdc74 *data/crashp.rda
-c6df5decc6ce502fecc236c65248eede *data/crashtr.rda
-2360553382387ee92888f6ada418d819 *data/deermice.rda
-5117494b87c6dbac229830f421734f85 *data/ducklings.rda
+eb5202e2ea25642262ab857b7d514bf8 *data/corbet.rda
+aecd77d50f22e2c4f482b7c22b7edb49 *data/crashbc.rda
+7c3b7f5539427a0da536c31003d06540 *data/crashf.rda
+08808e8f40be4371114793765f431c48 *data/crashi.rda
+7a3e0f5a401ecc681e47b98b921f4cee *data/crashmc.rda
+36b8fbbafcbde4304a392d592bbfa99c *data/crashp.rda
+1e427d92814a65f98fd1d8446f37fc06 *data/crashtr.rda
+24fc8bf721df45e0e2b861c2b2d50696 *data/deermice.rda
+b5042793e4d3cde7b66c78403ef57074 *data/ducklings.rda
 08e87bb80a2364697b17ccec6260387c *data/enzyme.txt.gz
-67e2d5489a51805dcb70a8ed17113be1 *data/finney44.rda
-3f07cf57e178c098bb51d3bd9d8d00d5 *data/flourbeetle.rda
+87a43a19f87ca93b45a672cd9e549c8c *data/finney44.rda
+35f3600d34f7376e7b0da2c151c997dc *data/flourbeetle.rda
 3125b7b004c671f9d4516999c8473eac *data/gew.txt.gz
 bec512b2d2d680889c9b71c7b97dbffd *data/grain.us.txt.bz2
 9dcb8cdf026f5468fa70f8037fd72a0b *data/hormone.txt.bz2
-9b109ac6270bf7de6eca8ae8108a3fde *data/hspider.rda
+e87b6cf3f2bd1413111f075b69651512 *data/hspider.rda
 dffe21fbabf645127bccc3f3733098a7 *data/hunua.txt.bz2
-84961eee0908ed3ae95bccadf04134e8 *data/lakeO.rda
-4072c9dc17de0e500971b56398f26429 *data/leukemia.rda
+b86e36a995219a44437a844bc5b8a70c *data/lakeO.rda
+41ad3d75f07b78392324f1c66a03fdb2 *data/leukemia.rda
 aba4885e0eeda8ee887a422fee01e02a *data/lirat.txt.gz
 7d7e59127af09903659c5727d71acc56 *data/machinists.txt.gz
-9b042f23117458dcb00cfe37fc77c232 *data/marital.nz.rda
-3253d715eb3ed463af03fb54f365ef13 *data/melbmaxtemp.rda
+40d0a1b7719f9a76b76b332be23ae83a *data/marital.nz.rda
+9b9739d472f4c9e15d2f175ad7908437 *data/melbmaxtemp.rda
 56490506642d6415ac67d9b6a7f7aff6 *data/olym08.txt.gz
 fe334fe839d5efbe61aa3a757c38faeb *data/olym12.txt.gz
 3ed63397c4a34f3233326ade6cfd1279 *data/oxtemp.txt.gz
-b34cc9fc94e2f742156da5bd52dfd14d *data/pneumo.rda
+ac721c748208e66ad2068d61c3528179 *data/pneumo.rda
 0cd66b7ce4e596ad3ca75e1e2ec0a73c *data/prats.txt.gz
-ece7a33214e3d99d751a7bc0b320e79e *data/prinia.rda
-691569018c9e20b0ec911f9306f5b407 *data/ruge.rda
-e244f038a340e4613875602c82aac0f8 *data/toxop.rda
+a20303b93a38645c9310e6ce51d3db95 *data/prinia.rda
+dae1c1a61019b400f1ee8f21796fdda4 *data/ruge.rda
+bc3d45e95e47f05c97d2b7e542ba5719 *data/toxop.rda
 1b059fc42c890bf89f2282298828d098 *data/ucberk.txt.gz
-1ce9f1e51fb745d0e13b98f6cb74e121 *data/venice.rda
-755a35ccd4c237fac6ef2ae5c0a0e914 *data/venice90.rda
+e46b189f709219e3d6edb70094adcc43 *data/venice.rda
+fb2409f3985d6bb29e62b74641b92e0a *data/venice90.rda
 e990ca4deea25b60febd2d315a6a9ec4 *data/waitakere.txt.bz2
-7b2f5d6ccd5cf362280cdb3b9998f91a *data/wine.rda
+d074784b6dfe8d86e15c45a289148d09 *data/wine.rda
 81f7f0844a196dc48e91870c4cfafc99 *demo/00Index
 9327dcfa4015cf47172717bac166f353 *demo/binom2.or.R
 b9f0af62a654d77a3052997eb4cc15e2 *demo/cqo.R
@@ -145,12 +145,20 @@ b9f0af62a654d77a3052997eb4cc15e2 *demo/cqo.R
 541e1a831b9abf6a2a5bfe193b03b1b4 *demo/lmsqreg.R
 ab8081763fe2144558be25f3a154327b *demo/vgam.R
 65570d10948785994d70d817f574bd96 *demo/zipoisson.R
-60616e1e78fe61c1fd4acdf0d3129747 *inst/CITATION
+01decdf5ff5cdc5cbeca6c4c42244e58 *inst/CITATION
+4ff0e35d38b3c5bb38f1f7232b9af863 *inst/doc/categoricalVGAM.R
+bfa11dbdbff271fb20342560f2bacd53 *inst/doc/categoricalVGAM.Rnw
+162de766cebba3dde055eae8e91766ab *inst/doc/categoricalVGAM.pdf
+1a11a44235dac378645ca48db2c58495 *inst/doc/crVGAM.R
+8b5466d6da7f3c0e97030bd8364a9ca4 *inst/doc/crVGAM.Rnw
+6c712d6436cedaccc8a5f42e1e6ca1b8 *inst/doc/crVGAM.pdf
 9b97006cdc82d3a0c0ace3d43c9758de *man/A1A2A3.Rd
 cc9d465fc9db15abb65061e0b41a0f9e *man/AA.Aa.aa.Rd
 26a120083d1d9d77ac0a5193d0c186b9 *man/AB.Ab.aB.ab.Rd
 c6c2a703e0f76c8b0f9e0a7d36f13386 *man/ABO.Rd
 38647708600610216a454c61450810ff *man/AICvlm.Rd
+028b3edf4cf6f4de340796b6635343a3 *man/AR1.Rd
+e7f6a39f61b6403d60cf99f0e17f3dc1 *man/AR1UC.Rd
 0f4a799e95b245cfa0b5a37280a446ef *man/BICvlm.Rd
 32daae0afb71eae3cdeefc042f4241c6 *man/Coef.Rd
 7b7ad4188c687ac8361fa1176697ce88 *man/Coef.qrrvglm-class.Rd
@@ -174,8 +182,8 @@ ea581f4824e64871d53376a9751c8a2e *man/SURff.Rd
 6ed5239b716d4aaef069b66f248503f0 *man/SurvS4.Rd
 21dc3918d6b5375c18dcc6cc05be554e *man/Tol.Rd
 6930cfc91e602940cafeb95cbe4a60d3 *man/V1.Rd
-f53b5bbb06501b7531361f0914d79a78 *man/VGAM-package.Rd
-f27b784569a22f080ff1ded6d9bbd17a *man/acat.Rd
+caae3db8a87eac92853fc85446377c7c *man/VGAM-package.Rd
+c7ed6cfd12ca60f833e67541f00ad317 *man/acat.Rd
 b346a61c9c3965d8ca97f3c98d9cacc0 *man/alaplace3.Rd
 8c0d8e4d9e634a0c2539e3a052afa9cc *man/alaplaceUC.Rd
 8e181f4f03b718c6c9825ea3b6c4b8d6 *man/amlbinomial.Rd
@@ -189,16 +197,16 @@ bcddb8c1df8893cf14a4400ee5dee6df *man/backPain.Rd
 65a5426c021e0a6c90731c14786a3395 *man/benfUC.Rd
 afa1ccbe6dd6e769dc1bbbc5702148dd *man/benini.Rd
 12d28242eea600b3e6f52db5d71d871f *man/beniniUC.Rd
-f4cabec88ec30505db5785b1aaf1eb48 *man/betaII.Rd
-d27525262d9c6975b15a77219afeb362 *man/betaR.Rd
+dbf1d7ee255da6a85fbafbc84f2c0650 *man/betaII.Rd
+3a31e0a304c2ccab10469d866ae8acdb *man/betaR.Rd
 6d202361c5c1981d29c597fd716050f0 *man/betabinomUC.Rd
 bbb0ddef9113d1b8d1e036ac66f9bb87 *man/betabinomial.Rd
 4e9c0e3075be1050db8ad3fe1e8dce6e *man/betabinomialff.Rd
-d4fbb7ebcc599765b2e0df4ff840876f *man/betaff.Rd
+29d0247eaef9f6447e173c8ac994acbd *man/betaff.Rd
 4b590ee6208b2f3025109b82c1f6d67c *man/betageomUC.Rd
 725a8c9d8b4a9facb0c3cb815d75266b *man/betageometric.Rd
 7553029f69c2be7dbb20c864b97102e5 *man/betanormUC.Rd
-5a0a047bcd18649d5076999057bd1d49 *man/betaprime.Rd
+4a10d6e7f4fd8eb99d7c0b14e1845433 *man/betaprime.Rd
 f41bc1b37620bca37ba4d2f16fdae05d *man/biamhcop.Rd
 495e32601db2c4f22462811e27436c9d *man/biamhcopUC.Rd
 003ba5eb60e8e27f6c9a022ae1e336d1 *man/biclaytoncop.Rd
@@ -212,7 +220,7 @@ faeb492060203a0d89d5cf4f40b0e4c4 *man/bifgmcopUC.Rd
 7a1c045834b0bd9de92a4aa97f52ab3c *man/bigumbelIexp.Rd
 ffcbfc72f334094f6dfd4842ab522e96 *man/bilogisUC.Rd
 e913aabb8e3808c637d264f28c90bf52 *man/bilogistic.Rd
-cebfba7c59c17329f50eb34c40c0b810 *man/binom2.or.Rd
+c7a7e2b700c4358fb65489876ead2d79 *man/binom2.or.Rd
 129f6be1cf1a039f137e5ef3da503fca *man/binom2.orUC.Rd
 a8cc7cbfa4c21672956a187c4ffba22d *man/binom2.rho.Rd
 20cb304b16a9073488621b104549e361 *man/binom2.rhoUC.Rd
@@ -231,20 +239,20 @@ f0816002d3fb698dbc17a6e55d91c18f *man/bistudentt.Rd
 81a2433effb7547679702256a5536b04 *man/bmi.nz.Rd
 214e2f5b25156e937a5af65d1e6e1b58 *man/borel.tanner.Rd
 a25a019943aa0d82d35d6c46ec726c67 *man/bortUC.Rd
-b727c9787c7fcfe1e3dc19f92f6a4cb1 *man/brat.Rd
+37886ad1c2aea4c4cee78bad74c92d5d *man/brat.Rd
 4b158e93b6c981f016ed121e987c50b7 *man/bratUC.Rd
-5ee1485749d235a2d1aa1be8849accc7 *man/bratt.Rd
+c7322bedb2b3d8ba4e7c0a19a2098ced *man/bratt.Rd
 f640961a0c1a206ce052a54bb7b4ca34 *man/calibrate-methods.Rd
 b121ffb4e604644ef7082d777b4411df *man/calibrate.Rd
 22f73cce0070ea9bb785567af837e14f *man/calibrate.qrrvglm.Rd
 22e9a881f2f077f7e01e1dde9043dc7d *man/calibrate.qrrvglm.control.Rd
-8a71703f9846bdda282e59f67832e941 *man/cao.Rd
+afbb7b695f652a4bccfb0e6cb80a8739 *man/cao.Rd
 4005c8bdb2b1a2e7d0ff5f1a800f4224 *man/cao.control.Rd
 10f72289cb33f5f734d39826893a280b *man/cardUC.Rd
 53ff522ff00f7bcfe443309762441150 *man/cardioid.Rd
 f4674b1787a58c87fbabdb369dc8a1ca *man/cauchit.Rd
 d361f0253fb328f70a716c09fd597fdc *man/cauchy.Rd
-9035d92ae411d748c08d35086d5d3be1 *man/cdf.lmscreg.Rd
+4973007c9a18278e2130994b68a2e47d *man/cdf.lmscreg.Rd
 6c41f48884c2e92fa7842266d02a5a6d *man/cens.gumbel.Rd
 f96d45016bcca1b72249a3548520a2cf *man/cens.normal.Rd
 d5293110487b396f767fbd2224774b58 *man/cens.poisson.Rd
@@ -259,20 +267,20 @@ b1985e33c967fdddf79e10cbb646b974 *man/coalminers.Rd
 e492f5f148514df05cc4bf101b7505e2 *man/coefvlm.Rd
 1409b01c52bad85c87e9740fb003699a *man/concoef-methods.Rd
 e9a2bf379aac3e4035b8259463a5374b *man/concoef.Rd
-e9cef803313f5a964f99b76995dd235f *man/constraints.Rd
+30bff4a27550ca7e9a699e5b5cba007e *man/constraints.Rd
 523567ea78adcaaeab2d9629b2aa2cf2 *man/corbet.Rd
-0a020921c3d1686d817fc73eb9067cff *man/cqo.Rd
+5314268c4257680ac10edf26e9222944 *man/cqo.Rd
 8b1b3a39d15fe353a7eceec9f6a327d4 *man/crashes.Rd
 ca3db2c26abb8120651e1d179ac6fbb3 *man/cratio.Rd
-db26c5eb26f1a3cc591502bca797489f *man/cumulative.Rd
-a7ccaa9a82bc79f77514dca45f2d1100 *man/dagum.Rd
+21c6374195045745b28c854b70312efa *man/cumulative.Rd
+f2ce3a3f6ad52abbbb75eddf5baf1893 *man/dagum.Rd
 12192f19751804a540e6d0852e29726c *man/dagumUC.Rd
-8fa6a29bde444a45be31b3d8979afc00 *man/deermice.Rd
+d5439d37875ba50990406c5c5f8595eb *man/deermice.Rd
 dbebc9542906034905fe1137e86a1256 *man/deplot.lmscreg.Rd
 0e0f2e7368fa906e837d8432bb3cfb36 *man/depvar.Rd
 bffbb780b54bd3c8c76cf546ec87e4a0 *man/df.residual.Rd
 276aebb1ed4a71af9f9096e9f9c4515d *man/dirichlet.Rd
-6ea8579fe8a75bec917b2c26019c9e0a *man/dirmul.old.Rd
+17afdbe28f8a8d93725e2747c2daa303 *man/dirmul.old.Rd
 7a63063be35f8510ea5198556bf1c192 *man/dirmultinomial.Rd
 ed927db10e5cf69502d5485f300a9aa7 *man/double.cens.normal.Rd
 7557104d36b3087ed4d34345bdab7017 *man/double.expbinomial.Rd
@@ -291,16 +299,16 @@ f39dd0be93d3e24eda78f08310ff4b2f *man/expgeometricUC.Rd
 59e10a79028eef76da5bdc868e6bb38e *man/explink.Rd
 89ce96662b931aa17182192618085ed0 *man/explogUC.Rd
 e51211ad603eeecbe72cd7f6db0e76e0 *man/explogff.Rd
-4e490ef9e08ab74a3af274a720a988d3 *man/exponential.Rd
+fdbbdfc5e8f244b0ec6759aa8894bced *man/exponential.Rd
 f3cca02f31b091259c7a8cf690f93148 *man/exppoisson.Rd
 79f43e2f29b5cca093569fd81aea3abe *man/exppoissonUC.Rd
 0712cad8a071a24a0676bbea9b09094c *man/felix.Rd
 c5d0b237e64605d008502da6b8f4f64c *man/felixUC.Rd
 09fc6553edb037bc708396a30fe3c8f2 *man/fff.Rd
-741f6474d688a5bc6ed61042d9a12eb6 *man/fill.Rd
+9d679a175cfe7165b89906441e5efebc *man/fill.Rd
 b929e2ab670eb59700bc4a1db07bbbc0 *man/finney44.Rd
 5fd279ebc2d6ec3df74557cdca6940c0 *man/fisherz.Rd
-7f7753b3325004cdfcc7cc145115fc99 *man/fisk.Rd
+c75c1ffce51c2de0fec04f54bbaf466b *man/fisk.Rd
 5966dbc9e396bd3cbb15b2650d885177 *man/fiskUC.Rd
 c75d3ae0a8669fed4a71f54b8be64266 *man/fittedvlm.Rd
 742b72298fd6b2ca944812681ad625a6 *man/flourbeetle.Rd
@@ -316,10 +324,12 @@ c173815d95bd553fa952911bd2ca71aa *man/gammaR.Rd
 3558584dfba54663dc4de34e21cc9aa9 *man/gammahyperbola.Rd
 edd2c4cefb99138667d2528f3d878bad *man/garma.Rd
 e0fdd50e95e43075ac79c911f05c0b61 *man/gaussianff.Rd
-4332a0e0d6a9585eb5a222e923f03464 *man/genbetaII.Rd
+a666a1118f74b8bff779fa283e483cbc *man/genbetaII.Rd
+45999add2a92fc243422b25bfc8f8198 *man/genbetaIIUC.Rd
 59fb27b205e8ff10daca7d8d37a5d3f1 *man/gengamma.Rd
 588e10d5c3fd9ff745c679435c5f2457 *man/gengammaUC.Rd
-231a6af41c4b7ed78907ffb0542cac18 *man/genpoisson.Rd
+0a765eb0392ad75d94c0b0f0c517f9fb *man/genpoisUC.Rd
+296e471d13459805b0cb9d98e2de2a00 *man/genpoisson.Rd
 15429ac99e67921a77cb78e47210d7fc *man/genrayleigh.Rd
 2b8ec736188410b1502ce23ba1852463 *man/genrayleighUC.Rd
 94c6189883bf1848735e23156e25cdc0 *man/geometric.Rd
@@ -350,14 +360,15 @@ d3df700bb2a4f9ae85b13abe7ffea123 *man/hunua.Rd
 e3a9765eba431e1f55e2fdc11ff52b4b *man/hypersecant.Rd
 2bf15af91bb331e94b94dd69050589c0 *man/hzeta.Rd
 04198bb4e2bf6a230e17b4e84251887f *man/hzetaUC.Rd
-c4b8cf96eae282e0746bf8126231a7f5 *man/iam.Rd
+7f0e64784914835bb11c6f43643aae15 *man/iam.Rd
 c2796439b1c32144c3a1ffcbd7f6da72 *man/identitylink.Rd
 857cbf6f8c5970a18867fe560f275f6f *man/inv.binomial.Rd
 745b6c5557776c23bed67b268f03f432 *man/inv.gaussianff.Rd
-ef005dcdf1e63aa98280b927adcb7820 *man/inv.lomax.Rd
+c64f106b3cd1010819641b86b926440a *man/inv.lomax.Rd
 4492e4a4f91d5fe7d4ec75a128bf4e07 *man/inv.lomaxUC.Rd
-43bff747dfa6b3c2af61853823f5b0da *man/inv.paralogistic.Rd
+af702822d0c222741dc25184e3a6a134 *man/inv.paralogistic.Rd
 6f740a890a174ff4ff3879fa8719ec58 *man/inv.paralogisticUC.Rd
+b2ce02b5af6709a1b2d294fcf254d393 *man/is.buggy.Rd
 a501c3d3de4a744a0e0cdbc0673b543d *man/is.parallel.Rd
 e68a1f19e55cd95da21eec0b119c0ad8 *man/is.smart.Rd
 1b33dcd08e9f444146fb7fe03a425add *man/is.zero.Rd
@@ -382,9 +393,9 @@ d3fb68f03d6cc946da6b48772bea3297 *man/lgammaUC.Rd
 20873e71a07de6b42d07fc6e0008ea05 *man/lino.Rd
 f56802c0fe3ec1b61cd313c370b9ff58 *man/linoUC.Rd
 b5dfa4faa955b15ebade0a3bdc8f93fe *man/lirat.Rd
-1ecc473854215d5c5209ea54ad206370 *man/lms.bcg.Rd
-194627e9dc632ec82df59b116971582a *man/lms.bcn.Rd
-eea220ccf6de89caf996cf8edf346064 *man/lms.yjn.Rd
+913facfe3f915290ad154061ccd5accb *man/lms.bcg.Rd
+77ad928a6aa56adf1cfed93e6358369d *man/lms.bcn.Rd
+b0a070fdafa635bab794c5cf3ac88ba0 *man/lms.yjn.Rd
 20824c03fc9d40f749ca42d60805124d *man/log1pexp.Rd
 34cbd6bc583c55d2acd79a46a66e064e *man/logF.Rd
 06a1ce6e6f01fca7e7037eabc6cf3dad *man/logF.UC.Rd
@@ -398,11 +409,11 @@ e956c4aae749e9034b7cf7fdf8661a64 *man/logc.Rd
 8822ba593955e90e63a8779aaf74d29b *man/loglapUC.Rd
 0f6dd1a9c0fc77dd6521af733693f52e *man/loglaplace.Rd
 49d5183ac04d29b5427b9159fa101dc3 *man/loglinb2.Rd
-a569b31d918209e8b54a62e8594a3268 *man/loglinb3.Rd
+22ad47055f4be0a62a6f418b0024c911 *man/loglinb3.Rd
 f5f48817604ad9b59304d4fb571359dd *man/loglog.Rd
 a56f1a0e81c3dfdc8620c4cef1b87450 *man/lognormal.Rd
 e859c980e26eb3e483d0f3648b502d13 *man/logoff.Rd
-ad3e8f3b35bfbd792e8a8cb6105a2121 *man/lomax.Rd
+1a96739cc02213e306e77d33c5dec358 *man/lomax.Rd
 dbc62e15528097b42fb64d49be5f22f3 *man/lomaxUC.Rd
 ac49f1d5575295a237328c2de3cbab10 *man/lqnorm.Rd
 fc9ca61a4c495cf650cba5a458b0dae1 *man/lrtest.Rd
@@ -419,7 +430,7 @@ c7fcbd341df77f76494a92836715789a *man/maxwellUC.Rd
 bd8250aaa1bc17c017c0b201642882dd *man/mccullagh89.Rd
 c007d94fac5c46a26baae899a04aaf9d *man/melbmaxtemp.Rd
 4d8d0f37dc8249d00e52283764534e98 *man/meplot.Rd
-3fe36bd9f77590dc17a9a2e9380dc0bd *man/micmen.Rd
+b1d15dda4a8aae6193ce4283ec7251bd *man/micmen.Rd
 5eed4788f6366c1814ea5c9a250424e8 *man/mix2exp.Rd
 232e7ac50df002b7c0a1d7ba70fd0bbf *man/mix2normal.Rd
 364791d9a909112b530deda4135f30f7 *man/mix2poisson.Rd
@@ -427,20 +438,20 @@ c007d94fac5c46a26baae899a04aaf9d *man/melbmaxtemp.Rd
 3d875985c00b26af9cb66e0ae0e3aef8 *man/model.matrixvlm.Rd
 199ef13d300d6fe1210885af1647c13b *man/moffset.Rd
 764cafd682a3364a495cdf243e3a528e *man/multilogit.Rd
-d2ecbe308776f1e5065b0399959e2d99 *man/multinomial.Rd
+900df9b977e6f2d0726620e2d7f6469e *man/multinomial.Rd
 c3248f9d509aecb0726bd0e6e36a13d4 *man/nakagami.Rd
 61319d756fcb8509696cc1aa55ae4ed2 *man/nakagamiUC.Rd
 7669f124f04f2912a3b004d509f9d15d *man/nbcanlink.Rd
 869ec0706195a833c57365fc8507c1bf *man/nbolf.Rd
-e83e0c32f33d41bd3c3d6816d81acb39 *man/negbinomial.Rd
+fd9adceacaf591a824a2eebd442000af *man/negbinomial.Rd
 01e4d3c6a45020bef55cbadbad8388d3 *man/negbinomial.size.Rd
 14c4a7db111d0d9f41e5a810a3afdea2 *man/normal.vcm.Rd
-9872fa02f51e95fc254c1ed7ce95df69 *man/notdocumentedyet.Rd
+9c60d91960d5448bbcdb2486bba6275f *man/notdocumentedyet.Rd
 5e590acdda3ff0a9e2df0db8d233f848 *man/nparamvglm.Rd
 98b83e406ea1968ba3e8b17d0933b2cf *man/olym.Rd
 858c73ce3c458d33e5151342a4e36707 *man/ordpoisson.Rd
 025c5545a37dd996931ea7d2b42211b5 *man/oxtemp.Rd
-2fdefe9211b855ae8a00e0ec0f88fe35 *man/paralogistic.Rd
+a0b0563f3e865287ae3be10ca2f6eea8 *man/paralogistic.Rd
 383805a5130a512c207a6a30c28553d3 *man/paralogisticUC.Rd
 b8a1bd0580460ec6155b7c7bb2dae503 *man/paretoIV.Rd
 9e30cad5872ffef80576a429e37cdaca *man/paretoIVUC.Rd
@@ -450,9 +461,9 @@ a0d64aa4469a9ca70fcfa4e5af26956a *man/perksUC.Rd
 60fac0e03c8dce88e04e2c3f6def20b9 *man/persp.qrrvglm.Rd
 a38168dd57b4be503cf47732714e441b *man/pgamma.deriv.Rd
 8e0120c68b69d0760218c483490aed8e *man/pgamma.deriv.unscaled.Rd
-791d04a5c3a3bc514bf0ed1fc639f8ab *man/plotdeplot.lmscreg.Rd
+2c3491351af8d4eb4618723f612c4f26 *man/plotdeplot.lmscreg.Rd
 cea29349aed21cbaf8c70f81b7900b15 *man/plotqrrvglm.Rd
-29857fd00ca50614d9564247b07a2bf3 *man/plotqtplot.lmscreg.Rd
+24a05d0db169fb74f603b21f0b8dd7b8 *man/plotqtplot.lmscreg.Rd
 3e689a8ffae086e45cbe82fcd5255042 *man/plotrcim0.Rd
 8c391f9ad83a6afeab6446044f22b16d *man/plotvgam.Rd
 72bade4a008240a55ae5a8e5298e30b8 *man/plotvgam.control.Rd
@@ -465,17 +476,17 @@ cea29349aed21cbaf8c70f81b7900b15 *man/plotqrrvglm.Rd
 696c74487d4cebf0251299be00d545c7 *man/polonoUC.Rd
 2f4dfc6a802a52da2e14e9789e0170ae *man/posbernUC.Rd
 a746161f043ec5c5517df4b9cf71501e *man/posbernoulli.b.Rd
-1ecd67b130cd5c4f6d2d3066fdbe849b *man/posbernoulli.t.Rd
-936b86f4b44e438536136d1aec313be4 *man/posbernoulli.tb.Rd
+04f6169b69f25ad72e088a51ef9e99b7 *man/posbernoulli.t.Rd
+12ee5b18104f163749da385de04fa175 *man/posbernoulli.tb.Rd
 c2c82f9a71f8a7d20e991dee48a9c734 *man/posbinomUC.Rd
 aab909e407aa248772db0235e64890dd *man/posbinomial.Rd
 dc19e3d023a2a46c670e431a2cc853e0 *man/posgeomUC.Rd
 2963a956fa63f0bd9452b10b432d4fc8 *man/posnegbinUC.Rd
 d1594d0598d420affef6f14a1c263685 *man/posnegbinomial.Rd
 45b528182d1c01bc352dea7b84fd7671 *man/posnormUC.Rd
-e130fade4adc7216d9d825d73cf83dd6 *man/posnormal.Rd
+e3daf452b9b545aa37cefad2a93da0bd *man/posnormal.Rd
 137d3986fcbad41bf77c10585dace0b0 *man/pospoisUC.Rd
-02066c793ac6cc88cdcb14ceb9b67fcb *man/pospoisson.Rd
+89e1ac898695d90f1d6075cafa971460 *man/pospoisson.Rd
 cc06ad7f82789c3703e4977cc39828ed *man/powerlink.Rd
 66bad6a1a2012e256b483e1727aca7e9 *man/prats.Rd
 ee31e58dfd33c2c3b0d51eac95b553ad *man/predictqrrvglm.Rd
@@ -483,12 +494,12 @@ cb6a8c644c31d6ec5e8977ea7b1198df *man/predictvglm.Rd
 4b6da0d45912d1b7fbd9d833f20ec3e9 *man/prentice74.Rd
 5f4fbb060b2d8386d8d2bfde926d9d5d *man/prinia.Rd
 d1b88140c378a21755511fb4a6ae6bce *man/probit.Rd
-0718999b2644fa5d30ffcd81722350e5 *man/propodds.Rd
+a80e37fe75a14efcfe236f6181cc58ac *man/propodds.Rd
 241402d089ef4159f01fb4cd2c72b9a3 *man/prplot.Rd
 ab1399d5d5f71707fd46960dc3efad04 *man/put.smart.Rd
 8f4e6ebea74037334377e346c5b476f6 *man/qrrvglm.control.Rd
 0b4cf628cd3e15b0668ae4ddae4d3ee6 *man/qtplot.gumbel.Rd
-19419758045a8282b21c6c7a8412a725 *man/qtplot.lmscreg.Rd
+b10bad72776d283be77901e730593f2e *man/qtplot.lmscreg.Rd
 bf8b2681beaeae00d54c8cb5422ad069 *man/quasibinomialff.Rd
 1dbf7bc4c97a7aafebcd736cf1baddbb *man/quasipoissonff.Rd
 bbde69d1bad346cd4ad04763c96d6ffe *man/qvar.Rd
@@ -499,18 +510,18 @@ a95c0df100dedc0b4e80be0659858441 *man/rayleighUC.Rd
 585af0deb3deb7b61388d6d4557994d8 *man/rec.exp1.Rd
 64ea5646e75515a8b40fbd136fa6065e *man/rec.normal.Rd
 49abf27f1c088a43cda71f0723cf188b *man/reciprocal.Rd
-a56ddce8598af2320fdadb94c42a9b24 *man/rhobit.Rd
+acfa691d70513cd589bcab400e529f4a *man/rhobit.Rd
 d907e0bbe40b4fb02b0763ab6076309e *man/riceUC.Rd
 85498654134f98f8aa887bed07b4985a *man/riceff.Rd
 9dd5a151bfc05adcce0ae88a02eb08a8 *man/rigff.Rd
 0e12c48578228c300e8c04ab3b08c04a *man/rlplot.egev.Rd
 3c6afb0af10ae003dfa8cf9caa567d9b *man/rrar.Rd
-21af7f47c09e9758460cbf6d2ebf79cc *man/rrvglm-class.Rd
-78c5e9f5ae598e17f06957873e645c96 *man/rrvglm.Rd
+330d39b23f38eea93d453f07fcb7574b *man/rrvglm-class.Rd
+6c28407aa99813ab33175602570fbd3b *man/rrvglm.Rd
 71e3f19a37b6f429458eb9060f5e2ef4 *man/rrvglm.control.Rd
 eb0e4a0a8b0c63cd0c17120e9ca8df53 *man/rrvglm.optim.control.Rd
 ecc44804896b8f3d4a9d469a952fe9a6 *man/ruge.Rd
-b60106c185ce93eb2c09bc34d1f7b349 *man/s.Rd
+21a97af245ddc566ddd8935381f6ea22 *man/s.Rd
 3ebe2abf58080c4588a912c695adae77 *man/sc.studentt2.Rd
 114f55f02750721179c9fc78d93f686c *man/sc.t2UC.Rd
 c3096134b4f765a7d1d893fb9388488b *man/seq2binomial.Rd
@@ -518,8 +529,8 @@ c3096134b4f765a7d1d893fb9388488b *man/seq2binomial.Rd
 451a726465c8e82555ba50a857e86ce0 *man/simplex.Rd
 f158e6c60a4e6b6e13f2a9519515a021 *man/simplexUC.Rd
 41af17badd0ef1b17cee591a35d46a12 *man/simulate.vlm.Rd
-86da93168db7b3bf3153e7b0eca85439 *man/sinmad.Rd
-9a4c16a6d079f7d9e5c22914e30497dc *man/sinmadUC.Rd
+bab7555bb34c57f8e56b59af277a5cc4 *man/sinmad.Rd
+95cbc5903a187d325c52c3d9d07ee252 *man/sinmadUC.Rd
 5327f9644795a6ed4e1909159156b656 *man/skellam.Rd
 2424940e3cff6d5a3ddd0ee99565ea39 *man/skellamUC.Rd
 b62da6a60b01916a10d691e980253bc0 *man/skewnormUC.Rd
@@ -528,12 +539,12 @@ b62da6a60b01916a10d691e980253bc0 *man/skewnormUC.Rd
 9fc90a85fdd63c0b3c49203f5e3d776f *man/slashUC.Rd
 21bada3a13aca65ba49fb28127575144 *man/smart.expression.Rd
 5726ef8bb900532df62b24bd4b7b8fe4 *man/smart.mode.is.Rd
-3d5d3a55f66ef8048b446da063e36ceb *man/smartpred.Rd
-098bc8b943b6ae2e0de9a4da57fcfd22 *man/sratio.Rd
+21a1d3bd045859ceab377610a53ba976 *man/smartpred.Rd
+a9dd55f0d6949934e7c89abc6c124e83 *man/sratio.Rd
 0c48da9ab33eb24273c6348320a64f64 *man/studentt.Rd
 0258a94ee53da230fb2aea74fd90192a *man/tikuv.Rd
 ccaa57b076049fdf3cee1c321a2ab456 *man/tikuvUC.Rd
-5fbf542c18e27e990c98bacedd614a39 *man/tobit.Rd
+2e766ddeb8f6fb02dcd50740639de6ad *man/tobit.Rd
 5130a86e60a3b1010b1364155a1afdd0 *man/tobitUC.Rd
 b70afa170b0cf98a6c2a9eea9dc58483 *man/toxop.Rd
 9b6a285a017b9928ae92a76eaf9e502d *man/triangle.Rd
@@ -543,16 +554,16 @@ c786330c607d69d19e59fc3823d1e2f2 *man/trplot.qrrvglm.Rd
 aeaf42ac6e475f1dc3f180450d56c2ee *man/truncparetoUC.Rd
 1d47c3a8f732ea01782c7e0b9929a921 *man/truncweibull.Rd
 50ada9ecd189456ce9f218d22b49089c *man/ucberk.Rd
-c21d431f9341336ff44cc7019e755c98 *man/undocumented-methods.Rd
-2fd783dbf5c2dbcb81727fe479729163 *man/uninormal.Rd
+39838a7245484fda644f275531575884 *man/undocumented-methods.Rd
+f8f257cf6c91bb3c2765bc9c1d5fd4f1 *man/uninormal.Rd
 f787bf505e7e68f5f16a49f48abb9bcb *man/venice.Rd
-215970e9b9824a503e8984e432c5c924 *man/vgam-class.Rd
-a04996c86d14b710cac6615958b50caf *man/vgam.Rd
+8ab09ea32a3839db780ac641218c322e *man/vgam-class.Rd
+77ac046fef4733e8dc8a26ecb61d201d *man/vgam.Rd
 ea3fe248b860921783367037c8302c49 *man/vgam.control.Rd
-d11e5c5279c115678bb103f5b4575938 *man/vglm-class.Rd
-042568190fce50db76f5574b790e36e1 *man/vglm.Rd
-c21cd55efce9d242cbe555cb65aea5e3 *man/vglm.control.Rd
-8d9fa0cc290e49e459947c38c292df4c *man/vglmff-class.Rd
+126b55b4567a63cf2edb04a8b6d91506 *man/vglm-class.Rd
+def3c8ac9a4ab8a58b88a3d10df8958d *man/vglm.Rd
+0fb3b6b60182efdce44c9d225bcf0a64 *man/vglm.control.Rd
+7cab64090aec93a3edb1a7df961a1fe0 *man/vglmff-class.Rd
 95420c89f2280b447cbd7784f83e7454 *man/vonmises.Rd
 25b2ef45238e3f61e82dcf52f3d17090 *man/vsmooth.spline.Rd
 c498f29d7fc8156fd345b4892f02190d *man/waitakere.Rd
@@ -560,7 +571,7 @@ c498f29d7fc8156fd345b4892f02190d *man/waitakere.Rd
 9e36f5a354e39e4d645e105c7252ad00 *man/weibullR.Rd
 e41e54f8623a002d20e55df65c5b6a87 *man/weightsvglm.Rd
 3557b17f6054a1699cb653b36f6d1a37 *man/wine.Rd
-f5a3078b689d511325cb1dc0fd4e21f3 *man/wrapup.smart.Rd
+a814b37503a9534c86789482ab81333f *man/wrapup.smart.Rd
 622f0105b04159f54fcfb361972e4fb7 *man/yeo.johnson.Rd
 ebfff81b0f4730417de95f80b7c82c41 *man/yip88.Rd
 225fcd19868f17b4a5d2590e834cb888 *man/yulesimon.Rd
@@ -572,8 +583,8 @@ ae671324c0f93f66adc72f053ef9ebd9 *man/zabinomUC.Rd
 78eef8b541d039b00e9990ff758e53e9 *man/zanegbinUC.Rd
 7292195daf3dd8898a1eb971f9f46d21 *man/zanegbinomial.Rd
 b4bcb3a52a6e60efbdaa5d3cfed6fbf4 *man/zapoisUC.Rd
-0122299a628d1aea9cf560d1470d1367 *man/zapoisson.Rd
-41b375aed0074b0d0e87b2913685cda9 *man/zero.Rd
+e9861638c7394e812db8f7d18b660e3a *man/zapoisson.Rd
+64b7af3fd4cd0d0c367778c8bacabe24 *man/zero.Rd
 7985338d08e88fa23cce9cc0a09724b6 *man/zeta.Rd
 e0ef189ae8251b5e0d20b614c18cdd5a *man/zetaUC.Rd
 648342ad0677587e55e4f92d906d0d42 *man/zetaff.Rd
@@ -612,3 +623,7 @@ e9187111f5c6ce1e5808bbb3dc088c17 *src/vlinpack3.f
 753359563526a9cd5ebac104dab2d754 *src/vmux.f
 9083b462bcc275ee6dda47e97f1ebf94 *src/vmux3.c
 b19585d2495c46800b0c95f347fe89f9 *src/zeta3.c
+bfa11dbdbff271fb20342560f2bacd53 *vignettes/categoricalVGAM.Rnw
+d7beca978b587625654f981f7dc433d0 *vignettes/categoricalVGAMbib.bib
+8b5466d6da7f3c0e97030bd8364a9ca4 *vignettes/crVGAM.Rnw
+f1aac71e04f29f2b6c28434f90617ffa *vignettes/crVGAM.bib
diff --git a/NAMESPACE b/NAMESPACE
index db489f4..35e9d7e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -7,6 +7,16 @@
 useDynLib(VGAM)
 
 
+export(dgenpois)
+export(AR1)
+export(dAR1)
+export(AR1.control)
+export(param.names)
+export(is.buggy.vlm)
+exportMethods(is.buggy)
+
+
+
 importMethodsFrom("splines")
 importFrom("splines", splineDesign, bs, ns)
 
@@ -230,7 +240,6 @@ export(pnorm2, dnorm2)
 
 export(iam,
 fill, fill1, fill2, fill3,
-abbott,
 biamhcop, dbiamhcop, pbiamhcop, rbiamhcop, 
 bigamma.mckay,
 freund61,
@@ -549,7 +558,7 @@ export(dgengamma.stacy, pgengamma.stacy, qgengamma.stacy, rgengamma.stacy)
 
 export(
 dbenf, pbenf, qbenf, rbenf,
-genbetaII, genpoisson,
+genbetaII, dgenbetaII, genpoisson,
 geometric, truncgeometric,
 dlino, plino, qlino, rlino, lino, 
 grc,
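
The NAMESPACE additions above export several new user-visible functions. A hedged sketch of how they are called; the argument names for dgenpois() are taken from the new man/genpoisUC.Rd page added by this commit and should be checked against the installed version:

    library(VGAM)
    # Density of the generalized Poisson distribution (new in 0.9-8).
    dgenpois(0:4, lambda = 0.2, theta = 2)
    # AR1() is a VGAM family function and is used with vglm()/vgam()
    # in the usual way, e.g. vglm(y ~ 1, AR1, data = mydata), where
    # 'mydata' is a placeholder; dAR1() is the matching density.
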
diff --git a/NEWS b/NEWS
index 73afb0a..b5b570f 100755
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,43 @@
 
 
 
+                CHANGES IN VGAM VERSION 0.9-8
+
+NEW FEATURES
+
+    o   Tested okay on R 3.2.0.
+    o   is.buggy() tests to see if a fitted VGAM object suffers
+        from known bugs, e.g., a vgam() object with at least one
+        s() term whose constraint matrix does not have orthogonal
+        columns.
+    o   New family function: AR1(d).
+    o   New function: dgenpois().
+    o   The package has been updated to reflect the new
+        J. Stat. Soft. paper by Yee, Stoklosa and Huggins.
+        A vignette based on this paper is now included.
+    o   dgenbetaII() has now been written; and genbetaII() improved,
+        and about 8 special cases of genbetaII() have
+        all been modernized to handle multiple responses
+        and a default grid search over all the parameters
+        (arguments 'gscale' and 'gshape1.a', etc.). These
+        families are based on Kleiber and Kotz (2003).
+
+
+BUG FIXES and CHANGES
+
+    o   Family function genpoisson() has been modernized, and 
+        should give correct results wrt AIC() etc.
+    o   Argument 'init.alpha' renamed to 'ialpha', for the brat(),
+        bratt(), and dirmul.old() families.
+    o   Calls to N.hat.posbernoulli() used Hlist = constraints rather
+        than Hlist = Hlist; this failed for RR-VGLMs.
+    o   Family function tobit() obtains initial values even
+        in the case when it would otherwise fit an underdetermined
+        system of equations. Thanks to McClelland Kemp for picking
+        this up.
+
+
+
                 CHANGES IN VGAM VERSION 0.9-7
 
 NEW FEATURES
@@ -21,7 +58,6 @@ NEW FEATURES
 
 BUG FIXES and CHANGES
 
-    o   family.name() renamed to familyname().
     o   Argument 'mv' has been renamed to 'multiple.responses'.
         This applies to about 10 family functions such as binomialff().
     o   Argument 'lss' added to betaII(), dagum(), fisk(),
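
The genbetaII() rewrite noted in the 0.9-8 NEWS entry above (and visible in the R/family.actuary.R diff below) replaces the old lsfit()-based initialization with a grid search over all four parameters. A hedged sketch of the new interface, using simulated Singh-Maddala data (a special case of the generalized beta II with shape2.p = 1); this is illustrative only and not taken from the commit:

    library(VGAM)
    set.seed(123)
    gdata <- data.frame(y = rsinmad(1000, scale = 3,
                                    shape1.a = 2, shape3.q = 4))
    # The default gscale/gshape1.a/gshape2.p/gshape3.q grids
    # (all exp(-5:5)) drive the initial-value search; 'lss' controls
    # whether scale precedes shape1.a in the linear predictors.
    fit <- vglm(y ~ 1, genbetaII(lss = TRUE), data = gdata, trace = TRUE)
    coef(fit, matrix = TRUE)
    is.buggy(fit)  # also new in 0.9-8; FALSE expected (no s() terms)
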
diff --git a/R/family.actuary.R b/R/family.actuary.R
index 6b69361..b297e51 100644
--- a/R/family.actuary.R
+++ b/R/family.actuary.R
@@ -13,7 +13,6 @@
 
 
 
-
 dgumbelII <- function(x, scale = 1, shape, log = FALSE) {
 
 
@@ -2412,235 +2411,469 @@ if (ii < 3) {
 
 
 
-
- genbetaII <- function(lss,
+ genbetaII <- function(lscale    = "loge",
                        lshape1.a = "loge",
-                       lscale = "loge",
                        lshape2.p = "loge",
                        lshape3.q = "loge",
+                       iscale    = NULL,
                        ishape1.a = NULL,
-                       iscale = NULL,
-                       ishape2.p = 1.0,
-                       ishape3.q = 1.0,
-                       zero = NULL) {
+                       ishape2.p = NULL,
+                       ishape3.q = NULL,
+                       lss       = TRUE,
+                       gscale    = exp(-5:5),
+                       gshape1.a = exp(-5:5),
+                       gshape2.p = exp(-5:5),
+                       gshape3.q = exp(-5:5),
+                       zero      = ifelse(lss, -(2:4), -c(1, 3:4))) {
 
-  if (!is.logical(lss) || lss)
-    stop("argument 'lss' not specified correctly. ",
-         "See online help for important information")
 
 
-  if (length(zero) &&
-      !is.Numeric(zero, integer.valued = TRUE, positive = TRUE))
-    stop("bad input for argument 'zero'")
 
+  if (length(lss) != 1 && !is.logical(lss))
+    stop("Argument 'lss' not specified correctly")
+  
+  
+  if (length(iscale) && !is.Numeric(iscale, positive = TRUE)) 
+    stop("Bad input for argument 'iscale'")
+  
+  if (length(ishape1.a) && !is.Numeric(ishape1.a, positive = TRUE))
+    stop("Bad input for argument 'ishape1.a'")
+  
+  if (length(ishape2.p) && !is.Numeric(ishape2.p, positive = TRUE))
+    stop("Bad input for argument 'ishape2.p'")
+  
+  if (length(ishape3.q) && !is.Numeric(ishape3.q, positive = TRUE))
+    stop("Bad input for argument 'ishape3.q'")
+  
 
+  if (length(zero) && !is.Numeric(zero, integer.valued = TRUE))
+    stop("bad input for argument 'zero'")
+  
+  lscale <- as.list(substitute(lscale))
+  escale <- link2list(lscale)
+  lscale <- attr(escale, "function.name")
+  
   lshape1.a <- as.list(substitute(lshape1.a))
   eshape1.a <- link2list(lshape1.a)
   lshape1.a <- attr(eshape1.a, "function.name")
-
+  
   lshape2.p <- as.list(substitute(lshape2.p))
   eshape2.p <- link2list(lshape2.p)
   lshape2.p <- attr(eshape2.p, "function.name")
-
+  
   lshape3.q <- as.list(substitute(lshape3.q))
   eshape3.q <- link2list(lshape3.q)
   lshape3.q <- attr(eshape3.q, "function.name")
-
-  lscale <- as.list(substitute(lscale))
-  escale <- link2list(lscale)
-  lscale <- attr(escale, "function.name")
-
-
-
-  new("vglmff",
-  blurb = c("Generalized Beta II distribution\n\n",
-            "Links:    ",
-            namesof("shape1.a", lshape1.a, earg = eshape1.a), ", ", 
-            namesof("scale",    lscale,    earg = escale), ", ", 
-            namesof("shape2.p", lshape2.p, earg = eshape2.p), ", ", 
-            namesof("shape3.q", lshape3.q, earg = eshape3.q), "\n", 
-            "Mean:     scale * gamma(shape2.p + 1/shape1.a) * ",
-                      "gamma(shape3.q - 1/shape1.a) / ",
-                      "(gamma(shape2.p) * gamma(shape3.q))"),
+   
+
+  new("vglmff", 
+  blurb = 
+    c("Generalized Beta II distribution \n\n",
+      "Links:    ", 
+      ifelse (lss,
+              namesof("scale"   , lscale   , earg = escale),
+              namesof("shape1.a", lshape1.a, earg = eshape1.a)), ", ",
+      ifelse (lss, 
+              namesof("shape1.a", lshape1.a, earg = eshape1.a),
+              namesof("scale"   , lscale   , earg = escale)), ", ",
+      namesof("shape2.p" , lshape2.p, earg = eshape2.p), ", ",
+      namesof("shape3.q" , lshape3.q, earg = eshape3.q), "\n",
+      "Mean:     scale * gamma(shape2.p + 1/shape1.a) * ",
+                "gamma(shape3.q - 1/shape1.a) / ",
+                "(gamma(shape2.p) * gamma(shape3.q))"),
   constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero , M)
+    dotzero <- .zero
+    M1 <- 4
+    eval(negzero.expression.VGAM)
   }), list( .zero = zero ))),
-  initialize = eval(substitute(expression({
-
-    w.y.check(w = w, y = y,
-              ncol.w.max = 1, ncol.y.max = 1)
-
-
-    predictors.names <-
-      c(namesof("shape1.a", .lshape1.a , earg = .eshape1.a , tag = FALSE),
-        namesof("scale"   , .lscale    , earg = .escale    , tag = FALSE),
-        namesof("shape2.p", .lshape2.p , earg = .eshape2.p , tag = FALSE),
-        namesof("shape3.q", .lshape3.q , earg = .eshape3.q , tag = FALSE))
-
-    if (!length( .ishape1.a ) || !length( .iscale )) {
-      qvec <- c( .25, .5, .75)  # Arbitrary; could be made an argument
-      ishape3.q <- if (length( .ishape3.q)) .ishape3.q else 1
-      xvec <- log( (1-qvec)^(-1/ ishape3.q ) - 1 )
-      fit0 <- lsfit(x = xvec, y = log(quantile(y, qvec )))
-    }
-
-    if (!length(etastart)) {
-      aa      <- rep(if (length( .ishape1.a )) .ishape1.a else
-                     abs(1 / fit0$coef[2]),
-                     length.out = n)
-      scale   <- rep(if (length( .iscale )) .iscale else
-                     exp(fit0$coef[1]), length = n)
-      qq      <- rep(if (length( .ishape3.q )) .ishape3.q else 1.0,
-                     length.out = n)
-      parg    <- rep(if (length( .ishape2.p )) .ishape2.p else 1.0,
-                     length.out = n)
-
-
-      outOfRange <- (qq - 1/aa <= 0)
-      qq[outOfRange] <- 1 / aa[outOfRange] + 1
-      outOfRange <- (parg + 1/aa <= 0)
-      parg[outOfRange] <- 1 / aa[outOfRange] + 1
+  infos = eval(substitute(function(...) {
+    list(M1 = 4,
+         Q1 = 1,
+         expected = TRUE,
+         zero = .zero ,
+         multipleResponses = TRUE,
+         lscale    = .lscale    , lshape1.a = .lshape1.a ,
+         escale    = .escale    , eshape1.a = .eshape1.a ,
+         lshape2.p = .lshape2.p , lshape3.q = .lshape3.q ,
+         eshape2.p = .eshape2.p , eshape3.q = .eshape3.q ,
+         .zero = zero )
+  }, list( .lscale = lscale      , .lshape1.a = lshape1.a,
+           .escale = escale      , .eshape1.a = eshape1.a,
+           .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
+           .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
+           .zero = zero ))),
+  initialize = eval(substitute(expression({ 
+    temp5 <- w.y.check(w = w, y = y, 
+                       Is.positive.y = TRUE, 
+                       ncol.w.max = Inf, 
+                       ncol.y.max = Inf, 
+                       out.wy = TRUE, 
+                       colsyperw = 1, 
+                       maximize = TRUE)
+    y    <- temp5$y
+    w    <- temp5$w
+    M1 <- 4  # Number of parameters for one response
+    NOS  <- ncoly <- ncol(y)
+    M    <- M1*ncol(y)
+
+    scaL.names  <- param.names("scale",     NOS)
+    sha1.names  <- param.names("shape1.a",  NOS)
+    sha2.names  <- param.names("shape2.p",  NOS)
+    sha3.names  <- param.names("shape3.q",  NOS)
+
+    predictors.names <- c(
+      if ( .lss ) {
+        c(namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE),
+          namesof(sha1.names , .lshape1.a , earg = .eshape1.a , tag = FALSE))
+      } else {
+        c(namesof(sha1.names , .lshape1.a , earg = .eshape1.a , tag = FALSE),
+          namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE))
+      },
+      namesof(sha2.names , .lshape2.p , earg = .eshape2.p , tag = FALSE),
+      namesof(sha3.names , .lshape3.q , earg = .eshape3.q , tag = FALSE))
+    predictors.names <- predictors.names[interleave.VGAM(M, M = M1)]
     
+    if (!length(etastart)) {
+      sc.init <-
+      aa.init <-
+      pp.init <-
+      qq.init <- matrix(as.numeric(NA), n, NOS)
+          
+      for (spp. in 1:NOS) {  # For each response 'y_spp.'... do:
+        yvec <- y[, spp.]
+        wvec <- w[, spp.]
+
+          gscale     <- .gscale
+          gshape1.a  <- .gshape1.a
+          gshape2.p  <- .gshape2.p        
+          gshape3.q  <- .gshape3.q
+          if (length( .iscale ))
+            gscale    <-  rep( .iscale    , length = NOS)
+          if (length( .ishape1.a ))
+            gshape1.a <-  rep( .ishape1.a , length = NOS)
+          if (length( .ishape2.p ))
+            gshape2.p <-  rep( .ishape2.p , length = NOS)
+          if (length( .ishape3.q ))
+            gshape3.q <-  rep( .ishape3.q , length = NOS)
+          allmat1 <- expand.grid(shape1.a = gshape1.a,
+                                 shape2.p = gshape2.p,
+                                 shape3.q = gshape3.q)
+          allmat2 <- matrix(as.numeric(NA), nrow(allmat1), 2)
+
+          ll.gbII <- function(scaleval, x = x, y = y, w = w, extraargs) { 
+            ans <- sum(c(w) * dgenbetaII(x = y,
+                                         scale    = scaleval,
+                                         shape1.a = extraargs$Shape1.a,
+                                         shape2.p = extraargs$Shape2.p,
+                                         shape3.q = extraargs$Shape3.q,
+                                         log = TRUE))
+            ans
+          }
+
+          for (iloc in 1:nrow(allmat1)) {
+            allmat2[iloc, ] <-
+              grid.search(gscale, objfun = ll.gbII,
+                            y = yvec, x = x, w = wvec,
+                            ret.objfun = TRUE,  # 2nd value is the loglik
+                            extraargs = list(Shape1.a = allmat1[iloc, 1],
+                                             Shape2.p = allmat1[iloc, 2],
+                                             Shape3.q = allmat1[iloc, 3]))
+          }
+          ind5 <- which.max(allmat2[, 2])  # 2nd value is the loglik
+          sc.init[, spp.] <- allmat2[ind5, 1]
+          aa.init[, spp.] <- allmat1[ind5, 1]
+          pp.init[, spp.] <- allmat1[ind5, 2]
+          qq.init[, spp.] <- allmat1[ind5, 3]
+      }  # End of for (spp. ...)
+
+
+      finite.mean <- 1 < aa.init * qq.init
+      COP.use <- 1.15
+      while (FALSE && any(!finite.mean)) {
+        qq.init[!finite.mean] <- 0.1 + qq.init[!finite.mean] * COP.use
+        aa.init[!finite.mean] <- 0.1 + aa.init[!finite.mean] * COP.use
+        finite.mean <- 1 < aa.init * qq.init
+      }
 
       etastart <-
-        cbind(theta2eta(aa,    .lshape1.a , earg = .eshape1.a ),
-              theta2eta(scale, .lscale ,    earg = .escale ),
-              theta2eta(parg,  .lshape2.p , earg = .eshape2.p ),
-              theta2eta(qq,    .lshape3.q , earg = .eshape3.q ))
-    }
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
+        cbind(if ( .lss )
+              cbind(theta2eta(sc.init, .lscale    , earg = .escale    ),
+                    theta2eta(aa.init, .lshape1.a , earg = .eshape1.a )) else
+              cbind(theta2eta(aa.init, .lshape1.a , earg = .eshape1.a ),
+                    theta2eta(sc.init, .lscale    , earg = .escale    )),
+              theta2eta(pp.init , .lshape2.p , earg = .eshape2.p ),
+              theta2eta(qq.init , .lshape3.q , earg = .eshape3.q ))
+      etastart <- etastart[, interleave.VGAM(M, M = M1)]
+    }  # End of etastart.
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
+            .iscale    = iscale   , .ishape1.a = ishape1.a,
+            .gscale    = gscale   , .gshape1.a = gshape1.a,           
             .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
-            .eshape1.a = eshape1.a, .escale = escale, 
             .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
-            .ishape1.a = ishape1.a, .iscale = iscale, 
-            .ishape2.p = ishape2.p, .ishape3.q = ishape3.q ))),
+            .ishape2.p = ishape2.p, .ishape3.q = ishape3.q,
+            .gshape2.p = gshape2.p, .gshape3.q = gshape3.q,
+            .lss = lss ))), 
   linkinv = eval(substitute(function(eta, extra = NULL) {
-    aa     <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    Scale  <- eta2theta(eta[, 2], .lscale ,    earg = .escale )
-    parg   <- eta2theta(eta[, 3], .lshape2.p , earg = .eshape2.p )
-    qq     <- eta2theta(eta[, 4], .lshape3.q , earg = .eshape3.q )
-    ans <- Scale * exp(lgamma(parg + 1/aa) +
-                       lgamma(qq   - 1/aa) - lgamma(parg) - lgamma(qq))
-    ans[parg + 1/aa <= 0] <- NA
-    ans[qq   - 1/aa <= 0] <- NA
-    ans[aa          <= 0] <- NA
-    ans[Scale       <= 0] <- NA
-    ans[parg        <= 0] <- NA
-    ans[qq          <= 0] <- NA
+    M1 <- 4
+    NOS <- ncol(eta)/M1
+    if ( .lss ) {
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 3, drop = FALSE],
+                         .lscale ,  earg = .escale )
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 3, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lscale    ,  earg = .escale )
+    }
+    parg <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                      .lshape2.p , earg = .eshape2.p )
+    qq   <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                      .lshape3.q , earg = .eshape3.q )
+
+    ans <- cbind(Scale * exp(lgamma(parg + 1/aa) +
+                             lgamma(qq   - 1/aa) -
+                             lgamma(parg) - lgamma(qq)))
     ans
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .eshape1.a = eshape1.a, .escale = escale, 
+  }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+           .escale    = escale   , .eshape1.a = eshape1.a,
+           .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
            .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
-           .lshape2.p = lshape2.p, .lshape3.q = lshape3.q ))),
+           .lss = lss ))),
   last = eval(substitute(expression({
-    misc$link <-    c(shape1.a = .lshape1.a , scale = .lscale ,
-                      shape2.p = .lshape2.p , shape3.q = .lshape3.q )
+    M1 <- 4
+
+    misc$link <- c(rep( if ( .lss ) .lscale else .lshape1.a , len = ncoly),
+                   rep( if ( .lss ) .lshape1.a else .lscale , len = ncoly),
+                   rep( .lshape2.p , length = ncoly),
+                   rep( .lshape3.q , length = ncoly))[
+                   interleave.VGAM(M, M = M1)]
+    temp.names <- if ( .lss ) {
+      c(scaL.names, sha1.names, sha2.names, sha3.names)
+    } else {
+      c(sha1.names, scaL.names, sha2.names, sha3.names)
+    }
+    names(misc$link) <- temp.names[interleave.VGAM(M, M = M1)]
 
-    misc$earg <- list(shape1.a = .eshape1.a , scale = .escale ,
-                      shape2.p = .eshape2.p , shape3.q = .eshape3.q )
+    misc$earg <- vector("list", M)
+    names(misc$earg) <- temp.names
+    for (ii in 1:ncoly) {
+      if ( .lss ) {
+        misc$earg[[M1*ii-3]] <- .escale
+        misc$earg[[M1*ii-2]] <- .eshape1.a
+    } else {
+        misc$earg[[M1*ii-3]] <- .eshape1.a
+        misc$earg[[M1*ii-2]] <- .escale
+    }
+      misc$earg[[M1*ii-1]] <- .eshape2.p
+      misc$earg[[M1*ii  ]] <- .eshape3.q
+    }
 
-    misc$expected <- TRUE
-    misc$multipleResponses <- FALSE
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale, 
+    misc$expected          <- TRUE
+    misc$multipleResponses <- TRUE
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
+            .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
             .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
-            .lshape2.p = lshape2.p, .lshape3.q = lshape3.q ))),
+            .lss = lss ))), 
   loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta,
-             extra = NULL,
-             summation = TRUE) {
-    aa     <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    scale  <- eta2theta(eta[, 2], .lscale , earg = .escale )
-    parg   <- eta2theta(eta[, 3], .lshape2.p , earg = .eshape2.p )
-    qq     <- eta2theta(eta[, 4], .lshape3.q , earg = .eshape3.q )
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ll.elts <-
-        c(w) * (log(aa) + (aa * parg - 1) * log(y) -
-                aa * parg * log(scale) +
-              - lbeta(parg, qq) - (parg + qq) * log1p((y/scale)^aa))
-      if (summation) {
-        sum(ll.elts)
+    function(mu, y, w, residuals = FALSE, 
+             eta, extra = NULL, summation = TRUE) {
+      M1  <- 4
+      NOS <- ncol(eta)/M1
+      if ( .lss ) {
+        Scale <- eta2theta(eta[, M1*(1:NOS) - 3, drop = FALSE],
+                           .lscale    , earg = .escale    )
+        aa    <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                           .lshape1.a , earg = .eshape1.a )
       } else {
-        ll.elts
+        aa    <- eta2theta(eta[, M1*(1:NOS) - 3, drop = FALSE],
+                           .lshape1.a , earg = .eshape1.a )
+        Scale <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                           .lscale    , earg = .escale    )
       }
-    }
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .eshape1.a = eshape1.a, .escale = escale, 
-           .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
-           .lshape2.p = lshape2.p, .lshape3.q = lshape3.q ))),
+      parg   <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                          .lshape2.p , earg = .eshape2.p )
+      qq     <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                          .lshape3.q , earg = .eshape3.q )
+      if (residuals) {
+        stop("loglikelihood residuals not implemented yet")
+      } else {
+        ll.elts <-
+          c(w) * dgenbetaII(x = y, scale = Scale, shape1.a = aa,
+                            shape2.p = parg, shape3.q = qq, log = TRUE)
+        if (summation) {
+          sum(ll.elts)
+        } else {
+          ll.elts
+        }
+      }
+    }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+             .escale    = escale   , .eshape1.a = eshape1.a,
+             .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
+             .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
+             .lss = lss ))),
   vfamily = c("genbetaII"),
-  deriv = eval(substitute(expression({
-    aa     <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    scale  <- eta2theta(eta[, 2], .lscale ,    earg = .escale )
-    parg   <- eta2theta(eta[, 3], .lshape2.p , earg = .eshape2.p )
-    qq     <- eta2theta(eta[, 4], .lshape3.q , earg = .eshape3.q )
-
-    temp1 <- log(y/scale)
-    temp2 <- (y/scale)^aa
+  deriv = eval(substitute(expression({ 
+    NOS <- ncol(eta)/M1  # Needed for summary()
+    if ( .lss ) { 
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 3, drop = FALSE],
+                         .lscale    , earg = .escale   )
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 3, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lscale    , earg = .escale )
+    }  
+    parg <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                      .lshape2.p , earg = .eshape2.p)
+    qq   <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                      .lshape3.q , earg = .eshape3.q)
+    temp1 <- log(y/Scale)
+    temp2 <- (y/Scale)^aa
     temp3 <- digamma(parg + qq)
     temp3a <- digamma(parg)
     temp3b <- digamma(qq)
     temp4 <- log1p(temp2)
-
+  
+    dl.dscale <- (aa/Scale) * (-parg + (parg+qq) / (1+1/temp2))
     dl.da <- 1/aa + parg * temp1 - (parg+qq) * temp1 / (1+1/temp2)
-    dl.dscale <- (aa/scale) * (-parg + (parg+qq) / (1+1/temp2))
     dl.dp <- aa * temp1 + temp3 - temp3a - temp4
     dl.dq <- temp3 - temp3b - temp4
-
+    
+    dscale.deta <- dtheta.deta(Scale, .lscale ,    earg = .escale )
     da.deta     <- dtheta.deta(aa,    .lshape1.a , earg = .eshape1.a )
-    dscale.deta <- dtheta.deta(scale, .lscale ,    earg = .escale )
     dp.deta     <- dtheta.deta(parg,  .lshape2.p , earg = .eshape2.p )
     dq.deta     <- dtheta.deta(qq,    .lshape3.q , earg = .eshape3.q )
-
-    c(w) * cbind( dl.da * da.deta,
-                  dl.dscale * dscale.deta,
-                  dl.dp * dp.deta,
-                  dl.dq * dq.deta )
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale, 
-            .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
-            .lshape2.p = lshape2.p, .lshape3.q = lshape3.q ))),
+    
+    myderiv <- if ( .lss ) {
+      c(w) * cbind(dl.dscale * dscale.deta,
+                   dl.da * da.deta,
+                   dl.dp * dp.deta,
+                   dl.dq * dq.deta)
+    } else {
+      c(w) * cbind(dl.da * da.deta,
+                   dl.dscale * dscale.deta,
+                   dl.dp * dp.deta,
+                   dl.dq * dq.deta)
+    }
+    myderiv[, interleave.VGAM(M, M = M1)]
+  }), list(  .lscale    = lscale   , .lshape1.a = lshape1.a,
+             .escale    = escale   , .eshape1.a = eshape1.a,
+             .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
+             .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
+             .lss = lss ))),
   weight = eval(substitute(expression({
     temp5  <- trigamma(parg + qq)
     temp5a <- trigamma(parg)
     temp5b <- trigamma(qq)
 
-    ned2l.da <- (1 + parg+qq + parg * qq * (temp5a + temp5b +
+    ned2l.da <- (1 + parg + qq + parg * qq * (temp5a + temp5b +
                 (temp3b - temp3a + (parg-qq)/(parg*qq))^2 -
                 (parg^2 + qq^2) / (parg*qq)^2)) / (aa^2 * (1+parg+qq))
-    ned2l.dscale <- aa^2 * parg * qq / (scale^2 * (1+parg+qq))
-    ned2l.dp <- temp5a - temp5
-    ned2l.dq <- temp5b - temp5
+    ned2l.dscale  <- (aa^2) * parg * qq / ((1+parg+qq) * Scale^2)
+    ned2l.dp      <- temp5a - temp5
+    ned2l.dq      <- temp5b - temp5
     ned2l.dascale <- (parg - qq - parg * qq *
-                     (temp3a -temp3b)) / (scale*(1 + parg+qq))
-    ned2l.dap <- -(qq   * (temp3a -temp3b) -1) / (aa*(parg+qq))
-    ned2l.daq <- -(parg * (temp3b -temp3a) -1) / (aa*(parg+qq))
-    ned2l.dscalep <-  aa * qq   / (scale*(parg+qq))
-    ned2l.dscaleq <- -aa * parg / (scale*(parg+qq))
-    ned2l.dpq <- -temp5
-
-    wz <- matrix(as.numeric(NA), n, dimm(M))  # M==4 means 10=dimm(M)
-    wz[, iam(1, 1, M)] <- ned2l.da * da.deta^2
-    wz[, iam(2, 2, M)] <- ned2l.dscale * dscale.deta^2
-    wz[, iam(3, 3, M)] <- ned2l.dp * dp.deta^2
-    wz[, iam(4, 4, M)] <- ned2l.dq * dq.deta^2
-    wz[, iam(1, 2, M)] <- ned2l.dascale * da.deta * dscale.deta
-    wz[, iam(1, 3, M)] <- ned2l.dap * da.deta * dp.deta
-    wz[, iam(1, 4, M)] <- ned2l.daq * da.deta * dq.deta
-    wz[, iam(2, 3, M)] <- ned2l.dscalep * dscale.deta * dp.deta
-    wz[, iam(2, 4, M)] <- ned2l.dscaleq * dscale.deta * dq.deta
-    wz[, iam(3, 4, M)] <- ned2l.dpq * dp.deta * dq.deta
-    wz <- c(w) * wz
+                     (temp3a -temp3b)) / (Scale*(1 + parg+qq))
+    ned2l.dap     <- -(qq   * (temp3a -temp3b) -1) / (aa*(parg+qq))
+    ned2l.daq     <- -(parg * (temp3b -temp3a) -1) / (aa*(parg+qq))
+    ned2l.dscalep <-  aa * qq   / (Scale*(parg+qq))
+    ned2l.dscaleq <- -aa * parg / (Scale*(parg+qq))
+    ned2l.dpq     <- -temp5
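+    # The six cross-terms below are stacked in band order:
+    # (1,2), (2,3), (3,4), then (1,3), (2,4), then (1,4).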
+    wz <- if ( .lss ) {
+      array(c(c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.da * da.deta^2,
+              c(w) * ned2l.dp * dp.deta^2,
+              c(w) * ned2l.dq * dq.deta^2,
+              c(w) * ned2l.dascale * da.deta * dscale.deta,
+              c(w) * ned2l.dap * da.deta * dp.deta,
+              c(w) * ned2l.dpq * dp.deta * dq.deta,
+              c(w) * ned2l.dscalep * dscale.deta * dp.deta,
+              c(w) * ned2l.daq * da.deta * dq.deta,
+              c(w) * ned2l.dscaleq * dscale.deta * dq.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    } else {
+      array(c(c(w) * ned2l.da * da.deta^2,
+              c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.dp * dp.deta^2,
+              c(w) * ned2l.dq * dq.deta^2,
+              c(w) * ned2l.dascale * da.deta * dscale.deta,
+              c(w) * ned2l.dscalep * dscale.deta * dp.deta,
+              c(w) * ned2l.dpq * dp.deta * dq.deta,
+              c(w) * ned2l.dap * da.deta * dp.deta,
+              c(w) * ned2l.dscaleq * dscale.deta * dq.deta,
+              c(w) * ned2l.daq * da.deta * dq.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    }
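+    # Reshape the n x NOS x M1*(M1+1)/2 array into the usual band-format
+    # working-weight matrix.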
+    wz <- arwz2wz(wz, M = M, M1 = M1)
     wz
-  }), list( .lshape1.a <- lshape1.a, .lscale = lscale,
-            .eshape1.a <- eshape1.a, .escale = escale, 
-            .eshape2.p <- eshape2.p, .eshape3.q = eshape3.q,
-            .lshape2.p <- lshape2.p, .lshape3.q = lshape3.q ))))
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a, 
+            .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
+            .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
+            .lss = lss ))))
+}
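+
+# Usage sketch (illustrative only; 'gdata' is hypothetical simulated data):
+#   gdata <- data.frame(y = rsinmad(1000, scale = exp(2),
+#                                   shape1.a = exp(1), shape3.q = exp(1)))
+#   fit <- vglm(y ~ 1, genbetaII(lss = TRUE), data = gdata, trace = TRUE)
+#   coef(fit, matrix = TRUE)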
+
+
+
+
+
+
+
+
+
+
+dgenbetaII <- function(x, scale = 1, shape1.a, shape2.p, shape3.q,
+                       log = FALSE)  {
+
+  if (!is.logical(log.arg <- log) || length(log) != 1)
+    stop("Bad input for argument 'log'")
+  rm(log)
+
+
+  logden <- log(shape1.a) + (shape1.a * shape2.p - 1) * log(abs(x)) -
+            shape1.a * shape2.p * log(scale) -
+            lbeta(shape2.p, shape3.q) -
+            (shape2.p + shape3.q) * log1p((abs(x)/scale)^shape1.a)
+  
+
+  if (any(x <= 0) || any(is.infinite(x))) {
+    LLL <- max(length(x),        length(scale),
+               length(shape1.a), length(shape2.p), length(shape3.q))
+    if (length(x)        != LLL) x        <- rep(x,        length = LLL)
+    if (length(scale)    != LLL) scale    <- rep(scale,    length = LLL)
+    if (length(shape1.a) != LLL) shape1.a <- rep(shape1.a, length = LLL)
+    if (length(shape2.p) != LLL) shape2.p <- rep(shape2.p, length = LLL)
+    if (length(shape3.q) != LLL) shape3.q <- rep(shape3.q, length = LLL)
+
+    logden[is.infinite(x)] <- log(0)
+    logden[x < 0] <- log(0)  
+    x.eq.0 <- !is.na(x) & (x == 0)
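+    # Near x = 0 the density behaves like x^(shape1.a*shape2.p - 1): it
+    # diverges when a*p < 1, is positive and finite when a*p == 1, and is
+    # 0 when a*p > 1.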
+    if (any(x.eq.0)) {
+      axp <- shape1.a * shape2.p  # full length LLL so that the
+                                  # 'x.eq.0 & ...' subscripts below line up
+      logden[x.eq.0 & axp <  1] <- log(Inf)
+      ind5 <- x.eq.0 & axp == 1
+      logden[ind5] <- log(shape1.a[ind5]) -
+        shape1.a[ind5] * shape2.p[ind5] * log(scale[ind5]) -
+        lbeta(shape2.p[ind5], shape3.q[ind5]) -
+        (shape2.p[ind5] + shape3.q[ind5]) *
+        log1p((0/scale[ind5])^shape1.a[ind5])
+      logden[x.eq.0 & axp >  1] <- log(0)    
+    }
+  }
+
+  if (log.arg) logden else exp(logden)
 }
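+
+# Illustrative sanity check: for valid parameters the density integrates to 1.
+#   integrate(dgenbetaII, lower = 0, upper = Inf, scale = 2,
+#             shape1.a = 3, shape2.p = 2, shape3.q = 4)  # approx. 1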
 
 
+
+
+
 rsinmad <- function(n, scale = 1, shape1.a, shape3.q)
   qsinmad(runif(n), shape1.a = shape1.a, scale = scale,
           shape3.q = shape3.q)
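+# rsinmad() samples by inversion: qsinmad() applied to Uniform(0, 1) draws.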
@@ -2958,6 +3191,15 @@ pinv.paralogistic <- function(q, scale = 1, shape1.a,
 
 
 
+
+
+dbetaII <- function(x, scale = 1, shape2.p, shape3.q, log = FALSE)
+  dgenbetaII(x = x, scale = scale, shape1.a = 1,
+             shape2.p = shape2.p, shape3.q = shape3.q, log = log)
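+# betaII is the shape1.a == 1 special case of the generalized beta II,
+# e.g. dbetaII(2, scale = 1, shape2.p = 3, shape3.q = 4).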
+
+
+
+
 dsinmad <- function(x, scale = 1, shape1.a, shape3.q, log = FALSE) {
 
   if (!is.logical(log.arg <- log) || length(log) != 1)
@@ -2966,9 +3208,9 @@ dsinmad <- function(x, scale = 1, shape1.a, shape3.q, log = FALSE) {
 
   LLL      <- max(length(x),     length(shape1.a),
                   length(scale), length(shape3.q))
-  x        <- rep(x,         length.out = LLL);
+  x        <- rep(x,         length.out = LLL)
   shape1.a <- rep(shape1.a,  length.out = LLL)
-  scale    <- rep(scale,     length.out = LLL);
+  scale    <- rep(scale,     length.out = LLL)
   shape3.q <- rep(shape3.q,  length.out = LLL)
 
   Loglik <- rep(log(0), length.out = LLL)
@@ -3054,104 +3296,231 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
 
 
 
- sinmad <- function(lss,
-                    lshape1.a = "loge",
-                    lscale = "loge",
-                    lshape3.q = "loge",
-                    ishape1.a = NULL, 
-                    iscale = NULL,
-                    ishape3.q = 1.0, 
-                    zero = NULL) {
 
-  if (!is.logical(lss) || lss)
-    stop("argument 'lss' not specified correctly. ",
-         "See online help for important information")
 
 
 
 
-  if (length(zero) &&
-      !is.Numeric(zero, integer.valued = TRUE, positive = TRUE))
-    stop("bad input for argument 'zero'")
+ sinmad    <- function(lscale    = "loge",
+                       lshape1.a = "loge",
+                       lshape3.q = "loge",
+                       iscale    = NULL,
+                       ishape1.a = NULL,
+                       ishape3.q = NULL,
+                       imethod   = 1,
+                       lss       = TRUE,
+                       gscale    = exp(-5:5),
+                       gshape1.a = exp(-5:5),
+                       gshape3.q = exp(-5:5),
+                       probs.y   = c(0.25, 0.50, 0.75),
+                       zero      = if (lss) -(2:3) else -c(1, 3)) {
+  # if(), not ifelse(): ifelse() would return only the first element
+
+
 
 
+  if (length(lss) != 1 || !is.logical(lss))
+    stop("Argument 'lss' not specified correctly")
+  
+  if (!is.Numeric(imethod, length.arg = 1,
+                  integer.valued = TRUE, 
+                  positive = TRUE) || imethod > 2)
+    stop("Bad input for argument 'imethod'")
+  
+  if (length(iscale) && !is.Numeric(iscale, positive = TRUE)) 
+    stop("Bad input for argument 'iscale'")
+  
+  if (length(ishape1.a) && !is.Numeric(ishape1.a, positive = TRUE))
+    stop("Bad input for argument 'ishape1.a'")
+  
+  if (length(ishape3.q) && !is.Numeric(ishape3.q, positive = TRUE))
+    stop("Bad input for argument 'ishape3.q'")
+  
+  if (length(probs.y) < 2 || max(probs.y) > 1 || 
+        !is.Numeric(probs.y, positive = TRUE))
+    stop("Bad input for argument 'probs.y'")
+
+  if (length(zero) && !is.Numeric(zero, integer.valued = TRUE))
+    stop("bad input for argument 'zero'")
+  
+  lscale <- as.list(substitute(lscale))
+  escale <- link2list(lscale)
+  lscale <- attr(escale, "function.name")
+  
   lshape1.a <- as.list(substitute(lshape1.a))
   eshape1.a <- link2list(lshape1.a)
   lshape1.a <- attr(eshape1.a, "function.name")
-
+  
   lshape3.q <- as.list(substitute(lshape3.q))
   eshape3.q <- link2list(lshape3.q)
   lshape3.q <- attr(eshape3.q, "function.name")
-
-  lscale <- as.list(substitute(lscale))
-  escale <- link2list(lscale)
-  lscale <- attr(escale, "function.name")
-
-
-
-  new("vglmff",
-  blurb = c("Singh-Maddala distribution\n\n",
-          "Links:    ",
-          namesof("shape1.a", lshape1.a, earg = eshape1.a), ", ", 
-          namesof("scale",    lscale,    earg = escale),    ", ", 
-          namesof("shape3.q", lshape3.q, earg = eshape3.q), "\n", 
-          "Mean:     scale * gamma(1 + 1/shape1.a) * ",
-                    "gamma(shape3.q - 1/shape1.a) / ",
-                    "gamma(shape3.q)"),
+   
+
+  new("vglmff", 
+  blurb = 
+    c("Singh-Maddala distribution \n\n",
+      "Links:    ", 
+      ifelse (lss,
+              namesof("scale"   , lscale   , earg = escale),
+              namesof("shape1.a", lshape1.a, earg = eshape1.a)), ", ",
+      ifelse (lss, 
+              namesof("shape1.a", lshape1.a, earg = eshape1.a),
+              namesof("scale"   , lscale   , earg = escale)), ", ",
+      namesof("shape3.q" , lshape3.q, earg = eshape3.q), "\n",
+      "Mean:     scale * gamma(shape2.p + 1/shape1.a) * ",
+                "gamma(shape3.q - 1/shape1.a) / ",
+                "gamma(shape3.q)"),
   constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero , M)
+    dotzero <- .zero
+    M1 <- 3
+    eval(negzero.expression.VGAM)
   }), list( .zero = zero ))),
-  initialize = eval(substitute(expression({
-
-    w.y.check(w = w, y = y,
-              ncol.w.max = 1, ncol.y.max = 1)
-
-
-    predictors.names <-
-        c(namesof("shape1.a", .lshape1.a , earg = .eshape1.a , tag = FALSE),
-          namesof("scale",    .lscale ,    earg = .escale ,    tag = FALSE),
-          namesof("shape3.q", .lshape3.q , earg = .eshape3.q , tag = FALSE))
-    parg <- 1
-
-    if (!length( .ishape1.a) || !length( .iscale )) {
-        qvec <- c( .25, .5, .75)  # Arbitrary; could be made an argument
-        ishape3.q <- if (length( .ishape3.q)) .ishape3.q else 1
-        xvec <- log( (1-qvec)^(-1/ ishape3.q ) - 1 )
-        fit0 <- lsfit(x = xvec, y = log(quantile(y, qvec )))
-    }
-
-
-
+  infos = eval(substitute(function(...) {
+    list(M1 = 3,
+         Q1 = 1,
+         expected = TRUE,
+         zero = .zero ,
+         multipleResponses = TRUE,
+         lscale = .lscale ,       lshape1.a = .lshape1.a ,
+         escale = .escale ,       eshape1.a = .eshape1.a ,
+                                  lshape3.q = .lshape3.q ,
+                                  eshape3.q = .eshape3.q ,
+         .zero = zero )
+  }, list( .lscale = lscale      , .lshape1.a = lshape1.a,
+           .escale = escale      , .eshape1.a = eshape1.a,
+                                   .lshape3.q = lshape3.q,
+                                   .eshape3.q = eshape3.q,
+           .zero = zero ))),
+  initialize = eval(substitute(expression({ 
+    temp5 <- w.y.check(w = w, y = y, 
+                       Is.positive.y = TRUE, 
+                       ncol.w.max = Inf, 
+                       ncol.y.max = Inf, 
+                       out.wy = TRUE, 
+                       colsyperw = 1, 
+                       maximize = TRUE)
+    y    <- temp5$y
+    w    <- temp5$w
+    M1 <- 3  # Number of parameters for one response
+    NOS  <- ncoly <- ncol(y)
+    M    <- M1*ncol(y)
+
+    scaL.names  <- param.names("scale",     NOS)
+    sha1.names  <- param.names("shape1.a",  NOS)
+    sha3.names  <- param.names("shape3.q",  NOS)
+
+    predictors.names <- c(
+      if ( .lss ) {
+        c(namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE),
+          namesof(sha1.names , .lshape1.a , earg = .eshape1.a , tag = FALSE))
+      } else {
+        c(namesof(sha1.names , .lshape1.a , earg = .eshape1.a , tag = FALSE),
+          namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE))
+      },
+      namesof(sha3.names , .lshape3.q , earg = .eshape3.q , tag = FALSE))
+    predictors.names <- predictors.names[interleave.VGAM(M, M = M1)]
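+    # Interleave so each response's M1 = 3 predictors occupy adjacent slots.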
+    
     if (!length(etastart)) {
-      aa    <- rep(if (length( .ishape1.a)) .ishape1.a else 1/fit0$coef[2],
-                   length.out = n)
-      scale <- rep(if (length( .iscale )) .iscale else exp(fit0$coef[1]),
-                   length.out = n)
-      qq    <- rep(if (length( .ishape3.q)) .ishape3.q else 1.0,
-                   length.out = n)
-
-
-      outOfRange <- (aa * qq <= 1)
-      qq[outOfRange] <- 1 / aa[outOfRange] + 1 
-
+      sc.init <-
+      aa.init <-
+      qq.init <- matrix(as.numeric(NA), n, NOS)
+          
+      for (spp. in 1:NOS) {  # For each response 'y_spp.'... do:
+        yvec <- y[, spp.]
+        wvec <- w[, spp.]
 
-        etastart <-
-          cbind(theta2eta(aa,    .lshape1.a , earg = .eshape1.a ),
-                theta2eta(scale, .lscale    , earg = .escale ),
-                theta2eta(qq,    .lshape3.q , earg = .eshape3.q ))
-    }
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .lshape3.q = lshape3.q,
-            .eshape1.a = eshape1.a, .escale = escale, 
-            .eshape3.q = eshape3.q,
-            .ishape1.a = ishape1.a, .iscale = iscale, 
-            .ishape3.q = ishape3.q ))),
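+        # imethod 1: grid search over (shape1.a, shape3.q) pairs, maximizing
+        # the log-likelihood over 'scale' for each pair via grid.search().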
+        if ( .imethod == 1 ) {
+          gscale     <- .gscale
+          gshape1.a  <- .gshape1.a
+          gshape3.q  <- .gshape3.q
+          if (length( .iscale ))
+            gscale    <-  rep( .iscale    , length = NOS)
+          if (length( .ishape1.a ))
+            gshape1.a <-  rep( .ishape1.a , length = NOS)
+          if (length( .ishape3.q ))
+            gshape3.q <-  rep( .ishape3.q , length = NOS)
+          allmat1 <- expand.grid(shape1.a = gshape1.a,
+                                 shape3.q = gshape3.q)
+          allmat2 <- matrix(as.numeric(NA), nrow(allmat1), 2)
+
+          ll.sinm <- function(scaleval, x = x, y = y, w = w, extraargs) { 
+            ans <- sum(c(w) * dgenbetaII(x = y,
+                                         scale    = scaleval,
+                                         shape1.a = extraargs$Shape1.a,
+                                         shape2.p = 1,
+                                         shape3.q = extraargs$Shape3.q,
+                                         log = TRUE))
+            ans
+          }
+
+          for (iloc in 1:nrow(allmat1)) {
+            allmat2[iloc, ] <-
+              grid.search(gscale, objfun = ll.sinm,
+                            y = yvec, x = x, w = wvec,
+                            ret.objfun = TRUE,  # 2nd value is the loglik
+                            extraargs = list(Shape1.a = allmat1[iloc, 1],
+                                             Shape3.q = allmat1[iloc, 2]))
+          }
+          ind5 <- which.max(allmat2[, 2])  # 2nd value is the loglik
+          sc.init[, spp.] <- allmat2[ind5, 1]
+          aa.init[, spp.] <- allmat1[ind5, 1]
+          qq.init[, spp.] <- allmat1[ind5, 2]
+        } else {  # .imethod == 2
+          qvec <- .probs.y 
+          ishape3.q <- if (length( .ishape3.q )) .ishape3.q else 1
+          xvec <- log( (1-qvec)^(-1/ ishape3.q ) - 1 )
+          fit0 <- lsfit(x = xvec, y = log(quantile(yvec,  qvec)))
+          sc.init[, spp.] <- if (length( .iscale )) .iscale else
+                             exp(fit0$coef[1])
+          aa.init[, spp.] <- if (length( .ishape1.a )) .ishape1.a else
+                             1/fit0$coef[2]
+          qq.init[, spp.] <- ishape3.q
+        }
+      }  # End of for (spp. ...)
+
+      finite.mean <- 1 < aa.init * qq.init
+      COP.use <- 1.15
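+      # NB: 'FALSE &&' below disables this adjustment loop; it never runs.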
+      while (FALSE && any(!finite.mean)) {
+        qq.init[!finite.mean] <- 0.1 + qq.init[!finite.mean] * COP.use
+        aa.init[!finite.mean] <- 0.1 + aa.init[!finite.mean] * COP.use
+        finite.mean <- 1 < aa.init * qq.init
+      }
 
+      etastart <-
+        cbind(if ( .lss )
+              cbind(theta2eta(sc.init, .lscale    , earg = .escale    ),
+                    theta2eta(aa.init, .lshape1.a , earg = .eshape1.a )) else
+              cbind(theta2eta(aa.init, .lshape1.a , earg = .eshape1.a ),
+                    theta2eta(sc.init, .lscale    , earg = .escale    )),
+              theta2eta(qq.init , .lshape3.q , earg = .eshape3.q ))
+      etastart <- etastart[, interleave.VGAM(M, M = M1)]
+    }  # End of etastart.
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
+            .iscale    = iscale   , .ishape1.a = ishape1.a,
+            .gscale    = gscale   , .gshape1.a = gshape1.a,           
+                                    .lshape3.q = lshape3.q,
+                                    .eshape3.q = eshape3.q,
+                                    .ishape3.q = ishape3.q,
+                                    .gshape3.q = gshape3.q,
+            .imethod   = imethod   , .probs.y  = probs.y,
+            .lss = lss ))), 
   linkinv = eval(substitute(function(eta, extra = NULL) {
-    aa     <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    Scale  <- eta2theta(eta[, 2], .lscale ,    earg = .escale )
-    parg   <- 1
-    qq     <- eta2theta(eta[, 3], .lshape3.q , earg = .eshape3.q )
+    M1 <- 3
+    NOS <- ncol(eta)/M1
+    if ( .lss ) {
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lscale ,  earg = .escale )
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale    ,  earg = .escale )
+    }
+    parg <- 1
+    qq   <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                      .lshape3.q , earg = .eshape3.q )
 
     ans <- Scale * exp(lgamma(parg + 1/aa) +
                       lgamma(qq   - 1/aa) - lgamma(parg) - lgamma(qq))
@@ -3161,55 +3530,82 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
     ans[Scale       <= 0] <- NA
     ans[qq          <= 0] <- NA
     ans
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .eshape1.a = eshape1.a, .escale = escale, 
-           .eshape3.q = eshape3.q,
-           .lshape3.q = lshape3.q ))),
-
+  }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+           .escale    = escale   , .eshape1.a = eshape1.a,
+                                   .lshape3.q = lshape3.q,
+                                   .eshape3.q = eshape3.q,
+           .lss = lss ))),
   last = eval(substitute(expression({
-    misc$link <-
-         c(shape1.a = .lshape1.a ,
-           scale    = .lscale ,
-           shape3.q = .lshape3.q )
+    M1 <- 3
 
-    misc$earg <-
-      list(shape1.a = .eshape1.a ,
-           scale    = .escale ,
-           shape3.q = .eshape3.q )
+    misc$link <- c(rep( if ( .lss ) .lscale else .lshape1.a , len = ncoly),
+                   rep( if ( .lss ) .lshape1.a else .lscale , len = ncoly),
+                   rep( .lshape3.q , length = ncoly))[
+                   interleave.VGAM(M, M = M1)]
+    temp.names <- if ( .lss ) {
+      c(scaL.names, sha1.names,             sha3.names)
+    } else {
+      c(sha1.names, scaL.names,             sha3.names)
+    }
+    names(misc$link) <- temp.names[interleave.VGAM(M, M = M1)]
 
-    misc$expected <- TRUE
-    misc$multipleResponses <- FALSE
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
+    misc$earg <- vector("list", M)
+    names(misc$earg) <- temp.names
+    for (ii in 1:ncoly) {
+      if ( .lss ) {
+        misc$earg[[M1*ii-2]] <- .escale
+        misc$earg[[M1*ii-1]] <- .eshape1.a
+      } else {
+        misc$earg[[M1*ii-2]] <- .eshape1.a
+        misc$earg[[M1*ii-1]] <- .escale
+      }
+      misc$earg[[M1*ii  ]] <- .eshape3.q
+    }
 
-            .eshape1.a = eshape1.a, .escale = escale, 
-            .eshape3.q = eshape3.q,
-            .lshape3.q = lshape3.q ))),
+    misc$expected          <- TRUE
+    misc$multipleResponses <- TRUE
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
+                                    .lshape3.q = lshape3.q,
+                                    .eshape3.q = eshape3.q,
+            .lss = lss ))), 
   loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta,
-             extra = NULL,
-             summation = TRUE) {
-    aa    <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    scale <- eta2theta(eta[, 2], .lscale ,    earg = .escale )
-    parg <- 1
-    qq    <- eta2theta(eta[, 3], .lshape3.q, earg = .eshape3.q )
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ll.elts <- c(w) * dsinmad(x = y, shape1.a = aa, scale = scale,
-                                shape3.q = qq, log = TRUE)
-      if (summation) {
-        sum(ll.elts)
+    function(mu, y, w, residuals = FALSE, 
+             eta, extra = NULL, summation = TRUE) {
+      M1  <- 3
+      NOS <- ncol(eta)/M1
+      if ( .lss ) {
+        Scale <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                           .lscale    , earg = .escale    )
+        aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                           .lshape1.a , earg = .eshape1.a )
       } else {
-        ll.elts
+        aa    <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                           .lshape1.a , earg = .eshape1.a )
+        Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                           .lscale    , earg = .escale    )
       }
-    }
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .lshape3.q = lshape3.q,
-           .eshape1.a = eshape1.a, .escale = escale,
-           .eshape3.q = eshape3.q ))),
+      parg   <- 1
+      qq     <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                          .lshape3.q , earg = .eshape3.q )
+      if (residuals) {
+        stop("loglikelihood residuals not implemented yet")
+      } else {
+        ll.elts <-
+          c(w) * dgenbetaII(x = y, scale = Scale, shape1.a = aa,
+                            shape2.p = parg, shape3.q = qq, log = TRUE)
+        if (summation) {
+          sum(ll.elts)
+        } else {
+          ll.elts
+        }
+      }
+    }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+             .escale    = escale   , .eshape1.a = eshape1.a,
+                                     .lshape3.q = lshape3.q,
+                                     .eshape3.q = eshape3.q,
+             .lss = lss ))),
   vfamily = c("sinmad"),
-
-
   simslot = eval(substitute(
   function(object, nsim) {
 
@@ -3217,168 +3613,350 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
               pwts else weights(object, type = "prior")
     if (any(pwts != 1)) 
       warning("ignoring prior weights")
-    eta <- predict(object)
-    aa    <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    scale <- eta2theta(eta[, 2], .lscale ,    earg = .escale )
-    qq    <- eta2theta(eta[, 3], .lshape3.q,  earg = .eshape3.q )
-    rsinmad(nsim * length(qq), shape1.a = aa, scale = scale,
-            shape3.q = qq)
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .lshape3.q = lshape3.q,
-           .eshape1.a = eshape1.a, .escale = escale,
-           .eshape3.q = eshape3.q ))),
 
+    eta <- predict(object)
+    M1 <- 3
+    NOS <- ncol(eta)/M1
+    if ( .lss ) {
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lscale ,  earg = .escale )
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale    ,  earg = .escale )
+    }
+    qq   <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                      .lshape3.q , earg = .eshape3.q )
 
-  deriv = eval(substitute(expression({
-    aa    <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    scale <- eta2theta(eta[, 2], .lscale    , earg = .escale )
+    rsinmad(nsim * length(qq), shape1.a = aa, scale = Scale,
+            shape3.q = qq)
+  }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+           .escale    = escale   , .eshape1.a = eshape1.a,
+                                   .lshape3.q = lshape3.q,
+                                   .eshape3.q = eshape3.q ))),
+  deriv = eval(substitute(expression({ 
+    NOS <- ncol(eta)/M1  # Needed for summary()
+    if ( .lss ) { 
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lscale    , earg = .escale   )
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale    , earg = .escale )
+    }  
     parg <- 1
-    qq <-    eta2theta(eta[, 3], .lshape3.q , earg = .eshape3.q )
-
-    temp1 <- log(y/scale)
-    temp2 <- (y/scale)^aa
+    qq   <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                      .lshape3.q , earg = .eshape3.q)
+    temp1 <- log(y/Scale)
+    temp2 <- (y/Scale)^aa
+    temp3 <- digamma(parg + qq)
     temp3a <- digamma(parg)
     temp3b <- digamma(qq)
-
-    dl.da <- 1 / aa + parg * temp1 - (parg + qq) * temp1 / (1 + 1 / temp2)
-    dl.dscale <- (aa / scale) * (-parg + (parg + qq) / (1 + 1 / temp2))
-    dl.dq <- digamma(parg + qq) - temp3b - log1p(temp2)
-
+    temp4 <- log1p(temp2)
+  
+    dl.dscale <- (aa/Scale) * (-parg + (parg+qq) / (1+1/temp2))
+    dl.da <- 1/aa + parg * temp1 - (parg+qq) * temp1 / (1+1/temp2)
+    dl.dq <- temp3 - temp3b - temp4
+    
+    dscale.deta <- dtheta.deta(Scale, .lscale ,    earg = .escale )
     da.deta     <- dtheta.deta(aa,    .lshape1.a , earg = .eshape1.a )
-    dscale.deta <- dtheta.deta(scale, .lscale ,    earg = .escale )
     dq.deta     <- dtheta.deta(qq,    .lshape3.q , earg = .eshape3.q )
+    
+    myderiv <- if ( .lss ) {
+      c(w) * cbind(dl.dscale * dscale.deta,
+                   dl.da * da.deta,
+                   dl.dq * dq.deta)
+    } else {
+      c(w) * cbind(dl.da * da.deta,
+                   dl.dscale * dscale.deta,
+                   dl.dq * dq.deta)
+    }
+    myderiv[, interleave.VGAM(M, M = M1)]
+  }), list(  .lscale    = lscale   , .lshape1.a = lshape1.a,
+             .escale    = escale   , .eshape1.a = eshape1.a,
+                                     .lshape3.q = lshape3.q,
+                                     .eshape3.q = eshape3.q,
+             .lss = lss ))),
+  weight = eval(substitute(expression({
+    temp5  <- trigamma(parg + qq)
+    temp5a <- trigamma(parg)
+    temp5b <- trigamma(qq)
 
-    c(w) * cbind(dl.da     * da.deta,
-                 dl.dscale * dscale.deta,
-                 dl.dq     * dq.deta )
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale, 
-            .eshape3.q = eshape3.q,
-            .lshape3.q = lshape3.q ))),
-
-  weight = eval(substitute(expression({
-    ned2l.da <- (1 + parg+qq + parg * qq * (trigamma(parg) + trigamma(qq) +
-                (temp3b - temp3a + (parg-qq)/(parg*qq))^2 - 
-                (parg^2 + qq^2) / (parg*qq)^2)) / (aa^2 * (1 + parg + qq))
-    ned2l.dscale <- aa^2 * parg * qq / (scale^2 * (1+parg+qq))
-    ned2l.dq <- 1/qq^2
-    ned2l.dascale <- (parg - qq - parg*qq *
-                     (temp3a -temp3b)) / (scale*(1 + parg+qq))
-    ned2l.daq <- -(parg * (temp3b -temp3a) -1) / (aa*(parg+qq))
-    ned2l.dscaleq <- -aa * parg / (scale*(parg+qq))
-
-    wz <- matrix(as.numeric(NA), n, dimm(M))  #M==3 means 6=dimm(M)
-    wz[, iam(1, 1, M)] <- ned2l.da * da.deta^2
-    wz[, iam(2, 2, M)] <- ned2l.dscale * dscale.deta^2
-    wz[, iam(3, 3, M)] <- ned2l.dq * dq.deta^2
-    wz[, iam(1, 2, M)] <- ned2l.dascale * da.deta * dscale.deta
-    wz[, iam(1, 3, M)] <- ned2l.daq * da.deta * dq.deta
-    wz[, iam(2, 3, M)] <- ned2l.dscaleq * dscale.deta * dq.deta
-    wz <- c(w) * wz
-    wz
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale, 
-            .eshape3.q = eshape3.q,
-            .lshape3.q = lshape3.q ))))
-}
-
+    ned2l.da <- (1 + parg + qq + parg * qq * (temp5a + temp5b +
+                (temp3b - temp3a + (parg-qq)/(parg*qq))^2 -
+                (parg^2 + qq^2) / (parg*qq)^2)) / (aa^2 * (1+parg+qq))
+    ned2l.dscale  <- (aa^2) * parg * qq / ((1+parg+qq) * Scale^2)
+    ned2l.dq      <- temp5b - temp5
+    ned2l.dascale <- (parg - qq - parg * qq *
+                     (temp3a -temp3b)) / (Scale*(1 + parg+qq))
+    ned2l.daq     <- -(parg * (temp3b -temp3a) -1) / (aa*(parg+qq))
+    ned2l.dscaleq <- -aa * parg / (Scale*(parg+qq))
+    wz <- if ( .lss ) {
+      array(c(c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.da * da.deta^2,
+              c(w) * ned2l.dq * dq.deta^2,
+              c(w) * ned2l.dascale * da.deta * dscale.deta,
+              c(w) * ned2l.daq * da.deta * dq.deta,
+              c(w) * ned2l.dscaleq * dscale.deta * dq.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    } else {
+      array(c(c(w) * ned2l.da * da.deta^2,
+              c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.dq * dq.deta^2,
+              c(w) * ned2l.dascale * da.deta * dscale.deta,
+              c(w) * ned2l.dscaleq * dscale.deta * dq.deta,
+              c(w) * ned2l.daq * da.deta * dq.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    }
+    wz <- arwz2wz(wz, M = M, M1 = M1)
+    wz
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a, 
+                                    .lshape3.q = lshape3.q,
+                                    .eshape3.q = eshape3.q,
+            .lss = lss ))))
+}
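+
+# Usage sketch (illustrative only; 'sdata' is hypothetical simulated data):
+#   sdata <- data.frame(y = rsinmad(1000, scale = exp(1),
+#                                   shape1.a = exp(1), shape3.q = exp(0.5)))
+#   fit <- vglm(y ~ 1, sinmad(lss = TRUE), data = sdata, trace = TRUE)
+#   coef(fit, matrix = TRUE)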
 
- dagum <- function(lss,
-                   lshape1.a = "loge",
-                   lscale = "loge",
-                   lshape2.p = "loge",
-                   ishape1.a = NULL, 
-                   iscale = NULL,
-                   ishape2.p = 1.0, 
-                   zero = NULL) {
 
-  if (!is.logical(lss) || lss)
-    stop("argument 'lss' not specified correctly. ",
-         "See online help for important information")
 
 
 
-  if (length(zero) &&
-      !is.Numeric(zero, integer.valued = TRUE, positive = TRUE))
-      stop("bad input for argument 'zero'")
 
 
-  lshape1.a <- as.list(substitute(lshape1.a))
-  eshape1.a <- link2list(lshape1.a)
-  lshape1.a <- attr(eshape1.a, "function.name")
 
-  lshape2.p <- as.list(substitute(lshape2.p))
-  eshape2.p <- link2list(lshape2.p)
-  lshape2.p <- attr(eshape2.p, "function.name")
 
-  lscale <- as.list(substitute(lscale))
-  escale <- link2list(lscale)
-  lscale <- attr(escale, "function.name")
 
 
 
-  new("vglmff",
-  blurb = c("Dagum distribution\n\n",
-          "Links:    ",
-          namesof("shape1.a",     lshape1.a,     earg = eshape1.a), ", ", 
-          namesof("scale",        lscale,        earg = escale), ", ", 
-          namesof("shape2.p",     lshape2.p,     earg = eshape2.p), "\n", 
-        "Mean:     scale * gamma(shape2.p + 1/shape1.a) * ",
-                  "gamma(1 - 1/shape1.a) / ",
-                  "gamma(shape2.p)"),
-  constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero , M)
-  }), list( .zero = zero ))),
-  initialize = eval(substitute(expression({
+ dagum     <- function(lscale    = "loge",
+                       lshape1.a = "loge",
+                       lshape2.p = "loge",
+                       iscale    = NULL,
+                       ishape1.a = NULL,
+                       ishape2.p = NULL,
+                       imethod   = 1,
+                       lss       = TRUE,
+                       gscale    = exp(-5:5),
+                       gshape1.a = exp(-5:5),
+                       gshape2.p = exp(-5:5),
+                       probs.y   = c(0.25, 0.50, 0.75),
+                       zero      = if (lss) -(2:3) else -c(1, 3)) {
+  # if(), not ifelse(): ifelse() would return only the first element
 
-    w.y.check(w = w, y = y,
-              ncol.w.max = 1, ncol.y.max = 1)
 
-    predictors.names <-
-      c(namesof("shape1.a", .lshape1.a , earg = .eshape1.a , tag = FALSE),
-        namesof("scale",    .lscale ,    earg = .escale ,    tag = FALSE),
-        namesof("shape2.p", .lshape2.p , earg = .eshape2.p , tag = FALSE))
-
-    if (!length( .ishape1.a) || !length( .iscale )) {
-        qvec <- c( .25, .5, .75)  # Arbitrary; could be made an argument
-        ishape2.p <- if (length( .ishape2.p)) .ishape2.p else 1
-        xvec <- log( qvec^(-1/ ishape2.p ) - 1 )
-        fit0 <- lsfit(x = xvec, y = log(quantile(y, qvec )))
-    }
 
-        if (!length(etastart)) {
-          parg <- rep(if (length( .ishape2.p )) .ishape2.p else 1.0,
-                      length.out = n)
-          aa <- rep(if (length( .ishape1.a )) .ishape1.a else
-                    -1/fit0$coef[2],
-                   length.out = n)
-          scale <- rep(if (length( .iscale )) .iscale else
-                       exp(fit0$coef[1]),
-                       length.out = n)
 
 
-      outOfRange <- (parg + 1/aa <= 0)
-      parg[outOfRange] <- 1 / aa[outOfRange] + 1
+  if (length(lss) != 1 || !is.logical(lss))
+    stop("Argument 'lss' not specified correctly")
+  
+  if (!is.Numeric(imethod, length.arg = 1,
+                  integer.valued = TRUE, 
+                  positive = TRUE) || imethod > 2)
+    stop("Bad input for argument 'imethod'")
+  
+  if (length(iscale) && !is.Numeric(iscale, positive = TRUE)) 
+    stop("Bad input for argument 'iscale'")
+  
+  if (length(ishape1.a) && !is.Numeric(ishape1.a, positive = TRUE))
+    stop("Bad input for argument 'ishape1.a'")
+  
+  if (length(ishape2.p) && !is.Numeric(ishape2.p, positive = TRUE))
+    stop("Bad input for argument 'ishape2.p'")
+  
+  if (length(probs.y) < 2 || max(probs.y) > 1 || 
+        !is.Numeric(probs.y, positive = TRUE))
+    stop("Bad input for argument 'probs.y'")
+
+  if (length(zero) && !is.Numeric(zero, integer.valued = TRUE))
+    stop("bad input for argument 'zero'")
+  
+  lscale <- as.list(substitute(lscale))
+  escale <- link2list(lscale)
+  lscale <- attr(escale, "function.name")
+  
+  lshape1.a <- as.list(substitute(lshape1.a))
+  eshape1.a <- link2list(lshape1.a)
+  lshape1.a <- attr(eshape1.a, "function.name")
+  
+  lshape2.p <- as.list(substitute(lshape2.p))
+  eshape2.p <- link2list(lshape2.p)
+  lshape2.p <- attr(eshape2.p, "function.name")
+  
+
+  new("vglmff", 
+  blurb = 
+    c("Dagum distribution \n\n",
+      "Links:    ", 
+      ifelse (lss,
+              namesof("scale"   , lscale   , earg = escale),
+              namesof("shape1.a", lshape1.a, earg = eshape1.a)), ", ",
+      ifelse (lss, 
+              namesof("shape1.a", lshape1.a, earg = eshape1.a),
+              namesof("scale"   , lscale   , earg = escale)), ", ",
+      namesof("shape2.p" , lshape2.p, earg = eshape2.p), "\n",
+      "Mean:     scale * gamma(shape2.p + 1/shape1.a) * ",
+                "gamma(1 - 1/shape1.a) / ",
+                "gamma(shape2.p)"),
+  constraints = eval(substitute(expression({
+    dotzero <- .zero
+    M1 <- 3
+    eval(negzero.expression.VGAM)
+  }), list( .zero = zero ))),
+  infos = eval(substitute(function(...) {
+    list(M1 = 3,
+         Q1 = 1,
+         expected = TRUE,
+         zero = .zero ,
+         multipleResponses = TRUE,
+         lscale    = .lscale    , lshape1.a = .lshape1.a ,
+         escale    = .escale    , eshape1.a = .eshape1.a ,
+         lshape2.p = .lshape2.p ,                         
+         eshape2.p = .eshape2.p ,                         
+         .zero = zero )
+  }, list( .lscale = lscale      , .lshape1.a = lshape1.a,
+           .escale = escale      , .eshape1.a = eshape1.a,
+           .lshape2.p = lshape2.p,                        
+           .eshape2.p = eshape2.p,                        
+           .zero = zero ))),
+  initialize = eval(substitute(expression({ 
+    temp5 <- w.y.check(w = w, y = y, 
+                       Is.positive.y = TRUE, 
+                       ncol.w.max = Inf, 
+                       ncol.y.max = Inf, 
+                       out.wy = TRUE, 
+                       colsyperw = 1, 
+                       maximize = TRUE)
+    y    <- temp5$y
+    w    <- temp5$w
+    M1 <- 3  # Number of parameters for one response
+    NOS  <- ncoly <- ncol(y)
+    M    <- M1*ncol(y)
+
+    scaL.names  <- param.names("scale",     NOS)
+    sha1.names  <- param.names("shape1.a",  NOS)
+    sha2.names  <- param.names("shape2.p",  NOS)
+
+    predictors.names <- c(
+      if ( .lss ) {
+        c(namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE),
+          namesof(sha1.names , .lshape1.a , earg = .eshape1.a , tag = FALSE))
+      } else {
+        c(namesof(sha1.names , .lshape1.a , earg = .eshape1.a , tag = FALSE),
+          namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE))
+      },
+      namesof(sha2.names , .lshape2.p , earg = .eshape2.p , tag = FALSE))
+    predictors.names <- predictors.names[interleave.VGAM(M, M = M1)]
     
+    if (!length(etastart)) {
+      sc.init <-
+      aa.init <-
+      pp.init <- matrix(as.numeric(NA), n, NOS)
+          
+      for (spp. in 1:NOS) {  # For each response 'y_spp.'... do:
+        yvec <- y[, spp.]
+        wvec <- w[, spp.]
+
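+        # imethod 1: grid search over (shape1.a, shape2.p) pairs, profiling
+        # 'scale' for each pair (mirrors the sinmad() initializer).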
+        if ( .imethod == 1 ) {
+          gscale     <- .gscale
+          gshape1.a  <- .gshape1.a
+          gshape2.p  <- .gshape2.p        
+          if (length( .iscale ))
+            gscale    <-  rep( .iscale    , length = NOS)
+          if (length( .ishape1.a ))
+            gshape1.a <-  rep( .ishape1.a , length = NOS)
+          if (length( .ishape2.p ))
+            gshape2.p <-  rep( .ishape2.p , length = NOS)
+          allmat1 <- expand.grid(shape1.a = gshape1.a,
+                                 shape2.p = gshape2.p)
+          allmat2 <- matrix(as.numeric(NA), nrow(allmat1), 2)
+
+          ll.dagu <- function(scaleval, x = x, y = y, w = w, extraargs) { 
+            ans <- sum(c(w) * dgenbetaII(x = y,
+                                         scale    = scaleval,
+                                         shape1.a = extraargs$Shape1.a,
+                                         shape2.p = extraargs$Shape2.p,
+                                         shape3.q = 1,
+                                         log = TRUE))
+            ans
+          }
+
+          for (iloc in 1:nrow(allmat1)) {
+            allmat2[iloc, ] <-
+              grid.search(gscale, objfun = ll.dagu,
+                            y = yvec, x = x, w = wvec,
+                            ret.objfun = TRUE,  # 2nd value is the loglik
+                            extraargs = list(Shape1.a = allmat1[iloc, 1],
+                                             Shape2.p = allmat1[iloc, 2]))
+          }
+          ind5 <- which.max(allmat2[, 2])  # 2nd value is the loglik
+          sc.init[, spp.] <- allmat2[ind5, 1]
+          aa.init[, spp.] <- allmat1[ind5, 1]
+          pp.init[, spp.] <- allmat1[ind5, 2]
+        } else {  # .imethod == 2
+          qvec <- .probs.y 
+          ishape2.p <- if (length( .ishape2.p )) .ishape2.p else 1
+          xvec <- log( qvec^(-1/ ishape2.p) - 1 )
+          fit0 <- lsfit(x = xvec, y = log(quantile(yvec,  qvec)))
+          sc.init[, spp.] <- if (length( .iscale )) .iscale else
+                             exp(fit0$coef[1])
+          aa.init[, spp.] <- if (length( .ishape1.a )) .ishape1.a else
+                             -1/fit0$coef[2]
+          pp.init[, spp.] <- ishape2.p
+        }
+      }  # End of for (spp. ...)
 
 
 
+      finite.mean <- 1 < aa.init
+      COP.use <- 1.15
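+      # NB: 'FALSE &&' below disables this adjustment loop; it never runs.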
+      while (FALSE && any(!finite.mean)) {
+        aa.init[!finite.mean] <- 0.1 + aa.init[!finite.mean] * COP.use
+        finite.mean <- 1 < aa.init
+      }
 
       etastart <-
-        cbind(theta2eta(aa,    .lshape1.a ,  earg = .eshape1.a ),
-              theta2eta(scale, .lscale ,     earg = .escale ),
-              theta2eta(parg,  .lshape2.p ,  earg = .eshape2.p ))
-    }
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .lshape2.p = lshape2.p,
-            .eshape1.a = eshape1.a, .escale = escale, 
-            .eshape2.p = eshape2.p,
-            .ishape1.a = ishape1.a, .iscale = iscale, 
-            .ishape2.p = ishape2.p ))),
+        cbind(if ( .lss )
+              cbind(theta2eta(sc.init, .lscale    , earg = .escale    ),
+                    theta2eta(aa.init, .lshape1.a , earg = .eshape1.a )) else
+              cbind(theta2eta(aa.init, .lshape1.a , earg = .eshape1.a ),
+                    theta2eta(sc.init, .lscale    , earg = .escale    )),
+              theta2eta(pp.init , .lshape2.p , earg = .eshape2.p ))
+      etastart <- etastart[, interleave.VGAM(M, M = M1)]
+    }  # End of etastart.
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
+            .iscale    = iscale   , .ishape1.a = ishape1.a,
+            .gscale    = gscale   , .gshape1.a = gshape1.a,           
+            .lshape2.p = lshape2.p,                        
+            .eshape2.p = eshape2.p,                        
+            .ishape2.p = ishape2.p,
+            .gshape2.p = gshape2.p,
+            .imethod   = imethod   , .probs.y = probs.y,
+            .lss = lss ))), 
   linkinv = eval(substitute(function(eta, extra = NULL) {
-    aa     <- eta2theta(eta[, 1], .lshape1.a ,  earg = .eshape1.a )
-    Scale  <- eta2theta(eta[, 2], .lscale ,     earg = .escale )
-    parg   <- eta2theta(eta[, 3], .lshape2.p ,  earg = .eshape2.p )
+    M1 <- 3
+    NOS <- ncol(eta)/M1
+    if ( .lss ) {
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lscale ,  earg = .escale )
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale    ,  earg = .escale )
+    }
+    parg <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                      .lshape2.p , earg = .eshape2.p )
+
     qq     <- 1
 
     ans <- Scale * exp(lgamma(parg + 1/aa) +
@@ -3389,49 +3967,82 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
     ans[Scale       <= 0] <- NA
     ans[parg        <= 0] <- NA
     ans
-    }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-             .eshape1.a = eshape1.a, .escale = escale, 
-             .eshape2.p = eshape2.p,
-             .lshape2.p = lshape2.p ))),
+  }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+           .escale    = escale   , .eshape1.a = eshape1.a,
+           .lshape2.p = lshape2.p,
+           .eshape2.p = eshape2.p,
+           .lss = lss ))),
   last = eval(substitute(expression({
-    misc$link <-    c(shape1.a = .lshape1.a , scale = .lscale ,
-                      p = .lshape2.p )
+    M1 <- 3
 
-    misc$earg <- list(shape1.a = .eshape1.a , scale = .escale ,
-                      p = .eshape2.p )
+    misc$link <- c(rep( if ( .lss ) .lscale else .lshape1.a , len = ncoly),
+                   rep( if ( .lss ) .lshape1.a else .lscale , len = ncoly),
+                   rep( .lshape2.p , length = ncoly))[
+                   interleave.VGAM(M, M = M1)]
+    temp.names <- if ( .lss ) {
+      c(scaL.names, sha1.names, sha2.names)
+    } else {
+      c(sha1.names, scaL.names, sha2.names)
+    }
+    names(misc$link) <- temp.names[interleave.VGAM(M, M = M1)]
 
-    misc$expected <- TRUE
-    misc$multipleResponses <- FALSE
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
+    misc$earg <- vector("list", M)
+    names(misc$earg) <- temp.names
+    for (ii in 1:ncoly) {
+      if ( .lss ) {
+        misc$earg[[M1*ii-2]] <- .escale
+        misc$earg[[M1*ii-1]] <- .eshape1.a
+      } else {
+        misc$earg[[M1*ii-2]] <- .eshape1.a
+        misc$earg[[M1*ii-1]] <- .escale
+      }
+      misc$earg[[M1*ii  ]] <- .eshape2.p
+    }
+
+    misc$expected          <- TRUE
+    misc$multipleResponses <- TRUE
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
             .lshape2.p = lshape2.p,
-            .eshape1.a = eshape1.a, .escale = escale,
-            .eshape2.p = eshape2.p ))),
+            .eshape2.p = eshape2.p,
+            .lss = lss ))), 
   loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta,
-             extra = NULL,
-             summation = TRUE) {
-    aa    <- eta2theta(eta[, 1], .lshape1.a ,  earg = .eshape1.a )
-    Scale <- eta2theta(eta[, 2], .lscale ,     earg = .escale )
-    parg  <- eta2theta(eta[, 3], .lshape2.p ,  earg = .eshape2.p )
-    qq <- 1
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ll.elts <- c(w) * ddagum(x = y, shape1.a = aa, scale = Scale,
-                               shape2.p = parg, log = TRUE)
-      if (summation) {
-        sum(ll.elts)
+    function(mu, y, w, residuals = FALSE, 
+             eta, extra = NULL, summation = TRUE) {
+      M1  <- 3
+      NOS <- ncol(eta)/M1
+      if ( .lss ) {
+        Scale <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                           .lscale    , earg = .escale    )
+        aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                           .lshape1.a , earg = .eshape1.a )
       } else {
-        ll.elts
+        aa    <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                           .lshape1.a , earg = .eshape1.a )
+        Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                           .lscale    , earg = .escale    )
       }
-    }
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .lshape2.p = lshape2.p, 
-           .eshape1.a = eshape1.a, .escale = escale,
-           .eshape2.p = eshape2.p ))),
+      parg   <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                          .lshape2.p , earg = .eshape2.p )
+      if (residuals) {
+        stop("loglikelihood residuals not implemented yet")
+      } else {
+        ll.elts <-
+          c(w) * dgenbetaII(x = y, scale = Scale, shape1.a = aa,
+                            shape2.p = parg, shape3.q = 1, log = TRUE)
+        if (summation) {
+          sum(ll.elts)
+        } else {
+          ll.elts
+        }
+      }
+    }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+             .escale    = escale   , .eshape1.a = eshape1.a,
+             .lshape2.p = lshape2.p,
+             .eshape2.p = eshape2.p,
+             .lss = lss ))),
   vfamily = c("dagum"),
 
-
   simslot = eval(substitute(
   function(object, nsim) {
 
@@ -3440,163 +4051,321 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
     if (any(pwts != 1)) 
       warning("ignoring prior weights")
     eta <- predict(object)
-    aa    <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    Scale <- eta2theta(eta[, 2], .lscale ,    earg = .escale )
-    parg  <- eta2theta(eta[, 3], .lshape2.p,  earg = .eshape2.p )
+    M1 <- 3
+    NOS <- ncol(eta)/M1
+    if ( .lss ) {
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lscale    , earg = .escale   )
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale    , earg = .escale )
+    }  
+    parg <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                      .lshape2.p , earg = .eshape2.p)
+    qq   <- 1
     rdagum(nsim * length(parg), shape1.a = aa, scale = Scale,
            shape2.p = parg)
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .lshape2.p = lshape2.p, 
-           .eshape1.a = eshape1.a, .escale = escale,
-           .eshape2.p = eshape2.p ))),
+  }, list(  .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
+            .lshape2.p = lshape2.p,
+            .eshape2.p = eshape2.p,
+            .lss = lss ))),
 
 
   deriv = eval(substitute(expression({
-    aa    <- eta2theta(eta[, 1], .lshape1.a ,  earg = .eshape1.a )
-    Scale <- eta2theta(eta[, 2], .lscale ,     earg = .escale )
-    parg  <- eta2theta(eta[, 3], .lshape2.p ,  earg = .eshape2.p )
-    qq <- 1
-
-    temp1 <- log(y / Scale)
-    temp2 <- (y / Scale)^aa
+    M1 <- 3
+    NOS <- ncol(eta)/M1  # Needed for summary()
+    if ( .lss ) { 
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lscale    , earg = .escale   )
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale    , earg = .escale )
+    }  
+    parg <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                      .lshape2.p , earg = .eshape2.p)
+    qq   <- 1
+    temp1 <- log(y/Scale)
+    temp2 <- (y/Scale)^aa
+    temp3 <- digamma(parg + qq)
     temp3a <- digamma(parg)
     temp3b <- digamma(qq)
-
-    dl.da <- 1/aa + parg * temp1 - (parg+qq) * temp1 / (1+1/temp2)
+    temp4 <- log1p(temp2)
+  
     dl.dscale <- (aa/Scale) * (-parg + (parg+qq) / (1+1/temp2))
-    dl.dp <- aa * temp1 + digamma(parg + qq) - temp3a - log1p(temp2)
-
-    da.deta     <- dtheta.deta(aa,    .lshape1.a , earg = .eshape1.a )
+    dl.da <- 1/aa + parg * temp1 - (parg+qq) * temp1 / (1+1/temp2)
+    dl.dp <- aa * temp1 + temp3 - temp3a - temp4
+    
     dscale.deta <- dtheta.deta(Scale, .lscale ,    earg = .escale )
+    da.deta     <- dtheta.deta(aa,    .lshape1.a , earg = .eshape1.a )
     dp.deta     <- dtheta.deta(parg,  .lshape2.p , earg = .eshape2.p )
-
-    c(w) * cbind( dl.da     * da.deta,
-                  dl.dscale * dscale.deta,
-                  dl.dp     * dp.deta )
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale, 
-            .eshape2.p = eshape2.p,
-            .lshape2.p = lshape2.p ))),
+    
+    myderiv <- if ( .lss ) {
+      c(w) * cbind(dl.dscale * dscale.deta,
+                   dl.da * da.deta,
+                   dl.dp * dp.deta)
+    } else {
+      c(w) * cbind(dl.da * da.deta,
+                   dl.dscale * dscale.deta,
+                   dl.dp * dp.deta)
+    }
+    myderiv[, interleave.VGAM(M, M = M1)]
+  }), list(  .lscale    = lscale   , .lshape1.a = lshape1.a,
+             .escale    = escale   , .eshape1.a = eshape1.a,
+             .lshape2.p = lshape2.p,
+             .eshape2.p = eshape2.p,
+             .lss = lss ))),
   weight = eval(substitute(expression({
-    ned2l.da <- (1 + parg+qq + parg * qq * (trigamma(parg) + trigamma(qq) +
-                (temp3b - temp3a + (parg-qq)/(parg*qq))^2 - 
+    temp5  <- trigamma(parg + qq)
+    temp5a <- trigamma(parg)
+    temp5b <- trigamma(qq)
+
+    ned2l.da <- (1 + parg + qq + parg * qq * (temp5a + temp5b +
+                (temp3b - temp3a + (parg-qq)/(parg*qq))^2 -
                 (parg^2 + qq^2) / (parg*qq)^2)) / (aa^2 * (1+parg+qq))
-    ned2l.dscale <- aa^2 * parg * qq / (Scale^2 * (1+parg+qq))
-    ned2l.dp <- 1 / parg^2 
-    ned2l.dascale <- (parg - qq - parg * qq *(temp3a -temp3b)
-                   ) / (Scale * (1 + parg+qq))
-    ned2l.dap= -(qq   * (temp3a -temp3b) -1) / (aa*(parg+qq))
-    ned2l.dscalep <-  aa * qq   / (Scale * (parg + qq))
-    wz <- matrix(as.numeric(NA), n, dimm(M))  #M==3 means 6=dimm(M)
-    wz[, iam(1, 1, M)] <- ned2l.da     * da.deta^2
-    wz[, iam(2, 2, M)] <- ned2l.dscale * dscale.deta^2
-    wz[, iam(3, 3, M)] <- ned2l.dp     * dp.deta^2
-    wz[, iam(1, 2, M)] <- ned2l.dascale * da.deta * dscale.deta
-    wz[, iam(1, 3, M)] <- ned2l.dap * da.deta * dp.deta
-    wz[, iam(2, 3, M)] <- ned2l.dscalep * dscale.deta * dp.deta
-    wz <- c(w) * wz
+    ned2l.dscale  <- (aa^2) * parg * qq / ((1+parg+qq) * Scale^2)
+    ned2l.dp      <- temp5a - temp5
+    ned2l.dascale <- (parg - qq - parg * qq *
+                     (temp3a -temp3b)) / (Scale*(1 + parg+qq))
+    ned2l.dap     <- -(qq   * (temp3a -temp3b) -1) / (aa*(parg+qq))
+    ned2l.dscalep <-  aa * qq   / (Scale*(parg+qq))
+    wz <- if ( .lss ) {
+      array(c(c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.da * da.deta^2,
+              c(w) * ned2l.dp * dp.deta^2,
+              c(w) * ned2l.dascale * da.deta * dscale.deta,
+              c(w) * ned2l.dap * da.deta * dp.deta,
+              c(w) * ned2l.dscalep * dscale.deta * dp.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    } else {
+      array(c(c(w) * ned2l.da * da.deta^2,
+              c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.dp * dp.deta^2,
+              c(w) * ned2l.dascale * da.deta * dscale.deta,
+              c(w) * ned2l.dscalep * dscale.deta * dp.deta,
+              c(w) * ned2l.dap * da.deta * dp.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    }
+    wz <- arwz2wz(wz, M = M, M1 = M1)
     wz
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale, 
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a, 
+            .lshape2.p = lshape2.p,
             .eshape2.p = eshape2.p,
-            .lshape2.p = lshape2.p ))))
+            .lss = lss ))))
 }
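
A minimal usage sketch of the rewritten dagum() family, assuming the VGAM
package is attached; the data and parameter values below are illustrative
only, not part of the commit. With lss = TRUE the linear predictors are
ordered (scale, shape1.a, shape2.p), and shape3.q is fixed at 1.

    ## Illustrative intercept-only fit on simulated data.
    library(VGAM)
    set.seed(1)
    ddat <- data.frame(y = rdagum(1000, scale = 2,
                                  shape1.a = 3, shape2.p = 1.5))
    fit <- vglm(y ~ 1, dagum(lss = TRUE), data = ddat, trace = TRUE)
    coef(fit, matrix = TRUE)  # one column per linear predictor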
 
 
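Throughout these slots the per-parameter columns are generated block-wise
(all scales first, then all first shapes, and so on) and then reordered
with interleave.VGAM(M, M = M1) so that the M1 predictors belonging to each
response sit together; arwz2wz() performs the matching rearrangement of the
working-weight bands. A self-contained sketch of the reordering, using a
hypothetical stand-in rather than the package internal:

    ## interleave.sketch() mimics the index pattern: read 1:L into M1 rows
    ## by row, then flatten by column, giving 1, L/M1 + 1, ..., 2, ...
    interleave.sketch <- function(L, M1) c(matrix(1:L, nrow = M1, byrow = TRUE))
    nms <- c("scale1", "scale2", "shape2.p1", "shape2.p2",
             "shape3.q1", "shape3.q2")
    nms[interleave.sketch(L = 6, M1 = 3)]
    ## "scale1" "shape2.p1" "shape3.q1" "scale2" "shape2.p2" "shape3.q2"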
 
- betaII <-
-   function(  # lss,
-            lscale = "loge", lshape2.p = "loge", lshape3.q = "loge",
-            iscale = NULL,   ishape2.p = 2,      ishape3.q = 2,
-            zero = NULL) {
 
 
 
-  if (length(zero) &&
-      !is.Numeric(zero, integer.valued = TRUE, positive = TRUE))
-    stop("bad input for argument 'zero'")
+    betaII <- function(lscale    = "loge",
+                       lshape2.p = "loge",
+                       lshape3.q = "loge",
+                       iscale    = NULL,
+                       ishape2.p = NULL,
+                       ishape3.q = NULL,
+                       imethod   = 1,
+                       gscale    = exp(-5:5),
+                       gshape2.p = exp(-5:5),
+                       gshape3.q = exp(-5:5),
+                       probs.y   = c(0.25, 0.50, 0.75),
+                       zero      = -(2:3)) {
 
 
 
+
+  if (!is.Numeric(imethod, length.arg = 1,
+                  integer.valued = TRUE, 
+                  positive = TRUE) || imethod > 2)
+    stop("Bad input for argument 'imethod'")
+  
+  if (length(iscale   ) && !is.Numeric(iscale   ,  positive = TRUE))
+    stop("Bad input for argument 'iscale'")
+  
+  if (length(ishape2.p) && !is.Numeric(ishape2.p, positive = TRUE))
+    stop("Bad input for argument 'ishape2.p'")
+  
+  if (length(ishape3.q) && !is.Numeric(ishape3.q, positive = TRUE))
+    stop("Bad input for argument 'ishape3.q'")
+  
+  if (length(probs.y) < 2 || max(probs.y) > 1 || 
+        !is.Numeric(probs.y, positive = TRUE))
+    stop("Bad input for argument 'probs.y'")
+
+  if (length(zero) && !is.Numeric(zero, integer.valued = TRUE))
+    stop("bad input for argument 'zero'")
+  
+  lscale <- as.list(substitute(lscale))
+  escale <- link2list(lscale)
+  lscale <- attr(escale, "function.name")
+  
   lshape2.p <- as.list(substitute(lshape2.p))
   eshape2.p <- link2list(lshape2.p)
   lshape2.p <- attr(eshape2.p, "function.name")
-
+  
   lshape3.q <- as.list(substitute(lshape3.q))
   eshape3.q <- link2list(lshape3.q)
   lshape3.q <- attr(eshape3.q, "function.name")
-
-  lscale <- as.list(substitute(lscale))
-  escale <- link2list(lscale)
-  lscale <- attr(escale, "function.name")
-
-
-
-  new("vglmff",
-  blurb = c("Beta II distribution\n\n",
-          "Links:    ",
-          namesof("scale",    lscale,    earg = escale), ", ", 
-          namesof("shape2.p", lshape2.p, earg = eshape2.p), ", ", 
-          namesof("shape3.q", lshape3.q, earg = eshape3.q), "\n", 
-        "Mean:     scale * gamma(shape2.p + 1) * ",
-                  "gamma(shape3.q - 1) / ",
-                  "(gamma(shape2.p) * gamma(shape3.q))"),
+   
+
+  new("vglmff", 
+  blurb = 
+    c("Beta II distribution \n\n",
+      "Links:    ", 
+      namesof("scale"    , lscale   , earg = escale   ), ", ",
+      namesof("shape2.p" , lshape2.p, earg = eshape2.p), ", ",
+      namesof("shape3.q" , lshape3.q, earg = eshape3.q), "\n",
+      "Mean:     scale * gamma(shape2.p + 1) * ",
+                "gamma(shape3.q - 1) / ",
+                "(gamma(shape2.p) * gamma(shape3.q))"),
   constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero , M)
+    dotzero <- .zero
+    M1 <- 3
+    eval(negzero.expression.VGAM)
   }), list( .zero = zero ))),
-  initialize = eval(substitute(expression({
-
-    w.y.check(w = w, y = y,
-              ncol.w.max = 1, ncol.y.max = 1)
-
+  infos = eval(substitute(function(...) {
+    list(M1 = 3,
+         Q1 = 1,
+         expected = TRUE,
+         zero = .zero ,
+         multipleResponses = TRUE,
+         lscale    = .lscale    ,
+         escale    = .escale    ,
+         lshape2.p = .lshape2.p , lshape3.q = .lshape3.q ,
+         eshape2.p = .eshape2.p , eshape3.q = .eshape3.q ,
+         .zero = zero )
+  }, list( .lscale = lscale      ,
+           .escale = escale      ,
+           .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
+           .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
+           .zero = zero ))),
+  initialize = eval(substitute(expression({ 
+    temp5 <- w.y.check(w = w, y = y, 
+                       Is.positive.y = TRUE, 
+                       ncol.w.max = Inf, 
+                       ncol.y.max = Inf, 
+                       out.wy = TRUE, 
+                       colsyperw = 1, 
+                       maximize = TRUE)
+    y    <- temp5$y
+    w    <- temp5$w
+    M1 <- 3  # Number of parameters for one response
+    NOS  <- ncoly <- ncol(y)
+    M    <- M1*ncol(y)
+
+    scaL.names  <- param.names("scale",     NOS)
+    sha2.names  <- param.names("shape2.p",  NOS)
+    sha3.names  <- param.names("shape3.q",  NOS)
 
     predictors.names <-
-    c(namesof("scale",    .lscale ,   earg = .escale ,   tag = FALSE),
-      namesof("shape2.p", .lshape2.p, earg = .eshape2.p, tag = FALSE),
-      namesof("shape3.q", .lshape3.q, earg = .eshape3.q, tag = FALSE))
-
-    if (!length( .iscale )) {
-      qvec <- c(0.25, .5, .75)  # Arbitrary; could be made an argument
-      ishape3.q <- if (length( .ishape3.q)) .ishape3.q else 1
-      xvec <- log( (1-qvec)^(-1/ ishape3.q ) - 1 )
-      fit0 <- lsfit(x = xvec, y = log(quantile(y, qvec )))
-    }
-
+      c(namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE),
+        namesof(sha2.names , .lshape2.p , earg = .eshape2.p , tag = FALSE),
+        namesof(sha3.names , .lshape3.q , earg = .eshape3.q , tag = FALSE))
+    predictors.names <- predictors.names[interleave.VGAM(M, M = M1)]
+    
     if (!length(etastart)) {
-      scale <- rep(if (length( .iscale )) .iscale else
-                   exp(fit0$coef[1]),
-                   length.out = n)
-      qq    <- rep(if (length( .ishape3.q)) .ishape3.q else 1.0,
-                   length.out = n)
-      parg  <- rep(if (length( .ishape2.p)) .ishape2.p else 1.0,
-                   length.out = n)
+      sc.init <-
+      pp.init <-
+      qq.init <- matrix(as.numeric(NA), n, NOS)
+          
+      for (spp. in 1:NOS) {  # For each response 'y_spp.'... do:
+        yvec <- y[, spp.]
+        wvec <- w[, spp.]
+
+        if ( .imethod == 1 ) {
+          gscale     <- .gscale
+          gshape2.p  <- .gshape2.p        
+          gshape3.q  <- .gshape3.q
+          if (length( .iscale ))
+            gscale    <-  rep( .iscale    , length = NOS)
+          if (length( .ishape2.p ))
+            gshape2.p <-  rep( .ishape2.p , length = NOS)
+          if (length( .ishape3.q ))
+            gshape3.q <-  rep( .ishape3.q , length = NOS)
+          allmat1 <- expand.grid(shape2.p = gshape2.p,
+                                 shape3.q = gshape3.q)
+          allmat2 <- matrix(as.numeric(NA), nrow(allmat1), 2)
+
+          ll.beII <- function(scaleval, x = x, y = y, w = w, extraargs) { 
+            ans <- sum(c(w) * dgenbetaII(x = y,
+                                         scale    = scaleval,
+                                         shape1.a = 1,
+                                         shape2.p = extraargs$Shape2.p,
+                                         shape3.q = extraargs$Shape3.q,
+                                         log = TRUE))
+            ans
+          }
+
+          for (iloc in 1:nrow(allmat1)) {
+            allmat2[iloc, ] <-
+              grid.search(gscale, objfun = ll.beII,
+                            y = yvec, x = x, w = wvec,
+                            ret.objfun = TRUE,  # 2nd value is the loglik
+                            extraargs = list(Shape2.p = allmat1[iloc, 1],
+                                             Shape3.q = allmat1[iloc, 2]))
+          }
+          ind5 <- which.max(allmat2[, 2])  # 2nd value is the loglik
+          sc.init[, spp.] <- allmat2[ind5, 1]
+          pp.init[, spp.] <- allmat1[ind5, 1]
+          qq.init[, spp.] <- allmat1[ind5, 2]
+        } else {  # .imethod == 2
+
+          sc.init[, spp.] <- if (length( .iscale )) .iscale else {
+            qvec <- .probs.y
+            ishape3.q <- if (length( .ishape3.q )) .ishape3.q else 1
+            xvec <- log( (1-qvec)^(-1/ ishape3.q ) - 1 )
+            fit0 <- lsfit(x = xvec, y = log(quantile(yvec, qvec )))
+            exp(fit0$coef[1])
+          }
+          pp.init[, spp.] <- if (length( .ishape2.p )) .ishape2.p else 1.0
+          qq.init[, spp.] <- if (length( .ishape3.q )) .ishape3.q else 1.0
+        }
+      }  # End of for (spp. ...)
 
 
 
-      aa     <- 1
-      outOfRange <- (parg + 1/aa <= 0)
-      parg[outOfRange] <- 1 / aa[outOfRange] + 1
-      outOfRange <- (qq   - 1/aa <= 0)
-      qq[outOfRange] <- 1 / aa + 1
 
+      finite.mean <- 1 < qq.init  # the mean is finite only if shape3.q > 1
+      COP.use <- 1.15
+      while (any(!finite.mean)) {
+        qq.init[!finite.mean] <- 0.1 + qq.init[!finite.mean] * COP.use
+        finite.mean <- 1 < qq.init
+      }
 
       etastart <-
-        cbind(theta2eta(scale, .lscale ,   earg = .escale ),
-              theta2eta(parg,  .lshape2.p, earg = .eshape2.p ),
-              theta2eta(qq,    .lshape3.q, earg = .eshape3.q))
-    }
-  }), list( .lscale = lscale,
-            .escale = escale, 
+        cbind(theta2eta(sc.init , .lscale    , earg = .escale    ),
+              theta2eta(pp.init , .lshape2.p , earg = .eshape2.p ),
+              theta2eta(qq.init , .lshape3.q , earg = .eshape3.q ))
+      etastart <- etastart[, interleave.VGAM(M, M = M1)]
+    }  # End of etastart.
+  }), list( .lscale    = lscale   ,
+            .escale    = escale   ,
+            .iscale    = iscale   ,
+            .gscale    = gscale   ,
             .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
             .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
-            .iscale = iscale, 
-            .ishape2.p = ishape2.p,
-            .ishape3.q = ishape3.q ))),
+            .ishape2.p = ishape2.p, .ishape3.q = ishape3.q,
+            .gshape2.p = gshape2.p, .gshape3.q = gshape3.q,
+            .imethod   = imethod   , .probs.y = probs.y
+                       ))), 
   linkinv = eval(substitute(function(eta, extra = NULL) {
-    aa     <- 1
-    Scale  <- eta2theta(eta[, 1], .lscale , earg = .escale )
-    parg   <- eta2theta(eta[, 2], .lshape2.p, earg = .eshape2.p )
-    qq     <- eta2theta(eta[, 3], .lshape3.q, earg = .eshape3.q)
+    M1 <- 3
+    NOS <- ncol(eta)/M1
+    Scale <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                       .lscale ,  earg = .escale )
+    aa    <- 1
+    parg  <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                      .lshape2.p , earg = .eshape2.p )
+    qq    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                      .lshape3.q , earg = .eshape3.q )
 
     ans <- Scale * exp(lgamma(parg + 1/aa) +
                        lgamma(qq   - 1/aa) - lgamma(parg) - lgamma(qq))
@@ -3606,215 +4375,303 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
     ans[parg        <= 0] <- NA
     ans[qq          <= 0] <- NA
     ans
-    }, list( .lscale = lscale,
-             .escale = escale, 
-             .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
-             .lshape2.p = lshape2.p, .lshape3.q = lshape3.q ))),
+  }, list( .lscale    = lscale   ,
+           .escale    = escale   ,
+           .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
+           .eshape2.p = eshape2.p, .eshape3.q = eshape3.q
+                      ))),
   last = eval(substitute(expression({
-    misc$link <-    c(scale    = .lscale ,
-                      shape2.p = .lshape2.p,
-                      shape3.q = .lshape3.q)
+    M1 <- 3
 
-    misc$earg <- list(scale    = .escale ,
-                      shape2.p = .eshape2.p,
-                      shape3.q = .eshape3.q)
+    misc$link <- c(rep( .lscale    , length = ncoly),
+                   rep( .lshape2.p , length = ncoly),
+                   rep( .lshape3.q , length = ncoly))[
+                   interleave.VGAM(M, M = M1)]
+    temp.names <- c(scaL.names,             sha2.names, sha3.names)
+    names(misc$link) <- temp.names[interleave.VGAM(M, M = M1)]
 
-    misc$expected <- TRUE
-    misc$multipleResponses <- FALSE
-  }), list(
-    .lscale = lscale, .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
-    .escale = escale, .eshape2.p = eshape2.p, .eshape3.q = eshape3.q ))),
+    misc$earg <- vector("list", M)
+    names(misc$earg) <- temp.names
+    for (ii in 1:ncoly) {
+      misc$earg[[M1*ii-2]] <- .escale
+      misc$earg[[M1*ii-1]] <- .eshape2.p
+      misc$earg[[M1*ii  ]] <- .eshape3.q
+    }
+
+    misc$expected          <- TRUE
+    misc$multipleResponses <- TRUE
+  }), list( .lscale    = lscale   ,
+            .escale    = escale   ,
+            .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
+            .eshape2.p = eshape2.p, .eshape3.q = eshape3.q
+                       ))), 
   loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta,
-             extra = NULL,
-             summation = TRUE) {
-    aa <- 1
-    scale <- eta2theta(eta[, 1], .lscale ,   earg = .escale )
-    parg  <- eta2theta(eta[, 2], .lshape2.p, earg = .eshape2.p )
-    qq    <- eta2theta(eta[, 3], .lshape3.q, earg = .eshape3.q)
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ll.elts <-
-        c(w) * (log(aa) + (aa*parg-1)*log(y) - aa*parg*log(scale) +
-               (-lbeta(parg, qq)) - (parg+qq)*log1p((y/scale)^aa))
-      if (summation) {
-        sum(ll.elts)
+    function(mu, y, w, residuals = FALSE, 
+             eta, extra = NULL, summation = TRUE) {
+      M1  <- 3
+      NOS <- ncol(eta)/M1
+      Scale  <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                          .lscale    , earg = .escale    )
+      aa     <- 1
+      parg   <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                          .lshape2.p , earg = .eshape2.p )
+      qq     <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                          .lshape3.q , earg = .eshape3.q )
+      if (residuals) {
+        stop("loglikelihood residuals not implemented yet")
       } else {
-        ll.elts
+        ll.elts <-
+          c(w) * dgenbetaII(x = y, scale = Scale, shape1.a = aa,
+                            shape2.p = parg, shape3.q = qq, log = TRUE)
+        if (summation) {
+          sum(ll.elts)
+        } else {
+          ll.elts
+        }
       }
-    }
-  }, list( .lscale = lscale,
-           .escale = escale, 
-           .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
-           .lshape2.p = lshape2.p, .lshape3.q = lshape3.q ))),
+    }, list( .lscale    = lscale   ,
+             .escale    = escale   ,
+             .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
+             .eshape2.p = eshape2.p, .eshape3.q = eshape3.q
+                        ))),
   vfamily = c("betaII"),
-  deriv = eval(substitute(expression({
-    aa <- 1
-    scale <- eta2theta(eta[, 1], .lscale   , earg = .escale )
-    parg  <- eta2theta(eta[, 2], .lshape2.p, earg = .eshape2.p )
-    qq    <- eta2theta(eta[, 3], .lshape3.q, earg = .eshape3.q)
-
-    temp1 <- log(y/scale)
-    temp2 <- (y/scale)^aa
+  deriv = eval(substitute(expression({ 
+    NOS <- ncol(eta)/M1  # Needed for summary()
+    Scale <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                       .lscale    , earg = .escale    )
+    aa    <- 1
+    parg  <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                       .lshape2.p , earg = .eshape2.p )
+    qq    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                       .lshape3.q , earg = .eshape3.q )
+    temp1 <- log(y/Scale)
+    temp2 <- (y/Scale)^aa
     temp3 <- digamma(parg + qq)
     temp3a <- digamma(parg)
     temp3b <- digamma(qq)
     temp4 <- log1p(temp2)
-
-    dl.dscale <- (aa/scale) * (-parg + (parg+qq) / (1+1/temp2))
+  
+    dl.dscale <- (aa/Scale) * (-parg + (parg+qq) / (1+1/temp2))
     dl.dp <- aa * temp1 + temp3 - temp3a - temp4
     dl.dq <- temp3 - temp3b - temp4
-
-    dscale.deta <- dtheta.deta(scale, .lscale , earg = .escale )
-    dp.deta <- dtheta.deta(parg, .lshape2.p, earg = .eshape2.p )
-    dq.deta <- dtheta.deta(qq, .lshape3.q, earg = .eshape3.q)
-
-    c(w) * cbind( dl.dscale * dscale.deta,
-                  dl.dp * dp.deta,
-                  dl.dq * dq.deta )
-  }), list( .lscale = lscale,
-            .escale = escale, 
-            .eshape2.p = eshape2.p, .eshape3.q = eshape3.q,
-            .lshape2.p = lshape2.p, .lshape3.q = lshape3.q ))),
+    
+    dscale.deta <- dtheta.deta(Scale, .lscale ,    earg = .escale )
+    dp.deta     <- dtheta.deta(parg,  .lshape2.p , earg = .eshape2.p )
+    dq.deta     <- dtheta.deta(qq,    .lshape3.q , earg = .eshape3.q )
+    
+    myderiv <-
+      c(w) * cbind(dl.dscale * dscale.deta,
+                   dl.dp * dp.deta,
+                   dl.dq * dq.deta)
+    myderiv[, interleave.VGAM(M, M = M1)]
+  }), list(  .lscale    = lscale   ,
+             .escale    = escale   ,
+             .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
+             .eshape2.p = eshape2.p, .eshape3.q = eshape3.q
+                        ))),
   weight = eval(substitute(expression({
     temp5  <- trigamma(parg + qq)
-    ned2l.dscale <- aa^2 * parg * qq / (scale^2 * (1+parg+qq))
-    ned2l.dp <- trigamma(parg) - temp5
-    ned2l.dq <- trigamma(qq) - temp5
-    ned2l.dscalep <-  aa * qq   / (scale*(parg+qq))
-    ned2l.dscaleq <- -aa * parg / (scale*(parg+qq))
-    ned2l.dpq <- -temp5
-
-    wz <- matrix(as.numeric(NA), n, dimm(M))  #M==3 means 6=dimm(M)
-    wz[, iam(1, 1, M)] <- ned2l.dscale * dscale.deta^2
-    wz[, iam(2, 2, M)] <- ned2l.dp * dp.deta^2
-    wz[, iam(3, 3, M)] <- ned2l.dq * dq.deta^2
-    wz[, iam(1, 2, M)] <- ned2l.dscalep * dscale.deta * dp.deta
-    wz[, iam(1, 3, M)] <- ned2l.dscaleq * dscale.deta * dq.deta
-    wz[, iam(2, 3, M)] <- ned2l.dpq * dp.deta * dq.deta
-    wz <- c(w) * wz
+    temp5a <- trigamma(parg)
+    temp5b <- trigamma(qq)
+
+    ned2l.dscale  <- (aa^2) * parg * qq / ((1+parg+qq) * Scale^2)
+    ned2l.dp      <- temp5a - temp5
+    ned2l.dq      <- temp5b - temp5
+    ned2l.dscalep <-  aa * qq   / (Scale*(parg+qq))
+    ned2l.dscaleq <- -aa * parg / (Scale*(parg+qq))
+    ned2l.dpq     <- -temp5
+    wz <- 
+      array(c(c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.dp * dp.deta^2,
+              c(w) * ned2l.dq * dq.deta^2,
+              c(w) * ned2l.dscalep * dscale.deta * dp.deta,
+              c(w) * ned2l.dpq * dp.deta * dq.deta,  # NB: (p,q) deliberately precedes (scale,q)
+              c(w) * ned2l.dscaleq * dscale.deta * dq.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    wz <- arwz2wz(wz, M = M, M1 = M1)
     wz
-  }), list(
-    .lscale = lscale, .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
-    .escale = escale, .eshape2.p = eshape2.p, .eshape3.q = eshape3.q ))))
+  }), list( .lscale    = lscale   ,
+            .escale    = escale   ,
+            .lshape2.p = lshape2.p, .lshape3.q = lshape3.q,
+            .eshape2.p = eshape2.p, .eshape3.q = eshape3.q
+                       ))))
 }
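
A usage sketch for the now multiple-response-capable betaII(), assuming
VGAM is attached; the response is simulated through the beta-prime
representation (if B ~ Beta(p, q) then scale * B/(1 - B) follows a beta II
distribution), and all values are illustrative.

    ## Illustrative fit; the default imethod = 1 grid search supplies
    ## the initial values.
    library(VGAM)
    set.seed(2)
    B <- rbeta(2000, shape1 = 2, shape2 = 3)
    bdat <- data.frame(y = 1.5 * B / (1 - B))  # scale 1.5, p = 2, q = 3
    fit <- vglm(y ~ 1, betaII, data = bdat, trace = TRUE)
    Coef(fit)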
 
 
 
- lomax <- function(  # lss,
-                   lscale = "loge",    lshape3.q = "loge",
-                   iscale = NULL,      ishape3.q = NULL,  # 2.0, 
-                                       gshape3.q = exp(-5:5),
-                   zero = NULL) {
 
 
-  if (length(zero) &&
-      !is.Numeric(zero, integer.valued = TRUE, positive = TRUE))
-    stop("bad input for argument 'zero'")
+
+ lomax     <- function(lscale    = "loge",
+                       lshape3.q = "loge",
+                       iscale    = NULL,
+                       ishape3.q = NULL,
+                       imethod   = 1,
+                       gscale    = exp(-5:5),
+                       gshape3.q = exp(-5:5),  # Finite mean only if qq>1
+                       probs.y   = c(0.25, 0.50, 0.75),
+                       zero      = -2) {
 
 
 
-  lshape3.q <- as.list(substitute(lshape3.q))
-  eshape3.q <- link2list(lshape3.q)
-  lshape3.q <- attr(eshape3.q, "function.name")
 
+  if (!is.Numeric(imethod, length.arg = 1,
+                  integer.valued = TRUE, 
+                  positive = TRUE) || imethod > 2)
+    stop("Bad input for argument 'imethod'")
+  
+  if (length(iscale) && !is.Numeric(iscale, positive = TRUE)) 
+    stop("Bad input for argument 'iscale'")
+  
+  if (length(ishape3.q) && !is.Numeric(ishape3.q, positive = TRUE))
+    stop("Bad input for argument 'ishape3.q'")
+  
+  if (length(probs.y) < 2 || max(probs.y) > 1 || 
+        !is.Numeric(probs.y, positive = TRUE))
+    stop("Bad input for argument 'probs.y'")
+
+  if (length(zero) && !is.Numeric(zero, integer.valued = TRUE))
+    stop("bad input for argument 'zero'")
+  
   lscale <- as.list(substitute(lscale))
   escale <- link2list(lscale)
   lscale <- attr(escale, "function.name")
-
-
-
-  new("vglmff",
-  blurb = c("Lomax distribution\n\n",
-          "Links:    ",
-          namesof("scale",    lscale,    earg = escale), ", ", 
-          namesof("shape3.q", lshape3.q, earg = eshape3.q), "\n", 
-          "Mean:     scale / (shape3.q - 1)"),
+  
+  lshape3.q <- as.list(substitute(lshape3.q))
+  eshape3.q <- link2list(lshape3.q)
+  lshape3.q <- attr(eshape3.q, "function.name")
+   
+
+  new("vglmff", 
+  blurb = 
+    c("Lomax distribution \n\n",
+      "Links:    ", 
+      namesof("scale"    , lscale   , earg = escale   ), ", ",
+      namesof("shape3.q" , lshape3.q, earg = eshape3.q), "\n",
+      "Mean:     scale / (shape3.q - 1)"),
   constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero , M)
+    dotzero <- .zero
+    M1 <- 2
+    eval(negzero.expression.VGAM)
   }), list( .zero = zero ))),
-  initialize = eval(substitute(expression({
-    if (ncol(cbind(y)) != 1)
-        stop("response must be a vector or a one-column matrix")
-
-    w.y.check(w = w, y = y,
-              ncol.w.max = 1, ncol.y.max = 1)
-
-
+  infos = eval(substitute(function(...) {
+    list(M1 = 2,
+         Q1 = 1,
+         expected = TRUE,
+         zero = .zero ,
+         multipleResponses = TRUE,
+         lscale = .lscale ,
+         escale = .escale ,
+                                  lshape3.q = .lshape3.q ,
+                                  eshape3.q = .eshape3.q ,
+         .zero = zero )
+  }, list( .lscale = lscale      ,
+           .escale = escale      ,
+                                   .lshape3.q = lshape3.q,
+                                   .eshape3.q = eshape3.q,
+           .zero = zero ))),
+  initialize = eval(substitute(expression({ 
+    temp5 <- w.y.check(w = w, y = y, 
+                       Is.positive.y = TRUE, 
+                       ncol.w.max = Inf, 
+                       ncol.y.max = Inf, 
+                       out.wy = TRUE, 
+                       colsyperw = 1, 
+                       maximize = TRUE)
+    y    <- temp5$y
+    w    <- temp5$w
+    M1 <- 2  # Number of parameters for one response
+    NOS  <- ncoly <- ncol(y)
+    M    <- M1*ncol(y)
+
+    scaL.names  <- param.names("scale",     NOS)
+    sha3.names  <- param.names("shape3.q",  NOS)
 
-    mynames1 <- "scale"
-    mynames2 <- "shape3.q"
     predictors.names <-
-      c(namesof(mynames1,  .lscale    , earg = .escale ,    tag = FALSE),
-        namesof(mynames2,  .lshape3.q , earg = .eshape3.q , tag = FALSE))
-
-    aa <- parg <- 1
-
-
-
-                   
-    lomax.Loglikfun <- function(shape3.q, y, x, w, extraargs) {
-      qvec <- c(0.25, 0.5, 0.75)  # Arbitrary; could be made an argument
-      xvec <- log( (1-qvec)^(-1/ shape3.q ) - 1 )
-      fit0 <- lsfit(x = xvec, y = log(quantile(y, qvec )))
-      init.scale <- exp(fit0$coef[1])
-
-      ans <- sum(c(w) * dlomax(x = y, shape3.q = shape3.q,
-                               scale = init.scale, log = TRUE))
-      ans
-    }
-
-    shape3.q.grid <- .gshape3.q
-    yvec <- y
-    wvec <- w
-    Init.shape3.q <-
-        grid.search(shape3.q.grid,
-                    objfun = lomax.Loglikfun,
-                    y = yvec, x = x, w = wvec,
-                    extraargs = NULL)
-
-
-
-
-    if (!length( .iscale )) {
-      qvec <- c(0.25, 0.5, 0.75)  # Arbitrary; could be made an argument
-      ishape3.q <- if (length( .ishape3.q )) .ishape3.q else Init.shape3.q
-      xvec <- log( (1-qvec)^(-1/ ishape3.q ) - 1 )
-      fit0 <- lsfit(x = xvec, y = log(quantile(y, qvec )))
-    }
-
+      c(namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE),
+        namesof(sha3.names , .lshape3.q , earg = .eshape3.q , tag = FALSE))
+    predictors.names <- predictors.names[interleave.VGAM(M, M = M1)]
+    
     if (!length(etastart)) {
-      qq    <- rep(if (length( .ishape3.q)) .ishape3.q else Init.shape3.q,
-                   length.out = n)
-      scale <- rep(if (length( .iscale )) .iscale else
-                   exp(fit0$coef[1]),
-                   length.out = n)
-
-
-      aa     <- 1
-      outOfRange <- (qq   - 1/aa <= 0)
-      qq[outOfRange] <- 1 / aa + 1
+      sc.init <-
+      qq.init <- matrix(as.numeric(NA), n, NOS)
+          
+      for (spp. in 1:NOS) {  # For each response 'y_spp.'... do:
+        yvec <- y[, spp.]
+        wvec <- w[, spp.]
 
+        if ( .imethod == 1) {
+          gscale     <- .gscale
+          gshape3.q  <- .gshape3.q
+          if (length( .iscale ))
+            gscale    <-  rep( .iscale    , length = NOS)
+          if (length( .ishape3.q ))
+            gshape3.q <-  rep( .ishape3.q , length = NOS)
+          allmat1 <- cbind(shape3.q = gshape3.q)
+          allmat2 <- matrix(as.numeric(NA), nrow(allmat1), 2)
+
+          ll.lomx <- function(scaleval, x = x, y = y, w = w, extraargs) { 
+            ans <- sum(c(w) * dgenbetaII(x = y,
+                                         scale    = scaleval,
+                                         shape1.a = 1,
+                                         shape2.p = 1,
+                                         shape3.q = extraargs$Shape3.q,
+                                         log = TRUE))
+            ans
+          }
+
+          for (iloc in 1:nrow(allmat1)) {
+            allmat2[iloc, ] <-
+              grid.search(gscale, objfun = ll.lomx,
+                            y = yvec, x = x, w = wvec,
+                            ret.objfun = TRUE,  # 2nd value is the loglik
+                            extraargs = list(Shape3.q = allmat1[iloc, 1]))
+          }
+          ind5 <- which.max(allmat2[, 2])  # 2nd value is the loglik
+          sc.init[, spp.] <- allmat2[ind5, 1]
+          qq.init[, spp.] <- allmat1[ind5, 1]
+        } else {  # .imethod == 2
+          qvec <- .probs.y 
+          iscale <- if (length( .iscale )) .iscale else 1
+          xvec <- log1p( quantile(yvec / iscale, probs = qvec) )
+          fit0 <- lsfit(x = xvec, y = -log1p(-qvec), intercept = FALSE)
+          sc.init[, spp.] <- iscale
+          qq.init[, spp.] <- if (length( .ishape3.q )) .ishape3.q else
+                             fit0$coef
+        }
+      }  # End of for (spp. ...)
 
+      finite.mean <- 1 < qq.init  # the mean is finite only if shape3.q > 1
+      COP.use <- 1.15
+      while (FALSE && any(!finite.mean)) {  # 'FALSE &&' disables this adjustment
+        qq.init[!finite.mean] <- 0.1 + qq.init[!finite.mean] * COP.use
+        finite.mean <- 1 < qq.init
+      }
 
       etastart <-
-        cbind(theta2eta(scale, .lscale   , earg = .escale ),
-              theta2eta(qq,    .lshape3.q, earg = .eshape3.q))
-    }
-  }), list( .lscale = lscale, .lshape3.q = lshape3.q,
-            .escale = escale, .eshape3.q = eshape3.q,
-                              .gshape3.q = gshape3.q,
-            .iscale = iscale, .ishape3.q = ishape3.q ))),
+        cbind(theta2eta(sc.init, .lscale    , earg = .escale    ),
+              theta2eta(qq.init, .lshape3.q , earg = .eshape3.q ))
+      etastart <- etastart[, interleave.VGAM(M, M = M1)]
+    }  # End of etastart.
+  }), list( .lscale    = lscale   ,
+            .escale    = escale   ,
+            .iscale    = iscale   ,
+            .gscale    = gscale   ,
+                                    .lshape3.q = lshape3.q,
+                                    .eshape3.q = eshape3.q,
+                                    .ishape3.q = ishape3.q,
+                                    .gshape3.q = gshape3.q,
+            .imethod   = imethod   , .probs.y  = probs.y
+                       ))), 
   linkinv = eval(substitute(function(eta, extra = NULL) {
-    aa     <- 1
-    Scale  <- eta2theta(eta[, 1], .lscale    , earg = .escale )
-    parg   <- 1
-    qq     <- eta2theta(eta[, 2], .lshape3.q , earg = .eshape3.q )
-
-
-
-
+    M1 <- 2
+    NOS <- ncol(eta)/M1
+    Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                       .lscale    , earg = .escale    )
+    aa    <- 1
+    parg  <- 1
+    qq    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                       .lshape3.q , earg = .eshape3.q )
 
     ans <- Scale * exp(lgamma(parg + 1/aa) +
                        lgamma(qq   - 1/aa) - lgamma(parg) - lgamma(qq))
@@ -3823,42 +4680,64 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
     ans[Scale       <= 0] <- NA
     ans[qq          <= 0] <- NA
     ans
-  }, list( .lscale = lscale, .lshape3.q = lshape3.q,
-           .escale = escale, .eshape3.q = eshape3.q ))),
+  }, list( .lscale    = lscale   ,
+           .escale    = escale   ,
+                                   .lshape3.q = lshape3.q,
+                                   .eshape3.q = eshape3.q
+                      ))),
   last = eval(substitute(expression({
-    misc$link <-    c(scale = .lscale , shape3.q = .lshape3.q )
+    M1 <- 2
 
-    misc$earg <- list(scale = .escale , shape3.q = .eshape3.q )
+    misc$link <- c(rep( .lscale    , length = ncoly),
+                   rep( .lshape3.q , length = ncoly))[
+                   interleave.VGAM(M, M = M1)]
+    temp.names <-
+      c(scaL.names,                         sha3.names)
+    names(misc$link) <- temp.names[interleave.VGAM(M, M = M1)]
 
+    misc$earg <- vector("list", M)
+    names(misc$earg) <- temp.names
+    for (ii in 1:ncoly) {
+      misc$earg[[M1*ii-1]] <- .escale
+      misc$earg[[M1*ii  ]] <- .eshape3.q
+    }
 
-    misc$expected <- TRUE
-    misc$multipleResponses <- FALSE
-  }), list( .lscale = lscale, .lshape3.q = lshape3.q,
-            .escale = escale, .eshape3.q = eshape3.q ))),
+    misc$expected          <- TRUE
+    misc$multipleResponses <- TRUE
+  }), list( .lscale    = lscale   ,
+            .escale    = escale   ,
+                                    .lshape3.q = lshape3.q,
+                                    .eshape3.q = eshape3.q
+                       ))), 
   loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta,
-             extra = NULL,
-             summation = TRUE) {
-    aa <- 1
-    scale <- eta2theta(eta[, 1], .lscale ,    earg = .escale )
-    parg <- 1
-    qq <-    eta2theta(eta[, 2], .lshape3.q,  earg = .eshape3.q )
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ll.elts <- c(w) * dlomax(x = y, scale = scale,
-                               shape3.q = qq, log = TRUE)
-      if (summation) {
-        sum(ll.elts)
+    function(mu, y, w, residuals = FALSE, 
+             eta, extra = NULL, summation = TRUE) {
+      M1  <- 2
+      NOS <- ncol(eta)/M1
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale    , earg = .escale    )
+      aa    <- 1
+      parg  <- 1
+      qq    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lshape3.q , earg = .eshape3.q )
+      if (residuals) {
+        stop("loglikelihood residuals not implemented yet")
       } else {
-        ll.elts
+        ll.elts <-
+          c(w) * dgenbetaII(x = y, scale = Scale, shape1.a = aa,
+                            shape2.p = parg, shape3.q = qq, log = TRUE)
+        if (summation) {
+          sum(ll.elts)
+        } else {
+          ll.elts
+        }
       }
-    }
-  }, list( .lscale = lscale, .lshape3.q = lshape3.q,
-           .escale = escale, .eshape3.q = eshape3.q ))),
+    }, list( .lscale    = lscale   ,
+             .escale    = escale   ,
+                                     .lshape3.q = lshape3.q,
+                                     .eshape3.q = eshape3.q
+                        ))),
   vfamily = c("lomax"),
-
-
   simslot = eval(substitute(
   function(object, nsim) {
 
@@ -3866,173 +4745,340 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
               pwts else weights(object, type = "prior")
     if (any(pwts != 1)) 
       warning("ignoring prior weights")
+
     eta <- predict(object)
-    scale <- eta2theta(eta[, 1], .lscale ,    earg = .escale )
-    qq <-    eta2theta(eta[, 2], .lshape3.q,  earg = .eshape3.q )
-    rlomax(nsim * length(qq), scale = scale, shape3.q = qq)
-  }, list( .lscale = lscale, .lshape3.q = lshape3.q,
-           .escale = escale, .eshape3.q = eshape3.q ))),
+    M1 <- 2
+    NOS <- ncol(eta)/M1
+    Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                       .lscale ,  earg = .escale )
+    qq    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                       .lshape3.q , earg = .eshape3.q )
+
+    rlomax(nsim * length(qq), scale = Scale, shape3.q = qq)
+  }, list( .lscale    = lscale   ,
+           .escale    = escale   ,
+                                   .lshape3.q = lshape3.q,
+                                   .eshape3.q = eshape3.q
+                      ))),
+  deriv = eval(substitute(expression({ 
+    NOS <- ncol(eta)/M1  # Needed for summary()
+    Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                       .lscale    , earg = .escale   )
+    aa    <- 1
+    parg  <- 1
+    qq    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                       .lshape3.q , earg = .eshape3.q)
+    temp1 <- log(y/Scale)
+    temp2 <- (y/Scale)^aa
+    temp3 <- digamma(parg + qq)
+    temp3a <- digamma(parg)
+    temp3b <- digamma(qq)
+    temp4 <- log1p(temp2)
+  
+    dl.dscale <- (aa/Scale) * (-parg + (parg+qq) / (1+1/temp2))
+    dl.dq <- temp3 - temp3b - temp4
+    
+    dscale.deta <- dtheta.deta(Scale, .lscale ,    earg = .escale )
+    dq.deta     <- dtheta.deta(qq,    .lshape3.q , earg = .eshape3.q )
+    
+    myderiv <-
+      c(w) * cbind(dl.dscale * dscale.deta,
+                   dl.dq * dq.deta)
+    myderiv[, interleave.VGAM(M, M = M1)]
+  }), list(  .lscale    = lscale   ,
+             .escale    = escale   ,
+                                     .lshape3.q = lshape3.q,
+                                     .eshape3.q = eshape3.q
+                        ))),
+  weight = eval(substitute(expression({
+    temp5  <- trigamma(parg + qq)
+    temp5a <- trigamma(parg)
+    temp5b <- trigamma(qq)
 
+    ned2l.dscale  <- (aa^2) * parg * qq / ((1+parg+qq) * Scale^2)
+    ned2l.dq      <- temp5b - temp5
+    ned2l.dscaleq <- -aa * parg / (Scale*(parg+qq))
+    wz <-
+      array(c(c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.dq * dq.deta^2,
+              c(w) * ned2l.dscaleq * dscale.deta * dq.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    wz <- arwz2wz(wz, M = M, M1 = M1)
+    wz
+  }), list( .lscale    = lscale   ,
+            .escale    = escale   ,
+                                    .lshape3.q = lshape3.q,
+                                    .eshape3.q = eshape3.q
+                       ))))
+}
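
The imethod = 2 branch above rests on the Lomax survival function
S(y) = (1 + y/scale)^(-shape3.q), so -log1p(-p) = shape3.q * log1p(y_p/scale)
at any quantile y_p; the no-intercept least-squares fit of -log1p(-qvec) on
log1p(y_p / iscale) therefore estimates shape3.q directly. A quick sketch,
illustrative only, with the scale treated as known (it plays the role of
iscale):

    ## Recover shape3.q from sample quantiles, mirroring imethod = 2.
    library(VGAM)
    set.seed(3)
    yvec <- rlomax(5000, scale = 2, shape3.q = 4)
    qvec <- c(0.25, 0.50, 0.75)
    xvec <- log1p(quantile(yvec / 2, probs = qvec))
    lsfit(x = xvec, y = -log1p(-qvec), intercept = FALSE)$coef  # approx. 4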
 
-  deriv = eval(substitute(expression({
-    aa <- 1
-    scale <- eta2theta(eta[, 1], .lscale , earg = .escale )
-    parg <- 1
-    qq <- eta2theta(eta[, 2], .lshape3.q, earg = .eshape3.q)
-    temp2 <- (y/scale)^aa
 
-    dl.dscale <- (aa/scale) * (-parg + (parg+qq) / (1+1/temp2))
-    dl.dq <- digamma(parg + qq) - digamma(qq) - log1p(temp2)
 
-    dscale.deta <- dtheta.deta(scale, .lscale ,   earg = .escale )
-    dq.deta     <- dtheta.deta(qq,    .lshape3.q, earg = .eshape3.q)
 
-    c(w) * cbind( dl.dscale * dscale.deta,
-                  dl.dq * dq.deta )
-  }), list( .lscale = lscale, .lshape3.q = lshape3.q,
-            .escale = escale, .eshape3.q = eshape3.q ))),
-  weight = eval(substitute(expression({
-    ned2l.dscale <- aa^2 * parg * qq / (scale^2 * (1+parg+qq))
-    ned2l.dq <- 1/qq^2 
-    ned2l.dscaleq <- -aa * parg / (scale*(parg+qq))
-
-    wz <- matrix(as.numeric(NA), n, dimm(M))  #M == 2 means 3=dimm(M)
-    wz[, iam(1, 1, M)] <- ned2l.dscale * dscale.deta^2
-    wz[, iam(2, 2, M)] <- ned2l.dq * dq.deta^2
-    wz[, iam(1, 2, M)] <- ned2l.dscaleq * dscale.deta * dq.deta
-    wz <- c(w) * wz
-    wz
-  }), list( .lscale = lscale, .lshape3.q = lshape3.q,
-            .escale = escale, .eshape3.q = eshape3.q ))))
-}
 
 
 
- fisk <- function(lss,
-                  lshape1.a = "loge", lscale = "loge",
-                  ishape1.a = NULL,   iscale = NULL,
-                  zero = NULL) {
 
-  if (!is.logical(lss) || lss)
-    stop("argument 'lss' not specified correctly. ",
-         "See online help for important information")
 
 
+  fisk     <- function(lscale    = "loge",
+                       lshape1.a = "loge",
+                       iscale    = NULL,
+                       ishape1.a = NULL,
+                       imethod   = 1,
+                       lss       = TRUE,
+                       gscale    = exp(-5:5),
+                       gshape1.a = exp(-5:5),
+                       probs.y   = c(0.25, 0.50, 0.75),
+                       zero      = ifelse(lss, -2, -1)) {
 
-  if (length(zero) &&
-      !is.Numeric(zero, integer.valued = TRUE, positive = TRUE))
-    stop("bad input for argument 'zero'")
 
 
-  lshape1.a <- as.list(substitute(lshape1.a))
-  eshape1.a <- link2list(lshape1.a)
-  lshape1.a <- attr(eshape1.a, "function.name")
 
+
+  if (length(lss) != 1 || !is.logical(lss))
+    stop("Argument 'lss' not specified correctly")
+  
+  if (!is.Numeric(imethod, length.arg = 1,
+                  integer.valued = TRUE, 
+                  positive = TRUE) || imethod > 2)
+    stop("Bad input for argument 'imethod'")
+  
+  if (length(iscale) && !is.Numeric(iscale, positive = TRUE)) 
+    stop("Bad input for argument 'iscale'")
+  
+  if (length(ishape1.a) && !is.Numeric(ishape1.a, positive = TRUE))
+    stop("Bad input for argument 'ishape1.a'")
+  
+  if (length(probs.y) < 2 || max(probs.y) > 1 || 
+        !is.Numeric(probs.y, positive = TRUE))
+    stop("Bad input for argument 'probs.y'")
+
+  if (length(zero) && !is.Numeric(zero, integer.valued = TRUE))
+    stop("bad input for argument 'zero'")
+  
   lscale <- as.list(substitute(lscale))
   escale <- link2list(lscale)
   lscale <- attr(escale, "function.name")
+  
+  lshape1.a <- as.list(substitute(lshape1.a))
+  eshape1.a <- link2list(lshape1.a)
+  lshape1.a <- attr(eshape1.a, "function.name")
+  
 
-
-
-  new("vglmff",
-  blurb = c("Fisk distribution\n\n",
-            "Links:    ",
-            namesof("shape1.a", lshape1.a, earg = eshape1.a), ", ", 
-            namesof("scale",    lscale,    earg = escale), "\n", 
-            "Mean:     scale * gamma(1 + 1/shape1.a) * ",
-                      "gamma(1 - 1/shape1.a)"),
+  new("vglmff", 
+  blurb = 
+    c("Fisk distribution \n\n",
+      "Links:    ", 
+      ifelse (lss,
+              namesof("scale"   , lscale   , earg = escale),
+              namesof("shape1.a", lshape1.a, earg = eshape1.a)), ", ",
+      ifelse (lss, 
+              namesof("shape1.a", lshape1.a, earg = eshape1.a),
+              namesof("scale"   , lscale   , earg = escale)), "\n",
+      "Mean:     scale * gamma(1 + 1/shape1.a) * ",
+                "gamma(1 - 1/shape1.a)"),
   constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero , M)
+    dotzero <- .zero
+    M1 <- 2
+    eval(negzero.expression.VGAM)
   }), list( .zero = zero ))),
-  initialize = eval(substitute(expression({
-
-    w.y.check(w = w, y = y,
-              ncol.w.max = 1, ncol.y.max = 1)
-
-
-    predictors.names <-
-      c(namesof("shape1.a", .lshape1.a , earg = .eshape1.a , tag = FALSE),
-        namesof("scale",    .lscale    , earg = .escale    , tag = FALSE))
-
-    qq <- parg <- 1
-
-    if (!length( .iscale )) {
-      qvec <- c( 0.25, 0.5, 0.75)  # Arbitrary; could be made an argument
-      xvec <- log( 1 / qvec - 1 )
-      fit0 <- lsfit(x = xvec, y = log(quantile(y, qvec )))
+  infos = eval(substitute(function(...) {
+    list(M1 = 2,
+         Q1 = 1,
+         expected = TRUE,
+         zero = .zero ,
+         multipleResponses = TRUE,
+         lscale = .lscale ,       lshape1.a = .lshape1.a ,
+         escale = .escale ,       eshape1.a = .eshape1.a ,
+         .zero = zero )
+  }, list( .lscale = lscale      , .lshape1.a = lshape1.a,
+           .escale = escale      , .eshape1.a = eshape1.a,
+           .zero = zero ))),
+  initialize = eval(substitute(expression({ 
+    temp5 <- w.y.check(w = w, y = y, 
+                       Is.positive.y = TRUE, 
+                       ncol.w.max = Inf, 
+                       ncol.y.max = Inf, 
+                       out.wy = TRUE, 
+                       colsyperw = 1, 
+                       maximize = TRUE)
+    y    <- temp5$y
+    w    <- temp5$w
+    M1 <- 2  # Number of parameters for one response
+    NOS  <- ncoly <- ncol(y)
+    M    <- M1*ncol(y)
+
+    scaL.names  <- param.names("scale",     NOS)
+    sha1.names  <- param.names("shape1.a",  NOS)
+
+    predictors.names <- if ( .lss ) {
+      c(namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE),
+        namesof(sha1.names , .lshape1.a , earg = .eshape1.a , tag = FALSE))
+    } else {
+      c(namesof(sha1.names , .lshape1.a , earg = .eshape1.a , tag = FALSE),
+        namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE))
     }
-
+    predictors.names <- predictors.names[interleave.VGAM(M, M = M1)]
+    
     if (!length(etastart)) {
-      aa <- rep(if (length( .ishape1.a)) .ishape1.a else
-                abs(-1 / fit0$coef[2]),
-                length.out = n)
-      scale <- rep(if (length( .iscale )) .iscale else
-                   exp(fit0$coef[1]),
-                   length.out = n)
-
+      sc.init <-
+      aa.init <- matrix(as.numeric(NA), n, NOS)
+          
+      for (spp. in 1:NOS) {  # For each response 'y_spp.'... do:
+        yvec <- y[, spp.]
+        wvec <- w[, spp.]
 
-      parg   <- 1
-      qq     <- 1
-      outOfRange <- (parg + 1/aa <= 0)
-        parg[outOfRange] <- 1 / aa[outOfRange] + 1
-        outOfRange <- (qq   - 1/aa <= 0)
-      qq[outOfRange] <- 1 / aa + 1
+        if ( .imethod == 1 ) {
+          gscale     <- .gscale
+          gshape1.a  <- .gshape1.a
+          if (length( .iscale ))
+            gscale    <-  rep( .iscale    , length = NOS)
+          if (length( .ishape1.a ))
+            gshape1.a <-  rep( .ishape1.a , length = NOS)
+          allmat1 <- cbind(shape1.a = gshape1.a)
+          allmat2 <- matrix(as.numeric(NA), nrow(allmat1), 2)
+
+          ll.fisk <- function(scaleval, x = x, y = y, w = w, extraargs) { 
+            ans <- sum(c(w) * dgenbetaII(x = y,
+                                         scale    = scaleval,
+                                         shape1.a = extraargs$Shape1.a,
+                                         shape2.p = 1,
+                                         shape3.q = 1,
+                                         log = TRUE))
+            ans
+          }
+
+          for (iloc in 1:nrow(allmat1)) {
+            allmat2[iloc, ] <-
+              grid.search(gscale, objfun = ll.fisk,
+                            y = yvec, x = x, w = wvec,
+                            ret.objfun = TRUE,  # 2nd value is the loglik
+                            extraargs = list(Shape1.a = allmat1[iloc, 1]))
+          }
+          ind5 <- which.max(allmat2[, 2])  # 2nd value is the loglik
+          sc.init[, spp.] <- allmat2[ind5, 1]
+          aa.init[, spp.] <- allmat1[ind5, 1]
+        } else {  # .imethod == 2
+          qvec <- .probs.y 
+          iscale <- if (length( .iscale )) .iscale else 1
+          xvec <- log( quantile(yvec / iscale, probs = qvec) )
+          fit0 <- lsfit(x = xvec, y = logit(qvec), intercept = FALSE)
+          sc.init[, spp.] <- iscale
+          aa.init[, spp.] <- if (length( .ishape1.a )) .ishape1.a else
+                             fit0$coef
+        }
+      }  # End of for (spp. ...)
 
 
+      finite.mean <- 1 < aa.init  # the mean is finite only if shape1.a > 1
+      COP.use <- 1.15
+      while (FALSE && any(!finite.mean)) {  # 'FALSE &&' disables this adjustment
+        aa.init[!finite.mean] <- 0.1 + aa.init[!finite.mean] * COP.use
+        finite.mean <- 1 < aa.init
+      }
 
       etastart <-
-        cbind(theta2eta(aa,    .lshape1.a , earg = .eshape1.a ),
-              theta2eta(scale, .lscale    , earg = .escale ))
-    }
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale, 
-            .ishape1.a = ishape1.a, .iscale = iscale ))),
+        if ( .lss )
+          cbind(theta2eta(sc.init, .lscale    , earg = .escale    ),
+                theta2eta(aa.init, .lshape1.a , earg = .eshape1.a )) else
+          cbind(theta2eta(aa.init, .lshape1.a , earg = .eshape1.a ),
+                theta2eta(sc.init, .lscale    , earg = .escale    ))
+      etastart <- etastart[, interleave.VGAM(M, M = M1)]
+    }  # End of etastart.
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
+            .iscale    = iscale   , .ishape1.a = ishape1.a,
+            .gscale    = gscale   , .gshape1.a = gshape1.a,           
+            .imethod   = imethod   , .probs.y  = probs.y,
+            .lss = lss ))), 
   linkinv = eval(substitute(function(eta, extra = NULL) {
-    aa     <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    Scale  <- eta2theta(eta[, 2], .lscale    , earg = .escale )
-    parg   <- 1
-    qq     <- 1
+    M1 <- 2
+    NOS <- ncol(eta)/M1
+    if ( .lss ) {
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale ,  earg = .escale )
+      aa    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+      Scale <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lscale    ,  earg = .escale )
+    }
+    parg <- 1
+    qq   <- 1
 
     ans <- Scale * exp(lgamma(parg + 1/aa) +
-                       lgamma(qq   - 1/aa) - lgamma(parg) - lgamma(qq))
+                      lgamma(qq   - 1/aa) - lgamma(parg) - lgamma(qq))
     ans[parg + 1/aa <= 0] <- NA
     ans[qq   - 1/aa <= 0] <- NA
-    ans[aa          <= 0] <- NA
     ans[Scale       <= 0] <- NA
+    ans[aa          <= 0] <- NA
     ans
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .eshape1.a = eshape1.a, .escale = escale))),
+  }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+           .escale    = escale   , .eshape1.a = eshape1.a,
+           .lss = lss ))),
   last = eval(substitute(expression({
-    misc$link <-    c(shape1.a = .lshape1.a , scale = .lscale )
+    M1 <- 2
 
-    misc$earg <- list(shape1.a = .eshape1.a , scale = .escale )
+    misc$link <- c(rep( if ( .lss ) .lscale else .lshape1.a , len = ncoly),
+                   rep( if ( .lss ) .lshape1.a else .lscale , len = ncoly))[
+                   interleave.VGAM(M, M = M1)]
+    temp.names <- if ( .lss ) {
+      c(scaL.names, sha1.names)
+    } else {
+      c(sha1.names, scaL.names)
+    }
+    names(misc$link) <- temp.names[interleave.VGAM(M, M = M1)]
 
-    misc$expected <- TRUE
-    misc$multipleResponses <- FALSE
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale))),
+    misc$earg <- vector("list", M)
+    names(misc$earg) <- temp.names
+    for (ii in 1:ncoly)
+      if ( .lss ) {
+        misc$earg[[M1*ii-1]] <- .escale
+        misc$earg[[M1*ii  ]] <- .eshape1.a
+      } else {
+        misc$earg[[M1*ii-1]] <- .eshape1.a
+        misc$earg[[M1*ii  ]] <- .escale
+      }
+
+    misc$expected          <- TRUE
+    misc$multipleResponses <- TRUE
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
+            .lss = lss ))), 
   loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta,
-             extra = NULL,
-             summation = TRUE) {
-    aa    <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    Scale <- eta2theta(eta[, 2], .lscale    , earg = .escale )
-    parg <- qq <- 1
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ll.elts <- c(w) * dfisk(x = y, shape1.a = aa, scale = Scale, log = TRUE)
-      if (summation) {
-        sum(ll.elts)
+    function(mu, y, w, residuals = FALSE, 
+             eta, extra = NULL, summation = TRUE) {
+      M1  <- 2
+      NOS <- ncol(eta)/M1
+      if ( .lss ) {
+        Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                           .lscale    , earg = .escale    )
+        aa    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                           .lshape1.a , earg = .eshape1.a )
       } else {
-        ll.elts
+        aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                           .lshape1.a , earg = .eshape1.a )
+        Scale <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                           .lscale    , earg = .escale    )
       }
-    }
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .eshape1.a = eshape1.a, .escale = escale))),
+      if (residuals) {
+        stop("loglikelihood residuals not implemented yet")
+      } else {
+        ll.elts <-
+          c(w) * dfisk(x = y, scale = Scale, shape1.a = aa, log = TRUE)
+        if (summation) {
+          sum(ll.elts)
+        } else {
+          ll.elts
+        }
+      }
+    }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+             .escale    = escale   , .eshape1.a = eshape1.a,
+             .lss = lss ))),
   vfamily = c("fisk"),
-
-
   simslot = eval(substitute(
   function(object, nsim) {
 
@@ -4040,168 +5086,326 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
               pwts else weights(object, type = "prior")
     if (any(pwts != 1)) 
       warning("ignoring prior weights")
+
     eta <- predict(object)
-    aa    <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    Scale <- eta2theta(eta[, 2], .lscale ,    earg = .escale )
+    M1 <- 2
+    NOS <- ncol(eta)/M1
+    if ( .lss ) {
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale ,  earg = .escale )
+      aa    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+      Scale <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lscale    ,  earg = .escale )
+    }
     rfisk(nsim * length(aa), shape1.a = aa, scale = Scale)
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .eshape1.a = eshape1.a, .escale = escale ))),
-
-
-  deriv = eval(substitute(expression({
-    aa    <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    scale <- eta2theta(eta[, 2], .lscale    , earg = .escale )
-    parg <- qq <- 1
-
-    temp1 <- log(y/scale)
-    temp2 <- (y/scale)^aa
+  }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+           .escale    = escale   , .eshape1.a = eshape1.a,
+           .lss = lss ))),
+  deriv = eval(substitute(expression({ 
+    NOS <- ncol(eta)/M1  # Needed for summary()
+    if ( .lss ) { 
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale    , earg = .escale   )
+      aa    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+      Scale <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lscale    , earg = .escale )
+    }  
+    parg <- 1
+    qq   <- 1
+    temp1 <- log(y/Scale)
+    temp2 <- (y/Scale)^aa
+    temp3 <- digamma(parg + qq)
     temp3a <- digamma(parg)
     temp3b <- digamma(qq)
-
+    temp4 <- log1p(temp2)
+  
+    dl.dscale <- (aa/Scale) * (-parg + (parg+qq) / (1+1/temp2))
     dl.da <- 1/aa + parg * temp1 - (parg+qq) * temp1 / (1+1/temp2)
-    dl.dscale <- (aa/scale) * (-parg + (parg+qq) / (1+1/temp2))
-
-    da.deta     <- dtheta.deta(aa,   .lshape1.a , earg = .eshape1.a )
-    dscale.deta <- dtheta.deta(scale, .lscale   , earg = .escale )
-
-    c(w) * cbind( dl.da * da.deta,
-                  dl.dscale * dscale.deta )
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale))),
-    weight = eval(substitute(expression({
-    ned2l.da <- (1 + parg+qq + parg * qq * (trigamma(parg) + trigamma(qq) +
-                (temp3b - temp3a + (parg-qq)/(parg*qq))^2 - 
-                (parg^2 + qq^2) / (parg*qq)^2)) / (aa^2 * (1+parg+qq))
-    ned2l.dscale <- aa^2 * parg * qq / (scale^2 * (1 + parg + qq))
-    ned2l.dascale <- (parg - qq - parg*qq*(temp3a - temp3b)) / (
-                      scale * (1 + parg + qq))
-
-    wz <- matrix(as.numeric(NA), n, dimm(M))  #M == 2 means 3=dimm(M)
-    wz[, iam(1, 1, M)] <- ned2l.da * da.deta^2
-    wz[, iam(2, 2, M)] <- ned2l.dscale * dscale.deta^2
-    wz[, iam(1, 2, M)] <- ned2l.dascale * da.deta * dscale.deta
+    
+    dscale.deta <- dtheta.deta(Scale, .lscale ,    earg = .escale )
+    da.deta     <- dtheta.deta(aa,    .lshape1.a , earg = .eshape1.a )
+    
+    myderiv <- if ( .lss ) {
+      c(w) * cbind(dl.dscale * dscale.deta,
+                   dl.da * da.deta)
+    } else {
+      c(w) * cbind(dl.da * da.deta,
+                   dl.dscale * dscale.deta)
+    }
+    myderiv[, interleave.VGAM(M, M = M1)]
+  }), list(  .lscale    = lscale   , .lshape1.a = lshape1.a,
+             .escale    = escale   , .eshape1.a = eshape1.a,
+             .lss = lss ))),
+  weight = eval(substitute(expression({
+    temp5  <- trigamma(parg + qq)
+    temp5a <- trigamma(parg)
+    temp5b <- trigamma(qq)
 
-    wz <- c(w) * wz
+    ned2l.da <- (1 + parg + qq + parg * qq * (temp5a + temp5b +
+                (temp3b - temp3a + (parg-qq)/(parg*qq))^2 -
+                (parg^2 + qq^2) / (parg*qq)^2)) / (aa^2 * (1+parg+qq))
+    ned2l.dscale  <- (aa^2) * parg * qq / ((1+parg+qq) * Scale^2)
+    ned2l.dascale <- (parg - qq - parg * qq *
+                     (temp3a -temp3b)) / (Scale*(1 + parg+qq))
+    wz <- if ( .lss ) {
+      array(c(c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.da * da.deta^2,
+              c(w) * ned2l.dascale * da.deta * dscale.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    } else {
+      array(c(c(w) * ned2l.da * da.deta^2,
+              c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.dascale * da.deta * dscale.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    }
+    wz <- arwz2wz(wz, M = M, M1 = M1)
     wz
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale))))
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a, 
+            .lss = lss ))))
 }
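
[Editorial sketch, not part of the upstream diff.] The refactored fisk() family above now handles multiple responses and gains the 'lss' switch that decides whether scale or shape1.a comes first in the linear predictors. A minimal, hypothetical usage sketch (the data frame, seed, and parameter values below are invented for illustration; rfisk() and the lss argument are taken from the code above):

    library(VGAM)
    set.seed(123)
    fdata <- data.frame(y = rfisk(n = 200, scale = exp(2), shape1.a = exp(1)))
    fit <- vglm(y ~ 1, fisk(lss = TRUE), data = fdata, trace = TRUE)
    coef(fit, matrix = TRUE)  # With lss = TRUE: (scale, shape1.a) per response
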
 
 
- inv.lomax <- function(  # lss,
-                       lscale = "loge",
-                       lshape2.p = "loge",
-                       iscale = NULL,
-                       ishape2.p = 1.0, 
-                       zero = NULL) {
 
-  if (length(zero) &&
-      !is.Numeric(zero, integer.valued = TRUE, positive = TRUE))
-    stop("bad input for argument 'zero'")
 
 
-  lshape2.p <- as.list(substitute(lshape2.p))
-  eshape2.p <- link2list(lshape2.p)
-  lshape2.p <- attr(eshape2.p, "function.name")
 
-  lscale <- as.list(substitute(lscale))
-  escale <- link2list(lscale)
-  lscale <- attr(escale, "function.name")
 
 
 
 
-  new("vglmff",
-  blurb = c("Inverse Lomax distribution\n\n",
-          "Links:    ",
-          namesof("scale",    lscale,    earg = escale), ", ", 
-          namesof("shape2.p", lshape2.p, earg = eshape2.p), "\n", 
-          "Mean:     does not exist"),
-  constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero , M)
-  }), list( .zero = zero ))),
-  initialize = eval(substitute(expression({
 
-    w.y.check(w = w, y = y,
-              ncol.w.max = 1, ncol.y.max = 1)
+ inv.lomax <- function(lscale    = "loge",
+                       lshape2.p = "loge",
+                       iscale    = NULL,
+                       ishape2.p = NULL,
+                       imethod   = 1,
+                       gscale    = exp(-5:5),
+                       gshape2.p = exp(-5:5),
+                       probs.y   = c(0.25, 0.50, 0.75),
+                       zero      = -2) {
 
 
 
-    predictors.names <-
-      c(namesof("scale",    .lscale ,   earg = .escale    , tag = FALSE),
-        namesof("shape2.p", .lshape2.p, earg = .eshape2.p , tag = FALSE))
 
-    qq <- aa <- 1
 
-    if (!length( .iscale )) {
-      qvec <- c(0.25, .5, .75)  # Arbitrary; could be made an argument
-      ishape2.p <- if (length( .ishape2.p)) .ishape2.p else 1
-      xvec <- log( qvec^(-1/ ishape2.p ) - 1 )
-      fit0 <- lsfit(x = xvec, y = log(quantile(y, qvec )))
-    }
-        if (!length(etastart)) {
-          scale <- rep(if (length( .iscale )) .iscale else
-                       exp(fit0$coef[1]),
-                       length.out = n)
-          parg <- rep(if (length( .ishape2.p)) .ishape2.p else 1.0,
-                      length.out = n)
 
+  if (!is.Numeric(imethod, length.arg = 1,
+                  integer.valued = TRUE, 
+                  positive = TRUE) || imethod > 2)
+    stop("Bad input for argument 'imethod'")
+  
+  if (length(iscale) && !is.Numeric(iscale, positive = TRUE)) 
+    stop("Bad input for argument 'iscale'")
+  
+  if (length(ishape2.p) && !is.Numeric(ishape2.p, positive = TRUE))
+    stop("Bad input for argument 'ishape2.p'")
+  
+  if (length(probs.y) < 2 || max(probs.y) > 1 || 
+        !is.Numeric(probs.y, positive = TRUE))
+    stop("Bad input for argument 'probs.y'")
 
+  if (length(zero) && !is.Numeric(zero, integer.valued = TRUE))
+    stop("bad input for argument 'zero'")
+  
+  lscale <- as.list(substitute(lscale))
+  escale <- link2list(lscale)
+  lscale <- attr(escale, "function.name")
+  
+  lshape2.p <- as.list(substitute(lshape2.p))
+  eshape2.p <- link2list(lshape2.p)
+  lshape2.p <- attr(eshape2.p, "function.name")
+  
 
+  new("vglmff", 
+  blurb = 
+    c("Inverse Lomax distribution \n\n",
+      "Links:    ", 
+      namesof("scale"    , lscale   , earg = escale),    ", ",
+      namesof("shape2.p" , lshape2.p, earg = eshape2.p), "\n",
+      "Mean:     does not exist"),
+  constraints = eval(substitute(expression({
+    dotzero <- .zero
+    M1 <- 2
+    eval(negzero.expression.VGAM)
+  }), list( .zero = zero ))),
+  infos = eval(substitute(function(...) {
+    list(M1 = 2,
+         Q1 = 1,
+         expected = TRUE,
+         zero = .zero ,
+         multipleResponses = TRUE,
+         lscale    = .lscale    ,
+         escale    = .escale    ,
+         lshape2.p = .lshape2.p ,                         
+         eshape2.p = .eshape2.p ,                         
+         .zero = zero )
+  }, list( .lscale = lscale      ,
+           .escale = escale      ,
+           .lshape2.p = lshape2.p,                        
+           .eshape2.p = eshape2.p,                        
+           .zero = zero ))),
+  initialize = eval(substitute(expression({ 
+    temp5 <- w.y.check(w = w, y = y, 
+                       Is.positive.y = TRUE, 
+                       ncol.w.max = Inf, 
+                       ncol.y.max = Inf, 
+                       out.wy = TRUE, 
+                       colsyperw = 1, 
+                       maximize = TRUE)
+    y    <- temp5$y
+    w    <- temp5$w
+    M1 <- 2  # Number of parameters for one response
+    NOS  <- ncoly <- ncol(y)
+    M    <- M1*ncol(y)
+
+    scaL.names  <- param.names("scale",     NOS)
+    sha2.names  <- param.names("shape2.p",  NOS)
 
-          etastart <-
-            cbind(theta2eta(scale, .lscale ,   earg = .escale ),
-                  theta2eta(parg,  .lshape2.p, earg = .eshape2.p ))
-        }
-    }), list( .lscale = lscale, .lshape2.p = lshape2.p,
-              .escale = escale, .eshape2.p = eshape2.p,
-              .iscale = iscale, .ishape2.p = ishape2.p ))),
-  linkinv = eval(substitute(function(eta, extra = NULL) {
-    Scale  <- eta2theta(eta[, 1], .lscale ,   earg = .escale )
-    parg   <- eta2theta(eta[, 2], .lshape2.p, earg = .eshape2.p )
+    predictors.names <-
+      c(namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE),
+        namesof(sha2.names , .lshape2.p , earg = .eshape2.p , tag = FALSE))
+    predictors.names <- predictors.names[interleave.VGAM(M, M = M1)]
+    
+    if (!length(etastart)) {
+      sc.init <-
+      pp.init <- matrix(as.numeric(NA), n, NOS)
+          
+      for (spp. in 1:NOS) {  # For each response 'y_spp.'... do:
+        yvec <- y[, spp.]
+        wvec <- w[, spp.]
+
+        if ( .imethod == 1 ) {
+          gscale     <- .gscale
+          gshape2.p  <- .gshape2.p        
+          if (length( .iscale ))
+            gscale    <-  rep( .iscale    , length = NOS)
+          if (length( .ishape2.p ))
+            gshape2.p <-  rep( .ishape2.p , length = NOS)
+          allmat1 <- cbind(shape2.p = gshape2.p)
+          allmat2 <- matrix(as.numeric(NA), nrow(allmat1), 2)
+
+          ll.invL <- function(scaleval, x = x, y = y, w = w, extraargs) { 
+            ans <- sum(c(w) * dgenbetaII(x = y,
+                                         scale    = scaleval,
+                                         shape1.a = 1,
+                                         shape2.p = extraargs$Shape2.p,
+                                         shape3.q = 1,
+                                         log = TRUE))
+            ans
+          }
+
+          for (iloc in 1:nrow(allmat1)) {
+            allmat2[iloc, ] <-
+              grid.search(gscale, objfun = ll.invL,
+                            y = yvec, x = x, w = wvec,
+                            ret.objfun = TRUE,  # 2nd value is the loglik
+                            extraargs = list(Shape2.p = allmat1[iloc, 1]))
+          }
+          ind5 <- which.max(allmat2[, 2])  # 2nd value is the loglik
+          sc.init[, spp.] <- allmat2[ind5, 1]
+          pp.init[, spp.] <- allmat1[ind5, 1]
+        } else {  # .imethod == 2
+          qvec <- .probs.y
+          ishape2.p <- if (length( .ishape2.p )) .ishape2.p else 1
+          xvec <- log( qvec^(-1/ ishape2.p) - 1 )
+          fit0 <- lsfit(x = xvec, y = log(quantile(yvec,  qvec)))
+          sc.init[, spp.] <- if (length( .iscale )) .iscale else
+                             exp(fit0$coef[1])
+          pp.init[, spp.] <- ishape2.p
+        }
+      }  # End of for (spp. ...)
 
 
+      etastart <-
+        cbind(theta2eta(sc.init, .lscale    , earg = .escale    ),
+              theta2eta(pp.init, .lshape2.p , earg = .eshape2.p ))
+      etastart <- etastart[, interleave.VGAM(M, M = M1)]
+    }  # End of etastart.
+  }), list( .lscale    = lscale   ,
+            .escale    = escale   ,
+            .iscale    = iscale   ,
+            .gscale    = gscale   ,
+            .lshape2.p = lshape2.p,                        
+            .eshape2.p = eshape2.p,                        
+            .ishape2.p = ishape2.p,
+            .gshape2.p = gshape2.p,
+            .imethod   = imethod   , .probs.y = probs.y
+                       ))), 
+  linkinv = eval(substitute(function(eta, extra = NULL) {
+
+    M1 <- 2
+    NOS <- ncol(eta)/M1
+    Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                       .lscale    , earg = .escale    )
+    parg  <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                       .lshape2.p , earg = .eshape2.p )
 
     qinv.lomax(p = 0.5, scale = Scale, shape2.p = parg)
-    }, list( .lscale = lscale,
-             .escale = escale, 
-             .eshape2.p = eshape2.p,
-             .lshape2.p = lshape2.p ))),
+  }, list( .lscale    = lscale   ,
+           .escale    = escale   ,
+           .lshape2.p = lshape2.p,
+           .eshape2.p = eshape2.p
+                      ))),
   last = eval(substitute(expression({
-    misc$link <-    c(scale = .lscale , shape2.p = .lshape2.p )
+    M1 <- 2
 
-    misc$earg <- list(scale = .escale , shape2.p = .eshape2.p )
+    misc$link <- c(rep( .lscale    , length = ncoly),
+                   rep( .lshape2.p , length = ncoly))[
+                   interleave.VGAM(M, M = M1)]
+    temp.names <- c(scaL.names, sha2.names)
+    names(misc$link) <- temp.names[interleave.VGAM(M, M = M1)]
 
-    misc$expected <- TRUE
-    misc$multipleResponses <- FALSE
-  }), list( .lscale = lscale,
-            .escale = escale, 
-            .eshape2.p = eshape2.p,
-            .lshape2.p = lshape2.p ))),
+    misc$earg <- vector("list", M)
+    names(misc$earg) <- temp.names
+    for (ii in 1:ncoly) {
+      misc$earg[[M1*ii-1]] <- .escale
+      misc$earg[[M1*ii  ]] <- .eshape2.p
+    }
+
+    misc$expected          <- TRUE
+    misc$multipleResponses <- TRUE
+  }), list( .lscale    = lscale   ,
+            .escale    = escale   ,
+            .lshape2.p = lshape2.p,
+            .eshape2.p = eshape2.p
+                       ))), 
   loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta,
-             extra = NULL,
-             summation = TRUE) {
-    aa <- 1
-    Scale <- eta2theta(eta[, 1], .lscale    , earg = .escale    )
-    parg  <- eta2theta(eta[, 2], .lshape2.p , earg = .eshape2.p )
-    qq <- 1
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ll.elts <- c(w) * dinv.lomax(x = y, scale = Scale,
-                                  shape2.p = parg, log = TRUE)
-      if (summation) {
-        sum(ll.elts)
+    function(mu, y, w, residuals = FALSE, 
+             eta, extra = NULL, summation = TRUE) {
+      M1  <- 2
+      NOS <- ncol(eta)/M1
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale    , earg = .escale    )
+      parg  <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lshape2.p , earg = .eshape2.p )
+      if (residuals) {
+        stop("loglikelihood residuals not implemented yet")
       } else {
-        ll.elts
+        ll.elts <-
+          c(w) * dgenbetaII(x = y, scale = Scale, shape1.a = 1,
+                            shape2.p = parg, shape3.q = 1, log = TRUE)
+        if (summation) {
+          sum(ll.elts)
+        } else {
+          ll.elts
+        }
       }
-    }
-  }, list( .lscale = lscale, .lshape2.p = lshape2.p,
-           .escale = escale, .eshape2.p = eshape2.p ))),
+    }, list( .lscale    = lscale   ,
+             .escale    = escale   ,
+             .lshape2.p = lshape2.p,
+             .eshape2.p = eshape2.p
+                        ))),
   vfamily = c("inv.lomax"),
 
-
   simslot = eval(substitute(
   function(object, nsim) {
 
@@ -4210,139 +5414,267 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
     if (any(pwts != 1)) 
       warning("ignoring prior weights")
     eta <- predict(object)
-    Scale <- eta2theta(eta[, 1], .lscale    , earg = .escale    )
-    parg  <- eta2theta(eta[, 2], .lshape2.p , earg = .eshape2.p )
+    M1  <- 2
+    NOS <- ncol(eta)/M1
+    Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                       .lscale    , earg = .escale    )
+    parg  <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                       .lshape2.p , earg = .eshape2.p )
+    aa   <- 1
+    qq   <- 1    
     rinv.lomax(nsim * length(Scale), scale = Scale, shape2.p = parg)
-  }, list( .lscale = lscale, .lshape2.p = lshape2.p,
-           .escale = escale, .eshape2.p = eshape2.p ))),
+  }, list(  .lscale    = lscale   ,
+            .escale    = escale   ,
+            .lshape2.p = lshape2.p,
+            .eshape2.p = eshape2.p
+                       ))),
 
 
   deriv = eval(substitute(expression({
-    aa <- qq <- 1 
-    scale <- eta2theta(eta[, 1], .lscale    , earg = .escale    )
-    parg  <- eta2theta(eta[, 2], .lshape2.p , earg = .eshape2.p )
-
-    temp1 <- log(y/scale)
-    temp2 <- (y/scale)^aa
-
-    dl.dscale <- (aa/scale) * (-parg + (parg+qq) / (1+1/temp2))
-    dl.dp <- aa * temp1 + digamma(parg + qq) - digamma(parg) - log1p(temp2)
-
-    dscale.deta <- dtheta.deta(scale, .lscale ,    earg = .escale )
+    M1 <- 2
+    NOS <- ncol(eta)/M1  # Needed for summary()
+    Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                       .lscale    , earg = .escale    )
+    parg  <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                       .lshape2.p , earg = .eshape2.p )
+    qq   <- 1
+    aa   <- 1
+    temp1 <- log(y/Scale)
+    temp2 <- (y/Scale)^aa
+    temp3 <- digamma(parg + qq)
+    temp3a <- digamma(parg)
+    temp3b <- digamma(qq)
+    temp4 <- log1p(temp2)
+  
+    dl.dscale <- (aa/Scale) * (-parg + (parg+qq) / (1+1/temp2))
+    dl.dp <- aa * temp1 + temp3 - temp3a - temp4
+    
+    dscale.deta <- dtheta.deta(Scale, .lscale ,    earg = .escale )
     dp.deta     <- dtheta.deta(parg,  .lshape2.p , earg = .eshape2.p )
-
-    c(w) * cbind( dl.dscale * dscale.deta,
-                  dl.dp     * dp.deta )
-  }), list( .lscale = lscale, .lshape2.p = lshape2.p,
-            .escale = escale, .eshape2.p = eshape2.p ))),
+    
+    myderiv <-
+      c(w) * cbind(dl.dscale * dscale.deta,
+                   dl.dp * dp.deta)
+    myderiv[, interleave.VGAM(M, M = M1)]
+  }), list(  .lscale    = lscale   ,
+             .escale    = escale   ,
+             .lshape2.p = lshape2.p,
+             .eshape2.p = eshape2.p
+                        ))),
   weight = eval(substitute(expression({
-    ned2l.dscale <- aa^2 * parg * qq / (scale^2 * (1 + parg + qq))
-    ned2l.dp <- 1 / parg^2 
-    ned2l.dscalep <-  aa * qq   / (scale * (parg + qq))
-
-    wz <- matrix(as.numeric(NA), n, dimm(M))  #M == 2 means 3=dimm(M)
-    wz[, iam(1, 1, M)] <- ned2l.dscale * dscale.deta^2
-    wz[, iam(2, 2, M)] <- ned2l.dp * dp.deta^2
-    wz[, iam(1, 2, M)] <- ned2l.dscalep * dscale.deta * dp.deta
-    wz <- c(w) * wz
+    temp5  <- trigamma(parg + qq)
+    temp5a <- trigamma(parg)
+    temp5b <- trigamma(qq)
+
+    ned2l.dscale  <- (aa^2) * parg * qq / ((1+parg+qq) * Scale^2)
+    ned2l.dp      <- temp5a - temp5
+    ned2l.dscalep <-  aa * qq   / (Scale*(parg+qq))
+    wz <-
+      array(c(c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.dp * dp.deta^2,
+              c(w) * ned2l.dscalep * dscale.deta * dp.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    wz <- arwz2wz(wz, M = M, M1 = M1)
     wz
-  }), list( .lscale = lscale, .lshape2.p = lshape2.p,
-            .escale = escale, .eshape2.p = eshape2.p ))))
+  }), list( .lscale    = lscale   ,
+            .escale    = escale   ,
+            .lshape2.p = lshape2.p,
+            .eshape2.p = eshape2.p
+                       ))))
 }
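
[Editorial sketch, not part of the upstream diff.] Because the inverse Lomax mean does not exist (see the blurb above), the linkinv slot returns the median via qinv.lomax(p = 0.5, ...), so fitted() gives medians rather than means. A minimal sketch on simulated data (seed and parameter values are illustrative only; rinv.lomax() is the simulator used in the simslot above):

    set.seed(123)
    idata <- data.frame(y = rinv.lomax(n = 500, scale = exp(2),
                                       shape2.p = exp(1)))
    fit <- vglm(y ~ 1, inv.lomax, data = idata, trace = TRUE)
    head(fitted(fit))  # Medians, since the mean does not exist
    Coef(fit)
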
 
 
- paralogistic <- function(lss,
-                          lshape1.a = "loge",
-                          lscale = "loge",
-                          ishape1.a = 2,
-                          iscale = NULL,
-                          zero = NULL) {
 
-  if (!is.logical(lss) || lss)
-    stop("argument 'lss' not specified correctly. ",
-         "See online help for important information")
 
 
 
+ paralogistic <-
+              function(lscale    = "loge",
+                       lshape1.a = "loge",
+                       iscale    = NULL,
+                       ishape1.a = NULL,
+                       imethod   = 1,
+                       lss       = TRUE,
+                       gscale    = exp(-5:5),
+                       gshape1.a = exp(-5:5),
+                       probs.y   = c(0.25, 0.50, 0.75),
+                       zero      = ifelse(lss, -2, -1)) {
+
+
 
-  if (length(zero) &&
-      !is.Numeric(zero, integer.valued = TRUE, positive = TRUE))
-    stop("bad input for argument 'zero'")
 
 
-  lshape1.a <- as.list(substitute(lshape1.a))
-  eshape1.a <- link2list(lshape1.a)
-  lshape1.a <- attr(eshape1.a, "function.name")
+  if (length(lss) != 1 && !is.logical(lss))
+    stop("Argument 'lss' not specified correctly")
+  
+  if (!is.Numeric(imethod, length.arg = 1,
+                  integer.valued = TRUE, 
+                  positive = TRUE) || imethod > 2)
+    stop("Bad input for argument 'imethod'")
+  
+  if (length(iscale) && !is.Numeric(iscale, positive = TRUE)) 
+    stop("Bad input for argument 'iscale'")
+  
+  if (length(ishape1.a) && !is.Numeric(ishape1.a, positive = TRUE))
+    stop("Bad input for argument 'ishape1.a'")
+  
+  if (length(probs.y) < 2 || max(probs.y) > 1 || 
+        !is.Numeric(probs.y, positive = TRUE))
+    stop("Bad input for argument 'probs.y'")
 
+  if (length(zero) && !is.Numeric(zero, integer.valued = TRUE))
+    stop("bad input for argument 'zero'")
+  
   lscale <- as.list(substitute(lscale))
   escale <- link2list(lscale)
   lscale <- attr(escale, "function.name")
-
-
+  
+  lshape1.a <- as.list(substitute(lshape1.a))
+  eshape1.a <- link2list(lshape1.a)
+  lshape1.a <- attr(eshape1.a, "function.name")
+  
 
   new("vglmff",
-  blurb = c("Paralogistic distribution\n\n",
-          "Links:    ",
-          namesof("shape1.a", lshape1.a, earg = eshape1.a), ", ", 
-          namesof("scale",    lscale,    earg = escale), "\n", 
-          "Mean:     scale * gamma(1 + 1/shape1.a) * ",
-                    "gamma(shape1.a - 1/shape1.a) / gamma(shape1.a)"),
+  blurb = 
+    c("Paralogistic distribution \n\n",
+      "Links:    ", 
+      ifelse (lss,
+              namesof("scale"   , lscale   , earg = escale),
+              namesof("shape1.a", lshape1.a, earg = eshape1.a)), ", ",
+      ifelse (lss, 
+              namesof("shape1.a", lshape1.a, earg = eshape1.a),
+              namesof("scale"   , lscale   , earg = escale)), "\n",
+      "Mean:     scale * gamma(1 + 1/shape1.a) * ",
+                "gamma(shape1.a - 1/shape1.a) / ",
+                "gamma(shape1.a)"),
   constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero , M)
+    dotzero <- .zero
+    M1 <- 2
+    eval(negzero.expression.VGAM)
   }), list( .zero = zero ))),
-  initialize = eval(substitute(expression({
-
-    w.y.check(w = w, y = y,
-              ncol.w.max = 1, ncol.y.max = 1)
-
-
+  infos = eval(substitute(function(...) {
+    list(M1 = 2,
+         Q1 = 1,
+         expected = TRUE,
+         zero = .zero ,
+         multipleResponses = TRUE,
+         lscale = .lscale ,       lshape1.a = .lshape1.a ,
+         escale = .escale ,       eshape1.a = .eshape1.a ,
+         .zero = zero )
+  }, list( .lscale = lscale      , .lshape1.a = lshape1.a,
+           .escale = escale      , .eshape1.a = eshape1.a,
+           .zero = zero ))),
+  initialize = eval(substitute(expression({ 
+    temp5 <- w.y.check(w = w, y = y, 
+                       Is.positive.y = TRUE, 
+                       ncol.w.max = Inf, 
+                       ncol.y.max = Inf, 
+                       out.wy = TRUE, 
+                       colsyperw = 1, 
+                       maximize = TRUE)
+    y    <- temp5$y
+    w    <- temp5$w
+    M1 <- 2  # Number of parameters for one response
+    NOS  <- ncoly <- ncol(y)
+    M    <- M1*ncol(y)
+
+    scaL.names  <- param.names("scale",     NOS)
+    sha1.names  <- param.names("shape1.a",  NOS)
 
     predictors.names <-
-      c(namesof("shape1.a", .lshape1.a , earg = .eshape1.a , tag = FALSE),
-        namesof("scale",    .lscale ,    earg = .escale ,    tag = FALSE))
-
-    parg <- 1
-
-    if (!length( .ishape1.a) || !length( .iscale )) {
-      qvec <- c( .25, .5, .75)  # Arbitrary; could be made an argument
-      ishape1.a <- if (length( .ishape1.a)) .ishape1.a else 1
-      xvec <- log( (1-qvec)^(-1/ ishape1.a ) - 1 )
-      fit0 <- lsfit(x = xvec, y = log(quantile(y, qvec )))
-    }
-
+      if ( .lss ) {
+        c(namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE),
+          namesof(sha1.names , .lshape1.a , earg = .eshape1.a , tag = FALSE))
+      } else {
+        c(namesof(sha1.names , .lshape1.a , earg = .eshape1.a , tag = FALSE),
+          namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE))
+      }
+    predictors.names <- predictors.names[interleave.VGAM(M, M = M1)]
+    
     if (!length(etastart)) {
-      aa <- rep(if (length( .ishape1.a)) .ishape1.a else
-                abs(1/fit0$coef[2]),
-                length.out = n)
-      scale <- rep(if (length( .iscale )) .iscale else
-                   exp(fit0$coef[1]),
-                   length.out = n)
-
-
-
-
-      parg   <- 1
-      qq     <- aa
-      outOfRange <- (parg + 1/aa <= 0)
-      parg[outOfRange] <- 1 / aa[outOfRange] + 1
-      outOfRange <- (qq   - 1/aa <= 0)
-      aa[outOfRange] <-
-      qq[outOfRange] <- 2 # Need aa > 1, where aa == qq
+      sc.init <-
+      aa.init <- matrix(as.numeric(NA), n, NOS)
+          
+      for (spp. in 1:NOS) {  # For each response 'y_spp.'... do:
+        yvec <- y[, spp.]
+        wvec <- w[, spp.]
 
+        if ( .imethod == 1 ) {
+          gscale     <- .gscale
+          gshape1.a  <- .gshape1.a
+          if (length( .iscale ))
+            gscale    <-  rep( .iscale    , length = NOS)
+          if (length( .ishape1.a ))
+            gshape1.a <-  rep( .ishape1.a , length = NOS)
+          allmat1 <- expand.grid(shape1.a = gshape1.a)
+          allmat2 <- matrix(as.numeric(NA), nrow(allmat1), 2)
+
+          ll.para <- function(scaleval, x = x, y = y, w = w, extraargs) { 
+            ans <- sum(c(w) * dgenbetaII(x = y,
+                                         scale    = scaleval,
+                                         shape1.a = extraargs$Shape1.a,
+                                         shape2.p = 1,
+                                         shape3.q = extraargs$Shape1.a,
+                                         log = TRUE))
+            ans
+          }
+
+          for (iloc in 1:nrow(allmat1)) {
+            allmat2[iloc, ] <-
+              grid.search(gscale, objfun = ll.para,
+                            y = yvec, x = x, w = wvec,
+                            ret.objfun = TRUE,  # 2nd value is the loglik
+                            extraargs = list(Shape1.a = allmat1[iloc, 1]))
+          }
+          ind5 <- which.max(allmat2[, 2])  # 2nd value is the loglik
+          sc.init[, spp.] <- allmat2[ind5, 1]
+          aa.init[, spp.] <- allmat1[ind5, 1]
+        } else {  # .imethod == 2
+          qvec <- .probs.y 
+          ishape3.q <- if (length( .ishape1.a )) .ishape1.a else 1
+          xvec <- log( (1-qvec)^(-1/ ishape3.q) - 1 )
+          fit0 <- lsfit(x = xvec, y = log(quantile(yvec,  qvec)))
+          sc.init[, spp.] <- if (length( .iscale )) .iscale else
+                             exp(fit0$coef[1])
+          aa.init[, spp.] <- if (length( .ishape1.a )) .ishape1.a else
+                             1/fit0$coef[2]
+        }
+      }  # End of for (spp. ...)
 
-        etastart <-
-          cbind(theta2eta(aa,    .lshape1.a , earg = .eshape1.a ),
-                theta2eta(scale, .lscale ,    earg = .escale ))
+      finite.mean <- 1 < aa.init * aa.init
+      COP.use <- 1.15
+      while (FALSE && any(!finite.mean)) {
+        aa.init[!finite.mean] <- 0.1 + aa.init[!finite.mean] * COP.use
+        finite.mean <- 1 < aa.init * aa.init
       }
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale, 
-            .ishape1.a = ishape1.a, .iscale = iscale
-            ))),
+
+      etastart <- if ( .lss )
+              cbind(theta2eta(sc.init, .lscale    , earg = .escale    ),
+                    theta2eta(aa.init, .lshape1.a , earg = .eshape1.a )) else
+              cbind(theta2eta(aa.init, .lshape1.a , earg = .eshape1.a ),
+                    theta2eta(sc.init, .lscale    , earg = .escale    ))
+      etastart <- etastart[, interleave.VGAM(M, M = M1)]
+    }  # End of etastart.
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
+            .iscale    = iscale   , .ishape1.a = ishape1.a,
+            .gscale    = gscale   , .gshape1.a = gshape1.a,           
+            .imethod   = imethod  , .probs.y  = probs.y,
+            .lss = lss ))), 
   linkinv = eval(substitute(function(eta, extra = NULL) {
-    aa     <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    Scale  <- eta2theta(eta[, 2], .lscale ,    earg = .escale )
-    parg   <- 1
-    qq     <- aa
+    M1 <- 2
+    NOS <- ncol(eta)/M1
+    if ( .lss ) {
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale ,  earg = .escale )
+      aa    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+      Scale <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lscale    ,  earg = .escale )
+    }
+    parg <- 1
+    qq   <- aa
 
     ans <- Scale * exp(lgamma(parg + 1/aa) +
                        lgamma(qq   - 1/aa) - lgamma(parg) - lgamma(qq))
@@ -4351,42 +5683,72 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
     ans[aa          <= 0] <- NA
     ans[Scale       <= 0] <- NA
     ans
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .eshape1.a = eshape1.a, .escale = escale))),
+  }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+           .escale    = escale   , .eshape1.a = eshape1.a,
+           .lss = lss ))),
   last = eval(substitute(expression({
-    misc$link <-    c(shape1.a = .lshape1.a , scale = .lscale )
+    M1 <- 2
+
+    misc$link <- c(rep( if ( .lss ) .lscale else .lshape1.a , len = ncoly),
+                   rep( if ( .lss ) .lshape1.a else .lscale , len = ncoly))[
+                   interleave.VGAM(M, M = M1)]
+    temp.names <- if ( .lss ) {
+      c(scaL.names, sha1.names)
+    } else {
+      c(sha1.names, scaL.names)
+    }
+    names(misc$link) <- temp.names[interleave.VGAM(M, M = M1)]
 
-    misc$earg <- list(shape1.a = .eshape1.a , scale = .escale )
+    misc$earg <- vector("list", M)
+    names(misc$earg) <- temp.names
+    for (ii in 1:ncoly)
+      if ( .lss ) {
+        misc$earg[[M1*ii-1]] <- .escale
+        misc$earg[[M1*ii  ]] <- .eshape1.a
+      } else {
+        misc$earg[[M1*ii-1]] <- .eshape1.a
+        misc$earg[[M1*ii  ]] <- .escale
+      }
 
-    misc$expected <- TRUE
-    misc$multipleResponses <- FALSE
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale))),
+    misc$expected          <- TRUE
+    misc$multipleResponses <- TRUE
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
+            .lss = lss ))), 
   loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta,
-             extra = NULL,
-             summation = TRUE) {
-    aa    <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    Scale <- eta2theta(eta[, 2], .lscale ,    earg = .escale )
-    parg <- 1
-    qq <- aa
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ll.elts <- c(w) * dparalogistic(x = y, shape1.a = aa,
-                                      scale = Scale, log = TRUE)
-      if (summation) {
-        sum(ll.elts)
+    function(mu, y, w, residuals = FALSE, 
+             eta, extra = NULL, summation = TRUE) {
+      M1  <- 2
+      NOS <- ncol(eta)/M1
+      if ( .lss ) {
+        Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                           .lscale    , earg = .escale    )
+        aa    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                           .lshape1.a , earg = .eshape1.a )
       } else {
-        ll.elts
+        aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                           .lshape1.a , earg = .eshape1.a )
+        Scale <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                           .lscale    , earg = .escale    )
       }
-    }
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .eshape1.a = eshape1.a, .escale = escale))),
+      parg   <- 1
+      qq     <- aa
+      if (residuals) {
+        stop("loglikelihood residuals not implemented yet")
+      } else {
+        ll.elts <-
+          c(w) * dgenbetaII(x = y, scale = Scale, shape1.a = aa,
+                            shape2.p = parg, shape3.q = aa, log = TRUE)
+        if (summation) {
+          sum(ll.elts)
+        } else {
+          ll.elts
+        }
+      }
+    }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+             .escale    = escale   , .eshape1.a = eshape1.a,
+             .lss = lss ))),
   vfamily = c("paralogistic"),
-
-
-
   simslot = eval(substitute(
   function(object, nsim) {
 
@@ -4394,142 +5756,288 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
               pwts else weights(object, type = "prior")
     if (any(pwts != 1)) 
       warning("ignoring prior weights")
+
     eta <- predict(object)
-    aa    <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    Scale <- eta2theta(eta[, 2], .lscale  ,   earg = .escale )
+    M1 <- 2
+    NOS <- ncol(eta)/M1
+    if ( .lss ) {
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale ,  earg = .escale )
+      aa    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+      Scale <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lscale    ,  earg = .escale )
+    }
     rparalogistic(nsim * length(Scale), shape1.a = aa, scale = Scale)
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .eshape1.a = eshape1.a, .escale = escale))),
-
-
-
-  deriv = eval(substitute(expression({
-    aa    <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    scale <- eta2theta(eta[, 2], .lscale ,    earg = .escale )
+  }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+           .escale    = escale   , .eshape1.a = eshape1.a,
+           .lss = lss ))),
+  deriv = eval(substitute(expression({ 
+    NOS <- ncol(eta)/M1  # Needed for summary()
+    if ( .lss ) { 
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale    , earg = .escale   )
+      aa    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+      Scale <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lscale    , earg = .escale )
+    }  
     parg <- 1
-    qq <- aa
+    qq   <- aa
 
-    temp1 <- log(y/scale)
-    temp2 <- (y/scale)^aa
+    temp1 <- log(y/Scale)
+    temp2 <- (y/Scale)^aa
+    temp3 <- digamma(parg + qq)
     temp3a <- digamma(parg)
     temp3b <- digamma(qq)
-
-    dl.da     <- 1/aa + parg * temp1 - (parg+qq) * temp1 / (1+1/temp2)
-    dl.dscale <- (aa/scale) * (-parg + (parg+qq) / (1+1/temp2))
-
+    temp4 <- log1p(temp2)
+  
+    dl.dscale <- (aa/Scale) * (-parg + (parg+qq) / (1+1/temp2))
+    dl.da <- 1/aa + parg * temp1 - (parg+qq) * temp1 / (1+1/temp2)
+    
+    dscale.deta <- dtheta.deta(Scale, .lscale ,    earg = .escale )
     da.deta     <- dtheta.deta(aa,    .lshape1.a , earg = .eshape1.a )
-    dscale.deta <- dtheta.deta(scale, .lscale ,    earg = .escale )
-
-    c(w) * cbind( dl.da * da.deta,
-               dl.dscale * dscale.deta)
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale))),
+    
+    myderiv <- if ( .lss ) {
+      c(w) * cbind(dl.dscale * dscale.deta,
+                   dl.da * da.deta)
+    } else {
+      c(w) * cbind(dl.da * da.deta,
+                   dl.dscale * dscale.deta)
+    }
+    myderiv[, interleave.VGAM(M, M = M1)]
+  }), list(  .lscale    = lscale   , .lshape1.a = lshape1.a,
+             .escale    = escale   , .eshape1.a = eshape1.a,
+             .lss = lss ))),
   weight = eval(substitute(expression({
-    ned2l.da <- (1 + parg + qq +
-                 parg * qq * (trigamma(parg) + trigamma(qq) +
-                (temp3b - temp3a + (parg-qq)/(parg*qq))^2 - 
+    temp5  <- trigamma(parg + qq)
+    temp5a <- trigamma(parg)
+    temp5b <- trigamma(qq)
+
+    ned2l.da <- (1 + parg + qq + parg * qq * (temp5a + temp5b +
+                (temp3b - temp3a + (parg-qq)/(parg*qq))^2 -
                 (parg^2 + qq^2) / (parg*qq)^2)) / (aa^2 * (1+parg+qq))
-    ned2l.dscale <- aa^2 * parg * qq / (scale^2 * (1+parg+qq))
-    ned2l.dascale <- (parg - qq - parg*qq*(temp3a -temp3b)) /(
-                      scale*(1 + parg+qq))
-
-    wz <- matrix(as.numeric(NA), n, dimm(M))  #M == 2 means 3=dimm(M)
-    wz[, iam(1, 1, M)] <- ned2l.da * da.deta^2
-    wz[, iam(2, 2, M)] <- ned2l.dscale * dscale.deta^2
-    wz[, iam(1, 2, M)] <- ned2l.dascale * da.deta * dscale.deta
-    wz <- c(w) * wz
+    ned2l.dscale  <- (aa^2) * parg * qq / ((1+parg+qq) * Scale^2)
+    ned2l.dascale <- (parg - qq - parg * qq *
+                     (temp3a -temp3b)) / (Scale*(1 + parg+qq))
+    wz <- if ( .lss ) {
+      array(c(c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.da * da.deta^2,
+              c(w) * ned2l.dascale * da.deta * dscale.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    } else {
+      array(c(c(w) * ned2l.da * da.deta^2,
+              c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.dascale * da.deta * dscale.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    }
+    wz <- arwz2wz(wz, M = M, M1 = M1)
     wz
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale))))
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a, 
+            .lss = lss ))))
 }
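
[Editorial sketch, not part of the upstream diff.] The loglikelihood slot above now evaluates the paralogistic density through dgenbetaII() with shape2.p = 1 and shape3.q = shape1.a, the special case of the generalized beta II that defines this family. A quick numerical check of that identity (the values of yy, scale, and shape1.a are chosen arbitrarily):

    yy <- c(0.5, 1, 2, 5)
    all.equal(dparalogistic(yy, scale = 3, shape1.a = 2),
              dgenbetaII(yy, scale = 3, shape1.a = 2,
                         shape2.p = 1, shape3.q = 2))  # TRUE
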
 
 
 
- inv.paralogistic <- function(lss,
-                              lshape1.a = "loge", lscale = "loge",
-                              ishape1.a = 2,      iscale = NULL,
-                              zero = NULL) {
 
-  if (!is.logical(lss) || lss)
-    stop("argument 'lss' not specified correctly. ",
-         "See online help for important information")
 
 
+ inv.paralogistic <-
+              function(lscale    = "loge",
+                       lshape1.a = "loge",
+                       iscale    = NULL,
+                       ishape1.a = NULL,
+                       imethod   = 1,
+                       lss       = TRUE,
+                       gscale    = exp(-5:5),
+                       gshape1.a = exp(-5:5),
+                       probs.y   = c(0.25, 0.50, 0.75),
+                       zero      = ifelse(lss, -2, -1)) {
 
-  if (length(zero) &&
-      !is.Numeric(zero, integer.valued = TRUE, positive = TRUE))
-    stop("bad input for argument 'zero'")
 
 
-  lshape1.a <- as.list(substitute(lshape1.a))
-  eshape1.a <- link2list(lshape1.a)
-  lshape1.a <- attr(eshape1.a, "function.name")
 
+  if (length(lss) != 1 && !is.logical(lss))
+    stop("Argument 'lss' not specified correctly")
+  
+  if (!is.Numeric(imethod, length.arg = 1,
+                  integer.valued = TRUE, 
+                  positive = TRUE) || imethod > 2)
+    stop("Bad input for argument 'imethod'")
+  
+  if (length(iscale) && !is.Numeric(iscale, positive = TRUE)) 
+    stop("Bad input for argument 'iscale'")
+  
+  if (length(ishape1.a) && !is.Numeric(ishape1.a, positive = TRUE))
+    stop("Bad input for argument 'ishape1.a'")
+  
+  if (length(probs.y) < 2 || max(probs.y) > 1 || 
+        !is.Numeric(probs.y, positive = TRUE))
+    stop("Bad input for argument 'probs.y'")
+
+  if (length(zero) && !is.Numeric(zero, integer.valued = TRUE))
+    stop("bad input for argument 'zero'")
+  
   lscale <- as.list(substitute(lscale))
   escale <- link2list(lscale)
   lscale <- attr(escale, "function.name")
+  
+  lshape1.a <- as.list(substitute(lshape1.a))
+  eshape1.a <- link2list(lshape1.a)
+  lshape1.a <- attr(eshape1.a, "function.name")
+  
 
-
-  new("vglmff",
-  blurb = c("Inverse paralogistic distribution\n\n",
-            "Links:    ",
-            namesof("shape1.a", lshape1.a, earg = eshape1.a), ", ", 
-            namesof("scale",    lscale,    earg = escale), "\n", 
-            "Mean:     scale * gamma(shape1.a + 1/shape1.a) * ",
-                      "gamma(1 - 1/shape1.a)/gamma(shape1.a)"),
+  new("vglmff", 
+  blurb = 
+    c("Inverse paralogistic distribution \n\n",
+      "Links:    ", 
+      ifelse (lss,
+              namesof("scale"   , lscale   , earg = escale),
+              namesof("shape1.a", lshape1.a, earg = eshape1.a)), ", ",
+      ifelse (lss, 
+              namesof("shape1.a", lshape1.a, earg = eshape1.a),
+              namesof("scale"   , lscale   , earg = escale)), "\n",
+      "Mean:     scale * gamma(shape1.a + 1/shape1.a) * ",
+                "gamma(1 - 1/shape1.a) / ",
+                "gamma(shape1.a)"),
   constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero , M)
+    dotzero <- .zero
+    M1 <- 2
+    eval(negzero.expression.VGAM)
   }), list( .zero = zero ))),
-  initialize = eval(substitute(expression({
-
-    w.y.check(w = w, y = y,
-              ncol.w.max = 1, ncol.y.max = 1)
-
-
-    predictors.names <-
-      c(namesof("shape1.a", .lshape1.a ,  earg = .eshape1.a , tag = FALSE),
-        namesof("scale",    .lscale    ,  earg = .escale ,    tag = FALSE))
-
-    if (!length( .ishape1.a) || !length( .iscale )) {
-      qvec <- c( .25, .5, .75)  # Arbitrary; could be made an argument
-      ishape2.p <- if (length( .ishape1.a )) .ishape1.a else 1
-      xvec <- log( qvec^(-1/ ishape2.p ) - 1 )
-      fit0 <- lsfit(x = xvec, y = log(quantile(y, qvec )))
-    }
-
-    qq <- 1
+  infos = eval(substitute(function(...) {
+    list(M1 = 2,
+         Q1 = 1,
+         expected = TRUE,
+         zero = .zero ,
+         multipleResponses = TRUE,
+         lscale    = .lscale    , lshape1.a = .lshape1.a ,
+         escale    = .escale    , eshape1.a = .eshape1.a ,
+         .zero = zero )
+  }, list( .lscale = lscale      , .lshape1.a = lshape1.a,
+           .escale = escale      , .eshape1.a = eshape1.a,
+           .zero = zero ))),
+  initialize = eval(substitute(expression({ 
+    temp5 <- w.y.check(w = w, y = y, 
+                       Is.positive.y = TRUE, 
+                       ncol.w.max = Inf, 
+                       ncol.y.max = Inf, 
+                       out.wy = TRUE, 
+                       colsyperw = 1, 
+                       maximize = TRUE)
+    y    <- temp5$y
+    w    <- temp5$w
+    M1 <- 2  # Number of parameters for one response
+    NOS  <- ncoly <- ncol(y)
+    M    <- M1*ncol(y)
+
+    scaL.names  <- param.names("scale",     NOS)
+    sha1.names  <- param.names("shape1.a",  NOS)
+
+    predictors.names <- if ( .lss ) {
+        c(namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE),
+          namesof(sha1.names , .lshape1.a , earg = .eshape1.a , tag = FALSE))
+      } else {
+        c(namesof(sha1.names , .lshape1.a , earg = .eshape1.a , tag = FALSE),
+          namesof(scaL.names , .lscale    , earg = .escale    , tag = FALSE))
+      }
+    predictors.names <- predictors.names[interleave.VGAM(M, M = M1)]
+    
     if (!length(etastart)) {
-      aa <- rep(if (length( .ishape1.a)) .ishape1.a else -1/fit0$coef[2],
-                length = n)
-      scale <- rep(if (length( .iscale )) .iscale else
-                  exp(fit0$coef[1]), length = n)
-
-
-
+      sc.init <-
+      aa.init <- matrix(as.numeric(NA), n, NOS)
+          
+      for (spp. in 1:NOS) {  # For each response 'y_spp.'... do:
+        yvec <- y[, spp.]
+        wvec <- w[, spp.]
 
+        if ( .imethod == 1 ) {
+          gscale     <- .gscale
+          gshape1.a  <- .gshape1.a
+          if (length( .iscale ))
+            gscale    <-  rep( .iscale    , length = NOS)
+          if (length( .ishape1.a ))
+            gshape1.a <-  rep( .ishape1.a , length = NOS)
+          allmat1 <- cbind(shape1.a = gshape1.a)
+          allmat2 <- matrix(as.numeric(NA), nrow(allmat1), 2)
+
+          ll.invp <- function(scaleval, x = x, y = y, w = w, extraargs) { 
+            ans <- sum(c(w) * dgenbetaII(x = y,
+                                         scale    = scaleval,
+                                         shape1.a = extraargs$Shape1.a,
+                                         shape2.p = extraargs$Shape1.a,
+                                         shape3.q = 1,
+                                         log = TRUE))
+            ans
+          }
+
+          for (iloc in 1:nrow(allmat1)) {
+            allmat2[iloc, ] <-
+              grid.search(gscale, objfun = ll.invp,
+                            y = yvec, x = x, w = wvec,
+                            ret.objfun = TRUE,  # 2nd value is the loglik
+                            extraargs = list(Shape1.a = allmat1[iloc, 1]))
+          }
+          ind5 <- which.max(allmat2[, 2])  # 2nd value is the loglik
+          sc.init[, spp.] <- allmat2[ind5, 1]
+          aa.init[, spp.] <- allmat1[ind5, 1]
+        } else {  # .imethod == 2
+          qvec <- .probs.y 
+          ishape2.p <- if (length( .ishape1.a )) .ishape1.a else 1
+          xvec <- log( qvec^(-1/ ishape2.p) - 1 )
+          fit0 <- lsfit(x = xvec, y = log(quantile(yvec,  qvec)))
+          sc.init[, spp.] <- if (length( .iscale )) .iscale else
+                             exp(fit0$coef[1])
+          aa.init[, spp.] <- if (length( .ishape1.a )) .ishape1.a else
+                             -1/fit0$coef[2]
+        }
+      }  # End of for (spp. ...)
 
-    parg <- aa
-    qq <- 1
-      outOfRange <- (parg + 1/aa <= 0)
-      parg[outOfRange] <-
-        aa[outOfRange] <- 2
-      outOfRange <- (qq   - 1/aa <= 0)
-      qq[outOfRange] <- 1 / aa[outOfRange] + 1
 
 
+      finite.mean <- 1 < aa.init
+      COP.use <- 1.15
+      while (FALSE && any(!finite.mean)) {
+        aa.init[!finite.mean] <- 0.1 + aa.init[!finite.mean] * COP.use
+        finite.mean <- 1 < aa.init
+      }
 
-      etastart <-
-        cbind(theta2eta(aa,    .lshape1.a , earg = .eshape1.a ),
-              theta2eta(scale, .lscale ,    earg = .escale ))
-    }
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale,
-            .ishape1.a = ishape1.a, .iscale = iscale ))),
+      etastart <- if ( .lss )
+          cbind(theta2eta(sc.init, .lscale    , earg = .escale    ),
+                theta2eta(aa.init, .lshape1.a , earg = .eshape1.a )) else
+          cbind(theta2eta(aa.init, .lshape1.a , earg = .eshape1.a ),
+                theta2eta(sc.init, .lscale    , earg = .escale    ))
+      etastart <- etastart[, interleave.VGAM(M, M = M1)]
+    }  # End of etastart.
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
+            .iscale    = iscale   , .ishape1.a = ishape1.a,
+            .gscale    = gscale   , .gshape1.a = gshape1.a,           
+            .imethod   = imethod  , .probs.y   = probs.y,
+            .lss = lss ))), 
   linkinv = eval(substitute(function(eta, extra = NULL) {
-    aa     <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    Scale  <- eta2theta(eta[, 2], .lscale    , earg = .escale )
+    M1 <- 2
+    NOS <- ncol(eta)/M1
+    if ( .lss ) {
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale ,  earg = .escale )
+      aa    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a )
+      Scale <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lscale    ,  earg = .escale )
+    }
     parg <- aa
-    qq <- 1
+    qq   <- 1
 
     ans <- Scale * exp(lgamma(parg + 1/aa) +
                        lgamma(qq   - 1/aa) - lgamma(parg) - lgamma(qq))
@@ -4538,42 +6046,72 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
     ans[aa          <= 0] <- NA
     ans[Scale       <= 0] <- NA
     ans
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .eshape1.a = eshape1.a, .escale = escale))),
+  }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+           .escale    = escale   , .eshape1.a = eshape1.a,
+           .lss = lss ))),
   last = eval(substitute(expression({
-    misc$link <-    c(shape1.a = .lshape1.a , scale = .lscale )
+    M1 <- 2
 
-    misc$earg <- list(shape1.a = .eshape1.a , scale = .escale )
+    misc$link <- c(rep( if ( .lss ) .lscale else .lshape1.a , len = ncoly),
+                   rep( if ( .lss ) .lshape1.a else .lscale , len = ncoly))[
+                   interleave.VGAM(M, M = M1)]
+    temp.names <- if ( .lss ) {
+      c(scaL.names, sha1.names)
+    } else {
+      c(sha1.names, scaL.names)
+    }
+    names(misc$link) <- temp.names[interleave.VGAM(M, M = M1)]
 
-    misc$expected <- TRUE
-    misc$multipleResponses <- FALSE
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale))),
+    misc$earg <- vector("list", M)
+    names(misc$earg) <- temp.names
+    for (ii in 1:ncoly)
+      if ( .lss ) {
+        misc$earg[[M1*ii-1]] <- .escale
+        misc$earg[[M1*ii  ]] <- .eshape1.a
+      } else {
+        misc$earg[[M1*ii-1]] <- .eshape1.a
+        misc$earg[[M1*ii  ]] <- .escale
+      }
+
+    misc$expected          <- TRUE
+    misc$multipleResponses <- TRUE
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
+            .lss = lss ))), 
   loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta,
-             extra = NULL,
-             summation = TRUE) {
-    aa    <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    Scale <- eta2theta(eta[, 2], .lscale ,    earg = .escale )
-    parg <- aa
-    qq <- 1
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ll.elts <- c(w) * dinv.paralogistic(x = y, shape1.a = aa,
-                                         scale = Scale, log = TRUE)
-      if (summation) {
-        sum(ll.elts)
+    function(mu, y, w, residuals = FALSE, 
+             eta, extra = NULL, summation = TRUE) {
+      M1  <- 2
+      NOS <- ncol(eta)/M1
+      if ( .lss ) {
+        Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                           .lscale    , earg = .escale    )
+        aa    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                           .lshape1.a , earg = .eshape1.a )
       } else {
-        ll.elts
+        aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                           .lshape1.a , earg = .eshape1.a )
+        Scale <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                           .lscale    , earg = .escale    )
       }
-    }
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .eshape1.a = eshape1.a, .escale = escale))),
+      parg   <- aa
+      if (residuals) {
+        stop("loglikelihood residuals not implemented yet")
+      } else {
+        ll.elts <-
+          c(w) * dgenbetaII(x = y, scale = Scale, shape1.a = aa,
+                            shape2.p = aa, shape3.q = 1, log = TRUE)
+        if (summation) {
+          sum(ll.elts)
+        } else {
+          ll.elts
+        }
+      }
+    }, list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+             .escale    = escale   , .eshape1.a = eshape1.a,
+             .lss = lss ))),
   vfamily = c("inv.paralogistic"),
 
-
-
   simslot = eval(substitute(
   function(object, nsim) {
 
@@ -4582,54 +6120,92 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
     if (any(pwts != 1)) 
       warning("ignoring prior weights")
     eta <- predict(object)
-    aa    <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    Scale <- eta2theta(eta[, 2], .lscale  ,   earg = .escale )
+    M1 <- 2
+    NOS <- ncol(eta)/M1
+    if ( .lss ) {
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale    , earg = .escale   )
+      aa    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+      Scale <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lscale    , earg = .escale )
+    }  
+    parg <- aa
+    qq   <- 1
     rinv.paralogistic(nsim * length(Scale), shape1.a = aa, scale = Scale)
-  }, list( .lshape1.a = lshape1.a, .lscale = lscale,
-           .eshape1.a = eshape1.a, .escale = escale))),
-
+  }, list(  .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a,
+            .lss = lss ))),
 
 
   deriv = eval(substitute(expression({
-    aa    <- eta2theta(eta[, 1], .lshape1.a , earg = .eshape1.a )
-    scale <- eta2theta(eta[, 2], .lscale ,    earg = .escale )
-    parg <- aa 
-    qq <- 1
-
-    temp1 <- log(y/scale)
-    temp2 <- (y/scale)^aa
+    M1 <- 2
+    NOS <- ncol(eta)/M1  # Needed for summary()
+    if ( .lss ) { 
+      Scale <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lscale    , earg = .escale   )
+      aa    <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+    } else {
+      aa    <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                         .lshape1.a , earg = .eshape1.a)
+      Scale <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                         .lscale    , earg = .escale )
+    }  
+    parg <- aa
+    qq   <- 1
+    temp1 <- log(y/Scale)
+    temp2 <- (y/Scale)^aa
+    temp3 <- digamma(parg + qq)
     temp3a <- digamma(parg)
     temp3b <- digamma(qq)
-
+    temp4 <- log1p(temp2)
+  
+    dl.dscale <- (aa/Scale) * (-parg + (parg+qq) / (1+1/temp2))
     dl.da <- 1/aa + parg * temp1 - (parg+qq) * temp1 / (1+1/temp2)
-    dl.dscale <- (aa/scale) * (-parg + (parg+qq) / (1+1/temp2))
-
+    
+    dscale.deta <- dtheta.deta(Scale, .lscale ,    earg = .escale )
     da.deta     <- dtheta.deta(aa,    .lshape1.a , earg = .eshape1.a )
-    dscale.deta <- dtheta.deta(scale, .lscale ,    earg = .escale )
-
-    c(w) * cbind( dl.da     * da.deta,
-                  dl.dscale * dscale.deta )
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale))),
-
+    
+    myderiv <- if ( .lss ) {
+      c(w) * cbind(dl.dscale * dscale.deta,
+                   dl.da * da.deta)
+    } else {
+      c(w) * cbind(dl.da * da.deta,
+                   dl.dscale * dscale.deta)
+    }
+    myderiv[, interleave.VGAM(M, M = M1)]
+  }), list(  .lscale    = lscale   , .lshape1.a = lshape1.a,
+             .escale    = escale   , .eshape1.a = eshape1.a,
+             .lss = lss ))),
   weight = eval(substitute(expression({
-    ned2l.da <- (1 + parg + qq +
-                 parg * qq * (trigamma(parg) + trigamma(qq) +
-                (temp3b - temp3a + (parg-qq)/(parg*qq))^2 - 
-                (parg^2 + qq^2) / (parg*qq)^2)) / (aa^2 * (1 + parg + qq))
-    ned2l.dscale <- aa^2 * parg * qq / (scale^2 * (1 + parg + qq))
-    ned2l.dascale <- (parg - qq -
-                      parg * qq * (temp3a -temp3b)) / (scale *
-                     (1 + parg + qq))
-
-    wz <- matrix(as.numeric(NA), n, dimm(M))  # M==3 means 6=dimm(M)
-    wz[, iam(1, 1, M)] <- ned2l.da * da.deta^2
-    wz[, iam(2, 2, M)] <- ned2l.dscale * dscale.deta^2
-    wz[, iam(1, 2, M)] <- ned2l.dascale * da.deta * dscale.deta
-    wz <- c(w) * wz
+    temp5  <- trigamma(parg + qq)
+    temp5a <- trigamma(parg)
+    temp5b <- trigamma(qq)
+
+    ned2l.da <- (1 + parg + qq + parg * qq * (temp5a + temp5b +
+                (temp3b - temp3a + (parg-qq)/(parg*qq))^2 -
+                (parg^2 + qq^2) / (parg*qq)^2)) / (aa^2 * (1+parg+qq))
+    ned2l.dscale  <- (aa^2) * parg * qq / ((1+parg+qq) * Scale^2)
+    ned2l.dascale <- (parg - qq - parg * qq *
+                     (temp3a -temp3b)) / (Scale*(1 + parg+qq))
+    wz <- if ( .lss ) {
+      array(c(c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.da * da.deta^2,
+              c(w) * ned2l.dascale * da.deta * dscale.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    } else {
+      array(c(c(w) * ned2l.da * da.deta^2,
+              c(w) * ned2l.dscale * dscale.deta^2,
+              c(w) * ned2l.dascale * da.deta * dscale.deta),
+            dim = c(n, M/M1, M1*(M1+1)/2))
+    }
+    wz <- arwz2wz(wz, M = M, M1 = M1)
     wz
-  }), list( .lshape1.a = lshape1.a, .lscale = lscale,
-            .eshape1.a = eshape1.a, .escale = escale))))
+  }), list( .lscale    = lscale   , .lshape1.a = lshape1.a,
+            .escale    = escale   , .eshape1.a = eshape1.a, 
+            .lss = lss ))))
 }
 
 
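The hunks above give inv.paralogistic() an 'lss' argument controlling
whether the linear predictors are ordered (scale, shape1.a) or the
reverse, which is why the deriv and weight slots branch on .lss .
A minimal sketch of the new ordering (the data below are simulated;
only the 'lss' argument is taken from the diff itself):

library(VGAM)
set.seed(123)
idat <- data.frame(y = rinv.paralogistic(500, shape1.a = 2, scale = 3))
fit <- vglm(y ~ 1, inv.paralogistic(lss = TRUE), data = idat)
coef(fit, matrix = TRUE)  # predictors: loge(scale), loge(shape1.a)
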
@@ -4640,141 +6216,8 @@ dinv.paralogistic <- function(x, scale = 1, shape1.a, log = FALSE)
 
 
 
- if (FALSE)
- genlognormal <- function(link.sigma = "loge", link.r = "loge",
-                          init.sigma = 1, init.r = 1, zero = NULL) {
-warning("20040402; does not work, possibly because first derivs are ",
-        "not continuous (sign() is used). Certainly, the derivs wrt ",
-        "mymu are problematic (run with maxit=4:9 and look at weight ",
-        "matrices). Possibly fundamentally cannot be estimated by IRLS. ",
-        "Pooling does not seem to help")
-
-
-
-  if (length(zero) &&
-      !is.Numeric(zero, integer.valued = TRUE, positive = TRUE))
-    stop("bad input for argument 'zero'")
-
-
-
-  link.sigma <- as.list(substitute(link.sigma))
-  esigma <- link2list(link.sigma)
-  link.sigma <- attr(esigma, "function.name")
-
-  link.r <- as.list(substitute(link.r))
-  er <- link2list(link.r)
-  link.r <- attr(er, "function.name")
-
-
-  new("vglmff",
-  blurb = c("Three-parameter generalized lognormal distribution\n\n",
-          "Links:    ",
-          "loc; ",
-          namesof("sigma", link.sigma, earg = esigma, tag = TRUE), ", ",
-          namesof("r",     link.r,     earg = er,     tag = TRUE)),
-  constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero , M)
-  }), list( .zero = zero ))),
-  initialize = eval(substitute(expression({
-    if (ncol(cbind(y)) != 1)
-      stop("response must be a vector or a one-column matrix")
-    predictors.names <-
-    c(namesof("loc", "identitylink", earg = list(),     tag = FALSE),
-      namesof("sigma", .link.sigma, earg = .esigma, tag = FALSE),
-      namesof("r",     .link.r,     earg = .er,     tag = FALSE))
-
-    if (!length( .init.sigma) || !length( .init.r)) {
-      init.r <- if (length( .init.r)) .init.r else 1
-      sigma.init <- (0.5 * sum(abs(log(y) -
-                               mean(log(y )))^init.r))^(1/init.r)
-    }
-    if (any(y <= 0))
-      stop("y must be positive")
-
-    if (!length(etastart)) {
-      sigma.init <- rep(if (length( .init.sigma)) .init.sigma else
-                        sigma.init, length.out = n)
-      r.init <- if (length( .init.r)) .init.r else init.r
-      etastart <-
-        cbind(mu = rep(log(median(y)), length.out = n),
-              sigma = sigma.init,
-              r = r.init)
-    }
-  }), list( .link.sigma = link.sigma, .link.r = link.r,
-            .init.sigma = init.sigma, .init.r = init.r ))),
 
-  linkinv = eval(substitute(function(eta, extra = NULL) {
-    mymu  <- eta2theta(eta[, 1], "identitylink" , earg = list())
-    sigma <- eta2theta(eta[, 2], .link.sigma , earg = .esigma )
-    rrrrr <- eta2theta(eta[, 3], .link.r , earg = .er )
-    rrrrr
-  }, list( .link.sigma = link.sigma, .link.r = link.r ))),
 
-  last = eval(substitute(expression({
-    misc$link = c(loc = "identitylink",
-                  "sigma" = .link.sigma,
-                  r = .link.r )
-    misc$expected = TRUE
-  }), list( .link.sigma = link.sigma, .link.r = link.r ))),
-  loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta,
-             extra = NULL,
-             summation = TRUE) {
-    mymu <- eta2theta(eta[, 1], "identitylink", earg = list())
-    sigma <- eta2theta(eta[, 2], .link.sigma, earg = .esigma)
-    r <- eta2theta(eta[, 3], .link.r, earg = .er)
-    temp89 <- (abs(log(y)-mymu)/sigma)^r
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ll.elts <- c(w) * (-log(r^(1/r) * sigma) - lgamma(1+1/r) - temp89/r)
-      if (summation) {
-        sum(ll.elts)
-      } else {
-        ll.elts
-      }
-    }
-  }, list( .link.sigma = link.sigma, .link.r = link.r ))),
-  vfamily = c("genlognormal3"),
-  deriv = eval(substitute(expression({
-    mymu  <- eta2theta(eta[, 1], "identitylink", earg = list())
-    sigma <- eta2theta(eta[, 2], .link.sigma, earg = .esigma)
-
-    r <- eta2theta(eta[, 3], .link.r, earg = .er)
-    ss <- 1 + 1/r
-    temp33 <- (abs(log(y)-mymu)/sigma)
-    temp33r1 <- temp33^(r-1)
-
-    dl.dmymu <- temp33r1 * sign(log(y)-mymu) / sigma
-    dl.dsigma <- (temp33*temp33r1 - 1) / sigma
-    dl.dr <- (log(r) - 1 + digamma(ss) + temp33*temp33r1)/r^2 -
-             temp33r1 * log(temp33r1) / r
-
-    dmymu.deta <- dtheta.deta(mymu, "identitylink", earg = list())
-    dsigma.deta <- dtheta.deta(sigma, .link.sigma, earg = .esigma)
-    dr.deta <- dtheta.deta(r, .link.r, earg = .er)
-
-    c(w) * cbind(dl.dmymu * dmymu.deta, 
-                 dl.dsigma * dsigma.deta, 
-                 dl.dr * dr.deta)
-  }), list( .link.sigma = link.sigma, .link.r = link.r ))),
-  weight = expression({
-    wz <- matrix(0, n, 6)  # 5 will have small savings of 1 column
-
-    B <- log(r) + digamma(ss)
-    ned2l.dmymu2 <- (r-1) * gamma(1-1/r) / (sigma^2 * r^(2/r) * gamma(ss))
-    ned2l.dsigma2 <- r / sigma^2
-    ned2l.dr2 <- (ss * trigamma(ss) + B^2 - 1) / r^3 
-    ned2l.dsigmar <- -B / (r * sigma)
-
-    wz[, iam(1, 1, M)] <- ned2l.dmymu2 * dmymu.deta^2
-    wz[, iam(2, 2, M)] <- ned2l.dsigma2 * dsigma.deta^2
-    wz[, iam(3, 3, M)] <- ned2l.dr2 * dr.deta^2
-    wz[, iam(2, 3, M)] <- ned2l.dsigmar * dsigma.deta * dr.deta
-    wz = c(w) * wz
-    wz
-  }))
-}
 
 
 
diff --git a/R/family.basics.R b/R/family.basics.R
index ab6a683..c11a563 100644
--- a/R/family.basics.R
+++ b/R/family.basics.R
@@ -177,6 +177,8 @@ subsetc <-
 
 
 
+
+
  grid.search <- function(vov, objfun, y, x, w, extraargs = NULL,
                          maximize = TRUE, abs.arg = FALSE,
                          ret.objfun = FALSE, ...) {
@@ -197,7 +199,10 @@ subsetc <-
     stop("something has gone wrong!")
   ans <- if (length(try.this) == 1)
     try.this else sample(try.this, size = 1)
-  if (ret.objfun) c(ans, objvals[ans == vov]) else ans
+
+
+  myvec <- objvals[ans == vov]  # Could be a vector
+  if (ret.objfun) c(ans, myvec[1]) else ans
 }
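
The change above makes 'ret.objfun = TRUE' tie-safe: as the new comment
notes, objvals[ans == vov] can be a vector, so only its first element
is appended. A minimal sketch (grid.search() is internal, hence :::):

objfun <- function(val, y, x, w, extraargs) -(val - 2)^2  # max at 2
VGAM:::grid.search(seq(0, 4, by = 0.5), objfun = objfun,
                   y = NULL, x = NULL, w = NULL, ret.objfun = TRUE)
# c(2, 0): the chosen grid value followed by its objective value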
 
 
@@ -1403,6 +1408,15 @@ arwz2wz <- function(arwz, M = 1, M1 = 1) {
 
 
 
+
+param.names <- function(string, S) {
+  if (S == 1) string else paste(string, 1:S, sep = "")
+}
+
+
+
+
+
 vweighted.mean.default <- function (x, w, ..., na.rm = FALSE) {
   temp5 <- w.y.check(w = w, y = x, ncol.w.max = Inf, ncol.y.max = Inf,
                      out.wy = TRUE,
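
The new param.names() helper above centralises predictor naming for
multiple responses: it appends an index only when S > 1. For example:

param.names <- function(string, S) {
  if (S == 1) string else paste(string, 1:S, sep = "")
}
param.names("scale", 1)  # "scale"
param.names("scale", 3)  # "scale1" "scale2" "scale3"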
diff --git a/R/family.binomial.R b/R/family.binomial.R
index 9ee26c7..0ae3887 100644
--- a/R/family.binomial.R
+++ b/R/family.binomial.R
@@ -2402,184 +2402,6 @@ betabinomialff.control <- function(save.weights = TRUE, ...) {
 
 
 
-
-
-if (FALSE)
- lusted68 <- function(lrhopos = "loge", lrhoneg = "loge",
-                      erhopos = list(), erhoneg = list(),
-                      irhopos = NULL,   irhoneg = NULL,
-                      iprob1  = NULL,   iprob2  = NULL,
-                      zero = NULL) {
-
-  lrhopos <- as.list(substitute(lrhopos))
-  erhopos <- link2list(lrhopos)
-  lrhopos <- attr(erhopos, "function.name")
-
-  lrhoneg <- as.list(substitute(lrhoneg))
-  erhoneg <- link2list(lrhoneg)
-  lrhoneg <- attr(erhoneg, "function.name")
-
-
-  new("vglmff",
-  blurb = c("Lusted (1968)'s model\n",
-            "Links:    ",
-            namesof("rhopos", lrhopos, earg = erhopos), ", ",
-            namesof("rhoneg", lrhoneg, earg = erhoneg)),
-    initialize = eval(substitute(expression({
-        eval(process.binomial2.data.VGAM)
-
-
-
-
-    predictors.names <- c(
-         namesof("rhopos", .lrhopos, earg = .erhopos, short = TRUE),
-         namesof("rhoneg", .lrhoneg, earg = .erhoneg, short = TRUE))
-
-
-    if (!length(etastart)) {
-      nnn1 <- round(w * (y[, 1] + y[, 2]))
-      nnn2 <- round(w * (y[, 3] + y[, 4]))
-      init.pee1 <- if (length( .iprob1 )) rep( .iprob1 , len = n) else
-                   mu[, 1] / (mu[, 1] + mu[, 2])
-      init.pee2 <- if (length( .iprob2 )) rep( .iprob2 , len = n) else
-                   mu[, 3] / (mu[, 3] + mu[, 4])
-      init.rhopos <- pmax(1.1, init.pee1 / init.pee2)  # Should be > 1
-      init.rhoneg <- pmin(0.4, (1 - init.pee1) / (1 - init.pee2))  # c. 0
-
-      if (length( .irhopos)) init.rhopos <- rep( .irhopos , len = n)
-      if (length( .irhoneg)) init.rhoneg <- rep( .irhoneg , len = n)
-      etastart <- cbind(theta2eta(init.rhopos, .lrhopos, earg = .erhopos),
-                        theta2eta(init.rhoneg, .lrhoneg, earg = .erhoneg))
-    }
-  }), list( .lrhopos = lrhopos, .lrhoneg = lrhoneg,
-            .erhopos = erhopos, .erhoneg = erhoneg,
-            .iprob1  = iprob1,  .iprob2  = iprob2,
-            .irhopos = irhopos, .irhoneg = irhoneg ))),
-  linkinv = eval(substitute(function(eta, extra = NULL) {
-    rhopos <- eta2theta(eta[, 1], .lrhopos, earg = .erhopos)
-    rhoneg <- eta2theta(eta[, 2], .lrhoneg, earg = .erhoneg)
-    pee2 <- (1 - rhoneg) / (rhopos - rhoneg)
-    pee1 <- pee2 * rhopos
-    cbind(rhopos, rhoneg, "mu1" = pee1, "mu2" = pee2)
-  }, list( .lrhopos = lrhopos, .lrhoneg = lrhoneg,
-           .erhopos = erhopos, .erhoneg = erhoneg ))),
-  last = eval(substitute(expression({
-    misc$link <-    c("rhopos" = .lrhopos, "rhoneg" = .lrhoneg )
-
-    misc$earg <- list("rhopos" = .erhopos, "rhoneg" = .erhoneg )
-
-    misc$expected <- TRUE
-  }), list( .lrhopos = lrhopos, .lrhoneg = lrhoneg,
-            .erhopos = erhopos, .erhoneg = erhoneg,
-            .irhopos = irhopos, .irhoneg = irhoneg ))),
-  loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta, extra = NULL,
-             summation = TRUE) {
-    rhopos <- eta2theta(eta[, 1], .lrhopos, earg = .erhopos)
-    rhoneg <- eta2theta(eta[, 2], .lrhoneg, earg = .erhoneg)
-    pee2 <- (1 - rhoneg) / (rhopos - rhoneg)
-    pee1 <- pee2 * rhopos
-    if (min(pee1) <= 0.5) {
-      warning("too small pee1 values")
-      pee1[pee1 <= 0.5] <- 0.66
-    }
-    if (max(pee1) >= 1) {
-      warning("large pee1 values")
-      pee1[pee1 >= 1] <- 0.99
-    }
-    if (min(pee2) <= 0.0) {
-      warning("too small pee2 values")
-      pee2[pee2 <= 0.0] <- 0.01
-    }
-    if (max(pee2) >= 0.5) {
-      warning("too large pee2 values")
-      pee2[pee2 >= 0.5] <- 0.44
-    }
-
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-    nnn1 <- round(w * (y[, 1] + y[, 2]))
-    nnn2 <- round(w * (y[, 3] + y[, 4]))
-    index1 <- nnn1 > 0
-    index2 <- nnn2 > 0
-
- print(head(dbinom(round(w[index1] * y[index1, 1]), nnn1[index1],
-                 prob = pee1[index1], log = TRUE), 18))
-
-
-      ll.elts <-
-         (dbinom(round(w[index1] * y[index1, 1]), nnn1[index1],
-                 prob = pee1[index1], log = TRUE)) +
-         (dbinom(round(w[index2] * y[index2, 3]), nnn2[index2],
-                 prob = pee2[index2], log = TRUE))
-      if (summation) {
-        sum(ll.elts)
-      } else {
-        ll.elts
-      }
-    }
-  }, list( .lrhopos = lrhopos, .lrhoneg = lrhoneg,
-           .erhopos = erhopos, .erhoneg = erhoneg,
-           .irhopos = irhopos, .irhoneg = irhoneg ))),
-  vfamily = c("lusted68", "binom2"),
-  deriv = eval(substitute(expression({
-    rhopos <- eta2theta(eta[, 1], .lrhopos, earg = .erhopos)
-    rhoneg <- eta2theta(eta[, 2], .lrhoneg, earg = .erhoneg)
-    pee2 <- (1 - rhoneg) / (rhopos - rhoneg)
-    pee1 <- pee2 * rhopos
-    nnn1 <- round(w * (y[, 1] + y[, 3]))
-    nnn2 <- round(w * (y[, 2] + y[, 4]))
-
-    rhodif <- rhopos - rhoneg
-    drhopos.deta <- dtheta.deta(rhopos, .lrhopos, earg = .erhopos)
-    drhoneg.deta <- dtheta.deta(rhoneg, .lrhoneg, earg = .erhoneg)
-
-    dl1.drhopos <-  y[, 1] /  rhopos + y[, 2] / (rhopos - 1) - 1 / rhodif
-    dl1.drhoneg <- -y[, 1] / (1 - rhoneg) + y[, 2] / rhoneg  + 1 / rhodif
-    dl2.drhopos <-  y[, 4] / (rhopos - 1) - 1 / rhodif
-    dl2.drhoneg <- -y[, 3] / (1 - rhoneg) + 1 / rhodif
-    cbind((nnn1 * dl1.drhopos + nnn2 * dl2.drhopos) * drhopos.deta,
-          (nnn1 * dl1.drhoneg + nnn2 * dl2.drhoneg) * drhoneg.deta)
-  }), list( .lrhopos = lrhopos, .lrhoneg = lrhoneg,
-            .erhopos = erhopos, .erhoneg = erhoneg,
-            .irhopos = irhopos, .irhoneg = irhoneg ))),
-  weight = eval(substitute(expression({
-    wz <- matrix(0, n, dimm(M))  # 3 = dimm(2)
-
-
-    wz[, iam(1, 1, M)] <- wz[, iam(1, 1, M)] + nnn1 *
-       (pee1 / rhopos^2 + (1 - pee1) / (rhopos - 1)^2 - 1 / rhodif^2)
-
-    wz[, iam(2, 2, M)] <- wz[, iam(2, 2, M)] + nnn1 *
-       (pee1 / (1 - rhoneg)^2 + (1 - pee1) / rhoneg^2 - 1 / rhodif^2)
-
-    wz[, iam(1, 2, M)] <- wz[, iam(1, 2, M)] + nnn1 / rhodif^2
-
-    wz[, iam(1, 1, M)] <- wz[, iam(1, 1, M)] + nnn2 *
-       ((1 - pee2) / (rhopos - 1)^2 - 1 / rhodif^2)
-
-    wz[, iam(2, 2, M)] <- wz[, iam(2, 2, M)] + nnn2 *
-       (pee2 / (1 - rhoneg)^2 - 1 / rhodif^2)
-
-    wz[, iam(1, 2, M)] <- wz[, iam(1, 2, M)] + nnn2 / rhodif^2
-
-    wz[, iam(1, 1, M)] <- wz[, iam(1, 1, M)] * drhopos.deta^2
-    wz[, iam(2, 2, M)] <- wz[, iam(2, 2, M)] * drhoneg.deta^2
-    wz[, iam(1, 2, M)] <- wz[, iam(1, 2, M)] * drhopos.deta * drhoneg.deta
-
-    wz
-  }), list( .lrhopos = lrhopos, .lrhoneg = lrhoneg,
-            .erhopos = erhopos, .erhoneg = erhoneg,
-            .irhopos = irhopos, .irhoneg = irhoneg ))))
-}
-
-
-
-
-
-
-
  binom2.Rho <- function(rho = 0, imu1 = NULL, imu2 = NULL, 
                         exchangeable = FALSE, nsimEIM = NULL) {
   lmu12 <- "probit"
diff --git a/R/family.categorical.R b/R/family.categorical.R
index 1f3d22e..2eb10e4 100644
--- a/R/family.categorical.R
+++ b/R/family.categorical.R
@@ -118,7 +118,6 @@ Deviance.categorical.data.vgam <-
     smu <- ifelse(smu < double.eps, double.eps, smu)
 
 
-
     devmu[smallmu] <- smy * log(smu)
   }
   devmu[!smallmu] <- y[!smallmu] * log(mu[!smallmu])
@@ -146,7 +145,7 @@ Deviance.categorical.data.vgam <-
 
 
 dmultinomial <- function(x, size = NULL, prob, log = FALSE,
-                        dochecking = TRUE, smallno = 1.0e-7) {
+                         dochecking = TRUE, smallno = 1.0e-7) {
   if (!is.logical(log.arg <- log) || length(log) != 1)
     stop("bad input for argument 'log'")
   rm(log)
@@ -189,8 +188,8 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
 
 
  sratio <- function(link = "logit",
-                   parallel = FALSE, reverse = FALSE, zero = NULL,
-                   whitespace = FALSE) {
+                    parallel = FALSE, reverse = FALSE, zero = NULL,
+                    whitespace = FALSE) {
   link <- as.list(substitute(link))
   earg  <- link2list(link)
   link <- attr(earg, "function.name")
@@ -208,13 +207,30 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
   blurb = c("Stopping ratio model\n\n",
             "Links:    ",
             namesof(if (reverse)
-            ifelse(whitespace, "P[Y = j+1|Y <= j+1]", "P[Y=j+1|Y<=j+1]") else
-            ifelse(whitespace, "P[Y = j|Y >= j]",     "P[Y=j|Y>=j]"),
+            ifelse(whitespace, "P[Y = j+1|Y <= j+1]",
+                               "P[Y=j+1|Y<=j+1]") else
+            ifelse(whitespace, "P[Y = j|Y >= j]",
+                               "P[Y=j|Y>=j]"),
                    link, earg = earg), "\n",
             "Variance: ",
             ifelse(whitespace,
                    "mu[,j] * (1 - mu[,j]); -mu[,j] * mu[,k]",
                    "mu[,j]*(1-mu[,j]); -mu[,j]*mu[,k]")),
+  infos = eval(substitute(function(...) {
+    list(M1 = NA,  # zz -1?
+         Q1 = NA,
+         multipleResponses = FALSE,
+         parallel = .parallel ,
+         reverse = .reverse ,
+         whitespace = .whitespace ,
+         zero = .zero ,
+         link = .link )
+  }, list( .link = link,
+           .zero = zero,
+           .parallel = parallel,
+           .reverse = reverse,
+           .whitespace = whitespace ))),
+
   constraints = eval(substitute(expression({
     constraints <- cm.VGAM(matrix(1, M, 1), x = x,
                            bool = .parallel ,
@@ -286,7 +302,7 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
     misc$fillerChar <- .fillerChar
     misc$whitespace <- .whitespace
 
-    extra <- list()   # kill what was used 
+    extra <- list()  # kill what was used 
   }), list( .earg = earg, .link = link, .reverse = reverse,
             .fillerChar = fillerChar,
             .whitespace = whitespace ))),
@@ -309,7 +325,7 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
       stop("loglikelihood residuals not implemented yet")
     } else {
       ycounts <- if (is.numeric(extra$orig.w)) y * w / extra$orig.w else
-                 y * w # Convert proportions to counts
+                 y * w  # Convert proportions to counts
       nvec <- if (is.numeric(extra$orig.w)) round(w / extra$orig.w) else
               round(w)
 
@@ -385,8 +401,10 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
   blurb = c("Continuation ratio model\n\n", 
             "Links:    ",
             namesof(if (reverse)
-            ifelse(whitespace, "P[Y < j+1|Y <= j+1]", "P[Y<j+1|Y<=j+1]") else
-            ifelse(whitespace, "P[Y > j|Y >= j]",     "P[Y>j|Y>=j]"),
+            ifelse(whitespace, "P[Y < j+1|Y <= j+1]",
+                               "P[Y<j+1|Y<=j+1]") else
+            ifelse(whitespace, "P[Y > j|Y >= j]",
+                               "P[Y>j|Y>=j]"),
                    link, earg = earg),
             "\n",
             "Variance: ",
@@ -394,12 +412,30 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
                    "mu[,j] * (1 - mu[,j]); -mu[,j] * mu[,k]",
                    "mu[,j]*(1-mu[,j]); -mu[,j]*mu[,k]")),
 
+  infos = eval(substitute(function(...) {
+    list(M1 = NA,  # zz -1?
+         Q1 = NA,
+         multipleResponses = FALSE,
+         parallel = .parallel ,
+         reverse = .reverse ,
+         whitespace = .whitespace ,
+         zero = .zero ,
+         link = .link )
+  }, list( .link = link,
+           .zero = zero,
+           .parallel = parallel,
+           .reverse = reverse,
+           .whitespace = whitespace ))),
+
+
   constraints = eval(substitute(expression({
     constraints <- cm.VGAM(matrix(1, M, 1), x = x,
                            bool = .parallel ,
                            constraints = constraints)
     constraints <- cm.zero.VGAM(constraints, x, .zero , M)
   }), list( .parallel = parallel, .zero = zero ))),
+
+
   deviance = Deviance.categorical.data.vgam,
 
   initialize = eval(substitute(expression({
@@ -423,8 +459,8 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
     y.names <- paste("mu", 1:(M+1), sep = "")
 
     extra$mymat <- if ( .reverse )
-                    tapplymat1(y, "cumsum") else
-                    tapplymat1(y[, ncol(y):1], "cumsum")[, ncol(y):1]
+                   tapplymat1(y, "cumsum") else
+                   tapplymat1(y[, ncol(y):1], "cumsum")[, ncol(y):1]
 
     if (length(dimnames(y)))
       extra$dimnamesy2 <- dimnames(y)[[2]]
@@ -489,7 +525,7 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
       stop("loglikelihood residuals not implemented yet")
     } else {
       ycounts <- if (is.numeric(extra$orig.w)) y * w / extra$orig.w else
-                 y * w # Convert proportions to counts
+                 y * w  # Convert proportions to counts
       nvec <- if (is.numeric(extra$orig.w)) round(w / extra$orig.w) else
               round(w)
 
@@ -551,7 +587,7 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
   function(maxit = 21, panic = FALSE, ...) {
   if (maxit < 1) {
       warning("bad value of maxit; using 21 instead")
-      maxit = 21
+      maxit <- 21
   }
   list(maxit = maxit, panic = as.logical(panic)[1])
 }
@@ -562,7 +598,7 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
            criterion = c("aic1", "aic2", names( .min.criterion.VGAM )),
            ...) {
   if (mode(criterion) != "character" && mode(criterion) != "name")
-      criterion <- as.character(substitute(criterion))
+    criterion <- as.character(substitute(criterion))
   criterion <- match.arg(criterion,
       c("aic1", "aic2", names( .min.criterion.VGAM )))[1]
 
@@ -679,6 +715,7 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
     list(parallel = .parallel ,
          refLevel = .refLevel ,
          M1 = -1,
+         link = "multilogit",
          multipleResponses = FALSE,
          zero = .zero )
   }, list( .zero = zero,
@@ -763,7 +800,7 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
       stop("loglikelihood residuals not implemented yet")
     } else {
       ycounts <- if (is.numeric(extra$orig.w)) y * w / extra$orig.w else
-                 y * w # Convert proportions to counts
+                 y * w  # Convert proportions to counts
       nvec <- if (is.numeric(extra$orig.w)) round(w / extra$orig.w) else
               round(w)
 
@@ -794,7 +831,7 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
   }), list( .refLevel = refLevel ))),
   weight = eval(substitute(expression({
     mytiny <- (mu < sqrt(.Machine$double.eps)) | 
-             (mu > 1.0 - sqrt(.Machine$double.eps))
+              (mu > 1.0 - sqrt(.Machine$double.eps))
 
     use.refLevel <- if ( .refLevel < 0) M+1 else .refLevel
 
@@ -811,11 +848,11 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
         wz[, 1:M] <- wz[, 1:M] + mu[, -use.refLevel ]
     }
 
-    atiny <- (mytiny %*% rep(1, ncol(mu))) > 0 # apply(mytiny, 1, any)
+    atiny <- (mytiny %*% rep(1, ncol(mu))) > 0  # apply(mytiny, 1, any)
     if (any(atiny)) {
       if (M == 1) wz[atiny] <- wz[atiny] *
-                              (1 + .Machine$double.eps^0.5) +
-                              .Machine$double.eps else
+                               (1 + .Machine$double.eps^0.5) +
+                               .Machine$double.eps else
       wz[atiny, 1:M] <- wz[atiny, 1:M] * (1 + .Machine$double.eps^0.5) +
                        .Machine$double.eps
     }
@@ -869,16 +906,30 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
                   ifelse(whitespace, "P[Y >= j+1]", "P[Y>=j+1]") else
                   ifelse(whitespace, "P[Y <= j]",   "P[Y<=j]"),
                   link, earg = earg)),
+  infos = eval(substitute(function(...) {
+    list(M1 = NA,  # zz -1?
+         Q1 = NA,
+         multipleResponses = .multiple.responses ,
+         parallel = .parallel ,
+         reverse = .reverse ,
+         whitespace = .whitespace ,
+         link =  .link )
+  }, list( .link = link,
+           .parallel = parallel,
+           .multiple.responses = multiple.responses,
+           .reverse = reverse,
+           .whitespace = whitespace ))),
+
   constraints = eval(substitute(expression({
     if ( .multiple.responses ) {
       if ( !length(constraints) ) {
-          Llevels <- extra$Llevels
-          NOS <- extra$NOS
-          Hk.matrix <- kronecker(diag(NOS), matrix(1,Llevels-1,1))
-          constraints <- cm.VGAM(Hk.matrix, x = x,
-                                 bool = .parallel ,
-                                 apply.int = .apply.parint ,
-                                 constraints = constraints)
+        Llevels <- extra$Llevels
+        NOS <- extra$NOS
+        Hk.matrix <- kronecker(diag(NOS), matrix(1,Llevels-1,1))
+        constraints <- cm.VGAM(Hk.matrix, x = x,
+                               bool = .parallel ,
+                               apply.int = .apply.parint ,
+                               constraints = constraints)
       }
     } else {
       constraints <- cm.VGAM(matrix(1, M, 1), x = x,
@@ -899,7 +950,7 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
       Llevels <- extra$Llevels
       for (iii in 1:NOS) {
         cindex <- (iii-1)*(Llevels-1) + 1:(Llevels-1)
-        aindex <- (iii-1)*(Llevels) + 1:(Llevels)
+        aindex <- (iii-1)*(Llevels  ) + 1:(Llevels)
         totdev <- totdev +
                   Deviance.categorical.data.vgam(
                     mu = mu[, aindex, drop = FALSE],
@@ -1216,7 +1267,7 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
 
 
  acat <- function(link = "loge", parallel = FALSE,
-                 reverse = FALSE, zero = NULL, whitespace = FALSE) {
+                  reverse = FALSE, zero = NULL, whitespace = FALSE) {
 
 
   link <- as.list(substitute(link))
@@ -1243,6 +1294,21 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
             ifelse(whitespace,
             "mu[,j] * (1 - mu[,j]); -mu[,j] * mu[,k]",
             "mu[,j]*(1-mu[,j]); -mu[,j]*mu[,k]")),
+  infos = eval(substitute(function(...) {
+    list(M1 = NA,  # zz -1?
+         Q1 = NA,
+         multipleResponses = FALSE,
+         parallel = .parallel ,
+         reverse = .reverse ,
+         whitespace = .whitespace ,
+         zero = .zero ,
+         link = .link )
+  }, list( .link = link,
+           .zero = zero,
+           .parallel = parallel,
+           .reverse = reverse,
+           .whitespace = whitespace ))),
+
   constraints = eval(substitute(expression({
     constraints <- cm.VGAM(matrix(1, M, 1), x = x,
                            bool = .parallel ,
@@ -1329,7 +1395,7 @@ dmultinomial <- function(x, size = NULL, prob, log = FALSE,
       stop("loglikelihood residuals not implemented yet")
     } else {
       ycounts <- if (is.numeric(extra$orig.w)) y * w / extra$orig.w else
-                 y * w # Convert proportions to counts
+                 y * w  # Convert proportions to counts
       nvec <- if (is.numeric(extra$orig.w)) round(w / extra$orig.w) else
               round(w)
 
@@ -1420,11 +1486,13 @@ acat.deriv <- function(zeta, reverse, M, n) {
 
 
 
+
+
  brat <- function(refgp = "last",
                   refvalue = 1,
-                  init.alpha = 1) {
-  if (!is.Numeric(init.alpha, positive = TRUE))
-    stop("'init.alpha' must contain positive values only")
+                  ialpha = 1) {
+  if (!is.Numeric(ialpha, positive = TRUE))
+    stop("'ialpha' must contain positive values only")
   if (!is.Numeric(refvalue, length.arg = 1, positive = TRUE))
     stop("'refvalue' must be a single positive value")
 
@@ -1438,6 +1506,17 @@ acat.deriv <- function(zeta, reverse, M, n) {
   blurb = c(paste("Bradley-Terry model (without ties)\n\n"), 
             "Links:   ",
             namesof("alpha's", "loge")),
+  infos = eval(substitute(function(...) {
+    list(M1 = NA,  # zz -1?
+         Q1 = NA,
+         multipleResponses = FALSE,
+         refvalue = .refvalue ,
+         refgp = .refgp ,
+         ialpha = .ialpha )
+  }, list( .ialpha = ialpha,
+           .refgp = refgp,
+           .refvalue = refvalue ))),
+
   initialize = eval(substitute(expression({
     are.ties <- attr(y, "are.ties")  # If Brat() was used
     if (is.logical(are.ties) && are.ties)
@@ -1447,10 +1526,10 @@ acat.deriv <- function(zeta, reverse, M, n) {
     M <- (1:length(try.index))[(try.index+1)*(try.index) == ncol(y)]
     if (!is.finite(M))
       stop("cannot determine 'M'")
-    init.alpha <- matrix(rep( .init.alpha , length.out = M),
-                         n, M, byrow = TRUE)
-    etastart <- matrix(theta2eta(init.alpha, "loge",
-                                earg = list(theta = NULL)),
+    ialpha <- matrix(rep( .ialpha , length.out = M),
+                     n, M, byrow = TRUE)
+    etastart <- matrix(theta2eta(ialpha, "loge",
+                                 earg = list(theta = NULL)),
                        n, M, byrow = TRUE)
     refgp <- .refgp
     if (!intercept.only)
@@ -1461,7 +1540,7 @@ acat.deriv <- function(zeta, reverse, M, n) {
     predictors.names <-
       namesof(paste("alpha", uindex, sep = ""), "loge", short = TRUE)
 
-  }), list( .refgp = refgp, .init.alpha = init.alpha ))),
+  }), list( .refgp = refgp, .ialpha = ialpha ))),
 
   linkinv = eval(substitute( function(eta, extra = NULL) {
     probs <- NULL
@@ -1572,12 +1651,12 @@ acat.deriv <- function(zeta, reverse, M, n) {
 
  bratt <- function(refgp = "last",
                    refvalue = 1,
-                   init.alpha = 1,
+                   ialpha = 1,
                    i0 = 0.01) {
   if (!is.Numeric(i0, length.arg = 1, positive = TRUE))
     stop("'i0' must be a single positive value")
-  if (!is.Numeric(init.alpha, positive = TRUE))
-    stop("'init.alpha' must contain positive values only")
+  if (!is.Numeric(ialpha, positive = TRUE))
+    stop("'ialpha' must contain positive values only")
   if (!is.Numeric(refvalue, length.arg = 1, positive = TRUE))
     stop("'refvalue' must be a single positive value")
 
@@ -1591,12 +1670,25 @@ acat.deriv <- function(zeta, reverse, M, n) {
   blurb = c(paste("Bradley-Terry model (with ties)\n\n"), 
             "Links:   ",
             namesof("alpha's", "loge"), ", log(alpha0)"),
+  infos = eval(substitute(function(...) {
+    list(M1 = NA,  # zz -1?
+         Q1 = NA,
+         multipleResponses = FALSE,
+         refvalue = .refvalue ,
+         refgp = .refgp ,
+         i0 = .i0 ,
+         ialpha = .ialpha )
+  }, list( .ialpha = ialpha,
+           .i0 = i0,
+           .refgp = refgp,
+           .refvalue = refvalue ))),
+
   initialize = eval(substitute(expression({
     try.index <- 1:400
     M <- (1:length(try.index))[(try.index*(try.index-1)) == ncol(y)]
     if (!is.Numeric(M, length.arg = 1, integer.valued = TRUE))
       stop("cannot determine 'M'")
-    NCo <- M # Number of contestants
+    NCo <- M  # Number of contestants
 
     are.ties <- attr(y, "are.ties")  # If Brat() was used
     if (is.logical(are.ties)) {
@@ -1608,10 +1700,10 @@ acat.deriv <- function(zeta, reverse, M, n) {
       ties <- 0 * y
     }
 
-    init.alpha <- rep( .init.alpha, len = NCo-1)
+    ialpha <- rep( .ialpha, len = NCo-1)
     ialpha0 <- .i0
     etastart <-
-      cbind(matrix(theta2eta(init.alpha,
+      cbind(matrix(theta2eta(ialpha,
                              "loge",
                              list(theta = NULL)),
                    n, NCo-1, byrow = TRUE),
@@ -1632,7 +1724,7 @@ acat.deriv <- function(zeta, reverse, M, n) {
       namesof("alpha0", "loge", short = TRUE))
   }), list( .refgp = refgp,
            .i0 = i0,
-           .init.alpha=init.alpha ))),
+           .ialpha = ialpha ))),
 
   linkinv = eval(substitute( function(eta, extra = NULL) {
     probs <- qprobs <- NULL
@@ -1693,7 +1785,7 @@ acat.deriv <- function(zeta, reverse, M, n) {
     for (ii in 1:nrow(eta)) {
       alpha <- .brat.alpha(eta2theta(eta[ii, -M], "loge",
                                      earg = list(theta = NULL)),
-                          .refvalue, .refgp )
+                           .refvalue, .refgp )
       alpha0 <- loge(eta[ii, M], inverse = TRUE)
       ymat <- InverseBrat(   y[ii, ], NCo = M, diag = 0)
       tmat <- InverseBrat(ties[ii, ], NCo = M, diag = 0)
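
These hunks rename 'init.alpha' to 'ialpha' in brat() and bratt().
A minimal sketch of the new spelling (the win counts are invented):

library(VGAM)
m <- matrix(c(NA, 10,  4,
               5, NA,  9,
               8,  3, NA), 3, 3, byrow = TRUE,
            dimnames = list(winner = c("A", "B", "C"),
                            loser  = c("A", "B", "C")))
fit <- vglm(Brat(m) ~ 1, brat(refgp = "last", ialpha = 1))
exp(coef(fit))  # estimated alpha's relative to the reference group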
@@ -2124,265 +2216,6 @@ ordpoissonProbs <- function(extra, mu, deriv = 0) {
 
 
 
- if (FALSE)
- scumulative <- function(link = "logit", earg = list(),
-                         lscale = "loge", escale = list(),
-                         parallel = FALSE, sparallel = TRUE,
-                         reverse = FALSE,
-                         iscale = 1) {
-  stop("sorry, not working yet")
-
-  link <- as.list(substitute(link))
-  earg <- link2list(link)
-  link <- attr(earg, "function.name")
-
-  lscale <- as.list(substitute(lscale))
-  escale <- link2list(lscale)
-  lscale <- attr(escale, "function.name")
-
-
-
-  if (!is.Numeric(iscale, positive = TRUE))
-    stop("bad input for argument 'iscale'")
-  if (!is.logical(reverse) || length(reverse) != 1)
-    stop("argument 'reverse' must be a single logical")
-
-    new("vglmff",
-    blurb = c(paste("Scaled cumulative", link, "model\n\n"),
-              "Links:   ",
-              namesof(if (reverse) "P[Y>=j+1]" else "P[Y<=j]",
-                      link, earg = earg),
-              ", ",
-              namesof("scale_j", lscale, escale)),
-    constraints = eval(substitute(expression({
-        J <- M / 2
-        constraints <- cm.VGAM(matrix(1, J, 1), x = x,
-                               bool = .parallel ,
-                               apply.int = FALSE,
-                               constraints = constraints)
-        constraints[["(Intercept)"]] = rbind(constraints[["(Intercept)"]],
-            matrix(0, J, ncol(constraints[["(Intercept)"]])))
-
-        cm2 <- cm.VGAM(matrix(1, J, 1), x = x,
-                           bool = .sparallel ,
-                           apply.int = FALSE,
-                           constraints = NULL)
-
-        for (ii in 2:length(constraints))
-            constraints[[ii]] <-
-                cbind(rbind(constraints[[ii]],
-                            matrix(0, J, ncol(constraints[[ii]]))),
-                      rbind(matrix(0, J, ncol(cm2[[ii]])), cm2[[ii]]))
-
-    for (ii in 1:length(constraints))
-      constraints[[ii]] <-
-        (constraints[[ii]])[interleave.VGAM(M, M = 2),, drop = FALSE]
-  }), list( .parallel = parallel, .sparallel=sparallel ))),
-  deviance = eval(substitute(
-      function(mu, y, w, residuals = FALSE, eta, extra = NULL,
-               summation = TRUE) {
-      answer <-
-        Deviance.categorical.data.vgam(mu = mu,
-                y = y, w = w, residuals = residuals,
-                eta = eta, extra = extra,
-                summation = summation)
-      answer
-  }, list( .earg = earg, .link = link ) )),
-  initialize = eval(substitute(expression({
-
-    if (is.factor(y) && !is.ordered(y))
-      warning("response should be ordinal---see ordered()")
-
-
-    if (intercept.only)
-        stop("use cumulative() for intercept-only models")
-
-
-    delete.zero.colns = TRUE # Cannot have FALSE since then prob(Y=jay)=0
-    eval(process.categorical.data.VGAM)
-
-
-    M <- 2*(ncol(y)-1)
-    J <- M / 2
-    extra$J <- J
-    mynames <- if ( .reverse )
-      paste("P[Y>=", 2:(1+J), "]", sep = "") else
-      paste("P[Y<=", 1:J,     "]", sep = "")
-    predictors.names <- c(
-        namesof(mynames, .link , short = TRUE, earg = .earg ),
-        namesof(paste("scale_", 1:J, sep = ""),
-                .lscale, short = TRUE, earg = .escale ))
-
-
-    y.names <- paste("mu", 1:(J+1), sep = "")
-
-    if (length(dimnames(y)))
-      extra$dimnamesy2 <- dimnames(y)[[2]]
-
-    predictors.names <- predictors.names[interleave.VGAM(M, M = 2)]
-
-  }), list( .link = link, .lscale = lscale, .reverse = reverse,
-            .earg = earg, .escale = escale ))),
-  linkinv = eval(substitute( function(eta, extra = NULL) {
-    J <- extra$J
-    M <- 2*J
-    etamat1 <- eta[, 2*(1:J)-1, drop = FALSE]
-    etamat2 <- eta[, 2*(1:J),  drop = FALSE]
-    scalemat <- eta2theta(etamat2, .lscale, earg = .escale )
-    fv.matrix <-
-    if ( .reverse ) {
-        ccump <- cbind(1,
-                      eta2theta(etamat1 / scalemat,
-                                .link , earg = .earg ))
-        cbind(-tapplymat1(ccump, "diff"), ccump[, ncol(ccump)])
-    } else {
-        cump <- cbind(eta2theta(etamat1 / scalemat,
-                               .link , earg = .earg ),
-                     1)
-        cbind(cump[, 1], tapplymat1(cump, "diff"))
-    }
-    if (length(extra$dimnamesy2))
-      dimnames(fv.matrix) <- list(dimnames(eta)[[1]],
-                                 extra$dimnamesy2)
-    fv.matrix
-  }, list( .link = link, .lscale = lscale, .reverse = reverse,
-           .earg = earg, .escale = escale ))),
-  last = eval(substitute(expression({
-    J <- extra$J
-    misc$link <-
-      c(rep( .link , length = J),
-        rep( .lscale, length = J))[interleave.VGAM(M, M = 2)]
-    names(misc$link) <- predictors.names
-    misc$earg <- vector("list", M)
-    names(misc$earg) <- names(misc$link)
-    for (ii in 1:J) misc$earg[[2*ii-1]] <- .earg
-    for (ii in 1:J) misc$earg[[2*ii  ]] <- .escale
-    misc$parameters <- mynames
-    misc$reverse <- .reverse
-    misc$parallel <- .parallel
-    misc$sparallel <- .sparallel
-  }), list( .link = link, .lscale = lscale,
-            .reverse = reverse, .parallel = parallel,
-            .sparallel = sparallel,
-            .earg = earg, .escale = escale ))),
-  linkfun = eval(substitute( function(mu, extra = NULL) {
-    cump <- tapplymat1(as.matrix(mu), "cumsum")
-    J <- ncol(as.matrix(mu)) - 1
-    M <- 2 * J
-    answer <-  cbind(
-        theta2eta(if ( .reverse ) 1-cump[, 1:J] else cump[, 1:J],
-                  .link ,
-                  earg = .earg ),
-        matrix(theta2eta( .iscale, .lscale , earg = .escale ),
-               nrow(as.matrix(mu)), J, byrow = TRUE))
-    answer <- answer[,interleave.VGAM(M, M = 2)]
-    answer
-  }, list( .link = link, .lscale = lscale, .reverse = reverse,
-           .iscale = iscale, .earg = earg, .escale = escale ))),
-  loglikelihood =
-    function(mu, y, w, residuals = FALSE, eta, extra = NULL,
-             summation = TRUE) {
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ycounts <- if (is.numeric(extra$orig.w)) y * w / extra$orig.w else
-                y * w # Convert proportions to counts
-      nvec <- if (is.numeric(extra$orig.w)) round(w / extra$orig.w) else
-                round(w)
-
-      smallno <- 1.0e4 * .Machine$double.eps
-      if (max(abs(ycounts - round(ycounts))) > smallno)
-        warning("converting 'ycounts' to integer in @loglikelihood")
-      ycounts <- round(ycounts)
-
-      ll.elts <-
-        (if (is.numeric(extra$orig.w)) extra$orig.w else 1) *
-         dmultinomial(x = ycounts, size = nvec, prob = mu,
-                      log = TRUE, dochecking = FALSE)
-      if (summation) {
-        sum(ll.elts)
-      } else {
-        ll.elts
-      }
-    }
-  },
-  vfamily = c("scumulative", "vcategorical"),
-  deriv = eval(substitute(expression({
-        ooz <- iter %% 2
-
-        J <- extra$J
-        mu.use = pmax(mu, .Machine$double.eps * 1.0e-0)
-
-        etamat1 <- eta[, 2*(1:J)-1, drop = FALSE]
-        etamat2 <- eta[, 2*(1:J)  , drop = FALSE]
-        scalemat <- eta2theta(etamat2, .lscale, earg = .escale )
-
-        cump <- eta2theta(etamat1 / scalemat, .link , earg = .earg )
-        dcump.deta <- dtheta.deta(cump, .link , earg = .earg )
-        dscale.deta <- dtheta.deta(scalemat, .lscale, earg = .escale )
-        dl.dcump <- (if ( .reverse ) -w  else w) * 
-                (y[, 1:J]/mu.use[, 1:J] - y[, -1]/mu.use[, -1])
-        dcump.dscale <- -dcump.deta * etamat1 / scalemat^2
-        ans <- cbind(dl.dcump * dcump.deta / scalemat,
-                    dl.dcump * dcump.dscale * dscale.deta)
-        ans <- ans[,interleave.VGAM(M, M = 2)]
-        if (ooz) ans[, c(TRUE, FALSE)] = 0 else
-                 ans[, c(FALSE, TRUE)] = 0
-        ans
-    }), list( .link = link, .lscale = lscale, .reverse = reverse,
-              .earg = earg, .escale = escale ))),
-    weight = eval(substitute(expression({
-
-        wz <- matrix(0, n, 2*(2*M-3))
-
-        wz[, 2*(1:J)-1] <- if (ooz) c(w) * (dcump.deta / scalemat)^2 *
-                         (1/mu.use[, 1:J] + 1/mu.use[, -1]) else 1
-        wz[, 2*(1:J)] <- if (ooz) 1 else
-                        c(w) * (dcump.dscale * dscale.deta)^2 *
-                       (1/mu.use[, 1:J] + 1/mu.use[, -1])
-        wz0 <- c(w) * (dcump.deta / scalemat) * 
-                  (dcump.dscale * dscale.deta) *
-                  (1/mu.use[, 1:J] + 1/mu.use[, -1])
-        wz0 <- as.matrix(wz0)
-        for (ii in 1:J)
-            wz[,iam(2*ii-1,2*ii,M = M)] <- if (ooz) wz0[, ii] else 0
-
-        if (J > 1) {
-            wz0 <- -c(w) *
-                  (dcump.deta[, -J] / scalemat[, -J]) *
-                  (dcump.deta[, -1]  / scalemat[, -1]) / mu.use[, 2:J]
-            wz0 <- as.matrix(wz0)  # Just in case J=2
-            for (ii in 1:(J-1))
-              wz[, iam(2*ii-1, 2*ii+1, M = M)] <- if (ooz) wz0[, ii] else 0
-            wz0 <- -c(w) * (dcump.dscale[, -1] * dscale.deta[, -1]) *
-                       (dcump.dscale[, -J] *
-                        dscale.deta[, -J]) / mu.use[, 2:J]
-            wz0 <- as.matrix(wz0)
-            for (ii in 1:(J-1))
-                wz[,iam(2*ii,2*ii+2,M = M)] <- if (ooz) wz0[, ii] else 0
-
-
-
-            wz0 <- -c(w) * (dcump.deta[, -J] / scalemat[, -J]) *
-                          (dcump.dscale[, -1] *
-                           dscale.deta[, -1]) / mu.use[, 2:J]
-            wz0 <- as.matrix(wz0)
-            for (ii in 1:(J-1))
-              wz[,iam(2*ii-1,2*ii+2,M = M)] <- if (ooz) wz0[, ii] else 0
-            wz0 <- -c(w) * (dcump.deta[, -1] / scalemat[, -1]) *
-                          (dcump.dscale[, -J] *
-                          dscale.deta[, -J]) / mu.use[, 2:J]
-            wz0 <- as.matrix(wz0)
-            for (ii in 1:(J-1))
-                wz[,iam(2*ii,2*ii+1,M = M)] <- if (ooz) wz0[, ii] else 0
-        }
-        wz
-    }), list( .link = link, .lscale = lscale, .earg = earg,
-              .escale = escale ))))
-}
-
-
-
 
 
  margeff <- function(object, subset = NULL) {
@@ -2408,7 +2241,7 @@ ordpoissonProbs <- function(extra, mu, deriv = 0) {
             "the LM design matrix")
 
  nnn <- object@misc$n
-  M <- object@misc$M # ncol(B)  # length(pvec) - 1
+  M <- object@misc$M  # ncol(B)  # length(pvec) - 1
 
 
     if (model.multinomial) {
@@ -2417,7 +2250,7 @@ ordpoissonProbs <- function(extra, mu, deriv = 0) {
     B <- if (!length(rlev)) {
         cbind(cfit, 0)
     } else {
-        if (rlev == M+1) { # Default
+        if (rlev == M+1) {  # Default
             cbind(cfit, 0)
         } else if (rlev == 1) {
             cbind(0, cfit)
@@ -2453,18 +2286,18 @@ ordpoissonProbs <- function(extra, mu, deriv = 0) {
         (B - temp2) * temp3
     } else {
         if (is.logical(ii))
-            ii <- (1:nnn)[ii]
+          ii <- (1:nnn)[ii]
 
         ans <- array(0, c(ppp, M+1, length(ii)),
                      dimnames = list(dimnames(B)[[1]],
                                      dimnames(B)[[2]],
                                      dimnames(fitted(object)[ii,])[[1]]))
         for (ilocal in 1:length(ii)) {
-            pvec  <- fitted(object)[ii[ilocal],]
-            temp1 <- B * matrix(pvec, ppp, M+1, byrow = TRUE)
-            temp2 <- matrix(rowSums(temp1), ppp, M+1)
-            temp3 <- matrix(pvec, nrow(B), M+1, byrow = TRUE)
-            ans[,,ilocal] <- (B - temp2) * temp3
+          pvec  <- fitted(object)[ii[ilocal], ]
+          temp1 <- B * matrix(pvec, ppp, M+1, byrow = TRUE)
+          temp2 <- matrix(rowSums(temp1), ppp, M+1)
+          temp3 <- matrix(pvec, nrow(B), M+1, byrow = TRUE)
+          ans[ , , ilocal] <- (B - temp2) * temp3
         }
         ans
     }
@@ -2475,9 +2308,9 @@ ordpoissonProbs <- function(extra, mu, deriv = 0) {
       stop("cannot handle cumulative(multiple.responses = TRUE)")
    reverse <- object@misc$reverse
    linkfunctions <- object@misc$link
-    all.eargs  <- object@misc$earg
+    all.eargs <- object@misc$earg
     B <- cfit <- coefvlm(object, matrix.out = TRUE)
-    ppp   <- nrow(B)
+    ppp <- nrow(B)
 
     hdot <- lpmat <- kronecker(predict(object), matrix(1, ppp, 1))
     resmat <- cbind(hdot, 1)
@@ -2512,11 +2345,11 @@ ordpoissonProbs <- function(extra, mu, deriv = 0) {
       return(temp1)
     } else
     if (is.numeric(ii) && (length(ii) == 1)) {
-      return(temp1[,, ii])
+      return(temp1[, , ii])
     } else {
-      return(temp1[,, ii])
-    }
+      return(temp1[, , ii])
     }
+  }
 }
 
 
@@ -2612,6 +2445,7 @@ prplot <- function(object,
 
 
 
+
 is.parallel.matrix <- function(object, ...)
   is.matrix(object) && all(!is.na(object)) &&
   all(c(object) == 1) && ncol(object) == 1
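
Several families in this file (sratio(), cratio(), cumulative(),
acat(), brat(), bratt()) gain an infos slot above. It exposes a
family's settings without fitting anything, e.g. (assuming VGAM
0.9-8 is attached):

library(VGAM)
sratio(reverse = TRUE)@infos()
# a list holding parallel, reverse, whitespace, zero, link, ...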
diff --git a/R/family.censored.R b/R/family.censored.R
index e472d0e..3e29ee9 100644
--- a/R/family.censored.R
+++ b/R/family.censored.R
@@ -188,8 +188,8 @@
 
 
 if (FALSE)
- cexpon <- 
- ecexpon <- function(link = "loge", location = 0) {
+ cens.exponential <- 
+ ecens.exponential <- function(link = "loge", location = 0) {
   if (!is.Numeric(location, length.arg = 1))
     stop("bad input for 'location'")
 
@@ -285,7 +285,7 @@ if (FALSE)
     sum(w[cenI] * log(-exp(-rate[cenI]*(y[cenI, 2]-extra$location))+
     exp(-rate[cenI]*(y[cenI, 1]-extra$location))))
   }, list( .link = link ))),
-  vfamily = c("ecexpon"),
+  vfamily = c("ecens.exponential"),
   deriv = eval(substitute(expression({
     rate <- 1 / (mu - extra$location)
     cen0 <- extra$uncensored
diff --git a/R/family.extremes.R b/R/family.extremes.R
index 4d20d5e..7dfa3eb 100644
--- a/R/family.extremes.R
+++ b/R/family.extremes.R
@@ -637,20 +637,23 @@ qgev <- function(p, location = 0, scale = 1, shape = 0,
         stop("cannot handle shape == 0 with a multivariate response")
 
       EulerM <- -digamma(1)
-      wz[is.zero, iam(2, 2, M)] <- (pi^2/6 + (1-EulerM)^2) / sigma[is.zero]^2
+      wz[is.zero, iam(2, 2, M)] <- (pi^2/6 + (1-EulerM)^2)/sigma[is.zero]^2
       wz[is.zero, iam(3, 3, M)] <- 2.4236
-      wz[is.zero, iam(1, 2, M)] <- (digamma(2) + 2*(EulerM-1)) / sigma[is.zero]^2
-      wz[is.zero, iam(1, 3, M)]= -(trigamma(1)/2 + digamma(1)*
-                                   (digamma(1)/2+1)) / sigma[is.zero]
-      wz[is.zero, iam(2, 3, M)] <- (-dgammadx(2, 3)/6 + dgammadx(1, 1) +
-                                   2*dgammadx(1, 2) +
-                                   2*dgammadx(1, 3)/3) / sigma[is.zero]
+      wz[is.zero, iam(1, 2, M)] <-
+        (digamma(2) + 2 * (EulerM-1)) / sigma[is.zero]^2
+      wz[is.zero, iam(1, 3, M)] <-
+       -(trigamma(1) / 2 + digamma(1) * (digamma(1)/2+1)) / sigma[is.zero]
+      wz[is.zero, iam(2, 3, M)] <-
+        (-dgammadx(2, 3)/6 + dgammadx(1, 1) +
+                           2*dgammadx(1, 2) +
+                           2*dgammadx(1, 3)/3) / sigma[is.zero]
 
       if (FALSE ) {
         wz[, iam(1, 2, M)] <- 2 * r.vec / sigma^2
         wz[, iam(2, 2, M)] <- -4 * r.vec * digamma(r.vec+1) + 2 * r.vec +
-                              (4 * dgammadx(r.vec+1, deriv.arg = 1) - 
-          3 * dgammadx(r.vec+1, deriv.arg = 2)) / gamma(r.vec)  # Not checked
+                              (4 * dgammadx(r.vec+1, deriv.arg = 1) -
+          3 * dgammadx(r.vec+1,
+                       deriv.arg = 2)) / gamma(r.vec)  # Not checked
       }
     }
 
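A quick numerical check of the shape == 0 constants used above:
EulerM is the Euler-Mascheroni constant -digamma(1), and pi^2/6
equals trigamma(1).

-digamma(1)  # 0.5772157, the Euler-Mascheroni constant
pi^2 / 6     # 1.644934
trigamma(1)  # 1.644934, the same value
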
@@ -2680,252 +2683,7 @@ frechet.control <- function(save.weights = TRUE, ...) {
 
 
 
-frechet3.control <- function(save.weights = TRUE, ...) {
-    list(save.weights = save.weights)
-}
-
-
-
-
-if (FALSE)
- frechet3 <- function(anchor = NULL,
-                     ldifference = "loge",
-                     lscale = "loge",
-                     lshape = logoff(offset = -2),
-                     ilocation = NULL, iscale = NULL, ishape = NULL,
-                     nsimEIM = 250,
-                     zero = 1) {
-  lscale <- as.list(substitute(lscale))
-  escale <- link2list(lscale)
-  lscale <- attr(escale, "function.name")
-
-  lshape <- as.list(substitute(lshape))
-  eshape <- link2list(lshape)
-  lshape <- attr(eshape, "function.name")
-
-  ldiffr <- as.list(substitute(ldifference))
-  ediffr <- link2list(ldiffr)
-  ldiffr <- attr(escale, "function.name")
-
-
-
-
-  stopifnot(nsimEIM > 10, length(nsimEIM) == 1, nsimEIM == round(nsimEIM))
-
-
-  new("vglmff",
-  blurb = c("3-parameter Frechet distribution\n",
-            "Links:    ",
-            namesof("difference", link = ldiffr, earg = ediffr), ", ", 
-            namesof("scale",      link = lscale, earg = escale), ", ",
-            namesof("shape",      link = lshape, earg = eshape)),
-  constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero, M)
-  }), list( .zero = zero ))),
-  initialize = eval(substitute(expression({
-    if (ncol(cbind(y)) != 1)
-      stop("response must be a vector or a one-column matrix")
-
-    predictors.names <-
-    c(namesof("difference", .ldiffr , earg = .ediffr, short = TRUE),
-      namesof("scale",      .lscale , earg = .escale, short = TRUE),
-      namesof("shape",      .lshape , earg = .eshape, short = TRUE))
 
-    anchorpt <- if (is.Numeric( .anchor, length.arg = 1))
-               .anchor else min(y)
-    if (min(y) < anchorpt)
-      stop("anchor point is too large")
-    extra$LHSanchor <- anchorpt
-
-    if (!length(etastart)) {
-
-
-      frech.aux <- function(shapeval, y, x, w, extraargs) {
-        myprobs <- c(0.25, 0.5, 0.75)
-        myobsns <- quantile(y, probs = myprobs)
-        myquant <- (-log(myprobs))^(-1/shapeval)
-        myfit <- lsfit(x = myquant, y = myobsns, intercept = TRUE)
-        sum(myfit$resid^2)
-      } 
-
-      shape.grid <- c(100, 70, 40, 20, 12, 8, 4, 2, 1.5)
-      shape.grid <- c(1 / shape.grid, 1, shape.grid)
-      try.this <- grid.search(shape.grid, objfun = frech.aux,
-                              y = y,  x = x, w = w, maximize = FALSE,
-                              abs.arg = TRUE)
-
-      shape.init <-
-        if (length( .ishape ))
-          rep( .ishape , length.out = n) else {
-          rep(try.this , length.out = n)  # variance exists if shape > 2
-      }
-
-
-
-
-        myprobs <- c(0.25, 0.5, 0.75)
-        myobsns <- quantile(y, probs = myprobs)
-        myquant <- (-log(myprobs))^(-1/shape.init[1])
-        myfit <- lsfit(x = myquant, y = myobsns)
- plot(myobsns ~ myquant)
-
-
-      Scale.init <- if (length( .iscale )) {
-        rep( .iscale , length.out = n)
-      } else {
-        if (all(shape.init > 1)) {
-          myfit$coef[2]
-        } else {
-          rep(1.0, length.out = n)
-        }
-      }
-
-
-      locinit <- if (length( .ilocation ))
-                rep( .ilocation , length.out = n) else {
-        if (myfit$coef[1] < min(y)) {
-          rep(myfit$coef[1], length.out = n)
-        } else {
-          rep(anchorpt - 0.01 * diff(range(y)), length.out = n)
-        }
-      }
-      if (any(y <= locinit))
-        stop("initial values for 'location' are out of range")
-      if (any(anchorpt <= locinit))
-        stop("require anchor point > initial location parameter value")
-
-
-
-
-      etastart <-
-        cbind(theta2eta(anchorpt - locinit, .ldiffr),
-              theta2eta(Scale.init, .lscale), 
-              theta2eta(shape.init, .lshape))
-      }
-  }), list( .ldiffr = ldiffr, .lscale = lscale, .lshape = lshape, 
-            .ediffr = ediffr, .escale = escale, .eshape = eshape, 
-            .iscale = iscale, .ishape = ishape,
-            .ilocation = ilocation, .anchor = anchor ))),
-  linkinv = eval(substitute(function(eta, extra = NULL) {
-    loctn <- extra$LHSanchor -
-            eta2theta(eta[, 1], .ldiffr , earg = .ediffr)
-    Scale <- eta2theta(eta[, 2], .lscale , earg = .escale )
-    shape <- eta2theta(eta[, 3], .lshape , earg = .eshape )
-    ans <- rep(as.numeric(NA), length.out = length(shape))
-    okay <- shape > 1
-    ans[okay] <- loctn[okay] + Scale[okay] * gamma(1 - 1/shape[okay])
-    ans
-  }, list( .ldiffr = ldiffr, .lscale = lscale, .lshape = lshape,
-           .ediffr = ediffr, .escale = escale, .eshape = eshape ))), 
-  last = eval(substitute(expression({
-    misc$links <- c("difference" = .ldiffr ,
-                    "scale"      = .lscale ,
-                    "shape"      = .lshape)
-
-    misc$earg <- list("difference" = .ediffr,
-                      "scale"      = .escale,
-                      "shape"      = .eshape)
-
-    misc$nsimEIM <- .nsimEIM
-
-    extra$location <- loctn   # Store the location parameter estimate here
-
-  }), list( .ldiffr = ldiffr, .lscale = lscale, .lshape = lshape,
-            .ediffr = ediffr, .escale = escale, .eshape = eshape,
-            .nsimEIM = nsimEIM ))),  
-  loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta, extra = NULL,
-             summation = TRUE) {
-    loctn <- extra$LHSanchor -
-             eta2theta(eta[, 1], .ldiffr , earg = .ediffr)
-    Scale <- eta2theta(eta[, 2], .lscale , earg = .escale )
-    shape <- eta2theta(eta[, 3], .lshape , earg = .eshape )
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ll.elts <- c(w) * dfrechet(x = y, location = loctn, scale = Scale,
-                                 shape = shape, log = TRUE)
-      if (summation) {
-        sum(ll.elts)
-      } else {
-        ll.elts
-      }
-    }
-  }, list( .ldiffr = ldiffr, .lscale = lscale, .lshape = lshape,
-           .ediffr = ediffr, .escale = escale, .eshape = eshape ))),
-  vfamily = c("frechet3", "vextremes"),
-  deriv = eval(substitute(expression({
-    Difrc      <- eta2theta(eta[, 1], .ldiffr , earg = .ediffr )
-    Scale      <- eta2theta(eta[, 2], .lscale , earg = .escale )
-    shape      <- eta2theta(eta[, 3], .lshape , earg = .eshape )
-
-    loctn <- extra$LHSanchor - Difrc
-    rzedd <- Scale / (y - loctn)   # reciprocal of zedd
-
-    dl.dloct <- (shape + 1) / (y - loctn) -
-               (shape / (y - loctn)) * (rzedd)^shape
-    dl.ddifff <- -dl.dloct
-    dl.dScale <- shape * (1 - rzedd^shape) / Scale
-    dl.dshape <- 1 / shape + log(rzedd) * (1 -  rzedd^shape)
-
-    dthetas.detas <- cbind(
-      ddifff.deta <- dtheta.deta(Difrc, .ldiffr , earg = .ediffr ),
-      dScale.deta <- dtheta.deta(Scale, .lscale , earg = .escale ),
-      dShape.deta <- dtheta.deta(shape, .lshape , earg = .eshape ))
-
-    ans <-
-    c(w) * cbind(dl.ddifff,
-                 dl.dScale,
-                 dl.dshape) * dthetas.detas
-
-
-    ans
-  }), list( .ldiffr = ldiffr, .lscale = lscale, .lshape = lshape,
-            .ediffr = ediffr, .escale = escale, .eshape = eshape ))),
-  weight = eval(substitute(expression({
-
-    run.varcov <- 0
-    ind1 <- iam(NA, NA, M = M, both = TRUE, diag = TRUE)
-
-    if (length( .nsimEIM )) {
-      for (ii in 1:( .nsimEIM )) {
-          ysim <- rfrechet(n, location = loctn, scale = Scale, shape = shape)
-
-          rzedd <- Scale / (ysim - loctn)   # reciprocal of zedd
-
-          dl.dloct <- (shape + 1) / (ysim - loctn) -
-                     (shape / (ysim - loctn)) * (rzedd)^shape
-          dl.ddifff <- -dl.dloct
-
-          dl.dScale <- shape * (1 - rzedd^shape) / Scale
-          dl.dshape <- 1 / shape + log(rzedd) * (1 -  rzedd^shape)
-
-          rm(ysim)
-          temp3 <- cbind(dl.ddifff, dl.dScale, dl.dshape)
-          run.varcov <- run.varcov +
-                       temp3[, ind1$row.index] *
-                       temp3[, ind1$col.index]
-      }
-      run.varcov <- run.varcov / .nsimEIM
-
-      wz <- if (intercept.only)
-          matrix(colMeans(run.varcov),
-                 n, ncol(run.varcov), byrow = TRUE) else run.varcov
-
-      wz <- c(w) * wz * dthetas.detas[, ind1$row] *
-                       dthetas.detas[, ind1$col]
-    } else {
-      stop("argument 'nsimEIM' must be numeric")
-    }
-
- print("head(wz)")
- print( head(wz) )
- print("summary(wz) ,,,,,,,,,,,,,,,,,,")
- print( summary(wz) )
-
-    wz
-  }), list( .nsimEIM = nsimEIM ))))
-}
 
 
 rec.normal.control <- function(save.weights = TRUE, ...) {
diff --git a/R/family.genetic.R b/R/family.genetic.R
index a4aece0..8f4e045 100644
--- a/R/family.genetic.R
+++ b/R/family.genetic.R
@@ -212,332 +212,6 @@
 
 
 
-if (FALSE)
- AAaa.nohw <- function(link = "logit", ipA = NULL, iF = NULL) {
-
-
-
-
-
-  link <- as.list(substitute(link))
-  earg <- link2list(link)
-  link <- attr(earg, "function.name")
-
-
-  new("vglmff",
-  blurb = c("AA-Aa-aa phenotype (without Hardy-Weinberg assumption)\n\n",
-            "Links:    ",
-            namesof("pA", link, earg = earg), ", ", 
-            namesof("f",  "identitylink", tag = FALSE)),
-  deviance = Deviance.categorical.data.vgam,
-  initialize = eval(substitute(expression({
-    mustart.orig <- mustart
-
-    delete.zero.colns <- FALSE
-    eval(process.categorical.data.VGAM)
-
-    if (length(mustart.orig))
-      mustart <- mustart.orig
-
-    ok.col.ny <- c("AA","Aa","aa")
-    if (length(col.ny <- colnames(y)) == length(ok.col.ny) &&
-        setequal(ok.col.ny, col.ny)) {
-        if (!all(ok.col.ny == col.ny))
-          stop("the columns of the response matrix should have names ",
-               "(output of colnames()) ordered as c('AA','Aa','aa')")
-    }
-
-    predictors.names <-
-        c(namesof("pA", .link , earg = .earg , tag = FALSE),
-          namesof("f",  "identitylink", earg = list(), tag = FALSE))
-
-    if (is.null(etastart)) {
-      pA <- if (is.numeric( .ipA )) rep( .ipA , len = n) else
-            c(sqrt(mustart[, 1] - mustart[, 2] / 2))
-      f <- if (is.numeric( .iF )) rep( .iF , len = n) else
-           rep(0.01, len = n)  # 1- mustart[, 2]/(2*pA*(1-pA))
-      if (any(pA <= 0) || any(pA >= 1))
-        stop("bad initial value for 'pA'")
-      etastart <- cbind(theta2eta(pA, .link , earg = .earg ),
-                        theta2eta(f,  "identitylink"))
-      mustart <- NULL  # Since etastart has been computed.
-    }
-  }), list( .link = link, .ipA = ipA, .iF = iF, .earg = earg))),
-
-  linkinv = eval(substitute(function(eta, extra = NULL) {
-    pA <- eta2theta(eta[, 1], link = .link , earg = .earg )
-    f  <- eta2theta(eta[, 2], link = "identitylink", earg = list())
-    cbind(AA = pA^2+pA*(1-pA)*f,
-          Aa = 2*pA*(1-pA)*(1-f),
-          aa = (1-pA)^2 + pA*(1-pA)*f)
-  }, list( .link = link, .earg = earg))),
-
-  last = eval(substitute(expression({
-    misc$link <-    c(pA = .link , f = "identitylink")
-
-    misc$earg <- list(pA = .earg , f = list() )
-
-    misc$expected <- TRUE
-  }), list( .link = link, .earg = earg))),
-
-
-  loglikelihood = function(mu, y, w, residuals = FALSE, eta, extra = NULL)
-    if (residuals)
-      stop("loglikelihood residuals not implemented yet") else {
-      sum(dmultinomial(x = w * y, size = w, prob = mu,
-                       log = TRUE, dochecking = FALSE))
-    },
-  vfamily = c("AAaa.nohw", "vgenetic"),
-  deriv = eval(substitute(expression({
-    pA <- eta2theta(eta[, 1], link = .link , earg = .earg )
-    f  <- eta2theta(eta[, 2], link = "identitylink")
-    dP1 <- cbind(f + 2*pA*(1-f),
-                 2*(1-f)*(1-2*pA),
-                 -2*(1-pA) +f*(1-2*pA))
-    dP2 <- cbind(pA*(1-pA),
-                 -2*pA*(1-pA),
-                 pA*(1-pA))
-    dl1 <- rowSums(y * dP1 / mu)
-    dl2 <- rowSums(y * dP2 / mu)
-
-    dPP.deta <- dtheta.deta(pA, link = .link , earg = .earg )
-
-    c(w) * cbind(dPP.deta * dl1,
-                 dl2)
-  }), list( .link = link, .earg = earg))),
-  weight = eval(substitute(expression({
-    dPP <- array(c(dP1, dP2), c(n, 3, 2))
-    dPP.deta <- cbind(dtheta.deta(pA, link = .link , earg = .earg ),
-                      dtheta.deta(f,  link = "identitylink"))
-    wz <- matrix(as.numeric(NA), n, dimm(M))  # dimm(M)==3 because M==2
-    for (i1 in 1:M)
-      for (i2 in i1:M) {
-        index <- iam(i1, i2, M)
-        wz[,index] <- rowSums(dPP[,,i1,drop = TRUE] *
-                              dPP[,,i2,drop = TRUE] / mu) *
-                              dPP.deta[, i1] * dPP.deta[, i2]
-      }
-    c(w) * wz
-  }), list( .link = link, .earg = earg))))
-}
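The @linkinv slot above is the standard genotype model for a biallelic locus with allele frequency pA and inbreeding coefficient f; setting f = 0 recovers Hardy-Weinberg proportions. The same probabilities in isolation, with illustrative values:

    genotype.probs <- function(pA, f) {
      cbind(AA = pA^2 + pA * (1 - pA) * f,
            Aa = 2 * pA * (1 - pA) * (1 - f),
            aa = (1 - pA)^2 + pA * (1 - pA) * f)
    }
    rowSums(genotype.probs(pA = c(0.3, 0.5), f = 0.05))  # each row sums to 1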
-
-
-
-
-
- if (FALSE)
- AB.Ab.aB.ab2 <- function(link = "logit", init.p = NULL) {
-
-
-
-
-
-  link <- as.list(substitute(link))
-  earg <- link2list(link)
-  link <- attr(earg, "function.name")
-
-
-  new("vglmff",
-  blurb = c("AB-Ab-aB-ab2 phenotype\n\n",
-            "Links:    ",
-            namesof("p", link, earg = earg)),
-  deviance = Deviance.categorical.data.vgam,
-  initialize = eval(substitute(expression({
-    mustart.orig <- mustart
-
-    delete.zero.colns <- FALSE
-    eval(process.categorical.data.VGAM)
-    predictors.names <- namesof("p", .link , earg = .earg , tag = FALSE)
-
-    if (length(mustart.orig)) {
-      mustart <- mustart.orig
-    }
-
-    ok.col.ny <- c("AB","Ab","aB","ab")
-    if (length(col.ny <- colnames(y)) == length(ok.col.ny) &&
-           setequal(ok.col.ny, col.ny)) {
-            if (!all(ok.col.ny == col.ny))
-                stop("the columns of the response matrix should have names ",
-                     "(output of colnames()) ordered as c('AB','Ab','aB','ab')")
-    }
-
-    mustart <- (mustart + y) / 2
-
-    if (is.null(etastart)) {
-      p.init <- if (is.numeric( .init.p )) rep( .init.p , n) else
-                rep(c(1 - 2 * sqrt(weighted.mean(mustart[, 4], w))), n)
-      p.init <- ifelse(p.init < 0.01, 0.01, p.init)
-      p.init <- ifelse(p.init > 0.99, 0.99, p.init)
-      etastart <- theta2eta(p.init, .link , earg = .earg )
-      mustart <- NULL  # Since etastart has been computed.
-    }
-    }), list( .link = link, .init.p=init.p, .earg = earg))),
-    linkinv = eval(substitute(function(eta,extra = NULL) {
-        p <- eta2theta(eta, link = .link , earg = .earg )
-        cbind("AB" = (2+(1-p)^2),
-              "Ab" = (1-(1-p)^2),
-              "aB" = (1-(1-p)^2),
-              "ab" = (1-p)^2) / 4
-    }, list( .link = link, .earg = earg) )),
-
-  last = eval(substitute(expression({
-    misc$link <-    c(p = .link )
-
-    misc$earg <- list(p = .earg )
-
-    misc$expected <- TRUE
-  }), list( .link = link, .earg = earg) )),
-
-
-  loglikelihood = function(mu, y, w, residuals = FALSE, eta, extra = NULL)
-    if (residuals)
-      stop("loglikelihood residuals not implemented yet") else {
-      sum(dmultinomial(x = w * y, size = w, prob = mu,
-                       log = TRUE, dochecking = FALSE))
-    },
-  vfamily = c("AB.Ab.aB.ab2", "vgenetic"),
-  deriv = eval(substitute(expression({
-    pp <- eta2theta(eta, link = .link , earg = .earg )
-    dP1 <- cbind(-0.5*(1-pp),
-                  0.5*(1-pp),
-                  0.5*(1-pp),
-                 -0.5*(1-pp))
-    dl1 <- rowSums(y * dP1 / mu)
-    dPP.deta <- dtheta.deta(pp, link = .link , earg = .earg )
-    c(w) * dPP.deta * dl1
-  }), list( .link = link, .earg = earg) )),
-  weight = eval(substitute(expression({
-    wz <- rowSums(dP1 * dP1 / mu) * dPP.deta^2
-    c(w) * wz
-  }), list( .link = link, .earg = earg) )))
-}
-
-
-
-
-
-
-
-
-
- if (FALSE)
- A1A2A3.orig <- function(link = "logit", ip1 = NULL, ip2 = NULL) {
-  link <- as.list(substitute(link))
-  earg <- link2list(link)
-  link <- attr(earg, "function.name")
-
-
-
-
-  new("vglmff",
-  blurb = c("A1A2A3 Allele System ",
-            "(A1A1, A1A2, A2A2, A1A3, A2A3, A3A3)\n\n",
-            "Links:    ",
-            namesof("pA", link, earg = earg), ", ", 
-            namesof("pB", link, earg = earg, tag = FALSE)),
-  deviance = Deviance.categorical.data.vgam,
-  infos = eval(substitute(function(...) {
-    list(M1 = 2,
-         Q1 = 6,
-         multipleResponses = FALSE,
-         expected = TRUE,
-         link = c("pA" = .link , "pB" = .link ))
-  }, list( .link = link ))),
-
-  initialize = eval(substitute(expression({
-    mustart.orig <- mustart
-
-    delete.zero.colns <- FALSE
-    eval(process.categorical.data.VGAM)
-
-    if (length(mustart.orig))
-      mustart <- mustart.orig
-
-        ok.col.ny <- c("A1A1", "A1A2", "A2A2", "A1A3", "A2A3", "A3A3")
-        if (length(col.ny <- colnames(y)) == length(ok.col.ny) &&
-           setequal(ok.col.ny, col.ny)) {
-            if (!all(ok.col.ny == col.ny))
-                stop("the columns of the response matrix should have names ",
-                     "(output of colnames()) ordered as ",
-                     "c('A1A1', 'A1A2', 'A2A2', 'A1A3', 'A2A3', 'A3A3')")
-        }
-    
-     predictors.names <-
-          c(namesof("pA", .link , earg = .earg , tag = FALSE),
-            namesof("pB", .link , earg = .earg , tag = FALSE))
-    mustart <- (y + mustart) / 2
-
-
-    if (is.null(etastart)) {
-        p1 <- if (is.numeric(.ip1 )) rep( .ip1 , len = n) else
-               rep(c(sqrt( weighted.mean(mustart[, 1], w) )), len = n)
-        p2 <- if (is.numeric(.ip2 )) rep( .ip2 , len = n) else
-               rep(c(sqrt( weighted.mean(mustart[, 3], w) )), len = n)
-        etastart <- cbind(theta2eta(p1, .link , earg = .earg ),
-                          theta2eta(p2, .link , earg = .earg ))
-        mustart <- NULL  # Since etastart has been computed.
-    }
-  }), list( .link = link, .ip1 = ip1, .ip2 = ip2, .earg = earg))),
-  linkinv = eval(substitute(function(eta, extra = NULL) {
-    p1 <- eta2theta(eta[, 1], link = .link , earg = .earg )
-    p2 <- eta2theta(eta[, 2], link = .link , earg = .earg )
-    qq <- abs(1 - p1 - p2)
-    cbind(A1A1 = p1*p1,
-          A1A2 = 2*p1*p2,
-          A2A2 = p2*p2,
-          A1A3 = 2*p1*qq,
-          A2A3 = 2*p2*qq,
-          A3A3 = qq*qq)
-  }, list( .link = link, .earg = earg))),
-
-  last = eval(substitute(expression({
-    misc$link <-    c(p1 = .link , p2 = .link )
-
-    misc$earg <- list(p1 = .earg , p2 = .earg )
-
-    misc$expected <- TRUE
-  }), list( .link = link, .earg = earg))),
-
-
-  loglikelihood = function(mu, y, w, residuals = FALSE,
-                           eta, extra = NULL)
-    if (residuals)
-      stop("loglikelihood residuals not implemented yet") else {
-      sum(dmultinomial(x = w * y, size = w, prob = mu,
-                       log = TRUE, dochecking = FALSE))
-    },
-  vfamily = c("A1A2A3", "vgenetic"),
-  deriv = eval(substitute(expression({
-    p1 <- eta2theta(eta[, 1], link = .link , earg = .earg )
-    p2 <- eta2theta(eta[, 2], link = .link , earg = .earg )
-
-    dl.dp1 <- (2*y[, 1] + y[, 2] + y[, 4]) / p1 -
-              (2*y[, 6] + y[, 4] + y[, 5]) / (1 - p1 - p2)
-    dl.dp2 <- (2*y[, 3] + y[, 2] + y[, 5]) / p2 -
-              (2*y[, 6] + y[, 4] + y[, 5]) / (1 - p1 - p2)
-
-    dp1.deta <- dtheta.deta(p1, link = .link , earg = .earg )
-    dp2.deta <- dtheta.deta(p2, link = .link , earg = .earg )
-
-    c(w) * cbind(dl.dp1 * dp1.deta,
-              dl.dp2 * dp2.deta)
-  }), list( .link = link, .earg = earg))),
-  weight = eval(substitute(expression({
-    qq <- 1-p1-p2
-    wz <- matrix(as.numeric(NA), n, dimm(M))  # dimm(M)==3 because M==2
-    ned2l.dp12  <-  2 * (1/p1 + 1/qq)
-    ned2l.dp22  <-  2 * (1/p2 + 1/qq)
-    ned2l.dp1dp2 <-  2 / qq
-    wz[, iam(1, 1, M)] <- ned2l.dp12 * dp1.deta^2
-    wz[, iam(2, 2, M)] <- ned2l.dp22 * dp2.deta^2
-    wz[, iam(1, 2, M)] <- ned2l.dp1dp2 * dp1.deta * dp2.deta
-    c(w) * wz
-  }), list( .link = link, .earg = earg))))
-}
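Likewise, the A1A2A3 @linkinv slot above is plain Hardy-Weinberg with three alleles, the third frequency being fixed by p1 + p2 + p3 = 1. A quick check with illustrative values:

    p1 <- 0.2; p2 <- 0.3; p3 <- 1 - p1 - p2
    probs <- c(A1A1 = p1^2,    A1A2 = 2*p1*p2, A2A2 = p2^2,
               A1A3 = 2*p1*p3, A2A3 = 2*p2*p3, A3A3 = p3^2)
    sum(probs)  # 1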
-
-
-
-
  MNSs <- function(link = "logit",
                   imS = NULL, ims = NULL, inS = NULL) {
 
diff --git a/R/family.glmgam.R b/R/family.glmgam.R
index deb5d44..0bc2452 100644
--- a/R/family.glmgam.R
+++ b/R/family.glmgam.R
@@ -379,8 +379,8 @@
     }
     for (ii in 1:M) {
       index500 <- !is.finite(tmp200[, ii]) |
-                   (abs(tmp200[, ii]) < .Machine$double.eps)
-      if (any(index500)) { # Diagonal 0's are bad
+                  (abs(tmp200[, ii]) < .Machine$double.eps)
+      if (any(index500)) {  # Diagonal 0s are bad
         tmp200[index500, ii] <- .Machine$double.eps
       }
     }
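The hunk above guards the IRLS working weights: any diagonal entry that is non-finite or numerically zero is clamped to machine epsilon so the weight matrix stays usable. The same clamp on a plain vector:

    tmp <- c(0, NA, 1e-3, Inf)
    bad <- !is.finite(tmp) | abs(tmp) < .Machine$double.eps
    tmp[bad] <- .Machine$double.eps
    tmp  # the 0, NA, and Inf entries all become 2.220446e-16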
@@ -1539,327 +1539,6 @@ rinv.gaussian <- function(n, mu, lambda) {
 
 
 
-if (FALSE)
- matched.binomial <-
-  function(multiple.responses = NULL, link = "logit",
-           parallel = TRUE,
-           smallno = .Machine$double.eps^(3/4)) {
-  link <- as.list(substitute(link))
-  earg <- link2list(link)
-  link <- attr(earg, "function.name")
-
-
-  if (!is.Numeric(smallno, positive = TRUE,
-                  length.arg = 1) ||
-      smallno > 1e-4)
-    stop("bad input for 'smallno'")
-  if (is.logical(parallel) && !parallel)
-    stop("'parallel' must be TRUE")
-
-    temp <- terms(multiple.responses)
-    multiple.responses <- attr(temp,"term.labels")
-    if (length(multiple.responses) != 1)
-      stop("cannot obtain the matching variable")
-    if (!is.character(multiple.responses) ||
-        length(multiple.responses) != 1) {
-      stop("bad input for 'multiple.responses'")
-    }
-
-    new("vglmff",
-    blurb = c("Matched binomial model (intercepts fitted)\n\n", 
-              "Link:     ", namesof("mu[,j]", link, earg = earg)),
-    constraints = eval(substitute(expression({
-        constraints <- cm.VGAM(matrix(1, M, 1), x = x,
-                               bool = .parallel ,
-                               constraints = constraints,
-                               apply.int = TRUE)
-        constraints[[extra$multiple.responses]] <- diag(M)
-
-        specialCM <- list(a = vector("list", M-1))
-        for (ii in 1:(M-1)) {
-          specialCM[[1]][[ii]] <-
-            (constraints[[extra$multiple.responses]])[, 1+ii,drop = FALSE]
-        }
-        names(specialCM) = extra$multiple.responses
-    }), list( .parallel = parallel ))),
-    initialize = eval(substitute(expression({
-        if (!all(w == 1))
-            extra$orig.w = w
-
-        multiple.responses <- .multiple.responses
-
-        NCOL <- function (x) 
-            if (is.array(x) && length(dim(x)) > 1 ||
-            is.data.frame(x)) ncol(x) else as.integer(1)
-
-        if (NCOL(y) == 1) {
-            if (is.factor(y)) y <- y != levels(y)[1]
-            nvec <- rep(1, n)
-            if (!all(y >= 0 & y <= 1))
-                stop("response values must be in [0, 1]")
-            mustart <- (0.5 + w * y) / (1 + w)
-            no.successes <- w * y
-            if (any(abs(no.successes - round(no.successes)) > 0.001))
-                stop("Number of successes must be integer-valued")
-        } else if (NCOL(y) == 2) {
-            if (any(abs(y - round(y)) > 0.001))
-                stop("Count data must be integer-valued")
-            nvec <- y[, 1] + y[, 2]
-            y <- ifelse(nvec > 0, y[, 1] / nvec, 0)
-            w <- w * nvec
-            mustart <- (0.5 + nvec * y) / (1 + nvec)
-        } else 
-             stop("Response not of the right form")
-
-    temp1 <- attr(x, "assign")
-    if (colnames(x)[1] != "(Intercept)")
-      stop("x must have an intercept")
-    M <- CCC <- length(temp1[[multiple.responses]]) +
-                (colnames(x)[1] == "(Intercept)")
-    temp9 <- x[,temp1[[multiple.responses]],drop = FALSE]
-    temp9 <- temp9 * matrix(2:CCC, n, CCC-1, byrow = TRUE)
-    temp9 <- apply(temp9, 1, max)
-      temp9[temp9 == 0] <- 1
-      extra$NoMatchedSets <- CCC
-      extra$n <- n
-      extra$M <- M
-      extra$multiple.responses <- multiple.responses
-      extra$index9 <- temp9
-
-    predictors.names <-
-      namesof("mu", .link , earg = .earg , short = TRUE)
-    predictors.names <- rep(predictors.names, len = M)
-  }), list( .link = link, .earg = earg,
-            .multiple.responses = multiple.responses ))),
-  linkinv = eval(substitute(function(eta, extra = NULL) {
-    mu <- eta2theta(eta, link = .link , earg = .earg )
-    mu[cbind(1:extra$n, extra$index9)]
-  }, list( .link = link, .earg = earg  ))),
-  last = eval(substitute(expression({
-    misc$link <- rep( .link , length = M)
-    names(misc$link) <- if (M > 1) paste("mu(matched set ",
-        1:M, ")", sep = "") else "mu"
-
-    misc$earg <- vector("list", M)
-    names(misc$earg) <- names(misc$link)
-    for (ii in 1:M)
-      misc$earg[[ii]] <- .earg
-
-    misc$expected <- TRUE
-  }), list( .link = link, .earg = earg))),
-  linkfun = eval(substitute(function(mu, extra = NULL) {
-    temp <- theta2eta(mu, .link , earg = .earg )
-    matrix(temp, extra$n, extra$M)
-  }, list( .link = link, .earg = earg))),
-  loglikelihood =
-    function(mu, y, w, residuals = FALSE, eta, extra = NULL,
-             summation = TRUE) {
-    if (residuals) {
-      w * (y / mu - (1-y) / (1-mu))
-    } else {
-
-      ycounts <- if (is.numeric(extra$orig.w)) y * w / extra$orig.w else
-                 y * w # Convert proportions to counts
-      nvec <- if (is.numeric(extra$orig.w)) round(w / extra$orig.w) else
-              round(w)
-
-      smallno <- 1.0e6 * .Machine$double.eps
-      if (max(abs(ycounts - round(ycounts))) > smallno)
-          warning("converting 'ycounts' to integer in @loglikelihood")
-      ycounts <- round(ycounts)
-
-      ll.elts <-
-        (if (is.numeric(extra$orig.w)) extra$orig.w else 1) *
-         dbinom(x = ycounts, size = nvec, prob = mu, log = TRUE)
-      if (summation) {
-        sum(ll.elts)
-      } else {
-        ll.elts
-      }
-    }
-  },
-  vfamily = c("matched.binomial", "vcategorical"),
-  deriv = eval(substitute(expression({
-    answer <- if ( .link == "logit") {
-      w * (y - mu)
-    } else if ( .link == "cloglog") {
-      mu.use <- mu
-        smallno <- 100 * .Machine$double.eps
-        mu.use[mu.use < smallno] <- smallno
-        mu.use[mu.use > 1 - smallno] <- 1 - smallno
-        -w * (y - mu) * log1p(-mu.use) / mu.use
-      } else {
-        w * dtheta.deta(mu, link = .link , earg = .earg ) *
-        (y/mu - 1)/(1-mu)
-      }
-      result <- matrix(0, n, M)
-      result[cbind(1:n, extra$index9)] <- answer
-    result
-  }), list( .link = link, .earg = earg))),
-  weight = eval(substitute(expression({
-    tmp100 <- mu*(1-mu)
-    answer <- if ( .link == "logit") {
-      cbind(w * tmp100)
-    } else if ( .link == "cloglog") {
-      cbind(w * (1-mu.use) * (log1p(-mu.use))^2 / mu.use )
-    } else {
-      cbind(w * dtheta.deta(mu, link = .link ,
-                            earg = .earg )^2 / tmp100)
-    }
-
-    result <- matrix( .smallno, n, M)
-    result[cbind(1:n, extra$index9)] <- answer
-    result
-  }), list( .link = link, .earg = earg, .smallno = smallno ))))
-}
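The @initialize slot above accepts a two-column (successes, failures) response and converts it to a proportion plus prior weights, the usual convention for binomial family functions. That conversion in isolation, with made-up counts:

    y2 <- cbind(succ = c(3, 0, 5), fail = c(7, 2, 0))
    nvec <- y2[, 1] + y2[, 2]
    prop <- ifelse(nvec > 0, y2[, 1] / nvec, 0)
    w <- nvec                # the binomial sizes become the prior weights
    cbind(prop, w)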
-
-
-
-
-mypool <- function(x, index) {
-  answer <- x
-  uindex <- unique(index)
-  for (ii in uindex) {
-    ind0 <- (index == ii)
-    answer[ind0] <- sum(x[ind0])
-  }
-  answer
-}
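mypool() replaces each element of x by the sum over its group, so the output has the same length as the input. For example:

    mypool(c(1, 2, 3, 4), index = c("a", "a", "b", "b"))  # returns 3 3 7 7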
-
-
-
-
-
-
-
-
-
- if (FALSE)
- mbino <- function() {
-    link <- "logit"
-    earg <- list()
-    parallel <- TRUE
-
-  link <- as.list(substitute(link))
-  earg <- link2list(link)
-  link <- attr(earg, "function.name")
-
-
-    if (is.logical(parallel) && !parallel)
-        stop("'parallel' must be TRUE")
-
-
-    new("vglmff",
-    blurb = c("Matched binomial model (intercepts not fitted)\n\n", 
-              "Link:     ", namesof("mu[,j]", link, earg = earg)),
-    constraints = eval(substitute(expression({
-        constraints <- cm.VGAM(matrix(1, M, 1), x = x,
-                           bool = .parallel , 
-                           constraints = constraints,
-                               apply.int = FALSE)
-    }), list( .parallel = parallel ))),
-    initialize = eval(substitute(expression({
-        if (colnames(x)[1] == "(Intercept)")
-            stop("the model matrix must not have an intercept")
-
-        NCOL = function (x) 
-            if (is.array(x) && length(dim(x)) > 1 ||
-            is.data.frame(x)) ncol(x) else as.integer(1)
-
-        if (NCOL(y) == 1) {
-            if (is.factor(y)) y = y != levels(y)[1]
-            nvec = rep(1, n)
-            if (!all(y >= 0 & y <= 1))
-                stop("response values must be in [0, 1]")
-            mustart = (0.5 + w * y) / (1 + w)
-            no.successes = w * y
-            if (any(abs(no.successes - round(no.successes)) > 0.001))
-                stop("Number of successes must be integer-valued")
-        } else if (NCOL(y) == 2) {
-            if (any(abs(y - round(y)) > 0.001))
-                stop("Count data must be integer-valued")
-            nvec = y[, 1] + y[, 2]
-            y = ifelse(nvec > 0, y[, 1] / nvec, 0)
-            w = w * nvec
-            mustart = (0.5 + nvec * y) / (1 + nvec)
-        } else 
-             stop("Response not of the right form")
-
-        if (!length(etastart))
-          etastart <- theta2eta(mustart, link = "logit", earg = list())
-
-        temp1 = attr(x, "assign")
-        multiple.responses = extra$multiple.responses
-        if (length(multiple.responses) != n)
-          stop("input extra$multiple.responses doesn't look right")
-
-        if (any(y != 0 & y != 1))
-            stop("response vector must have 0 or 1 values only")
-        xrle = rle(multiple.responses)
-        if (length(unique(multiple.responses)) != length(xrle$lengths))
-          stop("extra$multiple.responses must take on contiguous values")
-
-        temp9 = factor(multiple.responses)
-        extra$NoMatchedSets = levels(temp9)
-        extra$n = n
-        extra$M = M
-        extra$rlex = xrle
-        extra$index9 = temp9
-        predictors.names <-
-          namesof("mu", .link , earg = .earg , short = TRUE)
-  }), list( .link = link, .earg = earg,
-            .multiple.responses = multiple.responses ))),
-  linkinv = eval(substitute(function(eta, extra = NULL) {
-      denominator <- exp(eta)
-      numerator <- mypool(denominator, extra$multiple.responses)
-      numerator / denominator
-  }, list( .link = link, .earg = earg  ))),
-  last = eval(substitute(expression({
-    misc$link <-     c(mu = .link )
-
-    misc$earg <-  list(mu = .earg )
-
-    misc$expected <- TRUE
-  }), list( .link = link, .earg = earg))),
-  loglikelihood =
-    function(mu, y, w, residuals = FALSE, eta, extra = NULL,
-             summation = TRUE) {
-    if (residuals) {
-      c(w) * (y/mu - (1-y)/(1-mu))
-    } else {
-      ll.elts <- c(w) * (y*log(mu) + (1-y)*log1p(-mu))
-      if (summation) {
-        sum(ll.elts)
-      } else {
-        ll.elts
-      }
-    }
-  },
-  vfamily = c("mbin", "vcategorical"),
-  deriv = eval(substitute(expression({
-    answer <-
-      if ( .link == "logit") {
-        w * (y - mu)
-      } else stop("can only handle the logit link")
-        answer
-  }), list( .link = link, .earg = earg))),
-  weight = eval(substitute(expression({
-    tmp100 <- mu*(1-mu)
-    answer <- if ( .link == "logit") {
-        cbind(w * tmp100)
-    } else stop("can only handle the logit link")
-
-    result <- matrix( .smallno, n, M)
-    result[cbind(1:n, extra$index9)] <- answer
-    result
-  }), list( .link = link, .earg = earg, .smallno = smallno ))))
-}
-
-
-
-
-
-
 
 
 
diff --git a/R/family.nonlinear.R b/R/family.nonlinear.R
index 1e3a21d..385272a 100644
--- a/R/family.nonlinear.R
+++ b/R/family.nonlinear.R
@@ -312,8 +312,8 @@ micmen.control <- function(save.weights = TRUE, ...) {
 
     if ( .firstDeriv == "rpar") {
       if (FALSE) {
-        wz <-  dmus.dthetas[,index$row] *  dmus.dthetas[,index$col] *
-               dthetas.detas[,index$row] * dthetas.detas[,index$col]
+        wz <-   dmus.dthetas[, index$row] *  dmus.dthetas[, index$col] *
+               dthetas.detas[, index$row] * dthetas.detas[, index$col]
         if (M > 1)
           wz[, 2:M] <- wz[, 2:M] + rpar
       } else {
diff --git a/R/family.normal.R b/R/family.normal.R
index e354cac..54fa678 100644
--- a/R/family.normal.R
+++ b/R/family.normal.R
@@ -9,6 +9,9 @@
 
 
 
+
+
+
 VGAM.weights.function <- function(w, M, n) {
 
 
@@ -1055,17 +1058,17 @@ rfoldnorm <- function(n, mean = 0, sd = 1, a1 = 1, a2=1) {
 
 
  if (FALSE) {
-        if ((ncol(cbind(w)) != 1) || any(w != round(w)))
-            stop("'weights' must be a vector or a one-column matrix ",
-                 "with integer values")
-            m1d <- meany <- weighted.mean(y, w)
-            m2d <- weighted.mean(y^2, w)
-            stddev <- sqrt( sum(c(w) * junk$resid^2) / junk$df.residual )
-            Ahat <- m1d^2 / m2d
-            thetahat <- sqrt(max(1/Ahat -1, 0.1))
-            mean.init <- rep(if (length( .imean)) .imean else
+      if ((ncol(cbind(w)) != 1) || any(w != round(w)))
+        stop("'weights' must be a vector or a one-column matrix ",
+             "with integer values")
+      m1d <- meany <- weighted.mean(y, w)
+      m2d <- weighted.mean(y^2, w)
+      stddev <- sqrt( sum(c(w) * junk$resid^2) / junk$df.residual )
+      Ahat <- m1d^2 / m2d
+      thetahat <- sqrt(max(1/Ahat -1, 0.1))
+      mean.init <- rep(if (length( .imean)) .imean else
                 thetahat * sqrt((stddev^2 + meany^2) * Ahat), len = n)
-            sd.init <- rep(if (length( .isd)) .isd else
+      sd.init <- rep(if (length( .isd)) .isd else
                 sqrt((stddev^2 + meany^2) * Ahat), len = n)
 }
 
@@ -1587,8 +1590,16 @@ tobit.control <- function(save.weights = TRUE, ...) {
       sd.init <- matrix(0.0, n, ncoly)
       for (ii in 1:ncol(y)) {
         use.i11 <- i11[, ii]
-        mylm <- lm.wfit(x = cbind(x[!use.i11, ]),
-                        y = y[!use.i11, ii], w = w[!use.i11, ii])
+
+        if (sum(!use.i11) < ncol(x)) {
+          use.i11 <- rep(FALSE, length = n)
+        }
+        mylm <- lm.wfit(x = x[!use.i11,    , drop = FALSE],
+                        y = y[!use.i11, ii],
+                        w = w[!use.i11, ii])
+
+                     
+
         sd.init[, ii] <- sqrt( sum(w[!use.i11, ii] * mylm$resid^2)
                               / mylm$df.residual ) * 1.5
         mu.init[!use.i11, ii] <- mylm$fitted.values
@@ -2159,21 +2170,18 @@ tobit.control <- function(save.weights = TRUE, ...) {
 
   last = eval(substitute(expression({
     M1 <- extra$M1
-    misc$link <- c(rep( .lmean , length = ncoly),
-                   rep( .lsdev , length = ncoly))
-    misc$link <- misc$link  [interleave.VGAM(M1 * ncoly, M = M1)]
+
     temp.names <- c(mynames1, mynames2)
     temp.names <- temp.names[interleave.VGAM(M1 * ncoly, M = M1)]
-    names(misc$link) <- temp.names
-
-
+    misc$link <- rep( .lmean , length = M1 * ncoly)
     misc$earg <- vector("list", M1 * ncoly)
-    names(misc$earg) <- temp.names
+    names(misc$link) <- names(misc$earg) <- temp.names
     for (ii in 1:ncoly) {
+      misc$link[ M1*ii-1 ] <- .lmean
+      misc$link[ M1*ii   ] <- if ( .var.arg ) .lvare else .lsdev
       misc$earg[[M1*ii-1]] <- .emean
       misc$earg[[M1*ii  ]] <- if ( .var.arg ) .evare else .esdev
     }
-    names(misc$earg) <- temp.names
 
     misc$var.arg <- .var.arg
     misc$M1 <- M1
@@ -3067,191 +3075,6 @@ tobit.control <- function(save.weights = TRUE, ...) {
 
 
 
-if (FALSE)
- lognormal3 <- function(lmeanlog = "identitylink", lsdlog = "loge",
-                        powers.try = (-3):3,
-                        delta = NULL, zero = 2) {
-
-
-  if (length(delta) &&
-      !is.Numeric(delta, positive = TRUE))
-    stop("bad input for argument argument 'delta'")
-
-
-
-  lmulog <- as.list(substitute(lmeanlog))
-  emulog <- link2list(lmulog)
-  lmulog <- attr(emulog, "function.name")
-
-  lsdlog <- as.list(substitute(lsdlog))
-  esdlog <- link2list(lsdlog)
-  lsdlog <- attr(esdlog, "function.name")
-
-
-
-
-  if (length(zero) &&
-     (!is.Numeric(zero, integer.valued = TRUE, positive = TRUE) ||
-     zero > 3))
-    stop("bad input for argument argument 'zero'")
-
-
-
-
-
-  new("vglmff",
-  blurb = c("Three-parameter (univariate) lognormal distribution\n\n",
-          "Links:    ",
-          namesof("meanlog", lmulog, earg = emulog, tag = TRUE), "; ",
-          namesof("sdlog",   lsdlog, earg = esdlog, tag = TRUE), "; ",
-          namesof("lambda", "identitylink", earg = list(), tag = TRUE)),
-  constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero , M)
-  }), list( .zero = zero ))),
-  initialize = eval(substitute(expression({
-
-    w.y.check(w = w, y = y)
-
-
-
-    predictors.names <- 
-       c(namesof("meanlog", .lmulog , earg = .emulog, tag = FALSE), 
-         namesof("sdlog",   .lsdlog , earg = .esdlog, tag = FALSE),
-         "lambda")
-
-    if (!length(etastart)) {
-      miny <- min(y)
-      if (length( .delta)) {
-        lambda.init <- rep(miny- .delta, length = n)
-      } else {
-        pvalue.vec <- NULL
-        powers.try <- .powers.try
-        for (delta in 10^powers.try) {
-          pvalue.vec <- c(pvalue.vec,
-                         shapiro.test(sample(log(y-miny+delta),
-                         size=min(5000, length(y ))))$p.value) 
-        }
-        index.lambda <- (1:length(powers.try))[pvalue.vec ==
-                                              max(pvalue.vec)]
-        lambda.init <- miny - 10^powers.try[index.lambda]
-      }
-      mylm <- lm.wfit(x = x, y = c(log(y - lambda.init)), w = c(w))
-      sdlog.y.est <- sqrt( sum(c(w) * mylm$resid^2) / mylm$df.residual )
-      etastart <-
-        cbind(mu     = log(median(y - lambda.init)),
-              sdlog  = rep(theta2eta(sdlog.y.est, .lsdlog , earg = .esdlog ),
-                           length = n),
-              lambda = lambda.init)
-    }
-  }), list( .lmulog = lmulog, .lsdlog = lsdlog,
-            .emulog = emulog, .esdlog = esdlog,
-            .delta = delta, .powers.try = powers.try ))),
-  linkinv = eval(substitute(function(eta, extra = NULL) {
-    mymu    <- eta2theta(eta[, 1], .lmulog , earg = .emulog )
-    sdlog   <- eta2theta(eta[, 2], .lsdlog , earg = .esdlog )
-    lambda  <- eta2theta(eta[, 3], "identitylink", earg = list(theta = NULL))
-    lambda + exp(mymu + 0.5 * sdlog^2)
-  }, list( .lmulog = lmulog, .lsdlog = lsdlog,
-           .emulog = emulog, .esdlog = esdlog ))),
-  last = eval(substitute(expression({
-    misc$link <-    c("meanlog" = .lmulog ,
-                      "sdlog"   = .lsdlog ,
-                      "lambda"  = "identitylink")
-
-    misc$earg <- list("meanlog" = .emulog,
-                      "sdlog"   = .esdlog,
-                      "lambda"  = list())
-
-    misc$expected <- TRUE
-  }), list( .lmulog = lmulog, .lsdlog = lsdlog,
-            .emulog = emulog, .esdlog = esdlog ))),
-  loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta,
-             extra = NULL,
-             summation = TRUE) {
-    mymu   <- eta2theta(eta[, 1], .lmulog , earg = .emulog )
-    sdlog  <- eta2theta(eta[, 2], .lsdlog , earg = .esdlog )
-    lambda <- eta2theta(eta[, 3], "identitylink", earg = list(theta = NULL))
-    if (any(y < lambda))
-      warning("some response values are less than the threshold 'lambda'")
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ll.elts <- c(w) * dlnorm(y - lambda, meanlog = mymu,
-                               sdlog = sdlog, log = TRUE)
-      if (summation) {
-        sum(ll.elts)
-      } else {
-        ll.elts
-      }
-    }
-  }, list( .lmulog = lmulog, .lsdlog = lsdlog,
-           .emulog = emulog, .esdlog = esdlog ))),
-  vfamily = c("lognormal3"),
-
-
-
-  simslot = eval(substitute(
-  function(object, nsim) {
-
-    pwts <- if (length(pwts <- object at prior.weights) > 0)
-              pwts else weights(object, type = "prior")
-    if (any(pwts != 1)) 
-      warning("ignoring prior weights")
-    eta <- predict(object)
-    mymu   <- eta2theta(eta[, 1], .lmulog , earg = .emulog )
-    sdlog  <- eta2theta(eta[, 2], .lsdlog , earg = .esdlog )
-    lambda <- eta2theta(eta[, 3], "identitylink", earg = list(theta = NULL))
-    rlnorm(nsim * length(mymu),
-           meanlog = mymu, sdlog = sdlog) + lambda
-  }, list( .lmulog = lmulog, .lsdlog = lsdlog,
-           .emulog = emulog, .esdlog = esdlog ))),
-
-
-
-  deriv = eval(substitute(expression({
-    mymu   <- eta2theta(eta[, 1], .lmulog ,    earg = .emulog )
-    sdlog  <- eta2theta(eta[, 2], .lsdlog ,    earg = .esdlog )
-    lambda <- eta2theta(eta[, 3], "identitylink", earg = list(theta = NULL))
-
-    if (any(y < lambda))
-        warning("some response values are less than the threshold 'lambda'")
-
-    dl.dmymu <- (log(y-lambda)-mymu) / sdlog^2
-    dl.dsdlog <- -1/sdlog + (log(y-lambda)-mymu)^2 / sdlog^3
-    dl.dlambda <- (1 + (log(y-lambda)-mymu) / sdlog^2) / (y-lambda)
-
-    dmymu.deta <- dtheta.deta(mymu, .lmulog , earg = .emulog )
-    dsdlog.deta <- dtheta.deta(sdlog, .lsdlog , earg = .esdlog )
-    dlambda.deta <- dtheta.deta(lambda, "identitylink", earg = list())
-
-    c(w) * cbind(dl.dmymu   *   dmymu.deta, 
-                 dl.dsdlog  *  dsdlog.deta, 
-                 dl.dlambda * dlambda.deta)
-  }), list( .lmulog = lmulog, .lsdlog = lsdlog,
-            .emulog = emulog, .esdlog = esdlog ))),
-  weight = expression({
-    wz <- matrix(0, n, dimm(M))
-
-    ned2l.dmymu2 <- 1 / sdlog^2
-    ned2l.dsdlog <- 2 / sdlog^2
-    temp9 <- exp(-mymu + sdlog^2 / 2)
-    ned2l.dlambda2 <- exp(2*(-mymu+sdlog^2)) * (1+sdlog^2) / sdlog^2
-
-    wz[, iam(1, 1, M)] <- ned2l.dmymu2 * dmymu.deta^2
-    wz[, iam(2, 2, M)] <- ned2l.dsdlog * dsdlog.deta^2
-    wz[, iam(3, 3, M)] <- ned2l.dlambda2 * dlambda.deta^2
-    wz[, iam(1, 3, M)] <- temp9 * dmymu.deta * dlambda.deta / sdlog^2
-    wz[, iam(2, 3, M)] <- -2 * temp9 / sdlog * dsdlog.deta * dlambda.deta
-    wz <- c(w) * wz
-    wz
-  }))
-}
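The three-parameter lognormal above is an ordinary lognormal shifted by a threshold lambda, so its log-likelihood can be written directly with dlnorm(). A direct-optimization sketch on simulated data (illustrative values; note the likelihood degenerates as lambda approaches min(y), which is why the family function chooses lambda.init carefully):

    set.seed(1)
    lambda <- 2
    y <- lambda + rlnorm(500, meanlog = 1, sdlog = 0.5)
    negll <- function(par) {  # par = c(meanlog, sdlog, lambda)
      if (par[2] <= 0 || par[3] >= min(y)) return(Inf)
      -sum(dlnorm(y - par[3], meanlog = par[1], sdlog = par[2], log = TRUE))
    }
    optim(c(0, 1, min(y) - 0.5), negll)$par  # roughly c(1, 0.5, 2)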
-
-
-
-
-
 dskewnorm <- function(x, location = 0, scale = 1, shape = 0, log = FALSE) {
 
   if (!is.logical(log.arg <- log) || length(log) != 1)
@@ -3462,315 +3285,5 @@ rskewnorm <- function(n, location = 0, scale = 1, shape = 0) {
 
 
 
-if (FALSE)
- halfuninormal <-
-  function(lsd = "loge", lvar = "loge",
-           var.arg = FALSE,
-           imethod = 1,
-           isd = NULL,
-           parallel = FALSE,
-           apply.parint = FALSE,
-           zero = NULL) {
-
-
-
- warning("20121101; not working; yettodo: finish it!")
-
-  lsd <- as.list(substitute(lsd))
-  esd <- link2list(lsd)
-  lsd <- attr(esd, "function.name")
-
-  lvar <- as.list(substitute(lvar))
-  evar <- link2list(lvar)
-  lvar <- attr(evar, "function.name")
-
-  emean <- list()
-
-
-  lmean <- "identitylink"
-
-
-
-  if (length(zero) &&
-      !is.Numeric(zero, integer.valued = TRUE))
-      stop("bad input for argument 'zero'")
-
-
-  if (!is.Numeric(imethod, length.arg = 1,
-                  integer.valued = TRUE, positive = TRUE) ||
-     imethod > 4)
-      stop("argument 'imethod' must be 1 or 2 or 3 or 4")
-
-  if (!is.logical(var.arg) ||
-      length(var.arg) != 1)
-    stop("argument 'var.arg' must be a single logical")
-  if (!is.logical(apply.parint) ||
-      length(apply.parint) != 1)
-    stop("argument 'apply.parint' must be a single logical")
-
-
-  if (is.logical(parallel) && parallel && length(zero))
-    stop("set 'zero = NULL' if 'parallel = TRUE'")
-
-
-  new("vglmff",
-  blurb = c("Half-normal distribution\n\n",
-            "Links:    ",
-            if (var.arg)
-            namesof("var",  lvar, earg = evar, tag = TRUE) else
-            namesof("sd" ,  lsd,  earg = esd,  tag = TRUE),
-            "\n",
-            if (var.arg) "Variance: var zz" else "Variance: sd^2 zz"),
-
-
-  constraints = eval(substitute(expression({
-
-    constraints <- cm.VGAM(matrix(1, M, 1), x = x,
-                           bool = .parallel , 
-                           constraints = constraints,
-                           apply.int = .apply.parint )
-
-    dotzero <- .zero
-    M1 <- 2
-    eval(negzero.expression.VGAM)
-  }), list( .zero = zero,
-            .parallel = parallel, .apply.parint = apply.parint ))),
-
-  infos = eval(substitute(function(...) {
-    list(M1 = 1,
-         zero = .zero)
-  }, list( .zero = zero ))),
-
-  initialize = eval(substitute(expression({
-    orig.y <- y
-
-
-
-
-
-
-
-
-    if (length(attr(orig.y, "Prior.Weights"))) {
-      if (any(c(w) != 1))
-        warning("replacing the 'weights' argument by the 'Prior.Weights'",
-                "attribute of the response (probably due to Qvar()")
-
-
-      w <- attr(orig.y, "Prior.Weights")
-
-
-      extra$attributes.y <- attributes(orig.y)
-
-    } else {
-    }
-
-
-
-
-
-
-    temp5 <-
-    w.y.check(w = w, y = y,
-              ncol.w.max = Inf,
-              ncol.y.max = Inf,
-              out.wy = TRUE,
-              colsyperw = 1,
-              maximize = TRUE)
-    w <- temp5$w
-    y <- temp5$y
-
-
-
-    ncoly <- ncol(y)
-    M1 <- 1
-    extra$ncoly <- ncoly
-    extra$M1 <- M1
-    M <- M1 * ncoly
-
-
-
-    mynames2 <- paste(if ( .var.arg ) "var" else "sd",
-                      if (ncoly > 1) 1:ncoly else "", sep = "")
-    predictors.names <-
-        c(if ( .var.arg ) 
-          namesof(mynames2, .lvar  , earg = .evar  , tag = FALSE) else
-          namesof(mynames2, .lsd   , earg = .esd   , tag = FALSE))
-    extra$predictors.names <- predictors.names
-
-
-    if (!length(etastart)) {
-      sdev.init <- mean.init <- matrix(0, n, ncoly)
-      for (jay in 1:ncoly) {
-        jfit <- lm.wfit(x = x,  y = y[, jay], w = w[, jay])
-        mean.init[, jay] <- if ( .lmean == "loge")
-                            pmax(1/1024, y[, jay]) else
-          if ( .imethod == 1) median(y[, jay]) else
-          if ( .imethod == 2) weighted.mean(y[, jay], w = w[, jay]) else
-          if ( .imethod == 3) weighted.mean(y[, jay], w = w[, jay]) *
-                             0.5 + y[, jay] * 0.5 else
-                                 mean(jfit$fitted)
-
-        sdev.init[, jay] <-
-          if ( .imethod == 1) {
-            sqrt( sum(w[, jay] *
-                (y[, jay] - mean.init[, jay])^2) / sum(w[, jay]) )
-          } else if ( .imethod == 2) {
-            if (jfit$df.resid > 0)
-              sqrt( sum(w[, jay] * jfit$resid^2) / jfit$df.resid ) else
-              sqrt( sum(w[, jay] * jfit$resid^2) / sum(w[, jay]) )
-          } else if ( .imethod == 3) {
-            sqrt( sum(w[, jay] * 
-                  (y[, jay] - mean.init[, jay])^2) / sum(w[, jay]) )
-          } else {
-            sqrt( sum(w[, jay] * abs(y[, jay] -
-                                     mean.init[, jay])) / sum(w[, jay]) )
-          }
-
-        if (any(sdev.init[, jay] <= sqrt( .Machine$double.eps ) ))
-          sdev.init[, jay] <- 1.01
-
-      }
-
-
-      if (length( .isd )) {
-        sdev.init <- matrix( .isd , n, ncoly, byrow = TRUE)
-      }
-
-
-      etastart <-
-        cbind(if ( .var.arg )
-              theta2eta(sdev.init^2, .lvar , earg = .evar ) else
-              theta2eta(sdev.init  , .lsd  , earg = .esd  ))
-
-      colnames(etastart) <- predictors.names
-    }
-  }), list( .lsd = lsd, .lvar = lvar,
-            .esd = esd, .evar = evar,
-            .lmean = lmean,
-            .isd = isd,
-            .var.arg = var.arg, .imethod = imethod ))),
-
-  linkinv = eval(substitute(function(eta, extra = NULL) {
-    M1 <- extra$M1
-    ncoly <- extra$ncoly
-    eta2theta(eta[, M1*(1:ncoly) - 1], .lmean , earg = .emean )
-  }, list( .esd = esd , .evar = evar,
-           .emean = emean,
-           .lmean = lmean ))),
-
-  last = eval(substitute(expression({
-    M1 <- extra$M1
-    misc$link <- c(rep( .lsd , length = ncoly))
-    temp.names <- c(mynames2)
-    names(misc$link) <- temp.names
-
-
-    misc$earg <- vector("list", M1 * ncoly)
-    names(misc$earg) <- temp.names
-    for (ii in 1:ncoly) {
-      misc$earg[[M1*ii  ]] <- if ( .var.arg ) .evar else .esd
-    }
-    names(misc$earg) <- temp.names
-
-    misc$var.arg <- .var.arg
-    misc$M1 <- M1
-    misc$expected <- TRUE
-    misc$imethod <- .imethod
-    misc$multipleResponses <- TRUE
-    misc$parallel <- .parallel
-    misc$apply.parint <- .apply.parint
-  }), list( .lsd = lsd, .lvar = lvar,
-            .esd = esd, .evar = evar,
-            .parallel = parallel, .apply.parint = apply.parint,
-            .var.arg = var.arg, .imethod = imethod ))),
-
-  loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta,
-             extra = NULL,
-             summation = TRUE) {
-    ncoly <- extra$ncoly
-    M1 <- extra$M1
-    if ( .var.arg ) {
-      Varm <- eta2theta(eta[, M1*(1:ncoly)], .lvar , earg = .evar )
-      sdev <- sqrt(Varm)
-    } else {
-      sdev <- eta2theta(eta[, M1*(1:ncoly)], .lsd  , earg = .esd  )
-    }
-    if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      ll.elts <- c(w) * dnorm(y, m = mu, sd = sdev, log = TRUE)
-      if (summation) {
-        sum(ll.elts)
-      } else {
-        ll.elts
-      }
-    }
-  }, list( .lsd = lsd, .lvar = lvar,
-           .esd = esd, .evar = evar,
-           .var.arg = var.arg ))),
-  vfamily = c("halfuninormal"),
-  deriv = eval(substitute(expression({
-    ncoly <- extra$ncoly
-    M1 <- extra$M1
-
-    mymu <- zz
-    if ( .var.arg ) {
-      Varm <- eta2theta(eta[, M1*(1:ncoly)    ], .lvar , earg = .evar )
-      sdev <- sqrt(Varm)
-    } else {
-      sdev <- eta2theta(eta[, M1*(1:ncoly)    ], .lsd  , earg = .esd  )
-    }
-
-    dl.dmu <- zz * (y - mymu) / sdev^2
-    if ( .var.arg ) {
-      dl.dva <- -0.5 / Varm + 0.5 * (y - mymu)^2 / sdev^4
-    } else {
-      dl.dsd <- -1.0 / sdev +       (y - mymu)^2 / sdev^3
-    }
-
-    dmu.deta <- dtheta.deta(mymu, .lmean , earg = .emean )
-    if ( .var.arg ) {
-      dva.deta <- dtheta.deta(Varm, .lvar , earg = .evar )
-    } else {
-      dsd.deta <- dtheta.deta(sdev, .lsd  , earg = .esd )
-    }
-
-    ans <- c(w) *
-           cbind(if ( .var.arg ) dl.dva * dva.deta else
-                                 dl.dsd * dsd.deta)
-    ans
-  }), list( .lmean = lmean, .lsd = lsd, .lvar = lvar,
-            .emean = emean, .esd = esd, .evar = evar,
-            .var.arg = var.arg ))),
-  weight = eval(substitute(expression({
-    wz <- matrix(as.numeric(NA), n, M)  # diag matrix; y is 1-column too
-
-    ned2l.dmu2 <- 1 / sdev^2
-    if ( .var.arg ) {
-      ned2l.dva2 <- 0.5 / Varm^2
-    } else {
-      ned2l.dsd2 <- 2 / sdev^2
-    }
-
-    wz[, M1*(1:ncoly)    ] <- if ( .var.arg ) {
-      ned2l.dva2 * dva.deta^2
-    } else {
-      ned2l.dsd2 * dsd.deta^2
-    }
-
-
-    wz
-  }), list( .var.arg = var.arg ))))
-}
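For reference, the half-normal distribution that halfuninormal() was intended to fit has density 2 * dnorm(y, 0, sd) on y >= 0, and the scale parameter has the closed-form MLE sqrt(mean(y^2)). A quick check on simulated data:

    set.seed(1)
    y <- abs(rnorm(1000, sd = 2))
    sqrt(mean(y^2))  # MLE of sd; close to 2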
-
-
-
-
-
-
-
-
 
 
diff --git a/R/family.positive.R b/R/family.positive.R
index 8456add..7f369ef 100644
--- a/R/family.positive.R
+++ b/R/family.positive.R
@@ -1283,7 +1283,8 @@ if (length(extra$tau)) {
     R[lower.tri(R)] <- 0
     tmp6 <- N.hat.posbernoulli(eta = eta, link = .link , earg = .earg ,
                                R = R, w = w,
-                               X.vlm = X.vlm.save, Hlist = constraints,
+                               X.vlm = X.vlm.save,
+                               Hlist = Hlist,  # 20150428; bug fixed here
                                extra = extra, model.type = "0")
     extra$N.hat    <- tmp6$N.hat
     extra$SE.N.hat <- tmp6$SE.N.hat
@@ -1575,7 +1576,8 @@ if (length(extra$tau)) {
     R[lower.tri(R)] <- 0
     tmp6 <- N.hat.posbernoulli(eta = eta, link = .link , earg = .earg ,
                                R = R, w = w,
-                               X.vlm = X.vlm.save, Hlist = constraints,
+                               X.vlm = X.vlm.save,
+                               Hlist = Hlist,  # 20150428; bug fixed here
                                extra = extra, model.type = "t")
     extra$N.hat    <- tmp6$N.hat
     extra$SE.N.hat <- tmp6$SE.N.hat
@@ -1908,7 +1910,8 @@ if (length(extra$tau)) {
     R[lower.tri(R)] <- 0
     tmp6 <- N.hat.posbernoulli(eta = eta, link = .link , earg = .earg ,
                                R = R, w = w,
-                               X.vlm = X.vlm.save, Hlist = constraints,
+                               X.vlm = X.vlm.save,
+                               Hlist = Hlist,  # 20150428; bug fixed here
                                extra = extra, model.type = "b")
     extra$N.hat    <- tmp6$N.hat
     extra$SE.N.hat <- tmp6$SE.N.hat
@@ -2339,7 +2342,8 @@ if (length(extra$tau)) {
     R[lower.tri(R)] <- 0
     tmp6 <- N.hat.posbernoulli(eta = eta, link = .link , earg = .earg ,
                                R = R, w = w,
-                               X.vlm = X.vlm.save, Hlist = constraints,
+                               X.vlm = X.vlm.save,
+                               Hlist = Hlist,  # 20150428; bug fixed here
                                extra = extra, model.type = "tb")
     extra$N.hat    <- tmp6$N.hat
     extra$SE.N.hat <- tmp6$SE.N.hat
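All four hunks above apply the same one-line fix, passing Hlist rather than constraints into the call that computes the population-size estimate. For intuition only, a Horvitz-Thompson-style estimate of the kind N.hat represents (a sketch under hypothetical capture probabilities, not the actual N.hat.posbernoulli() code):

    p.seen <- c(0.8, 0.5, 0.9, 0.4)  # hypothetical P(animal is captured at least once)
    N.hat <- sum(1 / p.seen)         # each captured animal stands in for 1/p animals
    N.hat                            # estimated population size from 4 captures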
diff --git a/R/family.quantal.R b/R/family.quantal.R
deleted file mode 100644
index 9162af1..0000000
--- a/R/family.quantal.R
+++ /dev/null
@@ -1,575 +0,0 @@
-# These functions are
-# Copyright (C) 1998-2015 T.W. Yee, University of Auckland.
-# All rights reserved.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- abbott <- function(link0 = "logit",
-                    link1 = "logit",
-                    iprob0 = NULL, iprob1 = NULL,
-                    type.fitted = c("observed", "treatment", "control"),
-                    mux.offdiagonal = 0.98,
-                    zero = 1) {
-
-
-  type.fitted <- match.arg(type.fitted,
-                           c("observed", "treatment", "control"),
-                           several.ok = TRUE)
-
-
-  link0 <- as.list(substitute(link0))
-  earg0 <- link2list(link0)
-  link0 <- attr(earg0, "function.name")
-
-  link1 <- as.list(substitute(link1))
-  earg1 <- link2list(link1)
-  link1 <- attr(earg1, "function.name")
-
-
-
-
-  if (!is.Numeric(mux.offdiagonal, length.arg = 1) ||
-      mux.offdiagonal >= 1 ||
-      mux.offdiagonal <  0)
-    stop("argument 'mux.offdiagonal' must be in the interval [0, 1)")
-
-
-  new("vglmff",
-  blurb = c("Abbott's model for binary responses\n",
-            "mu = prob0 + (1 - prob0) * prob1\n",
-            "where 'prob0' is the 'control' mortality and\n",
-            "'prob1' is the 'treatment' mortality and\n",
-            "'mu' is the 'observed' mortality\n\n",
-            "Links: ",
-            namesof("prob0", link0, earg = earg0), ",  ",
-            namesof("prob1", link1, earg = earg1)),
-  constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero, M)
-  }), list( .zero = zero # ,
-           ))),
-
-  initialize = eval(substitute(expression({
-    eval(binomialff(link = .link0 )@initialize)  # w, y, mustart are assigned
-
-
-    predictors.names <-
-      c(namesof("prob0", .link0, earg = .earg0, short =  TRUE),
-        namesof("prob1", .link1, earg = .earg1, short =  TRUE))
-
-
-    if (is.null(etastart)) {
-      prob0.init <- if (length( .iprob0 )) {
-        rep( .iprob0, length.out = n)
-      } else {
-        mustart / 2
-      }
-
-      prob1.init <- if (length( .iprob1 )) {
-        rep( .iprob1, length.out = n)
-      } else {
-        mustart / 2
-      }
-
-
-      mustart <- NULL
-
-
-        etastart <-
-         cbind(theta2eta(prob0.init, link = .link0 , earg = .earg0 ),
-               theta2eta(prob1.init, link = .link1 , earg = .earg1 ))
-    }
-  }), list( .link0 = link0, .earg0 = earg0,
-            .link1 = link1, .earg1 = earg1,
-            .iprob0 = iprob0, .iprob1 = iprob1 ))),
-
-  linkinv = eval(substitute(function(eta, extra = NULL) {
-    prob0 <- eta2theta(eta[, 1], .link0 , earg = .earg0 )
-    prob1 <- eta2theta(eta[, 2], .link1 , earg = .earg1 )
-
-    con.fv <- prob0
-    trt.fv <- prob1
-    obs.fv <- prob0 + (1 - prob0) * prob1
-
-
-
-    ans <- cbind("observed"  = obs.fv,
-                "treatment" = trt.fv,
-                "control"   = con.fv)
-
-                   
-    ans[, .type.fitted , drop = FALSE]
-  }, list( .link0 = link0, .earg0 = earg0,
-           .link1 = link1, .earg1 = earg1,
-           .type.fitted = type.fitted ))),
-
-  loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta, extra = NULL,
-             summation = TRUE) {
-
-
-    prob0 <- eta2theta(eta[, 1], .link0 , earg = .earg0 )
-    prob1 <- eta2theta(eta[, 2], .link1 , earg = .earg1 )
-    mymu <- prob0 + (1 - prob0) * prob1
-
-
-    if (residuals) {
-      w * (y / mymu - (1 - y) / (1 - mymu))
-    } else {
-      ycounts <- if (is.numeric(extra$orig.w)) y * w / extra$orig.w else
-                  y * w # Convert proportions to counts
-      nvec <- if (is.numeric(extra$orig.w)) round(w / extra$orig.w) else
-                  round(w)
-      smallno <- sqrt(.Machine$double.eps)
-      if (max(abs(ycounts - round(ycounts))) > smallno)
-        warning("converting 'ycounts' to integer in @loglikelihood")
-      ycounts <- round(ycounts)
-
-      ll.elts <-
-        (if (is.numeric(extra$orig.w)) extra$orig.w else 1) *
-        dbinom(x = ycounts, size = nvec, prob = mymu, log = TRUE)
-      if (summation) {
-        sum(ll.elts)
-      } else {
-        ll.elts
-      }
-    }
-  }, list( .link0 = link0, .earg0 = earg0,
-           .link1 = link1, .earg1 = earg1 ))),
-
-
-  last = eval(substitute(expression({
-    misc$link <-    c(prob0 = .link0 , prob1 = .link1 )
-    misc$earg <- list(prob0 = .earg0 , prob1 = .earg1 )
-
-    misc$mux.offdiagonal <- .mux.offdiagonal
-    misc$type.fitted <- .type.fitted
-    misc$true.mu <- ( .type.fitted == "observed")
-
-
-  }), list( .link0 = link0, .earg0 = earg0,
-            .link1 = link1, .earg1 = earg1,
-            .mux.offdiagonal = mux.offdiagonal,
-            .type.fitted = type.fitted
-          ))),
-  vfamily = c("abbott", "vquantal"),
-  deriv = eval(substitute(expression({
-    prob0 <- eta2theta(eta[, 1], .link0, earg = .earg0 )
-    prob1 <- eta2theta(eta[, 2], .link1, earg = .earg1 )
-    dprob0.deta <- dtheta.deta(prob0, .link0 , earg = .earg0 )
-    dprob1.deta <- dtheta.deta(prob1, .link1 , earg = .earg1 )
-
-
-    mymu <- prob0 + (1 - prob0) * prob1
-
-
-    dl.dmu <- y / mymu - (1 - y) / (1 - mymu)
-    dmu.dprob0 <- 1 - prob1
-    dmu.dprob1 <- 1 - prob0
-    dl.dprob0 <- dl.dmu * dmu.dprob0 
-    dl.dprob1 <- dl.dmu * dmu.dprob1 
-
-
-    c(w) * cbind(dl.dprob0 * dprob0.deta,
-                 dl.dprob1 * dprob1.deta)
-  }), list( .link0 = link0, .earg0 = earg0,
-            .link1 = link1, .earg1 = earg1 ))),
-  weight = eval(substitute(expression({
-
-
-    ned2l.dmu2 <- 1 / (mymu * (1-mymu))
-    ned2l.dprob02     <- ned2l.dmu2 * dmu.dprob0^2
-    ned2l.dprob12     <- ned2l.dmu2 * dmu.dprob1^2
-    ned2l.dprob1prob2 <-              ( 1)  # seems sort of OK but convergence is slow
-    ned2l.dprob1prob2 <-              ( 0)  # kill it
-    ned2l.dprob1prob2 <- ned2l.dmu2 * ( 1)  # doesn't seem to work
-
-    ned2l.dprob1prob2 <- ned2l.dmu2 * dmu.dprob1 * dmu.dprob0 *
-                         .mux.offdiagonal
-
-    od2l.dmu2 <- y / mymu^2 + (1 - y) / (1 - mymu)^2
-    od2l.dprob02     <- od2l.dmu2 * dmu.dprob0^2
-    od2l.dprob12     <- od2l.dmu2 * dmu.dprob1^2
-    od2l.dprob1prob2 <- od2l.dmu2 * dmu.dprob1 * dmu.dprob0 + dl.dmu
-
-
-    wz <- cbind(ned2l.dprob02 * dprob0.deta^2,
-                ned2l.dprob12 * dprob1.deta^2,
-                ned2l.dprob1prob2 * dprob1.deta * dprob0.deta)
-
-
-
-
-    c(w) * wz
-  }), list( .link0 = link0, .earg0 = earg0,
-            .link1 = link1, .earg1 = earg1,
-             .mux.offdiagonal = mux.offdiagonal ))))
-}
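Abbott's formula from the blurb above in one line: the observed mortality is the control mortality plus the treatment mortality acting on the survivors of the control process. With illustrative values:

    prob0 <- 0.10                      # 'control' (natural) mortality
    prob1 <- 0.60                      # 'treatment' mortality
    mu <- prob0 + (1 - prob0) * prob1  # 'observed' mortality
    mu  # 0.64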
-
-
-
-
-
-
-
-
-if (FALSE)
- Abbott <- function(lprob1 = extlogit(min = 0, max = 1),  # For now, that is
-                   lprob0 = "logit",
-                   iprob0 = NULL, iprob1 = NULL,
-                   nointercept = 2,  # NULL,
-                   zero = 1) {
-
-
-
-
- stop("does not work")
-
-
-  lprob1 <- as.list(substitute(lprob1))
-  eprob1 <- link2list(lprob1)
-  lprob1 <- attr(eprob1, "function.name")
-
-  lprob0 <- as.list(substitute(lprob0))
-  eprob0 <- link2list(lprob0)
-  lprob0 <- attr(eprob0, "function.name")
-
-
-
-  new("vglmff",
-  blurb = c("Abbott's model for binary response\n",
-            "mu = prob0 + prob1\n",
-            "where 'prob0' is the control mortality and\n",
-            "'prob1' is the treatment mortality\n\n",
-            "Links: ",
-            namesof("prob0", lprob0, earg = eprob0), ",  ",
-            namesof("prob1", lprob1, earg = eprob1)),
-  constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero, M)
-    constraints <- cm.nointercept.VGAM(constraints, x, .nointercept, M)
-  }), list( .zero = zero,
-            .nointercept = nointercept ))),
-
-
-  initialize = eval(substitute(expression({
- print("here1")
-
-    eval(binomialff(link = .lprob1)@initialize)  # w, y, mustart are assigned
-
- print("here2")
- print("summary(mustart)")
- print( summary(mustart) )
-
-    predictors.names <-
-      c(namesof("prob0", .lprob0, earg = .eprob0, short =  TRUE),
-        namesof("prob1", .lprob1, earg = .eprob1, short =  TRUE))
-
-
-    if (is.null(etastart)) {
-      prob0.init <- if (length( .iprob0 )) {
-          rep( .iprob0, len = n)
-        } else {
-          mustart / 2
-        }
-
-      prob1.init <- if (length( .iprob1 )) {
-          rep( .iprob1, len = n)
-        } else {
-          mustart * 1 / 4
-        }
-
-
-      mustart <- NULL
-
-
- print("prob0.init ")
- print( sort(prob0.init) )
- print("prob1.init ")
- print( sort(prob1.init) )
-
-
-        eprob1 <- list(min = prob0.init, max = 1)
-        etastart <-
-         cbind(theta2eta(prob0.init, link = .lprob0 , earg = .eprob0 ),
-               theta2eta(prob1.init, link = .lprob1 , earg =  eprob1 ))
- print("head(etastart)")
- print( head(etastart) )
-    }
-  }), list( .lprob1 = lprob1, .eprob1 = eprob1,
-            .lprob0 = lprob0, .eprob0 = eprob0,
-            .iprob0 = iprob0, .iprob1 = iprob1 ))),
-  linkinv = eval(substitute(function(eta, extra = NULL) {
-    prob0 <- eta2theta(eta[, 1], .lprob0 , earg = .eprob0)
-
-    eprob1 <- list(min = prob0, max = 1)
-
-    prob1 <- eta2theta(eta[, 2], .lprob1 , earg =  eprob1)
-    prob0 + prob1
-  }, list( .lprob1 = lprob1, .eprob1 = eprob1,
-           .lprob0 = lprob0, .eprob0 = eprob0 ))),
-  last = eval(substitute(expression({
-    eprob1 <- list(min = prob0, max = 1)
-    misc$link <-    c(prob0 = .lprob0, prob1 = .lprob1)
-
-    misc$earg <- list(prob0 = .eprob0, prob1 =  eprob1)
-
-    misc$nointercept = .nointercept
-  }), list( .lprob1 = lprob1, .eprob1 = eprob1,
-            .lprob0 = lprob0, .eprob0 = eprob0,
-            .nointercept = nointercept ))),
-  vfamily = c("Abbott", "vquantal"),
-  deriv = eval(substitute(expression({
-    prob0 <- eta2theta(eta[,1], .lprob0, earg = .eprob0)
-
-    eprob1 <- list(min = prob0, max = 1)
-    prob1 <- eta2theta(eta[,2], .lprob1, earg =  eprob1)
-    dprob0.deta <- dtheta.deta(prob0, .lprob0 , earg = .eprob0 )
-    dprob1.deta <- dtheta.deta(prob1, .lprob1 , earg =  eprob1 )
-
-    dl.dmu <- y / mu - (1 - y) / (1 - mu)
-    dmu.dprob0 <- 1 # - prob1
-    dmu.dprob1 <- 1 # - prob0
-    dl.dprob0 <- dl.dmu * dmu.dprob0 
-    dl.dprob1 <- dl.dmu * dmu.dprob1 
-
-    c(w) * cbind(dl.dmu * dmu.dprob0 * dprob0.deta,
-                 dl.dmu * dmu.dprob1 * dprob1.deta)
-  }), list( .lprob1 = lprob1, .eprob1 = eprob1,
-            .lprob0 = lprob0, .eprob0 = eprob0 ))),
-    weight = eval(substitute(expression({
-
-
-    ned2l.dmu2 <- 1 / (mu * (1-mu))
-    ned2l.dprob02 <- ned2l.dmu2 * dmu.dprob0^2
-    ned2l.dprob12 <- ned2l.dmu2 * dmu.dprob1^2
-
-    wz <- cbind(ned2l.dprob02 * dprob0.deta^2,
-                ned2l.dprob12 * dprob1.deta^2)
-
- print("head(wz)")
- print( head(wz) )
-    c(w) * wz
-  }), list( .lprob1 = lprob1, .eprob1 = eprob1,
-            .lprob0 = lprob0, .eprob0 = eprob0 ))))
-}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-abbott.EM.control <- function(maxit = 1000, ...) {
-  list(maxit = maxit)
-}
-
-
- abbott.EM <-
-  function(link = "probit",
-           b1.arg = 0, b2.arg = 0,
-           imethod = 1, ilambda = 0.5,
-           iprob = NULL) {
-
-
-  link <- as.list(substitute(link))
-  earg <- link2list(link)
-  link <- attr(earg, "function.name")
-
-
-  if (!is.Numeric(b1.arg,  # length.arg = 1,
-                  integer.valued = TRUE) ||
-      b1.arg < 0)
-    stop("argument 'b1.arg' must be a vector of non-negative integers")
-
-
-  if (!is.Numeric(b2.arg,  # length.arg = 1,
-                  integer.valued = TRUE) ||
-      b2.arg < 0)
-    stop("argument 'b2.arg' must be a vector of non-negative integers")
-
-
-  if (!is.Numeric(imethod, length.arg = 1,
-                  integer.valued = TRUE, positive = TRUE) ||
-     imethod > 3)
-    stop("argument 'imethod' must be 1 or 2 or 3")
-
-
-  zero <- NULL
-  if (length(zero) &&
-      !is.Numeric(zero, integer.valued = TRUE, positive = TRUE))
-    stop("bad input for argument 'zero'")
-
-
-  new("vglmff",
-  blurb = c("Probit regression with nonzero background (EM algorithm)\n",
-            "P[Y=1] = mu = prob0 + (1 - prob0) * linkinv(eta)\n\n",
-            "Link:     ",
-            namesof("pi", link, earg = earg), "\n",
-            "Mean:     mu"),
-  constraints = eval(substitute(expression({
-    dotzero <- .zero
-    M1 <- 1
-    eval(negzero.expression.VGAM)
-  }), list( .zero = zero ))),
-
-  infos = eval(substitute(function(...) {
-    list(M1 = 1,
-         Q1 = 1,
-         zero = .zero )
-  }, list( .zero = zero ))),
-
-  initialize = eval(substitute(expression({
-
-    temp5 <-
-    w.y.check(w = w, y = y,
-              Is.nonnegative.y = TRUE,
-              Is.integer.y = TRUE,
-              ncol.w.max = Inf,
-              ncol.y.max = Inf,
-              out.wy = TRUE,
-              colsyperw = 1,
-              maximize = TRUE)
-    w <- temp5$w
-    y <- temp5$y
-
-
-    if (length(table(y)) != 2 || max(y) > 1)
-      stop("response must be a vector of 0s and 1s only")
-
-
-    ncoly <- ncol(y)
-    M1 <- 1
-    extra$ncoly <- ncoly
-    extra$M1 <- M1
-    M <- M1 * ncoly
-    extra$lambda <- matrix( .ilambda , n, M, byrow = TRUE)
-    extra$orig.w <- w
-
-
-    mynames1 <- paste("prob0", if (ncoly > 1) 1:ncoly else "", sep = "")
-    predictors.names <-
-      namesof(mynames1, .link , earg = .earg , tag = FALSE)
-
-
-    if (!length(etastart)) {
-      prob.init <- if ( .imethod == 2)
-                      1 / (1 + y + 1/16) else
-                  if ( .imethod == 3)
-                      1 / (1 + apply(y, 2, median) + 1/16) else
-                      rnorm(n * M, mean = 0.5, sd = 0.01)  # Mean 0.5
-
-      if (!is.matrix(prob.init))
-        prob.init <- matrix(prob.init, n, M, byrow = TRUE)
-
-
-      if (length( .iprob ))
-        prob.init <- matrix( .iprob , n, M, byrow = TRUE)  # Mean 0.5
-
-
-        etastart <- theta2eta(prob.init, .link , earg = .earg )  # Mean 0
-    }
-  }), list( .link = link, .earg = earg,
-            .ilambda = ilambda,
-            .imethod = imethod, .iprob = iprob ))),
-
-  linkinv = eval(substitute(function(eta, extra = NULL) {
-    prob <- eta2theta(eta, .link , earg = .earg )
-    mymu <- extra$lambda + (1 - extra$lambda) * prob  # Eqn (3)
-    mymu
-  }, list( .link = link, .earg = earg ))),
-
-  last = eval(substitute(expression({
-    M1 <- extra$M1
-    misc$link <- c(rep( .link , length = ncoly))
-    names(misc$link) <- mynames1
-
-    misc$earg <- vector("list", M)
-    names(misc$earg) <- mynames1
-    for (ii in 1:ncoly) {
-      misc$earg[[ii]] <- .earg
-    }
-
-    misc$M1 <- M1
-    misc$multipleResponses <- TRUE
-    misc$imethod <- .imethod
-    misc$iprob <- .iprob
-    misc$b1.arg <- .b1.arg
-    misc$b2.arg <- .b2.arg
-
-    extra$lambda <- extra$lambda[1, ]  # Now a vector
-  }), list( .link = link, .earg = earg,
-            .iprob = iprob,
-            .b1.arg = b1.arg, .b2.arg = b2.arg,
-            .imethod = imethod ))),
-  loglikelihood = eval(substitute(
-    function(mu, y, w, residuals = FALSE, eta, extra = NULL,
-             summation = TRUE) {
-    prob <- eta2theta(eta, .link , earg = .earg )
-    mymu <- extra$lambda + (1 - extra$lambda) * prob  # Eqn (3)
-
-   if (residuals) {
-      stop("loglikelihood residuals not implemented yet")
-    } else {
-      nvec <- if (is.numeric(extra$orig.w)) round(w / extra$orig.w) else
-              round(w)
-      ll.elts <- c(w) * dbinom(x = y, prob = mymu,
-                               size = nvec, log = TRUE)
-      if (summation) {
-        sum(ll.elts)
-      } else {
-        ll.elts
-      }
-    }
-  }, list( .link = link, .earg = earg ))),
-  vfamily = c("abbott.EM"),
-  deriv = eval(substitute(expression({
-    prob <- eta2theta(eta, .link , earg = .earg )
-
-    mymu <- extra$lambda + (1 - extra$lambda) * prob  # Eqn (3)
-
-    wz <- cbind((1 - extra$lambda) * prob / mymu)  # Eqn (4)
-
-    Deriv1 <- ifelse(y == 0, -dnorm(eta) / pnorm(eta, lower.tail = FALSE),
-                              dnorm(eta) / pnorm(eta))
-
-    c(w) * wz * Deriv1
-  }), list( .link = link, .earg = earg ))),
-
-  weight = eval(substitute(expression({
-
-    extra$lambda <-
-      matrix((colSums((1 - wz) * y) + .b1.arg ) / (n + .b1.arg + .b2.arg ),
-             n, M, byrow = TRUE)  # Between eqns (6),(7)
-
-
-
-
-    c(w) * wz
-  }), list( .link = link, .earg = earg,
-            .b1.arg = b1.arg, .b2.arg = b2.arg ))))
-}
-
-
-
-
-
diff --git a/R/family.rcim.R b/R/family.rcim.R
index 7c31a24..3331b36 100644
--- a/R/family.rcim.R
+++ b/R/family.rcim.R
@@ -839,7 +839,7 @@ plota21 <- function(rrvglm2, show.plot = TRUE, nseq.a21 = 31,
 
   if (show.plot) {
     plot(a21.matrix[ ,1], a21.matrix[ ,2], type = "l",
-            col = "blue",
+            col = "blue", cex.lab = 1.1,
             xlab = expression(a[21]), ylab = "Log-likelihood")  # ...
 
     abline(v = (Hlist.orig[[length(Hlist.orig)]])[2, 1],
diff --git a/R/family.rcqo.R b/R/family.rcqo.R
index 36a393a..b9a8fcf 100644
--- a/R/family.rcqo.R
+++ b/R/family.rcqo.R
@@ -105,6 +105,8 @@ rcqo <- function(n, p, S,
       on.exit(assign(".Random.seed", R.seed, envir = .GlobalEnv))
     }
   })
+
+
   change.seed.expression <- expression({
       if (length(seed)) set.seed(seed)
   })
@@ -160,20 +162,23 @@ rcqo <- function(n, p, S,
         optimums[, r] <- seq(-AA, AA, len = S^(1/Rank))
       }
     } else if (Rank == 2) {
-      optimums <- expand.grid(latvar1 = seq(-AA[1], AA[1], len = S^(1/Rank)),
-                            latvar2 = seq(-AA[2], AA[2], len = S^(1/Rank)))
+      optimums <-
+        expand.grid(latvar1 = seq(-AA[1], AA[1], len = S^(1/Rank)),
+                    latvar2 = seq(-AA[2], AA[2], len = S^(1/Rank)))
     } else if (Rank == 3) {
-      optimums <- expand.grid(latvar1 = seq(-AA[1], AA[1], len = S^(1/Rank)),
-                            latvar2 = seq(-AA[2], AA[2], len = S^(1/Rank)),
-                            latvar3 = seq(-AA[3], AA[3], len = S^(1/Rank)))
+      optimums <-
+        expand.grid(latvar1 = seq(-AA[1], AA[1], len = S^(1/Rank)),
+                    latvar2 = seq(-AA[2], AA[2], len = S^(1/Rank)),
+                    latvar3 = seq(-AA[3], AA[3], len = S^(1/Rank)))
     } else {
-      optimums <- expand.grid(latvar1 = seq(-AA[1], AA[1], len = S^(1/Rank)),
-                            latvar2 = seq(-AA[2], AA[2], len = S^(1/Rank)),
-                            latvar3 = seq(-AA[3], AA[3], len = S^(1/Rank)),
-                            latvar4 = seq(-AA[4], AA[4], len = S^(1/Rank)))
+      optimums <-
+        expand.grid(latvar1 = seq(-AA[1], AA[1], len = S^(1/Rank)),
+                    latvar2 = seq(-AA[2], AA[2], len = S^(1/Rank)),
+                    latvar3 = seq(-AA[3], AA[3], len = S^(1/Rank)),
+                    latvar4 = seq(-AA[4], AA[4], len = S^(1/Rank)))
     }
     if (Rank > 1)
-      optimums <- matrix(unlist(optimums), S, Rank)  # Make sure its a matrix
+    optimums <- matrix(unlist(optimums), S, Rank)  # Make sure it's a matrix
   } else {
     optimums <- matrix(1, S, Rank)
     eval(change.seed.expression)
@@ -194,7 +199,8 @@ rcqo <- function(n, p, S,
   ynames <- paste("y", 1:S, sep = "")
   Kvector <- rep(Kvector, len = S)
   names(Kvector) <- ynames
-  latvarnames <- if (Rank == 1) "latvar" else paste("latvar", 1:Rank, sep = "")
+  latvarnames <- if (Rank == 1) "latvar" else
+                 paste("latvar", 1:Rank, sep = "")
   Tols <- if (eq.tolerances) {
     matrix(1, S, Rank)
   } else {
@@ -220,12 +226,12 @@ rcqo <- function(n, p, S,
   names(log.maximums) <- ynames
   etamat <- matrix(log.maximums, n, S, byrow = TRUE)
   for (jay in 1:S) {
-      optmat <- matrix(optimums[jay, ], nrow = n, ncol = Rank, byrow = TRUE)
-      tolmat <- matrix(  Tols[jay, ], nrow = n, ncol = Rank, byrow = TRUE)
-      temp <- cbind((latvarmat - optmat) / tolmat)
-      for (r in 1:Rank)
-        etamat[, jay] <- etamat[, jay] -
-                         0.5 * (latvarmat[, r] - optmat[jay, r]) * temp[, r]
+    optmat <- matrix(optimums[jay, ], nrow = n, ncol = Rank, byrow = TRUE)
+    tolmat <- matrix(    Tols[jay, ], nrow = n, ncol = Rank, byrow = TRUE)
+    temp <- cbind((latvarmat - optmat) / tolmat)
+    for (r in 1:Rank)
+      etamat[, jay] <- etamat[, jay] -
+                       0.5 * (latvarmat[, r] - optmat[jay, r]) * temp[, r]
   }
 
   rootdist <- switch(family,
@@ -271,7 +277,7 @@ rcqo <- function(n, p, S,
   attr(ans, "concoefficients") <- Ccoefs
   attr(ans, "Crow1positive") <- Crow1positive
   attr(ans, "family") <- family
-  attr(ans, "formula") <- myform # Useful for running cqo() on the data
+  attr(ans, "formula") <- myform  # Useful for running cqo() on the data
   attr(ans, "Rank") <- Rank
   attr(ans, "family") <- family
   attr(ans, "Kvector") <- Kvector
@@ -286,7 +292,7 @@ rcqo <- function(n, p, S,
   attr(ans, "eq.maximums") <- eq.maximums ||
                               all(lo.abundance == hi.abundance)
   attr(ans, "es.optimums") <- es.optimums
-  attr(ans, "seed") <- seed # RNGstate
+  attr(ans, "seed") <- seed  # RNGstate
   attr(ans, "sd.tolerances") <- sd.tolerances
   attr(ans, "sd.latvar") <- if (scale.latvar) sd.latvar else sd.latvarr
   attr(ans, "sd.optimums") <- sd.optimums
@@ -314,7 +320,7 @@ dcqo <-
            sd.optimums = 1,
            nlevels = 4,  # ignored unless family = "ordinal"
            seed = NULL) {
- warning("12/6/06; needs a lot of work based on rcqo()")
+ warning("20060612; needs a lot of work based on rcqo()")
 
 
   if (mode(family) != "character" && mode(family) != "name")
diff --git a/R/family.ts.R b/R/family.ts.R
index f223a11..27aa4b1 100644
--- a/R/family.ts.R
+++ b/R/family.ts.R
@@ -515,3 +515,500 @@ setMethod("show", "Coef.rrar",
 
 
 
+
+ AR1.control <- function(criterion = "coefficients",
+                         stepsize = 0.33,
+                         maxit = 100, ...) {
+  list(criterion = criterion,
+       stepsize  = stepsize,
+       maxit     = maxit)
+}
+
+
+
+ AR1 <-
+  function(ldrift = "identitylink",
+           lsd  = "loge",
+           lvar = "loge",
+           lrho = "rhobit",
+           idrift  = NULL,
+           isd  = NULL,
+           ivar = NULL,
+           irho = NULL,
+           ishrinkage = 0.9, 
+           type.likelihood = c("exact", "conditional"),
+           var.arg = FALSE,  # TRUE,
+           almost1 = 0.99,
+           zero = c(-2, -3)) {
+  imethod <- 1
+  type.likelihood <- match.arg(type.likelihood,
+                           c("exact", "conditional"))[1]
+
+  if (!is.Numeric(almost1, length.arg = 1) || almost1 < 0.9 ||
+      almost1 >= 1)
+    stop("Bad input for argument 'almost1'")
+  
+  if (length(isd) && !is.Numeric(isd, positive = TRUE))
+    stop("Bad input for argument 'isd'")
+  
+  if (length(ivar) && !is.Numeric(ivar, positive = TRUE))
+    stop("Bad input for argument 'ivar'")
+  
+  if (length(irho) &&
+     (!is.Numeric(irho) || any(abs(irho) > 1.0)))
+    stop("Bad input for argument 'irho'")
+  
+
+
+
+
+  if (!is.logical(var.arg) ||
+      length(var.arg) != 1)
+    stop("argument 'var.arg' must be a single logical")
+
+  if (length(zero) && !is.Numeric(zero, integer.valued = TRUE))
+    stop("Bad input for argument 'zero'.")
+  ismn <- idrift
+  lsmn <- as.list(substitute(ldrift))
+  esmn <- link2list(lsmn)
+  lsmn <- attr(esmn, "function.name")     
+
+  lsdv <- as.list(substitute(lsd))
+  esdv <- link2list(lsdv)
+  lsdv <- attr(esdv, "function.name")
+
+  lvar  <- as.list(substitute(lvar))
+  evar  <- link2list(lvar)
+  lvar  <- attr(evar, "function.name")
+
+  lrho <- as.list(substitute(lrho))
+  erho <- link2list(lrho)
+  lrho <- attr(erho, "function.name")     
+
+  n.sc <- if (var.arg) "var" else "sd"
+  l.sc <- if (var.arg) lvar else lsdv
+  e.sc <- if (var.arg) evar else esdv
+
+
+  new("vglmff", 
+  blurb = c("Three-parameter autoregressive process of order-1\n\n",
+            "Links:        ",
+            namesof("drift", lsmn, earg = esmn), ", ",
+            namesof(n.sc     , l.sc, earg = e.sc), ", ",
+            namesof("ARcoef1", lrho, earg = erho), "\n",
+            "Model:       Y_t = drift + rho * Y_{t-1} + error_{t},", "\n",
+            "             where 'error_{2:n}' ~ N(0, sigma^2) independently",
+                          "\n",
+            "Mean:        drift / (1 - rho)", "\n",
+            "Correlation: rho = ARcoef1", "\n",
+            "Variance:    sd^2 / (1 - rho^2)"),
+  constraints = eval(substitute(expression({
+
+    M1 <- 3
+    dotzero <- .zero
+    eval(negzero.expression.VGAM)
+  }), list( .zero = zero ))),
+  infos = eval(substitute(function(...) {
+    list(M1 = 3, 
+         Q1 = 1, 
+         expected = TRUE, 
+         multipleResponses = TRUE,
+         type.likelihood = .type.likelihood ,
+         ldrift = .lsmn ,
+         edrift = .esmn ,
+         lvar = .lvar ,
+         lsd  = .lsdv ,
+         evar = .evar ,
+         esd  = .esdv ,
+         lrho = .lrho ,
+         erho = .erho ,
+         almost1 = .almost1 ,
+         zero = .zero )
+  }, list( .lsmn = lsmn, .lvar = lvar, .lsdv = lsdv, .lrho = lrho,
+           .esmn = esmn, .evar = evar, .esdv = esdv, .erho = erho,
+           .type.likelihood = type.likelihood,
+           .almost1 = almost1, .zero = zero))),
+  initialize = eval(substitute(expression({
+    extra$M1 <- M1 <- 3
+    check <- w.y.check(w = w, y = y,
+                       Is.positive.y = FALSE,
+                       ncol.w.max = Inf,
+                       ncol.y.max = Inf,
+                       out.wy = TRUE,
+                       colsyperw = 1, 
+                       maximize = TRUE)
+    w <- check$w
+    y <- check$y
+    if ( .type.likelihood == "conditional")
+      w[1, ] <- 1.0e-6
+
+    
+    NOS <- ncoly <- ncol(y)
+    M <- M1*NOS
+    var.names <- param.names("var",     NOS)
+    sdv.names <- param.names("sd",      NOS)
+    smn.names <- param.names("drift", NOS)
+    rho.names <- param.names("rho",     NOS)
+
+    mynames1 <- smn.names
+    mynames2 <- if ( .var.arg ) var.names else sdv.names 
+    mynames3 <- rho.names
+
+    predictors.names <-
+      c(namesof(smn.names, .lsmn , earg = .esmn , tag = FALSE),
+        if ( .var.arg ) 
+        namesof(var.names, .lvar , earg = .evar , tag = FALSE) else
+        namesof(sdv.names, .lsdv , earg = .esdv , tag = FALSE),
+        namesof(rho.names, .lrho , earg = .erho , tag = FALSE))
+    predictors.names <- predictors.names[interleave.VGAM(M, M = M1)]
+
+
+    if (!length(etastart)) {
+      init.smn <- if (length( .ismn ))
+                    matrix( .ismn , n, NOS, byrow = TRUE) else
+                    (1 - .ishrinkage ) * y +
+                         .ishrinkage   * matrix(colMeans(y),
+                                                n, ncoly, byrow = TRUE)
+      init.rho <- matrix(if (length( .irho )) .irho else 0.05,
+                         n, NOS, byrow = TRUE)
+      init.sdv <- matrix(if (length( .isdv )) .isdv else 1.0,
+                         n, NOS, byrow = TRUE)
+      init.var <- matrix(if (length( .ivar )) .ivar else 1.0,
+                         n, NOS, byrow = TRUE)
+      if ( .imethod == 1 ) {
+        for (spp. in 1: NOS) {
+          mycor <- cor(y[-1, spp.], y[-n, spp.])
+          init.smn[-1, spp.] <- init.smn[-1, spp.] * (1 - mycor)
+          if (!length( .irho ))
+            init.rho[, spp.] <- sign(mycor) * min(0.95, abs(mycor))
+          if (!length( .ivar ))
+            init.var[, spp.] <- var(y[, spp.]) * (1 - mycor^2)
+          if (!length( .isdv ))
+            init.sdv[, spp.] <- sqrt(init.var[, spp.])
+        }
+      }  
+
+      etastart <-
+        cbind(theta2eta(init.smn, .lsmn , earg = .esmn ),
+              if ( .var.arg ) 
+              theta2eta(init.var, .lvar , earg = .evar ) else
+              theta2eta(init.sdv, .lsdv , earg = .esdv ),
+              theta2eta(init.rho, .lrho , earg = .erho ))
+      etastart <- etastart[, interleave.VGAM(M, M = M1), drop = FALSE]
+    }  # end of etastart
+  }), list( .lsmn = lsmn, .lrho = lrho, .lsdv = lsdv, .lvar = lvar,
+            .esmn = esmn, .erho = erho, .esdv = esdv, .evar = evar,
+            .ismn = ismn, .irho = irho, .isdv = isd , .ivar = ivar,
+            .type.likelihood = type.likelihood, .ishrinkage = ishrinkage,
+            .var.arg = var.arg,
+            .imethod = imethod ))),
+  linkinv = eval(substitute(function(eta, extra = NULL) {
+    M1  <- 3
+    NOS <- ncol(eta)/M1
+    ar.smn <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                        .lsmn , earg = .esmn )
+    ar.rho <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                        .lrho , earg = .erho )
+    ar.smn / (1 - ar.rho)
+  }, list ( .lsmn = lsmn, .lrho = lrho , .lsdv = lsdv, .lvar = lvar ,
+            .var.arg = var.arg, .type.likelihood = type.likelihood,
+            .esmn = esmn, .erho = erho , .esdv = esdv, .evar = evar ))),
+  last = eval(substitute(expression({
+    if (any(abs(ar.rho) > 1)) 
+      warning("Regularity conditions are violated at the final",
+              "IRLS iteration, since 'abs(rho) > 1")
+
+    M1 <- extra$M1
+
+    temp.names <- c(mynames1, mynames2, mynames3)
+    temp.names <- temp.names[interleave.VGAM(M1 * ncoly, M = M1)]
+
+    misc$link <- rep( .lrho , length = M1 * ncoly)
+    misc$earg <- vector("list", M1 * ncoly)
+    names(misc$link) <-
+    names(misc$earg) <- temp.names
+    for (ii in 1:ncoly) {
+      misc$link[ M1*ii-2 ] <- .lsmn
+      misc$link[ M1*ii-1 ] <- if ( .var.arg ) .lvar else .lsdv
+      misc$link[ M1*ii   ] <- .lrho
+      misc$earg[[M1*ii-2]] <- .esmn
+      misc$earg[[M1*ii-1]] <- if ( .var.arg ) .evar else .esdv
+      misc$earg[[M1*ii  ]] <- .erho
+    }
+
+    misc$type.likelihood <- .type.likelihood
+    misc$var.arg <- .var.arg
+    misc$M1 <- M1
+    misc$expected <- TRUE
+    misc$imethod <- .imethod
+    misc$multipleResponses <- TRUE
+
+
+  }), list( .lsmn = lsmn, .lrho = lrho, .lsdv = lsdv, .lvar = lvar,
+            .esmn = esmn, .erho = erho, .esdv = esdv, .evar = evar,
+            .irho = irho, .isdv = isd , .ivar = ivar,
+            .var.arg = var.arg, .type.likelihood = type.likelihood,
+            .imethod = imethod ))),
+  loglikelihood = eval(substitute(
+    function(mu, y, w, residuals= FALSE, eta, 
+             extra = NULL, summation = TRUE) {
+      M1  <- 3
+      NOS <- ncol(eta)/M1
+
+      if ( .var.arg ) {
+        ar.var <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                            .lvar , earg = .evar )
+      } else {
+        ar.sdv <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                            .lsdv , earg = .esdv )
+        ar.var <- ar.sdv^2
+      }  
+      ar.smn <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                          .lsmn , earg = .esmn )
+      ar.rho <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                          .lrho , earg = .erho )
+    
+      if (residuals) {
+        stop("Loglikelihood not implemented yet to handle",
+             "residuals.")
+      } else {
+        loglik.terms <- c(w) * dAR1(x = y,
+                                    drift = ar.smn,
+                                    var.error = ar.var,
+                                    type.likelihood = .type.likelihood ,
+                                    ARcoef1 = ar.rho, log = TRUE)
+        loglik.terms <- as.matrix(loglik.terms)
+        if (summation) {
+          sum(if ( .type.likelihood == "exact") loglik.terms else
+                   loglik.terms[-1, ])
+        } else {
+          loglik.terms
+        }
+      }
+          
+    }, list( .lsmn = lsmn, .lrho = lrho , .lsdv = lsdv, .lvar = lvar ,
+             .var.arg = var.arg, .type.likelihood = type.likelihood,
+             .esmn = esmn, .erho = erho , .esdv = esdv, .evar = evar ))),
+
+  vfamily = c("AR1"),
+
+
+  simslot = eval(substitute(
+  function(object, nsim) {
+
+      pwts <- if (length(pwts <- object@prior.weights) > 0)
+                pwts else weights(object, type = "prior")
+      if (any(pwts != 1))
+        warning("ignoring prior weights")
+      eta <- predict(object)
+      fva <- fitted(object)      
+      M1  <- 3
+      NOS <- ncol(eta)/M1
+
+      if ( .var.arg ) {
+        ar.var <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                            .lvar , earg = .evar )
+      } else {
+        ar.sdv <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                            .lsdv , earg = .esdv )
+        ar.var <- ar.sdv^2
+      }  
+      ar.smn <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                          .lsmn , earg = .esmn )
+      ar.rho <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                          .lrho , earg = .erho )
+
+      ans <- array(0, c(nrow(eta), NOS, nsim))
+    for (jay in 1:NOS) {
+      ans[1, jay, ] <- rnorm(nsim, m = fva[1, jay],  # zz
+                             sd = sqrt(ar.var[1, jay]))
+      for (ii in 2:nrow(eta))
+        ans[ii, jay, ] <- ar.smn[ii, jay] +
+                          ar.rho[ii, jay] * ans[ii-1, jay, ] +
+                          rnorm(nsim, sd = sqrt(ar.var[ii, jay]))
+    }
+    ans <- matrix(c(ans), nrow(eta) * NOS, nsim)
+    ans
+  }, list( .lsmn = lsmn, .lrho = lrho , .lsdv = lsdv, .lvar = lvar ,
+           .var.arg = var.arg, .type.likelihood = type.likelihood,
+           .esmn = esmn, .erho = erho , .esdv = esdv, .evar = evar ))),
+
+
+
+
+  deriv = eval(substitute(expression({
+    M1  <- 3
+    NOS <- ncol(eta)/M1
+    ncoly <- ncol(as.matrix(y))
+    
+    if ( .var.arg ) {
+      ar.var <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                          .lvar , earg = .evar )
+    } else {
+      ar.sdv <- eta2theta(eta[, M1*(1:NOS) - 1, drop = FALSE],
+                          .lsdv , earg = .esdv )
+      ar.var <- ar.sdv^2
+    }  
+    
+    ar.smn <- eta2theta(eta[, M1*(1:NOS) - 2, drop = FALSE],
+                        .lsmn , earg = .esmn )
+    ar.rho <- eta2theta(eta[, M1*(1:NOS)    , drop = FALSE],
+                        .lrho , earg = .erho )
+
+
+    temp1 <- y[-1, , drop = FALSE] -
+             ar.smn[-1, , drop = FALSE] -
+             y[-n, , drop = FALSE] * ar.rho[-1, , drop = FALSE]
+    temp5 <- 1 - ar.rho^2
+
+    dl.dsmn <- rbind(rep(0, length = ncoly),
+                     temp1 / ar.var[-1, , drop = FALSE])
+    if ( .var.arg ) {
+      dl.dvar <- rbind(rep(0, length = ncoly),
+                       0.5 * (temp1 / ar.var[-1, , drop = FALSE])^2 -
+                       0.5 / ar.var[-1, , drop = FALSE])
+    } else {
+      dl.dsdv <- rbind(rep(0, length = ncoly),
+                       temp1^2 / (ar.sdv[-1, , drop = FALSE])^3 -
+                       1 / ar.sdv[-1, , drop = FALSE])
+    }
+    dl.drho <- rbind(rep(0, length = ncoly),
+                     y[-n, , drop = FALSE] *
+                     temp1 / ar.var[-1, , drop = FALSE])
+    dl.dsmn[1, ] <- (y[1, ] - mu[1, ]) * (1 + ar.rho[1, ]) / ar.var[1, ]
+    dl.drho[1, ] <- ar.rho[1, ] * (y[1, ] - mu[1, ])^2 / ar.var[1, ] -
+                    ar.rho[1, ] / temp5[1, ]
+    if ( .var.arg ) {
+      dl.dvar[1, ] <- -0.5 / ar.var[1, ] +
+                       0.5 *  temp5[1, ] * ((y[1, ] - mu[1, ]) / ar.var[1, ])^2
+    } else {
+      dl.dsdv[1, ] <- -1 / ar.sdv[1, ] +
+                       temp5[1, ] * (y[1, ] - mu[1, ])^2 / (ar.sdv[1, ])^3
+    }
+
+
+
+        
+    dsmn.deta <- dtheta.deta(ar.smn, .lsmn , earg = .esmn )
+    drho.deta <- dtheta.deta(ar.rho, .lrho , earg = .erho )
+    if ( .var.arg ) {
+      dvar.deta <- dtheta.deta(ar.var, .lvar , earg = .evar )
+    } else {
+      dsdv.deta <- dtheta.deta(ar.sdv, .lsdv , earg = .esdv )
+    }
+    myderiv <-
+      c(w) * cbind(dl.dsmn * dsmn.deta,
+                   if ( .var.arg ) dl.dvar * dvar.deta else
+                                   dl.dsdv * dsdv.deta,
+                   dl.drho * drho.deta)
+                   
+    myderiv[, interleave.VGAM(M, M = M1)]
+  }), list( .lsmn = lsmn, .lrho = lrho, .lsdv = lsdv, .lvar = lvar,
+            .esmn = esmn, .erho = erho, .esdv = esdv, .evar = evar,
+            .var.arg = var.arg, .type.likelihood = type.likelihood ))),
+  weight = eval(substitute(expression({
+    if ( .var.arg ) {
+      ned2l.dvar    <- 0.5 / ar.var^2
+      ned2l.drhovar <-  matrix(0, n, ncoly)
+      ned2l.drhovar[1, ] <- .almost1 * ar.rho[1, ] / (ar.var[1, ] * temp5[1, ])
+    } else {
+      ned2l.dsdv    <- 2 / ar.var
+      ned2l.drhosdv <-  matrix(0, n, ncoly)
+      ned2l.drhosdv[1, ] <- 2 *
+                            .almost1 * ar.rho[1, ] / (ar.sdv[1, ] * temp5[1, ])
+    }
+    
+    ned2l.dsmn <- 1 / ar.var
+    ned2l.dsmn[1, ] <- (1 + ar.rho[1, ]) / ((1 - ar.rho[1, ]) * ar.var[1, ])
+
+    ned2l.dsmnrho <- mu / ar.var
+    ned2l.dsmnrho[1, ] <- 0
+
+    ned2l.drho <- ((    mu[-n, , drop = FALSE])^2 +
+                    ar.var[-n, , drop = FALSE] /
+                     temp5[-1, , drop = FALSE]) / ar.var[-1, , drop = FALSE]
+    ned2l.drho <- rbind(0, ned2l.drho)
+    ned2l.drho[1, ]  <- 2 * (ar.rho[1, ] / temp5[1, ])^2
+    
+    wz <- matrix(0, n, M + (M - 1) + (M - 2))
+    wz[, M1*(1:NOS) - 2] <- ned2l.dsmn * dsmn.deta^2
+    wz[, M1*(1:NOS) - 1] <-
+      if ( .var.arg ) ned2l.dvar * dvar.deta^2 else
+                      ned2l.dsdv * dsdv.deta^2
+    wz[, M1*(1:NOS)    ] <- ned2l.drho * drho.deta^2
+
+    wz[, M1*(1:NOS) + M + (M - 1) - M1 + 1] <- ned2l.dsmnrho *
+                                               dsmn.deta * drho.deta
+    wz[, M1*(1:NOS) + M - 1] <-
+      if ( .var.arg ) ned2l.drhovar * drho.deta * dvar.deta else
+                      ned2l.drhosdv * drho.deta * dsdv.deta
+
+
+    wz <- w.wz.merge(w = w, wz = wz, n = n, M = M + (M - 1) + (M - 2),
+                     ndepy = NOS)
+    wz
+  }), list( .var.arg = var.arg, .type.likelihood = type.likelihood,
+            .almost1 = almost1)))
+  )
+}  # End of function 'AR1'
+
+
+
+
+
+
+
+
+
+
+dAR1 <- function(x,
+                 drift = 0,  # Stationarity is the default
+                 var.error = 1, ARcoef1 = 0.0,
+                 type.likelihood = c("exact", "conditional"),
+                 log = FALSE) {
+
+  type.likelihood <- match.arg(type.likelihood,
+                           c("exact", "conditional"))[1]
+
+  is.vector.x <- is.vector(x)
+
+  x <- as.matrix(x)
+  drift <- as.matrix(drift)
+  var.error <- as.matrix(var.error)
+  ARcoef1 <- as.matrix(ARcoef1)
+  LLL <- max(nrow(x), nrow(drift), nrow(var.error), nrow(ARcoef1))
+  UUU <- max(ncol(x), ncol(drift), ncol(var.error), ncol(ARcoef1))
+  x          <- matrix(x,         LLL, UUU)
+  drift      <- matrix(drift,     LLL, UUU)
+  var.error  <- matrix(var.error, LLL, UUU)
+  rho        <- matrix(ARcoef1,   LLL, UUU)
+
+  if (any(abs(rho) > 1))
+    warning("Values of argument 'ARcoef1' are greater ",
+            "than 1 in absolute value")
+
+  if (!is.logical(log.arg <- log) || length(log) != 1) 
+    stop("Bad input for argument 'log'.")
+  rm(log)
+
+  ans <- matrix(0.0, LLL, UUU)
+  var.noise <- var.error / (1 - rho^2)
+
+  ans[ 1, ] <- dnorm(x    = x[1, ],
+                     mean = drift[ 1, ] / (1 - rho[1, ]), 
+                     sd   = sqrt(var.noise[1, ]), log = log.arg)
+  ans[-1, ] <- dnorm(x    = x[-1, ],
+                     mean = drift[-1, ] + rho[-1, ] * x[-nrow(x), ],
+                     sd   = sqrt(var.noise[-1, ]), log = log.arg)
+
+  if (type.likelihood == "conditional")
+    ans[1, ] <- NA
+  
+  if (is.vector.x) as.vector(ans) else ans
+}
+
+
+
+
+
+
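
For orientation, the new AR1() family function and dAR1() density
added above can be exercised along the following lines. This is a
minimal sketch: the sample size, seed, and parameter values are
invented purely for illustration.

  library(VGAM)
  set.seed(123)
  nn <- 200
  tsdata <- data.frame(y = as.numeric(
    arima.sim(n = nn, model = list(ar = 0.5), sd = 1)))
  fit <- vglm(y ~ 1, AR1(type.likelihood = "exact"), data = tsdata)
  coef(fit, matrix = TRUE)  # columns: drift, loge(sd), rhobit(rho)

  # dAR1() gives the exact-likelihood density; its first observation
  # uses the stationary marginal N(drift/(1-rho), var.error/(1-rho^2)).
  sum(dAR1(tsdata$y, drift = 0, var.error = 1,
           ARcoef1 = 0.5, log = TRUE))
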
diff --git a/R/family.univariate.R b/R/family.univariate.R
index fa02e3e..17596ed 100644
--- a/R/family.univariate.R
+++ b/R/family.univariate.R
@@ -711,7 +711,7 @@ rhzeta <- function(n, alpha) {
 
 
 
-dirmul.old <- function(link = "loge", init.alpha = 0.01,
+dirmul.old <- function(link = "loge", ialpha = 0.01,
                        parallel = FALSE, zero = NULL) {
 
   link <- as.list(substitute(link))
@@ -723,8 +723,8 @@ dirmul.old <- function(link = "loge", init.alpha = 0.01,
     !is.Numeric(zero, integer.valued = TRUE, positive = TRUE))
     stop("bad input for argument 'zero'")
 
-  if (!is.Numeric(init.alpha, positive = TRUE))
-    stop("'init.alpha' must contain positive values only")
+  if (!is.Numeric(ialpha, positive = TRUE))
+    stop("'ialpha' must contain positive values only")
 
 
   new("vglmff",
@@ -753,12 +753,12 @@ dirmul.old <- function(link = "loge", init.alpha = 0.01,
       extra$y  <- y
 
       if (!length(etastart)) {
-        yy <- if (is.numeric( .init.alpha))
-            matrix( .init.alpha, n, M, byrow = TRUE) else
+        yy <- if (is.numeric( .ialpha))
+            matrix( .ialpha , n, M, byrow = TRUE) else
             matrix(runif(n*M), n, M)
         etastart <- theta2eta(yy, .link , earg = .earg )
     }
-  }), list( .link = link, .earg = earg, .init.alpha = init.alpha ))),
+  }), list( .link = link, .earg = earg, .ialpha = ialpha ))),
   linkinv = eval(substitute(function(eta, extra = NULL) {
     shape <- eta2theta(eta, .link , earg = .earg )
     M <- if (is.matrix(eta)) ncol(eta) else 1
@@ -1175,7 +1175,7 @@ dzeta <- function(x, p, log = FALSE) {
          Q1 = 1,
          multipleResponses = TRUE,
          zero = .zero ,
-         link = .link)
+         link = .link )
   }, list( .link = link,
            .zero = zero ))),
   initialize = eval(substitute(expression({
@@ -4094,7 +4094,7 @@ negbinomial.control <- function(save.weights = TRUE, ...) {
   function(lmu = "loge", lsize = "loge",
            imu = NULL,   isize = NULL,
            probs.y = 0.75,
-           nsimEIM = 250, cutoff.prob = 0.995, # Maxiter = 5000,
+           nsimEIM = 250, cutoff.prob = 0.995,  # Maxiter = 5000,
            max.qnbinom = 1000,
            max.chunk.Mb = 20,  # max.memory = Inf is allowed
            deviance.arg = FALSE, imethod = 1,
@@ -4154,9 +4154,7 @@ negbinomial.control <- function(save.weights = TRUE, ...) {
                "greater than 10, say")
 
 
-    if (!is.logical( parallel ) || length( parallel ) != 1)
-      stop("argument 'parallel' must be TRUE or FALSE")
-    if ( parallel  && length(zero))
+    if (is.logical( parallel ) && parallel  && length(zero))
       stop("need to set 'zero = NULL' when parallel = TRUE")
 
 
@@ -4176,26 +4174,27 @@ negbinomial.control <- function(save.weights = TRUE, ...) {
 
   constraints = eval(substitute(expression({
 
-    dotzero <- .zero
-    M1 <- 2
-    eval(negzero.expression.VGAM)
 
-    if ( .parallel && ncol(cbind(y)) > 1)
-      stop("univariate responses needed if 'parallel = TRUE'")
+
+
     constraints <- cm.VGAM(matrix(1, M, 1), x = x,
                            bool = .parallel , 
                            constraints = constraints)
+
+    dotzero <- .zero
+    M1 <- 2
+    eval(negzero.expression.VGAM)
   }), list( .parallel = parallel, .zero = zero ))),
 
 
 
   infos = eval(substitute(function(...) {
-    list(M1 = 2,
-         Q1 = 1,
+    list(M1    = 2,
+         Q1    = 1,
          multipleResponses = TRUE,
-         lmu = .lmuuu ,
+         lmu   = .lmuuu ,
          lsize = .lsize ,
-         zero = .zero )
+         zero  = .zero )
   }, list( .zero = zero, .lsize = lsize, .lmuuu = lmuuu ))),
 
 
@@ -4262,7 +4261,7 @@ negbinomial.control <- function(save.weights = TRUE, ...) {
           medabsres <- median(abs(y[, jay] - use.this)) + 1/32
           allowfun <- function(z, maxtol = 1) sign(z) * pmin(abs(z), maxtol)
           mu.init[, jay] <- use.this + (1 - .ishrinkage ) *
-                           allowfun(y[, jay] - use.this, maxtol = medabsres)
+                            allowfun(y[, jay] - use.this, maxtol = medabsres)
 
           mu.init[, jay] <- abs(mu.init[, jay]) + 1 / 1024
         }
@@ -4272,8 +4271,8 @@ negbinomial.control <- function(save.weights = TRUE, ...) {
         kay.init <- matrix( .k.init , nrow = n, ncol = NOS, byrow = TRUE)
       } else {
         negbinomial.Loglikfun <- function(kmat, y, x, w, extraargs) {
-            mu <- extraargs
-            sum(c(w) * dnbinom(x = y, mu = mu, size = kmat, log = TRUE))
+          mu <- extraargs
+          sum(c(w) * dnbinom(x = y, mu = mu, size = kmat, log = TRUE))
         }
         k.grid <- .gsize
         kay.init <- matrix(0, nrow = n, ncol = NOS)
@@ -4285,8 +4284,6 @@ negbinomial.control <- function(save.weights = TRUE, ...) {
         }
       }
 
-
-
     newemu <- .emuuu
     if ( .lmuuu == "nbcanlink") {
       newemu$size <- kay.init
@@ -5248,7 +5245,8 @@ polyaR.control <- function(save.weights = TRUE, ...) {
         } else {
           medabsres <- median(abs(y[, iii] - use.this)) + 1/32
           allowfun <- function(z, maxtol = 1) sign(z) * pmin(abs(z), maxtol)
-          mu.init[, iii] <- use.this + (1 - .ishrinkage ) * allowfun(y[, iii] -
+          mu.init[, iii] <- use.this +
+                            (1 - .ishrinkage ) * allowfun(y[, iii] -
                           use.this, maxtol = medabsres)
 
           mu.init[, iii] <- abs(mu.init[, iii]) + 1 / 1024
@@ -5276,12 +5274,12 @@ polyaR.control <- function(save.weights = TRUE, ...) {
       }
 
       prob.init <- if (length(PROB.INIT)) PROB.INIT else
-                  kayy.init / (kayy.init + mu.init)
+                   kayy.init / (kayy.init + mu.init)
 
 
       etastart <-
-        cbind(theta2eta(kayy.init, .lsize , earg = .esize),
-              theta2eta(prob.init, .lprob , earg = .eprob))
+        cbind(theta2eta(kayy.init, .lsize , earg = .esize ),
+              theta2eta(prob.init, .lprob , earg = .eprob ))
               
       etastart <-
         etastart[, interleave.VGAM(M, M = M1), drop = FALSE]
@@ -7260,11 +7258,47 @@ rsimplex <- function(n, mu = 0.5, dispersion = 1) {
 
 
 
- genpoisson <- function(llambda = extlogit(min = -1, max = 1),
+
+dgenpois <- function(x, lambda = 0, theta, log = FALSE) {
+  if (!is.logical(log.arg <- log) || length(log) != 1)
+    stop("bad input for argument 'log'")
+  rm(log)
+
+  LLL <- max(length(x), length(lambda), length(theta))
+  if (length(x)      != LLL) x      <- rep(x,      len = LLL)
+  if (length(lambda) != LLL) lambda <- rep(lambda, len = LLL)
+  if (length(theta)  != LLL) theta  <- rep(theta,  len = LLL)
+
+  llans <- -x*lambda - theta + (x-1) * log(theta + x*lambda) +
+           log(theta) - lgamma(x+1)
+  llans[x < 0] <- log(0)
+  llans[x != round(x)] <- log(0)  # x should be integer-valued
+  llans[lambda > 1] <- NaN
+  if (any(ind1 <- (lambda < 0))) {
+    epsilon <- 1.0e-9  # Needed to handle a "<" rather than a "<=".
+    mmm <- pmax(4, floor(theta/abs(lambda) - epsilon))
+    llans[ind1 & mmm < pmax(-1, -theta/mmm)] <- NaN
+    llans[ind1 & mmm < x] <- log(0)  # probability 0, not NaN
+  }
+  if (log.arg) {
+    llans
+  } else {
+    exp(llans)
+  }
+}
+
+
+
+
+
+ genpoisson <- function(llambda = "rhobit",
                         ltheta = "loge",
                         ilambda = NULL, itheta = NULL,
                         use.approx = TRUE,
-                        imethod = 1, zero = 1) {
+                        imethod = 1,
+                        ishrinkage = 0.95,
+                        zero = -1) {
+
 
 
   llambda <- as.list(substitute(llambda))
@@ -7276,15 +7310,16 @@ rsimplex <- function(n, mu = 0.5, dispersion = 1) {
   ltheta <- attr(etheta, "function.name")
 
 
-  if (length(zero) &&
-      !is.Numeric(zero, integer.valued = TRUE, positive = TRUE))
-    stop("bad input for argument 'zero'")
+  if (!is.Numeric(ishrinkage, length.arg = 1) ||
+     ishrinkage < 0 ||
+     ishrinkage > 1)
+    stop("bad input for argument 'ishrinkage'")
 
 
   if (!is.Numeric(imethod, length.arg = 1,
                   integer.valued = TRUE, positive = TRUE) ||
-     imethod > 2)
-    stop("argument 'imethod' must be 1 or 2")
+     imethod > 3)
+    stop("argument 'imethod' must be 1 or 2 or 3")
 
   if (!is.logical(use.approx) || length(use.approx) != 1)
     stop("'use.approx' must be logical value")
@@ -7296,82 +7331,131 @@ rsimplex <- function(n, mu = 0.5, dispersion = 1) {
   blurb = c("Generalized Poisson distribution\n\n",
             "Links:    ",
             namesof("lambda", llambda, earg = elambda), ", ", 
-            namesof("theta", ltheta, earg = etheta), "\n", 
+            namesof("theta",  ltheta,  earg = etheta ), "\n", 
             "Mean:     theta / (1-lambda)\n",
             "Variance: theta / (1-lambda)^3"),
-  constraints = eval(substitute(expression({
-    constraints <- cm.zero.VGAM(constraints, x, .zero , M)
+ constraints = eval(substitute(expression({
+
+    M1 <- 2
+    dotzero <- .zero
+    eval(negzero.expression.VGAM)
   }), list( .zero = zero ))),
-  initialize = eval(substitute(expression({
 
+  infos = eval(substitute(function(...) {
+    list(M1 = 2,
+         Q1 = 1,
+         multipleResponses = TRUE,
+         imethod = .imethod ,
+         zero = .zero )
+  }, list( .zero = zero,
+           .imethod = imethod ))),
+
+  initialize = eval(substitute(expression({
+    temp5 <-
     w.y.check(w = w, y = y,
-              ncol.w.max = 1,
-              ncol.y.max = 1)
+              ncol.w.max = Inf,  # 1,
+              ncol.y.max = Inf,  # 1,
+              Is.integer.y = TRUE,
+              out.wy = TRUE,
+              colsyperw = 1,
+              maximize = TRUE)
+    w <- temp5$w
+    y <- temp5$y
 
+    extra$ncoly <- ncoly <- NOS <- ncol(y)
+    extra$M1 <- M1 <- 2
+    M <- M1 * ncoly
+    mynames1 <- param.names("lambda", NOS)
+    mynames2 <- param.names("theta",  NOS)
 
+    predictors.names <-
+       c(namesof(mynames1, .llambda , earg = .elambda , tag = FALSE),
+         namesof(mynames2, .ltheta  , earg = .etheta  , tag = FALSE))
+    predictors.names <- predictors.names[interleave.VGAM(M, M = M1)]
 
+    init.lambda <- init.theta <- matrix(0, n, NOS)
+    for (spp. in 1: NOS) {
+      init.lambda[, spp.] <- if ( .imethod == 1) {
+        min(max(0.05,
+                1 - sqrt(weighted.mean(y[, spp.],
+                                       w[, spp.]) / var(y[, spp.]))),
+            0.95)
+      } else if ( .imethod == 2) {
+        runif(n, max = 0.1)
+      } else {
+        runif(n, max = 0.7)
+      }
 
+      init.theta[, spp.]  <- if ( .imethod == 2) {
+        (y[, spp.] + weighted.mean(y[, spp.], w[, spp.])) / 2
+      } else if ( .imethod == 3) {
+        (y[, spp.] + median(y[, spp.])) / 2
+      } else {
+        (1 - .ishrinkage ) * y[, spp.] +
+             .ishrinkage   * weighted.mean(y[, spp.], w[, spp.])
+      }
+    }
 
-    predictors.names <-
-       c(namesof("lambda", .llambda , earg = .elambda , tag = FALSE),
-         namesof("theta",  .ltheta ,  earg = .etheta,  tag = FALSE))
-    init.lambda <- if ( .imethod == 1)
-      1 - sqrt(weighted.mean(y, w) / var(y)) else 0.5
-    init.theta  <- if ( .imethod == 1)
-      sqrt((0.01 + weighted.mean(y, w)^3) / var(y)) else
-      median(y) * (1-init.lambda)
-    if (init.theta <= 0)
-      init.theta <- 0.1
-    cutpt <- if (init.lambda < 0) {
-      mmm <- max(trunc(-init.theta / init.lambda), 4)
-      max(-1, -init.theta /mmm)
-    } else -1
-    if (init.lambda <= cutpt)
-      init.lambda <- cutpt + 0.1
-    if (init.lambda >= 1)
-      init.lambda <- 0.9
     if (!length(etastart)) {
-      lambda <- rep(if (length( .ilambda)) .ilambda else
-                   init.lambda, length = n)
-      theta <- rep(if (length( .itheta)) .itheta else init.theta ,
-                  length = n)
+      init.lambda <- if (length( .ilambda ))
+                       matrix( .ilambda , n, NOS, byrow = TRUE) else
+                       init.lambda
+      init.theta  <- if (length( .itheta ))
+                       matrix( .itheta  , n, NOS, byrow = TRUE) else
+                       init.theta
       etastart <-
-        cbind(theta2eta(lambda, .llambda , earg = .elambda ),
-              theta2eta(theta,  .ltheta ,  earg = .etheta ))
+        cbind(theta2eta(init.lambda, .llambda , earg = .elambda ),
+              theta2eta(init.theta,  .ltheta  , earg = .etheta  ))
+      etastart <- etastart[, interleave.VGAM(M, M = M1), drop = FALSE]
     }
   }), list( .ltheta = ltheta, .llambda = llambda,
             .etheta = etheta, .elambda = elambda,
-            .imethod = imethod,
+            .imethod = imethod, .ishrinkage = ishrinkage,
             .itheta = itheta, .ilambda = ilambda )) ),
   linkinv = eval(substitute(function(eta, extra = NULL) {
-    lambda <- eta2theta(eta[, 1], .llambda , earg = .elambda )
-    theta  <- eta2theta(eta[, 2], .ltheta , earg = .etheta )
+    lambda <- eta2theta(eta[, c(TRUE, FALSE)], .llambda , earg = .elambda )
+    theta  <- eta2theta(eta[, c(FALSE, TRUE)], .ltheta  , earg = .etheta  )
     theta / (1 - lambda)
   }, list( .ltheta = ltheta, .llambda = llambda,
            .etheta = etheta, .elambda = elambda ))),
   last = eval(substitute(expression({
-    misc$link <-    c(lambda = .llambda , theta = .ltheta )
-    misc$earg <- list(lambda = .elambda , theta = .etheta )
-    if (! .use.approx )
-      misc$pooled.weight <- pooled.weight
+    M1 <- extra$M1
+
+    temp.names <- c(mynames1, mynames2)
+    temp.names <- temp.names[interleave.VGAM(M1 * ncoly, M = M1)]
+
+    misc$link <- rep( .llambda , length = M1 * ncoly)
+    misc$earg <- vector("list", M1 * ncoly)
+    names(misc$link) <-
+    names(misc$earg) <- temp.names
+    for (ii in 1:ncoly) {
+      misc$link[ M1*ii-1 ] <- .llambda
+      misc$link[ M1*ii   ] <- .ltheta
+      misc$earg[[M1*ii-1]] <- .elambda
+      misc$earg[[M1*ii  ]] <- .etheta
+    }
+
+    misc$M1 <- M1
+    misc$expected <- TRUE
+    misc$imethod <- .imethod
+    misc$multipleResponses <- TRUE
+
+      misc$use.approx <- .use.approx
   }), list( .ltheta = ltheta, .llambda = llambda,
-            .use.approx = use.approx,
+            .use.approx = use.approx, .imethod = imethod,
             .etheta = etheta, .elambda = elambda ))),
   loglikelihood = eval(substitute(
     function(mu, y, w, residuals = FALSE, eta,
              extra = NULL,
              summation = TRUE) {
-    lambda <- eta2theta(eta[, 1], .llambda , earg = .elambda )
-    theta  <- eta2theta(eta[, 2], .ltheta  , earg = .etheta  )
+    lambda <- eta2theta(eta[, c(TRUE, FALSE)], .llambda , earg = .elambda )
+    theta  <- eta2theta(eta[, c(FALSE, TRUE)], .ltheta  , earg = .etheta  )
     index <- (y == 0)
     if (residuals) {
       stop("loglikelihood residuals not implemented yet")
     } else {
-      ll.elts <-
-        (w[index] * (-theta[index])) +
-        (w[!index] * (-y[!index]*lambda[!index]-theta[!index] +
-         (y[!index]-1)*log(theta[!index]+y[!index]*lambda[!index]) +
-         log(theta[!index]) - lgamma(y[!index]+1)) )
+      ll.elts <- dgenpois(x = y, lambda = lambda, theta = theta,
+                          log = TRUE)
       if (summation) {
         sum(ll.elts)
       } else {
@@ -7382,50 +7466,50 @@ rsimplex <- function(n, mu = 0.5, dispersion = 1) {
            .etheta = etheta, .elambda = elambda ))),
   vfamily = c("genpoisson"),
   deriv = eval(substitute(expression({
-    lambda <- eta2theta(eta[, 1], .llambda , earg = .elambda )
-    theta <- eta2theta(eta[, 2], .ltheta , earg = .etheta )
-    dl.dlambda <- -y + y*(y-1)/(theta+y*lambda)
-    dl.dtheta <- -1 + (y-1)/(theta+y*lambda) + 1/theta
-    dTHETA.deta <- dtheta.deta(theta, .ltheta , earg = .etheta )
+    M1  <- 2
+    NOS <- ncol(eta)/M1
+
+    lambda <- eta2theta(eta[, c(TRUE, FALSE)], .llambda , earg = .elambda )
+    theta  <- eta2theta(eta[, c(FALSE, TRUE)], .ltheta  , earg = .etheta  )
+    dl.dlambda <- -y + y*(y-1) / (theta+y*lambda)
+    dl.dtheta  <- -1 +   (y-1) / (theta+y*lambda) + 1/theta
+    dTHETA.deta  <- dtheta.deta(theta,  .ltheta  , earg = .etheta  )
     dlambda.deta <- dtheta.deta(lambda, .llambda , earg = .elambda )
-    c(w) * cbind(dl.dlambda * dlambda.deta,
-                 dl.dtheta * dTHETA.deta )
+    myderiv <- c(w) * cbind(dl.dlambda * dlambda.deta,
+                            dl.dtheta  * dTHETA.deta )
+    myderiv[, interleave.VGAM(M, M = M1)]
   }), list( .ltheta = ltheta, .llambda = llambda,
             .etheta = etheta, .elambda = elambda ))),
   weight = eval(substitute(expression({
-    wz <- matrix(as.numeric(NA), n, dimm(M))  #3=dimm(M)
+    wz <- matrix(0, n, M + M-1)  # Tridiagonal
+
     if ( .use.approx ) {
-        BBB <- (theta+2)*(theta+2*lambda-theta*lambda)-(theta^2)*(1-lambda)
-        d2l.dlambda2 <- 2 * theta * (theta+2) / ((1-lambda) * BBB)
-        d2l.dtheta2 <- 2 * (1 + lambda * (2/theta - 1)) / BBB
-        d2l.dthetalambda <-  2 * theta / BBB
-        wz[, iam(1, 1, M)] <- d2l.dlambda2 * dlambda.deta^2
-        wz[, iam(2, 2, M)] <- d2l.dtheta2 * dTHETA.deta^2
-        wz[, iam(1, 2, M)] <- d2l.dthetalambda * dTHETA.deta * dlambda.deta
-        wz <- c(w) * wz
+      BBB <- (theta+2)*(theta+2*lambda-theta*lambda)-(theta^2)*(1-lambda)
+      d2l.dlambda2 <- 2 * theta * (theta+2) / ((1-lambda) * BBB)
+      d2l.dtheta2 <- 2 * (1 + lambda * (2/theta - 1)) / BBB
+      d2l.dthetalambda <-  2 * theta / BBB
+      wz[, M1*(1:NOS) - 1    ] <- d2l.dlambda2 * dlambda.deta^2
+      wz[, M1*(1:NOS)        ] <- d2l.dtheta2  * dTHETA.deta^2
+      wz[, M1*(1:NOS) + M - 1] <- d2l.dthetalambda * dTHETA.deta  *
+                                                     dlambda.deta
     } else {
-        d2l.dlambda2 <- -y^2 * (y-1) / (theta+y*lambda)^2
-        d2l.dtheta2 <- -(y-1)/(theta+y*lambda)^2 - 1 / theta^2
-        d2l.dthetalambda <-  -y * (y-1) / (theta+y*lambda)^2 
-        wz[, iam(1, 1, M)] <- -d2l.dlambda2 * dlambda.deta^2
-        wz[, iam(2, 2, M)] <- -d2l.dtheta2 * dTHETA.deta^2
-        wz[, iam(1, 2, M)] <- -d2l.dthetalambda * dTHETA.deta * dlambda.deta
-
-        d2THETA.deta2 <- d2theta.deta2(theta, .ltheta , earg = .etheta )
-        d2lambdadeta2 <- d2theta.deta2(lambda, .llambda , earg = .elambda )
-        wz[, iam(1, 1, M)] <- wz[, iam(1, 1, M)] - dl.dlambda * d2lambdadeta2
-        wz[, iam(2, 2, M)] <- wz[, iam(2, 2, M)] - dl.dtheta * d2THETA.deta2
-        wz <- c(w) * wz
-
-        if (intercept.only) {
-          sumw <- sum(w)
-          for (ii in 1:ncol(wz))
-            wz[, ii] <- sum(wz[, ii]) / sumw
-          pooled.weight <- TRUE
-          wz <- c(w) * wz   # Put back the weights
-        } else
-          pooled.weight <- FALSE
-        }
+      d2l.dlambda2 <- -y^2 * (y-1) / (theta+y*lambda)^2
+      d2l.dtheta2 <- -(y-1)/(theta+y*lambda)^2 - 1 / theta^2
+      d2l.dthetalambda <-  -y * (y-1) / (theta+y*lambda)^2 
+      wz[, M1*(1:NOS) - 1    ] <- -d2l.dlambda2 * dlambda.deta^2
+      wz[, M1*(1:NOS)        ] <- -d2l.dtheta2 * dTHETA.deta^2
+      wz[, M1*(1:NOS) + M - 1] <- -d2l.dthetalambda * dTHETA.deta * dlambda.deta
+
+      d2THETA.deta2 <- d2theta.deta2(theta,  .ltheta  , earg = .etheta  )
+      d2lambdadeta2 <- d2theta.deta2(lambda, .llambda , earg = .elambda )
+      wz[, M1*(1:NOS) - 1    ] <-
+      wz[, M1*(1:NOS) - 1    ] - dl.dlambda * d2lambdadeta2
+      wz[, M1*(1:NOS)        ] <-
+      wz[, M1*(1:NOS)        ] - dl.dtheta * d2THETA.deta2
+    }
+
+    wz <- w.wz.merge(w = w, wz = wz, n = n, M = M + (M - 1),
+                     ndepy = NOS)
     wz
   }), list( .ltheta = ltheta, .llambda = llambda,
             .use.approx = use.approx,
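
Because the rewritten genpoisson() now computes its log-likelihood via
the new dgenpois(), a quick plausibility check of that density is
straightforward. A sketch with arbitrarily chosen parameter values:

  library(VGAM)
  sum(dgenpois(0:200, lambda = 0.2, theta = 2))  # should be close to 1
  dgenpois(c(-1, 2.5), lambda = 0.2, theta = 2)  # zero off the support
  dgenpois(3, lambda = 1.5, theta = 2)           # NaN since lambda > 1
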
diff --git a/R/family.zeroinf.R b/R/family.zeroinf.R
index dc6b77f..e03dcd9 100644
--- a/R/family.zeroinf.R
+++ b/R/family.zeroinf.R
@@ -1949,44 +1949,6 @@ zanegbinomialff.control <- function(save.weights = TRUE, ...) {
 
 
 
- if (FALSE)
-rposnegbin <- function(n, munb, size) {
-  if (!is.Numeric(size, positive = TRUE))
-    stop("argument 'size' must be positive")
-  if (!is.Numeric(munb, positive = TRUE))
-    stop("argument 'munb' must be positive")
-  if (!is.Numeric(n, positive = TRUE, integer.valued = TRUE,
-                  length.arg = 1))
-    stop("argument 'n' must be a positive integer")
-  ans <- rnbinom(n = n, mu = munb, size = size)
-  munb <- rep(munb, length = n)
-  size <- rep(size, length = n)
-  index <- ans == 0
-  while (any(index)) {
-    more <- rnbinom(n = sum(index), mu = munb[index], size = size[index])
-    ans[index] <- more
-    index <- ans == 0
-  }
-  ans
-}
-
- if (FALSE)
-dposnegbin <- function(x, munb, size, log = FALSE) {
-  if (!is.Numeric(size, positive = TRUE))
-    stop("argument 'size' must be positive")
-  if (!is.Numeric(munb, positive = TRUE))
-    stop("argument 'munb' must be positive")
-  ans <- dnbinom(x = x, mu = munb, size = size, log=log)
-  ans0 <- dnbinom(x=0, mu = munb, size = size, log = FALSE)
-  ans <- if (log) ans - log1p(-ans0) else ans/(1-ans0)
-  ans[x == 0] <- if (log) -Inf else 0
-  ans
-}
-
-
-
-
-
 
 
 
diff --git a/R/vgam.R b/R/vgam.R
index 142069b..353a6d6 100644
--- a/R/vgam.R
+++ b/R/vgam.R
@@ -380,3 +380,54 @@ shadowvgam <-
 
 
 
+
+
+is.buggy.vlm <- function(object, each.term = FALSE, ...) {
+
+
+    
+  Hk.list <- constraints(object)
+  ncl <- names(Hk.list)
+  TFvec <- rep(FALSE, length = length(ncl))
+  names(TFvec) <- ncl
+
+
+
+  if (!is(object, "vgam")) {
+    return(if (each.term) TFvec else any(TFvec))
+  }
+  if (!length(object@nl.chisq)) {
+    return(if (each.term) TFvec else any(TFvec))
+  }
+
+  for (kay in 1:length(ncl)) {
+    cmat <- Hk.list[[kay]]
+    if (ncol(cmat) > 1 && substring(ncl[kay], 1, 2) == "s(") {
+      CMat <- crossprod(cmat)  # t(cmat) %*% cmat
+      TFvec[kay] <- any(CMat[lower.tri(CMat)] != 0 |
+                        CMat[upper.tri(CMat)] != 0)
+    }
+  }
+  if (each.term) TFvec else any(TFvec)
+}
+
+
+
+if (!isGeneric("is.buggy"))
+  setGeneric("is.buggy", function(object, ...)
+             standardGeneric("is.buggy"),
+             package = "VGAM")
+
+
+
+setMethod("is.buggy", signature(object = "vlm"),
+          function(object, ...)
+          is.buggy.vlm(object, ...))
+
+
+
+
+
+
+
+
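
The new is.buggy() generic reports whether a vgam() fit contains s()
terms whose constraint matrices have non-orthogonal columns, which is
exactly what is.buggy.vlm() above tests. A minimal usage sketch,
reusing the pneumo fit from the vignette code further down:

  library(VGAM)
  pneumo <- transform(pneumo, let = log(exposure.time))
  fit <- vgam(cbind(normal, mild, severe) ~ s(let, df = 2),
              cumulative(reverse = TRUE, parallel = TRUE), data = pneumo)
  is.buggy(fit)                    # overall TRUE/FALSE
  is.buggy(fit, each.term = TRUE)  # one flag per term
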
diff --git a/build/vignette.rds b/build/vignette.rds
new file mode 100644
index 0000000..aed19ee
Binary files /dev/null and b/build/vignette.rds differ
diff --git a/data/Huggins89.t1.rda b/data/Huggins89.t1.rda
index 9518e99..08db65e 100644
Binary files a/data/Huggins89.t1.rda and b/data/Huggins89.t1.rda differ
diff --git a/data/Huggins89table1.rda b/data/Huggins89table1.rda
index ee81ec0..5e8462f 100644
Binary files a/data/Huggins89table1.rda and b/data/Huggins89table1.rda differ
diff --git a/data/alclevels.rda b/data/alclevels.rda
index b3c90f1..ad1edbc 100644
Binary files a/data/alclevels.rda and b/data/alclevels.rda differ
diff --git a/data/alcoff.rda b/data/alcoff.rda
index 6b90a83..4fced4c 100644
Binary files a/data/alcoff.rda and b/data/alcoff.rda differ
diff --git a/data/auuc.rda b/data/auuc.rda
index b483f23..0e31d6a 100644
Binary files a/data/auuc.rda and b/data/auuc.rda differ
diff --git a/data/backPain.rda b/data/backPain.rda
index 8d3d08a..efece85 100644
Binary files a/data/backPain.rda and b/data/backPain.rda differ
diff --git a/data/beggs.rda b/data/beggs.rda
index 201e207..8c11c3d 100644
Binary files a/data/beggs.rda and b/data/beggs.rda differ
diff --git a/data/car.all.rda b/data/car.all.rda
index dc42cb4..302d432 100644
Binary files a/data/car.all.rda and b/data/car.all.rda differ
diff --git a/data/cfibrosis.rda b/data/cfibrosis.rda
index ceffc54..a99d26c 100644
Binary files a/data/cfibrosis.rda and b/data/cfibrosis.rda differ
diff --git a/data/corbet.rda b/data/corbet.rda
index 735b925..c590311 100644
Binary files a/data/corbet.rda and b/data/corbet.rda differ
diff --git a/data/crashbc.rda b/data/crashbc.rda
index 2561fe8..8cc43af 100644
Binary files a/data/crashbc.rda and b/data/crashbc.rda differ
diff --git a/data/crashf.rda b/data/crashf.rda
index 033a40d..6da8d17 100644
Binary files a/data/crashf.rda and b/data/crashf.rda differ
diff --git a/data/crashi.rda b/data/crashi.rda
index 1b1491b..38f5bbe 100644
Binary files a/data/crashi.rda and b/data/crashi.rda differ
diff --git a/data/crashmc.rda b/data/crashmc.rda
index 43bd5c8..3ff885e 100644
Binary files a/data/crashmc.rda and b/data/crashmc.rda differ
diff --git a/data/crashp.rda b/data/crashp.rda
index 8e13f89..3c750ec 100644
Binary files a/data/crashp.rda and b/data/crashp.rda differ
diff --git a/data/crashtr.rda b/data/crashtr.rda
index 09e18c2..6e25c0f 100644
Binary files a/data/crashtr.rda and b/data/crashtr.rda differ
diff --git a/data/deermice.rda b/data/deermice.rda
index 896593c..148801f 100644
Binary files a/data/deermice.rda and b/data/deermice.rda differ
diff --git a/data/ducklings.rda b/data/ducklings.rda
index eb2ec67..7c20f4b 100644
Binary files a/data/ducklings.rda and b/data/ducklings.rda differ
diff --git a/data/finney44.rda b/data/finney44.rda
index 229e54c..9444e49 100644
Binary files a/data/finney44.rda and b/data/finney44.rda differ
diff --git a/data/flourbeetle.rda b/data/flourbeetle.rda
index 81775d3..2651903 100644
Binary files a/data/flourbeetle.rda and b/data/flourbeetle.rda differ
diff --git a/data/hspider.rda b/data/hspider.rda
index 42bab6c..b8ccf81 100644
Binary files a/data/hspider.rda and b/data/hspider.rda differ
diff --git a/data/lakeO.rda b/data/lakeO.rda
index d40ec1c..6d9b8a6 100644
Binary files a/data/lakeO.rda and b/data/lakeO.rda differ
diff --git a/data/leukemia.rda b/data/leukemia.rda
index 74667fa..a098722 100644
Binary files a/data/leukemia.rda and b/data/leukemia.rda differ
diff --git a/data/marital.nz.rda b/data/marital.nz.rda
index 852416b..568bab1 100644
Binary files a/data/marital.nz.rda and b/data/marital.nz.rda differ
diff --git a/data/melbmaxtemp.rda b/data/melbmaxtemp.rda
index c55b82b..e7306a2 100644
Binary files a/data/melbmaxtemp.rda and b/data/melbmaxtemp.rda differ
diff --git a/data/pneumo.rda b/data/pneumo.rda
index 6deb34e..9fafa1d 100644
Binary files a/data/pneumo.rda and b/data/pneumo.rda differ
diff --git a/data/prinia.rda b/data/prinia.rda
index f36e640..94afdad 100644
Binary files a/data/prinia.rda and b/data/prinia.rda differ
diff --git a/data/ruge.rda b/data/ruge.rda
index da3d073..ee3472e 100644
Binary files a/data/ruge.rda and b/data/ruge.rda differ
diff --git a/data/toxop.rda b/data/toxop.rda
index 48482bf..f75c1ed 100644
Binary files a/data/toxop.rda and b/data/toxop.rda differ
diff --git a/data/venice.rda b/data/venice.rda
index e82fa48..d8e3738 100644
Binary files a/data/venice.rda and b/data/venice.rda differ
diff --git a/data/venice90.rda b/data/venice90.rda
index 15452b9..ca21185 100644
Binary files a/data/venice90.rda and b/data/venice90.rda differ
diff --git a/data/wine.rda b/data/wine.rda
index 608584d..b95b5bf 100644
Binary files a/data/wine.rda and b/data/wine.rda differ
diff --git a/inst/CITATION b/inst/CITATION
index 56af1a0..c9a82b0 100644
--- a/inst/CITATION
+++ b/inst/CITATION
@@ -80,4 +80,27 @@ citEntry(entry = "Manual",
 
 
 
+citEntry(entry = "Article",
+  title        = "The {VGAM} Package for Capture-Recapture Data Using the Conditional Likelihood",
+  author       = personList(as.person("Thomas W. Yee"),
+                   as.person("Jakub Stoklosa"),
+                   as.person("Richard M. Huggins")),
+  journal      = "Journal of Statistical Software",
+  year         = "2015",
+  volume       = "65",
+  number       = "5",
+  pages        = "1--33",
+  url          = "http://www.jstatsoft.org/v65/i05/",
+
+  textVersion  =
+  paste("Thomas W. Yee, Jakub Stoklosa, Richard M. Huggins (2015).",
+        "The VGAM Package for Capture-Recapture Data Using the Conditional Likelihood.",
+        "Journal of Statistical Software, 65(5), 1-33.",
+        "URL http://www.jstatsoft.org/v65/i05/.")
+)
+
+
+
+
+
 
diff --git a/inst/doc/categoricalVGAM.R b/inst/doc/categoricalVGAM.R
new file mode 100644
index 0000000..badcc3c
--- /dev/null
+++ b/inst/doc/categoricalVGAM.R
@@ -0,0 +1,278 @@
+### R code from vignette source 'categoricalVGAM.Rnw'
+
+###################################################
+### code chunk number 1: categoricalVGAM.Rnw:84-90
+###################################################
+library("VGAM")
+library("VGAMdata")
+ps.options(pointsize = 12)
+options(width = 72, digits = 4)
+options(SweaveHooks = list(fig = function() par(las = 1)))
+options(prompt = "R> ", continue = "+")
+
+
+###################################################
+### code chunk number 2: pneumocat
+###################################################
+pneumo <- transform(pneumo, let = log(exposure.time))
+fit <- vgam(cbind(normal, mild, severe) ~ s(let, df = 2),
+            cumulative(reverse = TRUE, parallel = TRUE), data = pneumo)
+
+
+###################################################
+### code chunk number 3: categoricalVGAM.Rnw:903-907
+###################################################
+journal <- c("Biometrika", "Comm.Statist", "JASA", "JRSS-B")
+squaremat <- matrix(c(NA, 33, 320, 284,   730, NA, 813, 276,
+                      498, 68, NA, 325,   221, 17, 142, NA), 4, 4)
+dimnames(squaremat) <- list(winner = journal, loser = journal)
+
+
+###################################################
+### code chunk number 4: categoricalVGAM.Rnw:1007-1011
+###################################################
+abodat <- data.frame(A = 725, B = 258, AB = 72, O = 1073)
+fit <- vglm(cbind(A, B, AB, O) ~ 1, ABO, data = abodat)
+coef(fit, matrix = TRUE)
+Coef(fit)  # Estimated pA and pB
+
+
+###################################################
+### code chunk number 5: categoricalVGAM.Rnw:1289-1291
+###################################################
+head(marital.nz, 4)
+summary(marital.nz)
+
+
+###################################################
+### code chunk number 6: categoricalVGAM.Rnw:1294-1296
+###################################################
+fit.ms <- vgam(mstatus ~ s(age, df = 3), multinomial(refLevel = 2),
+               data = marital.nz)
+
+
+###################################################
+### code chunk number 7: categoricalVGAM.Rnw:1300-1302
+###################################################
+head(depvar(fit.ms), 4)
+colSums(depvar(fit.ms))
+
+
+###################################################
+### code chunk number 8: categoricalVGAM.Rnw:1311-1323
+###################################################
+# Plot output
+mycol <- c("red", "darkgreen", "blue")
+par(mfrow = c(2, 2))
+plot(fit.ms, se = TRUE, scale = 12,
+         lcol = mycol, scol = mycol)
+
+# Plot output overlayed
+#par(mfrow=c(1,1))
+plot(fit.ms, se = TRUE, scale = 12,
+         overlay = TRUE,
+         llwd = 2,
+         lcol = mycol, scol = mycol)
+
+
+###################################################
+### code chunk number 9: categoricalVGAM.Rnw:1366-1379
+###################################################
+getOption("SweaveHooks")[["fig"]]()
+# Plot output
+mycol <- c("red", "darkgreen", "blue")
+ par(mfrow = c(2, 2))
+ par(mar = c(4.2, 4.0, 1.2, 2.2) + 0.1)
+plot(fit.ms, se = TRUE, scale = 12,
+         lcol = mycol, scol = mycol)
+
+# Plot output overlaid
+#par(mfrow = c(1, 1))
+plot(fit.ms, se = TRUE, scale = 12,
+         overlay = TRUE,
+         llwd = 2,
+         lcol = mycol, scol = mycol)
+
+
+###################################################
+### code chunk number 10: categoricalVGAM.Rnw:1399-1400
+###################################################
+plot(fit.ms, deriv = 1, lcol = mycol, scale = 0.3)
+
+
+###################################################
+### code chunk number 11: categoricalVGAM.Rnw:1409-1413
+###################################################
+getOption("SweaveHooks")[["fig"]]()
+# Plot output
+ par(mfrow = c(1, 3))
+ par(mar = c(4.5, 4.0, 0.2, 2.2) + 0.1)
+plot(fit.ms, deriv = 1, lcol = mycol, scale = 0.3)
+
+
+###################################################
+### code chunk number 12: categoricalVGAM.Rnw:1436-1448
+###################################################
+foo <- function(x, elbow = 50)
+  poly(pmin(x, elbow), 2)
+
+clist <- list("(Intercept)" = diag(3),
+              "poly(age, 2)" = rbind(1, 0, 0),
+              "foo(age)"     = rbind(0, 1, 0),
+              "age"          = rbind(0, 0, 1))
+fit2.ms <-
+    vglm(mstatus ~ poly(age, 2) + foo(age) + age,
+         family = multinomial(refLevel = 2),
+         constraints = clist,
+         data = marital.nz)
+
+
+###################################################
+### code chunk number 13: categoricalVGAM.Rnw:1451-1452
+###################################################
+coef(fit2.ms, matrix = TRUE)
+
+
+###################################################
+### code chunk number 14: categoricalVGAM.Rnw:1456-1463
+###################################################
+par(mfrow = c(2, 2))
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[1], scol = mycol[1], which.term = 1)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[2], scol = mycol[2], which.term = 2)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[3], scol = mycol[3], which.term = 3)
+
+
+###################################################
+### code chunk number 15: categoricalVGAM.Rnw:1474-1483
+###################################################
+getOption("SweaveHooks")[["fig"]]()
+# Plot output
+par(mfrow = c(2, 2))
+ par(mar = c(4.5, 4.0, 1.2, 2.2) + 0.1)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[1], scol = mycol[1], which.term = 1)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[2], scol = mycol[2], which.term = 2)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[3], scol = mycol[3], which.term = 3)
+
+
+###################################################
+### code chunk number 16: categoricalVGAM.Rnw:1501-1502
+###################################################
+deviance(fit.ms) - deviance(fit2.ms)
+
+
+###################################################
+### code chunk number 17: categoricalVGAM.Rnw:1508-1509
+###################################################
+(dfdiff <- df.residual(fit2.ms) - df.residual(fit.ms))
+
+
+###################################################
+### code chunk number 18: categoricalVGAM.Rnw:1512-1513
+###################################################
+pchisq(deviance(fit.ms) - deviance(fit2.ms), df = dfdiff, lower.tail = FALSE)
+
+
+###################################################
+### code chunk number 19: categoricalVGAM.Rnw:1526-1537
+###################################################
+ooo <- with(marital.nz, order(age))
+with(marital.nz, matplot(age[ooo], fitted(fit.ms)[ooo, ],
+     type = "l", las = 1, lwd = 2, ylim = 0:1,
+     ylab = "Fitted probabilities",
+     xlab = "Age",  # main="Marital status amongst NZ Male Europeans",
+     col = c(mycol[1], "black", mycol[-1])))
+legend(x = 52.5, y = 0.62,  # x="topright",
+       col = c(mycol[1], "black", mycol[-1]),
+       lty = 1:4,
+       legend = colnames(fit.ms@y), lwd = 2)
+abline(v = seq(10, 90, by = 5), h = seq(0, 1, by = 0.1), col = "gray", lty = "dashed")
+
+
+###################################################
+### code chunk number 20: categoricalVGAM.Rnw:1552-1565
+###################################################
+getOption("SweaveHooks")[["fig"]]()
+ par(mfrow = c(1, 1))
+ par(mar = c(4.5, 4.0, 0.2, 0.2) + 0.1)
+ooo <- with(marital.nz, order(age))
+with(marital.nz, matplot(age[ooo], fitted(fit.ms)[ooo, ],
+     type = "l", las = 1, lwd = 2, ylim = 0:1,
+     ylab = "Fitted probabilities",
+     xlab = "Age",
+     col = c(mycol[1], "black", mycol[-1])))
+legend(x = 52.5, y = 0.62,
+       col = c(mycol[1], "black", mycol[-1]),
+       lty = 1:4,
+       legend = colnames(fit.ms@y), lwd = 2.1)
+abline(v = seq(10, 90, by = 5), h = seq(0, 1, by = 0.1), col = "gray", lty = "dashed")
+
+
+###################################################
+### code chunk number 21: categoricalVGAM.Rnw:1599-1603
+###################################################
+# Scale the variables? Yes; the Anderson (1984) paper did (see his Table 6).
+head(backPain, 4)
+summary(backPain)
+backPain <- transform(backPain, sx1 = -scale(x1), sx2 = -scale(x2), sx3 = -scale(x3))
+
+
+###################################################
+### code chunk number 22: categoricalVGAM.Rnw:1607-1608
+###################################################
+bp.rrmlm1 <- rrvglm(pain ~ sx1 + sx2 + sx3, multinomial, data = backPain)
+
+
+###################################################
+### code chunk number 23: categoricalVGAM.Rnw:1611-1612
+###################################################
+Coef(bp.rrmlm1)
+
+
+###################################################
+### code chunk number 24: categoricalVGAM.Rnw:1640-1641
+###################################################
+set.seed(123)
+
+
+###################################################
+### code chunk number 25: categoricalVGAM.Rnw:1644-1646
+###################################################
+bp.rrmlm2 <- rrvglm(pain ~ sx1 + sx2 + sx3, multinomial, data = backPain, Rank = 2,
+                   Corner = FALSE, Uncor = TRUE)
+
+
+###################################################
+### code chunk number 26: categoricalVGAM.Rnw:1654-1658
+###################################################
+biplot(bp.rrmlm2, Acol = "blue", Ccol = "darkgreen", scores = TRUE,
+#      xlim = c(-1, 6), ylim = c(-1.2, 4),  # Use this if not scaled
+       xlim = c(-4.5, 2.2), ylim = c(-2.2, 2.2),  # Use this if scaled
+       chull = TRUE, clty = 2, ccol = "blue")
+
+
+###################################################
+### code chunk number 27: categoricalVGAM.Rnw:1690-1698
+###################################################
+getOption("SweaveHooks")[["fig"]]()
+# Plot output
+ par(mfrow=c(1,1))
+ par(mar=c(4.5,4.0,0.2,2.2)+0.1)
+
+biplot(bp.rrmlm2, Acol = "blue", Ccol = "darkgreen", scores = TRUE,
+#      xlim = c(-1,6), ylim = c(-1.2,4),  # Use this if not scaled
+       xlim = c(-4.5,2.2), ylim = c(-2.2, 2.2),  # Use this if scaled
+       chull = TRUE, clty = 2, ccol = "blue")
+
+
+###################################################
+### code chunk number 28: categoricalVGAM.Rnw:1812-1813
+###################################################
+iam(NA, NA, M = 4, both = TRUE, diag = TRUE)
+
+
diff --git a/inst/doc/categoricalVGAM.Rnw b/inst/doc/categoricalVGAM.Rnw
new file mode 100644
index 0000000..8394144
--- /dev/null
+++ b/inst/doc/categoricalVGAM.Rnw
@@ -0,0 +1,2325 @@
+\documentclass[article,shortnames,nojss]{jss}
+\usepackage{thumbpdf}
+%% need no \usepackage{Sweave.sty}
+
+\SweaveOpts{engine=R,eps=FALSE}
+%\VignetteIndexEntry{The VGAM Package for Categorical Data Analysis}
+%\VignetteDepends{VGAM}
+%\VignetteKeywords{categorical data analysis, Fisher scoring, iteratively reweighted least squares, multinomial distribution, nominal and ordinal polytomous responses, smoothing, vector generalized linear and additive models, VGAM R package}
+%\VignettePackage{VGAM}
+
+%% new commands
+\newcommand{\sVLM}{\mbox{\scriptsize VLM}}
+\newcommand{\sformtwo}{\mbox{\scriptsize F2}}
+\newcommand{\pr}{\mbox{$P$}}
+\newcommand{\logit}{\mbox{\rm logit}}
+\newcommand{\bzero}{{\bf 0}}
+\newcommand{\bone}{{\bf 1}}
+\newcommand{\bid}{\mbox{\boldmath $d$}}
+\newcommand{\bie}{\mbox{\boldmath $e$}}
+\newcommand{\bif}{\mbox{\boldmath $f$}}
+\newcommand{\bix}{\mbox{\boldmath $x$}}
+\newcommand{\biy}{\mbox{\boldmath $y$}}
+\newcommand{\biz}{\mbox{\boldmath $z$}}
+\newcommand{\biY}{\mbox{\boldmath $Y$}}
+\newcommand{\bA}{\mbox{\rm \bf A}}
+\newcommand{\bB}{\mbox{\rm \bf B}}
+\newcommand{\bC}{\mbox{\rm \bf C}}
+\newcommand{\bH}{\mbox{\rm \bf H}}
+\newcommand{\bI}{\mbox{\rm \bf I}}
+\newcommand{\bX}{\mbox{\rm \bf X}}
+\newcommand{\bW}{\mbox{\rm \bf W}}
+\newcommand{\bY}{\mbox{\rm \bf Y}}
+\newcommand{\bbeta}{\mbox{\boldmath $\beta$}}
+\newcommand{\boldeta}{\mbox{\boldmath $\eta$}}
+\newcommand{\bmu}{\mbox{\boldmath $\mu$}}
+\newcommand{\bnu}{\mbox{\boldmath $\nu$}}
+\newcommand{\diag}{ \mbox{\rm diag} }
+\newcommand{\Var}{ \mbox{\rm Var} }
+\newcommand{\R}{{\textsf{R}}}
+\newcommand{\VGAM}{\pkg{VGAM}}
+
+
+\author{Thomas W. Yee\\University of Auckland}
+\Plainauthor{Thomas W. Yee}
+
+\title{The \pkg{VGAM} Package for Categorical Data Analysis}
+\Plaintitle{The VGAM Package for Categorical Data Analysis}
+
+\Abstract{
+  Classical categorical regression models such as the multinomial logit and
+  proportional odds models are shown to be readily handled by the  vector
+  generalized linear and additive model (VGLM/VGAM) framework. Additionally,
+  there are natural extensions, such as reduced-rank VGLMs for
+  dimension reduction, and allowing covariates that have values
+  specific to each linear/additive predictor,
+  e.g., for consumer choice modeling. This article describes some of the
+  framework behind the \pkg{VGAM} \R{} package, its usage and implementation
+  details.
+}
+\Keywords{categorical data analysis, Fisher scoring,
+  iteratively reweighted least squares,
+  multinomial distribution, nominal and ordinal polytomous responses,
+  smoothing, vector generalized linear and additive models,
+  \VGAM{} \R{} package}
+\Plainkeywords{categorical data analysis, Fisher scoring,
+  iteratively reweighted least squares, multinomial distribution,
+  nominal and ordinal polytomous responses, smoothing,
+  vector generalized linear and additive models, VGAM R package}
+
+\Address{
+  Thomas W. Yee \\
+  Department of Statistics \\
+  University of Auckland, Private Bag 92019 \\
+  Auckland Mail Centre \\
+  Auckland 1142, New Zealand \\
+  E-mail: \email{t.yee@auckland.ac.nz}\\
+  URL: \url{http://www.stat.auckland.ac.nz/~yee/}
+}
+
+
+\begin{document}
+
+
+<<echo=FALSE, results=hide>>=
+library("VGAM")
+library("VGAMdata")
+ps.options(pointsize = 12)
+options(width = 72, digits = 4)
+options(SweaveHooks = list(fig = function() par(las = 1)))
+options(prompt = "R> ", continue = "+")
+@
+
+
+% ----------------------------------------------------------------------
+\section{Introduction}
+\label{sec:jsscat.intoduction}
+
+
+This is a \pkg{VGAM} vignette for categorical data analysis (CDA)
+based on \cite{Yee:2010}.
+Any subsequent features (especially non-backward compatible ones)
+will appear here.
+
+The subject of CDA is concerned with
+analyses where the response is categorical regardless of whether
+the explanatory variables are continuous or categorical; such data
+arise very frequently. Over the years several CDA regression
+models for polytomous responses have become popular, e.g., those
+in Table \ref{tab:cat.quantities}. Not surprisingly, the models
+are interrelated: their foundation is the multinomial distribution,
+and consequently they share similar and overlapping properties which
+modellers should know and exploit. Unfortunately, software has been
+slow to reflect this commonality, and this makes analyses unnecessarily
+difficult for the practitioner on several fronts; e.g., using different
+functions/procedures to fit different models does not aid the
+understanding of their connections.
+
+
+This historical misfortune can be seen by considering \R{} functions
+for CDA. From the Comprehensive \proglang{R} Archive Network
+(CRAN, \url{http://CRAN.R-project.org/}) there is \texttt{polr()}
+\citep[in \pkg{MASS};][]{Venables+Ripley:2002} for a proportional odds
+model and \texttt{multinom()}
+\citep[in \pkg{nnet};][]{Venables+Ripley:2002} for the multinomial
+logit model. However, both of these can be considered `one-off'
+modeling functions rather than providing a unified offering for CDA.
+The function \texttt{lrm()} \citep[in \pkg{rms};][]{Harrell:2009}
+has greater functionality: it can fit the proportional odds model
+(and the forward continuation ratio model upon preprocessing). Neither
+\texttt{polr()} nor \texttt{lrm()} appears able to fit the nonproportional
+odds model. There are non-CRAN packages too, such as the modeling
+function \texttt{nordr()} \citep[in \pkg{gnlm};][]{gnlm:2007}, which can fit
+the proportional odds, continuation ratio and adjacent categories models;
+however it calls \texttt{nlm()} and the user must supply starting values.
+In general these \R{} \citep{R} modeling functions are not modular,
+often require preprocessing, and sometimes are not self-starting.
+The implementations are piecemeal in nature.
+Consequently, if the practitioner wishes to fit the models
+of Table \ref{tab:cat.quantities} then there is a need to master several
+modeling functions from several packages each having different syntaxes
+etc. This is a hindrance to efficient CDA.
+
+
+ 
+\begin{table}[tt]
+\centering
+\begin{tabular}{|c|c|l|}
+\hline
+Quantity & Notation &
+%Range of $j$ &
+\VGAM{} family function \\
+\hline
+%
+$\pr(Y=j+1) / \pr(Y=j)$ &$\zeta_{j}$ &
+%$1,\ldots,M$ &
+\texttt{acat()} \\
+%
+$\pr(Y=j) / \pr(Y=j+1)$ &$\zeta_{j}^{R}$ &
+%$2,\ldots,M+1$ &
+\texttt{acat(reverse = TRUE)} \\
+%
+$\pr(Y>j|Y \geq j)$ &$\delta_{j}^*$ &
+%$1,\ldots,M$ & 
+\texttt{cratio()} \\
+%
+$\pr(Y<j|Y \leq j)$ &$\delta_{j}^{*R}$ &
+%$2,\ldots,M+1$ &
+\texttt{cratio(reverse = TRUE)} \\
+%
+$\pr(Y\leq j)$ &$\gamma_{j}$ &
+%$1,\ldots,M$ &
+\texttt{cumulative()} \\
+%
+$\pr(Y\geq j)$ &$\gamma_{j}^R$&
+%$2,\ldots,M+1$ &
+\texttt{cumulative(reverse = TRUE)} \\
+%
+$\log\{\pr(Y=j)/\pr(Y=M+1)\}$ & &
+%$1,\ldots,M$ &
+\texttt{multinomial()} \\
+%
+$\pr(Y=j|Y \geq j)$ &$\delta_{j}$ &
+%$1,\ldots,M$ &
+\texttt{sratio()} \\
+%
+$\pr(Y=j|Y \leq j)$ &$\delta_{j}^R$ &
+%$2,\ldots,M+1$ &
+\texttt{sratio(reverse = TRUE)} \\
+%
+\hline
+\end{tabular}
+\caption{
+Quantities defined in \VGAM{} for a
+categorical response $Y$ taking values $1,\ldots,M+1$.
+Covariates \bix{} have been omitted for clarity.
+The LHS quantities are $\eta_{j}$
+or $\eta_{j-1}$ for $j=1,\ldots,M$ (not reversed)
+and $j=2,\ldots,M+1$ (if reversed), respectively.
+All models are estimated by minimizing the deviance.
+All except for \texttt{multinomial()} are suited to ordinal $Y$.
+\label{tab:cat.quantities}
+}
+\end{table}
+ 
+
+
+
+\proglang{SAS} \citep{SAS} does not fare much better than \R. Indeed,
+it could be considered as having an \textit{excess} of options which
+bewilders the non-expert user; there is little coherent overriding
+structure. Its \code{proc logistic} handles the multinomial logit
+and proportional odds models, as well as exact logistic regression
+\citep[see][which is for Version 8 of \proglang{SAS}]{stok:davi:koch:2000}.
+The fact that the proportional odds model may be fitted by \code{proc
+logistic}, \code{proc genmod} and \code{proc probit} arguably leads
+to possible confusion rather than the making of connections, e.g.,
+\code{genmod} is primarily for GLMs and the proportional odds model is not
+a GLM in the classical \cite{neld:wedd:1972} sense. Also, \code{proc
+phreg} fits the multinomial logit model, and \code{proc catmod} with
+its WLS implementation adds further potential confusion.
+
+
+This article attempts to show how these deficiencies can be addressed
+by considering the vector generalized linear and additive model
+(VGLM/VGAM) framework, as implemented by the author's \pkg{VGAM}
+package for \R{}. The main purpose of this paper is to demonstrate
+how the framework is very well suited to many `classical' regression
+models for categorical responses, and to describe the implementation and
+usage of \pkg{VGAM} for such. To this end an outline of this article
+is as follows. Section \ref{sec:jsscat.VGLMVGAMoverview} summarizes
+the basic VGLM/VGAM framework. Section \ref{sec:jsscat.vgamff}
+centers on functions for CDA in \VGAM. Given an adequate framework,
+some natural extensions of Section \ref{sec:jsscat.VGLMVGAMoverview} are
+described in Section \ref{sec:jsscat.othermodels}. Users of \pkg{VGAM}
+can benefit from Section \ref{sec:jsscat.userTopics} which shows how
+the software reflects their common theory. Some examples are given in
+Section \ref{sec:jsscat.eg}. Section \ref{sec:jsscat.implementDetails}
+contains selected topics in statistical computing that are
+more relevant to programmers interested in the underlying code.
+Section \ref{sec:jsscat.extnUtil} discusses several utilities and
+extensions needed for advanced CDA modeling, and the article concludes
+with a discussion. This document was run using \pkg{VGAM} 0.7-10
+\citep{yee:VGAM:2010} under \R{} 2.10.0.
+
+
+Some general references for categorical data providing
+background to this article include
+\cite{agre:2010},
+\cite{agre:2013},
+\cite{fahr:tutz:2001},
+\cite{leon:2000},
+\cite{lloy:1999},
+\cite{long:1997},
+\cite{mccu:neld:1989},
+\cite{simo:2003},
+\citet{smit:merk:2013} and
+\cite{tutz:2012}.
+An overview of models for ordinal responses is \cite{liu:agre:2005},
+and a manual for fitting common models found in \cite{agre:2002}
+to polytomous responses with various software is \cite{thom:2009}.
+A package for visualizing categorical data in \R{} is \pkg{vcd}
+\citep{Meyer+Zeileis+Hornik:2006,Meyer+Zeileis+Hornik:2009}.
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{VGLM/VGAM overview}
+\label{sec:jsscat.VGLMVGAMoverview}
+
+
+This section summarizes the VGLM/VGAM framework with a particular emphasis
+on categorical models, since these classes encapsulate many multivariate
+response models in, e.g., survival analysis, extreme value analysis,
+quantile and expectile regression, time series, bioassay data, nonlinear
+least-squares models, and scores of standard and nonstandard univariate
+and continuous distributions. The framework is partially summarized by
+Table \ref{tab:rrvglam.jss.subset}. More general details about VGLMs
+and VGAMs can be found in \cite{yee:hast:2003} and \cite{yee:wild:1996}
+respectively. An informal and practical article connecting the general
+framework with the software is \cite{Rnews:Yee:2008}.
+
+
+
+\subsection{VGLMs}
+\label{sec:wffc.appendixa.vglms}
+
+Suppose the observed response \biy{} is a $q$-dimensional vector.
+VGLMs are defined as a model for which the conditional distribution
+of $\biY$ given explanatory $\bix$ is of the form
+\begin{eqnarray}
+f(\biy | \bix ; \bB, \phi)  =  h(\biy, \eta_1,\ldots, \eta_M, \phi)
+\label{gammod}
+\end{eqnarray}
+for some known function $h(\cdot)$, where $\bB = (\bbeta_1 \,
+\bbeta_2 \, \cdots \, \bbeta_M)$ is a $p \times M$ matrix of
+unknown regression coefficients,
+and the $j$th linear predictor is
+\begin{equation}
+\eta_j  =  \eta_j(\bix)  =  \bbeta_j^{\top} \bix  = 
+\sum_{k=1}^p \beta_{(j)k} \, x_k ,  \qquad j=1,\ldots,M.
+\label{gammod2}
+\end{equation}
+Here $\bix=(x_1,\ldots,x_p)^{\top}$ with $x_1 = 1$ if there is an intercept.
+Note that (\ref{gammod2}) means that \textit{all} the parameters may be
+potentially modelled as functions of \bix. It can be seen that VGLMs are
+like GLMs but allow for multiple linear predictors, and they encompass
+models outside the small confines of the exponential family.
+In (\ref{gammod}) the quantity $\phi$ is an optional scaling parameter
+which is included for backward compatibility with common adjustments
+to overdispersion, e.g., with respect to GLMs.
+
+
+In general there is no relationship between $q$ and $M$: it
+depends specifically on the model or distribution to be fitted.
+However, for the `classical' categorical regression models of
+Table \ref{tab:cat.quantities} we have $M=q-1$ since $q$ is the number
+of levels the multi-category response $Y$ has.
+
+
+
+
+
+The $\eta_j$ of VGLMs may be applied directly to parameters of a
+distribution rather than just to a mean for GLMs. A simple example is
+a univariate distribution with a location parameter $\xi$ and a scale
+parameter $\sigma > 0$, where we may take $\eta_1 = \xi$ and $\eta_2 =
+\log\,\sigma$. In general, $\eta_{j}=g_{j}(\theta_{j})$ for some parameter
+link function $g_{j}$ and parameter $\theta_{j}$.
+For example, the adjacent categories models in
+Table \ref{tab:cat.quantities} are ratios of two probabilities, therefore
+a log link of $\zeta_{j}^{R}$ or $\zeta_{j}$ is the default.
+In \VGAM{}, there are currently over a dozen links to choose from, of
+which any can be assigned to any parameter, ensuring maximum flexibility.
+Table \ref{tab:jsscat.links} lists some of them.
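+
+As a minimal sketch (not run, on simulated data) of several linear
+predictors in action, consider the univariate normal family, where
+$\eta_1 = \mu$ and $\eta_2 = \log\,\sigma$; here \texttt{zero = NULL}
+is assumed to request that both parameters be regressed on \texttt{x2}:
+<<eval=FALSE>>=
+set.seed(1)
+ndat <- data.frame(x2 = runif(200))
+ndat <- transform(ndat, y = rnorm(200, mean = 1 + 2 * x2,
+                                  sd = exp(0.5 + x2)))
+fit.uni <- vglm(y ~ x2, uninormal(zero = NULL), data = ndat)
+coef(fit.uni, matrix = TRUE)  # One column per eta_j
+@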
+
+
+
+\begin{table}[tt]
+\centering
+%\  ~~~ \par
+\begin{tabular}{|l|l|l|l|}
+\hline
+\qquad \qquad $\boldeta$ & 
+Model & Modeling & Reference \\
+ & & function & \\
+%-------------------------------------------------------------
+\hline
+\hline
+%-------------------------------------------------------------
+ &&&\\[-1.1ex]
+$\bB_1^{\top} \bix_{1} + \bB_2^{\top} \bix_{2}\ ( = \bB^{\top} \bix)$ &
+VGLM & \texttt{vglm()}
+&
+\cite{yee:hast:2003} \\[1.6ex]
+%Yee \& Hastie (2003) \\[1.6ex]
+%-------------------------------------------------------------
+\hline
+ &&&\\[-1.1ex]
+$\bB_1^{\top} \bix_{1} +
+ \sum\limits_{k=p_1+1}^{p_1+p_2} \bH_k \, \bif_{k}^{*}(x_k)$ &
+%\sum\limits_{k=1}^{p_2} \bH_k \, \bif_k(x_k)$ &
+VGAM & \texttt{vgam()}
+&
+\cite{yee:wild:1996} \\[2.2ex]
+%Yee \& Wild (1996) \\[2.2ex]
+%-------------------------------------------------------------
+\hline
+ &&&\\[-1.1ex]
+$\bB_1^{\top} \bix_{1} + \bA \, \bnu$ &
+RR-VGLM & \texttt{rrvglm()}
+&
+\cite{yee:hast:2003} \\[1.8ex]
+%Yee \& Hastie (2003) \\[1.8ex]
+%-------------------------------------------------------------
+\hline
+ &&&\\[-1.1ex]
+See \cite{yee:hast:2003} &
+Goodman's RC & \texttt{grc()}
+&
+%\cite{yee:hast:2003} \\[1.8ex]
+\cite{good:1981} \\[1.8ex]
+%-------------------------------------------------------------
+\hline
+\end{tabular}
+\caption{
+A subset of
+the package \VGAM{} and
+its framework.
+The vector of latent variables $\bnu = \bC^{\top} \bix_2$
+where
+$\bix^{\top} = (\bix_1^{\top}, \bix_2^{\top})$.
+\label{tab:rrvglam.jss.subset}
+}
+%\medskip
+\end{table}
+
+
+
+
+
+
+VGLMs are estimated using iteratively reweighted least squares (IRLS) 
+which is particularly suitable for categorical models
+\citep{gree:1984}.
+All models in this article have a log-likelihood
+\begin{equation}
+\ell  =  \sum_{i=1}^n \, w_i \, \ell_i
+\label{eq:log-likelihood.VGAM}
+\end{equation}
+where the $w_i$ are known positive prior weights.
+Let $\bix_i$ denote the explanatory vector for the $i$th observation,
+for $i=1,\dots,n$.
+Then one can write
+\begin{eqnarray}
+\boldeta_i &=& \boldeta(\bix_i)  = 
+\left(
+\begin{array}{c}
+\eta_1(\bix_i) \\
+\vdots \\
+\eta_M(\bix_i)
+\end{array} \right)  = 
+\bB^{\top} \bix_i  =  
+\left(
+\begin{array}{c}
+\bbeta_1^{\top} \bix_i \\
+\vdots \\
+\bbeta_M^{\top} \bix_i
+\end{array} \right)
+\nonumber
+\\
+&=& 
+\left(
+\begin{array}{cccc}
+\beta_{(1)1} & \cdots & \beta_{(1)p} \\
+\vdots \\
+\beta_{(M)1} & \cdots & \beta_{(M)p} \\
+\end{array} \right)
+\bix_i  = 
+\left(
+\bbeta_{(1)} \; \cdots \; \bbeta_{(p)}
+\right)
+\bix_i .
+\label{eq:lin.pred}
+\end{eqnarray}
+In IRLS,
+an adjusted dependent vector $\biz_i = \boldeta_i + \bW_i^{-1} \bid_i$
+is regressed upon a large (VLM) model matrix, with
+$\bid_i = w_i \, \partial \ell_i / \partial \boldeta_i$.
+The working weights $\bW_i$ here are 
+$w_i \Var(\partial \ell_i / \partial \boldeta_i)$
+(which, under regularity conditions, is equal to
+$-w_i \, E[ \partial^2 \ell_i / (\partial \boldeta_i \,
+\partial \boldeta_i^{\top})]$),
+giving rise to the Fisher scoring algorithm.
+
+
+Let $\bX=(\bix_1,\ldots,\bix_n)^{\top}$ be the usual $n \times p$
+(LM) model matrix
+obtained from the \texttt{formula} argument of \texttt{vglm()}.
+Given $\biz_i$, $\bW_i$ and $\bX{}$ at the current IRLS iteration,
+a weighted multivariate regression is performed.
+To do this, a \textit{vector linear model} (VLM) model matrix 
+$\bX_{\sVLM}$ is formed from $\bX{}$ and $\bH_k$
+(see Section \ref{sec:wffc.appendixa.vgams}).
+This has $nM$ rows and, if there are no constraints, $Mp$ columns.
+Then $\left(\biz_1^{\top},\ldots,\biz_n^{\top}\right)^{\top}$ is regressed
+upon $\bX_{\sVLM}$
+with variance-covariance matrix $\diag(\bW_1^{-1},\ldots,\bW_n^{-1})$.
+This system of linear equations is converted to one large
+WLS fit by premultiplying by the
+Cholesky factors of the $\bW_i$.
+
+
+Fisher scoring usually has good numerical stability
+because the $\bW_i$ are positive-definite over a larger
+region of parameter space than Newton-Raphson. 
+For the categorical models in this article the expected
+information matrices are simpler than the observed
+information matrices, and are easily derived,
+therefore all the families in Table \ref{tab:cat.quantities}
+implement Fisher scoring.
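+
+To make one scoring step concrete, here is a minimal base-\R{} sketch
+(an illustration only, \textit{not} \pkg{VGAM}'s actual implementation)
+for the simplest case $M=1$, binary logistic regression, where
+$W_i = \mu_i(1-\mu_i)$ and $z_i = \eta_i + (y_i - \mu_i)/W_i$:
+<<eval=FALSE>>=
+onestep <- function(beta, y, X) {
+  eta <- drop(X %*% beta)
+  mu  <- plogis(eta)            # Inverse logit link
+  W   <- mu * (1 - mu)          # Working weights (scalars when M = 1)
+  z   <- eta + (y - mu) / W     # Adjusted dependent variable
+  coef(lm.wfit(X, z, w = W))    # One weighted least squares fit
+}
+@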
+
+
+
+\subsection{VGAMs and constraint matrices}
+\label{sec:wffc.appendixa.vgams}
+
+
+VGAMs provide additive-model extensions to VGLMs, that is,
+(\ref{gammod2}) is generalized to
+\begin{equation}
+\eta_j(\bix)  =  \beta_{(j)1} +
+\sum_{k=2}^p \; f_{(j)k}(x_k), \qquad j = 1,\ldots, M,
+\label{addmod}
+\end{equation}
+a sum of smooth functions of the individual covariates, just as
+with ordinary GAMs \citep{hast:tibs:1990}. The $\bif_k =
+(f_{(1)k}(x_k),\ldots,f_{(M)k}(x_k))^{\top}$ are centered for uniqueness,
+and are estimated simultaneously using \textit{vector smoothers}.
+VGAMs are thus a visual data-driven method that is well suited to
+exploring data, and they retain the simplicity of interpretation that
+GAMs possess.
+
+
+
+An important concept, especially for CDA, is the idea of
+`constraints-on-the-functions'.
+In practice we often wish to constrain the effect of a covariate to
+be the same for some of the $\eta_j$ and to have no effect for others.
+We shall see below that this constraints idea is important
+for several categorical models because of a popular parallelism assumption.
+As a specific example, for VGAMs we may wish to take
+\begin{eqnarray*}
+\eta_1 & = & \beta_{(1)1} + f_{(1)2}(x_2) + f_{(1)3}(x_3), \\
+\eta_2 & = & \beta_{(2)1} + f_{(1)2}(x_2),
+\end{eqnarray*}
+so that $f_{(1)2} \equiv f_{(2)2}$ and $f_{(2)3} \equiv 0$.
+For VGAMs, we can represent these models using
+\begin{eqnarray}
+\boldeta(\bix) & = & \bbeta_{(1)} + \sum_{k=2}^p \, \bif_k(x_k)
+\ =\ \bH_1 \, \bbeta_{(1)}^* + \sum_{k=2}^p \, \bH_k \, \bif_k^*(x_k)
+\label{eqn:constraints.VGAM}
+\end{eqnarray}
+where $\bH_1,\bH_2,\ldots,\bH_p$ are known full-column rank
+\textit{constraint matrices}, $\bif_k^*$ is a vector containing a
+possibly reduced set of component functions and $\bbeta_{(1)}^*$ is a
+vector of unknown intercepts. With no constraints at all, $\bH_1 =
+\bH_2 = \cdots = \bH_p = \bI_M$ and $\bbeta_{(1)}^* = \bbeta_{(1)}$.
+Like the $\bif_k$, the $\bif_k^*$ are centered for uniqueness.
+For VGLMs, the $\bif_k$ are linear so that
+\begin{eqnarray}
+{\bB}^{\top} &=&
+\left(
+\bH_1 \bbeta_{(1)}^*
+ \;
+\Bigg|
+ \;
+\bH_2 \bbeta_{(2)}^*
+ \;
+\Bigg|
+ \;
+\cdots
+ \;
+\Bigg|
+ \;
+\bH_p \bbeta_{(p)}^*
+\right) 
+\label{eqn:lin.coefs4}
+\end{eqnarray}
+for some vectors
+$\bbeta_{(1)}^*,\ldots,\bbeta_{(p)}^*$.
+
+
+The
+$\bX_{\sVLM}$ matrix is constructed from \bX{} and the $\bH_k$ using
+Kronecker product operations.
+For example, with trivial constraints,
+$\bX_{\sVLM} = \bX \otimes \bI_M$.
+More generally,
+\begin{eqnarray}
+\bX_{\sVLM} &=& 
+\left(
+\left( \bX \, \bie_{1} \right) \otimes \bH_1
+ \;
+\Bigg|
+ \;
+\left( \bX \, \bie_{2} \right) \otimes \bH_2
+ \;
+\Bigg|
+ \;
+\cdots
+ \;
+\Bigg|
+ \;
+\left( \bX \, \bie_{p} \right) \otimes \bH_p
+\right)
+\label{eqn:X_vlm_Hk}
+\end{eqnarray}
+($\bie_{k}$ is a vector of zeros except for a one in the $k$th position)
+so that 
+$\bX_{\sVLM}$ is $(nM) \times p^*$ where
+$p^* = \sum_{k=1}^{p} \mbox{\textrm{ncol}}(\bH_k)$ is the total number
+of columns of all the constraint matrices.
+Note that $\bX_{\sVLM}$ and \bX{} can be obtained by
+\texttt{model.matrix(vglmObject, type = "vlm")}
+and
+\texttt{model.matrix(vglmObject, type = "lm")}
+respectively.
+Equation \ref{eqn:lin.coefs4} focusses on the rows of \bB{}, whereas
+(\ref{eq:lin.pred}) focusses on the columns.
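+
+For instance (a schematic illustration, with \texttt{vglmObject} any
+fitted \texttt{vglm()} object):
+<<eval=FALSE>>=
+dim(model.matrix(vglmObject, type = "lm"))   # n x p
+dim(model.matrix(vglmObject, type = "vlm"))  # (nM) x p*
+@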
+
+
+VGAMs are estimated by applying a modified vector backfitting algorithm
+\citep[cf.][]{buja:hast:tibs:1989} to the $\biz_i$.
+
+
+
+\subsection{Vector splines and penalized likelihood}
+\label{sec:ex.vspline}
+
+If (\ref{eqn:constraints.VGAM}) is estimated using a vector spline (a
+natural extension of the cubic smoothing spline to vector responses)
+then it can be shown that the resulting solution maximizes a penalized
+likelihood; some details are sketched in \cite{yee:step:2007}. In fact,
+knot selection for vector splines follows the same idea as O-splines
+\citep[see][]{wand:orme:2008} in order to lower the computational cost.
+
+
+The usage of \texttt{vgam()} with smoothing is very similar
+to \texttt{gam()} \citep{gam:pack:2009}, e.g.,
+to fit a nonparametric proportional odds model
+\citep[cf. p.179 of][]{mccu:neld:1989}
+to the pneumoconiosis data one could try
+<<label = pneumocat, eval=T>>=
+pneumo <- transform(pneumo, let = log(exposure.time))
+fit <- vgam(cbind(normal, mild, severe) ~ s(let, df = 2),
+            cumulative(reverse = TRUE, parallel = TRUE), data = pneumo)
+@
+Here, setting \texttt{df = 1} means a linear fit so that
+\texttt{df = 2} affords a little nonlinearity.
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section[VGAM family functions]{\pkg{VGAM} family functions}
+\label{sec:jsscat.vgamff}
+
+
+
+This section summarizes and comments on the \VGAM{} family functions
+of Table \ref{tab:cat.quantities} for a categorical response variable
+taking values $Y=1,2,\ldots,M+1$. In its most basic invocation, the usage
+entails a trivial change compared to \texttt{glm()}: use \texttt{vglm()}
+instead and assign the \texttt{family} argument a \VGAM{} family function.
+The use of a \VGAM{} family function to fit a specific model is far
+simpler than having a different modeling function for each model.
+Options specific to that model appear as arguments of that \VGAM{} family
+function.
+
+
+
+
+
+While writing \texttt{cratio()} it was found that various authors defined
+the quantity ``continuation ratio'' differently, therefore it became
+necessary to define a ``stopping ratio''. Table \ref{tab:cat.quantities}
+defines these quantities for \VGAM{}.
+
+
+
+
+The multinomial logit model is usually described by choosing the first or
+last level of the factor to be baseline. \VGAM{} chooses the last level
+(Table \ref{tab:cat.quantities}) by default, however that can be changed
+to any other level by use of the \texttt{refLevel} argument.
+
+
+
+
+If the proportional odds assumption is inadequate then one strategy is
+to try a different link function (see Section \ref{sec:jsscat.links}
+for a selection). Another alternative is to add extra terms such as
+interaction terms into the linear predictor
+\citep[available in the \proglang{S} language;][]{cham:hast:1993}.
+Another is to fit the so-called \textit{partial}
+proportional odds model \citep{pete:harr:1990}
+which \VGAM{} can fit via constraint matrices.
+
+
+
+In the terminology of \cite{agre:2002},
+\texttt{cumulative()} fits the class of \textit{cumulative link models},
+e.g.,
+\texttt{cumulative(link = probit)} is a cumulative probit model.
+For \texttt{cumulative()}
+it was difficult to decide whether
+\texttt{parallel = TRUE}
+or
+\texttt{parallel = FALSE}
+should be the default.
+In fact, the latter is the default, at least for now.
+Users need to set
+\texttt{cumulative(parallel = TRUE)} explicitly to
+fit a proportional odds model---hopefully this will alert
+them to the fact that they are making
+the proportional odds assumption and
+check its validity (\cite{pete:1990}; e.g., through a deviance or
+likelihood ratio test). However, the default means numerical problems
+can occur far more readily.
+Thus there is tension between the two options.
+As a compromise there is now a \VGAM{} family function
+called \texttt{propodds()}: the call \texttt{propodds(reverse = reverse)}
+is equivalent to
+\texttt{cumulative(parallel = TRUE, reverse = reverse, link = "logit")}.
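+
+As a quick consistency check (a hedged sketch, not run) on the
+pneumoconiosis data of Section \ref{sec:ex.vspline}, the two calls
+below should give essentially identical coefficients:
+<<eval=FALSE>>=
+fit.po <- vglm(cbind(normal, mild, severe) ~ let, propodds, data = pneumo)
+fit.cu <- vglm(cbind(normal, mild, severe) ~ let,
+               cumulative(parallel = TRUE, reverse = TRUE), data = pneumo)
+max(abs(coef(fit.po) - coef(fit.cu)))  # Expected to be essentially zero
+@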
+
+
+
+By the way, note that arguments such as 
+\texttt{parallel}
+can handle a slightly more complex syntax.
+A call such as
+\code{parallel = TRUE ~ x2 + x5 - 1} means the parallelism assumption
+is only applied to $X_2$ and $X_5$.
+This might be equivalent to something like
+\code{parallel = FALSE ~ x3 + x4}, i.e., to the remaining
+explanatory variables.
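+
+A hedged sketch (not run, with a hypothetical data frame \texttt{mydat}
+containing an ordinal response \texttt{y} and covariates
+\texttt{x2}--\texttt{x5}) is
+<<eval=FALSE>>=
+fit.pp <- vglm(y ~ x2 + x3 + x4 + x5,
+               cumulative(parallel = TRUE ~ x2 + x5 - 1, reverse = TRUE),
+               data = mydat)
+constraints(fit.pp)  # H_k = 1_M for x2 and x5, else H_k = I_M
+@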
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{Other models}
+\label{sec:jsscat.othermodels}
+
+
+Given the VGLM/VGAM framework of Section \ref{sec:jsscat.VGLMVGAMoverview}
+it is found that natural extensions are readily proposed in several
+directions. This section describes some such extensions.
+
+
+
+
+\subsection{Reduced-rank VGLMs}
+\label{sec:jsscat.RRVGLMs}
+
+
+Consider a multinomial logit model where $p$ and $M$ are both large.
+A (not-too-convincing) example might be the data frame \texttt{vowel.test}
+in the package \pkg{ElemStatLearn} \citep[see][]{hast:tibs:buja:1994}.
+The vowel recognition data set involves $q=11$ symbols produced from
+8 speakers with 6 replications of each. The training data comprises
+$10$ input features (not including the intercept) based on digitized
+utterances. A multinomial logit model fitted to these data would
+have $\widehat{\bB}$ comprising of $p \times (q-1) = 110$ regression
+coefficients for $n=8\times 6\times 11 = 528$ observations. The ratio
+of $n$ to the number of parameters is small, and it would be good to
+introduce some parsimony into the model.
+
+
+
+A simple and elegant solution is to represent $\widehat{\bB}$ by
+its reduced-rank approximation. To do this, partition $\bix$ into
+$(\bix_1^{\top}, \bix_2^{\top})^{\top}$ and $\bB = (\bB_1^{\top} \;
+\bB_2^{\top})^{\top}$ so that the reduced-rank regression is applied
+to $\bix_2$. In general, \bB{} is a dense matrix of full rank, i.e., rank
+$=\min(M,p)$, and since there are $M \times p$ regression coefficients
+to estimate this is `too' large for some models and/or data sets.
+If we approximate $\bB_2$ by a reduced-rank regression
+\begin{equation}
+\label{eq:rrr.BAC}
+\bB_2  =  \bC{} \, \bA^{\top}
+\end{equation}
+and if
+the rank $R$ is kept low then this can cut down the number of regression
+coefficients dramatically. If $R=2$ then the results may be biplotted
+(\texttt{biplot()} in \VGAM{}). Here, \bC{} and \bA{} are $p_2 \times R$
+and $M \times R$ respectively, and usually they are `thin'.
+
+
+More generally, the class of \textit{reduced-rank VGLMs} (RR-VGLMs)
+is simply a VGLM where $\bB_2$ is expressed as a product of two thin
+estimated matrices (Table \ref{tab:rrvglam.jss.subset}). Indeed,
+\cite{yee:hast:2003} show that RR-VGLMs are VGLMs with constraint
+matrices that are unknown and estimated. Computationally, this is
+done using an alternating method: in (\ref{eq:rrr.BAC}) estimate \bA{}
+given the current estimate of \bC{}, and then estimate \bC{} given the
+current estimate of \bA{}. This alternating algorithm is repeated until
+convergence within each IRLS iteration.
+
+
+Incidentally, special cases of RR-VGLMs have appeared in the
+literature. For example, an RR-multinomial logit model is known as the
+\textit{stereotype} model \citep{ande:1984}. Another is \cite{good:1981}'s
+RC model (see Section \ref{sec:jsscat.rrr.goodman}), which is a reduced-rank
+multivariate Poisson model.
+proportional odds model \citep{mccu:neld:1989} can be thought of as a
+type of reduced-rank regression where the constraint matrices are thin
+($\bone_M$, actually) and known.
+
+
+
+The modeling function \texttt{rrvglm()} should work with any \VGAM{}
+family function compatible with \texttt{vglm()}. Of course, its
+applicability should be restricted to models where a reduced-rank
+regression of $\bB_2$ makes sense.
+
+
+
+
+
+
+
+
+
+\subsection[Goodman's R x C association model]{Goodman's $R \times C$ association model}
+\label{sec:jsscat.rrr.goodman}
+
+
+
+
+
+Let $\bY = [(y_{ij})]$ be an $n \times M$ matrix of counts.
+Section 4.2 of \cite{yee:hast:2003} shows that Goodman's RC$(R)$ association
+model \citep{good:1981} fits within the VGLM framework by setting up
+the appropriate indicator variables, structural zeros and constraint
+matrices. Goodman's model fits a reduced-rank type model to \bY{}
+by firstly assuming that $Y_{ij}$ has a Poisson distribution, and that
+\begin{eqnarray}
+\log \, \mu_{ij} &=& \mu + \alpha_{i} + \gamma_{j} + 
+\sum_{k=1}^R a_{ik} \, c_{jk} , 
+\ \ \ i=1,\ldots,n;\ \ j=1,\ldots,M,
+\label{eqn:goodmanrc}
+\end{eqnarray}
+where $\mu_{ij} = E(Y_{ij})$ is the mean of the $i$-$j$ cell, and the
+rank $R$ satisfies $R < \min(n,M)$.
+
+
+The modeling function \texttt{grc()} should work on any two-way
+table \bY{} of counts generated by (\ref{eqn:goodmanrc}) provided
+the number of 0's is not too large. Its usage is quite simple, e.g.,
+\texttt{grc(Ymatrix, Rank = 2)} fits a rank-2 model to a matrix of counts.
+By default a \texttt{Rank = 1} model is fitted.
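+
+A minimal sketch on simulated counts (not run):
+<<eval=FALSE>>=
+set.seed(1)
+Ymatrix <- matrix(rpois(4 * 5, lambda = 10), 4, 5,
+                  dimnames = list(letters[1:4], LETTERS[1:5]))
+grc1 <- grc(Ymatrix)  # Rank = 1 by default
+coef(grc1)            # Row and column effects plus interaction terms
+@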
+
+
+
+
+\subsection{Bradley-Terry models}
+\label{sec:jsscat.brat}
+
+Consider
+an experiment in which $n_{ij}$ judges compare
+pairs of items $T_i$, $i=1,\ldots,M+1$.
+They express their preferences between $T_i$ and $T_j$. 
+Let $N=\sum \sum_{i<j} n_{ij}$ be the total number of pairwise
+comparisons, and assume independence for ratings of the same pair
+by different judges and for ratings of different pairs by the same judge.
+Let $\pi_i$ be the \textit{worth} of item $T_i$,
+\[
+\pr(T_i > T_j)  =  p_{i/ij}  =  \frac{\pi_i}{\pi_i + \pi_j},
+\  \qquad i \neq {j},
+\]
+where ``$T_i>T_j$'' means $i$ is preferred over $j$.
+Suppose that $\pi_i > 0$.
+Let $Y_{ij}$ be the number of times that $T_i$ is preferred
+over $T_j$ in the $n_{ij}$ comparisons of the pairs.
+Then $Y_{ij} \sim {\rm Bin}(n_{ij},p_{i/ij})$.
+This is a Bradley-Terry model (without ties),
+and the \VGAM{} family function is \texttt{brat()}.
+
+
+Maximum likelihood estimation of the parameters $\pi_1,\ldots,\pi_{M+1}$
+involves maximizing
+\[
+\prod_{i<j}^{M+1}
+\left(
+\begin{array}{c}
+n_{ij} \\
+y_{ij}
+\end{array} \right)
+\left(
+\frac{\pi_i}{\pi_i + \pi_j}
+\right)^{y_{ij}}
+\left(
+\frac{\pi_j}{\pi_i + \pi_j}
+\right)^{n_{ij}-y_{ij}} .
+\]
+By default, $\pi_{M+1} \equiv 1$ is used for identifiability;
+however, this can be changed very easily.
+Note that one can define 
+linear predictors $\eta_{ij}$ of the form
+\begin{equation}
+\label{eq:bradter.logit}
+\logit 
+\left(
+\frac{\pi_i}{\pi_i + \pi_j}
+\right)  =  \log 
+\left(
+\frac{\pi_i}{\pi_j}
+\right)  =  \lambda_i - \lambda_j .
+\end{equation}
+The \VGAM{} framework can handle the Bradley-Terry model only for
+intercept-only models; it has
+\begin{equation}
+\label{eq:bradter}
+\lambda_j  =  \eta_j  =  \log\, \pi_j = \beta_{(1)j},
+\ \ \ \ j=1,\ldots,M.
+\end{equation}
+
+
+As well as having many applications in the field of preferences,
+the Bradley-Terry model has many uses in modeling `contests' between
+teams $i$ and $j$, where only one of the teams can win in each
+contest (ties are not allowed under the classical model).
+The packaging function \texttt{Brat()} can be used to
+convert a square matrix into one that has more columns, to
+serve as input to \texttt{vglm()}.
+An example is journal citation data,
+where a citation of article B
+by article A is a win for article B and a loss for article A.
+On a specific data set,
+<<>>=
+journal <- c("Biometrika", "Comm.Statist", "JASA", "JRSS-B")
+squaremat <- matrix(c(NA, 33, 320, 284,   730, NA, 813, 276,
+                      498, 68, NA, 325,   221, 17, 142, NA), 4, 4)
+dimnames(squaremat) <- list(winner = journal, loser = journal)
+@
+then \texttt{Brat(squaremat)} returns a $1 \times 12$ matrix.
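+
+A hedged sketch of the corresponding fit (not run) is
+<<eval=FALSE>>=
+journal.fit <- vglm(Brat(squaremat) ~ 1, brat)
+Coef(journal.fit)  # Estimated worths pi_i; the last is fixed at 1
+@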
+
+
+
+
+
+
+
+\subsubsection{Bradley-Terry model with ties}
+\label{sec:cat.bratt}
+
+
+The \VGAM{} family function \texttt{bratt()} implements
+a Bradley-Terry model with ties (no preference), e.g.,
+where both $T_i$ and $T_j$ are equally good or bad.
+Here we assume
+\begin{eqnarray*}
+ \pr(T_i > T_j) &=& \frac{\pi_i}{\pi_i + \pi_j + \pi_0},
+\ \qquad
+ \pr(T_i = T_j)  =  \frac{\pi_0}{\pi_i + \pi_j + \pi_0},
+\end{eqnarray*}
+with $\pi_0 > 0$ as an extra parameter.
+It has 
+\[
+\boldeta=(\log \pi_1,\ldots, \log \pi_{M-1}, \log \pi_{0})^{\top}
+\]
+by default, where there are $M$ competitors and $\pi_M \equiv 1$.
+Like \texttt{brat()}, one can choose a different reference group
+and reference value.
+
+
+Other \R{} packages for the Bradley-Terry model
+include \pkg{BradleyTerry2}
+by H. Turner and D. Firth
+\citep[with and without ties;][]{firth:2005,firth:2008}
+and \pkg{prefmod} \citep{Hatzinger:2009}.
+
+
+
+
+\begin{table}[tt]
+\centering
+\begin{tabular}{|l|c|}
+\hline
+\pkg{VGAM} family function & Independent parameters \\
+\hline
+\texttt{ABO()} & $p, q$ \\
+\texttt{MNSs()} & $m_S, m_s, n_S$ \\
+\texttt{AB.Ab.aB.ab()} & $p$ \\
+\texttt{AB.Ab.aB.ab2()} & $p$ \\
+\texttt{AA.Aa.aa()} & $p_A$ \\
+\texttt{G1G2G3()} & $p_1, p_2, f$ \\
+\hline
+\end{tabular}
+\caption{Some genetic models currently implemented
+and their unique parameters.
+\label{tab:gen.all}
+}
+\end{table}
+
+
+
+
+
+\subsection{Genetic models}
+\label{sec:jsscat.genetic}
+
+
+There are quite a number of population genetic models based on the
+multinomial distribution,
+e.g., \cite{weir:1996}, \cite{lang:2002}.
+Table \ref{tab:gen.all} lists some \pkg{VGAM} family functions for such models.
+
+
+
+
+For example the ABO blood group system
+has two independent parameters $p$ and $q$, say.
+Here,
+the blood groups A, B and O form six possible combinations (genotypes)
+consisting of AA, AO, BB, BO, AB, OO
+(see Table \ref{tab:ABO}). A and B are dominant over
+blood type O. Let $p$, $q$ and $r$ be the probabilities
+for A, B and O respectively (so that
+$p+q+r=1$) for a given population. 
+The log-likelihood function is 
+\[
+\ell(p,q) \;=\; n_A\, \log(p^2 + 2pr) + n_B\, \log(q^2 + 2qr) + n_{AB}\,
+\log(2pq) + 2 n_O\, \log(1-p-q),
+\]
+where $r = 1 - p -q$, $p \in (\,0,1\,)$,
+$q \in (\,0,1\,)$, $p+q<1$.
+We let $\boldeta = (g(p), g(q))^{\top}$ where $g$ is the link function.
+Any $g$ from Table \ref{tab:jsscat.links} appropriate for
+a parameter $\theta \in (0,1)$ will do.
+
+
+A toy example where $p=p_A$ and $q=p_B$ is
+<<>>=
+abodat <- data.frame(A = 725, B = 258, AB = 72, O = 1073)
+fit <- vglm(cbind(A, B, AB, O) ~ 1, ABO, data = abodat)
+coef(fit, matrix = TRUE)
+Coef(fit)  # Estimated pA and pB
+@
+The function \texttt{Coef()}, which applies only to intercept-only models,
+takes each $g_{j}(\theta_{j})=\eta_{j}$ and
+applies the inverse link function $g_{j}^{-1}$ to $\widehat{\eta}_{j}$
+to give $\widehat{\theta}_{j}$.
+
+
+
+
+
+
+
+\begin{table}[tt]
+% Same as Table 14.1 of E-J, and Table 2.6 of Weir 1996
+\begin{center}
+\begin{tabular}{|l|cc|cc|c|c|}
+\hline
+Genotype   & AA  & AO  & BB  &  BO  & AB  &  OO  \\
+Probability&$p^2$&$2pr$&$q^2$&$ 2qr$&$2pq$& $r^2$\\
+Blood group&  A  &  A  &  B  &  B   &  AB &  O \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Probability table for the ABO blood group system.
+Note that $p$ and $q$ are the parameters and $r=1-p-q$.
+\label{tab:ABO}
+}
+\end{table}
+
+
+
+
+
+\subsection{Three main distributions}
+\label{sec:jsscat.3maindist}
+
+\cite{agre:2002} discusses three main distributions for categorical
+variables: binomial, multinomial, and Poisson
+\citep{thom:2009}.
+All three are well-represented in the \VGAM{} package,
+accompanied by variant forms.
+For example,
+there is a
+\VGAM{} family function named \texttt{mbinomial()}
+which implements a
+matched binomial (suitable for matched case-control studies),
+and there are families for
+Poisson ordination (useful in ecology for multi-species-environmental data),
+negative binomial regression,
+positive, zero-altered and zero-inflated variants,
+and the bivariate odds ratio model
+\citep[\texttt{binom2.or()}; see Section 6.5.6 of][]{mccu:neld:1989}.
+The latter has an \texttt{exchangeable} argument to allow for an
+exchangeable error structure:
+\begin{eqnarray}
+\bH_1  = 
+\left( \begin{array}{cc}
+1 & 0 \\
+1 & 0 \\
+0 & 1 \\
+\end{array} \right), \qquad
+\bH_k  = 
+\left( \begin{array}{cc}
+1 \\
+1 \\
+0 \\
+\end{array} \right), \quad k=2,\ldots,p,
+\label{eqn:blom.exchangeable}
+\end{eqnarray}
+since, for data $(Y_1,Y_2,\bix)$,
+$\logit \, P\!\left( Y_{j} = 1 \Big{|} \bix \right) = 
+\eta_{j}$ for ${j}=1,2$, and
+$\log \, \psi = \eta_{3}$
+where $\psi$ is the odds ratio,
+and so $\eta_{1}=\eta_{2}$.
+Here, \texttt{binom2.or()} has \texttt{zero = 3} by default, meaning $\psi$ is
+modelled as intercept-only
+(in general, \texttt{zero} may be assigned an integer vector
+such that the value $j$ means $\eta_{j} = \beta_{(j)1}$,
+i.e., the $j$th linear/additive predictor is intercept-only).
+See the online help for all of these models.
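+
+A hedged sketch of such a fit (not run, with a hypothetical data frame
+\texttt{bidat} whose four response columns count the joint outcomes
+$(0,0)$, $(0,1)$, $(1,0)$ and $(1,1)$) is
+<<eval=FALSE>>=
+fit.eo <- vglm(cbind(y00, y01, y10, y11) ~ x2,
+               binom2.or(exchangeable = TRUE, zero = 3), data = bidat)
+constraints(fit.eo)  # Should display the H_k shown above
+@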
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{Some user-oriented topics}
+\label{sec:jsscat.userTopics}
+
+
+Making the most of \VGAM{} requires an understanding of the general
+VGLM/VGAM framework described in Section \ref{sec:jsscat.VGLMVGAMoverview}.
+In this section we connect elements of that framework with the software.
+Before doing so it is noted that
+a fitted \VGAM{} categorical model has access to the usual
+generic functions, e.g.,
+\texttt{coef()} for
+$\left(\widehat{\bbeta}_{(1)}^{*T},\ldots,\widehat{\bbeta}_{(p)}^{*T}\right)^{\top}$
+(see Equation \ref{eqn:lin.coefs4}),
+\texttt{constraints()} for $\bH_k$,
+\texttt{deviance()} for $2\left(\ell_{\mathrm{max}} - \ell\right)$,
+\texttt{fitted()} for $\widehat{\bmu}_i$,
+\texttt{logLik()} for $\ell$,
+\texttt{predict()} for $\widehat{\boldeta}_i$,
+\texttt{print()},
+\texttt{residuals(..., type = "response")} for $\biy_i - \widehat{\bmu}_i$ etc.,
+\texttt{summary()},
+\texttt{vcov()} for $\widehat{\Var}(\widehat{\bbeta})$,
+etc.
+The methods function for the extractor function
+\texttt{coef()} has an argument \texttt{matrix}
+which, when set \texttt{TRUE}, returns $\widehat{\bB}$
+(see Equation \ref{gammod}) as a $p \times M$ matrix,
+and this is particularly useful for confirming that a fit
+has made a parallelism assumption.
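+
+For instance, on the pneumoconiosis fit of Section \ref{sec:ex.vspline}
+(a hedged illustration, not run):
+<<eval=FALSE>>=
+coef(fit, matrix = TRUE)  # One column per eta_j; a row with
+                          # equal entries reveals parallelism
+@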
+
+
+
+
+
+
+
+\subsection{Common arguments}
+\label{sec:jsscat.commonArgs}
+
+
+The structure of the unified framework given in
+Section \ref{sec:jsscat.VGLMVGAMoverview}
+appears clearly through
+the pool of common arguments
+shared by the
+\VGAM{} family functions in Table \ref{tab:cat.quantities}.
+In particular,
+\texttt{reverse} and
+\texttt{parallel}
+are prominent with CDA.
+These are merely convenient shortcuts for the argument \texttt{constraints},
+which accepts a named list of constraint matrices $\bH_k$.
+For example, setting
+\texttt{cumulative(parallel = TRUE)} would constrain the coefficients $\beta_{(j)k}$
+in (\ref{gammod2}) to be equal for all $j=1,\ldots,M$,
+each separately for $k=2,\ldots,p$.
+That is, $\bH_k = \bone_M$.
+The argument \texttt{reverse} determines the `direction' of
+the parameter or quantity.
+
+Another argument not so much used with CDA is \texttt{zero};
+this accepts a vector specifying which $\eta_j$ are to be modelled as
+intercept-only; assigning it \texttt{NULL} means none are.
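+
+A hedged illustration (not run) on the pneumoconiosis fit of
+Section \ref{sec:ex.vspline}:
+<<eval=FALSE>>=
+constraints(fit)  # A named list: I_M for "(Intercept)" and
+                  # 1_M for "s(let, df = 2)" due to parallel = TRUE
+@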
+
+
+
+
+
+
+
+
+\subsection{Link functions}
+\label{sec:jsscat.links}
+
+Almost all \VGAM{} family functions
+(one notable exception is \texttt{multinomial()})
+allow, in theory, for any link function to be assigned to each $\eta_j$.
+This provides maximum capability.
+If so then there is an extra argument to pass in any known parameter
+associated with the link function.
+For example, \texttt{link = "logoff", earg = list(offset = 1)}
+signifies a log link with a unit offset:
+$\eta_{j} = \log(\theta_{j} + 1)$ for some parameter $\theta_{j}\ (> -1)$.
+The name \texttt{earg} stands for ``extra argument''.
+Table \ref{tab:jsscat.links} lists some links relevant to categorical data.
+While the default gives a reasonable first choice,
+users are encouraged to try different links.
+For example, fitting a binary regression model
+(\texttt{binomialff()}) to the coal miners data set \texttt{coalminers} with
+respect to the response wheeze gives a
+nonsignificant regression coefficient for $\beta_{(1)3}$ with probit analysis
+but not with a logit link when
+$\eta = \beta_{(1)1} + \beta_{(1)2} \, \mathrm{age} + \beta_{(1)3} \, \mathrm{age}^2$.
+Developers and serious users are encouraged to write and use
+new link functions compatible with \VGAM.
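+
+For example, a hedged sketch (not run) of a cumulative probit
+counterpart to the proportional odds model for the pneumoconiosis data:
+<<eval=FALSE>>=
+fit.probit <- vglm(cbind(normal, mild, severe) ~ let,
+                   cumulative(link = "probit", parallel = TRUE),
+                   data = pneumo)
+coef(fit.probit, matrix = TRUE)
+@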
+
+
+
+
+
+
+\begin{table*}[tt]
+\centering
+\medskip
+\begin{tabular}{|l|c|c|}
+\hline
+Link function & $g(\theta)$ & Range of $\theta$ \\
+\hline
+\texttt{cauchit()} & $\tan(\pi(\theta-\frac12))$ & $(0,1)$ \\
+\texttt{cloglog()} & $\log_e\{-\log_e(1 - \theta)\}$ & $(0,1)$ \\
+\texttt{fisherz()} & 
+$\frac12\,\log_e\{(1 + \theta)/(1 - \theta)\}$ & $(-1,1)$ \\
+\texttt{identity()} & $\theta$ & $(-\infty,\infty)$ \\
+\texttt{logc()} & $\log_e(1 - \theta)$ & $(-\infty,1)$ \\
+\texttt{loge()} & $\log_e(\theta)$ & $(0,\infty)$ \\
+\texttt{logit()} & $\log_e(\theta/(1 - \theta))$ & $(0,1)$ \\
+\texttt{logoff()} & $\log_e(\theta + A)$ & $(-A,\infty)$ \\
+\texttt{probit()} & $\Phi^{-1}(\theta)$ & $(0,1)$ \\
+\texttt{rhobit()} & $\log_e\{(1 + \theta)/(1 - \theta)\}$ & $(-1,1)$ \\
+\hline
+\end{tabular}
+\caption{
+Some \VGAM{} link functions pertinent to this article.
+\label{tab:jsscat.links}
+}
+\end{table*}
+
+
+
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{Examples}
+\label{sec:jsscat.eg}
+
+This section illustrates CDA modeling on two
+data sets in order to give a flavour of what is available in the package.
+
+
+
+
+%20130919
+%Note: 
+%\subsection{2008 World Fly Fishing Championships}
+%\label{sec:jsscat.eg.WFFC}
+%are deleted since there are problems with accessing the \texttt{wffc.nc}
+%data etc. since they are now in \pkg{VGAMdata}.
+
+
+
+
+
+
+
+\subsection{Marital status data}
+\label{sec:jsscat.eg.mstatus}
+
+We fit a nonparametric multinomial logit model to data collected from
+a self-administered questionnaire administered in a large New Zealand
+workforce observational study conducted during 1992--3.
+The data were augmented by a second study consisting of retirees.
+For homogeneity, this analysis is restricted
+to a subset of 6053 European males with no missing values.
+The ages ranged between 16 and 88 years.
+The data can be considered a reasonable representation of the white
+male New Zealand population in the early 1990s, and
+are detailed in \cite{macm:etal:1995} and \cite{yee:wild:1996}.
+We are interested in exploring how $Y=$ marital status varies as a function
+of $x_2=$ age. The nominal response $Y$ has four levels;
+in sorted order, they are divorced or separated, married or partnered,
+single and widowed.
+We will write these levels as $Y=1$, $2$, $3$, $4$, respectively,
+and will choose the married/partnered (second level) as the reference group
+because the other levels emanate directly from it.
+
+Suppose the data are in a data frame called \texttt{marital.nz}
+and looks like
+<<>>=
+head(marital.nz, 4)
+summary(marital.nz)
+@
+We fit the VGAM
+<<>>=
+fit.ms <- vgam(mstatus ~ s(age, df = 3), multinomial(refLevel = 2),
+               data = marital.nz)
+@
+
+Once again, let us first check the input.
+<<>>=
+head(depvar(fit.ms), 4)
+colSums(depvar(fit.ms))
+@
+This seems okay.
+
+
+
+
+Now the estimated component functions $\widehat{f}_{(s)2}(x_2)$
+may be plotted with
+<<fig=F>>=
+# Plot output
+mycol <- c("red", "darkgreen", "blue")
+par(mfrow = c(2, 2))
+plot(fit.ms, se = TRUE, scale = 12,
+         lcol = mycol, scol = mycol)
+
+# Plot output overlaid
+#par(mfrow=c(1,1))
+plot(fit.ms, se = TRUE, scale = 12,
+         overlay = TRUE,
+         llwd = 2,
+         lcol = mycol, scol = mycol)
+@
+to produce Figure \ref{fig:jsscat.eg.mstatus}.
+The \texttt{scale} argument is used here to ensure that the $y$-axes have
+a common scale---this makes comparisons between the component functions
+less susceptible to misinterpretation.
+The first three plots are the (centered) $\widehat{f}_{(s)2}(x_2)$ for
+$\eta_1$,
+$\eta_2$,
+$\eta_3$,
+where
+\begin{eqnarray}
+\label{eq:jsscat.eg.nzms.cf}
+\eta_{s}  = 
+\log(\pr(Y={t}) / \pr(Y={2}))  = 
+\beta_{(s)1} + f_{(s)2}(x_2),
+\end{eqnarray}
+$(s,t) = (1,1), (2,3), (3,4)$,
+and $x_2$ is \texttt{age}.
+The last plot shows the smooths overlaid to aid comparison.
+
+
+It may be seen that the $\pm 2$ standard error bands
+about the \texttt{Widowed} group are particularly wide at
+young ages because of a paucity of data, and
+likewise at old ages amongst the \texttt{Single}s.
+The $\widehat{f}_{(s)2}(x_2)$ appear as one would expect.
+The log relative risk of
+being single relative to being married/partnered drops sharply from
+ages 16 to 40.
+The fitted function for the \texttt{Widowed} group increases
+with \texttt{age} and looks reasonably linear.
+The $\widehat{f}_{(1)2}(x_2)$
+suggests a possible maximum around 50 years old---this
+could indicate the greatest marital conflict occurs during
+the mid-life crisis years!
+
+
+
+\setkeys{Gin}{width=0.9\textwidth} % 0.8 is the current default
+
+\begin{figure}[tt]
+\begin{center}
+<<fig=TRUE,width=8,height=5.6,echo=FALSE>>=
+# Plot output
+mycol <- c("red", "darkgreen", "blue")
+ par(mfrow = c(2, 2))
+ par(mar = c(4.2, 4.0, 1.2, 2.2) + 0.1)
+plot(fit.ms, se = TRUE, scale = 12,
+         lcol = mycol, scol = mycol)
+
+# Plot output overlaid
+#par(mfrow = c(1, 1))
+plot(fit.ms, se = TRUE, scale = 12,
+         overlay = TRUE,
+         llwd = 2,
+         lcol = mycol, scol = mycol)
+@
+\caption{
+Fitted (and centered) component functions
+$\widehat{f}_{(s)2}(x_2)$
+from the NZ marital status data
+(see Equation \ref{eq:jsscat.eg.nzms.cf}).
+The bottom RHS plot shows the smooths overlaid.
+\label{fig:jsscat.eg.mstatus}
+}
+\end{center}
+\end{figure}
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+
+
+The methods function for \texttt{plot()} can also plot the
+derivatives of the smooths.
+The call
+<<fig=F>>=
+plot(fit.ms, deriv = 1, lcol = mycol, scale = 0.3)
+@
+results in Figure \ref{fig:jsscat.eg.mstatus.cf.deriv}.
+Once again the $y$-axis scales are commensurate.
+
+\setkeys{Gin}{width=\textwidth} % 0.8 is the current default
+
+\begin{figure}[tt]
+\begin{center}
+<<fig=TRUE,width=7.2,height=2.4,echo=FALSE>>=
+# Plot output
+ par(mfrow = c(1, 3))
+ par(mar = c(4.5, 4.0, 0.2, 2.2) + 0.1)
+plot(fit.ms, deriv = 1, lcol = mycol, scale = 0.3)
+@
+\caption{
+Estimated first derivatives of the component functions,
+$\widehat{f'}_{(s)2}(x_2)$,
+from the NZ marital status data
+(see Equation \ref{eq:jsscat.eg.nzms.cf}).
+\label{fig:jsscat.eg.mstatus.cf.deriv}
+}
+\end{center}
+\end{figure}
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+
+The derivative for the \texttt{Divorced/Separated} group appears
+linear so that a quadratic component function could be tried.
+Not surprisingly the \texttt{Single} group shows the greatest change;
+also, $\widehat{f'}_{(2)2}(x_2)$ is approximately linear till 50
+and then flat---this suggests one could fit a piecewise quadratic
+function to model that component function up to 50 years.
+The \texttt{Widowed} group appears largely flat.
+We thus fit the parametric model
+<<>>=
+foo <- function(x, elbow = 50)
+  poly(pmin(x, elbow), 2)
+
+clist <- list("(Intercept)" = diag(3),
+              "poly(age, 2)" = rbind(1, 0, 0),
+              "foo(age)"     = rbind(0, 1, 0),
+              "age"          = rbind(0, 0, 1))
+fit2.ms <-
+    vglm(mstatus ~ poly(age, 2) + foo(age) + age,
+         family = multinomial(refLevel = 2),
+         constraints = clist,
+         data = marital.nz)
+@
+Then
+<<>>=
+coef(fit2.ms, matrix = TRUE)
+@
+confirms that one term was used for each component function.
+The plots from
+<<fig=F>>=
+par(mfrow = c(2, 2))
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[1], scol = mycol[1], which.term = 1)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[2], scol = mycol[2], which.term = 2)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[3], scol = mycol[3], which.term = 3)
+@
+are given in Figure \ref{fig:jsscat.eg.mstatus.vglm}
+and resemble
+Figure \ref{fig:jsscat.eg.mstatus}.
+
+
+\setkeys{Gin}{width=0.9\textwidth} % 0.8 is the current default
+
+\begin{figure}[tt]
+\begin{center}
+<<fig=TRUE,width=8,height=5.6,echo=FALSE>>=
+# Plot output
+par(mfrow=c(2,2))
+ par(mar=c(4.5,4.0,1.2,2.2)+0.1)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[1], scol = mycol[1], which.term = 1)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[2], scol = mycol[2], which.term = 2)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[3], scol = mycol[3], which.term = 3)
+@
+\caption{
+Parametric version of \texttt{fit.ms}: \texttt{fit2.ms}.
+The component functions are now quadratic, piecewise quadratic/zero,
+or linear.
+\label{fig:jsscat.eg.mstatus.vglm}
+}
+\end{center}
+\end{figure}
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+
+
+
+It is possible to perform very crude inference based on the heuristic
+theory of a deviance test:
+<<>>=
+deviance(fit.ms) - deviance(fit2.ms)
+@
+is small, so the parametric model seems quite reasonable
+compared with the original nonparametric model.
+Specifically,
+the difference in the number of `parameters' is approximately
+<<>>=
+(dfdiff <- df.residual(fit2.ms) - df.residual(fit.ms))
+@
+which gives an approximate $p$ value of
+<<>>=
+pchisq(deviance(fit.ms) - deviance(fit2.ms), df = dfdiff, lower.tail = FALSE)
+@
+Thus \texttt{fit2.ms} appears quite reasonable.
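+For convenience, this heuristic comparison can be wrapped up as a
+small helper (a sketch only; \texttt{crude.lrt()} is not part of \VGAM{}):
+\begin{Code}
+# Sketch: crude deviance-based comparison of a VGAM fit with a
+# parametric VGLM approximation, mirroring the computations above.
+crude.lrt <- function(fit.np, fit.p) {
+  Dev <- deviance(fit.np) - deviance(fit.p)
+  ddf <- df.residual(fit.p) - df.residual(fit.np)
+  c(deviance.diff = Dev, df = ddf,
+    p.value = pchisq(Dev, df = ddf, lower.tail = FALSE))
+}
+crude.lrt(fit.ms, fit2.ms)
+\end{Code}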
+
+
+
+
+
+
+
+
+The estimated probabilities of the original fit can be plotted
+against \texttt{age} using
+<<fig=F>>=
+ooo <- with(marital.nz, order(age))
+with(marital.nz, matplot(age[ooo], fitted(fit.ms)[ooo, ],
+     type = "l", las = 1, lwd = 2, ylim = 0:1,
+     ylab = "Fitted probabilities",
+     xlab = "Age",  # main="Marital status amongst NZ Male Europeans",
+     col = c(mycol[1], "black", mycol[-1])))
+legend(x = 52.5, y = 0.62,  # x="topright",
+       col = c(mycol[1], "black", mycol[-1]),
+       lty = 1:4,
+       legend = colnames(fit.ms@y), lwd = 2)
+abline(v = seq(10, 90, by = 5), h = seq(0, 1, by = 0.1), col = "gray", lty = "dashed")
+@
+which gives Figure \ref{fig:jsscat.eg.mstatus.fitted}.
+This shows that 80--90\% of NZ white males
+aged from their early 30s to mid-70s
+were married/partnered.
+The proportion widowed
+started to rise steeply from 70 years onwards but remained below 0.5
+since males die younger than females on average.
+
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+\begin{figure}[tt]
+\begin{center}
+<<fig=TRUE,width=8,height=4.8,echo=FALSE>>=
+ par(mfrow = c(1,1))
+ par(mar = c(4.5,4.0,0.2,0.2)+0.1)
+ooo <- with(marital.nz, order(age))
+with(marital.nz, matplot(age[ooo], fitted(fit.ms)[ooo,],
+     type = "l", las = 1, lwd = 2, ylim = 0:1,
+     ylab = "Fitted probabilities",
+     xlab = "Age",
+     col = c(mycol[1], "black", mycol[-1])))
+legend(x = 52.5, y = 0.62,
+       col = c(mycol[1], "black", mycol[-1]),
+       lty = 1:4,
+       legend = colnames(fit.ms@y), lwd = 2.1)
+abline(v = seq(10, 90, by = 5), h = seq(0, 1, by = 0.1), col = "gray", lty = "dashed")
+@
+\caption{
+Fitted probabilities for each class for the
+NZ male European
+marital status data
+(from Equation \ref{eq:jsscat.eg.nzms.cf}).
+\label{fig:jsscat.eg.mstatus.fitted}
+}
+\end{center}
+\end{figure}
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+
+
+
+
+
+
+\subsection{Stereotype model}
+\label{sec:jsscat.eg.grc.stereotype}
+
+We reproduce some of the analyses of \cite{ande:1984} regarding the
+progress of 101 patients with back pain
+using the data frame \texttt{backPain} from \pkg{gnm}
+\citep{Rnews:Turner+Firth:2007,Turner+Firth:2009}.
+The three prognostic variables are
+length of previous attack ($x_1=1,2$),
+pain change ($x_2=1,2,3$) 
+and lordosis ($x_3=1,2$).
+Following \cite{ande:1984}, we treat these as numerical, and standardize and negate them.
+%
+The output
+<<>>=
+# Scale the variables? Yes; the Anderson (1984) paper did (see his Table 6).
+head(backPain, 4)
+summary(backPain)
+backPain <- transform(backPain, sx1 = -scale(x1), sx2 = -scale(x2), sx3 = -scale(x3))
+@
+displays the six ordered categories.
+Now a rank-1 stereotype model can be fitted with
+<<>>=
+bp.rrmlm1 <- rrvglm(pain ~ sx1 + sx2 + sx3, multinomial, data = backPain)
+@
+Then
+<<>>=
+Coef(bp.rrmlm1)
+@
+are the fitted \bA, \bC{} and $\bB_1$ (see Equation \ref{eq:rrr.BAC} and
+Table \ref{tab:rrvglam.jss.subset}), which agree with Table 6 of \cite{ande:1984}.
+Here, what are known as ``corner constraints'' are used
+($(1,1)$ element of \bA{} $\equiv 1$),
+and only the intercepts are not subject to any reduced-rank regression
+by default.
+The maximized log-likelihood from \textsl{\texttt{logLik(bp.rrmlm1)}}
+is $\Sexpr{round(logLik(bp.rrmlm1), 2)}$.
+The standard errors of each parameter can be obtained by
+\textsl{\texttt{summary(bp.rrmlm1)}}.
+The negative elements of $\widehat{\bC}$ imply the
+latent variable $\widehat{\nu}$ decreases in value with increasing
+\textsl{\texttt{sx1}},
+\textsl{\texttt{sx2}} and
+\textsl{\texttt{sx3}}.
+The elements of $\widehat{\bA}$ tend to decrease, suggesting that
+patients get worse as $\nu$ increases,
+i.e., get better as \textsl{\texttt{sx1}},
+\textsl{\texttt{sx2}} and
+\textsl{\texttt{sx3}} increase.
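+As an informal check of these signs (a sketch; \texttt{lv()} extracts
+the latent variable values, as used below), one can correlate
+$\widehat{\nu}_i$ with the covariates:
+\begin{Code}
+# Sketch: these correlations should all be negative, matching the
+# negative elements of C-hat.
+nu1 <- c(lv(bp.rrmlm1))
+cor(nu1, with(backPain, cbind(sx1, sx2, sx3)))
+\end{Code}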
+
+
+
+
+
+
+<<echo=FALSE>>=
+set.seed(123)
+@
+A rank-2 model fitted \textit{with a different normalization}
+<<>>=
+bp.rrmlm2 <- rrvglm(pain ~ sx1 + sx2 + sx3, multinomial, data = backPain, Rank = 2,
+                   Corner = FALSE, Uncor = TRUE)
+@
+produces uncorrelated $\widehat{\bnu}_i = \widehat{\bC}^{\top} \bix_{2i}$.
+In fact \textsl{\texttt{var(lv(bp.rrmlm2))}} equals $\bI_2$
+so that the latent variables are also scaled to have unit variance.
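+This is easily verified, e.g.,
+\begin{Code}
+var(lv(bp.rrmlm2))  # Should be (near) the 2 x 2 identity matrix
+\end{Code}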
+The fit was biplotted
+(rows of $\widehat{\bC}$ plotted as arrows;
+ rows of $\widehat{\bA}$ plotted as labels) using
+<<fig=F>>=
+biplot(bp.rrmlm2, Acol = "blue", Ccol = "darkgreen", scores = TRUE,
+#      xlim = c(-1, 6), ylim = c(-1.2, 4),  # Use this if not scaled
+       xlim = c(-4.5, 2.2), ylim = c(-2.2, 2.2),  # Use this if scaled
+       chull = TRUE, clty = 2, ccol = "blue")
+@
+to give Figure \ref{fig:jsscat.eg.rrmlm2.backPain}.
+It is interpreted via inner products due to (\ref{eq:rrr.BAC}).
+The different normalization means that the interpretation of $\nu_1$
+and $\nu_2$ has changed, e.g., increasing
+\textsl{\texttt{sx1}},
+\textsl{\texttt{sx2}} and
+\textsl{\texttt{sx3}} results in increasing $\widehat{\nu}_1$ and
+patients improve more.
+Many of the latent variable points $\widehat{\bnu}_i$ are coincident
+due to the discrete nature of the $\bix_i$. The rows of $\widehat{\bA}$
+are centered on the blue labels (rather cluttered unfortunately) and
+do not seem to vary much as a function of $\nu_2$.
+In fact this is confirmed by \cite{ande:1984}, who showed that a rank-1
+model is to be preferred.
+
+
+
+This example demonstrates the ability to obtain a low dimensional view
+of higher dimensional data. The package's website has additional
+documentation including more detailed Goodman's RC and stereotype
+examples.
+
+
+
+
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+\begin{figure}[tt]
+\begin{center}
+<<fig=TRUE,width=8,height=5.3,echo=FALSE>>=
+# Plot output
+ par(mfrow=c(1,1))
+ par(mar=c(4.5,4.0,0.2,2.2)+0.1)
+
+biplot(bp.rrmlm2, Acol = "blue", Ccol = "darkgreen", scores = TRUE,
+#      xlim = c(-1,6), ylim = c(-1.2,4),  # Use this if not scaled
+       xlim = c(-4.5,2.2), ylim = c(-2.2, 2.2),  # Use this if scaled
+       chull = TRUE, clty = 2, ccol = "blue")
+@
+\caption{
+Biplot of a rank-2 reduced-rank multinomial logit (stereotype) model
+fitted to the back pain data.
+A convex hull surrounds the latent variable scores
+$\widehat{\bnu}_i$
+(whose observation numbers are obscured because of their discrete nature).
+The position of the $j$th row of $\widehat{\bA}$
+is the center of the label ``\texttt{log(mu[,j]/mu[,6])}''.
+\label{fig:jsscat.eg.rrmlm2.backPain}
+}
+\end{center}
+\end{figure}
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+
+
+
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{Some implementation details}
+\label{sec:jsscat.implementDetails}
+
+This section describes some implementation details of \VGAM{}
+which will be more of interest to the developer than to the casual user.
+
+
+
+\subsection{Common code}
+\label{sec:jsscat.implementDetails.code}
+
+It is good programming practice to write reusable code where possible.
+All the \VGAM{} family functions in Table \ref{tab:cat.quantities}
+process the response in the same way because the same segment of code
+is executed. This offers a degree of uniformity in terms of how input is
+handled, and also for software maintenance
+(\cite{altm:jack:2010} enumerates good programming techniques and references).
+As well, the default initial values are computed in the same manner
+based on sample proportions of each level of $Y$.
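+For example (a sketch, not the internal code itself), these sample
+proportions can be computed from a fitted object:
+\begin{Code}
+# Sketch: sample proportions of each level of Y (the basis of the
+# default initial values); depvar() extracts the response matrix.
+yy <- depvar(fit.ms)
+colSums(yy) / nrow(yy)
+\end{Code}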
+
+
+
+
+
+\subsection[Matrix-band format of wz]{Matrix-band format of \texttt{wz}}
+\label{sec:jsscat.implementDetails.mbformat}
+
+The working weight matrices $\bW_i$ may become large for categorical
+regression models. In general, we have to evaluate the $\bW_i$
+for $i=1,\ldots,n$, and naively, this could be held in an \texttt{array} of
+dimension \texttt{c(M, M, n)}. However, since the $\bW_i$ are symmetric
+positive-definite it suffices to only store the upper or lower half of
+the matrix.
+
+
+
+The variable \texttt{wz} in \texttt{vglm.fit()}
+stores the working weight matrices $\bW_i$ in 
+a special format called the \textit{matrix-band} format. This
+format comprises an $n \times M^*$ matrix where
+\[
+M^*  =  \sum_{i=1}^{\footnotesize \textit{hbw}} \;
+\left(M-i+1\right)  =  
+\frac12 \, \textit{hbw}\, \left(2\,M - \textit{hbw} +1\right)
+\]
+is the number of columns. Here, \textit{hbw} refers to the
+\textit{half-bandwidth} of the matrix, which is an integer
+between 1 and $M$ inclusive. A diagonal matrix has
+unit half-bandwidth, a tridiagonal matrix has half-bandwidth 2, etc.
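+For instance, a small helper function (a sketch) reproduces the column
+counts used below:
+\begin{Code}
+# Sketch: number of matrix-band columns M* for given M and hbw.
+Mstar <- function(M, hbw) hbw * (2 * M - hbw + 1) / 2
+Mstar(M = 4, hbw = 1)  #  4 columns: diagonal W_i
+Mstar(M = 4, hbw = 2)  #  7 columns: tridiagonal W_i
+Mstar(M = 4, hbw = 4)  # 10 columns: general symmetric W_i
+\end{Code}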
+
+
+Suppose $M=4$. Then \texttt{wz} will have up to $M^*=10$ columns
+enumerating the unique elements of $\bW_i$ as follows:
+\begin{eqnarray}
+\bW_i  =  
+\left( \begin{array}{rrrr}
+1 & 5 & 8 & 10 \\
+  & 2 & 6 & 9 \\
+  &   & 3 & 7 \\
+  &   &   & 4 
+\end{array} \right).
+\label{eqn:hbw.eg}
+\end{eqnarray}
+That is, the order is firstly the diagonal, then the band above that,
+followed by the second band above the diagonal etc.
+Why is such a format adopted? 
+For this example, if $\bW_i$ is diagonal then only the first 4 columns
+of \texttt{wz} are needed. If $\bW_i$ is tridiagonal then only the
+first 7 columns of \texttt{wz} are needed. 
+If $\bW_i$ \textit{is} banded then \texttt{wz} need not have
+all $\frac12 M(M+1)$ columns; only $M^*$ columns suffice, and the
+rest of the elements of $\bW_i$ are implicitly zero.
+As well as reducing the size of \texttt{wz} itself in most cases, the
+matrix-band format often makes the computation of \texttt{wz} very
+simple and efficient. Furthermore, a Cholesky decomposition of a
+banded matrix will be banded. A final reason is that sometimes we
+want to input $\bW_i$ into \VGAM: if \texttt{wz} is $M \times M \times
+n$ then \texttt{vglm(\ldots, weights = wz)} will result in an error
+whereas it will work if \texttt{wz} is an $n \times M^*$ matrix.
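+As a sketch of this last point, for diagonal $\bW_i$ with, say, $M=2$,
+only two columns are needed:
+\begin{Code}
+# Sketch: diagonal W_i (hbw = 1) with M = 2, so M* = 2 columns.
+n <- 5
+wz <- cbind(runif(n), runif(n))  # Columns hold the diagonals of W_i
+# A call such as vglm(ymat ~ x2, family = ..., weights = wz)
+# (hypothetical) then supplies these working weights directly.
+\end{Code}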
+
+
+
+To facilitate the use of the matrix-band format,
+a few auxiliary functions have been written.
+In particular, there is \texttt{iam()}, which gives the indices
+for an array-to-matrix mapping.
+In the $4\times 4$ example above,
+<<>>=
+iam(NA, NA, M = 4, both = TRUE, diag = TRUE)
+@
+returns the indices for the respective array coordinates for
+successive columns of matrix-band format
+(see Equation \ref{eqn:hbw.eg}).
+If \texttt{diag = FALSE} then the first 4 elements in each vector
+are omitted. Note that the first two arguments of 
+\texttt{iam()} are not used here and have been assigned
+\texttt{NA}s for simplicity.
+For its use on the multinomial logit model, where
+$(\bW_i)_{jj} = w_i\,\mu_{ij} (1-\mu_{ij}),\ j=1,\ldots,M$, and 
+$(\bW_i)_{jk} = -w_i\,\mu_{ij} \mu_{ik},\ j\neq k$,
+this can be programmed succinctly like
+\begin{Code}
+wz <- mu[, 1:M] * (1 - mu[, 1:M])
+if (M > 1) {
+  index <- iam(NA, NA, M = M, both = TRUE, diag = FALSE)
+  wz <- cbind(wz, -mu[, index$row] * mu[, index$col])
+}
+wz <- w * wz
+\end{Code}
+(the actual code is slightly more complicated).
+In general, \VGAM{} family functions can be remarkably compact,
+e.g.,
+\texttt{acat()},
+\texttt{cratio()}
+and
+\texttt{multinomial()} are all less than 120 lines of code each.
+
+
+
+
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{Extensions and utilities}
+\label{sec:jsscat.extnUtil}
+
+This section describes some useful utilities/extensions of the above.
+
+
+
+\subsection{Marginal effects}
+\label{sec:jsscat.extnUtil.margeff}
+
+
+Models such as the multinomial logit and cumulative link models
+specify the posterior probability $p_{j} = \pr(Y=j|\bix)$ directly.
+In some applications, knowing the derivative of $p_{j}$
+with respect to some of the $x_k$ is useful;
+in fact, often just knowing the sign is important.
+The function \texttt{margeff()} computes the derivatives and
+returns them as a $p \times (M+1) \times n$ array.
+For the multinomial logit model it is easy to show
+\begin{eqnarray}
+\frac{\partial \, p_{j}(\bix_i)}{\partial \,
+\bix_{i}}
+&=&
+p_{j}(\bix_i)
+\left\{
+ \bbeta_{j} -
+\sum_{s=1}^{M+1}
+p_{s}(\bix_i)
+\,
+ \bbeta_{s}
+\right\},
+\label{eqn:multinomial.marginalEffects}
+\end{eqnarray}
+while for
+\texttt{cumulative(reverse = FALSE)}
+we have
+$p_{j} = \gamma_{j} - \gamma_{j-1} = h(\eta_{j}) - h(\eta_{j-1})$
+where $h=g^{-1}$ is the inverse of the link function
+(cf. Table \ref{tab:cat.quantities})
+so that
+\begin{eqnarray}
+\frac{\partial \, p_{j}(\bix_{})}{\partial \,
+\bix}
+&=&
+h'(\eta_{j}) \, \bbeta_{j} -
+h'(\eta_{j-1}) \, \bbeta_{j-1} .
+\label{eqn:cumulative.marginalEffects}
+\end{eqnarray}
+
+
+
+
+The function \texttt{margeff()} returns an array with these
+derivatives and should handle any value of
+\texttt{reverse} and \texttt{parallel}.
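+For example (a sketch; a plain \texttt{vglm()} refit of the marital
+status data is used here for simplicity):
+\begin{Code}
+# Sketch: marginal effects for a simple multinomial logit fit.
+fit.mlm <- vglm(mstatus ~ age, multinomial(refLevel = 2),
+                data = marital.nz)
+ME <- margeff(fit.mlm)  # A p x (M+1) x n array
+dim(ME)
+ME[2, , 1]  # d p_j / d age for observation 1, across all 4 classes
+\end{Code}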
+
+
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\subsection[The xij argument]{The \texttt{xij} argument}
+\label{sec:jsscat.extnUtil.xij}
+
+There are many models, including those for categorical data,
+where the value of an explanatory variable $x_k$ differs depending
+on which linear/additive predictor $\eta_{j}$ it applies to.
+Here is a well-known example from consumer choice modeling.
+Suppose an econometrician is interested in peoples'
+choice of transport for travelling to work
+and that there are four choices:
+$Y=1$ for ``bus'',
+$Y=2$ for ``train'',
+$Y=3$ for ``car'' and
+$Y=4$ for ``walking''.
+Assume that people only choose one means to go to work.
+Suppose there are three covariates:
+$X_2=$ cost,
+$X_3=$ journey time, and
+$X_4=$ distance.
+Of the covariates only $X_4$ (and the intercept $X_1$)
+is the same for all transport choices;
+the cost and journey time differ according to the means chosen.
+Suppose a random sample of $n$ people is collected
+from some population, and that each person has
+access to all these transport modes.
+For such data, a natural regression model would be a 
+multinomial logit model with $M=3$:
+for $j=1,\ldots,M$, we have
+$\eta_{j} =$
+\begin{eqnarray}
+\log \frac{\pr(Y=j)}{\pr(Y=M+1)}
+&=&
+\beta_{(j)1}^{*} +
+\beta_{(1)2}^{*} \, (x_{i2j}-x_{i24}) +
+\beta_{(1)3}^{*} \, (x_{i3j}-x_{i34}) +
+\beta_{(1)4}^{*} \, x_{i4},
+\label{eqn:xij.eg.gotowork}
+\end{eqnarray}
+where, for the $i$th person,
+$x_{i2j}$ is the cost for the $j$th transport means, and
+$x_{i3j}$ is the journey time of the $j$th transport means.
+The distance to get to work is $x_{i4}$; it has the same value
+regardless of the transport means.
+
+
+Equation \ref{eqn:xij.eg.gotowork}
+implies $\bH_1=\bI_3$ and $\bH_2=\bH_3=\bH_4=\bone_3$.
+Note
+also that if the last response category is used as the baseline or
+reference group (the default of \texttt{multinomial()}) then $x_{ik,M+1}$
+can be subtracted from $x_{ikj}$ for $j=1,\ldots,M$---this
+is the natural way $x_{ik,M+1}$ enters into the model.
+
+
+
+
+Recall from (\ref{gammod2}) that we had
+\begin{equation}
+\eta_j(\bix_i)  =  \bbeta_j^{\top} \bix_i  = 
+\sum_{k=1}^{p} \, x_{ik} \, \beta_{(j)k} .
+\label{eqn:xij0}
+\end{equation}
+Importantly, this can be generalized to
+\begin{equation}
+\eta_j(\bix_{ij})  =  \bbeta_j^{\top} \bix_{ij}  = 
+\sum_{k=1}^{p} \, x_{ikj} \, \beta_{(j)k} ,
+\label{eqn:xij}
+\end{equation}
+or writing this another way (as a mixture or hybrid),
+\begin{equation}
+\eta_j(\bix_{i}^{*},\bix_{ij}^{*})  =  
+\bbeta_{j}^{*T} \bix_{i}^{*} + \bbeta_{j}^{**T} \bix_{ij}^{*} .
+\label{eqn:xij2}
+\end{equation}
+Often $\bbeta_{j}^{**} = \bbeta^{**}$, say.
+In (\ref{eqn:xij2}) the variables in $\bix_{i}^{*}$ are common to
+all $\eta_{j}$, and the variables in $\bix_{ij}^{*}$ have
+different values for differing $\eta_{j}$.
+This allows for covariate values that are specific to each $\eta_j$,
+a facility which is very important in many applications.
+
+
+The use of the \texttt{xij} argument with the \VGAM{} family function
+\texttt{multinomial()} has very important applications in economics.
+In that field the term ``multinomial logit model'' includes a variety of
+models such as the ``generalized logit model'' where (\ref{eqn:xij0})
+holds, the ``conditional logit model'' where (\ref{eqn:xij}) holds,
+and the ``mixed logit model,'' which is a combination of the two,
+where (\ref{eqn:xij2}) holds.
+The generalized logit model focusses on the individual as the unit of
+analysis, and uses individual characteristics as explanatory variables,
+e.g., age of the person in the transport example.
+The conditional logit model allows each alternative to have its own
+covariate values, and the impact of a unit change in $x_k$ is assumed to be constant
+across alternatives, e.g., journey time in the choice of transport mode.
+Unfortunately, there is confusion in the literature about the terminology
+of these models. Some authors call \texttt{multinomial()}
+with (\ref{eqn:xij0}) the ``generalized logit model''.
+Others call the mixed
+logit model the ``multinomial logit model'' and view the generalized
+logit and conditional logit models as special cases.
+In \VGAM{} terminology there is no need to give different names to
+all these slightly differing special cases. They are all still called
+multinomial logit models, although it may be added that there are
+some covariate-specific linear/additive predictors.
+The important thing is that the framework accommodates $\bix_{ij}$,
+so one tries to avoid making life unnecessarily complicated.
+And \texttt{xij} can apply in theory to any VGLM and not just to the
+multinomial logit model.
+\cite{imai:king:lau:2008} present another perspective on the
+$\bix_{ij}$ problem with illustrations from \pkg{Zelig}
+\citep{Zelig:2009}.
+
+
+
+
+
+\subsubsection[Using the xij argument]{Using the \texttt{xij} argument}
+\label{sec:xij.sub}
+
+\VGAM{} handles variables whose values depend on $\eta_{j}$,
+as in (\ref{eqn:xij2}), using the \texttt{xij} argument.
+It is assigned an \proglang{S} formula or a list of \proglang{S} formulas.
+Each formula, which must have $M$ \textit{different} terms,
+forms a matrix that premultiplies a constraint matrix.
+In detail, (\ref{eqn:xij0}) can be written in vector form as
+\begin{equation}
+\boldeta(\bix_i)  =  \bB^{\top} \bix_i  = 
+\sum_{k=1}^{p} \, \bH_{k} \, \bbeta_{k}^{*} \, x_{ik},
+\label{eqn:xij0.vector}
+\end{equation}
+where
+$\bbeta_{k}^{*} =
+\left( \beta_{(1)k}^{*},\ldots,\beta_{(r_k)k}^{*} \right)^{\top}$
+is to be estimated.
+This may be written
+\begin{eqnarray}
+\boldeta(\bix_{i})
+&=&
+\sum_{k=1}^{p} \, \diag(x_{ik},\ldots,x_{ik}) \,
+\bH_k \, \bbeta_{k}^{*}.
+\label{eqn:xij.d.vector}
+\end{eqnarray}
+To handle (\ref{eqn:xij})--(\ref{eqn:xij2})
+we can generalize (\ref{eqn:xij.d.vector}) to
+\begin{eqnarray}
+\boldeta_i
+&=&
+\sum_{k=1}^{p} \, \diag(x_{ik1},\ldots,x_{ikM}) \;
+\bH_k \, \bbeta_{k}^{*}
+\ \ \ \ \left(=
+\sum_{k=1}^{p} \, \bX_{(ik)}^{*} \,
+\bH_k \, \bbeta_{k}^{*} ,
+\mathrm{\ say} \right).
+\label{eqn:xij.vector}
+\end{eqnarray}
+Each component of the list \texttt{xij} is a formula having $M$ terms
+(ignoring the intercept) which
+specifies the successive diagonal elements of the matrix $\bX_{(ik)}^{*}$.
+Thus each row of the constraint matrix may be multiplied by a different
+vector of values.
+The constraint matrices themselves are not affected by the
+\texttt{xij} argument.
+
+
+
+
+
+How can one fit such models in \VGAM{}?
+Let us fit (\ref{eqn:xij.eg.gotowork}).
+Suppose the journey cost and time variables have had the
+cost and time of walking subtracted from them.
+Then,
+using ``\texttt{.trn}'' to denote train,
+\begin{Code}
+fit2 <- vglm(cbind(bus, train, car, walk) ~ Cost + Time + Distance,
+             fam = multinomial(parallel = TRUE ~ Cost + Time + Distance - 1),
+             xij = list(Cost ~ Cost.bus + Cost.trn + Cost.car,
+                        Time ~ Time.bus + Time.trn + Time.car),
+             form2 = ~  Cost.bus + Cost.trn + Cost.car +
+                        Time.bus + Time.trn + Time.car +
+                        Cost + Time + Distance,
+             data = gotowork)
+\end{Code}
+should do the job.
+Here, the argument \texttt{form2} is assigned a second \proglang{S} formula which
+is used in some special circumstances or by certain types
+of \VGAM{} family functions.
+The model has $\bH_{1} = \bI_{3}$ and $\bH_{2} = \bH_{3} = \bH_{4} = \bone_{3}$
+because the lack of parallelism only applies to the intercept.
+However, unless \texttt{Cost} is the same as \texttt{Cost.bus} and
+\texttt{Time} is the same as \texttt{Time.bus},
+this model should not be plotted with \texttt{plotvgam()};
+see the author's homepage for further documentation.
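+Incidentally, the subtraction of the walking cost and time assumed
+above might be done beforehand like this (a sketch; the
+\texttt{.wlk}-suffixed columns are hypothetical raw variables):
+\begin{Code}
+# Sketch: difference each alternative's cost/time from walking's.
+gotowork <- transform(gotowork,
+  Cost.bus = Cost.bus - Cost.wlk, Time.bus = Time.bus - Time.wlk,
+  Cost.trn = Cost.trn - Cost.wlk, Time.trn = Time.trn - Time.wlk,
+  Cost.car = Cost.car - Cost.wlk, Time.car = Time.car - Time.wlk)
+\end{Code}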
+
+
+By the way,
+suppose 
+$\beta_{(1)4}^{*}$
+in (\ref{eqn:xij.eg.gotowork})
+is replaced by $\beta_{(j)4}^{*}$.
+Then the above code but with
+\begin{Code}
+  fam = multinomial(parallel = FALSE ~ 1 + Distance),
+\end{Code}
+should fit this model.
+Equivalently,
+\begin{Code}
+  fam = multinomial(parallel = TRUE ~ Cost + Time - 1),
+\end{Code}
+fits the same model.
+
+
+
+
+
+
+\subsubsection{A more complicated example}
+\label{sec:xij.complicated}
+
+The above example is straightforward because the
+variables were entered linearly. However, things
+become more tricky if data-dependent functions are used in
+any \texttt{xij} terms, e.g., \texttt{bs()}, \texttt{ns()} or \texttt{poly()}.
+In particular, regression splines such as \texttt{bs()} and \texttt{ns()}
+can be used to estimate a general smooth function $f(x_{ij})$, which is
+very useful for exploratory data analysis.
+
+
+
+Suppose we wish to fit the variable \texttt{Cost} with a smoother.
+This is possible with regression splines and a trick.
+Firstly note that
+\begin{Code}
+fit3 <- vglm(cbind(bus, train, car, walk) ~ ns(Cost) + Time + Distance,
+             multinomial(parallel = TRUE ~ ns(Cost) + Time + Distance - 1),
+             xij = list(ns(Cost) ~ ns(Cost.bus) + ns(Cost.trn) + ns(Cost.car),
+                        Time ~ Time.bus + Time.trn + Time.car),
+             form2 = ~  ns(Cost.bus) + ns(Cost.trn) + ns(Cost.car) +
+                        Time.bus + Time.trn + Time.car +
+                        ns(Cost) + Cost + Time + Distance,
+             data = gotowork)
+\end{Code}
+will \textit{not} work because the basis functions for
+\texttt{ns(Cost.bus)}, \texttt{ns(Cost.trn)} and \texttt{ns(Cost.car)}
+are not identical since the knots differ.
+Consequently, they represent different functions despite
+having common regression coefficients.
+
+
+Fortunately, it is possible to force the \texttt{ns()} terms
+to have identical basis functions by using a trick:
+combine the vectors temporarily.
+To do this, one can let
+\begin{Code}
+NS <- function(x, ..., df = 3)
+      sm.ns(c(x, ...), df = df)[1:length(x), , drop = FALSE]
+\end{Code}
+This computes a natural cubic B-spline evaluated at \texttt{x} but it uses the
+other arguments as well to form an overall vector from which to obtain
+the (common) knots.
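+A quick toy check (a sketch) confirms that the rows returned are simply
+the leading rows of one common basis:
+\begin{Code}
+# Sketch: NS(x1, x2, x3) equals the first rows of the basis formed
+# from the combined vector, hence the knots are shared.
+x1 <- 1:6; x2 <- 7:12; x3 <- 13:18
+B.all <- sm.ns(c(x1, x2, x3), df = 3)
+all.equal(c(NS(x1, x2, x3)), c(B.all[1:6, ]))  # TRUE
+\end{Code}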
+Then the usage of \texttt{NS()} can be something like
+\begin{Code}
+fit4 <- vglm(cbind(bus, train, car, walk) ~ NS(Cost.bus, Cost.trn, Cost.car)
+                                          + Time + Distance,
+             multinomial(parallel = TRUE ~  NS(Cost.bus, Cost.trn, Cost.car)
+                                          + Time + Distance - 1),
+             xij = list(NS(Cost.bus, Cost.trn, Cost.car) ~
+                        NS(Cost.bus, Cost.trn, Cost.car) +
+                        NS(Cost.trn, Cost.car, Cost.bus) +
+                        NS(Cost.car, Cost.bus, Cost.trn),
+                        Time ~ Time.bus + Time.trn + Time.car),
+             form2 = ~  NS(Cost.bus, Cost.trn, Cost.car) +
+                        NS(Cost.trn, Cost.car, Cost.bus) +
+                        NS(Cost.car, Cost.bus, Cost.trn) +
+                        Time.bus + Time.trn + Time.car +
+                        Cost.bus + Cost.trn + Cost.car +
+                        Time + Distance,
+             data = gotowork)
+\end{Code}
+So \texttt{NS(Cost.bus, Cost.trn, Cost.car)}
+is the smooth term for
+\texttt{Cost.bus}, etc.
+Furthermore, \texttt{plotvgam()} may be applied to
+\texttt{fit4}, in which case the fitted regression spline is plotted
+against its first inner argument, viz. \texttt{Cost.bus}.
+
+
+One reason why it will also predict correctly
+is ``smart prediction''
+\citep{Rnews:Yee:2008}.
+
+
+
+\subsubsection{Implementation details} 
+\label{sec:jss.xij.implementationDetails} 
+
+The \texttt{xij} argument operates \textit{after} the
+ordinary $\bX_{\sVLM}$ matrix is created. Then selected columns
+of $\bX_{\sVLM}$ are modified using the constraint matrices and the
+\texttt{xij} and \texttt{form2} arguments; that is, using \texttt{form2}'s
+model matrix $\bX_{\sformtwo}$ and the $\bH_k$. This whole operation
+is possible because $\bX_{\sVLM}$ remains structurally the same.
+The crucial equation is (\ref{eqn:xij.vector}).
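+For instance (a sketch, assuming a successfully fitted model such as
+\texttt{fit2} above), the modified big model matrix can be inspected with
+\begin{Code}
+# Sketch: the VLM model matrix after the xij modification; its first
+# M rows correspond to eta_1, ..., eta_M of observation 1.
+X.vlm <- model.matrix(fit2, type = "vlm")
+head(X.vlm, 3)
+\end{Code}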
+
+
+Other \texttt{xij} examples are given in the online help of
+\texttt{fill()} and \texttt{vglm.control()},
+as well as at the package's webpage.
+
+
+
+
+
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{Discussion}
+\label{sec:jsscat.discussion}
+
+
+This article has sought to convey how VGLMs/VGAMs are well suited for
+fitting regression models for categorical data. Their primary strength
+is a simple and unified framework which, when reflected in software,
+makes practical CDA more understandable and efficient. Furthermore,
+there are natural extensions such as a reduced-rank variant and
+covariate-specific $\eta_{j}$. The \VGAM{} package potentially offers
+a wide selection of models and utilities.
+
+
+There is much future work to do.
+Some useful additions to the package include:
+\begin{enumerate}
+
+\item
+Bias-reduction \citep{firt:1993} is a method for removing the $O(n^{-1})$
+bias from a maximum likelihood estimate. For a substantial class of
+models including GLMs it can be formulated in terms of a minor adjustment
+of the score vector within an IRLS algorithm \citep{kosm:firt:2009}.
+One by-product, for logistic regression, is that while the maximum
+likelihood estimate (MLE) can be infinite, the adjustment leads to
+estimates that are always finite. At present the \R{} package \pkg{brglm}
+\citep{Kosmidis:2008} implements bias-reduction for a number of models.
+Bias-reduction might be implemented by adding an argument
+\texttt{bred = FALSE}, say, to some existing \VGAM{} family functions.
+
+
+\item
+Nested logit models were developed to overcome a fundamental shortcoming
+related to the multinomial logit model, viz. the independence of
+irrelevant alternatives (IIA) assumption. Roughly, the multinomial logit
+model assumes the ratio of the choice probabilities of two alternatives
+is not dependent on the presence or absence of other alternatives in
+the model. This presents problems that are often illustrated by the
+famed red bus--blue bus problem.
+
+
+
+
+\item
+The generalized estimating equations (GEE) methodology is largely
+amenable to IRLS and this should be added to the package in the future
+\citep{wild:yee:1996}.
+
+
+\item
+For logistic regression \proglang{SAS}'s \code{proc logistic} gives
+a warning if the data is \textit{completely separated} or \textit{quasi-completely
+separated}. The effect is that some regression coefficients tend to $\pm
+\infty$. With such data, all (to my knowledge) \R{} implementations
+give warnings that are vague, if any at all, and this is rather
+unacceptable \citep{alli:2004}. The \pkg{safeBinaryRegression} package
+\citep{Konis:2009} overloads \code{glm()} so that a check for the
+existence of the MLE is made before fitting a binary response GLM.
+
+
+\end{enumerate}
+
+
+In closing, the \pkg{VGAM} package is continually being developed,
+therefore some future changes in the implementation details and usage
+may occur. These may include non-backward-compatible changes (see the
+\code{NEWS} file). Further documentation and updates are available at
+the author's homepage whose URL is given in the \code{DESCRIPTION} file.
+
+
+
+% ----------------------------------------------------------------------
+\section*{Acknowledgments}
+
+The author thanks Micah Altman, David Firth and Bill Venables for helpful
+conversations, and Ioannis Kosmidis for a reprint.
+Thanks also to The Institute for Quantitative Social Science at Harvard
+University for their hospitality while this document was written during a
+sabbatical visit.
+
+
+
+
+
+\bibliography{categoricalVGAMbib}
+
+\end{document}
+
+
+
+
diff --git a/inst/doc/categoricalVGAM.pdf b/inst/doc/categoricalVGAM.pdf
new file mode 100644
index 0000000..eb6c7d2
Binary files /dev/null and b/inst/doc/categoricalVGAM.pdf differ
diff --git a/inst/doc/crVGAM.R b/inst/doc/crVGAM.R
new file mode 100644
index 0000000..cbff521
--- /dev/null
+++ b/inst/doc/crVGAM.R
@@ -0,0 +1,480 @@
+### R code from vignette source 'crVGAM.Rnw'
+
+###################################################
+### code chunk number 1: crVGAM.Rnw:105-111
+###################################################
+library("VGAM")
+library("VGAMdata")
+ps.options(pointsize = 12)
+options(width = 72, digits = 4)
+options(SweaveHooks = list(fig = function() par(las = 1)))
+options(prompt = "R> ", continue = "+")
+
+
+###################################################
+### code chunk number 2: example-posber (eval = FALSE)
+###################################################
+## vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+##            family = posbernoulli.t, data = pdata)
+
+
+###################################################
+### code chunk number 3: poz-args-posbinomial
+###################################################
+args(posbinomial)
+
+
+###################################################
+### code chunk number 4: poz-args-posbernoulli-t
+###################################################
+args(posbernoulli.t)
+
+
+###################################################
+### code chunk number 5: poz-args-posbernoulli-b
+###################################################
+args(posbernoulli.b)
+
+
+###################################################
+### code chunk number 6: poz-args-posbernoulli-tb
+###################################################
+args(posbernoulli.tb)
+
+
+###################################################
+### code chunk number 7: poz-posbernoulli-tb-gen (eval = FALSE)
+###################################################
+## vglm(..., family = posbernoulli.tb(parallel.b = TRUE ~ 0, parallel.t = TRUE ~ 0,
+##                                    drop.b = TRUE ~ 0))
+
+
+###################################################
+### code chunk number 8: eg-deermice-look
+###################################################
+head(deermice, 4)
+
+
+###################################################
+### code chunk number 9: example1-model
+###################################################
+deermice <- within(deermice, {
+  age <- 2 - as.numeric(age) 
+  sex <- 1 - as.numeric(sex)
+})
+
+
+###################################################
+### code chunk number 10: example2-model
+###################################################
+M.0 <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ 1,
+            posbernoulli.t(parallel = TRUE ~ 1), data = deermice)
+M.b <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ 1, 
+            posbernoulli.b, data = deermice)
+M.t <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ 1, 
+            posbernoulli.t, data = deermice)
+M.h <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+            posbernoulli.t(parallel = TRUE ~ weight + sex + age), data = deermice)
+M.th <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+             posbernoulli.t, data = deermice)
+M.tb <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ 1, 
+             posbernoulli.tb, data = deermice)
+M.bh <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+             posbernoulli.b, data = deermice)
+M.tbh <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+              posbernoulli.tb, data = deermice)
+
+
+###################################################
+### code chunk number 11: eg-deermice-Nhat
+###################################################
+c(M.bh@extra$N.hat, M.bh@extra$SE.N.hat)
+c(logLik(M.bh), AIC(M.bh))
+
+
+###################################################
+### code chunk number 12: maketable
+###################################################
+
+Table <- rbind(c(round(M.tbh@extra$N.hat, 2),
+                 round(M.bh@extra$N.hat, 2),
+                 round(M.tb@extra$N.hat, 2),
+                 round(M.th@extra$N.hat, 2),
+                 round(M.h@extra$N.hat, 2),
+                 round(M.b@extra$N.hat, 2),
+                 round(M.t@extra$N.hat, 2),
+                 round(M.0@extra$N.hat, 2)),
+
+               c(round(M.tbh@extra$SE.N.hat, 2),
+                 round(M.bh@extra$SE.N.hat, 2),
+                 round(M.tb@extra$SE.N.hat, 2),
+                 round(M.th@extra$SE.N.hat, 2),
+                 round(M.h@extra$SE.N.hat, 2),
+                 round(M.b@extra$SE.N.hat, 2),
+                 round(M.t@extra$SE.N.hat, 2),
+                 round(M.0@extra$SE.N.hat, 2)),
+
+               -2 * c(round(logLik(M.tbh), 2),
+                      round(logLik(M.bh), 2),
+                      round(logLik(M.tb), 2),
+                      round(logLik(M.th), 2),
+                      round(logLik(M.h), 2),
+                      round(logLik(M.b), 2),
+                      round(logLik(M.t), 2),
+                      round(logLik(M.0), 2)),
+
+               c(round(AIC(M.tbh), 2),
+                 round(AIC(M.bh), 2),
+                 round(AIC(M.tb), 2),
+                 round(AIC(M.th), 2),
+                 round(AIC(M.h), 2),
+                 round(AIC(M.b), 2),
+                 round(AIC(M.t), 2),
+                 round(AIC(M.0), 2)))
+
+colnames(Table) <- c("M.tbh", "M.bh", "M.tb",
+                     "M.th", "M.h", "M.b", "M.t", "M.0")
+rownames(Table) <- c("N.hat", "SE", "-2ln(L)", "AIC")
+
+
+###################################################
+### code chunk number 13: example2-table
+###################################################
+Table
+
+
+###################################################
+### code chunk number 14: poz-posbernoulli-eg-deermice-coefs
+###################################################
+round(coef(M.bh), 2)
+round(sqrt(diag(vcov(M.bh))), 2)
+
+
+###################################################
+### code chunk number 15: poz-posbernoulli-eg-deermice-smooth
+###################################################
+fit.bh <- vgam(cbind(y1, y2, y3, y4, y5, y6) ~ s(weight, df = 3) + sex + age,
+               posbernoulli.b, data = deermice)
+plot(fit.bh, se = TRUE, las = 1, lcol = "blue", scol = "orange",
+     rcol = "purple", scale = 5)
+
+
+###################################################
+### code chunk number 16: poz-posbernoulli-eg-deermice-summary
+###################################################
+summary(fit.bh)
+
+
+###################################################
+### code chunk number 17: poz-posbernoulli-eg-deermice-smooth-shadow (eval = FALSE)
+###################################################
+## plot(fit.bh, se = TRUE, las = 1, lcol = "blue", scol = "orange",
+##      rcol = "purple", scale = 5, mgp = c(2.0, 1, 0))
+
+
+###################################################
+### code chunk number 18: plot-deermice
+###################################################
+par(mfrow = c(2, 2))
+par(las = 1, cex = 1.1, mar = c(3.8, 4, 0.5, 0.2) + 0.1)
+par(mgp = c(2.3, 1, 0))  # Default is c(3, 1, 0)
+
+
+
+plot(fit.bh, se = TRUE, las = 1, lcol = "blue", scol = "orange",
+     rcol = "purple", scale = 5, mgp = c(2.0, 1, 0))
+
+# < < poz-posbernoulli-eg-deermice-smooth-shadow> >
+
+
+
+
+
+
+###################################################
+### code chunk number 19: birds91read
+###################################################
+data("prinia", package = "VGAM")
+
+
+###################################################
+### code chunk number 20: example2a
+###################################################
+head(prinia, 4)[, 1:4]
+
+
+###################################################
+### code chunk number 21: example2b
+###################################################
+M.h.GAM <- 
+  vgam(cbind(cap, noncap) ~ s(length, df = 3) + fat, 
+       posbinomial(omit.constant = TRUE, parallel = TRUE ~ s(length, df = 3) + fat),
+       data = prinia)
+M.h.GAM@extra$N.hat     
+M.h.GAM@extra$SE.N.hat  
+
+
+###################################################
+### code chunk number 22: eg-bird-smooth-shadow1
+###################################################
+plot.info <- plot(M.h.GAM,
+                  se = TRUE, las = 1, plot.arg = FALSE,
+                  lcol = "blue",
+                  scol = "orange",
+                  rcol = "purple",
+                  scale = 5)
+
+
+###################################################
+### code chunk number 23: eg-bird-smooth-shadow2 (eval = FALSE)
+###################################################
+## info.fit2 <- plot.info@preplot[[1]]
+## fat.effect <- coef(M.h.GAM)["fat"] 
+## intercept <- coef(M.h.GAM)["(Intercept)"]  
+## 
+## ooo <- order(info.fit2$x)
+## centering.const <- mean(prinia$length) - coef(M.h.GAM)["s(length, df = 3)"]
+## 
+## plotframe <- data.frame(lin.pred.b = intercept + fat.effect * 1 +
+##                                      centering.const + info.fit2$y[ooo],
+##                         lin.pred.0 = intercept + fat.effect * 0 +
+##                                      centering.const + info.fit2$y[ooo],
+##                         x2 = info.fit2$x[ooo])
+## 
+## plotframe <- transform(plotframe,
+##                        up.lin.pred.b = lin.pred.b + 2*info.fit2$se.y[ooo],
+##                        lo.lin.pred.b = lin.pred.b - 2*info.fit2$se.y[ooo],
+##                        up.lin.pred.0 = lin.pred.0 + 2*info.fit2$se.y[ooo],
+##                        lo.lin.pred.0 = lin.pred.0 - 2*info.fit2$se.y[ooo])
+## 
+## plotframe <- transform(plotframe,
+##                        fv.b    = logit(lin.pred.b,    inverse = TRUE),
+##                        up.fv.b = logit(up.lin.pred.b, inverse = TRUE),
+##                        lo.fv.b = logit(lo.lin.pred.b, inverse = TRUE),
+##                        fv.0    = logit(lin.pred.0,    inverse = TRUE),
+##                        up.fv.0 = logit(up.lin.pred.0, inverse = TRUE),
+##                        lo.fv.0 = logit(lo.lin.pred.0, inverse = TRUE))
+## 
+## with(plotframe,
+##      matplot(x2, cbind(up.fv.b, fv.b, lo.fv.b), type = "l", col = "blue",
+##              lty = c(2, 1, 2), las = 1, cex.lab = 1.5, lwd = 2,
+##              main = "", ylab = "", xlab = "Wing length (standardized)"))
+## mtext( ~ hat(p), side = 2, cex = 1.4, line = 4, adj = 0.5, las = 1)
+## with(plotframe, matlines(x2, cbind(up.fv.0, fv.0, lo.fv.0),
+##                          col = "darkorange", lty = c(2, 1, 2), lwd = 2))
+## legend("topleft", legend = c("Fat present", "Fat not present"), bty = "n",
+##        lwd = 2, col = c("blue", "darkorange"), merge = TRUE, cex = 1.5)
+
+
+###################################################
+### code chunk number 24: plot-bird
+###################################################
+par(mfrow = c(1, 1))
+
+
+
+info.fit2 <- plot.info@preplot[[1]]
+fat.effect <- coef(M.h.GAM)["fat"] 
+intercept <- coef(M.h.GAM)["(Intercept)"]  
+
+ooo <- order(info.fit2$x)
+centering.const <- mean(prinia$length) - coef(M.h.GAM)["s(length, df = 3)"]
+
+plotframe <- data.frame(lin.pred.b = intercept + fat.effect * 1 +
+                                     centering.const + info.fit2$y[ooo],
+                        lin.pred.0 = intercept + fat.effect * 0 +
+                                     centering.const + info.fit2$y[ooo],
+                        x2 = info.fit2$x[ooo])
+
+plotframe <- transform(plotframe,
+                       up.lin.pred.b = lin.pred.b + 2*info.fit2$se.y[ooo],
+                       lo.lin.pred.b = lin.pred.b - 2*info.fit2$se.y[ooo],
+                       up.lin.pred.0 = lin.pred.0 + 2*info.fit2$se.y[ooo],
+                       lo.lin.pred.0 = lin.pred.0 - 2*info.fit2$se.y[ooo])
+
+plotframe <- transform(plotframe,
+                       fv.b    = logit(lin.pred.b,    inverse = TRUE),
+                       up.fv.b = logit(up.lin.pred.b, inverse = TRUE),
+                       lo.fv.b = logit(lo.lin.pred.b, inverse = TRUE),
+                       fv.0    = logit(lin.pred.0,    inverse = TRUE),
+                       up.fv.0 = logit(up.lin.pred.0, inverse = TRUE),
+                       lo.fv.0 = logit(lo.lin.pred.0, inverse = TRUE))
+
+with(plotframe,
+     matplot(x2, cbind(up.fv.b, fv.b, lo.fv.b), type = "l", col = "blue",
+             lty = c(2, 1, 2), las = 1, cex.lab = 1.5, lwd = 2,
+             main = "", ylab = "", xlab = "Wing length (standardized)"))
+mtext( ~ hat(p), side = 2, cex = 1.4, line = 4, adj = 0.5, las = 1)
+with(plotframe, matlines(x2, cbind(up.fv.0, fv.0, lo.fv.0),
+                         col = "darkorange", lty = c(2, 1, 2), lwd = 2))
+legend("topleft", legend = c("Fat present", "Fat not present"), bty = "n",
+       lwd = 2, col = c("blue", "darkorange"), merge = TRUE, cex = 1.5)
+
+
+
+# < < eg-bird-smooth-shadow2 > >
+    
+    
+    
+
+
+###################################################
+### code chunk number 25: poz-posbernoulli-tb-huggins89t1-data
+###################################################
+head(Huggins89table1, 4)
+
+
+###################################################
+### code chunk number 26: poz-posbernoulli-tb-huggins89t1-look
+###################################################
+Hdata <- transform(Huggins89table1, x3.tij = t01,
+                   T02 = t02, T03 = t03, T04 = t04, T05 = t05, T06 = t06,
+                   T07 = t07, T08 = t08, T09 = t09, T10 = t10)
+Hdata <- subset(Hdata,
+                y01 + y02 + y03 + y04 + y05 + y06 + y07 + y08 + y09 + y10 > 0)
+
+
+###################################################
+### code chunk number 27: poz-posbernoulli-th-huggins89t0-fit
+###################################################
+fit.th <-
+   vglm(cbind(y01, y02, y03, y04, y05, y06, y07, y08, y09, y10) ~  x2 + x3.tij,
+        xij = list(x3.tij ~ t01 + t02 + t03 + t04 + t05 + t06 + t07 + t08 +
+                            t09 + t10 - 1),
+        posbernoulli.t(parallel.t = TRUE ~ x2 + x3.tij), 
+        data = Hdata, trace = FALSE, 
+        form2 = ~ x2 + x3.tij + t01 + t02 + t03 + t04 + t05 + t06 + t07 + t08 +
+                                t09 + t10)
+
+
+###################################################
+### code chunk number 28: poz-posbernoulli-th-huggins89t0-constraints
+###################################################
+constraints(fit.th, matrix = TRUE)
+
+
+###################################################
+### code chunk number 29: poz-posbernoulli-tbh-huggins89t1-fit
+###################################################
+fit.tbh <-
+  vglm(cbind(y01, y02, y03, y04, y05, y06, y07, y08, y09, y10) ~  x2 + x3.tij,
+       xij = list(x3.tij ~ t01 + t02 + t03 + t04 + t05 + t06 +
+                                 t07 + t08 + t09 + t10 +
+                                 T02 + T03 + T04 + T05 + T06 +
+                                 T07 + T08 + T09 + T10 - 1),
+       posbernoulli.tb(parallel.t = TRUE ~ x2 + x3.tij),
+       data = Hdata, trace = FALSE,
+       form2 = ~  x2 + x3.tij +
+                  t01 + t02 + t03 + t04 + t05 + t06 + t07 + t08 + t09 + t10 +
+                        T02 + T03 + T04 + T05 + T06 + T07 + T08 + T09 + T10)
+
+
+###################################################
+### code chunk number 30: poz-posbernoulli-tbh-huggins89t1-aic
+###################################################
+c(logLik(fit.th), AIC(fit.th))
+c(logLik(fit.tbh), AIC(fit.tbh))
+
+
+###################################################
+### code chunk number 31: poz-posbernoulli-tb-huggins89t1-constraints
+###################################################
+head(constraints(fit.tbh, matrix = TRUE), 4)
+tail(constraints(fit.tbh, matrix = TRUE), 4)
+
+
+###################################################
+### code chunk number 32: poz-posbernoulli-tb-huggins89t1-coefs
+###################################################
+coef(fit.tbh)
+sqrt(diag(vcov(fit.tbh))) 
+
+
+###################################################
+### code chunk number 33: poz-posbernoulli-tb-huggins89t1-Nhat
+###################################################
+fit.tbh@extra$N.hat   
+fit.tbh@extra$SE.N.hat
+
+
+###################################################
+### code chunk number 34: poz-posbernoulli-tbh-huggins89t1-fit-Select
+###################################################
+Hdata <- subset(Huggins89table1, rowSums(Select(Huggins89table1, "y")) > 0)
+Hdata.T <- Select(Hdata, "t")
+colnames(Hdata.T) <- gsub("t", "T", colnames(Hdata.T))
+Hdata <- data.frame(Hdata, Hdata.T)
+Hdata <- transform(Hdata, x3.tij = y01)
+Form2 <- Select(Hdata, prefix = TRUE, as.formula = TRUE)
+Xij   <- Select(Hdata, c("t", "T"), as.formula = TRUE,
+                sort = FALSE, rhs = "0", lhs = "x3.tij", exclude = "T01")
+fit.tbh <- vglm(Select(Hdata, "y") ~ x2 + x3.tij,
+                form2 = Form2,  xij = list(Xij),
+                posbernoulli.tb(parallel.t = TRUE ~ x2 + x3.tij),
+                data = Hdata, trace = FALSE)
+coef(fit.tbh)
+
+
+###################################################
+### code chunk number 35: poz-posbernoulli-bh-ephemeral-method1
+###################################################
+deermice <- transform(deermice, Lag1 = y1)
+M.tbh.lag1 <-
+  vglm(cbind(y1, y2, y3, y4, y5, y6) ~ sex + weight + Lag1,
+       posbernoulli.tb(parallel.t = FALSE ~ 0,
+                       parallel.b = FALSE ~ 0,
+                       drop.b = FALSE ~ 1),
+       xij = list(Lag1 ~ fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                         fill(y5) + fill(y6) +
+                         y1 + y2 + y3 + y4 + y5),
+       form2 = ~ sex + weight + Lag1 +
+                 fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                 fill(y5) + fill(y6) +
+                 y1 + y2 + y3 + y4 + y5 + y6,
+       data = deermice)
+coef(M.tbh.lag1)
+
+
+###################################################
+### code chunk number 36: poz-posbernoulli-bh-ephemeral-method2
+###################################################
+deermice <- transform(deermice, Lag1 = y1)
+deermice <- transform(deermice, f1 = y1, f2 = y1, f3 = y1, f4 = y1,
+                                f5 = y1, f6 = y1)
+tau <- 6
+H2 <- H3 <- cbind(rep(1, 2*tau-1))
+H4 <- cbind(c(rep(0, tau), rep(1, tau-1)))
+M.tbh.lag1.method2 <-
+  vglm(cbind(y1, y2, y3, y4, y5, y6) ~ sex + weight + Lag1,
+       posbernoulli.tb(parallel.b = TRUE ~ 0, parallel.t = TRUE ~ 0),
+       constraints = list("(Intercept)" = cbind(H4, 1), sex = H2, weight = H3,
+                          Lag1 = H4),
+       xij = list(Lag1 ~ f1 + f2 + f3 + f4 + f5 + f6 +
+                         y1 + y2 + y3 + y4 + y5),
+       form2 = Select(deermice, prefix = TRUE, as.formula = TRUE),
+       data = deermice)
+coef(M.tbh.lag1.method2)
+
+
+###################################################
+### code chunk number 37: poz-posbernoulli-bh-ephemeral-lag2
+###################################################
+deermice <- transform(deermice, Lag2 = y1)
+M.bh.lag2 <-
+  vglm(cbind(y1, y2, y3, y4, y5, y6) ~ sex + weight + Lag2,
+       posbernoulli.tb(parallel.t = FALSE ~ 0,
+                       parallel.b = FALSE ~ 0,
+                       drop.b = FALSE ~ 1),
+       xij = list(Lag2 ~ fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                         fill(y5) + fill(y6) +
+                         y1 + pmax(y1, y2) + pmax(y2, y3) + pmax(y3, y4) + 
+                         pmax(y4, y5)),
+       form2 = ~ sex + weight + Lag2 +
+                 fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                 fill(y5) + fill(y6) +
+                 y1 + pmax(y1, y2) + pmax(y2, y3) + pmax(y3, y4) + 
+                 pmax(y4, y5) + y6,
+       data = deermice)
+coef(M.bh.lag2)
+
+
diff --git a/inst/doc/crVGAM.Rnw b/inst/doc/crVGAM.Rnw
new file mode 100644
index 0000000..6a46807
--- /dev/null
+++ b/inst/doc/crVGAM.Rnw
@@ -0,0 +1,2247 @@
+\documentclass[article,shortnames,nojss]{jss}
+\usepackage{thumbpdf}
+%% need no \usepackage{Sweave.sty}
+
+
+%% Packages.
+
+\usepackage{amssymb}
+\usepackage{amsmath}
+\usepackage{bm}
+\usepackage{xspace}
+
+
+
+
+%\VignetteIndexEntry{The VGAM Package for Capture--Recapture Data Using the Conditional Likelihood}
+%\VignetteDepends{VGAM}
+%\VignetteKeywords{closed population size estimation, conditional likelihood,mark--capture--recapture, vector generalized additive model, VGAM}
+%\VignettePackage{VGAM}
+
+%% new commands
+%% Shortcut commands.
+\newcommand{\logit}{\mbox{\rm logit}}
+\newcommand{\bone}{{\bf 1}}
+\newcommand{\bzero}{{\bf 0}}
+\newcommand{\bid}{\mbox{$\bm{\mathcal{D}}$}}
+\newcommand{\bib}{\mbox{$\bm{b}$}}
+\newcommand{\bif}{\mbox{$\bm{f}$}}
+\newcommand{\bix}{\mbox{$\bm{x}$}}
+\newcommand{\biy}{\mbox{$\bm{y}$}}
+\newcommand{\biz}{\mbox{$\bm{z}$}}
+\newcommand{\bB}{\mbox{\rm \bf B}}
+\newcommand{\bX}{\mbox{\rm \bf X}}
+\newcommand{\bH}{\mbox{\rm \bf H}}
+\newcommand{\bI}{\mbox{\rm \bf I}}
+\newcommand{\bOO}{\mbox{\rm \bf O}}
+\newcommand{\bW}{\mbox{\rm \bf W}}
+\newcommand{\bY}{\mbox{\rm \bf Y}}
+\newcommand{\bbeta}{\mbox{$\bm{\beta}$}}
+\newcommand{\boldeta}{\mbox{$\bm{\eta}$}}
+\newcommand{\btheta}{\mbox{$\bm{\theta}$}}
+\newcommand{\calM}{\mbox{$\mathcal{M}$}}
+\newcommand{\mytilde}{\mbox{\lower.80ex\hbox{\char`\~}\xspace}}
+
+
+\author{Thomas W. Yee\\The University of Auckland \And
+Jakub Stoklosa\\The University of New South Wales \AND
+Richard M. Huggins\\The University of Melbourne}
+\title{The \pkg{VGAM} Package for Capture--Recapture Data Using the Conditional Likelihood}
+
+%% for pretty printing and a nice hypersummary also set:
+
+\Plainauthor{Thomas W. Yee, Jakub Stoklosa, Richard M. Huggins} %% comma-separated
+\Plaintitle{The VGAM Package for Capture--Recapture Data Using the Conditional Likelihood} %% without formatting
+\Shorttitle{The VGAM Package for Capture--Recapture Data} %% a short title (if necessary)
+
+%% an abstract and keywords
+\Abstract{
+It is well known that using individual covariate information 
+(such as body weight or gender) to model heterogeneity
+in capture--recapture (CR) experiments can greatly enhance
+inferences on the size of a closed population. Since individual 
+covariates are only observable for captured individuals, complex 
+conditional likelihood methods are usually required and these do 
+not constitute a standard generalized linear model (GLM) family. 
+Modern statistical techniques such as generalized additive models
+(GAMs), which allow a relaxing of the linearity assumptions on the 
+covariates, are readily available for many standard GLM families.
+Fortunately, a natural statistical framework for maximizing
+conditional likelihoods is available in the Vector GLM and Vector 
+GAM classes of models. We present several new \proglang{R}-functions 
+(implemented within the \pkg{VGAM} package) specifically developed to allow 
+the incorporation of individual covariates in the analysis of 
+closed population CR data using a GLM/GAM-like approach 
+and the conditional likelihood. As a result, a wide variety of 
+practical tools are now readily available in the \pkg{VGAM}
+object-oriented framework. We discuss and demonstrate their advantages, 
+features and flexibility using the new \pkg{VGAM} CR functions on several 
+examples.
+}
+
+
+
+\Keywords{closed population size estimation, conditional likelihood, 
+mark--capture--recapture, vector generalized additive model, \pkg{VGAM}}
+\Plainkeywords{closed population, conditional likelihood, 
+mark--capture--recapture, vector generalized additive model, VGAM R package}
+
+
+
+\Address{
+  Thomas W. Yee \\
+  Department of Statistics \\
+  University of Auckland, Private Bag 92019 \\
+  Auckland Mail Centre \\
+  Auckland 1142, New Zealand \\
+  E-mail: \email{t.yee@auckland.ac.nz}\\
+  URL: \url{http://www.stat.auckland.ac.nz/~yee/}
+}
+
+
+\begin{document}
+
+
+<<echo=FALSE, results=hide>>=
+library("VGAM")
+library("VGAMdata")
+ps.options(pointsize = 12)
+options(width = 72, digits = 4)
+options(SweaveHooks = list(fig = function() par(las = 1)))
+options(prompt = "R> ", continue = "+")
+@
+
+
+
+
+
+
+
+
+
+
+%*********************************************************************
+\section[Introduction]{Introduction}
+%% Note: If there is markup in \(sub)section, then it has to be escape as above.
+\label{sec:intro}
+
+
+Capture--recapture (CR) surveys are widely used in ecology and 
+epidemiology to estimate population sizes. In essence they are 
+sampling schemes that allow the estimation of both $n$ and $p$ 
+in a Binomial($n$, $p$) experiment \citep{hugg:hwan:2011}.
+The simplest CR sampling design consists of units or individuals 
+in some population that are captured or tagged across several 
+sampling occasions, e.g., trapping a nocturnal mammal species
+on seven consecutive nights. In these experiments, when an individual 
+is captured for the first time then it is marked or tagged so that 
+it can be identified upon subsequent recapture. On each occasion recaptures 
+of individuals which have been previously marked are also noted. Thus 
+each observed individual has a capture history: a vector of 1s and 0s
+denoting capture/recapture and noncapture respectively. The unknown
+population size is then estimated using the observed capture histories 
+and any other additional information collected on captured individuals, 
+such as weight or sex, along with environmental information such as 
+rainfall or temperature.
+
+
+We consider closed populations, where there are no births, deaths,
+emigration or immigration throughout the sampling period
+\citep{amst:mcdo:manl:2005}. Such an assumption is often reasonable
+when the overall time period is relatively short. 
+\citet{otis:etal:1978} provided eight specific closed population CR
+models (see also \citet{pollock:1991}), which permit the individual
+capture probabilities to depend on time and behavioural response,
+and be heterogeneous between individuals.
+The use of covariate information (or explanatory variables)
+to explain heterogeneous capture probabilities 
+in CR experiments has received considerable attention over the
+last 30 years \citep{pollock:2002}. Population size estimates that
+ignore this heterogeneity typically result in biased population
+estimates \citep{amst:mcdo:manl:2005}.
+A recent book on CR experiments as a whole is \cite{mccr:morg:2014}.
+
+
+Since individual covariate information (such as gender or body weight)
+can only be collected on observed individuals, conditional likelihood 
+models are employed \citep{pollock:1984,hugg:1989,alho:1990,lebreton:1992}. 
+That is, one conditions on the individuals seen at least once throughout 
+the experiment, which allows individual covariates to be 
+considered in the analysis. The capture probabilities are typically 
+modelled as logistic functions of the covariates, and parameters are 
+estimated using maximum likelihood. Importantly, these CR models are
+generalized linear models \citep[GLMs;][]{mccull:1989,hugg:hwan:2011}.
+
+
+Here, we maximize the conditional likelihood (more
+formally, that of the positive-Bernoulli distribution) of the models
+of \citet{hugg:1989}. This approach has become standard practice for 
+carrying out inference when considering individual covariates, with several different 
+software packages currently using this methodology, including:
+\proglang{MARK} \citep{cooch:white:2012},
+\proglang{CARE-2} \citep{hwang:chao:2003},
+and the \proglang{R} packages \citep{R:2014}:
+\pkg{mra} \citep{mcdonald:2010}, \pkg{RMark} \citep{laake:2013}
+and \pkg{Rcapture} \citep{rcapturepackage:2012,Baillargeon:Rivest:2007};
+the latter package uses a log-linear approach, which can be shown to be 
+equivalent to the conditional likelihood \citep{cormak:1989,hugg:hwan:2011}. 
+These programs are quite user-friendly and, specifically, allow capture 
+probabilities to be modelled as linear functions of the covariates. So an obvious 
+question to ask is: why develop yet another implementation for closed population
+CR modelling?
+
+
+Firstly, nonlinearity arises quite naturally in many ecological applications
+\citep{schluter1988,yee:mitc:1991,craw:1993,gimenez:2006,bolk:2008}. 
+In the CR context, capture probabilities may depend nonlinearly on 
+individual covariates, e.g., mountain pygmy possums with lighter or 
+heavier body weights may have lower capture probabilities compared 
+with those having mid-ranged body weights 
+\citep[e.g.,][]{hugg:hwan:2007,stok:hugg:2012}.
+However, in our experience, the vast majority of CR software does not handle 
+nonlinearity well, in regard to both the estimation and the plotting 
+of the smooth functions. Since GAMs \citep{hastie:1990,wood:2006} 
+were developed in the mid-1980s they have become a standard tool for 
+data analysis in regression. The nonlinear relationship between the 
+response and covariate is flexibly modelled, and few assumptions are 
+made on the functional relationship. The drawback in applying these
+models to CR data has been the difficult programming required to 
+implement the approach.
+
+
+Secondly, we have found several implementations of the conditional 
+likelihood approach slow, and in some instances unreliable and difficult to use. 
+We believe our implementation has superior capabilities, and has 
+good speed and reliability. The results of 
+Section \ref{sec:poz:posbernoulli.eg.timingtests} contrast our software 
+with some others. Moreover, the incorporation of these methods in a general, 
+maintained statistical package will result in them being updated as 
+the package is updated.
+
+
+Standard GLM and GAM methodologies are unable to cope with the CR 
+models considered in this article because they are largely restricted 
+to one linear/additive predictor $\eta$. Fortunately, however, a 
+natural extension in the form of the vector generalized linear 
+and additive model (VGLM/VGAM) classes allows for multiple $\eta$s. 
+VGAMs and VGLMs are described in \citet{yee:wild:1996} and \citet{yee:hast:2003}. 
+Their implementation in the \pkg{VGAM} package \citep{yee:2008,yee:2010,yee:VGAM:2013-093} 
+has become increasingly popular and practical over the last few years, due to 
+the large number of exponential families available for discrete/multinomial 
+response data. In addition to flexible modelling of both VGLMs and VGAMs, 
+a wide range of useful features are also available:
+\begin{itemize}
+\item smoothing capabilities;
+
+\item model selection using, e.g., AIC or BIC \citep{burnham:anderson:1999}; 
+
+\item regression diagnostics and goodness--of--fit tools;
+
+\item reduced-rank regression \citep{yee:hast:2003} for dimension 
+reduction;
+
+\item computational speed and robustness;
+
+\item choice of link functions;
+
+\item offsets and prior weights; and
+
+\item (specifically) when using \proglang{R}: generic functions 
+based on object oriented programming, e.g., \code{fitted()}, 
+\code{coef()}, \code{vcov()}, \code{summary()}, \code{predict()}, 
+\code{AIC()}, etc.
+\end{itemize}
+
+
+Our goal is to provide users with an easy-to-use object-oriented \pkg{VGAM} 
+structure, where four \code{family}-type functions based on the conditional
+likelihood are available to fit the eight models of \citet{otis:etal:1978}. 
+We aim to give the user additional tools and features,
+such as those listed above, to carry out a more informative and
+broader analysis of CR data; particularly when considering more than
+one covariate. Finally, this article primarily focuses on the technical 
+aspects of the proposed package, and less so on the biological interpretation 
+for CR experiments. The latter will be presented elsewhere.
+
+
+An outline of this article is as follows. In Section \ref{sec:cr} we
+present the conditional likelihood for CR models and a description of
+the eight \citet{otis:etal:1978} models. Section \ref{sec:vgam}
+summarizes pertinent details of VGLMs and VGAMs. Their connection to
+the CR models is made in Section \ref{sec:meth}. Software details 
+are given in Section \ref{sec:software}, and examples on real and 
+simulated data using the new software are demonstrated in 
+Section \ref{sec:body:exam}. Some final remarks are given in 
+Section \ref{sec:discussion}. The two appendices give some
+technical details relating to the first and second derivatives
+of the conditional log-likelihood, and the means.
+
+
+\begin{table}[tt]
+\centering
+\begin{tabular}{cl}
+\hline 
+\ \ \ Symbol \ \ \ & Explanation \\
+\hline
+% --------------------------------------
+$N$ & (Closed) population size to be estimated \\
+% --------------------------------------
+$n$ & Total number of distinct individuals caught in the trapping experiment \\
+% --------------------------------------
+$\tau$ & Number of sampling occasions, where $\tau \geq 2$ \\
+% --------------------------------------
+$\biy_i$ & Vector of capture histories for individual $i$ $(i=1,\ldots,n)$ with observed values\\
+& 1 (captured) and 0 (noncaptured). Each $\biy_i$ has at least one observed 1 \\
+% --------------------------------------
+``$h$'' & Model $\calM$ subscript, for heterogeneity \\
+% --------------------------------------
+``$b$'' & Model $\calM$ subscript, for behavioural effects \\
+% --------------------------------------
+``$t$'' & Model $\calM$ subscript, for temporal effects \\
+% --------------------------------------
+$p_{ij}$ & Probability that individual $i$ is captured at sampling occasion $j$ $(j=1,\ldots,\tau)$ \\
+% --------------------------------------
+$z_{ij}$ & $= 1$ if individual $i$ has been captured before occasion $j$,
+else $= 0$ \\
+% --------------------------------------
+$\btheta^{}$ & Vector of regression coefficients to be estimated related to $p_{ij}$
+\\
+% --------------------------------------
+$\boldeta$ & Vector of linear predictors (see Table \ref{tab2} 
+for further details)
+\\
+% --------------------------------------
+$g$ & Link function applied to, e.g., $p_{ij}$. Logit by default
+\\
+% --------------------------------------
+\hline
+\end{tabular}
+\caption{
+Short summary of the notation used for the positive-Bernoulli distribution
+for capture--recapture (CR) experiments. Additional details are in the text.
+\label{tab0}
+}
+\end{table}
+
+
+%*********************************************************************
+\section[Capture--recapture models]{Capture--recapture models}
+\label{sec:cr}
+
+
+In this section we give an outline for closed population CR models
+under the conditional likelihood/GLM approach. For further details 
+we recommend \citet{hugg:1991} and \citet{hugg:hwan:2011}. 
+The notation of Table \ref{tab0} is used throughout this article.
+
+
+% ---------------------------------------------------------------
+\subsection{Conditional likelihood}
+\label{sec:condlik}
+
+
+Suppose we have a closed population of $N$ individuals,
+labelled $i=1,\ldots,N$ and $\tau$ capture occasions 
+labelled $j=1,\ldots,\tau$. We make the usual assumptions that 
+individuals in the population behave independently of each other, 
+individuals do not lose their tags, and tags are recorded correctly. 
+Let $y_{ij}=1$ if the $i$th individual was caught on the $j$th 
+occasion, and zero otherwise, and let $n$ be the number of 
+distinct individuals captured.
+
+
+Let $p_{ij}$ denote the probability of capturing individual $i$ 
+on occasion $j$. As noted in Section \ref{sec:intro},
+\citet{otis:etal:1978} 
+describe eight models for the capture probabilities,
+see Section \ref{sec:8models} 
+for further details. Label the individuals captured in the experiment 
+by $i=1,\ldots,n$ and those never captured by $i=n+1,\ldots,N$. The full 
+likelihood is given by
+\begin{eqnarray}
+L_{f} & = & K \prod_{i=1}^{N}\prod_{j=1}^{\tau} p_{ij}^{y_{ij}}
+(1-p_{ij})^{1- y_{ij}} 
+\nonumber
+\\
+& = & K 
+\left\{\prod_{i=1}^{n}\prod_{j=1}^{\tau}p_{ij}^{y_{ij}} 
+(1-p_{ij})^{1 - y_{ij}}\right\}\cdot
+\left\{\prod_{i=n+1}^{N} \prod_{j=1}^{\tau} (1-p_{ij})\right\}
+\label{eq:posbern.uncondlikelihood}
+\end{eqnarray}
+where $K$ is independent of the $p_{ij}$ but may depend on $N$. The
+RHS of (\ref{eq:posbern.uncondlikelihood}) requires knowledge of the
+uncaptured individuals and in general cannot be computed. Consequently 
+no MLE of $N$ will be available unless some homogeneity assumption is 
+made about the noncaptured individuals. Instead, the conditional likelihood 
+function based only on the individuals observed at least once,
+\begin{eqnarray}
+\label{eq:posbern.condlikelihood}
+L_{c} & \propto & \prod_{i=1}^{n} 
+\frac{\prod\limits_{j=1}^{\tau} p_{ij}^{y_{ij}} (1-p_{ij})^{1 - y_{ij}}}
+{1-\prod_{s=1}^{\tau}(1-p_{is}^{\dagger})},
+\end{eqnarray}
+is used. Here, $p_{is}^{\dagger}$ is $p_{is}$ computed as if the
+individual had not been captured prior to occasion $s$, so that the denominator 
+is the probability that individual $i$ is captured at least once. This 
+conditional likelihood (\ref{eq:posbern.condlikelihood}) is a modified
+version of the likelihood corresponding to a positive-Bernoulli
+distribution \citep{patil:1962}.
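+For example, under $\calM_0$ with $\tau=2$ (so that $p_{ij} = p_{is}^{\dagger} = p$),
+an individual with capture history ``10'' contributes
+$p(1-p)/\{1-(1-p)^2\}$ to (\ref{eq:posbern.condlikelihood}),
+the denominator $1-(1-p)^2 = p(2-p)$ being the probability of
+being captured at least once (cf. Table \ref{tab1}).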
+
+
+\renewcommand{\arraystretch}{1.2}
+\begin{table}[tt]
+\begin{center}
+\begin{tabular}{|c||c|c|c|c|}
+\hline
+Capture & \multicolumn{4}{c|}{Joint probability}\\
+\cline{2-5}
+history & \multicolumn{1}{c|}{$\calM_0$/$\calM_h$} 
+& \multicolumn{1}{c|}{$\calM_b$/$\calM_{bh}$}
+& \multicolumn{1}{c|}{$\calM_t$/$\calM_{th}$} 
+& \multicolumn{1}{c|}{$\calM_{tb}$/$\calM_{tbh}$} \\
+\hline
+01 & $(1-p) p$ & $(1-p_{c}) \, p_{c}$ & $(1-p_1) p_2$ 
+& $(1-p_{c1}) \, p_{c2}$ \\
+10 & $p(1-p)$ & $p_{c} (1-p_{r})$ & $p_1 (1-p_2)$ 
+& $p_{c1} (1-p_{r2})$ \\
+11 & $p^2$ & $p_{c} \, p_{r}$ & $p_1 \, p_2$ & $p_{c1} \, p_{r2}$ \\
+\hline
+00 & $(1-p)^2$ & $(1-p_{c})^2$ & $(1-p_1)(1-p_2)$ 
+& $(1-p_{c1})(1-p_{c2})$ \\
+\hline \hline
+$ M \equiv \dim(\boldeta)$ & 1 & 2 & 2 $(=\tau)$ 
+& 3 $(=2 \tau - 1)$ \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Capture history sample space and corresponding probabilities 
+for the eight models of \citet{otis:etal:1978}, with $\tau=2$ capture occasions 
+in a closed population CR experiment. Here, $p_{cj}=$ capture probability for 
+unmarked individuals at sampling period $j$, $p_{rj}=$ recapture 
+probability for marked individuals at sampling period $j$, and $p=$ 
+constant capture probability across $\tau=2$. Note that the ``00'' row 
+is never realized in sample data.}
+\label{tab1}
+\end{table}
+\renewcommand{\arraystretch}{1.0}
+
+
+% ---------------------------------------------------------------
+\subsection{The eight models}
+\label{sec:8models}
+
+
+Models which allow capture probabilities to depend on one or a 
+combination of time, heterogeneity or behavioural effects are defined 
+using appropriate subscripts, e.g., $\calM_{th}$ depends on time and 
+heterogeneity. These eight models have a nested structure 
+of which $\calM_{tbh}$ is the most general. The homogeneous 
+model $\calM_0$ is the simplest (but most unrealistic) and has equal 
+capture probabilities for each individual $H_0: p_{ij}=p$, regardless 
+of the sampling occasion. All eight models are GLMs, since the 
+conditional likelihood (\ref{eq:posbern.condlikelihood}) 
+belongs to the exponential family \citep{hugg:hwan:2011}.
+
+
+To illustrate the approach, we use the following toy example throughout:
+consider a CR experiment with two occasions---morning and evening
+(i.e., $\tau=2$), with capture 
+probabilities varying between the two occasions. Furthermore, suppose we
+have collected some individual covariates---weight and gender.
+The joint probabilities of all the eight models are listed in
+Table \ref{tab1}.  It can be seen that all but the positive-Binomial
+model ($\calM_{0}/\calM_{h}$) 
+require more than one probability and hence more than
+one linear predictor, so that the 
+original \cite{neld:wedd:1972} GLM framework is inadequate. Further, there 
+are two noteworthy points from Table \ref{tab1} which apply for 
+any $\tau\ge 2$:
+\begin{itemize}
+
+\item first, for $\calM_{t}$-type models, as $\tau$ increases
+so will the number of linear predictors and hence the potential
+number of parameters;
+ 
+\item secondly, it is evident that there are four main categories 
+consisting of non-heterogeneity models ($\calM_{0}$, $\calM_{b}$, $\calM_{t}$ 
+and $\calM_{tb}$), which are paired with a heterogeneity sub-model 
+(respectively $\calM_{h}$, $\calM_{bh}$, $\calM_{th}$ and $\calM_{tbh}$).
+
+\end{itemize}
+
+
+The four heterogeneity models allow for each individual to have 
+their own probability of capture/recapture. In our toy example, 
+the capture probabilities are dependent on an individual's weight 
+and gender. We discuss these models further in Section \ref{sec:vgam.basics}. 
+It is natural to consider individual covariates such as weight 
+and gender as linear/additive predictors. Let $x_{i}$ denote a 
+covariate (either continuous or discrete) for the $i$th individual, 
+which is constant across the capture occasions $j=1,\ldots,\tau$, 
+e.g., for continuous covariates one could use the first 
+observed value or the mean across all $j$. If there are $d-1$ covariates, 
+we write $\bix_i=(x_{i1},\ldots,x_{id})^{\top}$ with $x_{i1}=1$ if 
+there is an intercept. Also, let $g^{-1}(\eta)={\exp(\eta)}/\{{1+\exp(\eta)}\}$ 
+be the inverse \logit{} function. For model $\mathcal{M}_{tbh}$, the
+capture/recapture probabilities are given by [notation follows
+Section \ref{sec:VGAMs.basics}]
+\begin{eqnarray*}
+p_{ij}^{\dagger} & = & g^{-1} \!
+\left(\qquad \quad \, \beta^*_{(j+1)1} + \bix_{i[-1]}^{\top} \, 
+\bbeta_{1[-1]}^{} \right), \qquad j=1,\ldots,\tau, \\
+p_{ij} & = & g^{-1} \!\left(\beta^*_{(1)1} + \beta^*_{(j+1)1} + 
+\bix_{i[-1]}^{\top} \,\bbeta_{1[-1]}^{} \right),\qquad j=2,\ldots,\tau,
+\end{eqnarray*}
+where $\beta^*_{(1)1}$ is the behavioural effect of prior capture, 
+$\beta^*_{(j+1)1}$ for $j=1,\ldots,\tau$ are time effects, 
+and $\bbeta_{1[-1]}$ are the remaining regression parameters 
+associated with the covariates. Computationally, the conditional 
+likelihood (\ref{eq:posbern.condlikelihood}) is maximized with 
+respect to all the parameters (denoted by $\btheta$) by the 
+Fisher scoring algorithm using the derivatives given in 
+Appendix A.
+
+
+% ---------------------------------------------------------------
+\subsection[Estimation of N]{Estimation of $N$}
+\label{sec:Nhat}
+
+
+To estimate $N$ in the above linear models, let $\pi_{i}(\btheta)=1-\prod\limits_{s=1}^{\tau}(1-p_{is}^{\dagger})$ 
+be the probability that individual $i$ is captured at least once 
+in the course of the study. Then, if $\btheta$ is known, 
+the Horvitz--Thompson \citep[HT;][]{horv:thom:1952} estimator
+\begin{eqnarray}
+\label{eq:HT}
+\widehat{N}(\btheta) &=& \sum_{i=1}^{n} \; {\pi}_{i}(\btheta)^{-1}
+\end{eqnarray}
+is unbiased for the population size $N$ and an associated estimate of 
+the variance of $\widehat{N}(\btheta)$ is $s^2(\btheta) = \sum_{i=1}^{n} 
+\; {\pi}_{i}(\btheta)^{-2} \, \left[1-{\pi}_{i}(\btheta)\right]$. 
+If $\btheta$ is estimated by $\widehat{\btheta}$ then one can use
+\begin{eqnarray}
+\label{eq:est.varNhat2}
+\VAR\left(\widehat{N}(\widehat{\btheta}) \right) &  \approx  & 
+s^2(\widehat{\btheta}) + \widehat{\bid}^{\top} 
+\widehat{\VAR}(\widehat{\btheta}) \,\widehat{\bid}
+\end{eqnarray}
+where, following from a Taylor series expansion 
+of $\widehat{N}(\widehat{\btheta})$ 
+about $\widehat{N}(\btheta)$,
+\begin{eqnarray*}
+\bid\,  =  \, \frac{d N(\btheta)}{d \btheta}
+&  =  & -\sum_{i=1}^n \; {\pi}_{i}(\btheta)^{-2} \; \,
+\frac{d {\pi}_{i}(\btheta)}{d \btheta} \\
+&  =  &\sum_{i=1}^n \; \frac{-1}{{\pi}_{i}(\btheta)^{2}} \; 
+\sum_{s=1}^{\tau} \; \left[\prod_{t=1,\ t \neq s}^{\tau}
+\left( 1 - p_{it}^{\dagger}\right)\right]
+\frac{\partial p_{is}^{\dagger}}{\partial \btheta}.
+\end{eqnarray*}
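+
+
+As a minimal illustrative sketch (assuming a hypothetical
+$n\times\tau$ matrix \code{pdagger} of fitted probabilities
+$p_{is}^{\dagger}$; in practice the \pkg{VGAM} family functions compute
+these quantities automatically, see Section \ref{sec:furthersoftware}),
+the estimate (\ref{eq:HT}) and $s^2(\btheta)$ might be computed as follows.
+
+
+<<label = ht-sketch, eval = FALSE, prompt = FALSE>>=
+pi.i  <- 1 - apply(1 - pdagger, 1, prod)  # Probability of >= 1 capture
+N.hat <- sum(1 / pi.i)                    # Horvitz--Thompson estimate of N
+s2    <- sum((1 - pi.i) / pi.i^2)         # s^2(theta), treating theta as known
+@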
+
+
+%*********************************************************************
+\section[Vector generalized linear and additive models]{Vector generalized 
+linear and additive models}
+\label{sec:vgam}
+
+
+To extend the above linear models, we use VGLMs and VGAMs which we briefly 
+describe in this section. These models fit within a large statistical 
+regression framework which will be described in \citet{yee:2015}. 
+The details here are purposely terse; readers are directed
+to \citet{yee:2008,yee:2010} 
+for accessible overviews and examples, and \citet{yee:wild:1996} 
+and \citet{yee:hast:2003} for technical details.
+
+
+% ---------------------------------------------------------------
+\subsection[Basics]{Basics}
+\label{sec:vgam.basics}
+
+
+Consider observations on independent pairs $(\bix_i,\biy_i)$,
+$i=1,\ldots,n$. We use ``$[-1]$'' to delete the first element, 
+e.g., $\bix_{i[-1]} =(x_{i2},\ldots,x_{id})^{\top}$. For 
+simplicity, we will occasionally drop the subscript $i$ and simply 
+write $\bix =(x_{1},\ldots,x_{d})^{\top}$. Consider a single observation 
+where \biy{} is a $Q$-dimensional vector. For the CR models of this 
+paper, $Q=\tau$ when the response is entered as a matrix of 0s and 1s.
+The only exception is $\calM_0/\calM_h$, where aggregated
+counts may be inputted; see Section \ref{sec:M0Mh}. VGLMs are defined 
+through the model for the conditional density
+\[
+f(\biy | \bix ; \bB)  =  f(\biy,\eta_1,\ldots,\eta_M)
+\]
+for some known function $f(\cdot)$, 
+where $\bB =(\bbeta_1 \,\bbeta_2 \,\cdots \,\bbeta_M)$ is 
+a $d\times M$ matrix of regression coefficients to be estimated.
+We may also write $\bB^{\top} = (\bbeta_{(1)} \,\bbeta_{(2)}\,\cdots\,
+\bbeta_{(d)})$ so that $\bbeta_j$ is the $j$th column of $\bB$ 
+and $\bbeta_{(k)}$ is the $k$th row.
+
+
+The $j$th linear predictor is then
+\begin{equation}
+\eta_j  =  \bbeta_j^{\top} \bix  =  \sum_{k=1}^d \beta_{(j)k} \, 
+x_{k},     j=1, \ldots, M,
+\label{gammod2}
+\end{equation}
+where $\beta_{(j)k}$ is the $k$th component of $\bbeta_j$.
+In the CR context, we remind the reader that,
+as in Table \ref{tab1}, we have $M=2$ for $\calM_{bh}$, 
+$M=\tau$ for $\calM_{th}$ and $M=2\tau-1$ for $\calM_{tbh}$.
+
+
+In GLMs the linear predictors are used to model the means.
+The $\eta_j$ of VGLMs model the parameters of a model. 
+In general, for a parameter $\theta_j$ we take
+\[
+\eta_j  =  g_j(\theta_j),     j=1,\ldots,M
+\]
+and we say $g_j$ is a parameter link function. Write
+\begin{equation}
+\boldeta_i  =  \left(\begin{array}{c}\eta_1(\bix_{i})\\
+\vdots \\
+\eta_M(\bix_{i})\end{array}\right)  = \bB^{\top} \bix_{i}  = 
+\left(\begin{array}{c}\bbeta_1^{\top} \bix_{i} \\\vdots \\
+\bbeta_M^{\top} \bix_{i}\end{array} \right).
+\label{eq:lin.pred}
+\end{equation}
+
+
+In practice we may wish to constrain the effect of a covariate to
+be the same for some of the $\eta_j$ and to have no effect for others.
+In our toy example, for model $\calM_{th}$ with $\tau=M=2$ and $d=3$, we have
+\begin{eqnarray*}
+\eta_1(\bix_i) & = & \beta_{(1)1} + \beta_{(1)2} \, x_{i2} + 
+\beta_{(1)3} \, x_{i3}, \\
+\eta_2(\bix_i) & = & \beta_{(2)1} + \beta_{(2)2} \, x_{i2} + 
+\beta_{(2)3} \, x_{i3},
+\end{eqnarray*}
+where $x_{i2}$ is the individual's weight 
+and $x_{i3}$ is an indicator of gender, say. We then impose the 
+constraints $\beta_{(1)2}\equiv\beta_{(2)2}$
+and $\beta_{(1)3}\equiv\beta_{(2)3}$, so that, with ``${}^*$'' 
+denoting the parameters that are estimated,
+\begin{eqnarray*}
+\eta_1(\bix_i) & = & \beta^*_{(1)1} + \beta^*_{(1)2} \, x_{i2} + 
+\beta^*_{(1)3} \, x_{i3}, \\
+\eta_2(\bix_i) & = & \beta^*_{(2)1} + \beta^*_{(1)2} \, x_{i2} + 
+\beta^*_{(1)3} \, x_{i3},
+\end{eqnarray*}
+and we may write
+\begin{eqnarray*}
+\boldeta(\bix_i) =
+\begin{pmatrix}\eta_1(\bix_i)\\
+\eta_2(\bix_i)\end{pmatrix}
+& = & \sum_{k=1}^3 \, \bbeta_{(k)} \, x_{ik}\\
+& = & \begin{pmatrix}\beta_{(1)1} & \beta_{(1)2} & \beta_{(1)3}\\
+\beta_{(2)1} & \beta_{(2)2} & \beta_{(2)3} \end{pmatrix}
+\begin{pmatrix}x_{i1}\\ x_{i2}\\ x_{i3} \end{pmatrix}\\
+& = &
+\begin{pmatrix}
+\beta^*_{(1)1} & \beta^*_{(1)2} & \beta^*_{(1)3}\\
+\beta^*_{(2)1} & \beta^*_{(1)2} & \beta^*_{(1)3}
+\end{pmatrix}
+\begin{pmatrix}x_{i1}\\ x_{i2}\\ x_{i3}\end{pmatrix}\\
+& = &
+\begin{pmatrix}1 & 0\\ 0 & 1\end{pmatrix} 
+\begin{pmatrix}
+\beta^*_{(1)1}\\ \beta^*_{(2)1}
+\end{pmatrix}
+x_{i1}+
+\begin{pmatrix}1\\1\end{pmatrix}
+\beta^*_{(1)2} \, x_{i2}+
+\begin{pmatrix}
+1\\
+1\end{pmatrix}
+\beta^*_{(1)3} \, x_{i3}\\
+& = & \sum_{k=1}^3 \, \bH_k \, \bbeta^*_{(k)} \, x_{ik}.
+\end{eqnarray*}
+We can also write this as (noting that $x_{i1}=1$)
+\begin{eqnarray*}
+\boldeta(\bix_i) & = & \begin{pmatrix}x_{i1} & 0 \\ 0 & x_{i1}
+\end{pmatrix} \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} 
+\begin{pmatrix} \beta^*_{(1)1}\\ \beta^*_{(2)1} \end{pmatrix} + 
+\begin{pmatrix} x_{i2} & 0 \\ 0 & x_{i2} \end{pmatrix}
+\begin{pmatrix} 1 \\ 1\end{pmatrix}
+\beta^*_{(1)2} + \begin{pmatrix} x_{i3} & 0 \\ 0 & x_{i3} 
+\end{pmatrix}
+\begin{pmatrix}
+1 \\
+1
+\end{pmatrix}
+\beta^*_{(1)3}\\ 
+& = & \sum_{k=1}^3 \, \mathrm{diag}(x_{ik},x_{ik}) \, \bH_k \,\bbeta_{(k)}^{*}. 
+\end{eqnarray*}
+In general, for VGLMs, we represent the models as
+\begin{eqnarray}
+\boldeta(\bix_i) 
+& = & \sum_{k=1}^d \, \bbeta_{(k)} \, x_{ik} \nonumber \\
+& = & \sum_{k=1}^d \, \bH_k \, \bbeta_{(k)}^{*} \, x_{ik}
+\label{eq:constraints.VGLM}\\
+& = & \sum_{k=1}^d \, \mathrm{diag}(x_{ik},\ldots,x_{ik}) \,
+\bH_k \, \bbeta_{(k)}^{*}
+\nonumber
+\end{eqnarray}
+where $\bH_1,\bH_2,\ldots,\bH_d$ are known constraint matrices
+of full column-rank (i.e., rank \code{ncol}($\bH_k$)), and $\bbeta_{(k)}^*$ 
+is a vector containing a possibly reduced set of regression coefficients.
+Then we may write
+\begin{equation}
+\label{eq:lin.coefs4}
+{\bB}^{\top}  =  
+\left( 
+\bH_1 \bbeta_{(1)}^* \; \; \; 
+\bH_2 \bbeta_{(2)}^* \;\;\; \cdots \;\;\; 
+\bH_d \bbeta_{(d)}^* 
+\right)
+\end{equation}
+as an expression of (\ref{eq:lin.pred}) concentrating on columns rather
+than rows. Note that with no constraints at all, all $\bH_k = \bI_M$
+and $\bbeta_{(k)}^*=\bbeta_{(k)}$. We need both (\ref{eq:lin.pred}) 
+and (\ref{eq:lin.coefs4}) since at times we focus on the $\eta_j$
+and at other times on the variables $x_{k}$. The constraint matrices 
+for common models are pre-programmed in \pkg{VGAM}
+and can be set up by using arguments such as \code{parallel} and \code{zero}
+found in \pkg{VGAM} family functions. Alternatively, there
+is the argument \code{constraints} where they may be explicitly
+inputted. Using \code{constraints} is less convenient but 
+fully general.
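+
+
+To make this concrete, the $\bH_k$ of the toy $\calM_{th}$ example
+above could be supplied explicitly as follows (a sketch only, using a
+hypothetical data frame \code{pdata}; the same constraints are set up
+automatically via the \code{parallel}-type arguments):
+
+
+<<label = constraints-sketch, eval = FALSE, prompt = FALSE>>=
+H1 <- diag(2)      # Separate intercepts for eta_1 and eta_2
+H2 <- rbind(1, 1)  # weight: common effect across both etas
+H3 <- rbind(1, 1)  # sex: common effect across both etas
+vglm(cbind(y1, y2) ~ weight + sex, posbernoulli.t, data = pdata,
+     constraints = list("(Intercept)" = H1, weight = H2, sex = H3))
+@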
+
+ 
+% ---------------------------------------------------------------
+\subsection[Handling time-varying covariates]{Handling time-varying covariates}
+\label{sec:xij}
+
+
+Often, the covariates may be time-varying, e.g., when using 
+temperature as a covariate, a different value is observed and 
+measured on each occasion $j=1,\ldots,\tau$.
+Again, using our toy example with $M=2$, $d=3$,
+and $\tau=2$, suppose we have time-dependent covariates $\bix_{ij}$, $j=1,2$. 
+We may have the model
+\begin{eqnarray*}
+\eta_1(\bix_{i1}) & = & \beta^*_{(1)1} + \beta^*_{(1)2} \, x_{i21} +
+\beta^*_{(1)3}\, x_{i31},\\
+\eta_2(\bix_{i2}) & = & \beta^*_{(2)1} + \beta^*_{(1)2} \, x_{i22} +
+\beta^*_{(1)3}\, x_{i32},
+\end{eqnarray*}
+for the linear predictors on the two occasions. Here, $x_{ikt}$ denotes the value 
+of the $k$th explanatory variable for the $i$th animal at time $t$. We write this model as
+\begin{eqnarray*}
+\boldeta(\bix_{ij}) & = & \begin{pmatrix} x_{i11} & 0\\ 0 & x_{i12} \end{pmatrix}
+\begin{pmatrix} 1 & 0\\ 0 & 1\end{pmatrix} \begin{pmatrix} \beta^*_{(1)1}\\
+\beta^*_{(2)1} \end{pmatrix} + \begin{pmatrix} x_{i21} & 0\\ 0 & x_{i22}\end{pmatrix}
+\begin{pmatrix} 1\\ 1\end{pmatrix} \beta^*_{(1)2} + 
+\begin{pmatrix} x_{i31} & 0\\ 0 & x_{i32}\end{pmatrix}
+\begin{pmatrix} 1 \\ 1\end{pmatrix}
+\beta^*_{(1)3}\\ & = & \sum_{k=1}^3 \, \mathrm{diag}(x_{ik1},x_{ik2}) \,
+\bH_k\,\bbeta_{(k)}^{*}. 
+\end{eqnarray*}
+Thus to handle time-varying covariates one needs the \code{xij} facility of \pkg{VGAM} 
+(e.g., see Section \ref{sec:poz:posbernoulli.eg.hugg:1991}), which allows a covariate 
+to have different values for different $\eta_{j}$ through the general formula
+\begin{eqnarray}
+\boldeta(\bix_{ij})
+&  =  & \sum_{k=1}^{d}\, \mathrm{diag}(x_{ik1},\ldots,x_{ikM})\,
+\bH_k \,\bbeta_{(k)}^{*}=\sum_{k=1}^d \, 
+\bX^{\#}_{(ik)}\bH_k \,\bbeta_{(k)}^{*}
+\label{eq:vglimo:xij.vector.diag}
+\end{eqnarray}
+where $x_{ikj}$ is the value of variable $x_{k}$ for unit $i$ for $\eta_{j}$.
+The derivation of (\ref{eq:vglimo:xij.vector.diag}),
+followed by some examples, is given in \cite{yee:2010}.
+Implementing this model requires specification 
+of the diagonal elements of the matrices $\bX^{\#}_{(ik)}$, and we see
+its use in Section \ref{sec:poz:posbernoulli.eg.hugg:1991}.
+Clearly, a model may include a mix of time-dependent and
+time-independent covariates. 
+The model is then specified through the constraint matrices $\bH_k$ 
+and the covariate matrices $\bX^{\#}_{(ik)}$. Typically in CR experiments, 
+the time-varying covariates will be environmental effects. Fitting 
+time-varying individual covariates requires some interpolation when 
+an individual is not captured and is beyond the scope of the present 
+work.
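+
+
+As an illustrative sketch (the data frame and all variable names here
+are hypothetical; Section \ref{sec:poz:posbernoulli.eg.hugg:1991} gives
+a worked example), a time-varying environmental covariate recorded as
+\code{temp1} and \code{temp2} on the two occasions might be specified
+via something like:
+
+
+<<label = xij-sketch, eval = FALSE, prompt = FALSE>>=
+pdata <- transform(pdata, Temp = temp1)  # Placeholder column for the xij term
+vglm(cbind(y1, y2) ~ weight + sex + Temp, posbernoulli.t, data = pdata,
+     xij   = list(Temp ~ temp1 + temp2 - 1),
+     form2 = ~ weight + sex + Temp + temp1 + temp2)
+@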
+
+
+% ---------------------------------------------------------------
+\subsection[VGAMs]{VGAMs}
+\label{sec:VGAMs.basics}
+
+
+VGAMs replace the linear functions in (\ref{eq:constraints.VGLM})
+by smoothers such as splines. Hence, the central formula is
+\begin{equation}
+\boldeta_i  =  \sum_{k=1}^d \; \bH_k \, \bif_k^*(x_{ik})
+\label{eq:vgam}
+\end{equation}
+where $\bif_k^*(x_k) = (f_{k(1)}^*(x_k),\ldots,f_{k(M_k)}^*(x_k))^{\top}$
+is a vector of $M_k$ smooth functions of $x_k$, where $M_k=\mathtt{ncol}(\bH_k)$ 
+is the rank of the constraint matrix for $x_k$. Note that standard error bands 
+are available upon plotting the estimated component functions 
+\citep[for details see][]{yee:wild:1996}, e.g., see Figure \ref{fig:poz:deermice}.
+
+
+
+%*********************************************************************
+\section[VGLMs and VGAMs applied to CR data]{VGLMs and VGAMs applied to CR data}
+\label{sec:meth}
+
+
+In this section we merge the results of Sections \ref{sec:cr}
+and \ref{sec:vgam} to show how the eight models of \citet{otis:etal:1978} 
+can be fitted naturally within the VGLM/VGAM framework.
+
+
+% ---------------------------------------------------------------
+\subsection[Linear predictors and constraint matrices]{Linear predictors and constraint matrices}
+\label{sec:constraints}
+
+
+As in Section \ref{sec:vgam.basics}, we now write $\biy_i$ as the 
+capture history vector for individual $i$. Written technically, 
+$\biy_i \in (\{0,1\})^{\tau} \backslash\{\bzero_\tau\}$ so that 
+there is at least one 1 (capture). For simplicity let $p_c$ and $p_r$ 
+be the capture and recapture probabilities. Recall that the value 
+of $M$ depends on the CR model type and the number of 
+capture occasions considered in the experiment.
+For example, for model $\calM_b$ as in 
+Table \ref{tab1} we have $(\eta_1,\eta_2)=(g(p_c),g(p_r))$
+for some link function $g$, thus $M=2$. The upper half of Table \ref{tab2} 
+gives these for the eight \citet{otis:etal:1978} 
+models. The lower half of Table \ref{tab2} gives
+the names of the \pkg{VGAM} family functions that fit those 
+models. They work very similarly to the \code{family} argument 
+of \code{glm()}, e.g.,
+
+
+
+<<label = example-posber, eval = FALSE, prompt = FALSE>>=
+vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+           family = posbernoulli.t, data = pdata)
+@
+
+
+
+is a simple call to fit an $\calM_{th}$ model. The response is a 
+matrix containing 0 and 1 values only, and three individual covariates
+are used here. The argument name \code{family} was chosen so that
+\code{glm()} users need not learn a new argument
+name; also, the concept of an error distribution as for the GLM
+class does not carry over to VGLMs. Indeed, \code{family} denotes some 
+full-likelihood specified statistical model worth fitting in its own right, 
+regardless of an `error distribution' which may not make sense.
+Each family function has \code{logit()} as its default link; 
+however, alternatives such as \code{probit()} and \code{cloglog()} 
+are also permissible. Section \ref{sec:software} discusses the software side 
+of \pkg{VGAM} in detail, and Section \ref{sec:body:exam} gives more examples.
+
+
+
+As noted above, constraint matrices are used to simplify complex
+models, e.g., model $\calM_{tbh}$ into model $\calM_{th}$. The default 
+constraint matrices for the $\calM_{tbh}(\tau)$ model are given 
+in Table \ref{tab3}. These are easily constructed using the
+\code{drop.b}, \code{parallel.b} and \code{parallel.t}
+arguments in the family function. More generally, the $\bH_k$
+may be inputted using the \code{constraints} 
+argument---see \cite{yee:2008} and \cite{yee:2010} 
+for examples. It can be seen that the building blocks of 
+the $\bH_k$ are \bone, \bzero, \bI{} and \bOO. 
+This is because one wishes to constrain the effect of $x_k$ 
+to be the same for capture and recapture probabilities. In general, 
+we believe the $\bH_k$ in conjunction with (\ref{eq:vglimo:xij.vector.diag}) 
+can accommodate all linear constraints between the estimated regression 
+coefficients $\widehat{\beta}_{(j)k}$.
+
+
+
+For time-varying covariates models, the $M$ diagonal elements $x_{ikj}$
+in (\ref{eq:vglimo:xij.vector.diag}) correspond to the value of
+covariate $x_k$ at time $j$ for individual $i$. These are inputted
+successively in order using the \code{xij} argument, e.g., as in
+Section \ref{sec:poz:posbernoulli.eg.hugg:1991}.
+
+
+
+\clearpage
+% ---------------------------------------------------------------
+\subsection[Penalized likelihood and smoothing parameters]{Penalized 
+likelihood and smoothing parameters}
+\label{sec:gam}
+
+
+
+For each covariate $x_{k}$, the smoothness of each component 
+function $f^{*}_{(j)k}$ in (\ref{eq:vgam}) can be controlled 
+by the non-negative smoothing parameters $\lambda_{(j)k}$. 
+\cite{yee:wild:1994a} show that, when vector splines are used 
+as the smoother, the penalized conditional log-likelihood 
+\begin{eqnarray}
+\label{eq:posbern.pen.condlikelihood}
+\ell_p  \equiv \log\, L_p  =  \ell_c - \frac12 \sum_{k=1}^d 
+\sum_{j=1}^{\mathtt{ncol}(\mathbf{H}_k)}\,\lambda_{(j)k}\int_{a_k}^{b_k} 
+\left\{f^{*''}_{(j)k}(t) \right\}^2 {\rm d}t 
+\end{eqnarray}
+is maximized. Here, $\ell_c$ is the logarithm of the conditional likelihood 
+function (\ref{eq:posbern.condlikelihood}). 
+The penalized conditional likelihood (\ref{eq:posbern.pen.condlikelihood}) 
+is a natural extension of the penalty approach described 
+in \citet{gree:silv:1994} to models with multiple $\eta_j$.
+
+
+
+An important practical issue is to control for overfitting and 
+smoothness in the model. The \code{s()} function used within \code{vgam()} 
+signifies the smooth functions $f^{*}_{(j)k}$ estimated by vector splines. 
+Its argument \code{spar} sets the smoothing parameters; a relatively 
+small (positive) value means much flexibility and wiggliness, and 
+as \code{spar} increases the solution converges to the least squares 
+estimate. More commonly, the argument \code{df} is used; this is 
+known as the equivalent degrees of freedom (EDF). A value of unity 
+means a linear fit, and the default value of 4 affords
+a reasonable amount of flexibility.
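+
+
+For example (schematic calls only, on a hypothetical data frame
+\code{pdata}), the two fits below request a less flexible smooth of
+\code{weight} and the default amount of flexibility, respectively:
+
+
+<<label = df-sketch, eval = FALSE, prompt = FALSE>>=
+vgam(cbind(y1, y2) ~ s(weight, df = 2), posbernoulli.t, data = pdata)
+vgam(cbind(y1, y2) ~ s(weight, df = 4), posbernoulli.t, data = pdata)
+@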
+
+
+
+\renewcommand{\arraystretch}{1.3}
+\begin{table}[tt]
+\begin{center}
+\begin{tabular}{ll}
+\hline 
+\hline 
+% --------------------------------------
+Model       & $\bm{\eta}^{\top}$ \\
+\hline
+$\calM_{0}$/$\calM_{h}$ & $g(p)$ \\
+% --------------------------------------
+$\calM_{b}$/$\calM_{bh}$ & $(g(p_c), g(p_r))$ \\
+% --------------------------------------
+$\calM_{t}$/$\calM_{th}$ & $(g(p_{1}),\ldots,g(p_{\tau}))$ \\ 
+% --------------------------------------
+$\calM_{tb}$/$\calM_{tbh}$ \ \ \ &
+$(g(p_{c1}),\ldots,g(p_{c\tau}),g(p_{r2}),\ldots,g(p_{r\tau}))$ \\
+\hline
+% --------------------------------------
+% --------------------------------------
+\hline
+Model \ \ \ \ \ & \code{family =} \\
+\hline
+%--------------------------------------
+$\calM_{0}$/$\calM_{h}$ & \code{posbinomial(omit.constant = TRUE)} \\
+& \code{posbernoulli.b(drop.b = FALSE \mytilde{} 0)} \\
+& \code{posbernoulli.t(parallel.t = FALSE \mytilde{} 0)} \\
+& 
+\code{posbernoulli.tb(drop.b = FALSE \mytilde{} 0, parallel.t = FALSE \mytilde{} 0)} \\
+% --------------------------------------
+$\calM_{b}$/$\calM_{bh}$ &
+\code{posbernoulli.b()} \\
+&
+\code{posbernoulli.tb(drop.b = FALSE \mytilde{} 1, parallel.t = FALSE \mytilde{} 0)} \\
+% --------------------------------------
+$\calM_{t}$/$\calM_{th}$ &
+\code{posbernoulli.t()} \\
+ &
+\code{posbernoulli.tb(drop.b = FALSE \mytilde{} 0, parallel.t = FALSE \mytilde{} 1)} \\
+% --------------------------------------
+$\calM_{tb}$/$\calM_{tbh}$ \ \ \ &
+\code{posbernoulli.tb()} \\
+\hline
+% --------------------------------------
+\end{tabular}
+\end{center}
+\caption{Upper table gives the $\boldeta$ for the eight \citet{otis:etal:1978} 
+models. Lower table gives the relationships between the eight models 
+and function calls. See Table \ref{tab1} for definitions. 
+The $g=\logit$ link is default for all.\label{tab2}}
+\end{table}
+\renewcommand{\arraystretch}{1.0}
+
+
+%*********************************************************************
+\section[Software details for CR models in VGAM]{Software details for 
+CR models in \pkg{VGAM}}
+\label{sec:software}
+
+
+Having presented the conditional likelihood (\ref{eq:posbern.condlikelihood})
+and VGLMs/VGAMs for CR models, we further discuss the fitting in \pkg{VGAM}. 
+It is assumed that users are somewhat familiar with modelling in \proglang{R} 
+and using \code{glm()} class objects. \pkg{VGAM}, authored by TWY, uses S4 classes.
+In order to present the new \code{family} functions developed for \code{vglm()} 
+and \code{vgam()}, some additional preliminaries for \pkg{VGAM} are given below. 
+Version 0.9-4 or later is assumed, and the latest prerelease version is 
+available at \url{http://www.stat.auckland.ac.nz/~yee/VGAM/prerelease}.
+
+
+In \code{vglm()}/\code{vgam()}, both $\calM_0$ and $\calM_h$ 
+are serviced by \code{family = posbinomial()}, i.e., the 
+positive-binomial family. For models $\calM_{b}$, $\calM_{t}$ 
+and $\calM_{tb}$, each of these are serviced by their 
+corresponding \code{family = posbernoulli.}-type functions 
+as in Table \ref{tab2}. Formulas of the form \code{\mytilde{} 1} 
+correspond to $\calM_{0}$, $\calM_{b}$, $\calM_{t}$ and $\calM_{tb}$;
+otherwise they are $\calM_{h}$, $\calM_{bh}$, $\calM_{th}$ and $\calM_{tbh}$.
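+
+
+For instance (schematic calls only, assuming a hypothetical data frame
+\code{pdata} with capture history columns \code{y1}, \code{y2}, \code{y3}
+and an individual covariate \code{weight}):
+
+
+<<label = formula-sketch, eval = FALSE, prompt = FALSE>>=
+vglm(cbind(y1, y2, y3) ~ 1,      posbernoulli.b, data = pdata)  # M_b
+vglm(cbind(y1, y2, y3) ~ weight, posbernoulli.b, data = pdata)  # M_bh
+@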
+
+
+Below we describe each of the eight models with their \pkg{VGAM}
+representation and their default values, and we also give additional remarks.
+While all eight models can be fitted using \code{posbernoulli.tb()}, this is generally 
+not recommended because it is less efficient in terms of memory requirements and speed.
+
+
+\begin{table}[tt]
+\begin{center}
+\begin{tabular}{ccc}
+& \multicolumn{1}{c}{\code{ parallel.t}} 
+& \code{!parallel.t} \\[0.9em]
+\cline{2-3}
+% --------------------------------------
+\multicolumn{1}{r|}{\code{ parallel.b}} &
+\multicolumn{1}{r|}{
+%
+$\left(
+\begin{array}{ll}
+\bzero_\tau & \bone_\tau\\
+\bone _{\tau-1} \  & \bone_{\tau-1}
+\end{array} 
+\right)$,
+$\left(
+\begin{array}{l}
+\bone_\tau\\
+\bone_{\tau-1}
+\end{array} 
+\right)$ 
+}
+& % ---------------------------------------------
+\multicolumn{1}{r|}{ 
+$\left(
+\begin{array}{ll}
+\bzero_\tau & \bI_\tau\\
+\bone _{\tau-1} \  & \bI_{{\tau}[-1,]}
+\end{array} 
+\right)$,
+$\left(
+\begin{array}{l}
+\bI_\tau\\
+\bI_{{\tau}[-1,]}
+\end{array} 
+\right)$ 
+}
+\\[1.5em] % This gives problems
+% --------------------------------------
+\cline{2-3}
+% --------------------------------------
+\multicolumn{1}{r|}{\code{!parallel.b}} &
+\multicolumn{1}{r|}{
+$\left(
+\begin{array}{ll}
+\bOO_{\tau \times (\tau-1)} \  & \bone_{\tau}\\
+\bI_{\tau-1} & \bone_{\tau-1} 
+\end{array} 
+\right)$,
+%
+$\left(
+\begin{array}{l}
+\bone_\tau\\
+\bone_{\tau-1}
+\end{array} 
+\right)$ 
+}
+& % --------------------------------------
+\multicolumn{1}{r|}{ 
+$\left(
+\begin{array}{ll}
+\bOO_{\tau \times(\tau-1)} \  & \bI_{\tau}\\
+\bI_{\tau-1}& \bI_{{\tau}[-1,]}
+\end{array} 
+\right)$,
+%
+$\left(
+\begin{array}{l}
+\bI_\tau\\
+\bI_{{\tau}[-1,]}
+\end{array} 
+\right)$ 
+} 
+\\[1.5em]
+% --------------------------------------
+\cline{2-3}
+\end{tabular}
+\end{center}
+\caption{ For the general $\calM_{tbh}(\tau)$
+family \code{posbernoulli.tb()}, 
+the constraint matrices corresponding to the arguments \code{parallel.t},
+\code{parallel.b} and \code{drop.b}. In each cell the 
+LHS matrix is $\bH_k$ when \code{drop.b} is \code{FALSE} 
+for $x_k$. The RHS matrix is $\bH_k$ when \code{drop.b} is \code{TRUE} 
+for $x_k$; it simply deletes the left submatrix of the LHS $\bH_k$.
+These $\bH_k$ should be seen in light of Table \ref{tab2}.
+Notes:
+(i) the default for \code{posbernoulli.tb()}
+is $\bH_1 = $ the LHS matrix of the top-right cell
+and $\bH_k = $ the RHS matrix of the top-left cell; and
+(ii) $\bI_{{\tau}[-1,]} = (\bzero_{\tau-1} | \bI_{\tau-1})$.
+\label{tab3}}
+\end{table}
+
+
+% ---------------------------------------------------------------
+\subsection[Basic software details]{Basic software details}
+\label{sec:furthersoftware}
+
+
+All family functions except \code{posbinomial()} should have
+an $n\times\tau$ capture history matrix as the response, preferably 
+with column names. Indicators of the past capture of individual $i$, 
+defined as $z_{ij}$, are stored on \pkg{VGAM} objects as the \code{cap.hist1}
+component in the \code{extra} slot. Also, there is a component
+called \code{cap1} which indicates on which sampling occasion the
+first capture occurred.
+
+
+As will be illustrated in Section \ref{sec:poz:posbernoulli.eg.hugg:1991}, 
+a fitted CR object stores the point estimate for the population
+size estimator (\ref{eq:HT}), in the \code{extra} slot with
+component name \code{N.hat}. Likewise, its standard error (\ref{eq:est.varNhat2}) 
+has component name \code{SE.N.hat}. By default all the family functions
+return fitted values corresponding to the probabilities in the conditional 
+likelihood function (\ref{eq:posbern.condlikelihood}); however,
+Appendix B describes an alternative type of
+fitted value. The choice is made by the argument \code{type.fitted}, 
+and the fitted values are returned by the \code{fitted()} methods function.
+
+
+Notice that in Table \ref{tab2}, the \pkg{VGAM} family functions have 
+arguments such as \verb+parallel.b+ which may be assigned a logical or 
+else a formula with a logical as the response. If it is a single logical 
+then the function may or may not apply that constraint to the intercept. 
+The formula form is the most general, and some care must be taken with the 
+intercept term. Here are some examples of the syntax (a schematic call 
+follows the list):
+\begin{itemize}
+\item \code{parallel.b = TRUE \mytilde{} x2} means a parallelism assumption is
+applied to variables $x_2$ and the intercept, since
+formulas include the intercept by default. 
+ 
+\item \code{parallel.b = TRUE \mytilde{} x2-1} means a parallelism assumption is
+applied to variable $x_2$ only.
+ 
+\item \code{parallel.b = FALSE \mytilde{} 0} means a parallelism assumption 
+is applied to every variable including the intercept.
+\end{itemize}
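+
+
+A schematic call using this syntax (with a hypothetical data frame
+\code{pdata}) is:
+
+
+<<label = parallel-syntax-sketch, eval = FALSE, prompt = FALSE>>=
+vglm(cbind(y1, y2, y3) ~ weight + sex,
+     posbernoulli.tb(parallel.b = TRUE ~ weight - 1), data = pdata)
+@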
+
+
+% ---------------------------------------------------------------
+\subsection[Models M0/Mh]{Models $\calM_0$/$\calM_h$}
+\label{sec:M0Mh}
+
+
+For $\calM_0$/$\calM_h$, the defaults are given as
+
+
+<<label = poz-args-posbinomial>>=
+args(posbinomial)
+@ 
+
+
+
+Both models can alternatively be fitted using \code{posbernoulli.t()},
+\code{posbernoulli.b()} and\\ \code{posbernoulli.tb()} by setting the
+appropriate constraints/arguments (Table \ref{tab2}). For example, 
+setting \code{posbernoulli.t(parallel.t = FALSE \mytilde{} 0)}
+constrains all the $p_{j}$ to be equal.
+
+
+If comparing all eight models using \code{AIC()} or \code{BIC()} then 
+setting \code{omit.constant = TRUE} will allow for comparisons to be 
+made with the positive-Bernoulli functions given below. The reason is that 
+this omits the log-normalizing constant $\log{\tau \choose \tau{y}_i^{*}}$
+from its conditional log-likelihood so that it is comparable with the logarithm
+of (\ref{eq:posbern.condlikelihood}).
+
+
+An extreme case for $\calM_h$ is where $p_{ij} = p_i$ with $p_i$ being
+parameters in their own right \citep{otis:etal:1978}. While this could
+possibly be fitted by creating a covariate of the form \code{factor(1:n)}, 
+there would be far too many parameters for comfort. Such an extreme 
+case is not recommended, in order to avoid over-parameterization.
+
+
+% ---------------------------------------------------------------
+\subsection[Models Mt/Mth]{Models $\calM_t$/$\calM_{th}$}
+\label{sec:MtMth}
+
+
+<<label = poz-args-posbernoulli-t>>=
+args(posbernoulli.t)
+@ 
+
+
+
+Note that for $\calM_t$, capture probabilities are the same for each 
+individual but may vary with time, i.e., $H_0: p_{ij} = p_{j}$.
+One might wish to constrain the probabilities of a subset
+of sampling occasions to be equal by forming the appropriate
+constraint matrices.
+
+
+Argument \code{iprob} is for an optional initial value for the
+probability; however, all \pkg{VGAM} family functions are self-starting
+and usually do not need such input.
+
+
+% ---------------------------------------------------------------
+\subsection[Models Mb/Mbh]{Models $\calM_b$/$\calM_{bh}$}
+\label{sec:MbMbh}
+
+
+<<label = poz-args-posbernoulli-b>>=
+args(posbernoulli.b)
+@
+
+
+
+Setting \code{drop.b = FALSE \mytilde{} 0} assumes 
+there is no behavioural effect and this reduces to $\calM_0$/$\calM_h$.
+The default constraint matrices are
+\[\bH_1 =\left(\begin{array}{cc}
+0 & 1 \\ 1 & 1 \\
+\end{array} \right), \ \ \ \ \bH_2 = \cdots = \bH_d =
+\left(\begin{array}{c}
+1 \\ 1 \\
+\end{array}\right)\]
+so that the first coefficient $\beta_{(1)1}^{*}$ corresponds to the 
+behavioural effect. Section \ref{sec:poz:posbernoulli.eg.ephemeral}
+illustrates how the VGLM/VGAM framework can handle short-term and
+long-term behavioural effects.
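+
+
+For concreteness, the default $\bH_k$ displayed above could be
+constructed in \proglang{R} as follows (a sketch only; the family
+function sets these up automatically):
+
+
+<<label = Mb-H-sketch, eval = FALSE, prompt = FALSE>>=
+(H1 <- matrix(c(0, 1, 1, 1), nrow = 2))  # Columns: behavioural effect, intercept
+(Hk <- matrix(1, nrow = 2, ncol = 1))    # H_2 = ... = H_d
+@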
+
+
+% ---------------------------------------------------------------
+\subsection[Models Mtb/Mtbh]{Models $\calM_{tb}$/$\calM_{tbh}$}
+\label{sec:MtbMtbh}
+
+
+There are three arguments which determine whether there 
+are behavioural effects and/or time effects: \code{parallel.b},
+\code{parallel.t} and \code{drop.b}. The last two are as above.
+The defaults are
+
+
+
+<<label = poz-args-posbernoulli-tb>>=
+args(posbernoulli.tb)
+@
+
+
+
+One would usually want to keep the behavioural effect equal over
+different sampling occasions; therefore \code{parallel.b} should 
+normally be left at its default. Allowing it to be \code{FALSE} for a
+covariate $x_k$ means an additional $\tau-1$ parameters, something
+that is not warranted unless the data set is very large and/or the
+behavioural effect varies greatly over time.
+
+
+
+Arguments \code{ridge.constant} and \code{ridge.power}
+concern the working weight matrices and are explained in 
+Appendix A. 
+
+
+
+Finally, we note that using
+
+
+
+<<label = poz-posbernoulli-tb-gen, prompt=FALSE, eval=FALSE>>=
+vglm(..., family = posbernoulli.tb(parallel.b = TRUE ~ 0, parallel.t = TRUE ~ 0,
+                                   drop.b = TRUE ~ 0))
+@
+
+
+
+fits the most general model. Its formula is effectively (\ref{gammod2})
+for $M=2\tau-1$, hence there are $(2\tau-1)d$ regression coefficients
+in total---far too many for most data sets.
+
+
+
+%*********************************************************************
+\newpage
+\section[Examples]{Examples}
+\label{sec:body:exam}
+
+
+We present several examples using \pkg{VGAM} on both real-life and
+simulated CR data.
+
+
+% ---------------------------------------------------------------
+\subsection[Deer mice]{Deer mice}
+\label{sec:deer.mice}
+
+
+Our first example uses a well-known data set analyzed in both \citet{hugg:1991} 
+and \citet{amst:mcdo:manl:2005}. The CR data were collected on the deer mouse 
+(\textit{Peromyscus maniculatus}), a small rodent native to North America, 
+and about 8 to 10 cm long, not counting the length of the tail. There 
+were $n=38$ individual mice caught over $\tau=6$ trapping occasions. 
+Individual body weight, sex and age (young or adult) were also recorded, 
+which we used as covariates to model heterogeneity. The data are given 
+in the following data frame \code{deermice}:
+
+
+<<label = eg-deermice-look>>=
+head(deermice, 4)
+@
+
+
+Each row represents the capture history followed by the corresponding 
+covariate values for each observed individual. We compared our 
+results with those given in \citet{hugg:1991}, who
+reported an analysis which involved fitting all eight model variations. 
+Prior to this we relabelled the age and sex covariates to match those
+given in \citet{hugg:1991}.
+
+
+<<label = example1-model>>=
+deermice <- within(deermice, {
+  age <- 2 - as.numeric(age) 
+  sex <- 1 - as.numeric(sex)
+})
+@
+
+
+Below we demonstrate model fitting for each model in \pkg{VGAM}:
+
+
+<<label = example2-model>>=
+M.0 <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ 1,
+            posbernoulli.t(parallel = TRUE ~ 1), data = deermice)
+M.b <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ 1, 
+            posbernoulli.b, data = deermice)
+M.t <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ 1, 
+            posbernoulli.t, data = deermice)
+M.h <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+            posbernoulli.t(parallel = TRUE ~ weight + sex + age), data = deermice)
+M.th <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+             posbernoulli.t, data = deermice)
+M.tb <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ 1, 
+             posbernoulli.tb, data = deermice)
+M.bh <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+             posbernoulli.b, data = deermice)
+M.tbh <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+              posbernoulli.tb, data = deermice)
+@
+
+
+Notice that \code{parallel = TRUE} was used for models $\calM_{0}/\calM_{h}$. 
+Population size estimates with standard errors (SEs), log-likelihood and 
+AIC values can all be easily obtained; for example, 
+consider model $\calM_{bh}$:
+
+
+<<label = eg-deermice-Nhat>>=
+c(M.bh@extra$N.hat, M.bh@extra$SE.N.hat)
+c(logLik(M.bh), AIC(M.bh))
+@
+
+
+We did this for each model, and obtained the following:
+
+
+<<maketable, echo=FALSE, results=hide, message=FALSE, warning=FALSE>>=
+
+Table <- rbind(c(round(M.tbh@extra$N.hat,2), 
+                 round(M.bh@extra$N.hat,2), 
+                 round(M.tb@extra$N.hat,2), 
+                 round(M.th@extra$N.hat,2), 
+                 round(M.h@extra$N.hat,2), 
+                 round(M.b@extra$N.hat,2),
+                 round(M.t@extra$N.hat,2), 
+                 round(M.0@extra$N.hat,2)), 
+               
+              c(round(M.tbh@extra$SE.N.hat,2), 
+                round(M.bh@extra$SE.N.hat,2), 
+                round(M.tb@extra$SE.N.hat,2),
+                round(M.th@extra$SE.N.hat,2), 
+                round(M.h@extra$SE.N.hat,2), 
+                round(M.b@extra$SE.N.hat,2),
+                round(M.t@extra$SE.N.hat,2), 
+                round(M.0@extra$SE.N.hat,2)), 
+               
+              -2*c(round(logLik(M.tbh),2), 
+                   round(logLik(M.bh),2), 
+                   round(logLik(M.tb),2), 
+                   round(logLik(M.th),2), 
+                   round(logLik(M.h),2), 
+                   round(logLik(M.b),2),
+                   round(logLik(M.t),2), 
+                   round(logLik(M.0),2)), 
+               
+              c(round(AIC(M.tbh),2), 
+                round(AIC(M.bh),2), 
+                round(AIC(M.tb),2), 
+                round(AIC(M.th),2),
+                round(AIC(M.h),2), 
+                round(AIC(M.b),2), 
+                round(AIC(M.t),2), 
+                round(AIC(M.0),2)));
+
+colnames(Table) <- c("M.tbh", "M.bh", "M.tb", 
+                     "M.th", "M.h", "M.b", "M.t", "M.0");
+rownames(Table) <- c("N.hat", "SE","-2ln(L)", "AIC");
+@
+
+
+
+<<label = example2-table>>=
+Table
+@
+
+
+Based on the AIC, it was concluded that $\calM_{bh}$ was 
+superior (although other criteria can also be considered), 
+yielding the following coefficients (as well as their SEs):
+
+
+<<label = poz-posbernoulli-eg-deermice-coefs>>=
+round(coef(M.bh), 2)
+round(sqrt(diag(vcov(M.bh))), 2)
+@
+
+
+which, along with the estimates for the population size, 
+agree with the results of \citet{hugg:1991}.
+The first coefficient, \Sexpr{round(coef(M.bh)[1],2)}, is positive 
+and hence implies a trap-happy effect.
+
+
+Now to illustrate the utility of fitting VGAMs, we performed some model
+checking on $\calM_{bh}$ by confirming that the component function
+of \code{weight} is indeed linear. To do this, we smoothed this covariate 
+but did not allow it to be too flexible due to the size of the data set.
+
+
+<<label = poz-posbernoulli-eg-deermice-smooth, fig.keep = 'none', message=FALSE, warning=FALSE>>=
+fit.bh <- vgam(cbind(y1, y2, y3, y4, y5, y6) ~ s(weight, df = 3) + sex + age,
+               posbernoulli.b, data = deermice)
+plot(fit.bh, se = TRUE, las = 1, lcol = "blue", scol = "orange",
+     rcol = "purple", scale = 5)
+@
+
+
+Notice that the \code{s()} function was used to smooth over 
+the weight covariate with the equivalent degrees of freedom set 
+to 3. Plots of the estimated component functions against each 
+covariate are given in Figure \ref{fig:poz:deermice}.
+In general, \code{weight} does seem to have a (positive) 
+linear effect on the logit scale. Young deer mice
+appear to be more easily caught than adults,
+and gender seems to have a smaller effect than weight.
+A more formal test of linearity is
+
+
+<<label = poz-posbernoulli-eg-deermice-summary>>=
+summary(fit.bh)
+@ 
+
+
+and, not surprisingly, this suggests there is no significant nonlinearity.
+This is in agreement with Section 6.1 of \citet{hwan:hugg:2011}, 
+who used kernel smoothing.
+
+
+Section \ref{sec:poz:posbernoulli.eg.ephemeral}
+reports a further analysis of the \code{deermice} data using a
+behavioural effect comprising long-term and
+short-term memory.
+
+
+<<label = poz-posbernoulli-eg-deermice-smooth-shadow, eval=FALSE, echo = FALSE, message=FALSE, warning=FALSE>>=
+plot(fit.bh, se = TRUE, las = 1, lcol = "blue", scol = "orange",
+     rcol = "purple", scale = 5, mgp = c(2.0, 1, 0))
+@
+
+
+
+
+
+% ---------------------------------------------------------------------
+\begin{figure}[tt]
+\begin{center}
+<<plot-deermice, width=6.0, height=5, echo=FALSE, message=FALSE, warning=FALSE>>=
+par(mfrow = c(2, 2))
+par(las = 1, cex = 1.1, mar = c(3.8, 4, 0.5, 0.2) + 0.1)
+par(mgp = c(2.3, 1, 0))  # Default is c(3, 1, 0)
+
+
+
+plot(fit.bh, se = TRUE, las = 1, lcol = "blue", scol = "orange",
+     rcol = "purple", scale = 5, mgp = c(2.0, 1, 0))
+
+# < < poz-posbernoulli-eg-deermice-smooth-shadow> >
+
+
+
+
+@
+\caption{Estimated component functions with approximate $\pm 2$ 
+pointwise SE bands fitting a $\calM_{bh}$-VGAM, using 
+the \code{deermice} data. The rugplot gives jittered values of 
+each covariate value $x_{ik}$.\label{fig:poz:deermice}
+}
+\end{center}
+\end{figure}
+
+
+% ---------------------------------------------------------------------
+
+
+<<birds91read, echo = FALSE>>=
+data("prinia", package = "VGAM")
+@
+
+
+\subsection[Yellow-bellied Prinia]{Yellow-bellied Prinia}
+\label{sec:bird}
+
+
+Our second example also uses a well-known and well-studied data 
+set collected on the Yellow-bellied Prinia (\textit{Prinia flaviventris}),
+a common bird species located in Southeast Asia. A CR experiment was 
+conducted at the Mai Po Nature Reserve in Hong Kong during 1991, 
+where captured individuals had their wing lengths measured and 
+fat index recorded. A total of $\tau=19$ weekly capture occasions 
+were considered, where $n=151$ distinct birds were captured. In previous
+studies, models $\calM_h$ and $\calM_{th}$ have both been fitted 
+to these data, where both wing length and fat index were used as
+covariates. We focus our attention on the former model and consider
+the \code{posbinomial()} function, with some further emphasis on
+demonstrating smoothing on covariates. The \code{prinia} data
+consist of four columns, with one row per observed
+individual:
+
+
+<<label = example2a, size = "small">>=
+head(prinia, 4)[, 1:4]
+@
+
+
+The first two columns give the observed covariate values for each
+individual, followed by the number of times each individual was captured/not
+captured respectively (columns 3--4). Notice that the wing
+length (\code{length}) was standardized here.
+We considered smoothing over the wing length, 
+and plotted the fitted capture probabilities against wing length, 
+with and without fat content present; see Figure \ref{fig:bird}.
+
+
+<<label = example2b>>=
+M.h.GAM <- 
+  vgam(cbind(cap, noncap) ~ s(length, df = 3) + fat, 
+       posbinomial(omit.constant = TRUE, parallel = TRUE ~ s(length, df = 3) + fat),
+       data = prinia)
+M.h.GAM@extra$N.hat
+M.h.GAM@extra$SE.N.hat
+@
+
+
+<<label = eg-bird-smooth-shadow1, echo=FALSE, fig.keep = 'none', message = FALSE, warning = FALSE>>=
+plot.info <- plot(M.h.GAM,
+                  se = TRUE, las = 1, plot.arg = FALSE,
+                  lcol = "blue",
+                  scol = "orange",
+                  rcol = "purple",
+                  scale = 5)
+@
+
+
+
+<<label = eg-bird-smooth-shadow2, echo=FALSE, eval=FALSE>>=
+info.fit2 <- plot.info@preplot[[1]]
+fat.effect <- coef(M.h.GAM)["fat"] 
+intercept <- coef(M.h.GAM)["(Intercept)"]  
+
+ooo <- order(info.fit2$x)
+centering.const <- mean(prinia$length) - coef(M.h.GAM)["s(length, df = 3)"]
+
+plotframe <- data.frame(lin.pred.b = intercept + fat.effect * 1 +
+                                     centering.const + info.fit2$y[ooo],
+                        lin.pred.0 = intercept + fat.effect * 0 +
+                                     centering.const + info.fit2$y[ooo],
+                        x2 = info.fit2$x[ooo])
+
+plotframe <- transform(plotframe,
+                       up.lin.pred.b = lin.pred.b + 2*info.fit2$se.y[ooo],
+                       lo.lin.pred.b = lin.pred.b - 2*info.fit2$se.y[ooo],
+                       up.lin.pred.0 = lin.pred.0 + 2*info.fit2$se.y[ooo],
+                       lo.lin.pred.0 = lin.pred.0 - 2*info.fit2$se.y[ooo])
+
+plotframe <- transform(plotframe,
+                       fv.b    = logit(lin.pred.b,    inverse = TRUE),
+                       up.fv.b = logit(up.lin.pred.b, inverse = TRUE),
+                       lo.fv.b = logit(lo.lin.pred.b, inverse = TRUE),
+                       fv.0    = logit(lin.pred.0,    inverse = TRUE),
+                       up.fv.0 = logit(up.lin.pred.0, inverse = TRUE),
+                       lo.fv.0 = logit(lo.lin.pred.0, inverse = TRUE))
+
+with(plotframe,
+     matplot(x2, cbind(up.fv.b, fv.b, lo.fv.b), type = "l", col = "blue",
+             lty = c(2, 1, 2), las = 1, cex.lab = 1.5, lwd = 2,
+             main = "", ylab = "", xlab = "Wing length (standardized)"))
+mtext( ~ hat(p), side = 2, cex = 1.4, line = 4, adj = 0.5, las = 1)
+with(plotframe, matlines(x2, cbind(up.fv.0, fv.0, lo.fv.0), lwd = 2,
+                         col = "darkorange", lty = c(2, 1, 2)))
+legend("topleft", legend = c("Fat present", "Fat not present"), bty = "n",
+       lwd = 2, col = c("blue", "darkorange"), merge = TRUE, cex = 1.5)
+@
+
+
+
+
+
+
+
+
+\begin{figure}[t]
+\begin{center}
+<<plot-bird, width=6.0, height=5.5, echo=FALSE, message=FALSE, warning=FALSE>>=
+par(mfrow = c(1, 1))
+
+
+
+info.fit2 <- plot.info@preplot[[1]]
+fat.effect <- coef(M.h.GAM)["fat"] 
+intercept <- coef(M.h.GAM)["(Intercept)"]  
+
+ooo <- order(info.fit2$x)
+centering.const <- mean(prinia$length) - coef(M.h.GAM)["s(length, df = 3)"]
+
+plotframe <- data.frame(lin.pred.b = intercept + fat.effect * 1 +
+                                     centering.const + info.fit2$y[ooo],
+                        lin.pred.0 = intercept + fat.effect * 0 +
+                                     centering.const + info.fit2$y[ooo],
+                        x2 = info.fit2$x[ooo])
+
+plotframe <- transform(plotframe,
+                       up.lin.pred.b = lin.pred.b + 2*info.fit2$se.y[ooo],
+                       lo.lin.pred.b = lin.pred.b - 2*info.fit2$se.y[ooo],
+                       up.lin.pred.0 = lin.pred.0 + 2*info.fit2$se.y[ooo],
+                       lo.lin.pred.0 = lin.pred.0 - 2*info.fit2$se.y[ooo])
+
+plotframe <- transform(plotframe,
+                       fv.b    = logit(lin.pred.b,    inverse = TRUE),
+                       up.fv.b = logit(up.lin.pred.b, inverse = TRUE),
+                       lo.fv.b = logit(lo.lin.pred.b, inverse = TRUE),
+                       fv.0    = logit(lin.pred.0,    inverse = TRUE),
+                       up.fv.0 = logit(up.lin.pred.0, inverse = TRUE),
+                       lo.fv.0 = logit(lo.lin.pred.0, inverse = TRUE))
+
+with(plotframe,
+     matplot(x2, cbind(up.fv.b, fv.b, lo.fv.b), type = "l", col = "blue",
+             lty = c(2, 1, 2), las = 1, cex.lab = 1.5, lwd = 2,
+             main = "", ylab = "", xlab = "Wing length (standardized)"))
+mtext( ~ hat(p), side = 2, cex = 1.4, line = 4, adj = 0.5, las = 1)
+with(plotframe, matlines(x2, cbind(up.fv.0, fv.0, lo.fv.0), lwd = 2,
+                         col = "darkorange", lty = c(2, 1, 2)))
+legend("topleft", legend = c("Fat present", "Fat not present"), bty = "n",
+       lwd = 2, col = c("blue", "darkorange"), merge = TRUE, cex = 1.5)
+
+
+
+@
+\caption{
+Capture probability estimates with approximate $\pm 2$ pointwise SEs,
+versus wing length, with (blue) and without (orange) fat content present,
+from fitting an $\calM_{h}$-VGAM to the \code{prinia} data.
+Notice that the standard errors are wider at the boundaries.
+\label{fig:bird}
+}
+\end{center}
+\end{figure}
+
+
+Both the population size estimate and the shape of the fitted
+capture probabilities with smoothing (Figure \ref{fig:bird}) matched
+those in previous studies, e.g., see Figure 1 of \citet{hwan:hugg:2007}.
+Notice that the capture probabilities are larger for individuals with
+fat content present, and that the approximate $\pm 2$ pointwise SEs
+become wider at the boundaries---a feature commonly seen
+in smooths.
+
+
+% ------------------------------------------------------------
+\subsection[A time-varying covariate example]{A time-varying covariate example}
+\label{sec:poz:posbernoulli.eg.hugg:1991}
+
+
+To illustrate time-varying covariates in the $\calM_{th}$
+and $\calM_{tbh}$ models via the \code{xij} argument, we
+mimicked the results of \citet{hugg:1989}, who fitted the $\calM_{tbh}$
+model to a small simulated data set of $n=18$ observed individuals
+and $\tau=10$ trapping occasions. To help illustrate the procedure
+we also fitted model $\calM_{th}$. The true population size was $N=20$.
+For the $i$th individual, model $\calM_{th}$ is written
+as ($i=1,\ldots,18$, $j=1,\ldots,10$)
+\begin{eqnarray}
+\label{eq:huggins89t0}
+\logit \, p_{ij} & = & \beta_{(1)1}^{*} + \beta_{(1)2}^{*} 
+\cdot \mathtt{x2}_{i} + \beta_{(1)3}^{*} \cdot \mathtt{x3}_{j}, \ \ %%
+\end{eqnarray}
+and model $\calM_{tbh}$ as ($i=1,\ldots,18$, $j=1,\ldots,10$)
+\begin{eqnarray}
+\label{eq:huggins89t1}
+\logit \, p_{ij} &=&
+\beta_{(1)1}^{*} \,z_{ij} + \beta_{(2)1}^{*} + \beta_{(1)2}^{*} 
+\cdot \mathtt{x2}_{i} + \beta_{(1)3}^{*} \cdot \mathtt{x3}_{j}, \ \ \ %%
+\end{eqnarray}
+where $\beta_{(1)1}^{*}$ in (\ref{eq:huggins89t1}) is the behavioural
+effect and $z_{ij}$ is defined in Table \ref{tab0}. Variable \code{x2}
+is an ordinary individual covariate, such as weight, as in the previous
+examples. Variable \code{x3} is a time-varying or occasion-specific
+covariate, such as temperature or daily rainfall, that is handled using
+the \code{xij} argument described in Section \ref{sec:xij}. Note that
+the environmental covariates enter the $\eta_j$ both for individuals
+that have been previously captured and for those that have not, so that
+if a behavioural response is included in the model (e.g., $\calM_{tbh}$)
+these covariates must be repeated to construct the overall model matrix.
+Also, note that there can be no recaptures on the first occasion, so that
+the environmental variable for this occasion
+need not be repeated. We first examined the data:
+
+
+<<label = poz-posbernoulli-tb-huggins89t1-data>>=
+head(Huggins89table1, 4)
+@
+
+
+The time-varying, occasion-specific covariate \code{x3}
+is represented by the variables \code{t01}--\code{t10}. As noted above,
+we need to construct \code{T02}--\code{T10} to model the recapture
+probabilities through $\eta_{j}$ for $j=11,\ldots,19$:
+
+
+<<label = poz-posbernoulli-tb-huggins89t1-look>>=
+Hdata <- transform(Huggins89table1, x3.tij = t01,
+                   T02 = t02, T03 = t03, T04 = t04, T05 = t05, T06 = t06,
+                   T07 = t07, T08 = t08, T09 = t09, T10 = t10)
+Hdata <- subset(Hdata,
+                y01 + y02 + y03 + y04 + y05 + y06 + y07 + y08 + y09 + y10 > 0)
+@
+  
+  
+The last step deletes the two observations that were never caught, so that $n=18$.
+Thus model (\ref{eq:huggins89t0}) can be fitted by
+
+
+<<label = poz-posbernoulli-th-huggins89t0-fit>>=
+fit.th <-
+   vglm(cbind(y01, y02, y03, y04, y05, y06, y07, y08, y09, y10) ~  x2 + x3.tij,
+        xij = list(x3.tij ~ t01 + t02 + t03 + t04 + t05 + t06 + t07 + t08 +
+                            t09 + t10 - 1),
+        posbernoulli.t(parallel.t = TRUE ~ x2 + x3.tij), 
+        data = Hdata, trace = FALSE, 
+        form2 = ~ x2 + x3.tij + t01 + t02 + t03 + t04 + t05 + t06 + t07 + t08 +
+                                t09 + t10)
+@
+
+
+The \code{form2} argument is required when \code{xij} is used,
+and it needs to include all the variables in the model. From this
+formula a very large model matrix is constructed, from which
+the relevant columns are extracted to construct the diagonal matrix
+in (\ref{eq:vglimo:xij.vector.diag}), in the order of diagonal
+elements specified by \code{xij}. The variable names must be unique.
+To check the constraint matrices we can use
+
+
+<<label = poz-posbernoulli-th-huggins89t0-constraints>>=
+constraints(fit.th, matrix = TRUE)
+@
+
+
+Model (\ref{eq:huggins89t1}) can be fitted by
+
+
+<<label = poz-posbernoulli-tbh-huggins89t1-fit>>=
+fit.tbh <-
+  vglm(cbind(y01, y02, y03, y04, y05, y06, y07, y08, y09, y10) ~  x2 + x3.tij,
+       xij = list(x3.tij ~ t01 + t02 + t03 + t04 + t05 + t06 +
+                                 t07 + t08 + t09 + t10 +
+                                 T02 + T03 + T04 + T05 + T06 +
+                                 T07 + T08 + T09 + T10 - 1),
+       posbernoulli.tb(parallel.t = TRUE ~ x2 + x3.tij),
+       data = Hdata, trace = FALSE,
+       form2 = ~  x2 + x3.tij +
+                  t01 + t02 + t03 + t04 + t05 + t06 + t07 + t08 + t09 + t10 +
+                        T02 + T03 + T04 + T05 + T06 + T07 + T08 + T09 + T10)
+@
+
+
+To compare with model (\ref{eq:huggins89t0}) we have
+
+
+<<label = poz-posbernoulli-tbh-huggins89t1-aic>>=
+c(logLik(fit.th), AIC(fit.th))
+c(logLik(fit.tbh), AIC(fit.tbh))
+@
+
+
+so the behavioural response model does indeed give a better fit.
+As a check, the constraint matrices are (cf. Table \ref{tab3})
+
+
+<<label = poz-posbernoulli-tb-huggins89t1-constraints>>=
+head(constraints(fit.tbh, matrix = TRUE), 4)
+tail(constraints(fit.tbh, matrix = TRUE), 4)
+@
+
+
+The coefficients $\widehat{\beta}_{(j)k}^{*}$ and their standard errors are
+
+
+<<label = poz-posbernoulli-tb-huggins89t1-coefs>>=
+coef(fit.tbh)
+sqrt(diag(vcov(fit.tbh))) 
+@
+
+
+The first coefficient, \Sexpr{round(coef(fit.tbh)[1], 2)}, is positive and
+hence implies a trap-happy effect. The Wald statistic for the behavioural effect,
+being \Sexpr{round(c(coef(fit.tbh) / sqrt(diag(vcov(fit.tbh))))[1], 2)},
+suggests the effect is real.
+
+
+Estimates of the population size can be obtained from
+
+
+<<label = poz-posbernoulli-tb-huggins89t1-Nhat>>=
+fit.tbh@extra$N.hat
+fit.tbh@extra$SE.N.hat
+@
+
+
+This compares with $\widehat{N}=20.86$ with a standard error 
+of $4.51$ \citep{hugg:1989}.
+
+
+In closing, we refit model \code{fit.tbh} using
+\code{Select()} to show how manual specification of
+cumbersome formulas and response matrices
+with many columns can be avoided. For example, suppose \code{pdata} is a data frame
+with columns \code{y01}, \code{y02}, \ldots, \code{y30}.
+Then \code{Select(pdata, "y")} will return the matrix
+\code{cbind(y01, y02, \ldots, y30)}, provided there are no other
+variables beginning with \code{"y"}.
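+
+
+As a minimal sketch of this behaviour (the data frame \code{pdata}
+below is simulated purely for illustration):
+
+
+<<label = select-sketch, eval = FALSE>>=
+pdata <- data.frame(matrix(rbinom(5 * 30, size = 1, prob = 0.5), 5, 30,
+                           dimnames = list(NULL, sprintf("y%02d", 1:30))))
+dim(Select(pdata, "y"))  # 5 x 30; the same as cbind(y01, ..., y30)
+@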
+
+
+Starting with \code{Huggins89table1}, the following
+code works quite generally, provided the original variables
+are labelled \code{y01}, \code{y02}, \ldots,
+and \code{t01}, \code{t02}, \ldots.
+The code makes a copy of \code{cbind(t01,\ldots,t10)}
+for the capture probabilities
+and names the copied variables \code{cbind(T01,\ldots,T10)}
+for the recapture probabilities.
+Also, \code{Form2} contains more variables than are needed.
+
+
+<<label = poz-posbernoulli-tbh-huggins89t1-fit-Select, eval=T>>=
+Hdata <- subset(Huggins89table1, rowSums(Select(Huggins89table1, "y")) > 0)
+Hdata.T <- Select(Hdata, "t")
+colnames(Hdata.T) <- gsub("t", "T", colnames(Hdata.T))
+Hdata <- data.frame(Hdata, Hdata.T)
+Hdata <- transform(Hdata, x3.tij = y01)
+Form2 <- Select(Hdata, prefix = TRUE, as.formula = TRUE)
+Xij   <- Select(Hdata, c("t", "T"), as.formula = TRUE,
+                sort = FALSE, rhs = "0", lhs = "x3.tij", exclude = "T01")
+fit.tbh <- vglm(Select(Hdata, "y") ~ x2 + x3.tij,
+                form2 = Form2,  xij = list(Xij),
+                posbernoulli.tb(parallel.t = TRUE ~ x2 + x3.tij),
+                data = Hdata, trace = FALSE)
+coef(fit.tbh)
+@
+
+
+Note that this illustrates the ability to enter a matrix response without
+an explicit \code{cbind()}; e.g., \code{Y <- Select(Hdata, "y")} followed
+by the invocation \code{vglm(Y \mytilde{}} $\cdots$\code{)} would work as well.
+However, using \code{cbind()} encourages the use of column names,
+which is good style and avoids potential coding errors.
+
+
+% ------------------------------------------------------------
+\subsection[Ephemeral and enduring memory]{Ephemeral and enduring memory}
+\label{sec:poz:posbernoulli.eg.ephemeral}
+
+
+\cite{yang:chao:2005} consider modelling the behavioural effect
+with both enduring (long-term) and ephemeral (short-term) memory components.
+For example, the short-term component depends on whether or not the animal
+was caught on the most recent sampling occasion. We call this a lag-1 effect.
+In the example of this section, which combines aspects of
+Sections \ref{sec:deer.mice} 
+and \ref{sec:poz:posbernoulli.eg.hugg:1991}, we illustrate how this
+may be easily achieved
+within the VGLM framework; it is another case of using the \code{xij}
+argument. We retain the enduring component as with the $\calM_{tbh}$:
+$\bH_1$ contains a column that applies to all the recapture probabilities. 
+For simplicity, we first consider a lag-1 effect only
+(as in \cite{yang:chao:2005}) 
+for the short-term component.
+
+
+In the following, we fit a $\calM_{tbh}$ model to \code{deermice}
+with both long-term and short-term effects:
+\begin{eqnarray*}
+\mathrm{logit}\, p_{cs} &=&
+\beta_{(2)1}^{*} + 
+\beta_{(1)2}^{*} \, \mathtt{sex} +
+\beta_{(1)3}^{*} \, \mathtt{weight},
+\\
+\mathrm{logit}\, p_{rt} &=&
+\beta_{(1)1}^{*} + \beta_{(2)1}^{*} + 
+\beta_{(1)2}^{*} \, \mathtt{sex} +
+\beta_{(1)3}^{*} \, \mathtt{weight} +
+\beta_{(1)4}^{*} \, y_{t-1},
+\end{eqnarray*}
+where $s=2,\ldots,\tau$, $t=1,\ldots,\tau$ and $\tau=6$.
+
+
+<<label = poz-posbernoulli-bh-ephemeral-method1>>=
+deermice <- transform(deermice, Lag1 = y1)
+M.tbh.lag1 <-
+  vglm(cbind(y1, y2, y3, y4, y5, y6) ~ sex + weight + Lag1,
+       posbernoulli.tb(parallel.t = FALSE ~ 0,
+                       parallel.b = FALSE ~ 0,
+                       drop.b = FALSE ~ 1),
+       xij = list(Lag1 ~ fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                         fill(y5) + fill(y6) +
+                         y1 + y2 + y3 + y4 + y5),
+       form2 = ~ sex + weight + Lag1 +
+                 fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                 fill(y5) + fill(y6) +
+                 y1 + y2 + y3 + y4 + y5 + y6,
+       data = deermice)
+coef(M.tbh.lag1)
+@ 
+
+
+The coefficient of \code{Lag1}, \Sexpr{round(coef(M.tbh.lag1)["Lag1"], digits = 4)},
+is the estimated ephemeral effect $\widehat{\beta}_{(1)4}^{*}$.
+The estimated enduring effect $\widehat{\beta}_{(1)1}^{*}$
+has value \Sexpr{round(coef(M.tbh.lag1)["(Intercept):1"], digits = 4)}.
+Note that the \code{fill()} function is used to create 6 variables
+having 0 values, i.e., $\bzero_n$.
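+
+
+A minimal sketch of this (assuming \code{deermice} is loaded, as above):
+
+
+<<label = fill-sketch, eval = FALSE>>=
+head(fill(deermice$y1), 3)  # a column of 0s, one row per individual
+@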
+
+
+There is an alternative method to fit the above model: here we
+set $\bH_{\mathtt{Lag1}} = (\bzero_{\tau}^{\top}, \bone_{\tau-1}^{\top})^{\top}$,
+and the variables \code{fill(y1)},\ldots,\code{fill(y6)}
+can be replaced by variables that need not be 0.
+Importantly, the two methods give the same
+$\bX^{\#}_{(ik)}\bH_k$ in (\ref{eq:vglimo:xij.vector.diag}).
+This second method requires the constraint matrices to be
+input via the \code{constraints} argument.
+For example,
+
+
+<<label = poz-posbernoulli-bh-ephemeral-method2>>=
+deermice <- transform(deermice, Lag1 = y1)
+deermice <- transform(deermice, f1 = y1, f2 = y1, f3 = y1, f4 = y1,
+                                f5 = y1, f6 = y1)
+tau <- 6
+H2 <- H3 <- cbind(rep(1, 2*tau-1))
+H4 <- cbind(c(rep(0, tau), rep(1, tau-1)))
+M.tbh.lag1.method2 <-
+  vglm(cbind(y1, y2, y3, y4, y5, y6) ~ sex + weight + Lag1,
+       posbernoulli.tb(parallel.b = TRUE ~ 0, parallel.t = TRUE ~ 0),
+       constraints = list("(Intercept)" = cbind(H4, 1), sex = H2, weight = H3,
+                          Lag1 = H4),
+       xij = list(Lag1 ~ f1 + f2 + f3 + f4 + f5 + f6 +
+                         y1 + y2 + y3 + y4 + y5),
+       form2 = Select(deermice, prefix = TRUE, as.formula = TRUE),
+       data = deermice)
+coef(M.tbh.lag1.method2)
+@ 
+
+
+is identical. In closing, we note that
+more complicated models can be handled.
+For example, \code{pmax()} can be used to handle lag-2 effects,
+as follows.
+
+
+<<label = poz-posbernoulli-bh-ephemeral-lag2>>=
+deermice <- transform(deermice, Lag2 = y1)
+M.bh.lag2 <-
+  vglm(cbind(y1, y2, y3, y4, y5, y6) ~ sex + weight + Lag2,
+       posbernoulli.tb(parallel.t = FALSE ~ 0,
+                       parallel.b = FALSE ~ 0,
+                       drop.b = FALSE ~ 1),
+       xij = list(Lag2 ~ fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                         fill(y5) + fill(y6) +
+                         y1 + pmax(y1, y2) + pmax(y2, y3) + pmax(y3, y4) + 
+                         pmax(y4, y5)),
+       form2 = ~ sex + weight + Lag2 +
+                 fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                 fill(y5) + fill(y6) +
+                 y1 + pmax(y1, y2) + pmax(y2, y3) + pmax(y3, y4) + 
+                 pmax(y4, y5) + y6,
+       data = deermice)
+coef(M.bh.lag2)
+@ 
+
+
+Models with separate lag-1 and lag-2 effects may be estimated similarly.
+
+
+
+
+
+
+
+%*********************************************************************
+\section[Discussion]{Discussion}
+\label{sec:discussion}
+
+
+We have shown how the VGLM/VGAM framework naturally handles the
+conditional-likelihood, closed-population CR models in a GLM-like
+manner. Recently, \citet{stok:2011} proposed a partial likelihood approach for
+heterogeneous models with covariates. There, the recaptures of the
+observed individuals were modelled, which yielded a binomial distribution,
+and hence a GLM/GAM framework in \proglang{R} is also possible. However, some
+efficiency is lost, as any individuals captured only once on the last
+occasion are excluded. The advantage of partial likelihood is that the
+full range of GLM-based techniques, which includes more than GAMs,
+is readily applicable. \citet{hugg:hwan:2007,hwan:hugg:2011}
+and \citet{stok:hugg:2012} implemented smoothing on covariates for more
+general models; however, these methods required sophisticated
+coding to estimate the model parameters. \citet{zwane:2004} also used
+the \pkg{VGAM} package for smoothing and CR data, but considered multinomial
+logit models as an alternative to the conditional likelihood. We
+believe the methods here, based on spline smoothing and classical GAMs,
+are a significant improvement in terms of ease of use, capability, and
+efficiency.
+
+
+When using any statistical software, care is needed when
+analyzing and interpreting the output.
+In our case, one must be careful when estimating the population size via
+the HT estimator. Notice that (\ref{eq:HT}) is a sum of the reciprocals of
+the estimated probabilities of being seen at least
+once, $\widehat{\pi}_{i}(\btheta)$. Hence, for very
+small $\widehat{\pi}_{i}(\btheta)$, the population size estimate may
+be unrealistically large (this is also apparent when
+using the \pkg{mra} and \pkg{Rcapture} packages, which give the
+warning message: \code{The abundance estimation for this model
+can be unstable}). To avoid this, \citet{stok:hugg:2012}
+proposed a robust HT estimator which places a lower bound
+on $\widehat{\pi}_{i}(\btheta)$ to prevent it from giving
+unrealistically large values. In \pkg{VGAM}, a warning similar
+to \pkg{Rcapture}'s is also implemented, and there are
+arguments to control how close to 0 ``very small'' is and to
+suppress the warning entirely.
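+
+
+A small numerical illustration of this sensitivity
+(the $\widehat{\pi}_{i}$ values below are hypothetical):
+
+
+<<label = ht-sketch, eval = FALSE>>=
+pi.hat <- c(0.60, 0.45, 0.30, 0.002)  # hypothetical estimated pi_i values
+sum(1 / pi.hat)  # HT estimate; the 0.002 term alone contributes 500
+@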
+
+
+$\calM_{h}$-type models have the limitation that they rely
+on the very strong assumption that all the heterogeneity is explained
+by the unit-level covariates. This assumption is often untrue;
+see, e.g., \cite{rive:bail:2014}. To this end, one proposal is to add
+random effects to the VGLM class. This would result in the VGLMM class
+(``M'' for mixed), which would be potentially very useful if developed
+successfully. Of course, VGLMMs would contain
+GLMMs \citep{mccu:sear:neuh:2008} as a special case.
+Possible future implementations include
+automatic smoothing parameter selection (via, say, generalized cross
+validation or AIC) and a bootstrap procedure as an alternative
+for standard errors.
+
+
+GAMs are now a standard statistical tool in the modern data analyst's
+toolbox. With the exception of the above references, CR analysis has
+largely been confined to a few regression coefficients (at most),
+and devoid of any data-driven exploratory analyses involving graphics.
+This work has sought to address this by introducing GAM-like
+analyses using a unified statistical framework. Furthermore, the
+functions are easy to use and can often be invoked with a single
+line of code. Finally, we believe this work is a substantial
+improvement over other existing software for closed population
+estimation, and we have shown \pkg{VGAM}'s favourable speed and
+reliability relative to other closed population CR \proglang{R} packages.
+
+
+%*********************************************************************
+\section*{Acknowledgements}
+
+
+We thank the reviewers for their helpful feedback that led to substantial 
+improvements in the manuscript. TWY thanks Anne Chao for a helpful 
+conversation, and the Department of Mathematics and Statistics at 
+the University of Melbourne for hospitality, during a sabbatical visit 
+to Taiwan and a workshop, respectively. Part of his work was also
+conducted while he was a visitor to the Institute of Statistical Science,
+Academia Sinica, Taipei, during October 2012. JS visited TWY on a
+Tweedle-funded Melbourne Abroad Travelling Scholarship from the
+University of Melbourne, during September 2011. All authors also
+thank Paul Yip for providing, and giving permission to use, the
+\code{prinia} data set, and Zachary Kurtz for some helpful
+comments.
+
+
+
+
+\bibliography{./crVGAM}
+
+
+
+
+%*********************************************************************
+\section*{Appendix A: Derivatives}
+\label{sec:posbernoulli.technical}
+
+
+We give the first and (expected) second derivatives of the models.
+Let $z_{ij}= 1$ if individual $i$ has been captured before occasion $j$,
+and $z_{ij}=0$ otherwise. Also, let $p_{cj}$ and $p_{rj}$ be the
+probabilities that an individual is captured and recaptured,
+respectively, at sampling occasion $j$,
+and let $Q_{s:t} = \prod\limits_{j=s}^{t} (1-p_{cj})$.
+Subscripts $i$ are occasionally omitted for clarity.
+\cite{hugg:1989} gives a general form for the derivatives of the
+conditional likelihood (\ref{eq:posbern.condlikelihood}).
+
+
+For the $\calM_{tbh}$, the score vector is 
+\begin{eqnarray*}
+\frac{\partial \ell_{i}}{\partial p_{cj}^{}} 
+&  =  & (1 - z_{ij})
+\left[\frac{y_{ij}}{p_{cj}^{}} - \frac{1-y_{ij}}{1-p_{cj}^{}} \right] -
+\frac{Q_{1:\tau} / (1-p_{cj}^{})}{1-Q_{1:\tau}},\ \ \ \ j=1,\ldots,\tau,\\
+\frac{\partial \ell_{i}}{\partial p_{rj}^{}}
+&  =  & z_{ij} \left[\frac{ y_{ij}}{ p_{rj}^{}} - \frac{1-y_{ij}}{1-p_{rj}^{}} \right],
+\ \ \ \ j=2,\ldots,\tau,
+\end{eqnarray*}
+and the non-zero elements of the expected information matrix (EIM)
+can be written
+\begin{eqnarray*}
+-\E \left(\frac{\partial^2 \ell}{\partial p_{cj}^{2}}\right)
+& = &
+\frac{Q_{1:(j-1)}}{1 - Q_{1:\tau}} \left\{\frac{1}{p_{cj}} + 
+\frac{1 - Q_{(j+1):\tau}}{1 - p_{cj}}\right\} -
+\left(\frac{\partial Q_{1:\tau} / \partial p_{cj}}{1-Q_{1:\tau}}\right)^2\\
+& = & \frac{1}{(1 - p_{cj})^2 (1 - Q_{1:\tau})} 
+\left\{\frac{Q_{1:j}}{p_{cj}} -\frac{Q_{1:\tau}}{1 - Q_{1:\tau}} \right\}, \\
+-\E \left(\frac{\partial^2 \ell}{\partial p_{rj}^{2}}\right)
+& = &\frac{1-Q_{1:j}/(1-p_{cj})}{p_{rj}(1-p_{rj})(1 - Q_{1:\tau})},\\
+-\E \left(\frac{\partial^2 \ell}{\partial p_{cj} \,\partial p_{ck} }\right)
+& = & 
+\displaystyle{\frac{\displaystyle{ -\frac{\partial Q_{1:\tau}}{\partial p_{cj}^{}} 
+\frac{\partial Q_{1:\tau}}{\partial p_{ck}^{}} }}{(1-Q_{1:\tau})^2}} - 
+\displaystyle{\frac{\displaystyle{\frac{\partial^2 Q_{1:\tau}}{\partial p_{cj}^{} \, 
+\partial p_{ck}^{}}}}{(1-Q_{1:\tau})}},\ \ \ j\neq k,
+\end{eqnarray*}
+where $\partial Q_{1:\tau} / \partial p_{cj}^{} = -Q_{1:\tau} / (1-p_{cj})$ 
+and $\partial^2 Q_{1:\tau} / (\partial p_{cj} \, \partial p_{ck}) = 
+Q_{1:\tau} / \{(1-p_{cj})(1-p_{ck})\}$.
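+
+
+These expressions are easily checked numerically; a minimal
+sketch (the $p_{cj}$ values below are hypothetical):
+
+
+<<label = dQ-check, eval = FALSE>>=
+pc <- c(0.3, 0.2, 0.25, 0.4)  # hypothetical p_{cj} values, tau = 4
+Q <- function(p) prod(1 - p)  # Q_{1:tau}
+j <- 2; h <- 1e-6
+pc.h <- pc; pc.h[j] <- pc.h[j] + h
+(Q(pc.h) - Q(pc)) / h         # forward-difference approximation
+-Q(pc) / (1 - pc[j])          # the analytic form given above
+@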
+
+
+Arguments \code{ridge.constant} and \code{ridge.power}
+in \code{posbernoulli.tb()} add a ridge parameter to the first $\tau$
+EIM diagonal elements, i.e., those for the $p_{cj}$. This ensures that
+the working weight matrices are positive-definite, which is needed
+particularly in the first few iteratively reweighted
+least squares (IRLS) iterations. Specifically, at iteration $a$ a
+positive value $\omega K a^{p}$ is added, where $K$ and $p$
+correspond to the two arguments, and $\omega$ is the
+mean of the elements of the working weight matrices. The ridge factor
+decays to zero as the iterations proceed and plays a negligible role upon
+convergence.
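+
+
+A minimal sketch of this decay (the values of $K$, $p$ and $\omega$
+below are assumed purely for illustration; they are not the defaults):
+
+
+<<label = ridge-sketch, eval = FALSE>>=
+K <- 1e-4; p <- -4; omega <- 1  # assumed illustrative values
+a <- 1:6                        # IRLS iteration number
+omega * K * a^p                 # ridge added at each iteration; decays to 0
+@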
+
+
+For individual $i$, let $y_{0i}$ be the number of noncaptures before
+the first capture, $y_{r0i}$ be the number of noncaptures after the
+first capture, and $y_{r1i}$ be the number of recaptures after the
+first capture. For the $\calM_{bh}$, the score vector is
+\begin{eqnarray*}
+\frac{\partial\ell_{i}}{\partial p_{c}^{}} & = & \frac{1}{p_{c}^{}} -
+\frac{y_{0i}}{1 - p_{c}^{}} - \frac{\tau (1 - p_{c}^{})^{\tau-1}}{1-Q_{1:\tau}},\\
+\frac{\partial\ell_{i}}{\partial p_{r}^{}} & = & 
+\frac{y_{r1i}}{p_{r}^{}} - \frac{y_{r0i}}{1 - p_{c}^{}}.
+\end{eqnarray*}
+The non-zero elements of the EIM can be written
+\begin{eqnarray*}
+-\E \left(\frac{\partial^2 \ell}{\partial p_{c}^{2}}\right)
+& = &\frac{p_c}{1-Q_{1:\tau}} \;\sum_{j=1}^{\tau} \;(1-p_c)^{j-1}
+\left(\frac{j-1}{(1-p_c)^2} + \frac{1}{p_c^2}\right) - \frac{\partial}{\partial p_c} \!
+\left(\frac{\partial Q_{1:\tau} / \partial p_c}{1-Q_{1:\tau}} \right)\\
+& = &\frac{1 - Q_{1:\tau} - p_c [1 + (\tau-1)Q_{1:\tau}]}{p_c \, (1-p_c)^2 \, (1-Q_{1:\tau})} 
++ \frac{1}{p_c^2} -\mbox{}\\
+& & \tau (\tau-1) \, \frac{(1-p_c)^{\tau-2}}{1-Q_{1:\tau}} +\tau^2 \, 
+\frac{(1-p_c)^{\tau-2}}{(1-Q_{1:\tau})^2}, \\
+-\E \left(\frac{\partial^2 \ell}{\partial p_{r}^{2}}\right)
+& = & \frac{1}{p_r \, (1-p_r) \, (1-Q_{1:\tau})} \; \sum_{j=1}^{\tau}
+\left\{1 - (1-p_c)^{j-1}\right\}\\
+& = & \frac{\tau - (1-Q_{1:\tau}) / p_c}{p_r (1-p_r) (1-Q_{1:\tau})}.
+\end{eqnarray*}
+
+
+For the $\calM_{th}$, the score vector is
+\begin{eqnarray*}
+\frac{\partial \ell_{i}}{\partial p_{j}^{}}
+& = & \frac{y_{ij}}{p_{ij}^{}} -\frac{1-y_{ij}}{1-p_{ij}^{}} - 
+\frac{Q_{1:\tau} /(1-p_{ij}^{})}{1-Q_{1:\tau}},\ \ \ \ j=1,\ldots,\tau,
+\end{eqnarray*}
+and the EIM elements are
+\begin{eqnarray*}
+-\E \left(\frac{\partial^2 \ell}{\partial p_{j}^{2}}\right)
+& = & \frac{1 - p_{j} - Q_{1:\tau}}{p_{j} \,(1-p_{j})^2 \, (1 - Q_{1:\tau})^2},\\
+-\E \left(\frac{\partial^2 \ell}{\partial p_{j} \,\partial p_{k}}\right)
+& = & \frac{p_{j} \, p_{k} \, Q_{1:\tau}(1-Q_{1:\tau}) + Q_{1:\tau}^2}{(1-Q_{1:\tau})^2 \,(1-p_{j})\, (1-p_{k})},
+\ \ \ \ \ j\neq k.
+\end{eqnarray*}
+
+
+%*********************************************************************
+\section*{Appendix B: Fitted values}
+\label{sec:fitted.values}
+
+
+By default all the family functions have fitted values corresponding
+to the probabilities in the conditional
+likelihood function (\ref{eq:posbern.condlikelihood}), viz.
+\[
+{\widehat{p}}_{ij}^{ y_{ij}} \left(1-\widehat{p}_{ij}\right)^{1-y_{ij}}
+\cdot\left[1 - \prod\limits_{s=1}^{\tau}
+\left( 1 - \widehat{p}_{i,cs}^{}\right)\right]^{-1}.
+\]
+Alternatively, the unconditional means of the $Y_j$ can be
+returned as the fitted values by setting
+\code{type.fitted = "mean"}.
+They are $\mu_1 = E(Y_1) = p_{c1} / (1 - Q_{1:\tau})$,
+$\mu_2 = [(1 - p_{c1}) \,p_{c2} + p_{c1}\, p_{r2}]/(1 - Q_{1:\tau})$, and
+for $j=3,4,\ldots,\tau$,
+\[
+\mu_j = 
+\left( 1 - Q_{1:\tau} \right)^{-1}
+\left\{p_{cj}\, Q_{1:(j-1)} + p_{rj} \!
+\left[ p_{c1} + \sum_{s=2}^{j-1} \, p_{cs} \,Q_{1:(s-1)}
+\right]
+\right\}.
+\]
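+
+
+As a check, in the special case $p_{rj} = p_{cj}$ (no behavioural
+effect) the above reduces to $\mu_j = p_{cj} / (1 - Q_{1:\tau})$,
+since the term in square brackets equals $1 - Q_{1:(j-1)}$.
+A small numerical sketch (the probabilities below are hypothetical):
+
+
+<<label = mean-check, eval = FALSE>>=
+pc <- c(0.3, 0.25, 0.2)  # hypothetical p_{cj}, tau = 3, with p_{rj} = p_{cj}
+Qtau <- prod(1 - pc)
+Q12  <- prod(1 - pc[1:2])  # Q_{1:2}
+mu3  <- (pc[3] * Q12 + pc[3] * (pc[1] + pc[2] * (1 - pc[1]))) / (1 - Qtau)
+c(mu3, pc[3] / (1 - Qtau))  # the two expressions agree
+@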
+
+
+
+
+
+\end{document}
+
+
+
+
diff --git a/inst/doc/crVGAM.pdf b/inst/doc/crVGAM.pdf
new file mode 100644
index 0000000..06786af
Binary files /dev/null and b/inst/doc/crVGAM.pdf differ
diff --git a/man/AR1.Rd b/man/AR1.Rd
new file mode 100644
index 0000000..651395a
--- /dev/null
+++ b/man/AR1.Rd
@@ -0,0 +1,198 @@
+\name{AR1}
+\alias{AR1}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Autoregressive Process with Order-1 Family Function }
+\description{
+  Maximum likelihood estimation of the three-parameter AR-1 model.
+
+
+}
+\usage{
+AR1(ldrift = "identitylink", lsd  = "loge", lvar = "loge",
+    lrho = "rhobit", idrift  = NULL,
+    isd  = NULL, ivar = NULL, irho = NULL,
+    ishrinkage = 0.9, type.likelihood = c("exact", "conditional"),
+    var.arg = FALSE, almost1 = 0.99, zero = c(-2, -3))
+}
+
+%     deviance.arg = FALSE,
+
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{ldrift, lsd, lvar, lrho}{
+    Link functions applied to the scaled mean, standard deviation
+    or variance, and correlation parameters.
+  The parameter \code{drift} is known as the \emph{drift}; it is
+  a scaled mean, \eqn{\mu^*}{mu^*}.
+  See \code{\link{Links}} for more choices.
+
+  
+  }
+  \item{idrift, isd, ivar, irho}{
+  Optional initial values for the parameters.
+  If failure to converge occurs then try different values
+  and monitor convergence by using \code{trace = TRUE}.
+  For an \eqn{S}-column response, these arguments can be of length
+  \eqn{S}, and they are recycled across the columns.
+  A value \code{NULL} means an initial value for each response is
+  computed internally.
+
+
+
+  }
+
+  \item{ishrinkage, zero}{
+  See \code{\link{CommonVGAMffArguments}} for more information.
+
+
+  }
+  \item{var.arg}{
+  Same meaning as in \code{\link{uninormal}}.
+
+
+  }
+  \item{type.likelihood}{
+    What type of likelihood function is maximized.
+    The first choice (default) is the sum of the marginal likelihood
+    and the conditional likelihood.
+    Choosing the conditional likelihood means that the first observation is
+    effectively ignored (this is handled internally by setting
+    the value of the first prior weight to be some small
+    positive number, e.g., \code{1.0e-6}).
+    See the note below.
+
+
+  }
+
+
+\item{almost1}{
+  A value close to 1 but slightly smaller. One of the off-diagonal
+  EIM elements is multiplied by this, to ensure that the EIM is
+  positive-definite.
+
+
+
+}
+
+}
+\details{
+  The AR-1 model implemented here has
+  \deqn{Y_1 \sim N(\mu, \sigma^2 / (1-\rho^2)), }{%
+    Y(1) ~ N(mu, sigma^2 / (1-rho^2)), }
+  and
+  \deqn{Y_i  = \mu^* + \rho Y_{i-1} + e_i, }{%
+    Y(i) =  mu^* +  rho * Y(i-1) + e(i) }
+  where the \eqn{e_i}{e(i)} are i.i.d. Normal(0, sd = \eqn{\sigma}{sigma})
+  random variates.
+
+
+  Here are a few notes:
+  \enumerate{
+  \item A test for stationarity might be to test
+  whether \eqn{\mu^*}{mu^*} is intercept-only.
+  \item The mean of all the \eqn{Y_i}{Y(i)}
+  is \eqn{\mu^* /(1-\rho)}{mu^* / (1-rho)}, and
+  these are returned as the fitted values.
+  \item The correlation of all the \eqn{Y_i}{Y(i)} with \eqn{Y_{i-1}}{Y(i-1)}
+  is \eqn{\rho}{rho}.
+  \item The default link function for \eqn{\rho}{rho} ensures
+  that \eqn{-1 < \rho < 1}{-1 < rho < 1}.
+  }
+
+  
+}
+\section{Warning}{
+  Monitoring convergence is urged: set \code{trace = TRUE}.
+
+  
+
+  Yet to do: add an argument that allows the scaled mean parameter
+  to be deleted, i.e., a 2-parameter model is fitted.
+  Yet to do: \code{ARff(p.lag = 1)} should hopefully be written soon.
+
+
+
+}
+\value{
+  An object of class \code{"vglmff"} (see \code{\link{vglmff-class}}).
+  The object is used by modelling functions such as \code{\link{vglm}},
+  and \code{\link{vgam}}.
+
+
+}
+%\references{
+
+
+%}
+\author{ Thomas W. Yee and Victor Miranda }
+\note{
+  For \code{type.likelihood = "conditional"},
+  the prior weight for the first observation is set to
+  some small positive number, which has the effect of deleting
+  that observation.
+  However, \eqn{n} is still the original \eqn{n}, so that
+  statistics such as the residual degrees of freedom are
+  unchanged (and possibly uncorrected).
+
+
+  Multiple responses are handled.
+  The mean is returned as the fitted values.
+
+
+  Practical experience has shown that half-stepping is a very
+  good idea. The default options use step sizes that are
+  about one third the usual step size. Consequently,
+  \code{maxit} is increased to about 100, by default.
+
+
+}
+  
+
+
+\seealso{
+  \code{\link{vglm.control}},
+  \code{\link{dAR1}},
+  \code{\link{uninormal}},
+  \code{\link[stats]{arima.sim}}.
+
+
+}
+\examples{
+# Example 1: using arima.sim() to generate a stationary time series
+nn <- 100; set.seed(1)
+tsdata <- data.frame(x2 =  runif(nn))
+tsdata  <- transform(tsdata,
+              index = 1:nn,
+              TS1 = arima.sim(nn, model = list(ar = -0.80),
+                              sd = exp(1.0)),
+              TS2 = arima.sim(nn, model = list(ar =  0.50),
+                              sd = exp(1.0 + 2 * x2)))
+fit1a <- vglm(cbind(TS1, TS2) ~ x2, AR1(zero = c(1:4, 6)),
+              data = tsdata, trace = TRUE)
+rhobit(-0.8)
+rhobit( 0.5)
+coef(fit1a, matrix = TRUE)
+summary(fit1a)  # SEs are useful to know
+
+# Example 2: another stationary time series
+nn <- 1000
+my.rho <- rhobit(-1.0, inverse = TRUE)
+my.mu  <- 2.5
+my.sd  <- exp(1)
+tsdata  <- data.frame(index = 1:nn, TS3 = runif(nn))
+for (ii in 2:nn)
+  tsdata$TS3[ii] <- my.mu + my.rho * tsdata$TS3[ii-1] + rnorm(1, sd = my.sd)
+tsdata <- tsdata[-(1:ceiling(nn/5)), ]  # Remove the burn-in data:
+fit2a <- vglm(TS3 ~ 1, AR1(type.likelihood = "conditional"),
+              data = tsdata, trace = TRUE)
+coef(fit2a, matrix = TRUE)
+summary(fit2a)  # SEs are useful to know
+Coef(fit2a)["rho"]  # Estimate of rho for intercept-only models
+my.rho
+coef(fit2a)[1]  # drift
+my.mu           # Should be the same
+head(weights(fit2a, type = "prior"))    # First one is effectively deleted
+head(weights(fit2a, type = "working"))  # Ditto
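+
+# Sketch (assumes fit2a above; the drift is the first coefficient):
+coef(fit2a)[1] / (1 - Coef(fit2a)["rho"])  # implied stationary mean
+mean(tsdata$TS3)                           # cf. the sample mean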
+}
+\keyword{models}
+\keyword{regression}
+
+
+
diff --git a/man/AR1UC.Rd b/man/AR1UC.Rd
new file mode 100644
index 0000000..e6117f7
--- /dev/null
+++ b/man/AR1UC.Rd
@@ -0,0 +1,101 @@
+\name{dAR1}
+\alias{dAR1}
+%\alias{pbisa}
+%\alias{qbisa}
+%\alias{rbisa}
+\title{The AR-1 Autoregressive Process}
+\description{
+  Density for the AR-1 model.
+
+}
+\usage{
+dAR1(x, drift = 0, var.error = 1, ARcoef1 = 0.0,
+     type.likelihood = c("exact", "conditional"), log = FALSE)
+}
+\arguments{
+  \item{x}{vector of quantiles.}
+
+  \item{drift}{
+    the scaled mean (also known as the \emph{drift} parameter),
+    \eqn{\mu^*}{mu^*}.
+    Note that the mean is \eqn{\mu^* /(1-\rho)}{mu^* / (1-rho)}.
+    The default corresponds to observations that have mean 0.
+
+
+
+  }
+  \item{log}{
+  Logical.
+  If \code{TRUE} then the logarithm of the density is returned.
+
+
+  }
+  \item{type.likelihood, var.error, ARcoef1}{
+    See \code{\link{AR1}}.
+    The argument \code{ARcoef1} is \eqn{\rho}{rho}.
+    The argument \code{var.error} is the variance of  the
+    i.i.d. random noise, i.e., \eqn{\sigma^2}{sigma^2}.
+    If \code{type.likelihood = "conditional"} then the
+    first element or row of the result is currently
+    assigned \code{NA}---this
+    is because the density of the first observation is effectively
+    ignored.
+
+
+  }
+}
+\value{
+  \code{dAR1} gives the density.
+
+
+  
+% \code{pbisa} gives the distribution function, and
+% \code{qbisa} gives the quantile function, and
+% \code{rbisa} generates random deviates.
+
+
+}
+\author{ T. W. Yee and Victor Miranda }
+\details{
+  Most of the background to this function is given
+  in \code{\link{AR1}}.
+  All the arguments are converted into matrices, and then all
+  their dimensions are obtained. They are then coerced to
+  the same size: the number of rows is the maximum over all the
+  arguments' numbers of rows, and similarly for the number of columns.
+
+
+
+}
+%\note{
+%}
+\seealso{
+  \code{\link{AR1}}.
+
+
+}
+\examples{
+nn <- 100; set.seed(1)
+tdata <- data.frame(index = 1:nn,
+                    TS1 = arima.sim(nn, model = list(ar = -0.50),
+                                    sd = exp(1)))
+fit1 <- vglm(TS1 ~ 1, AR1, data = tdata, trace = TRUE)
+rhobit(-0.5)
+coef(fit1, matrix = TRUE)
+(Cfit1 <- Coef(fit1))
+summary(fit1)  # SEs are useful to know
+logLik(fit1)
+sum(dAR1(depvar(fit1), drift = Cfit1[1], var.error = (Cfit1[2])^2,
+         ARcoef1 = Cfit1[3], log = TRUE))
+
+fit2 <- vglm(TS1 ~ 1, AR1(type.likelihood = "cond"), data = tdata, trace = TRUE)
+(Cfit2 <- Coef(fit2))  # Okay for intercept-only models
+logLik(fit2)
+head(keep <- dAR1(depvar(fit2), drift = Cfit2[1], var.error = (Cfit2[2])^2,
+                  ARcoef1 = Cfit2[3], type.likelihood = "cond", log = TRUE))
+sum(keep[-1])
+}
+\keyword{distribution}
+
+
diff --git a/man/VGAM-package.Rd b/man/VGAM-package.Rd
index 26739d3..4864d52 100644
--- a/man/VGAM-package.Rd
+++ b/man/VGAM-package.Rd
@@ -26,6 +26,7 @@ The central modelling functions are
 \code{\link{vglm}},
 \code{\link{vgam}},
 \code{\link{rrvglm}},
+\code{\link{rcim}},
 \code{\link{cqo}},
 \code{\link{cao}}.
 For detailed control of fitting,
@@ -36,6 +37,7 @@ A companion package called \pkg{VGAMdata} contains some larger
 data sets which were shifted from \pkg{VGAM}.
 
 
+
 The classes of GLMs and GAMs are special cases of VGLMs and VGAMs.
 The VGLM/VGAM framework is intended to be very general
 so that it encompasses as many distributions and models as
@@ -56,12 +58,14 @@ nonlinear least-squares
 problems.
 
 
+
 VGAMs are to VGLMs what GAMs are to GLMs.
 Vector smoothing (see \code{\link{vsmooth.spline}}) allows several
 additive predictors to be estimated as a sum of smooth functions of
 the covariates.
 
 
+
 For a complete list of this package, use \code{library(help = "VGAM")}.
 New \pkg{VGAM} family functions are continually being written and
 added to the package.
@@ -166,9 +170,19 @@ The \pkg{VGAM} package for categorical data analysis.
 
 
 
-(Oldish) documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee/VGAM}
-contains some further information and examples.
+My website for the \pkg{VGAM} package is at
+\url{https://www.stat.auckland.ac.nz/~yee/VGAM}
+and I hope to put more resources there in the future,
+especially relating to my book.
+
+
+
+
+%(Oldish) documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee/VGAM}
+%contains some further information and examples.
+
+
 
 
 }
@@ -181,6 +195,7 @@ contains some further information and examples.
   \code{\link{vglm}},
   \code{\link{vgam}},
   \code{\link{rrvglm}},
+  \code{\link{rcim}},
   \code{\link{cqo}},
   \code{\link{TypicalVGAMfamilyFunction}},
   \code{\link{CommonVGAMffArguments}},
diff --git a/man/acat.Rd b/man/acat.Rd
index 20fc1cb..e606181 100644
--- a/man/acat.Rd
+++ b/man/acat.Rd
@@ -86,9 +86,12 @@ The \pkg{VGAM} package for categorical data analysis.
 \url{http://www.jstatsoft.org/v32/i10/}.
 
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{https://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
+
+
+
 
 }
 \author{ Thomas W. Yee }
diff --git a/man/betaII.Rd b/man/betaII.Rd
index f59b198..f09feba 100644
--- a/man/betaII.Rd
+++ b/man/betaII.Rd
@@ -7,8 +7,10 @@
   beta II distribution.
 }
 \usage{
-betaII(lscale = "loge", lshape2.p = "loge", lshape3.q = "loge",
-       iscale = NULL, ishape2.p = 2, ishape3.q = 2, zero = NULL)
+betaII(lscale = "loge", lshape2.p = "loge", lshape3.q = "loge", 
+       iscale = NULL, ishape2.p = NULL, ishape3.q = NULL, imethod = 1, 
+       gscale = exp(-5:5), gshape2.p = exp(-5:5), gshape3.q = exp(-5:5), 
+       probs.y = c(0.25, 0.5, 0.75), zero = -(2:3))
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -19,16 +21,18 @@ betaII(lscale = "loge", lshape2.p = "loge", lshape3.q = "loge",
 
 
   }
-  \item{iscale, ishape2.p, ishape3.q}{
-  Optional initial values for \code{scale}, \code{p} and \code{q}.
+  \item{iscale, ishape2.p, ishape3.q, imethod, zero}{
+  See \code{\link{CommonVGAMffArguments}} for information.
 
 
   }
-  \item{zero}{
-  An integer-valued vector specifying which
-  linear/additive predictors are modelled as intercepts only.
-  Here, the values must be from the set \{1,2,3\} which correspond to
-  \code{scale}, \code{p}, \code{q}, respectively.
+  \item{gscale, gshape2.p, gshape3.q}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+
+
+  }
+  \item{probs.y}{
+  See \code{\link{CommonVGAMffArguments}} for information.
 
 
   }
@@ -52,6 +56,7 @@ The mean is
   \deqn{E(Y) = b \, \Gamma(p + 1) \, \Gamma(q - 1) / (\Gamma(p) \, \Gamma(q))}{%
         E(Y) = b  gamma(p + 1)  gamma(q - 1) / ( gamma(p)  gamma(q))}
 provided \eqn{q > 1}; these are returned as the fitted values.
+This family function handles multiple responses.
 
 
 }
@@ -73,7 +78,7 @@ Hoboken, NJ, USA: Wiley-Interscience.
 
 \author{ T. W. Yee }
 \note{
-  See the note in \code{\link{genbetaII}}.
+  See the notes in \code{\link{genbetaII}}.
 
 
 }
@@ -93,7 +98,8 @@ Hoboken, NJ, USA: Wiley-Interscience.
 }
 
 \examples{
-bdata <- data.frame(y = rsinmad(2000, shape1.a = 1, exp(2), exp(1)))  # Not genuine data!
+bdata <- data.frame(y = rsinmad(2000, shape1.a = 1, shape3.q = exp(2),
+                                scale = exp(1)))  # Not genuine data!
 fit <- vglm(y ~ 1, betaII, data = bdata, trace = TRUE)
 fit <- vglm(y ~ 1, betaII(ishape2.p = 0.7, ishape3.q = 0.7),
             data = bdata, trace = TRUE)
diff --git a/man/betaR.Rd b/man/betaR.Rd
index 47ce185..75c96cb 100644
--- a/man/betaR.Rd
+++ b/man/betaR.Rd
@@ -103,9 +103,11 @@ betaR(lshape1 = "loge", lshape2 = "loge",
 %Hoboken, NJ, USA: John Wiley and Sons, Fourth edition.
 
 
-  Documentation accompanying the \pkg{VGAM} package at
-  \url{http://www.stat.auckland.ac.nz/~yee}
-  contains further information and examples.
+%  Documentation accompanying the \pkg{VGAM} package at
+%  \url{https://www.stat.auckland.ac.nz/~yee}
+%  contains further information and examples.
+
+
 
 }
 \author{ Thomas W. Yee }
diff --git a/man/betaff.Rd b/man/betaff.Rd
index aae08c8..ec37305 100644
--- a/man/betaff.Rd
+++ b/man/betaff.Rd
@@ -100,9 +100,9 @@ betaff(A = 0, B = 1, lmu = "logit", lphi = "loge",
   \bold{31}, 799--815.
 
 
-  Documentation accompanying the \pkg{VGAM} package at
-  \url{http://www.stat.auckland.ac.nz/~yee}
-  contains further information and examples.
+%  Documentation accompanying the \pkg{VGAM} package at
+%  \url{https://www.stat.auckland.ac.nz/~yee}
+%  contains further information and examples.
 
 
 }
diff --git a/man/betaprime.Rd b/man/betaprime.Rd
index 070b936..f466bd7 100644
--- a/man/betaprime.Rd
+++ b/man/betaprime.Rd
@@ -80,9 +80,9 @@ Volume 2,
 New York: Wiley.
 
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{https://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
 
 
 }
diff --git a/man/binom2.or.Rd b/man/binom2.or.Rd
index 9184f49..e8b25f9 100644
--- a/man/binom2.or.Rd
+++ b/man/binom2.or.Rd
@@ -152,9 +152,9 @@ binom2.or(lmu = "logit", lmu1 = lmu, lmu2 = lmu, loratio = "loge",
   Journal of Theoretical Biology, \bold{259}(4), 684--694.
 
 
-  Documentation accompanying the \pkg{VGAM} package at
-  \url{http://www.stat.auckland.ac.nz/~yee}
-  contains further information and examples.
+% Documentation accompanying the \pkg{VGAM} package at
+% \url{https://www.stat.auckland.ac.nz/~yee}
+% contains further information and examples.
 
 
 }
diff --git a/man/brat.Rd b/man/brat.Rd
index 12f5c7a..c47d316 100644
--- a/man/brat.Rd
+++ b/man/brat.Rd
@@ -8,7 +8,7 @@
 
 }
 \usage{
-brat(refgp = "last", refvalue = 1, init.alpha = 1)
+brat(refgp = "last", refvalue = 1, ialpha = 1)
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -26,7 +26,7 @@ brat(refgp = "last", refvalue = 1, init.alpha = 1)
 
 
   }
-  \item{init.alpha}{
+  \item{ialpha}{
   Initial values for the \eqn{\alpha}{alpha}s. 
   These are recycled to the appropriate length.
 
diff --git a/man/bratt.Rd b/man/bratt.Rd
index 34d5b6b..0bd5381 100644
--- a/man/bratt.Rd
+++ b/man/bratt.Rd
@@ -8,7 +8,7 @@
 
 }
 \usage{
-bratt(refgp = "last", refvalue = 1, init.alpha = 1, i0 = 0.01)
+bratt(refgp = "last", refvalue = 1, ialpha = 1, i0 = 0.01)
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -25,7 +25,7 @@ bratt(refgp = "last", refvalue = 1, init.alpha = 1, i0 = 0.01)
 
 
   }
-  \item{init.alpha}{
+  \item{ialpha}{
   Initial values for the \eqn{\alpha}{alpha}s. 
   These are recycled to the appropriate length.
 
diff --git a/man/cao.Rd b/man/cao.Rd
index 208f694..bc30be3 100644
--- a/man/cao.Rd
+++ b/man/cao.Rd
@@ -256,9 +256,13 @@ Yee, T. W. (2006)
 Constrained additive ordination.
 \emph{Ecology}, \bold{87}, 203--213.
 
-  Documentation accompanying the \pkg{VGAM} package at
-  \url{http://www.stat.auckland.ac.nz/~yee}
-  contains further information and examples.
+
+
+% Documentation accompanying the \pkg{VGAM} package at
+% \url{http://www.stat.auckland.ac.nz/~yee}
+% contains further information and examples.
+
+
 
 }   
 \author{T. W. Yee}
diff --git a/man/cdf.lmscreg.Rd b/man/cdf.lmscreg.Rd
index 2920eff..24c0dd4 100644
--- a/man/cdf.lmscreg.Rd
+++ b/man/cdf.lmscreg.Rd
@@ -50,9 +50,11 @@ Quantile regression via vector generalized additive models.
 \emph{Statistics in Medicine}, \bold{23}, 2295--2315.
 
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
+
+
 
 
 }
diff --git a/man/constraints.Rd b/man/constraints.Rd
index d82de25..c8446c5 100644
--- a/man/constraints.Rd
+++ b/man/constraints.Rd
@@ -135,8 +135,8 @@ Reduced-rank vector generalized linear models.
 \bold{3}, 15--41.
 
 
-\url{http://www.stat.auckland.ac.nz/~yee} contains additional
-information.
+%\url{http://www.stat.auckland.ac.nz/~yee} contains additional
+%information.
 
 
 }
diff --git a/man/cqo.Rd b/man/cqo.Rd
index 6e15114..46e6bf0 100644
--- a/man/cqo.Rd
+++ b/man/cqo.Rd
@@ -558,9 +558,10 @@ original FORTRAN code into C.
 
 
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
+
 
 
 }
diff --git a/man/cumulative.Rd b/man/cumulative.Rd
index 5ba3b4e..b358040 100644
--- a/man/cumulative.Rd
+++ b/man/cumulative.Rd
@@ -197,9 +197,9 @@ Vector generalized additive models.
 \bold{58}, 481--493.
 
 
-Further information and examples on categorical data analysis
-by the \pkg{VGAM} package can be found at
-\url{http://www.stat.auckland.ac.nz/~yee/VGAM/doc/categorical.pdf}.
+%Further information and examples on categorical data analysis
+%by the \pkg{VGAM} package can be found at
+%\url{http://www.stat.auckland.ac.nz/~yee/VGAM/doc/categorical.pdf}.
 
 
 }
diff --git a/man/dagum.Rd b/man/dagum.Rd
index 35e8ccc..7975b68 100644
--- a/man/dagum.Rd
+++ b/man/dagum.Rd
@@ -7,8 +7,10 @@
   Dagum distribution.
 }
 \usage{
-dagum(lss, lshape1.a = "loge", lscale = "loge", lshape2.p = "loge",
-           ishape1.a = NULL, iscale = NULL, ishape2.p = 1, zero = NULL)
+dagum(lscale = "loge", lshape1.a = "loge", lshape2.p = "loge", 
+      iscale = NULL, ishape1.a = NULL, ishape2.p = NULL, imethod = 1, 
+      lss = TRUE, gscale = exp(-5:5), gshape1.a = exp(-5:5), gshape2.p = exp(-5:5), 
+      probs.y = c(0.25, 0.5, 0.75), zero = ifelse(lss, -(2:3), -c(1, 3)))
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -17,23 +19,31 @@ dagum(lss, lshape1.a = "loge", lscale = "loge", lshape2.p = "loge",
   
   }
 
-  \item{lshape1.a, lscale, lshape2.p}{
+  \item{lshape1.a, lscale, lshape2.p}{
   Parameter link functions applied to the
   (positive) parameters \code{a}, \code{scale}, and \code{p}.
   See \code{\link{Links}} for more choices.
 
   }
-  \item{ishape1.a, iscale, ishape2.p}{
-  Optional initial values for \code{a}, \code{scale}, and \code{p}.
+  \item{iscale, ishape1.a, ishape2.p, imethod, zero}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+  For \code{imethod = 2} a good initial value for 
+  \code{ishape2.p} is needed to obtain a good estimate for
+  the other parameter.
+
+
+  }
+  \item{gscale, gshape1.a, gshape2.p}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+
 
   }
-  \item{zero}{
-  An integer-valued vector specifying which
-  linear/additive predictors are modelled as intercepts only.
-  Here, the values must be from the set \{1,2,3\} which correspond to
-  \code{a}, \code{scale}, \code{p}, respectively.
+  \item{probs.y}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+
 
   }
+
 }
 \details{
   The 3-parameter Dagum distribution is the 4-parameter
@@ -60,6 +70,7 @@ The mean is
   \deqn{E(Y) = b \, \Gamma(p + 1/a) \, \Gamma(1 - 1/a) / \Gamma(p)}{%
         E(Y) = b  gamma(p + 1/a)  gamma(1 - 1/a) /  gamma(p)}
 provided \eqn{-ap < 1 < a}; these are returned as the fitted values.
+This family function handles multiple responses.
 
 
 }
@@ -82,7 +93,7 @@ Hoboken, NJ, USA: Wiley-Interscience.
 
 \author{ T. W. Yee }
 \note{
-  See the note in \code{\link{genbetaII}}.
+  See the notes in \code{\link{genbetaII}}.
 
 
 From Kleiber and Kotz (2003), the MLE is rather sensitive to isolated
@@ -110,7 +121,8 @@ while estimates for \eqn{a} and \eqn{p} can be considered unbiased for
 }
 
 \examples{
-ddata <- data.frame(y = rdagum(n = 3000, scale = exp(2), shape1 = exp(1), exp(1)))
+ddata <- data.frame(y = rdagum(n = 3000, scale = exp(2),
+                               shape1 = exp(1), shape2 = exp(1)))
 fit <- vglm(y ~ 1, dagum(lss = FALSE), data = ddata, trace = TRUE)
 fit <- vglm(y ~ 1, dagum(lss = FALSE, ishape1.a = exp(1)),
             data = ddata, trace = TRUE)
diff --git a/man/deermice.Rd b/man/deermice.Rd
index 4d2188e..a15f117 100644
--- a/man/deermice.Rd
+++ b/man/deermice.Rd
@@ -78,7 +78,8 @@ approach to capture experiments.
 
 \seealso{
     \code{\link[VGAM:posbernoulli.b]{posbernoulli.b}},
-    \code{\link[VGAM:posbernoulli.t]{posbernoulli.t}}.
+    \code{\link[VGAM:posbernoulli.t]{posbernoulli.t}},
+    \code{\link{fill1}}.
 
 
 }
diff --git a/man/dirmul.old.Rd b/man/dirmul.old.Rd
index 55f9656..4db7120 100644
--- a/man/dirmul.old.Rd
+++ b/man/dirmul.old.Rd
@@ -7,8 +7,7 @@
   non-negative integers.
 }
 \usage{
-dirmul.old(link = "loge",
-           init.alpha = 0.01, parallel = FALSE, zero = NULL)
+dirmul.old(link = "loge", ialpha = 0.01, parallel = FALSE, zero = NULL)
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -19,7 +18,7 @@ dirmul.old(link = "loge",
   Here, \eqn{M} is the number of columns of the response matrix.
 
   }
-  \item{init.alpha}{
+  \item{ialpha}{
   Numeric vector. Initial values for the 
   \code{alpha} vector. Must be positive.
   Recycled to length \eqn{M}.
@@ -97,9 +96,9 @@ Overdispersion in allelic counts and \eqn{\theta}-correction in forensic genetic
 
 
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
 
 
 }
diff --git a/man/exponential.Rd b/man/exponential.Rd
index 33e76df..6b26d06 100644
--- a/man/exponential.Rd
+++ b/man/exponential.Rd
@@ -92,7 +92,7 @@ Hoboken, NJ, USA: John Wiley and Sons, Fourth edition.
     \code{\link[stats]{Exponential}}.
 
 
-%   \code{\link{cexpon}},
+%   \code{\link{cens.exponential}},
 
 
 }
diff --git a/man/fill.Rd b/man/fill.Rd
index 15c92f1..a0e75be 100644
--- a/man/fill.Rd
+++ b/man/fill.Rd
@@ -86,12 +86,12 @@ fill(x, values = 0, ncolx = ncol(x))
 
 
 }
-\references{ 
-  More information can be found at
-  \url{http://www.stat.auckland.ac.nz/~yee}.
-
-
-}
+%\references{ 
+%  More information can be found at
+%  \url{http://www.stat.auckland.ac.nz/~yee}.
+%
+%
+%}
 
 % \section{Warning }{
 % Care is needed in such cases.
@@ -244,6 +244,25 @@ max(abs(head(predict(fit3)) -
 \dontrun{
 plotvgam(fit3, se = TRUE, xlab = "lop")  # Correct
 }
+
+
+# Example 4. Capture-recapture model with ephemeral and enduring
+# memory effects. Similar to Yang and Chao (2005), Biometrics.
+deermice <- transform(deermice, Lag1 = y1)
+M.tbh.lag1 <-
+  vglm(cbind(y1, y2, y3, y4, y5, y6) ~ sex + weight + Lag1,
+       posbernoulli.tb(parallel.t = FALSE ~ 0,
+                       parallel.b = FALSE ~ 0,
+                       drop.b = FALSE ~ 1),
+       xij = list(Lag1 ~ fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                         fill(y5) + fill(y6) +
+                         y1 + y2 + y3 + y4 + y5),
+       form2 = ~ sex + weight + Lag1 +
+                 fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                 fill(y5) + fill(y6) +
+                 y1 + y2 + y3 + y4 + y5 + y6,
+       data = deermice, trace = TRUE)
+coef(M.tbh.lag1)
 }
 \keyword{models}
 \keyword{regression}
diff --git a/man/fisk.Rd b/man/fisk.Rd
index 61e76d3..c3ecf2f 100644
--- a/man/fisk.Rd
+++ b/man/fisk.Rd
@@ -8,8 +8,10 @@
 
 }
 \usage{
-fisk(lss, lshape1.a = "loge", lscale = "loge",
-          ishape1.a = NULL, iscale = NULL, zero = NULL)
+fisk(lscale = "loge", lshape1.a = "loge", iscale = NULL, 
+    ishape1.a = NULL, imethod = 1, lss = TRUE, gscale = exp(-5:5), 
+    gshape1.a = exp(-5:5), probs.y = c(0.25, 0.5, 0.75), zero = ifelse(lss, 
+        -2, -1))
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -20,21 +22,29 @@ fisk(lss, lshape1.a = "loge", lscale = "loge",
 
   \item{lshape1.a, lscale}{
   Parameter link functions applied to the
-  (positive) parameters \code{a} and \code{scale}.
+  (positive) parameters \eqn{a} and \code{scale}.
   See \code{\link{Links}} for more choices.
 
   }
-  \item{ishape1.a, iscale}{
-  Optional initial values for \code{a} and \code{scale}.
+  \item{iscale, ishape1.a, imethod, zero}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+  For \code{imethod = 2} a good initial value for 
+  \code{iscale} is needed to obtain a good estimate for
+  the other parameter.
+
 
   }
-  \item{zero}{
-  An integer-valued vector specifying which
-  linear/additive predictors are modelled as intercepts only.
-  Here, the values must be from the set \{1,2\} which correspond to
-  \code{a}, \code{scale}, respectively.
+  \item{gscale, gshape1.a}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+
 
   }
+  \item{probs.y}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+
+
+  }
+
 }
 \details{
   The 2-parameter Fisk (aka log-logistic) distribution is the 4-parameter
@@ -50,7 +60,7 @@ The Fisk distribution has density
         f(y) = a y^(a-1) / [b^a (1 + (y/b)^a)^2]}
   for \eqn{a > 0}, \eqn{b > 0}, \eqn{y \geq 0}{y >= 0}.
 Here, \eqn{b} is the scale parameter \code{scale},
-and \code{a} is a shape parameter.
+and \eqn{a} is a shape parameter.
 The cumulative distribution function is
   \deqn{F(y) = 1 - [1 + (y/b)^a]^{-1} = [1 + (y/b)^{-a}]^{-1}.}{%
         F(y) = 1 - [1 + (y/b)^a]^(-1) = [1 + (y/b)^(-a)]^(-1).}
@@ -58,6 +68,8 @@ The mean is
   \deqn{E(Y) = b \, \Gamma(1 + 1/a) \, \Gamma(1 - 1/a)}{%
         E(Y) = b  gamma(1 + 1/a)  gamma(1 - 1/a)}
 provided \eqn{a > 1}; these are returned as the fitted values.
+This family function handles multiple responses.
+
 
 
 }
@@ -71,13 +83,13 @@ provided \eqn{a > 1}; these are returned as the fitted values.
 Kleiber, C. and Kotz, S. (2003)
 \emph{Statistical Size Distributions in Economics and
              Actuarial Sciences},
-Hoboken, NJ: Wiley-Interscience.
+Hoboken, NJ, USA: Wiley-Interscience.
 
 }
 
 \author{ T. W. Yee }
 \note{
-  See the note in \code{\link{genbetaII}}.
+  See the notes in \code{\link{genbetaII}}.
 
 
 }
@@ -99,7 +111,7 @@ Hoboken, NJ: Wiley-Interscience.
 \examples{
 fdata <- data.frame(y = rfisk(n = 200, shape = exp(1), scale = exp(2)))
 fit <- vglm(y ~ 1, fisk(lss = FALSE), data = fdata, trace = TRUE)
-fit <- vglm(y ~ 1, fisk(lss = FALSE, ishape1.a = exp(1)), data = fdata, trace = TRUE)
+fit <- vglm(y ~ 1, fisk(ishape1.a = exp(2)), data = fdata, trace = TRUE)
 coef(fit, matrix = TRUE)
 Coef(fit)
 summary(fit)
diff --git a/man/genbetaII.Rd b/man/genbetaII.Rd
index 537bb74..c976050 100644
--- a/man/genbetaII.Rd
+++ b/man/genbetaII.Rd
@@ -8,9 +8,12 @@
 
 }
 \usage{
-genbetaII(lss, lshape1.a = "loge", lscale = "loge", lshape2.p = "loge",
-          lshape3.q = "loge", ishape1.a = NULL, iscale = NULL,
-          ishape2.p = 1, ishape3.q = 1, zero = NULL)
+genbetaII(lscale = "loge", lshape1.a = "loge", lshape2.p = "loge",
+          lshape3.q = "loge", iscale = NULL, ishape1.a = NULL,
+          ishape2.p = NULL, ishape3.q = NULL, lss = TRUE,
+          gscale = exp(-5:5), gshape1.a = exp(-5:5),
+          gshape2.p = exp(-5:5), gshape3.q = exp(-5:5),
+          zero = ifelse(lss, -(2:4), -c(1, 3:4)))
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -30,14 +33,17 @@ genbetaII(lss, lshape1.a = "loge", lscale = "loge", lshape2.p = "loge",
 
 
   }
-  \item{ishape1.a, iscale}{
-  Optional initial values for \code{a} and \code{scale}.
-  A \code{NULL} means a value is computed internally.
+  \item{iscale, ishape1.a, ishape2.p, ishape3.q}{
+  Optional initial values for the parameters.
+  A \code{NULL} means a value is computed internally using
+  the arguments \code{gscale}, \code{gshape1.a}, etc.
 
 
   }
-  \item{ishape2.p, ishape3.q}{
-  Optional initial values for \code{p} and \code{q}.
+  \item{gscale, gshape1.a, gshape2.p, gshape3.q}{
+    See \code{\link{CommonVGAMffArguments}} for information.
+    Replaced by \code{iscale}, \code{ishape1.a} etc. if given.
+
 
   }
 % \item{gshape1.a, gscale, gshape2.p, gshape3.q}{
@@ -50,8 +56,9 @@ genbetaII(lss, lshape1.a = "loge", lscale = "loge", lshape2.p = "loge",
   \item{zero}{
   An integer-valued vector specifying which
   linear/additive predictors are modelled as intercepts only.
-  Here, the values must be from the set \{1,2,3,4\} which correspond to
-  \code{a}, \code{scale}, \code{p}, \code{q}, respectively.
+  The default is to set all the shape parameters to be
+  intercept-only.
+
 
 
   }
@@ -87,6 +94,8 @@ provided \eqn{-ap < 1 < aq}; these are returned as the fitted values.
 %\eqn{u^{p-1} (1-u)^{q-1}}{u^(p-1) (1-u)^(q-1)} where \eqn{y>0}.
 
 
+  This family function handles multiple responses.
+
 
 }
 \value{
@@ -112,24 +121,47 @@ Fisher information matrix for the Feller-Pareto distribution.
 
 }
 
-\author{ T. W. Yee }
+\author{ T. W. Yee, with help from Victor Miranda. }
 \note{
-
+  The default is to use a grid search with respect to all
+  four parameters; this is quite costly and time-consuming.
 If the self-starting initial values fail, try experimenting
-with the initial value arguments, especially those whose
-default value is not \code{NULL}.
+with the initial value arguments.
 Also, the constraint \eqn{-ap < 1 < aq}
 may be violated as the iterations progress so it pays
 to monitor convergence, e.g., set \code{trace = TRUE}.
 Successful convergence depends on having very good initial
-values. This is rather difficult for this distribution!
-More improvements could be made here.
+values. This is rather difficult for this distribution, so
+a grid search is conducted by default.
+One suggestion for increasing the estimation reliability
+is to set \code{stepsize = 0.5} and \code{maxit = 100};
+see \code{\link{vglm.control}}.
+
+
+}
+\section{Warning}{
+  This distribution is very flexible and it is not generally
+  recommended to use this family function when the sample size is
+  small; numerical problems easily occur with small samples.
+  At least several hundred observations are probably needed in order
+  to estimate the parameters with any level of confidence.
+  The inclusion of covariates is not recommended at all unless
+  there are several thousand observations.
+  The mean is finite only when \eqn{-ap < 1 < aq}, and this can be
+  easily violated by the parameter estimates for small sample sizes.
+  Try fitting some of the special cases of this distribution
+  (e.g., \code{\link{sinmad}}, \code{\link{fisk}}, etc.) first, and
+  then possibly use those models for initial values for this
+  distribution.
 
 
 
 }
 
+
+
 \seealso{
+    \code{\link{dgenbetaII}},
     \code{\link{betaff}},
     \code{\link{betaII}},
     \code{\link{dagum}},
@@ -139,19 +171,20 @@ More improvements could be made here.
     \code{\link{inv.lomax}},
     \code{\link{paralogistic}},
     \code{\link{inv.paralogistic}},
-    \code{\link{lino}}.
+    \code{\link{lino}},
+    \code{\link{CommonVGAMffArguments}},
+    \code{\link{vglm.control}}.
 
 
 }
 
 \examples{
 \dontrun{
-gdata <- data.frame(y = rsinmad(3000, shape1 = exp(2), scale = exp(2),
+gdata <- data.frame(y = rsinmad(3000, shape1 = exp(1), scale = exp(2),
                                 shape3 = exp(1)))  # A special case!
 fit <- vglm(y ~ 1, genbetaII(lss = FALSE), data = gdata, trace = TRUE)
 fit <- vglm(y ~ 1, data = gdata, trace = TRUE,
-            genbetaII(lss = FALSE, ishape1.a = 4, ishape2.p = 2.2,
-                      iscale = 7, ishape3.q = 2.3))
+            genbetaII(ishape1.a = 3, iscale = 7, ishape3.q = 2.3))
 coef(fit, matrix = TRUE)
 Coef(fit)
 summary(fit)
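+# A hedged sketch of the Note above (same gdata as before): slower,
+# safer steps via vglm.control() arguments passed through vglm().
+fit2 <- vglm(y ~ 1, genbetaII(), data = gdata, trace = TRUE,
+             stepsize = 0.5, maxit = 100)
+coef(fit2, matrix = TRUE)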
diff --git a/man/genbetaIIUC.Rd b/man/genbetaIIUC.Rd
new file mode 100644
index 0000000..36a5afc
--- /dev/null
+++ b/man/genbetaIIUC.Rd
@@ -0,0 +1,97 @@
+\name{GenbetaII}
+\alias{GenbetaII}
+\alias{dgenbetaII}
+%\alias{pgenbetaII}
+%\alias{qgenbetaII}
+%\alias{rgenbetaII}
+\title{The Generalized Beta II Distribution}
+\description{
+  Density
+  for the generalized beta II distribution
+  with shape parameters \code{a}
+  and \code{p}
+  and \code{q}, and scale parameter \code{scale}.
+
+
+
+% distribution function, quantile function and random generation
+
+
+
+}
+\usage{
+dgenbetaII(x, scale = 1, shape1.a, shape2.p, shape3.q, log = FALSE)
+}
+
+
+%pgenbetaII(q, scale = 1, shape1.a, shape2.p, shape3.q,
+%           lower.tail = TRUE, log.p = FALSE)
+%qgenbetaII(p, scale = 1, shape1.a, shape2.p, shape3.q,
+%           lower.tail = TRUE, log.p = FALSE)
+%rgenbetaII(n, scale = 1, shape1.a, shape3.q, shape3.q)
+
+
+\arguments{
+% \item{x, q}{vector of quantiles.}
+  \item{x}{vector of quantiles.}
+% \item{p}{vector of probabilities.}
+% \item{n}{number of observations. If \code{length(n) > 1}, the length
+%   is taken to be the number required.}
+  \item{shape1.a, shape2.p, shape3.q}{positive shape parameters.}
+  \item{scale}{positive scale parameter.}
+  \item{log}{
+  Logical.
+  If \code{log = TRUE} then the logarithm of the density is returned.
+
+  }
+% \item{lower.tail, log.p}{
+% Same meaning as in \code{\link[stats:Normal]{pnorm}}
+% or \code{\link[stats:Normal]{qnorm}}.
+
+
+% }
+
+}
+\value{
+  \code{dgenbetaII} gives the density.
+% \code{pgenbetaII} gives the distribution function,
+% \code{qgenbetaII} gives the quantile function, and
+% \code{rgenbetaII} generates random deviates.
+
+
+}
+\references{
+
+Kleiber, C. and Kotz, S. (2003)
+\emph{Statistical Size Distributions in Economics and
+             Actuarial Sciences},
+Hoboken, NJ, USA: Wiley-Interscience.
+
+
+}
+\author{ T. W. Yee }
+\details{
+  See \code{\link{genbetaII}}, which is the \pkg{VGAM} family function
+  for estimating the parameters by maximum likelihood estimation.
+  Several distributions, such as the Singh-Maddala, are special cases of
+  this flexible 4-parameter distribution.
+  The product of \code{shape1.a} and \code{shape2.p} determines the
+  behaviour of the density at the origin.
+
+
+}
+%\note{
+%
+%
+%}
+\seealso{
+  \code{\link{genbetaII}}.
+
+
+}
+\examples{
+dgenbetaII(0, shape1.a = 1/4, shape2.p = 4, shape3.q = 3)
+dgenbetaII(0, shape1.a = 1/4, shape2.p = 2, shape3.q = 3)
+dgenbetaII(0, shape1.a = 1/4, shape2.p = 8, shape3.q = 3)
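+# Hedged annotation: with shape1.a = 1/4 the three calls above have
+# a * p equal to 1, 1/2 and 2 respectively, the quantity that the
+# Details section says governs the density at the origin.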
+}
+\keyword{distribution}
diff --git a/man/genpoisUC.Rd b/man/genpoisUC.Rd
new file mode 100644
index 0000000..0e12daf
--- /dev/null
+++ b/man/genpoisUC.Rd
@@ -0,0 +1,81 @@
+\name{dgenpois}
+\alias{dgenpois}
+%\alias{pgenpois}
+%\alias{qgenpois}
+%\alias{rgenpois}
+\title{The Generalized Poisson Distribution}
+\description{
+  Density for the Generalized Poisson Distribution.
+
+}
+\usage{
+dgenpois(x, lambda = 0, theta, log = FALSE)
+}
+\arguments{
+  \item{x}{vector of quantiles.}
+
+  \item{lambda, theta}{
+    See \code{\link{genpoisson}}.
+    The default value of \code{lambda} corresponds to an
+    ordinary Poisson distribution.
+
+
+  }
+  \item{log}{
+  Logical.
+  If \code{TRUE} then the logarithm of the density is returned.
+
+
+  }
+}
+\value{
+  \code{dgenpois} gives the density.
+  The value \code{NaN} is returned for elements not satisfying
+  the parameter restrictions, e.g., if \eqn{\lambda > 1}{lambda > 1}.
+
+
+  
+% \code{pgenpois} gives the distribution function, and
+% \code{qgenpois} gives the quantile function, and
+% \code{rgenpois} generates random deviates.
+
+
+}
+\author{ T. W. Yee }
+\details{
+  Most of the background to this function is given
+  in \code{\link{genpoisson}}.
+  Some warnings relevant to this distribution are given there,
+  especially those relating to the complicated range of the
+  parameter \code{lambda} at or near \eqn{-1}.
+
+
+  Note that numerical round-off errors etc. can occur; see
+  below for an example.
+
+
+}
+%\note{
+%}
+\seealso{
+  \code{\link{genpoisson}},
+  \code{\link[stats:Poisson]{dpois}}.
+
+
+}
+\examples{
+sum(dgenpois(0:1000, lambda = -0.5, theta = 2))  # Not perfect...
+\dontrun{
+lambda <- -0.2; theta <- 2; y <- 0:10
+proby <- dgenpois(y, lambda = lambda, theta = theta, log = FALSE)
+plot(y, proby, type = "h", col = "blue", lwd = 2, ylab = "P[Y=y]",
+     main = paste("Y ~ Generalized Poisson(lambda=", lambda,
+                  ", theta=", theta, ")", sep = ""), las = 1,
+     sub = "Orange is the Poisson probability function")
+sum(proby)
+lines(y + 0.1, dpois(y, theta), type = "h", lwd = 2, col = "orange")
+}
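+# Hedged extras: lambda > 1 should give NaN (see Value above), and the
+# mean theta / (1 - lambda) from genpoisson() can be checked roughly:
+dgenpois(0:3, lambda = 1.5, theta = 2)                    # NaNs expected
+sum((0:100) * dgenpois(0:100, lambda = -0.2, theta = 2))  # cf. 2 / 1.2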
+}
+\keyword{distribution}
+
+
diff --git a/man/genpoisson.Rd b/man/genpoisson.Rd
index 543d544..400ec9c 100644
--- a/man/genpoisson.Rd
+++ b/man/genpoisson.Rd
@@ -3,13 +3,13 @@
 %- Also NEED an '\alias' for EACH other topic documented here.
 \title{ Generalized Poisson distribution }
 \description{
-  Estimation of the two parameters of a generalized Poisson distribution.
+  Estimation of the two-parameter generalized Poisson distribution.
 
 }
 \usage{
-genpoisson(llambda = extlogit(min = -1, max = 1), ltheta = "loge",
+genpoisson(llambda = "rhobit", ltheta = "loge",
            ilambda = NULL, itheta = NULL,
-           use.approx = TRUE, imethod = 1, zero = 1)
+           use.approx = TRUE, imethod = 1, ishrinkage = 0.95, zero = -1)
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -17,7 +17,8 @@ genpoisson(llambda = extlogit(min = -1, max = 1), ltheta = "loge",
   Parameter link functions for \eqn{\lambda} and \eqn{\theta}.
   See \code{\link{Links}} for more choices.
   The \eqn{\lambda} parameter lies at least within the interval
-  \eqn{[-1,1]}; see below for more details.
+  \eqn{[-1,1]}; see below for more details,
+  and an alternative link is \code{\link{rhobit}}.
   The \eqn{\theta} parameter is positive, therefore the default is the
   log link.
 
@@ -36,22 +37,29 @@ genpoisson(llambda = extlogit(min = -1, max = 1), ltheta = "loge",
 
   }
   \item{imethod}{
-  An integer with value \code{1} or \code{2} which
+  An integer with value \code{1}, \code{2} or \code{3} which
   specifies the initialization method for the parameters.
   If failure to converge occurs try another value
   and/or else specify a value for \code{ilambda} and/or \code{itheta}.
 
 
   }
-  \item{zero}{
-  An integer vector, containing the value 1 or 2.
-  If so, \eqn{\lambda} or \eqn{\theta} respectively
-  are modelled as an intercept only.
-  If set to \code{NULL} then both linear/additive predictors are modelled
-  as functions of the explanatory variables.
-
+  \item{ishrinkage, zero}{
+    See \code{\link{CommonVGAMffArguments}} for information.
 
+    
   }
+% \item{zero}{
+% An integer vector, containing the value 1 or 2.
+% If so, \eqn{\lambda} or \eqn{\theta} respectively
+% are modelled as an intercept only.
+% If set to \code{NULL} then both linear/additive predictors are modelled
+% as functions of the explanatory variables.
+
+
+% }
+
+
 }
 \details{
 The generalized Poisson distribution has density
@@ -66,10 +74,20 @@ when \eqn{\lambda < 0}
 [and then \eqn{P(Y=y) = 0} for \eqn{y > m}].
 Note the complicated support for this distribution means,
 for some data sets,
-the default link for \code{llambda} is not always appropriate.
+the default link for \code{llambda} will not always work, and
+some tinkering may be required to get it running.
+
 
 
-An ordinary Poisson distribution corresponds to \eqn{\lambda = 0}{lambda = 0}.
+As Consul and Famoye (2006) state on p. 165, the lower limits
+on \eqn{\lambda} and the restriction \eqn{m \ge 4}{m >= 4} are imposed
+to ensure that there are at least 5 classes with nonzero
+probability when \eqn{\lambda} is negative.
+
+
+
+An ordinary Poisson distribution corresponds
+to \eqn{\lambda = 0}{lambda = 0}.
 The mean (returned as the fitted values) is 
 \eqn{E(Y) = \theta / (1 - \lambda)}
 and the variance is \eqn{\theta / (1 - \lambda)^3}.
@@ -105,23 +123,45 @@ New York, USA: Marcel Dekker.
 
 
 }
+\section{Warning }{
+  Monitor convergence!
+  This family function is fragile.
+  Don't get confused: it is \code{theta}
+  (and not \code{lambda}) here that
+  matches more closely with the \code{lambda} of
+  \code{\link[stats:Poisson]{dpois}}.
+
+  
+}
+
 \author{ T. W. Yee }
 \note{
-  This distribution is useful for dispersion modelling.
+  This family function handles multiple responses.
+  This distribution is potentially useful for dispersion modelling.
   Convergence problems may occur when \code{lambda} is very close
   to 0 or 1.
+  If a failure occurs then you might want to try something like
+  \code{llambda = extlogit(min = -0.9, max = 1)}
+  to handle the complicated constraint on the lower bound,
+  and if that doesn't work, try
+  \code{llambda = extlogit(min = -0.8, max = 1)}, etc.
 
 
 }
+
 \seealso{
-  \code{\link{poissonff}}.
+  \code{\link{poissonff}},
+  \code{\link[stats:Poisson]{dpois}},
+  \code{\link{dgenpois}},
+  \code{\link{rhobit}},
+  \code{\link{extlogit}}.
 
 
 }
 \examples{
 gdata <- data.frame(x2 = runif(nn <- 200))
-gdata <- transform(gdata, y1 = rpois(nn, exp(2 - x2)))  # Ordinary Poisson data
-fit <- vglm(y1 ~ x2, genpoisson(zero = 1), data = gdata, trace = TRUE)
+gdata <- transform(gdata, y1 = rpois(nn, exp(2 - x2)))  # Poisson data
+fit <- vglm(y1 ~ x2, genpoisson, data = gdata, trace = TRUE)
 coef(fit, matrix = TRUE)
 summary(fit)
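+# A hedged fallback sketch from the Note: narrow the support of lambda
+# via extlogit() if the default rhobit link fails.
+fit2 <- vglm(y1 ~ x2, genpoisson(llambda = extlogit(min = -0.9, max = 1)),
+             data = gdata, trace = TRUE)
+coef(fit2, matrix = TRUE)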
 }
diff --git a/man/iam.Rd b/man/iam.Rd
index 7e80ca0..8a84d88 100644
--- a/man/iam.Rd
+++ b/man/iam.Rd
@@ -82,12 +82,12 @@ iam(j, k, M, both = FALSE, diag = TRUE)
 
   }
 }
-\references{ 
-  The website \url{http://www.stat.auckland.ac.nz/~yee} contains
-  some additional information.
-
-
-}
+%\references{ 
+%  The website \url{http://www.stat.auckland.ac.nz/~yee} contains
+%  some additional information.
+%
+%
+%}
 \author{ T. W. Yee }
 \note{
   This function is used in the \code{weight} slot of many \pkg{VGAM}
diff --git a/man/inv.lomax.Rd b/man/inv.lomax.Rd
index 7a3203b..9da37e5 100644
--- a/man/inv.lomax.Rd
+++ b/man/inv.lomax.Rd
@@ -7,29 +7,38 @@
   inverse Lomax distribution.
 }
 \usage{
-inv.lomax(lscale = "loge", lshape2.p = "loge",
-          iscale = NULL, ishape2.p = 1, zero = NULL)
+inv.lomax(lscale = "loge", lshape2.p = "loge", iscale = NULL, 
+    ishape2.p = NULL, imethod = 1, gscale = exp(-5:5), gshape2.p = exp(-5:5), 
+    probs.y = c(0.25, 0.5, 0.75), zero = -2)
 }
+
 %- maybe also 'usage' for other objects documented here.
 \arguments{
-  \item{lscale, lshape2.p}{
+ \item{lscale, lshape2.p}{
   Parameter link functions applied to the
-  (positive) scale parameter \code{scale} and
-  (positive) shape parameter \code{p}.
+  (positive) parameters \eqn{b} and \eqn{p}.
   See \code{\link{Links}} for more choices.
 
   }
-  \item{iscale, ishape2.p}{
-  Optional initial values for \code{scale} and \code{p}.
+  \item{iscale, ishape2.p, imethod, zero}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+  For \code{imethod = 2} a good initial value for 
+  \code{ishape2.p} is needed to obtain a good estimate for
+  the other parameter.
+
 
   }
-  \item{zero}{
-  An integer-valued vector specifying which
-  linear/additive predictors are modelled as intercepts only.
-  Here, the values must be from the set \{1,2\} which correspond to
-  \code{scale}, \code{p}, respectively.
+  \item{gscale, gshape2.p}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+
 
   }
+  \item{probs.y}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+
+
+  }
+
 }
 \details{
   The 2-parameter inverse Lomax distribution is the 4-parameter
@@ -46,7 +55,9 @@ The inverse Lomax distribution has density
   for \eqn{b > 0}, \eqn{p > 0}, \eqn{y \geq 0}{y >= 0}.
 Here, \eqn{b} is the scale parameter \code{scale},
 and \code{p} is a shape parameter.
-The mean does not seem to exist; the median is returned as the fitted values.
+The mean does not seem to exist; the \emph{median} is returned
+as the fitted values.
+This family function handles multiple responses.
 
 
 
@@ -75,7 +86,7 @@ Hoboken, NJ, USA: Wiley-Interscience.
 
 \author{ T. W. Yee }
 \note{
-  See the note in \code{\link{genbetaII}}.
+  See the notes in \code{\link{genbetaII}}.
 
 
 }
@@ -96,10 +107,10 @@ Hoboken, NJ, USA: Wiley-Interscience.
 }
 
 \examples{
-idata <- data.frame(y = rinv.lomax(n = 2000, exp(2), exp(1)))
+idata <- data.frame(y = rinv.lomax(n = 2000, scale = exp(2),
+                                   shape2.p = exp(1)))
 fit <- vglm(y ~ 1, inv.lomax, data = idata, trace = TRUE)
-fit <- vglm(y ~ 1, inv.lomax(iscale = exp(2), ishape2.p = exp(1)), data = idata,
-            trace = TRUE, epsilon = 1e-8)
+fit <- vglm(y ~ 1, inv.lomax(iscale = exp(3)), data = idata,
+            trace = TRUE, epsilon = 1e-8, crit = "coef")
 coef(fit, matrix = TRUE)
 Coef(fit)
 summary(fit)
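+# Hedged check (assuming the matching qinv.lomax() argument and Coef()
+# component names): the fitted values should equal the estimated median.
+qinv.lomax(0.5, scale = Coef(fit)["scale"],
+           shape2.p = Coef(fit)["shape2.p"])
+head(fitted(fit), 1)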
diff --git a/man/inv.paralogistic.Rd b/man/inv.paralogistic.Rd
index bb7bfeb..b0d84d4 100644
--- a/man/inv.paralogistic.Rd
+++ b/man/inv.paralogistic.Rd
@@ -7,8 +7,10 @@
   inverse paralogistic distribution.
 }
 \usage{
-inv.paralogistic(lss, lshape1.a = "loge", lscale = "loge",
-                      ishape1.a = 2, iscale = NULL, zero = NULL)
+inv.paralogistic(lscale = "loge", lshape1.a = "loge", iscale = NULL, 
+    ishape1.a = NULL, imethod = 1, lss = TRUE, gscale = exp(-5:5), 
+    gshape1.a = exp(-5:5), probs.y = c(0.25, 0.5, 0.75),
+    zero = ifelse(lss, -2, -1))
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -17,24 +19,31 @@ inv.paralogistic(lss, lshape1.a = "loge", lscale = "loge",
   
   }
 
-  \item{lshape1.a, lscale}{ 
+ \item{lshape1.a, lscale}{
   Parameter link functions applied to the
-  (positive) shape parameter \code{a} and
-  (positive) scale parameter \code{scale}.
+  (positive) parameters \eqn{a} and \code{scale}.
   See \code{\link{Links}} for more choices.
 
   }
-  \item{ishape1.a, iscale}{
-  Optional initial values for \code{a} and \code{scale}.
+  \item{iscale, ishape1.a, imethod, zero}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+  For \code{imethod = 2} a good initial value for 
+  \code{ishape1.a} is needed to obtain a good estimate for
+  the other parameter.
+
 
   }
-  \item{zero}{
-  An integer-valued vector specifying which
-  linear/additive predictors are modelled as intercepts only.
-  Here, the values must be from the set \{1,2\} which correspond to
-  \code{a}, \code{scale}, respectively.
+  \item{gscale, gshape1.a}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+
 
   }
+  \item{probs.y}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+
+
+  }
+
 }
 \details{
   The 2-parameter inverse paralogistic distribution is the 4-parameter
@@ -54,6 +63,8 @@ The mean is
   \deqn{E(Y) = b \, \Gamma(a + 1/a) \, \Gamma(1 - 1/a) / \Gamma(a)}{%
         E(Y) = b  gamma(a + 1/a)  gamma(1 - 1/a) /  gamma(a)}
 provided \eqn{a > 1}; these are returned as the fitted values.
+This family function handles multiple responses.
+
 
 
 }
@@ -76,7 +87,7 @@ Hoboken, NJ, USA: Wiley-Interscience.
 
 \author{ T. W. Yee }
 \note{
-  See the note in \code{\link{genbetaII}}.
+  See the notes in \code{\link{genbetaII}}.
 
 
 }
@@ -99,8 +110,8 @@ Hoboken, NJ, USA: Wiley-Interscience.
 \examples{
 idata <- data.frame(y = rinv.paralogistic(n = 3000, exp(1), scale = exp(2)))
 fit <- vglm(y ~ 1, inv.paralogistic(lss = FALSE), data = idata, trace = TRUE)
-fit <- vglm(y ~ 1, inv.paralogistic(lss = FALSE, ishape1.a = 2.7, iscale = 7.3),
-            data = idata, trace = TRUE, epsilon = 1e-8)
+fit <- vglm(y ~ 1, inv.paralogistic(imethod = 2, ishape1.a = 4),
+            data = idata, trace = TRUE, crit = "coef")
 coef(fit, matrix = TRUE)
 Coef(fit)
 summary(fit)
diff --git a/man/is.buggy.Rd b/man/is.buggy.Rd
new file mode 100644
index 0000000..7927b4d
--- /dev/null
+++ b/man/is.buggy.Rd
@@ -0,0 +1,110 @@
+\name{is.buggy}
+\alias{is.buggy}
+\alias{is.buggy.vlm}
+% 20150326
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{ Does the fitted object suffer from a known bug? }
+\description{
+  Checks to see if a fitted object suffers from some known bug.
+
+
+}
+\usage{
+is.buggy(object, ...)
+is.buggy.vlm(object, each.term = FALSE, ...)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{object}{
+  A fitted \pkg{VGAM} object, e.g., from
+  \code{\link{vgam}}.
+
+
+  }
+  \item{each.term}{
+    Logical. If \code{TRUE} then a logical is returned for each term.
+
+
+  }
+  \item{\dots}{
+  Unused for now.
+
+
+  }
+
+}
+\details{
+  It is known that \code{\link{vgam}} with \code{\link{s}} terms
+  does not correctly handle constraint matrices (\code{cmat}, say) when
+  \code{crossprod(cmat)} is not diagonal.
+  This function detects whether this is so or not.
+  Note that probably all \pkg{VGAM} family functions have defaults where all
+  \code{crossprod(cmat)}s are diagonal, and therefore they do not suffer from
+  this bug. The bug is more likely to occur if the user inputs constraint
+  matrices using the \code{constraints} argument (and sets \code{zero = NULL}
+  if necessary).
+
+
+}
+\value{
+  The default is a single logical (\code{TRUE} if any term is
+  \code{TRUE});
+  otherwise a vector of such logicals, with each element
+  corresponding to a term.
+  If the value is \code{TRUE} then I suggest replacing the VGAM
+  by a similar model fitted by \code{\link{vglm}} and using
+  regression splines, e.g., \code{\link[splines]{bs}},
+  \code{\link[splines]{ns}}. 
+
+
+
+}
+%\references{
+
+
+%}
+
+\author{ T. W. Yee }
+\note{ 
+  When the bug is fixed this function may be withdrawn; otherwise
+  it should always return \code{FALSE}s!
+
+
+}
+\seealso{
+  \code{\link{vgam}},
+  \code{\link{vglm}},
+  \code{\link[splines]{bs}},
+  \code{\link[splines]{ns}}. 
+
+
+}
+
+\examples{
+fit1 <- vgam(cbind(agaaus, kniexc) ~ s(altitude, df = c(3, 4)),
+             binomialff(multiple.responses = TRUE), data = hunua)
+is.buggy(fit1)  # Okay
+is.buggy(fit1, each.term = TRUE)  # No terms are buggy
+
+fit2 <- vgam(cbind(agaaus, kniexc) ~ s(altitude, df = c(3, 4)),
+             binomialff(multiple.responses = TRUE), data = hunua,
+             constraints =
+             list("(Intercept)" = diag(2),
+                  "s(altitude, df = c(3, 4))" = matrix(c(1, 1, 0, 1), 2, 2)))
+is.buggy(fit2)  # TRUE
+is.buggy(fit2, each.term = TRUE)
+constraints(fit2)
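+# Hedged arithmetic check: the s() constraint matrix of fit2 has a
+# non-diagonal crossproduct, which is what is.buggy() detects.
+cmat <- matrix(c(1, 1, 0, 1), 2, 2)
+crossprod(cmat)  # Not diagonal, hence fit2 is flagged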
+
+# fit2b is an approximate alternative to fit2:
+fit2b <- vglm(cbind(agaaus, kniexc) ~ bs(altitude, df = 3) + bs(altitude, df = 4),
+              binomialff(multiple.responses = TRUE), data = hunua,
+              constraints =
+              list("(Intercept)" = diag(2),
+                   "bs(altitude, df = 3)" = rbind(1, 1),
+                   "bs(altitude, df = 4)" = rbind(0, 1)))
+is.buggy(fit2b)  # Okay
+is.buggy(fit2b, each.term = TRUE)
+constraints(fit2b)
+}
+\keyword{models}
+\keyword{regression}
+
diff --git a/man/lms.bcg.Rd b/man/lms.bcg.Rd
index 6905833..1484e17 100644
--- a/man/lms.bcg.Rd
+++ b/man/lms.bcg.Rd
@@ -62,13 +62,15 @@ lms.bcg(percentiles = c(25, 50, 75), zero = c(1, 3),
 Lopatatzidis A. and Green, P. J. (unpublished manuscript)
 Semiparametric quantile regression using the gamma distribution.
 
+
 Yee, T. W. (2004)
 Quantile regression via vector generalized additive models.
 \emph{Statistics in Medicine}, \bold{23}, 2295--2315.
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{https://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
 
 
 }
diff --git a/man/lms.bcn.Rd b/man/lms.bcn.Rd
index c495f77..860ce77 100644
--- a/man/lms.bcn.Rd
+++ b/man/lms.bcn.Rd
@@ -180,9 +180,9 @@ Quantile regression via vector generalized additive models.
 \bold{23}, 2295--2315.
 
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
 
 }
 \author{ Thomas W. Yee }
@@ -203,12 +203,13 @@ contains further information and examples.
   is often a good idea. See the example below.
 
 
-  While it is usual to regress the response against a single
-  covariate, it is possible to add other explanatory variables,
-  e.g., gender.
-  See
-  \url{http://www.stat.auckland.ac.nz/~yee}
-  for further information and examples about this feature.
+% While it is usual to regress the response against a single
+% covariate, it is possible to add other explanatory variables,
+% e.g., gender.
+% See
+% \url{http://www.stat.auckland.ac.nz/~yee}
+% for further information and examples about this feature.
+
 
 }
 
diff --git a/man/lms.yjn.Rd b/man/lms.yjn.Rd
index 48a00d0..ec5ee15 100644
--- a/man/lms.yjn.Rd
+++ b/man/lms.yjn.Rd
@@ -119,9 +119,11 @@ In: Haerdle, W. and Ronz, B.,
 \emph{Proceedings in Computational Statistics COMPSTAT 2002}.
 Heidelberg: Physica-Verlag.
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
+
 
 }
 \author{ Thomas W. Yee }
diff --git a/man/loglinb3.Rd b/man/loglinb3.Rd
index 8c8b273..fca6d61 100644
--- a/man/loglinb3.Rd
+++ b/man/loglinb3.Rd
@@ -75,9 +75,9 @@ McCullagh, P. and Nelder, J. A. (1989)
   \emph{Generalized Linear Models}, 2nd ed. London: Chapman & Hall.
 
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
 
 
 }
diff --git a/man/lomax.Rd b/man/lomax.Rd
index fe0da7e..b8797f2 100644
--- a/man/lomax.Rd
+++ b/man/lomax.Rd
@@ -8,9 +8,9 @@
 
 }
 \usage{
-lomax(lscale = "loge", lshape3.q = "loge",
-      iscale = NULL,   ishape3.q = NULL,
-      gshape3.q = exp(-5:5), zero = NULL)
+lomax(lscale = "loge", lshape3.q = "loge", iscale = NULL, 
+      ishape3.q = NULL, imethod = 1, gscale = exp(-5:5),
+      gshape3.q = exp(-5:5), probs.y = c(0.25, 0.5, 0.75), zero = -2)
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -21,13 +21,16 @@ lomax(lscale = "loge", lshape3.q = "loge",
 
 
   }
-  \item{iscale, ishape3.q}{
-  Optional initial values for \code{scale} and \code{q}.
+  \item{iscale, ishape3.q, imethod}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+  For \code{imethod = 2} a good initial value for
+  \code{iscale} is needed to obtain a good estimate for
+  the other parameter.
 
 
   }
 
-  \item{gshape3.q, zero}{
+  \item{gscale, gshape3.q, zero, probs.y}{
   See
   \code{\link{CommonVGAMffArguments}}.
 
@@ -68,6 +71,8 @@ The mean is
   \deqn{E(Y) = b/(q-1)}{%
         E(Y) = b/(q-1)}
 provided \eqn{q > 1}; these are returned as the fitted values.
+This family function handles multiple responses.
+
 
 
 }
@@ -90,7 +95,7 @@ Hoboken, NJ, USA: Wiley-Interscience.
 
 \author{ T. W. Yee }
 \note{
-  See the note in \code{\link{genbetaII}}.
+  See the notes in \code{\link{genbetaII}}.
 
 
 }
diff --git a/man/micmen.Rd b/man/micmen.Rd
index fd16337..80434e7 100644
--- a/man/micmen.Rd
+++ b/man/micmen.Rd
@@ -108,9 +108,9 @@ micmen(rpar = 0.001, divisor = 10, init1 = NULL, init2 = NULL,
   New York: Wiley.
 
 
-  Documentation accompanying the \pkg{VGAM} package at
-  \url{http://www.stat.auckland.ac.nz/~yee}
-  contains further information and examples.
+% Documentation accompanying the \pkg{VGAM} package at
+% \url{http://www.stat.auckland.ac.nz/~yee}
+% contains further information and examples.
 
 
 }
diff --git a/man/multinomial.Rd b/man/multinomial.Rd
index da3f1ae..ec04e32 100644
--- a/man/multinomial.Rd
+++ b/man/multinomial.Rd
@@ -140,9 +140,9 @@ Cambridge University Press.
 
 
 
-Further information and examples on categorical data analysis
-by the \pkg{VGAM} package can be found at
-\url{http://www.stat.auckland.ac.nz/~yee/VGAM/doc/categorical.pdf}.
+%Further information and examples on categorical data analysis
+%by the \pkg{VGAM} package can be found at
+%\url{http://www.stat.auckland.ac.nz/~yee/VGAM/doc/categorical.pdf}.
 
 
 }
diff --git a/man/negbinomial.Rd b/man/negbinomial.Rd
index 6f90c66..7cdffe6 100644
--- a/man/negbinomial.Rd
+++ b/man/negbinomial.Rd
@@ -182,11 +182,13 @@ polyaR(lsize = "loge", lprob = "logit",
   Setting \code{parallel = TRUE} is useful in order to get
   something similar to \code{\link{quasipoissonff}} or
   what is known as NB-1.
-  The parallelism constraint does not apply to any intercept term.
+  If \code{parallel = TRUE} then the parallelism constraint
+  does not apply to any intercept term; see the commented sketch below.
   You should set \code{zero = NULL} too if \code{parallel = TRUE} to
   avoid a conflict.
 
 
+
   }
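+% A hedged NB-1 sketch (ndata and the variables y and x2 are
+% hypothetical):
+% fit <- vglm(y ~ x2, negbinomial(parallel = TRUE, zero = NULL),
+%             data = ndata, trace = TRUE)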
   \item{probs.y}{
   Passed into the \code{probs} argument
diff --git a/man/notdocumentedyet.Rd b/man/notdocumentedyet.Rd
index afb482c..c285e0c 100644
--- a/man/notdocumentedyet.Rd
+++ b/man/notdocumentedyet.Rd
@@ -4,6 +4,12 @@
 %
 %
 %
+% 201503, 201504, 201505;
+\alias{dbetaII}
+\alias{AR1.control}
+\alias{param.names}
+%\alias{is.buggy}
+%\alias{is.buggy.vlm}
 %
 % 201412;
 %\alias{linkfun.vglm}
@@ -32,7 +38,7 @@
 %
 % 201312;
 % \alias{simulate.vlm}
-% 201311;
+% 201311;             20150316: modified to familyname
 \alias{familyname}
 \alias{familyname.vlm}
 \alias{familyname.vglmff}
@@ -273,7 +279,7 @@
 \alias{Tol.qrrvglm}
 %\alias{Tol.uqo}
 \alias{a2m}
-\alias{abbott}
+% \alias{abbott}  % 20150320; no longer releasing family.quantal.R.
 % \alias{acat.deriv}
 % \alias{add.arg}
 % \alias{add.constraints}
diff --git a/man/paralogistic.Rd b/man/paralogistic.Rd
index d3af56f..0f99576 100644
--- a/man/paralogistic.Rd
+++ b/man/paralogistic.Rd
@@ -9,8 +9,10 @@
 
 }
 \usage{
-paralogistic(lss, lshape1.a = "loge", lscale = "loge",
-                  ishape1.a = 2, iscale = NULL, zero = NULL)
+paralogistic(lscale = "loge", lshape1.a = "loge", iscale = NULL, 
+    ishape1.a = NULL, imethod = 1, lss = TRUE, gscale = exp(-5:5), 
+    gshape1.a = exp(-5:5), probs.y = c(0.25, 0.5, 0.75),
+    zero = ifelse(lss, -2, -1))
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -18,23 +20,28 @@ paralogistic(lss, lshape1.a = "loge", lscale = "loge",
   
   
   }
-
   \item{lshape1.a, lscale}{
   Parameter link functions applied to the
-  (positive) shape parameter \code{a} and
-  (positive) scale parameter \code{scale}.
+  (positive) parameters \eqn{a} and \code{scale}.
   See \code{\link{Links}} for more choices.
 
   }
-  \item{ishape1.a, iscale}{
-  Optional initial values for \code{a} and \code{scale}.
+  \item{iscale, ishape1.a, imethod, zero}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+  For \code{imethod = 2} a good initial value for
+  \code{ishape1.a} is needed to obtain a good estimate for
+  the other parameter.
+
 
   }
-  \item{zero}{
-  An integer-valued vector specifying which
-  linear/additive predictors are modelled as intercepts only.
-  Here, the values must be from the set \{1,2\} which correspond to
-  \code{a}, \code{scale}, respectively.
+  \item{gscale, gshape1.a}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+
+
+  }
+  \item{probs.y}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+
 
   }
 }
@@ -56,6 +63,8 @@ The mean is
   \deqn{E(Y) = b \, \Gamma(1 + 1/a) \, \Gamma(a - 1/a) / \Gamma(a)}{%
         E(Y) = b  gamma(1 + 1/a)  gamma(a - 1/a) /  gamma(a)}
 provided \eqn{a > 1}; these are returned as the fitted values.
+This family function handles multiple responses.
+
 
 
 }
@@ -78,13 +87,14 @@ Hoboken, NJ, USA: Wiley-Interscience.
 
 \author{ T. W. Yee }
 \note{
-  See the note in \code{\link{genbetaII}}.
+  See the notes in \code{\link{genbetaII}}.
 
 
 }
 
 \seealso{
     \code{\link{Paralogistic}},
+    \code{\link{sinmad}},
     \code{\link{genbetaII}},
     \code{\link{betaII}},
     \code{\link{dagum}},
@@ -96,10 +106,10 @@ Hoboken, NJ, USA: Wiley-Interscience.
 }
 
 \examples{
-pdata <- data.frame(y = rparalogistic(n = 3000, exp(1), scale = exp(2)))
+pdata <- data.frame(y = rparalogistic(n = 3000, exp(1), scale = exp(1)))
 fit <- vglm(y ~ 1, paralogistic(lss = FALSE), data = pdata, trace = TRUE)
-fit <- vglm(y ~ 1, paralogistic(lss = FALSE, ishape1.a = 2.3, iscale = 7),
-            data = pdata, trace = TRUE, epsilon = 1e-8)
+fit <- vglm(y ~ 1, paralogistic(ishape1.a = 2.3, iscale = 5),
+            data = pdata, trace = TRUE)
 coef(fit, matrix = TRUE)
 Coef(fit)
 summary(fit)
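+# Hedged check of a special case: paralogistic is Singh-Maddala with
+# a = q (see sinmad()), so the densities should agree.
+max(abs(dparalogistic(1:5, scale = 2, shape1.a = 3) -
+        dsinmad(1:5, scale = 2, shape1.a = 3, shape3.q = 3)))  # ~ 0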
diff --git a/man/plotdeplot.lmscreg.Rd b/man/plotdeplot.lmscreg.Rd
index ea2b202..aaa406c 100644
--- a/man/plotdeplot.lmscreg.Rd
+++ b/man/plotdeplot.lmscreg.Rd
@@ -86,9 +86,9 @@ Quantile regression via vector generalized additive models.
 \emph{Statistics in Medicine}, \bold{23}, 2295--2315.
 
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
 
 
 }
diff --git a/man/plotqtplot.lmscreg.Rd b/man/plotqtplot.lmscreg.Rd
index 21597a3..1bb5cea 100644
--- a/man/plotqtplot.lmscreg.Rd
+++ b/man/plotqtplot.lmscreg.Rd
@@ -82,9 +82,9 @@ Yee, T. W. (2004)
 Quantile regression via vector generalized additive models.
 \emph{Statistics in Medicine}, \bold{23}, 2295--2315.
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
 
 }
 \author{ Thomas W. Yee }
diff --git a/man/posbernoulli.t.Rd b/man/posbernoulli.t.Rd
index 4c67249..4724c5f 100644
--- a/man/posbernoulli.t.Rd
+++ b/man/posbernoulli.t.Rd
@@ -157,15 +157,10 @@ Yee, T. W. and Stoklosa, J. and Huggins, R. M. (2015)
 The \pkg{VGAM} package for capture--recapture data using the
 conditional likelihood.
   \emph{Journal of Statistical Software},
-  in press.
-
-% \bold{62}, 3--135.
-
-
-%\emph{Journal of Statistical Software},
-%\bold{32}, 1--34.
-%\url{http://www.jstatsoft.org/v32/i10/}.
+  \bold{65}, 1--33.
+  \url{http://www.jstatsoft.org/v65/i05/}.
 
+% \bold{65}(5), 1--33.
 
 
 
diff --git a/man/posbernoulli.tb.Rd b/man/posbernoulli.tb.Rd
index b53624d..56fcca2 100644
--- a/man/posbernoulli.tb.Rd
+++ b/man/posbernoulli.tb.Rd
@@ -203,6 +203,7 @@ posbernoulli.tb(link = "logit", parallel.t = FALSE ~ 1,
   \code{\link{posbernoulli.t}},
   \code{\link{posbinomial}},
   \code{\link{Select}},
+  \code{\link{fill1}},
   \code{\link{Huggins89table1}},
   \code{\link{Huggins89.t1}},
   \code{\link{deermice}},
diff --git a/man/posnormal.Rd b/man/posnormal.Rd
index 62df13f..6250b24 100644
--- a/man/posnormal.Rd
+++ b/man/posnormal.Rd
@@ -80,14 +80,14 @@ posnormal(lmean = "identitylink", lsd = "loge",
 
 
 }
-\references{
-
-  Documentation accompanying the \pkg{VGAM} package at
-  \url{http://www.stat.auckland.ac.nz/~yee}
-  contains further information and examples.
-
-
-}
+%\references{
+%
+%  Documentation accompanying the \pkg{VGAM} package at
+%  \url{http://www.stat.auckland.ac.nz/~yee}
+%  contains further information and examples.
+%
+%
+%}
 \author{ Thomas W. Yee }
 \note{
   The response variable for this family function is the same as
diff --git a/man/pospoisson.Rd b/man/pospoisson.Rd
index 497f8c6..84f3d1b 100644
--- a/man/pospoisson.Rd
+++ b/man/pospoisson.Rd
@@ -61,9 +61,9 @@ The equilibrium size distribution of freely-forming groups.
 \emph{Sociometry}, \bold{24}, 36--45.
 
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
 
 
 }
diff --git a/man/propodds.Rd b/man/propodds.Rd
index 0901c3c..27db074 100644
--- a/man/propodds.Rd
+++ b/man/propodds.Rd
@@ -60,9 +60,9 @@ Vector generalized additive models.
 \bold{58}, 481--493.
 
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
 
 
 }
diff --git a/man/qtplot.lmscreg.Rd b/man/qtplot.lmscreg.Rd
index 25c2751..16524bb 100644
--- a/man/qtplot.lmscreg.Rd
+++ b/man/qtplot.lmscreg.Rd
@@ -50,9 +50,9 @@ Yee, T. W. (2004)
 Quantile regression via vector generalized additive models.
 \emph{Statistics in Medicine}, \bold{23}, 2295--2315.
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
 
 }
 \author{ Thomas W. Yee }
diff --git a/man/rhobit.Rd b/man/rhobit.Rd
index 6ad52e1..dec88fe 100644
--- a/man/rhobit.Rd
+++ b/man/rhobit.Rd
@@ -59,13 +59,13 @@ rhobit(theta, bminvalue = NULL, bmaxvalue = NULL,
 
 
 }
-\references{
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
-
-
-}
+%\references{
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
+%
+%
+%}
 \author{ Thomas W. Yee }
 
 \note{
diff --git a/man/rrvglm-class.Rd b/man/rrvglm-class.Rd
index 6035ec7..299837a 100644
--- a/man/rrvglm-class.Rd
+++ b/man/rrvglm-class.Rd
@@ -232,7 +232,7 @@ Vector generalized additive models.
 \bold{58}, 481--493.
 
 
-\url{http://www.stat.auckland.ac.nz/~yee}
+%\url{http://www.stat.auckland.ac.nz/~yee}
 
 
 }
diff --git a/man/rrvglm.Rd b/man/rrvglm.Rd
index 2df90b6..87c2f19 100644
--- a/man/rrvglm.Rd
+++ b/man/rrvglm.Rd
@@ -148,13 +148,14 @@ Regression and ordered categorical variables.
 
   Yee, T. W. (2014)
   Reduced-rank vector generalized linear models with two linear predictors.
-  \emph{Computational Statistics and Data Analysis}.
+  \emph{Computational Statistics and Data Analysis},
+  \bold{71}, 889--902.
 
 
 
-  Documentation accompanying the \pkg{VGAM} package at
-  \url{http://www.stat.auckland.ac.nz/~yee}
-  contains further information and examples.
+%  Documentation accompanying the \pkg{VGAM} package at
+%  \url{http://www.stat.auckland.ac.nz/~yee}
+%  contains further information and examples.
 
 }
 
diff --git a/man/s.Rd b/man/s.Rd
index 308de92..b2aaa41 100644
--- a/man/s.Rd
+++ b/man/s.Rd
@@ -117,6 +117,7 @@ Vector generalized additive models.
 
 \seealso{
   \code{\link{vgam}},
+  \code{\link{is.buggy}},
   \code{\link{vsmooth.spline}}.
 
 
diff --git a/man/sinmad.Rd b/man/sinmad.Rd
index 1e83ac3..50c9d16 100644
--- a/man/sinmad.Rd
+++ b/man/sinmad.Rd
@@ -7,8 +7,11 @@
   Singh-Maddala distribution.
 }
 \usage{
-sinmad(lss, lshape1.a = "loge", lscale = "loge", lshape3.q = "loge",
-            ishape1.a = NULL, iscale = NULL, ishape3.q = 1, zero = NULL)
+sinmad(lscale = "loge", lshape1.a = "loge", lshape3.q = "loge", 
+       iscale = NULL, ishape1.a = NULL, ishape3.q = NULL, imethod = 1, 
+       lss = TRUE, gscale = exp(-5:5), gshape1.a = exp(-5:5),
+       gshape3.q = exp(-5:5), probs.y = c(0.25, 0.5, 0.75),
+       zero = ifelse(lss, -(2:3), -c(1, 3)))
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
@@ -19,19 +22,26 @@ sinmad(lss, lshape1.a = "loge", lscale = "loge", lshape3.q = "loge",
 
   \item{lshape1.a, lscale, lshape3.q}{
   Parameter link functions applied to the
-  (positive) parameters \code{a}, \code{scale}, and \code{q}.
+  (positive) parameters \eqn{a}, \code{scale}, and \eqn{q}.
   See \code{\link{Links}} for more choices.
 
   }
-  \item{ishape1.a, iscale, ishape3.q}{
-  Optional initial values for \code{a}, \code{scale}, and \code{q}.
+  \item{iscale, ishape1.a, ishape3.q, imethod, zero}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+  For \code{imethod = 2} a good initial value for
+  \code{ishape3.q} is needed to obtain good estimates for
+  the other parameters.
+
 
   }
-  \item{zero}{
-  An integer-valued vector specifying which
-  linear/additive predictors are modelled as intercepts only.
-  Here, the values must be from the set \{1,2,3\} which correspond to
-  \code{a}, \code{scale}, \code{q}, respectively.
+  \item{gscale, gshape1.a, gshape3.q}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+
+
+  }
+  \item{probs.y}{
+  See \code{\link{CommonVGAMffArguments}} for information.
+
 
   }
 }
@@ -44,11 +54,13 @@ beta-P, and generalized log-logistic distribution.
   More details can be found in Kleiber and Kotz (2003).
 
 
+
 Some distributions which are special cases of the 3-parameter Singh-Maddala
 are the Lomax (\eqn{a=1}), Fisk (\eqn{q=1}), and
 paralogistic (\eqn{a=q}).
 
 
+
 The Singh-Maddala distribution has density
   \deqn{f(y) = aq y^{a-1} / [b^a \{1 + (y/b)^a\}^{1+q}]}{%
         f(y) = aq y^(a-1) / [b^a (1 + (y/b)^a)^(1+q)]}
@@ -62,6 +74,7 @@ The mean is
   \deqn{E(Y) = b \, \Gamma(1 + 1/a) \, \Gamma(q - 1/a) / \Gamma(q)}{%
         E(Y) = b  gamma(1 + 1/a)  gamma(q - 1/a) /  gamma(q)}
 provided \eqn{-a < 1 < aq}; these are returned as the fitted values.
+This family function handles multiple responses.
 
 
 }
@@ -84,7 +97,7 @@ Hoboken, NJ, USA: Wiley-Interscience.
 
 \author{ T. W. Yee }
 \note{
-  See the note in \code{\link{genbetaII}}.
+  See the notes in \code{\link{genbetaII}}.
 
 
 }
diff --git a/man/sinmadUC.Rd b/man/sinmadUC.Rd
index 35db9b5..9b66908 100644
--- a/man/sinmadUC.Rd
+++ b/man/sinmadUC.Rd
@@ -51,7 +51,7 @@ rsinmad(n, scale = 1, shape1.a, shape3.q)
 Kleiber, C. and Kotz, S. (2003)
 \emph{Statistical Size Distributions in Economics and
              Actuarial Sciences},
-Hoboken, NJ: Wiley-Interscience.
+Hoboken, NJ, USA: Wiley-Interscience.
 
 
 }
@@ -75,7 +75,8 @@ Hoboken, NJ: Wiley-Interscience.
 
 }
 \examples{
-sdata <- data.frame(y = rsinmad(n = 3000, scale = exp(2), shape1 = exp(1), exp(1)))
+sdata <- data.frame(y = rsinmad(n = 3000, scale = exp(2),
+                                shape1 = exp(1), shape3 = exp(1)))
 fit <- vglm(y ~ 1, sinmad(lss = FALSE, ishape1.a = 2.1), data = sdata,
             trace = TRUE, crit = "coef")
 coef(fit, matrix = TRUE)
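+# Hedged check of a special case noted in sinmad(): with q = 1 the
+# Singh-Maddala density reduces to the Fisk density.
+max(abs(dsinmad(1:5, scale = 2, shape1.a = 3, shape3.q = 1) -
+        dfisk(1:5, scale = 2, shape1.a = 3)))  # Should be ~ 0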
diff --git a/man/smartpred.Rd b/man/smartpred.Rd
index d17a399..7b66e2a 100644
--- a/man/smartpred.Rd
+++ b/man/smartpred.Rd
@@ -193,9 +193,9 @@ sm.scale(x, center = TRUE, scale = TRUE)
   \code{\link[splines]{bs}} and \code{\link[splines]{ns}} call.
 
 
-  The website \url{http://www.stat.auckland.ac.nz/~yee}
-  contains more information such as how to write a
-  smart function, and other technical details.
+% The website \url{http://www.stat.auckland.ac.nz/~yee}
+% contains more information such as how to write a
+% smart function, and other technical details.
 
 
   The functions \code{\link[VGAM]{vglm}},
diff --git a/man/sratio.Rd b/man/sratio.Rd
index 4b77baf..554e825 100644
--- a/man/sratio.Rd
+++ b/man/sratio.Rd
@@ -90,9 +90,12 @@ The \pkg{VGAM} package for categorical data analysis.
 \url{http://www.jstatsoft.org/v32/i10/}.
 
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
+
+
 
 }
 \author{ Thomas W. Yee }
diff --git a/man/tobit.Rd b/man/tobit.Rd
index ef3a7d4..a6adee3 100644
--- a/man/tobit.Rd
+++ b/man/tobit.Rd
@@ -23,6 +23,7 @@ tobit(Lower = 0, Upper = Inf, lmu = "identitylink", lsd = "loge",
   variable.
   May be a vector (see below for more information).
 
+
   }
   \item{Upper}{
   Numeric. It is the value \eqn{U} described below. 
@@ -33,6 +34,7 @@ tobit(Lower = 0, Upper = Inf, lmu = "identitylink", lsd = "loge",
   variable.
   May be a vector (see below for more information).
 
+
   }
   \item{lmu, lsd}{
   Parameter link functions for the mean and standard deviation parameters.
@@ -40,11 +42,13 @@ tobit(Lower = 0, Upper = Inf, lmu = "identitylink", lsd = "loge",
   The standard deviation is a positive quantity, therefore a log link 
   is its default.
 
+
   }
 
   \item{imu, isd}{
   See \code{\link{CommonVGAMffArguments}} for information.
 
+
   }
   \item{type.fitted}{
   Type of fitted value returned.
@@ -62,11 +66,13 @@ tobit(Lower = 0, Upper = Inf, lmu = "identitylink", lsd = "loge",
   Initialization method. Either 1 or 2, this specifies
   two methods for obtaining initial values for the parameters.
 
+
   }
   \item{nsimEIM}{
-  Used if nonstandard Tobit model.
+  Used for the nonstandard Tobit model.
   See \code{\link{CommonVGAMffArguments}} for information.
 
+
   }
   \item{zero}{
   An integer vector, containing the value 1 or 2. If so,
@@ -74,6 +80,8 @@ tobit(Lower = 0, Upper = Inf, lmu = "identitylink", lsd = "loge",
   as an intercept-only.
   Setting \code{zero = NULL} means both linear/additive predictors
   are modelled as functions of the explanatory variables.
+  See \code{\link{CommonVGAMffArguments}} for information.
+
 
   }
 }
@@ -157,6 +165,19 @@ tobit(Lower = 0, Upper = Inf, lmu = "identitylink", lsd = "loge",
   to \code{tobit()}.
 
 
+
+
+% 20150417; McClelland Kemp bug:
+
+
+  When obtaining initial values, if the algorithm would
+  otherwise want to fit an underdetermined system of
+  equations, then it uses the entire data set instead.
+  This might result in rather poor-quality initial values,
+  and consequently monitoring convergence is advised.
+
+
+
 }
 \seealso{
   \code{\link{rtobit}},
diff --git a/man/undocumented-methods.Rd b/man/undocumented-methods.Rd
index 16b923d..e9e69c0 100644
--- a/man/undocumented-methods.Rd
+++ b/man/undocumented-methods.Rd
@@ -5,6 +5,13 @@
 %
 %
 %
+% 201503, for R 3.1.3:
+\alias{is.buggy,ANY-method}
+\alias{is.buggy,vlm-method}
+\alias{familyname,ANY-method}
+\alias{familyname,vlm-method}
+\alias{familyname,vglmff-method}
+%
 % 201412
 \alias{nparam,ANY-method}
 \alias{nparam,vlm-method}
@@ -35,9 +42,9 @@
 \alias{simulate,vlm-method}
 %
 % 20131104
-\alias{familyname,ANY-method}
-\alias{familyname,vlm-method}
-\alias{familyname,vglmff-method}
+\alias{family.name,ANY-method}
+\alias{family.name,vlm-method}
+\alias{family.name,vglmff-method}
 % 20130903
 \alias{BIC,ANY-method}
 \alias{BIC,vlm-method}
diff --git a/man/uninormal.Rd b/man/uninormal.Rd
index 74962d2..2d0fafe 100644
--- a/man/uninormal.Rd
+++ b/man/uninormal.Rd
@@ -114,6 +114,7 @@ Hoboken, NJ, USA: John Wiley and Sons, Fourth edition.
     \code{\link{skewnormal}},
     \code{\link{double.cens.normal}},
     \code{\link{SURff}},
+    \code{\link{AR1}},
     \code{\link{huber2}},
     \code{\link{studentt}},
     \code{\link{binormal}},
diff --git a/man/vgam-class.Rd b/man/vgam-class.Rd
index dd73aca..1aeaa0b 100644
--- a/man/vgam-class.Rd
+++ b/man/vgam-class.Rd
@@ -226,7 +226,7 @@ Vector generalized additive models.
 \bold{58}, 481--493.
 
 
-\url{http://www.stat.auckland.ac.nz/~yee}
+%\url{http://www.stat.auckland.ac.nz/~yee}
 
 }
 \author{ Thomas W. Yee }
diff --git a/man/vgam.Rd b/man/vgam.Rd
index 8ec8b59..4f5594b 100644
--- a/man/vgam.Rd
+++ b/man/vgam.Rd
@@ -167,9 +167,9 @@ The \code{VGAM} Package.
 \emph{R News}, \bold{8}, 28--39.
 
 
-  Documentation accompanying the \pkg{VGAM} package at
-  \url{http://www.stat.auckland.ac.nz/~yee}
-  contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
 
 
 }
@@ -215,7 +215,8 @@ The \code{VGAM} Package.
 \section{WARNING}{
   Currently \code{vgam} can only handle constraint matrices \code{cmat},
   say, such that \code{crossprod(cmat)} is diagonal.
-  This is a bug that I will try to fix up soon.
+  This is a bug that I will try to fix up soon;
+  see \code{\link{is.buggy}}.
 
 
   See warnings in \code{\link{vglm.control}}.
@@ -224,6 +225,7 @@ The \code{VGAM} Package.
 }
 
 \seealso{
+  \code{\link{is.buggy}},
   \code{\link{vgam.control}},
   \code{\link{vgam-class}},
   \code{\link{vglmff-class}},
diff --git a/man/vglm-class.Rd b/man/vglm-class.Rd
index 63453c2..14c364e 100644
--- a/man/vglm-class.Rd
+++ b/man/vglm-class.Rd
@@ -210,7 +210,7 @@ Vector generalized additive models.
 \bold{58}, 481--493.
 
 
-\url{http://www.stat.auckland.ac.nz/~yee}
+%\url{http://www.stat.auckland.ac.nz/~yee}
 
 
 }
diff --git a/man/vglm.Rd b/man/vglm.Rd
index 4820cd1..7da2c59 100644
--- a/man/vglm.Rd
+++ b/man/vglm.Rd
@@ -353,9 +353,9 @@ The \code{VGAM} Package.
 \emph{R News}, \bold{8}, 28--39.
 
 
-  Documentation accompanying the \pkg{VGAM} package at
-  \url{http://www.stat.auckland.ac.nz/~yee}
-  contains further information and examples.
+%  Documentation accompanying the \pkg{VGAM} package at
+%  \url{http://www.stat.auckland.ac.nz/~yee}
+%  contains further information and examples.
 
 
 }
diff --git a/man/vglm.control.Rd b/man/vglm.control.Rd
index 57fff4d..350a938 100644
--- a/man/vglm.control.Rd
+++ b/man/vglm.control.Rd
@@ -235,7 +235,7 @@ vglm.control(checkwz = TRUE, Check.rank = TRUE, Check.cm.rank = TRUE,
 
 \seealso{
   \code{\link{vglm}},
-  \code{\link{fill}}.
+  \code{\link{fill1}}.
   The author's homepage has further documentation about
   the \code{xij} argument;
   see also \code{\link{Select}}.
diff --git a/man/vglmff-class.Rd b/man/vglmff-class.Rd
index 6006ecd..fd3917d 100644
--- a/man/vglmff-class.Rd
+++ b/man/vglmff-class.Rd
@@ -195,9 +195,9 @@ Vector generalized additive models.
 \bold{58}, 481--493.
 
 
-\url{http://www.stat.auckland.ac.nz/~yee} contains further
-information on how to write \pkg{VGAM} family functions.
-The file is amongst other \pkg{VGAM} PDF documentation.
+%\url{http://www.stat.auckland.ac.nz/~yee} contains further
+%information on how to write \pkg{VGAM} family functions.
+%The file is amongst other \pkg{VGAM} PDF documentation.
 
 
 }
diff --git a/man/wrapup.smart.Rd b/man/wrapup.smart.Rd
index 89086e8..825bfaa 100644
--- a/man/wrapup.smart.Rd
+++ b/man/wrapup.smart.Rd
@@ -24,13 +24,13 @@ wrapup.smart()
 
 
 }
-\references{
-  See the technical help file at \url{http://www.stat.auckland.ac.nz/~yee}
-  for details.
-
-
-
-}
+%\references{
+% See the technical help file at \url{http://www.stat.auckland.ac.nz/~yee}
+% for details.
+%
+%
+%
+%}
 \seealso{
   \code{\link{setup.smart}}. 
 
diff --git a/man/zapoisson.Rd b/man/zapoisson.Rd
index 06999c4..3b1c93a 100644
--- a/man/zapoisson.Rd
+++ b/man/zapoisson.Rd
@@ -143,9 +143,9 @@ A Bayesian analysis of zero-inflated generalized Poisson model.
 
 
 
-Documentation accompanying the \pkg{VGAM} package at
-\url{http://www.stat.auckland.ac.nz/~yee}
-contains further information and examples.
+%Documentation accompanying the \pkg{VGAM} package at
+%\url{http://www.stat.auckland.ac.nz/~yee}
+%contains further information and examples.
 
 
 }
diff --git a/man/zero.Rd b/man/zero.Rd
index 4dc149a..6b8925d 100644
--- a/man/zero.Rd
+++ b/man/zero.Rd
@@ -88,7 +88,7 @@ Reduced-rank vector generalized linear models.
 \bold{3}, 15--41.
 
 
-\url{http://www.stat.auckland.ac.nz/~yee}
+%\url{http://www.stat.auckland.ac.nz/~yee}
 
 
 }
diff --git a/vignettes/categoricalVGAM.Rnw b/vignettes/categoricalVGAM.Rnw
new file mode 100644
index 0000000..8394144
--- /dev/null
+++ b/vignettes/categoricalVGAM.Rnw
@@ -0,0 +1,2325 @@
+\documentclass[article,shortnames,nojss]{jss}
+\usepackage{thumbpdf}
+%% need no \usepackage{Sweave.sty}
+
+\SweaveOpts{engine=R,eps=FALSE}
+%\VignetteIndexEntry{The VGAM Package for Categorical Data Analysis}
+%\VignetteDepends{VGAM}
+%\VignetteKeywords{categorical data analysis, Fisher scoring, iteratively reweighted least squares, multinomial distribution, nominal and ordinal polytomous responses, smoothing, vector generalized linear and additive models, VGAM R package}
+%\VignettePackage{VGAM}
+
+%% new commands
+\newcommand{\sVLM}{\mbox{\scriptsize VLM}}
+\newcommand{\sformtwo}{\mbox{\scriptsize F2}}
+\newcommand{\pr}{\mbox{$P$}}
+\newcommand{\logit}{\mbox{\rm logit}}
+\newcommand{\bzero}{{\bf 0}}
+\newcommand{\bone}{{\bf 1}}
+\newcommand{\bid}{\mbox{\boldmath $d$}}
+\newcommand{\bie}{\mbox{\boldmath $e$}}
+\newcommand{\bif}{\mbox{\boldmath $f$}}
+\newcommand{\bix}{\mbox{\boldmath $x$}}
+\newcommand{\biy}{\mbox{\boldmath $y$}}
+\newcommand{\biz}{\mbox{\boldmath $z$}}
+\newcommand{\biY}{\mbox{\boldmath $Y$}}
+\newcommand{\bA}{\mbox{\rm \bf A}}
+\newcommand{\bB}{\mbox{\rm \bf B}}
+\newcommand{\bC}{\mbox{\rm \bf C}}
+\newcommand{\bH}{\mbox{\rm \bf H}}
+\newcommand{\bI}{\mbox{\rm \bf I}}
+\newcommand{\bX}{\mbox{\rm \bf X}}
+\newcommand{\bW}{\mbox{\rm \bf W}}
+\newcommand{\bY}{\mbox{\rm \bf Y}}
+\newcommand{\bbeta}{\mbox{\boldmath $\beta$}}
+\newcommand{\boldeta}{\mbox{\boldmath $\eta$}}
+\newcommand{\bmu}{\mbox{\boldmath $\mu$}}
+\newcommand{\bnu}{\mbox{\boldmath $\nu$}}
+\newcommand{\diag}{ \mbox{\rm diag} }
+\newcommand{\Var}{ \mbox{\rm Var} }
+\newcommand{\R}{{\textsf{R}}}
+\newcommand{\VGAM}{\pkg{VGAM}}
+
+
+\author{Thomas W. Yee\\University of Auckland}
+\Plainauthor{Thomas W. Yee}
+
+\title{The \pkg{VGAM} Package for Categorical Data Analysis}
+\Plaintitle{The VGAM Package for Categorical Data Analysis}
+
+\Abstract{
+  Classical categorical regression models such as the multinomial logit and
+  proportional odds models are shown to be readily handled by the  vector
+  generalized linear and additive model (VGLM/VGAM) framework. Additionally,
+  there are natural extensions, such as reduced-rank VGLMs for
+  dimension reduction, and allowing covariates that have values
+  specific to each linear/additive predictor,
+  e.g., for consumer choice modeling. This article describes some of the
+  framework behind the \pkg{VGAM} \R{} package, its usage and implementation
+  details.
+}
+\Keywords{categorical data analysis, Fisher scoring,
+  iteratively reweighted least squares,
+  multinomial distribution, nominal and ordinal polytomous responses,
+  smoothing, vector generalized linear and additive models,
+  \VGAM{} \R{} package}
+\Plainkeywords{categorical data analysis, Fisher scoring,
+  iteratively reweighted least squares, multinomial distribution,
+  nominal and ordinal polytomous responses, smoothing,
+  vector generalized linear and additive models, VGAM R package}
+
+\Address{
+  Thomas W. Yee \\
+  Department of Statistics \\
+  University of Auckland, Private Bag 92019 \\
+  Auckland Mail Centre \\
+  Auckland 1142, New Zealand \\
+  E-mail: \email{t.yee at auckland.ac.nz}\\
+  URL: \url{http://www.stat.auckland.ac.nz/~yee/}
+}
+
+
+\begin{document}
+
+
+<<echo=FALSE, results=hide>>=
+library("VGAM")
+library("VGAMdata")
+ps.options(pointsize = 12)
+options(width = 72, digits = 4)
+options(SweaveHooks = list(fig = function() par(las = 1)))
+options(prompt = "R> ", continue = "+")
+@
+
+
+% ----------------------------------------------------------------------
+\section{Introduction}
+\label{sec:jsscat.intoduction}
+
+
+This is a \pkg{VGAM} vignette for categorical data analysis (CDA)
+based on \cite{Yee:2010}.
+Any subsequent features (especially non-backward compatible ones)
+will appear here.
+
+The subject of CDA is concerned with
+analyses where the response is categorical regardless of whether
+the explanatory variables are continuous or categorical. Such data
+arise very frequently. Over the years several CDA regression
+models for polytomous responses have become popular, e.g., those
+in Table \ref{tab:cat.quantities}. Not surprisingly, the models
+are interrelated: their foundation is the multinomial distribution
+and consequently they share similar and overlapping properties which
+modellers should know and exploit. Unfortunately, software has been
+slow to reflect their commonality and this makes analyses unnecessarily
+difficult for the practitioner on several fronts, e.g., using different
+functions/procedures to fit different models, which does not aid the
+understanding of their connections.
+
+
+This historical misfortune can be seen by considering \R{} functions
+for CDA. From the Comprehensive \proglang{R} Archive Network
+(CRAN, \url{http://CRAN.R-project.org/}) there is \texttt{polr()}
+\citep[in \pkg{MASS};][]{Venables+Ripley:2002} for a proportional odds
+model and \texttt{multinom()}
+\citep[in \pkg{nnet};][]{Venables+Ripley:2002} for the multinomial
+logit model. However, both of these can be considered `one-off'
+modeling functions rather than providing a unified offering for CDA.
+The function \texttt{lrm()} \citep[in \pkg{rms};][]{Harrell:2009}
+has greater functionality: it can fit the proportional odds model
+(and the forward continuation ratio model upon preprocessing). Neither
+\texttt{polr()} nor \texttt{lrm()} appears able to fit the nonproportional
+odds model. There are non-CRAN packages too, such as the modeling
+function \texttt{nordr()} \citep[in \pkg{gnlm};][]{gnlm:2007}, which can fit
+the proportional odds, continuation ratio and adjacent categories models;
+however it calls \texttt{nlm()} and the user must supply starting values.
+In general these \R{} \citep{R} modeling functions are not modular
+and often require preprocessing and sometimes are not self-starting.
+The implementations can be perceived as scattered and piecemeal
+in nature. Consequently, if the practitioner wishes to fit the models
+of Table \ref{tab:cat.quantities} then there is a need to master several
+modeling functions from several packages each having different syntaxes
+etc. This is a hindrance to efficient CDA.
+
+
+ 
+\begin{table}[tt]
+\centering
+\begin{tabular}{|c|c|l|}
+\hline
+Quantity & Notation &
+%Range of $j$ &
+\VGAM{} family function \\
+\hline
+%
+$\pr(Y=j+1) / \pr(Y=j)$ &$\zeta_{j}$ &
+%$1,\ldots,M$ &
+\texttt{acat()} \\
+%
+$\pr(Y=j) / \pr(Y=j+1)$ &$\zeta_{j}^{R}$ &
+%$2,\ldots,M+1$ &
+\texttt{acat(reverse = TRUE)} \\
+%
+$\pr(Y>j|Y \geq j)$ &$\delta_{j}^*$ &
+%$1,\ldots,M$ & 
+\texttt{cratio()} \\
+%
+$\pr(Y<j|Y \leq j)$ &$\delta_{j}^{*R}$ &
+%$2,\ldots,M+1$ &
+\texttt{cratio(reverse = TRUE)} \\
+%
+$\pr(Y\leq j)$ &$\gamma_{j}$ &
+%$1,\ldots,M$ &
+\texttt{cumulative()} \\
+%
+$\pr(Y\geq j)$ &$\gamma_{j}^R$&
+%$2,\ldots,M+1$ &
+\texttt{cumulative(reverse = TRUE)} \\
+%
+$\log\{\pr(Y=j)/\pr(Y=M+1)\}$ & &
+%$1,\ldots,M$ &
+\texttt{multinomial()} \\
+%
+$\pr(Y=j|Y \geq j)$ &$\delta_{j}$ &
+%$1,\ldots,M$ &
+\texttt{sratio()} \\
+%
+$\pr(Y=j|Y \leq j)$ &$\delta_{j}^R$ &
+%$2,\ldots,M+1$ &
+\texttt{sratio(reverse = TRUE)} \\
+%
+\hline
+\end{tabular}
+\caption{
+Quantities defined in \VGAM{} for a
+categorical response $Y$ taking values $1,\ldots,M+1$.
+Covariates \bix{} have been omitted for clarity.
+The LHS quantities are $\eta_{j}$
+or $\eta_{j-1}$ for $j=1,\ldots,M$ (not reversed)
+and $j=2,\ldots,M+1$ (if reversed), respectively.
+All models are estimated by minimizing the deviance.
+All except for \texttt{multinomial()} are suited to ordinal $Y$.
+\label{tab:cat.quantities}
+}
+\end{table}
+ 
+
+
+
+\proglang{SAS} \citep{SAS} does not fare much better than \R. Indeed,
+it could be considered as having an \textit{excess} of options which
+bewilders the non-expert user; there is little coherent overriding
+structure. Its \code{proc logistic} handles the multinomial logit
+and proportional odds models, as well as exact logistic regression
+\citep[see][which is for Version 8 of \proglang{SAS}]{stok:davi:koch:2000}.
+The fact that the proportional odds model may be fitted by \code{proc
+logistic}, \code{proc genmod} and \code{proc probit} arguably leads
+to confusion rather than the making of connections, e.g.,
+\code{genmod} is primarily for GLMs and the proportional odds model is not
+a GLM in the classical \cite{neld:wedd:1972} sense. Also, \code{proc
+phreg} fits the multinomial logit model, and \code{proc catmod} with
+its WLS implementation adds to further potential confusion.
+
+
+This article attempts to show how these deficiencies can be addressed
+by considering the vector generalized linear and additive model
+(VGLM/VGAM) framework, as implemented by the author's \pkg{VGAM}
+package for \R{}. The main purpose of this paper is to demonstrate
+how the framework is very well suited to many `classical' regression
+models for categorical responses, and to describe the implementation and
+usage of \pkg{VGAM} for such. To this end an outline of this article
+is as follows. Section \ref{sec:jsscat.VGLMVGAMoverview} summarizes
+the basic VGLM/VGAM framework. Section \ref{sec:jsscat.vgamff}
+centers on functions for CDA in \VGAM. Given an adequate framework,
+some natural extensions of Section \ref{sec:jsscat.VGLMVGAMoverview} are
+described in Section \ref{sec:jsscat.othermodels}. Users of \pkg{VGAM}
+can benefit from Section \ref{sec:jsscat.userTopics} which shows how
+the software reflects their common theory. Some examples are given in
+Section \ref{sec:jsscat.eg}. Section \ref{sec:jsscat.implementDetails}
+contains selected topics in statistical computing that are
+more relevant to programmers interested in the underlying code.
+Section \ref{sec:jsscat.extnUtil} discusses several utilities and
+extensions needed for advanced CDA modeling, and the article concludes
+with a discussion. This document was run using \pkg{VGAM} 0.7-10
+\citep{yee:VGAM:2010} under \R 2.10.0.
+
+
+Some general references for categorical data providing
+background to this article include
+\cite{agre:2010},
+\cite{agre:2013},
+\cite{fahr:tutz:2001},
+\cite{leon:2000},
+\cite{lloy:1999},
+\cite{long:1997},
+\cite{mccu:neld:1989},
+\cite{simo:2003},
+\citet{smit:merk:2013} and
+\cite{tutz:2012}.
+An overview of models for ordinal responses is \cite{liu:agre:2005},
+and a manual for fitting common models found in \cite{agre:2002}
+to polytomous responses with various software is \cite{thom:2009}.
+A package for visualizing categorical data in \R{} is \pkg{vcd}
+\citep{Meyer+Zeileis+Hornik:2006,Meyer+Zeileis+Hornik:2009}.
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{VGLM/VGAM overview}
+\label{sec:jsscat.VGLMVGAMoverview}
+
+
+This section summarizes the VGLM/VGAM framework with a particular emphasis
+toward categorical models since these classes encapsulate many multivariate
+response models in, e.g., survival analysis, extreme value analysis,
+quantile and expectile regression, time series, bioassay data, nonlinear
+least-squares models, and scores of standard and nonstandard univariate
+and continuous distributions. The framework is partially summarized by
+Table \ref{tab:rrvglam.jss.subset}. More general details about VGLMs
+and VGAMs can be found in \cite{yee:hast:2003} and \cite{yee:wild:1996}
+respectively. An informal and practical article connecting the general
+framework with the software is \cite{Rnews:Yee:2008}.
+
+
+
+\subsection{VGLMs}
+\label{sec:wffc.appendixa.vglms}
+
+Suppose the observed response \biy{} is a $q$-dimensional vector.
+VGLMs are defined as a model for which the conditional distribution
+of $\biY$ given explanatory $\bix$ is of the form
+\begin{eqnarray}
+f(\biy | \bix ; \bB, \phi)  =  h(\biy, \eta_1,\ldots, \eta_M, \phi)
+\label{gammod}
+\end{eqnarray}
+for some known function $h(\cdot)$, where $\bB = (\bbeta_1 \,
+\bbeta_2 \, \cdots \, \bbeta_M)$ is a $p \times M$ matrix of
+unknown regression coefficients,
+and the $j$th linear predictor is
+\begin{equation}
+\eta_j  =  \eta_j(\bix)  =  \bbeta_j^{\top} \bix  = 
+\sum_{k=1}^p \beta_{(j)k} \, x_k ,  \qquad j=1,\ldots,M.
+\label{gammod2}
+\end{equation}
+Here $\bix=(x_1,\ldots,x_p)^{\top}$ with $x_1 = 1$ if there is an intercept.
+Note that (\ref{gammod2}) means that \textit{all} the parameters may be
+potentially modelled as functions of \bix. It can be seen that VGLMs are
+like GLMs but allow for multiple linear predictors, and they encompass
+models outside the small confines of the exponential family.
+In (\ref{gammod}) the quantity $\phi$ is an optional scaling parameter
+which is included for backward compatibility with common adjustments
+to overdispersion, e.g., with respect to GLMs.
+
+
+In general there is no relationship between $q$ and $M$: it
+depends specifically on the model or distribution to be fitted.
+However, for the `classical' categorical regression models of
+Table \ref{tab:cat.quantities} we have $M=q-1$ since $q$ is the number
+of levels the multi-category response $Y$ has.
+
+
+
+
+
+The $\eta_j$ of VGLMs may be applied directly to parameters of a
+distribution rather than just to a mean for GLMs. A simple example is
+a univariate distribution with a location parameter $\xi$ and a scale
+parameter $\sigma > 0$, where we may take $\eta_1 = \xi$ and $\eta_2 =
+\log\,\sigma$. In general, $\eta_{j}=g_{j}(\theta_{j})$ for some parameter
+link function $g_{j}$ and parameter $\theta_{j}$.
+For example, the adjacent categories models in
+Table \ref{tab:cat.quantities} are ratios of two probabilities, therefore
+a log link of $\zeta_{j}^{R}$ or $\zeta_{j}$ is the default.
+In \VGAM{}, there are currently over a dozen links to choose from, of
+which any can be assigned to any parameter, ensuring maximum flexibility.
+Table \ref{tab:jsscat.links} lists some of them.
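+As a minimal sketch (not run; the response \texttt{y}, covariate
+\texttt{x2} and data frame \texttt{mydat} are hypothetical), a
+two-parameter location-scale fit might look like
+\begin{Code}
+# uninormal() uses eta_1 = mu and eta_2 = log(sigma), so both the
+# mean and the standard deviation may depend on covariates
+fit <- vglm(y ~ x2, uninormal, data = mydat)
+coef(fit, matrix = TRUE)  # a p x M matrix: one column per eta_j
+\end{Code}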
+
+
+
+\begin{table}[tt]
+\centering
+%\  ~~~ \par
+\begin{tabular}{|l|l|l|l|}
+\hline
+\qquad \qquad $\boldeta$ & 
+Model & Modeling & Reference \\
+ & & function & \\
+%-------------------------------------------------------------
+\hline
+\hline
+%-------------------------------------------------------------
+ &&&\\[-1.1ex]
+$\bB_1^{\top} \bix_{1} + \bB_2^{\top} \bix_{2}\ ( = \bB^{\top} \bix)$ &
+VGLM & \texttt{vglm()}
+&
+\cite{yee:hast:2003} \\[1.6ex]
+%Yee \& Hastie (2003) \\[1.6ex]
+%-------------------------------------------------------------
+\hline
+ &&&\\[-1.1ex]
+$\bB_1^{\top} \bix_{1} +
+ \sum\limits_{k=p_1+1}^{p_1+p_2} \bH_k \, \bif_{k}^{*}(x_k)$ &
+%\sum\limits_{k=1}^{p_2} \bH_k \, \bif_k(x_k)$ &
+VGAM & \texttt{vgam()}
+&
+\cite{yee:wild:1996} \\[2.2ex]
+%Yee \& Wild (1996) \\[2.2ex]
+%-------------------------------------------------------------
+\hline
+ &&&\\[-1.1ex]
+$\bB_1^{\top} \bix_{1} + \bA \, \bnu$ &
+RR-VGLM & \texttt{rrvglm()}
+&
+\cite{yee:hast:2003} \\[1.8ex]
+%Yee \& Hastie (2003) \\[1.8ex]
+%-------------------------------------------------------------
+\hline
+ &&&\\[-1.1ex]
+See \cite{yee:hast:2003} &
+Goodman's RC & \texttt{grc()}
+&
+%\cite{yee:hast:2003} \\[1.8ex]
+\cite{good:1981} \\[1.8ex]
+%-------------------------------------------------------------
+\hline
+\end{tabular}
+\caption{
+Some of the models supported by
+the package \VGAM{} and
+its framework.
+The vector of latent variables $\bnu = \bC^{\top} \bix_2$
+where
+$\bix^{\top} = (\bix_1^{\top}, \bix_2^{\top})$.
+\label{tab:rrvglam.jss.subset}
+}
+%\medskip
+\end{table}
+
+
+
+
+
+
+VGLMs are estimated using iteratively reweighted least squares (IRLS) 
+which is particularly suitable for categorical models
+\citep{gree:1984}.
+All models in this article have a log-likelihood
+\begin{equation}
+\ell  =  \sum_{i=1}^n \, w_i \, \ell_i
+\label{eq:log-likelihood.VGAM}
+\end{equation}
+where the $w_i$ are known positive prior weights.
+Let $\bix_i$ denote the explanatory vector for the $i$th observation,
+for $i=1,\dots,n$.
+Then one can write
+\begin{eqnarray}
+\boldeta_i &=& \boldeta(\bix_i)  = 
+\left(
+\begin{array}{c}
+\eta_1(\bix_i) \\
+\vdots \\
+\eta_M(\bix_i)
+\end{array} \right)  = 
+\bB^{\top} \bix_i  =  
+\left(
+\begin{array}{c}
+\bbeta_1^{\top} \bix_i \\
+\vdots \\
+\bbeta_M^{\top} \bix_i
+\end{array} \right)
+\nonumber
+\\
+&=& 
+\left(
+\begin{array}{cccc}
+\beta_{(1)1} & \cdots & \beta_{(1)p} \\
+\vdots \\
+\beta_{(M)1} & \cdots & \beta_{(M)p} \\
+\end{array} \right)
+\bix_i  = 
+\left(
+\bbeta_{(1)} \; \cdots \; \bbeta_{(p)}
+\right)
+\bix_i .
+\label{eq:lin.pred}
+\end{eqnarray}
+In IRLS,
+an adjusted dependent vector $\biz_i = \boldeta_i + \bW_i^{-1} \bid_i$
+is regressed upon a large (VLM) model matrix, with
+$\bid_i = w_i \, \partial \ell_i / \partial \boldeta_i$.
+The working weights $\bW_i$ here are 
+$w_i \Var(\partial \ell_i / \partial \boldeta_i)$
+(which, under regularity conditions, is equal to
+$-w_i \, E[ \partial^2 \ell_i / (\partial \boldeta_i \,
+\partial \boldeta_i^{\top})]$),
+giving rise to the Fisher scoring algorithm.
+
+
+Let $\bX=(\bix_1,\ldots,\bix_n)^{\top}$ be the usual $n \times p$
+(LM) model matrix
+obtained from the \texttt{formula} argument of \texttt{vglm()}.
+Given $\biz_i$, $\bW_i$ and $\bX{}$ at the current IRLS iteration,
+a weighted multivariate regression is performed.
+To do this, a \textit{vector linear model} (VLM) model matrix 
+$\bX_{\sVLM}$ is formed from $\bX{}$ and $\bH_k$
+(see Section \ref{sec:wffc.appendixa.vgams}).
+This has $nM$ rows and, if there are no constraints, $Mp$ columns.
+Then $\left(\biz_1^{\top},\ldots,\biz_n^{\top}\right)^{\top}$ is regressed
+upon $\bX_{\sVLM}$
+with variance-covariance matrix $\diag(\bW_1^{-1},\ldots,\bW_n^{-1})$.
+This system of linear equations is converted to one large
+WLS fit by premultiplication of the output of
+a Cholesky decomposition of the $\bW_i$.
+
+
+Fisher scoring usually has good numerical stability
+because the $\bW_i$ are positive-definite over a larger
+region of the parameter space than the working weights of Newton-Raphson.
+For the categorical models in this article the expected
+information matrices are simpler than the observed
+information matrices, and are easily derived,
+therefore all the families in Table \ref{tab:cat.quantities}
+implement Fisher scoring.
+
+
+
+\subsection{VGAMs and constraint matrices}
+\label{sec:wffc.appendixa.vgams}
+
+
+VGAMs provide additive-model extensions to VGLMs, that is,
+(\ref{gammod2}) is generalized to
+\begin{equation}
+\eta_j(\bix)  =  \beta_{(j)1} +
+\sum_{k=2}^p \; f_{(j)k}(x_k), \qquad j = 1,\ldots, M,
+\label{addmod}
+\end{equation}
+a sum of smooth functions of the individual covariates, just as
+with ordinary GAMs \citep{hast:tibs:1990}. The $\bif_k =
+(f_{(1)k}(x_k),\ldots,f_{(M)k}(x_k))^{\top}$ are centered for uniqueness,
+and are estimated simultaneously using \textit{vector smoothers}.
+VGAMs are thus a visual data-driven method that is well suited to
+exploring data, and they retain the simplicity of interpretation that
+GAMs possess.
+
+
+
+An important concept, especially for CDA, is the idea of
+`constraints-on-the-functions'.
+In practice we often wish to constrain the effect of a covariate to
+be the same for some of the $\eta_j$ and to have no effect for others.
+We shall see below that this constraints idea is important
+for several categorical models because of a popular parallelism assumption.
+As a specific example, for VGAMs we may wish to take
+\begin{eqnarray*}
+\eta_1 & = & \beta_{(1)1} + f_{(1)2}(x_2) + f_{(1)3}(x_3), \\
+\eta_2 & = & \beta_{(2)1} + f_{(1)2}(x_2),
+\end{eqnarray*}
+so that $f_{(1)2} \equiv f_{(2)2}$ and $f_{(2)3} \equiv 0$.
+For VGAMs, we can represent these models using
+\begin{eqnarray}
+\boldeta(\bix) & = & \bbeta_{(1)} + \sum_{k=2}^p \, \bif_k(x_k)
+\ =\ \bH_1 \, \bbeta_{(1)}^* + \sum_{k=2}^p \, \bH_k \, \bif_k^*(x_k)
+\label{eqn:constraints.VGAM}
+\end{eqnarray}
+where $\bH_1,\bH_2,\ldots,\bH_p$ are known full-column rank
+\textit{constraint matrices}, $\bif_k^*$ is a vector containing a
+possibly reduced set of component functions and $\bbeta_{(1)}^*$ is a
+vector of unknown intercepts. With no constraints at all, $\bH_1 =
+\bH_2 = \cdots = \bH_p = \bI_M$ and $\bbeta_{(1)}^* = \bbeta_{(1)}$.
+Like the $\bif_k$, the $\bif_k^*$ are centered for uniqueness.
+For VGLMs, the $\bif_k$ are linear so that
+\begin{eqnarray}
+{\bB}^{\top} &=&
+\left(
+\bH_1 \bbeta_{(1)}^*
+ \;
+\Bigg|
+ \;
+\bH_2 \bbeta_{(2)}^*
+ \;
+\Bigg|
+ \;
+\cdots
+ \;
+\Bigg|
+ \;
+\bH_p \bbeta_{(p)}^*
+\right) 
+\label{eqn:lin.coefs4}
+\end{eqnarray}
+for some vectors
+$\bbeta_{(1)}^*,\ldots,\bbeta_{(p)}^*$.
+
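+As an illustrative sketch (not run; the family function and the
+variables are placeholders), the $M=2$ example above could be
+requested through the \texttt{constraints} argument:
+\begin{Code}
+# H_1 = I_2, H_2 = (1,1)^T and H_3 = (1,0)^T give
+# f_{(1)2} == f_{(2)2} and f_{(2)3} == 0
+clist <- list("(Intercept)" = diag(2),
+              "s(x2)"       = rbind(1, 1),
+              "s(x3)"       = rbind(1, 0))
+fit <- vgam(y ~ s(x2) + s(x3), family = someVGAMfamily,  # placeholder family
+            constraints = clist, data = mydat)
+\end{Code}
+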
+
+The
+$\bX_{\sVLM}$ matrix is constructed from \bX{} and the $\bH_k$ using
+Kronecker product operations.
+For example, with trivial constraints,
+$\bX_{\sVLM} = \bX \otimes \bI_M$.
+More generally,
+\begin{eqnarray}
+\bX_{\sVLM} &=& 
+\left(
+\left( \bX \, \bie_{1} \right) \otimes \bH_1
+ \;
+\Bigg|
+ \;
+\left( \bX \, \bie_{2} \right) \otimes \bH_2
+ \;
+\Bigg|
+ \;
+\cdots
+ \;
+\Bigg|
+ \;
+\left( \bX \, \bie_{p} \right) \otimes \bH_p
+\right)
+\label{eqn:X_vlm_Hk}
+\end{eqnarray}
+($\bie_{k}$ is a vector of zeros except for a one in the $k$th position)
+so that 
+$\bX_{\sVLM}$ is $(nM) \times p^*$ where
+$p^* = \sum_{k=1}^{p} \mbox{\textrm{ncol}}(\bH_k)$ is the total number
+of columns of all the constraint matrices.
+Note that $\bX_{\sVLM}$ and \bX{} can be obtained by
+\texttt{model.matrix(vglmObject, type = "vlm")}
+and
+\texttt{model.matrix(vglmObject, type = "lm")}
+respectively.
+Equation \ref{eqn:lin.coefs4} focusses on the rows of \bB{} whereas
+Equation \ref{eq:lin.pred} focusses on its columns.
+
+
+VGAMs are estimated by applying a modified vector backfitting algorithm
+\citep[cf.][]{buja:hast:tibs:1989} to the $\biz_i$.
+
+
+
+\subsection{Vector splines and penalized likelihood}
+\label{sec:ex.vspline}
+
+If (\ref{eqn:constraints.VGAM}) is estimated using a vector spline (a
+natural extension of the cubic smoothing spline to vector responses)
+then it can be shown that the resulting solution maximizes a penalized
+likelihood; some details are sketched in \cite{yee:step:2007}. In fact,
+knot selection for vector splines follows the same idea as O-splines
+\citep[see][]{wand:orme:2008} in order to lower the computational cost.
+
+
+The usage of \texttt{vgam()} with smoothing is very similar
+to \texttt{gam()} \citep{gam:pack:2009}, e.g.,
+to fit a nonparametric proportional odds model
+\citep[cf. p.179 of][]{mccu:neld:1989}
+to the pneumoconiosis data one could try
+<<label = pneumocat, eval=T>>=
+pneumo <- transform(pneumo, let = log(exposure.time))
+fit <- vgam(cbind(normal, mild, severe) ~ s(let, df = 2),
+            cumulative(reverse = TRUE, parallel = TRUE), data = pneumo)
+@
+Here, setting \texttt{df = 1} means a linear fit so that
+\texttt{df = 2} affords a little nonlinearity.
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section[VGAM family functions]{\pkg{VGAM} family functions}
+\label{sec:jsscat.vgamff}
+
+
+
+This section summarizes and comments on the \VGAM{} family functions
+of Table \ref{tab:cat.quantities} for a categorical response variable
+taking values $Y=1,2,\ldots,M+1$. In its most basic invocation, the usage
+entails a trivial change compared to \texttt{glm()}: use \texttt{vglm()}
+instead and assign the \texttt{family} argument a \VGAM{} family function.
+The use of a \VGAM{} family function to fit a specific model is far
+simpler than having a different modeling function for each model.
+Options specific to that model appear as arguments of that \VGAM{} family
+function.
+
+
+
+
+
+While writing \texttt{cratio()} it was found that various authors defined
+the quantity ``continuation ratio'' differently, therefore it became
+necessary to define a ``stopping ratio''. Table \ref{tab:cat.quantities}
+defines these quantities for \VGAM{}.
+
+
+
+
+The multinomial logit model is usually described by choosing the first or
+last level of the factor to be baseline. \VGAM{} chooses the last level
+(Table \ref{tab:cat.quantities}) by default, however that can be changed
+to any other level by use of the \texttt{refLevel} argument.
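+For instance, on the \texttt{pneumo} data used elsewhere in this
+article, a sketch of changing the baseline to the first level is
+\begin{Code}
+fit <- vglm(cbind(normal, mild, severe) ~ let,
+            multinomial(refLevel = 1), data = pneumo)
+\end{Code}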
+
+
+
+
+If the proportional odds assumption is inadequate then one strategy is
+to try a different link function (see Section \ref{sec:jsscat.links}
+for a selection). Another alternative is to add extra terms such as
+interaction terms into the linear predictor
+\citep[available in the \proglang{S} language;][]{cham:hast:1993}.
+Another is to fit the so-called \textit{partial}
+proportional odds model \citep{pete:harr:1990}
+which \VGAM{} can fit via constraint matrices.
+
+
+
+In the terminology of \cite{agre:2002},
+\texttt{cumulative()} fits the class of \textit{cumulative link models},
+e.g.,
+\texttt{cumulative(link = probit)} is a cumulative probit model.
+For \texttt{cumulative()}
+it was difficult to decide whether
+\texttt{parallel = TRUE}
+or
+\texttt{parallel = FALSE}
+should be the default.
+In fact, the latter is the default, at least for now.
+Users need to set
+\texttt{cumulative(parallel = TRUE)} explicitly to
+fit a proportional odds model---hopefully this will alert
+them to the fact that they are making
+the proportional odds assumption and
+check its validity (\cite{pete:1990}; e.g., through a deviance or
+likelihood ratio test). However, the default makes numerical problems
+far more likely to occur.
+Thus there is tension between the two options.
+As a compromise there is now a \VGAM{} family function
+called \texttt{propodds(reverse = TRUE)} which is equivalent to
+\texttt{cumulative(parallel = TRUE, reverse = reverse, link = "logit")}.
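+As a sketch of this equivalence on the \texttt{pneumo} data, the
+following two calls should produce essentially the same fit:
+\begin{Code}
+fit1 <- vglm(cbind(normal, mild, severe) ~ let, propodds, data = pneumo)
+fit2 <- vglm(cbind(normal, mild, severe) ~ let,
+             cumulative(parallel = TRUE, reverse = TRUE, link = "logit"),
+             data = pneumo)
+max(abs(coef(fit1) - coef(fit2)))  # should be essentially zero
+\end{Code}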
+
+
+
+By the way, note that arguments such as 
+\texttt{parallel}
+can handle a slightly more complex syntax.
+A call such as
+\code{parallel = TRUE ~ x2 + x5 - 1} means the parallelism assumption
+is only applied to $X_2$ and $X_5$.
+This might be equivalent to something like
+\code{parallel = FALSE ~ x3 + x4}, i.e., to the remaining
+explanatory variables.
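+As a hypothetical sketch (the response matrix \texttt{ymat}, the
+covariates and the data frame \texttt{mydat} are placeholders), the
+formula interface looks like
+\begin{Code}
+# parallelism applies to x2 and x5 only; x3 and x4 each get their
+# own coefficient for every eta_j
+fit <- vglm(ymat ~ x2 + x3 + x4 + x5,
+            cumulative(parallel = TRUE ~ x2 + x5 - 1), data = mydat)
+\end{Code}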
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{Other models}
+\label{sec:jsscat.othermodels}
+
+
+Given the VGLM/VGAM framework of Section \ref{sec:jsscat.VGLMVGAMoverview}
+it is found that natural extensions are readily proposed in several
+directions. This section describes some such extensions.
+
+
+
+
+\subsection{Reduced-rank VGLMs}
+\label{sec:jsscat.RRVGLMs}
+
+
+Consider a multinomial logit model where $p$ and $M$ are both large.
+A (not-too-convincing) example might be the data frame \texttt{vowel.test}
+in the package \pkg{ElemStatLearn} \citep[see][]{hast:tibs:buja:1994}.
+The vowel recognition data set involves $q=11$ symbols produced from
+8 speakers with 6 replications of each. The training data comprises
+$10$ input features (not including the intercept) based on digitized
+utterances. A multinomial logit model fitted to these data would
+have $\widehat{\bB}$ comprising $p \times (q-1) = 110$ regression
+coefficients for $n=8\times 6\times 11 = 528$ observations. The ratio
+of $n$ to the number of parameters is small, and it would be good to
+introduce some parsimony into the model.
+
+
+
+A simple and elegant solution is to represent $\widehat{\bB}$ by
+its reduced-rank approximation. To do this, partition $\bix$ into
+$(\bix_1^{\top}, \bix_2^{\top})^{\top}$ and $\bB = (\bB_1^{\top} \;
+\bB_2^{\top})^{\top}$ so that the reduced-rank regression is applied
+to $\bix_2$. In general, \bB{} is a dense matrix of full rank, i.e., rank
+$=\min(M,p)$, and since there are $M \times p$ regression coefficients
+to estimate this is `too' large for some models and/or data sets.
+If we approximate $\bB_2$ by a reduced-rank regression
+\begin{equation}
+\label{eq:rrr.BAC}
+\bB_2  =  \bC{} \, \bA^{\top}
+\end{equation}
+and if the rank $R$ is kept low then this can cut down the number of regression
+coefficients dramatically. If $R=2$ then the results may be biplotted
+(\texttt{biplot()} in \VGAM{}). Here, \bC{} and \bA{} are $p_2 \times R$
+and $M \times R$ respectively, and usually they are `thin'.
+
+
+More generally, the class of \textit{reduced-rank VGLMs} (RR-VGLMs)
+is simply a VGLM where $\bB_2$ is expressed as a product of two thin
+estimated matrices (Table \ref{tab:rrvglam.jss.subset}). Indeed,
+\cite{yee:hast:2003} show that RR-VGLMs are VGLMs with constraint
+matrices that are unknown and estimated. Computationally, this is
+done using an alternating method: in (\ref{eq:rrr.BAC}) estimate \bA{}
+given the current estimate of \bC{}, and then estimate \bC{} given the
+current estimate of \bA{}. This alternating algorithm is repeated until
+convergence within each IRLS iteration.
+
+
+Incidentally, special cases of RR-VGLMs have appeared in the
+literature. For example, an RR-multinomial logit model is known as the
+\textit{stereotype} model \citep{ande:1984}. Another is \cite{good:1981}'s
+RC model (see Section \ref{sec:jsscat.rrr.goodman}), which is a reduced-rank
+multivariate Poisson model. Note that the parallelism assumption of the
+proportional odds model \citep{mccu:neld:1989} can be thought of as a
+type of reduced-rank regression where the constraint matrices are thin
+($\bone_M$, actually) and known.
+
+
+
+The modeling function \texttt{rrvglm()} should work with any \VGAM{}
+family function compatible with \texttt{vglm()}. Of course, its
+applicability should be restricted to models where a reduced-rank
+regression of $\bB_2$ makes sense.
+
+
+
+
+
+
+
+
+
+\subsection[Goodman's R x C association model]{Goodman's $R \times C$ association model}
+\label{sec:jsscat.rrr.goodman}
+
+
+
+
+
+Let $\bY = [(y_{ij})]$ be an $n \times M$ matrix of counts.
+Section 4.2 of \cite{yee:hast:2003} shows that Goodman's RC$(R)$ association
+model \citep{good:1981} fits within the VGLM framework by setting up
+the appropriate indicator variables, structural zeros and constraint
+matrices. Goodman's model fits a reduced-rank type model to \bY{}
+by firstly assuming that $Y_{ij}$ has a Poisson distribution, and that
+\begin{eqnarray}
+\log \, \mu_{ij} &=& \mu + \alpha_{i} + \gamma_{j} + 
+\sum_{k=1}^R a_{ik} \, c_{jk} , 
+\ \ \ i=1,\ldots,n;\ \ j=1,\ldots,M,
+\label{eqn:goodmanrc}
+\end{eqnarray}
+where $\mu_{ij} = E(Y_{ij})$ is the mean of the $i$-$j$ cell, and the
+rank $R$ satisfies $R < \min(n,M)$.
+
+
+The modeling function \texttt{grc()} should work on any two-way
+table \bY{} of counts generated by (\ref{eqn:goodmanrc}) provided
+the number of 0's is not too large. Its usage is quite simple, e.g.,
+\texttt{grc(Ymatrix, Rank = 2)} fits a rank-2 model to a matrix of counts.
+By default a \texttt{Rank = 1} model is fitted.
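+As a sketch (the matrix of counts \texttt{Ymat} is hypothetical):
+\begin{Code}
+grc1 <- grc(Ymat)            # Rank = 1 by default
+grc2 <- grc(Ymat, Rank = 2)  # rank-2 fit; can be biplotted
+\end{Code}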
+
+
+
+
+\subsection{Bradley-Terry models}
+\label{sec:jsscat.brat}
+
+Consider
+an experiment that consists of $n_{ij}$ judges who compare
+pairs of items $T_i$, $i=1,\ldots,M+1$.
+They express their preferences between $T_i$ and $T_j$. 
+Let $N=\sum \sum_{i<j} n_{ij}$ be the total number of pairwise
+comparisons, and assume independence for ratings of the same pair
+by different judges and for ratings of different pairs by the same judge.
+Let $\pi_i$ be the \textit{worth} of item $T_i$,
+\[
+\pr(T_i > T_j)  =  p_{i/ij}  =  \frac{\pi_i}{\pi_i + \pi_j},
+\  \qquad i \neq {j},
+\]
+where ``$T_i>T_j$'' means $i$ is preferred over $j$.
+Suppose that $\pi_i > 0$.
+Let $Y_{ij}$ be the number of times that $T_i$ is preferred
+over $T_j$ in the $n_{ij}$ comparisons of the pairs.
+Then $Y_{ij} \sim {\rm Bin}(n_{ij},p_{i/ij})$.
+This is a Bradley-Terry model (without ties),
+and the \VGAM{} family function is \texttt{brat()}.
+
+
+Maximum likelihood estimation of the parameters $\pi_1,\ldots,\pi_{M+1}$
+involves maximizing
+\[
+\prod_{i<j}^{M+1}
+\left(
+\begin{array}{c}
+n_{ij} \\
+y_{ij}
+\end{array} \right)
+\left(
+\frac{\pi_i}{\pi_i + \pi_j}
+\right)^{y_{ij}}
+\left(
+\frac{\pi_j}{\pi_i + \pi_j}
+\right)^{n_{ij}-y_{ij}} .
+\]
+By default, $\pi_{M+1} \equiv 1$ is used for identifiability;
+however, this can be changed very easily.
+Note that one can define 
+linear predictors $\eta_{ij}$ of the form
+\begin{equation}
+\label{eq:bradter.logit}
+\logit 
+\left(
+\frac{\pi_i}{\pi_i + \pi_j}
+\right)  =  \log 
+\left(
+\frac{\pi_i}{\pi_j}
+\right)  =  \lambda_i - \lambda_j .
+\end{equation}
+The \VGAM{} framework can handle the Bradley-Terry model only for
+intercept-only models; it has
+\begin{equation}
+\label{eq:bradter}
+\lambda_j  =  \eta_j  =  \log\, \pi_j = \beta_{(1)j},
+\ \ \ \ j=1,\ldots,M.
+\end{equation}
+
+
+As well as having many applications in the field of preferences,
+the Bradley-Terry model has many uses in modeling `contests' between
+teams $i$ and $j$, where only one of the teams can win in each
+contest (ties are not allowed under the classical model).
+The {packaging} function \texttt{Brat()} can be used to
+convert a square matrix into one that has more columns, to
+serve as input to \texttt{vglm()}.
+For example,
+consider journal citation data, where a citation of article B
+by article A is a win for article B and a loss for article A.
+For a specific data set,
+<<>>=
+journal <- c("Biometrika", "Comm.Statist", "JASA", "JRSS-B")
+squaremat <- matrix(c(NA, 33, 320, 284,   730, NA, 813, 276,
+                      498, 68, NA, 325,   221, 17, 142, NA), 4, 4)
+dimnames(squaremat) <- list(winner = journal, loser = journal)
+@
+then \texttt{Brat(squaremat)} returns a $1 \times 12$ matrix.
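+A sketch of the subsequent fit (cf. the \texttt{brat()} online help) is
+\begin{Code}
+fit <- vglm(Brat(squaremat) ~ 1, brat, trace = TRUE)
+Coef(fit)  # estimated worths pi_1,...,pi_M, with pi_{M+1} fixed at 1
+\end{Code}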
+
+
+
+
+
+
+
+\subsubsection{Bradley-Terry model with ties}
+\label{sec:cat.bratt}
+
+
+The \VGAM{} family function \texttt{bratt()} implements
+a Bradley-Terry model with ties (no preference), e.g.,
+where both $T_i$ and $T_j$ are equally good or bad.
+Here we assume
+\begin{eqnarray*}
+ \pr(T_i > T_j) &=& \frac{\pi_i}{\pi_i + \pi_j + \pi_0},
+\ \qquad
+ \pr(T_i = T_j)  =  \frac{\pi_0}{\pi_i + \pi_j + \pi_0},
+\end{eqnarray*}
+with $\pi_0 > 0$ as an extra parameter.
+It has 
+\[
+\boldeta=(\log \pi_1,\ldots, \log \pi_{M-1}, \log \pi_{0})^{\top}
+\]
+by default, where there are $M$ competitors and $\pi_M \equiv 1$.
+Like \texttt{brat()}, one can choose a different reference group
+and reference value.
+
+
+Other \R{} packages for the Bradley-Terry model
+include \pkg{BradleyTerry2}
+by H. Turner and D. Firth
+\citep[with and without ties;][]{firth:2005,firth:2008}
+and \pkg{prefmod} \citep{Hatzinger:2009}.
+
+
+
+
+\begin{table}[tt]
+\centering
+\begin{tabular}{|l|c|}
+\hline
+\pkg{VGAM} family function & Independent parameters \\
+\hline
+\texttt{ABO()} & $p, q$ \\
+\texttt{MNSs()} & $m_S, m_s, n_S$ \\
+\texttt{AB.Ab.aB.ab()} & $p$ \\
+\texttt{AB.Ab.aB.ab2()} & $p$ \\
+\texttt{AA.Aa.aa()} & $p_A$ \\
+\texttt{G1G2G3()} & $p_1, p_2, f$ \\
+\hline
+\end{tabular}
+\caption{Some genetic models currently implemented
+and their unique parameters.
+\label{tab:gen.all}
+}
+\end{table}
+
+
+
+
+
+\subsection{Genetic models}
+\label{sec:jsscat.genetic}
+
+
+There are quite a number of population genetic models based on the
+multinomial distribution,
+e.g., \cite{weir:1996}, \cite{lang:2002}.
+Table \ref{tab:gen.all} lists some \pkg{VGAM} family functions for such.
+
+
+
+
+For example, the ABO blood group system
+has two independent parameters $p$ and $q$, say.
+Here,
+the blood groups A, B and O form six possible combinations (genotypes)
+consisting of AA, AO, BB, BO, AB, OO
+(see Table \ref{tab:ABO}). A and B are dominant over
+blood type O. Let $p$, $q$ and $r$ be the probabilities
+for A, B and O respectively (so that
+$p+q+r=1$) for a given population. 
+The log-likelihood function is 
+\[
+\ell(p,q) \;=\; n_A\, \log(p^2 + 2pr) + n_B\, \log(q^2 + 2qr) + n_{AB}\,
+\log(2pq) + 2 n_O\, \log(1-p-q),
+\]
+where $r = 1 - p -q$, $p \in (\,0,1\,)$,
+$q \in (\,0,1\,)$, $p+q<1$.
+We let $\boldeta = (g(p), g(r))^{\top}$ where $g$ is the link function.
+Any $g$ from Table \ref{tab:jsscat.links} appropriate for
+a parameter $\theta \in (0,1)$ will do.
+
+
+A toy example where $p=p_A$ and $q=p_B$ is
+<<>>=
+abodat <- data.frame(A = 725, B = 258, AB = 72, O = 1073)
+fit <- vglm(cbind(A, B, AB, O) ~ 1, ABO, data = abodat)
+coef(fit, matrix = TRUE)
+Coef(fit)  # Estimated pA and pB
+@
+The function \texttt{Coef()}, which applies only to intercept-only models,
+takes $g_{j}(\theta_{j})=\eta_{j}$ and applies
+the inverse link function $g_{j}^{-1}$ to $\widehat{\eta}_{j}$
+to give $\widehat{\theta}_{j}$.
+
+
+
+
+
+
+
+\begin{table}[tt]
+% Same as Table 14.1 of E-J, and Table 2.6 of Weir 1996
+\begin{center}
+\begin{tabular}{|l|cc|cc|c|c|}
+\hline
+Genotype   & AA  & AO  & BB  &  BO  & AB  &  OO  \\
+Probability&$p^2$&$2pr$&$q^2$&$ 2qr$&$2pq$& $r^2$\\
+Blood group&  A  &  A  &  B  &  B   &  AB &  O \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Probability table for the ABO blood group system.
+Note that $p$ and $q$ are the parameters and $r=1-p-q$.
+\label{tab:ABO}
+}
+\end{table}
+
+
+
+
+
+\subsection{Three main distributions}
+\label{sec:jsscat.3maindist}
+
+\cite{agre:2002} discusses three main distributions for categorical
+variables: binomial, multinomial, and Poisson
+\citep{thom:2009}.
+All these are well-represented in the \VGAM{} package,
+accompanied by variant forms.
+For example,
+there is a \VGAM{} family function named \texttt{mbinomial()}
+which implements a matched binomial
+(suitable for matched case-control studies).
+Other examples include
+Poisson ordination (useful in ecology for multi-species-environmental data),
+negative binomial families,
+positive, zero-altered and zero-inflated variants,
+and the bivariate odds ratio model
+\citep[\texttt{binom2.or()}; see Section 6.5.6 of][]{mccu:neld:1989}.
+The latter has an \texttt{exchangeable} argument to allow for an
+exchangeable error structure:
+\begin{eqnarray}
+\bH_1  = 
+\left( \begin{array}{cc}
+1 & 0 \\
+1 & 0 \\
+0 & 1 \\
+\end{array} \right), \qquad
+\bH_k  = 
+\left( \begin{array}{c}
+1 \\
+1 \\
+0 \\
+\end{array} \right), \quad k=2,\ldots,p,
+\label{eqn:blom.exchangeable}
+\end{eqnarray}
+since, for data $(Y_1,Y_2,\bix)$,
+$\logit \, P\!\left( Y_{j} = 1 \Big{|} \bix \right) = 
+\eta_{j}$ for ${j}=1,2$, and
+$\log \, \psi = \eta_{3}$
+where $\psi$ is the odds ratio,
+and so $\eta_{1}=\eta_{2}$.
+Here, \texttt{binom2.or()} has \texttt{zero = 3} by default, meaning $\psi$ is
+modelled as intercept-only
+(in general, \texttt{zero} may be assigned an integer vector
+such that the value $j$ means $\eta_{j} = \beta_{(j)1}$,
+i.e., the $j$th linear/additive predictor is an intercept-only).
+See the online help for all of these models.
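+As a hypothetical sketch (binary responses \texttt{y1} and \texttt{y2},
+covariate \texttt{x2} and data frame \texttt{bdat} are placeholders),
+an exchangeable fit would resemble
+\begin{Code}
+fit <- vglm(cbind(y1, y2) ~ x2,
+            binom2.or(exchangeable = TRUE, zero = 3), data = bdat)
+constraints(fit)  # should match the H_k displayed above
+\end{Code}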
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{Some user-oriented topics}
+\label{sec:jsscat.userTopics}
+
+
+Making the most of \VGAM{} requires an understanding of the general
+VGLM/VGAM framework described in Section \ref{sec:jsscat.VGLMVGAMoverview}.
+In this section we connect elements of that framework with the software.
+Before doing so it is noted that
+a fitted \VGAM{} categorical model has access to the usual
+generic functions, e.g.,
+\texttt{coef()} for
+$\left(\widehat{\bbeta}_{(1)}^{*T},\ldots,\widehat{\bbeta}_{(p)}^{*T}\right)^{\top}$
+(see Equation \ref{eqn:lin.coefs4}),
+\texttt{constraints()} for $\bH_k$,
+\texttt{deviance()} for $2\left(\ell_{\mathrm{max}} - \ell\right)$,
+\texttt{fitted()} for $\widehat{\bmu}_i$,
+\texttt{logLik()} for $\ell$,
+\texttt{predict()} for $\widehat{\boldeta}_i$,
+\texttt{print()},
+\texttt{residuals(..., type = "response")} for $\biy_i - \widehat{\bmu}_i$ etc.,
+\texttt{summary()},
+\texttt{vcov()} for $\widehat{\Var}(\widehat{\bbeta})$,
+etc.
+The methods function for the extractor function
+\texttt{coef()} has an argument \texttt{matrix}
+which, when set \texttt{TRUE}, returns $\widehat{\bB}$
+(see Equation \ref{gammod}) as a $p \times M$ matrix,
+and this is particularly useful for confirming that a fit
+has made a parallelism assumption.
+
+
+
+
+
+
+
+\subsection{Common arguments}
+\label{sec:jsscat.commonArgs}
+
+
+The structure of the unified framework given in
+Section \ref{sec:jsscat.VGLMVGAMoverview}
+appears clearly through
+the pool of common arguments
+shared by the
+\VGAM{} family functions in Table \ref{tab:cat.quantities}.
+In particular,
+\texttt{reverse} and
+\texttt{parallel}
+are prominent with CDA.
+These are merely convenient shortcuts for the argument \texttt{constraints},
+which accepts a named list of constraint matrices $\bH_k$.
+For example, setting
+\texttt{cumulative(parallel = TRUE)} would constrain the coefficients $\beta_{(j)k}$
+in (\ref{gammod2}) to be equal for all $j=1,\ldots,M$,
+each separately for $k=2,\ldots,p$.
+That is, $\bH_k = \bone_M$.
+The argument \texttt{reverse} determines the `direction' of
+the parameter or quantity.
+
+Another argument not used so much with CDA is \texttt{zero};
+this accepts a vector specifying which $\eta_j$ are to be modelled as
+intercepts-only; assigning \texttt{NULL} means none are.
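+For example, on the \texttt{pneumo} data one can verify the shortcut by
+inspecting the constraint matrices of a fit (a sketch):
+\begin{Code}
+fit <- vglm(cbind(normal, mild, severe) ~ let,
+            cumulative(parallel = TRUE, reverse = TRUE), data = pneumo)
+constraints(fit)  # "(Intercept)": diag(M); "let": matrix(1, M, 1)
+\end{Code}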
+
+
+
+
+
+
+
+
+\subsection{Link functions}
+\label{sec:jsscat.links}
+
+Almost all \VGAM{} family functions
+(one notable exception is \texttt{multinomial()})
+allow, in theory, for any link function to be assigned to each $\eta_j$.
+This provides maximum flexibility.
+There is also an extra argument to pass in any known parameter
+associated with the link function.
+For example, \texttt{link = "logoff", earg = list(offset = 1)}
+signifies a log link with a unit offset:
+$\eta_{j} = \log(\theta_{j} + 1)$ for some parameter $\theta_{j}\ (> -1)$.
+The name \texttt{earg} stands for ``extra argument''.
+Table \ref{tab:jsscat.links} lists some links relevant to categorical data.
+While the default gives a reasonable first choice,
+users are encouraged to try different links.
+For example, fitting a binary regression model
+(\texttt{binomialff()}) to the coal miners data set \texttt{coalminers} with
+respect to the response wheeze gives a
+nonsignificant regression coefficient for $\beta_{(1)3}$ with probit analysis
+but not with a logit link when
+$\eta = \beta_{(1)1} + \beta_{(1)2} \, \mathrm{age} + \beta_{(1)3} \, \mathrm{age}^2$.
+Developers and serious users are encouraged to write and use
+new link functions compatible with \VGAM.
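+As a hedged sketch (the data frame \texttt{cmdat} and its counts of
+wheezers and non-wheezers are hypothetical), swapping links is a
+one-argument change:
+\begin{Code}
+fit.logit  <- vglm(cbind(nwheeze, nhealthy) ~ age + I(age^2),
+                   binomialff(link = "logit"),  data = cmdat)
+fit.probit <- vglm(cbind(nwheeze, nhealthy) ~ age + I(age^2),
+                   binomialff(link = "probit"), data = cmdat)
+coef(summary(fit.probit))  # compare the age^2 row across the two fits
+\end{Code}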
+
+
+
+
+
+
+\begin{table*}[tt]
+\centering
+\medskip
+\begin{tabular}{|l|c|c|}
+\hline
+Link function & $g(\theta)$ & Range of $\theta$ \\
+\hline
+\texttt{cauchit()} & $\tan(\pi(\theta-\frac12))$ & $(0,1)$ \\
+\texttt{cloglog()} & $\log_e\{-\log_e(1 - \theta)\}$ & $(0,1)$ \\
+\texttt{fisherz()} & 
+$\frac12\,\log_e\{(1 + \theta)/(1 - \theta)\}$ & $(-1,1)$ \\
+\texttt{identity()} & $\theta$ & $(-\infty,\infty)$ \\
+\texttt{logc()} & $\log_e(1 - \theta)$ & $(-\infty,1)$ \\
+\texttt{loge()} & $\log_e(\theta)$ & $(0,\infty)$ \\
+\texttt{logit()} & $\log_e(\theta/(1 - \theta))$ & $(0,1)$ \\
+\texttt{logoff()} & $\log_e(\theta + A)$ & $(-A,\infty)$ \\
+\texttt{probit()} & $\Phi^{-1}(\theta)$ & $(0,1)$ \\
+\texttt{rhobit()} & $\log_e\{(1 + \theta)/(1 - \theta)\}$ & $(-1,1)$ \\
+\hline
+\end{tabular}
+\caption{
+Some \VGAM{} link functions pertinent to this article.
+\label{tab:jsscat.links}
+}
+\end{table*}
+
+
+
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{Examples}
+\label{sec:jsscat.eg}
+
+This section illustrates CDA modeling on three
+data sets in order to give a flavour of what is available in the package.
+
+
+
+
+%20130919
+%Note: 
+%\subsection{2008 World Fly Fishing Championships}
+%\label{sec:jsscat.eg.WFFC}
+%are deleted since there are problems with accessing the \texttt{wffc.nc}
+%data etc. since they are now in \pkg{VGAMdata}.
+
+
+
+
+
+
+
+\subsection{Marital status data}
+\label{sec:jsscat.eg.mstatus}
+
+We fit a nonparametric multinomial logit model to data collected via
+a self-administered questionnaire in a large New Zealand
+workforce observational study conducted during 1992--3.
+The data were augmented by a second study consisting of retirees.
+For homogeneity, this analysis is restricted
+to a subset of 6053 European males with no missing values.
+The ages ranged between 16 and 88 years.
+The data can be considered a reasonable representation of the white
+male New Zealand population in the early 1990s, and
+are detailed in \cite{macm:etal:1995} and \cite{yee:wild:1996}.
+We are interested in exploring how $Y=$ marital status varies as a function
+of $x_2=$ age. The nominal response $Y$ has four levels;
+in sorted order, they are divorced or separated, married or partnered,
+single and widowed.
+We will write these levels as $Y=1$, $2$, $3$, $4$, respectively,
+and will choose the married/partnered (second level) as the reference group
+because the other levels emanate directly from it.
+
+Suppose the data are in a data frame called \texttt{marital.nz}
+and looks like
+<<>>=
+head(marital.nz, 4)
+summary(marital.nz)
+@
+We fit the VGAM
+<<>>=
+fit.ms <- vgam(mstatus ~ s(age, df = 3), multinomial(refLevel = 2),
+               data = marital.nz)
+@
+
+Let us first check the input.
+<<>>=
+head(depvar(fit.ms), 4)
+colSums(depvar(fit.ms))
+@
+This seems okay.
+
+
+
+
+Now the estimated component functions $\widehat{f}_{(s)2}(x_2)$
+may be plotted with
+<<fig=F>>=
+# Plot output
+mycol <- c("red", "darkgreen", "blue")
+par(mfrow = c(2, 2))
+plot(fit.ms, se = TRUE, scale = 12,
+         lcol = mycol, scol = mycol)
+
+# Plot output overlayed
+#par(mfrow=c(1,1))
+plot(fit.ms, se = TRUE, scale = 12,
+         overlay = TRUE,
+         llwd = 2,
+         lcol = mycol, scol = mycol)
+@
+to produce Figure \ref{fig:jsscat.eg.mstatus}.
+The \texttt{scale} argument is used here to ensure that the $y$-axes have
+a common scale---this makes comparisons between the component functions
+less susceptible to misinterpretation.
+The first three plots are the (centered) $\widehat{f}_{(s)2}(x_2)$ for
+$\eta_1$,
+$\eta_2$,
+$\eta_3$,
+where
+\begin{eqnarray}
+\label{eq:jsscat.eg.nzms.cf}
+\eta_{s}  = 
+\log(\pr(Y={t}) / \pr(Y={2}))  = 
+\beta_{(s)1} + f_{(s)2}(x_2),
+\end{eqnarray}
+$(s,t) = (1,1), (2,3), (3,4)$,
+and $x_2$ is \texttt{age}.
+The last plot shows the smooths overlaid to aid comparison.
+
+
+It may be seen that the $\pm 2$ standard error bands
+about the \texttt{Widowed} group are particularly wide at
+young ages because of a paucity of data, and
+likewise at old ages amongst the \texttt{Single}s.
+The $\widehat{f}_{(s)2}(x_2)$ appear as one would expect.
+The log relative risk of
+being single relative to being married/partnered drops sharply from
+ages 16 to 40.
+The fitted function for the \texttt{Widowed} group increases
+with \texttt{age} and looks reasonably linear.
+The $\widehat{f}_{(1)2}(x_2)$
+suggests a possible maximum around 50 years old---this
+could indicate the greatest marital conflict occurs during
+the mid-life crisis years!
+
+
+
+\setkeys{Gin}{width=0.9\textwidth} % 0.8 is the current default
+
+\begin{figure}[tt]
+\begin{center}
+<<fig=TRUE,width=8,height=5.6,echo=FALSE>>=
+# Plot output
+mycol <- c("red", "darkgreen", "blue")
+ par(mfrow = c(2, 2))
+ par(mar = c(4.2, 4.0, 1.2, 2.2) + 0.1)
+plot(fit.ms, se = TRUE, scale = 12,
+         lcol = mycol, scol = mycol)
+
+# Plot output overlaid
+#par(mfrow = c(1, 1))
+plot(fit.ms, se = TRUE, scale = 12,
+         overlay = TRUE,
+         llwd = 2,
+         lcol = mycol, scol = mycol)
+@
+\caption{
+Fitted (and centered) component functions
+$\widehat{f}_{(s)2}(x_2)$
+from the NZ marital status data
+(see Equation \ref{eq:jsscat.eg.nzms.cf}).
+The bottom RHS plot shows the smooths overlaid.
+\label{fig:jsscat.eg.mstatus}
+}
+\end{center}
+\end{figure}
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+
+
+The methods function for \texttt{plot()} can also plot the
+derivatives of the smooths.
+The call
+<<fig=F>>=
+plot(fit.ms, deriv=1, lcol=mycol, scale=0.3)
+@
+results in Figure \ref{fig:jsscat.eg.mstatus.cf.deriv}.
+Once again the $y$-axis scales are commensurate.
+
+\setkeys{Gin}{width=\textwidth} % 0.8 is the current default
+
+\begin{figure}[tt]
+\begin{center}
+<<fig=TRUE,width=7.2,height=2.4,echo=FALSE>>=
+# Plot output
+ par(mfrow = c(1, 3))
+ par(mar = c(4.5, 4.0, 0.2, 2.2) + 0.1)
+plot(fit.ms, deriv = 1, lcol = mycol, scale = 0.3)
+@
+\caption{
+Estimated first derivatives of the component functions,
+$\widehat{f'}_{(s)2}(x_2)$,
+from the NZ marital status data
+(see Equation \ref{eq:jsscat.eg.nzms.cf}).
+\label{fig:jsscat.eg.mstatus.cf.deriv}
+}
+\end{center}
+\end{figure}
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+
+The derivative for the \texttt{Divorced/Separated} group appears
+linear so that a quadratic component function could be tried.
+Not surprisingly the \texttt{Single} group shows the greatest change;
+also, $\widehat{f'}_{(2)2}(x_2)$ is approximately linear till 50
+and then flat---this suggests one could fit a piecewise quadratic
+function to model that component function up to 50 years.
+The \texttt{Widowed} group appears largely flat.
+We thus fit the parametric model
+<<>>=
+foo <- function(x, elbow = 50)
+  poly(pmin(x, elbow), 2)
+
+clist <- list("(Intercept)" = diag(3),
+             "poly(age, 2)" = rbind(1, 0, 0),
+             "foo(age)"     = rbind(0, 1, 0),
+             "age"          = rbind(0, 0, 1))
+fit2.ms <-
+    vglm(mstatus ~ poly(age, 2) + foo(age) + age,
+         family = multinomial(refLevel = 2),
+         constraints = clist,
+         data = marital.nz)
+@
+Then
+<<>>=
+coef(fit2.ms, matrix = TRUE)
+@
+confirms that one term was used for each component function.
+The plots from
+<<fig=F>>=
+par(mfrow = c(2, 2))
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[1], scol = mycol[1], which.term = 1)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[2], scol=mycol[2], which.term = 2)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[3], scol = mycol[3], which.term = 3)
+@
+are given in Figure \ref{fig:jsscat.eg.mstatus.vglm}
+and appear like
+Figure \ref{fig:jsscat.eg.mstatus}.
+
+
+\setkeys{Gin}{width=0.9\textwidth} % 0.8 is the current default
+
+\begin{figure}[tt]
+\begin{center}
+<<fig=TRUE,width=8,height=5.6,echo=FALSE>>=
+# Plot output
+par(mfrow=c(2,2))
+ par(mar=c(4.5,4.0,1.2,2.2)+0.1)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[1], scol = mycol[1], which.term = 1)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[2], scol = mycol[2], which.term = 2)
+plotvgam(fit2.ms, se = TRUE, scale = 12,
+         lcol = mycol[3], scol = mycol[3], which.term = 3)
+@
+\caption{
+Parametric version of \texttt{fit.ms}: \texttt{fit2.ms}.
+The component functions are now quadratic, piecewise quadratic/zero,
+or linear.
+\label{fig:jsscat.eg.mstatus.vglm}
+}
+\end{center}
+\end{figure}
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+
+
+
+It is possible to perform very crude inference based on heuristic theory
+of a deviance test:
+<<>>=
+deviance(fit.ms) - deviance(fit2.ms)
+@
+is small, so it seems the parametric model is quite reasonable
+against the original nonparametric model.
+Specifically,
+the difference in the number of `parameters' is approximately
+<<>>=
+(dfdiff <- df.residual(fit2.ms) - df.residual(fit.ms))
+@
+which gives an approximate $p$ value of
+<<>>=
+pchisq(deviance(fit.ms) - deviance(fit2.ms), df = dfdiff, lower.tail = FALSE)
+@
+Thus \texttt{fit2.ms} appears quite reasonable.
+
+
+
+
+
+
+
+
+The estimated probabilities of the original fit can be plotted
+against \texttt{age} using
+<<fig=F>>=
+ooo <- with(marital.nz, order(age))
+with(marital.nz, matplot(age[ooo], fitted(fit.ms)[ooo, ],
+     type = "l", las = 1, lwd = 2, ylim = 0:1,
+     ylab = "Fitted probabilities",
+     xlab = "Age",  # main="Marital status amongst NZ Male Europeans",
+     col = c(mycol[1], "black", mycol[-1])))
+legend(x = 52.5, y = 0.62,  # x="topright",
+       col = c(mycol[1], "black", mycol[-1]),
+       lty = 1:4,
+       legend = colnames(fit.ms@y), lwd = 2)
+abline(v = seq(10,90,by = 5), h = seq(0,1,by = 0.1), col = "gray", lty = "dashed")
+@
+which gives Figure \ref{fig:jsscat.eg.mstatus.fitted}.
+This shows that 80--90\% of NZ white males
+aged between their early 30s and mid-70s
+were married/partnered.
+The proportion widowed
+started to rise steeply from 70 years onwards but remained below 0.5
+since males die younger than females on average.
+
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+\begin{figure}[tt]
+\begin{center}
+<<fig=TRUE,width=8,height=4.8,echo=FALSE>>=
+ par(mfrow = c(1,1))
+ par(mar = c(4.5,4.0,0.2,0.2)+0.1)
+ooo <- with(marital.nz, order(age))
+with(marital.nz, matplot(age[ooo], fitted(fit.ms)[ooo,],
+     type = "l", las = 1, lwd = 2, ylim = 0:1,
+     ylab = "Fitted probabilities",
+     xlab = "Age",
+     col = c(mycol[1], "black", mycol[-1])))
+legend(x = 52.5, y = 0.62,
+       col = c(mycol[1], "black", mycol[-1]),
+       lty = 1:4,
+       legend = colnames(fit.ms@y), lwd = 2.1)
+abline(v = seq(10,90,by = 5), h = seq(0,1,by = 0.1), col = "gray", lty = "dashed")
+@
+\caption{
+Fitted probabilities for each class for the
+NZ male European
+marital status data
+(from Equation \ref{eq:jsscat.eg.nzms.cf}).
+\label{fig:jsscat.eg.mstatus.fitted}
+}
+\end{center}
+\end{figure}
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+
+
+
+
+
+
+\subsection{Stereotype model}
+\label{sec:jsscat.eg.grc.stereotype}
+
+We reproduce some of the analyses of \cite{ande:1984} regarding the
+progress of 101 patients with back pain
+using the data frame \texttt{backPain} from \pkg{gnm}
+\citep{Rnews:Turner+Firth:2007,Turner+Firth:2009}.
+The three prognostic variables are
+length of previous attack ($x_1=1,2$),
+pain change ($x_2=1,2,3$) 
+and lordosis ($x_3=1,2$).
+Following \cite{ande:1984}, we treat these as numerical, and standardize and negate them.
+%
+The output
+<<>>=
+# Scale the variables? Yes; the Anderson (1984) paper did (see his Table 6).
+head(backPain, 4)
+summary(backPain)
+backPain <- transform(backPain, sx1 = -scale(x1), sx2 = -scale(x2), sx3 = -scale(x3))
+@
+displays the six ordered categories.
+Now a rank-1 stereotype model can be fitted with
+<<>>=
+bp.rrmlm1 <- rrvglm(pain ~ sx1 + sx2 + sx3, multinomial, data = backPain)
+@
+Then
+<<>>=
+Coef(bp.rrmlm1)
+@
+are the fitted \bA, \bC{} and $\bB_1$ (see Equation \ref{eq:rrr.BAC}
+and Table \ref{tab:rrvglam.jss.subset}), which agree with his Table 6.
+Here, what are known as ``corner constraints'' are used
+($(1,1)$ element of \bA{} $\equiv 1$),
+and only the intercepts are not subject to any reduced-rank regression
+by default.
+The maximized log-likelihood from \textsl{\texttt{logLik(bp.rrmlm1)}}
+is $\Sexpr{round(logLik(bp.rrmlm1), 2)}$.
+The standard errors of each parameter can be obtained by
+\textsl{\texttt{summary(bp.rrmlm1)}}.
+The negative elements of $\widehat{\bC}$ imply the
+latent variable $\widehat{\nu}$ decreases in value with increasing
+\textsl{\texttt{sx1}},
+\textsl{\texttt{sx2}} and
+\textsl{\texttt{sx3}}.
+The elements of $\widehat{\bA}$ tend to decrease so it suggests
+patients get worse as $\nu$ increases,
+i.e., get better as \textsl{\texttt{sx1}},
+\textsl{\texttt{sx2}} and
+\textsl{\texttt{sx3}} increase.
+
+
+
+
+
+
+<<echo=FALSE>>=
+set.seed(123)
+@
+A rank-2 model fitted \textit{with a different normalization}
+<<>>=
+bp.rrmlm2 <- rrvglm(pain ~ sx1 + sx2 + sx3, multinomial, data = backPain, Rank = 2,
+                   Corner = FALSE, Uncor = TRUE)
+@
+produces uncorrelated $\widehat{\bnu}_i = \widehat{\bC}^{\top} \bix_{2i}$.
+In fact \textsl{\texttt{var(lv(bp.rrmlm2))}} equals $\bI_2$
+so that the latent variables are also scaled to have unit variance.
+The fit was biplotted
+(rows of $\widehat{\bC}$ plotted as arrows;
+ rows of $\widehat{\bA}$ plotted as labels) using
+<<figure=F>>=
+biplot(bp.rrmlm2, Acol = "blue", Ccol = "darkgreen", scores = TRUE,
+#      xlim = c(-1, 6), ylim = c(-1.2, 4),  # Use this if not scaled
+       xlim = c(-4.5, 2.2), ylim = c(-2.2, 2.2),  # Use this if scaled
+       chull = TRUE, clty = 2, ccol = "blue")
+@
+to give Figure \ref{fig:jsscat.eg.rrmlm2.backPain}.
+It is interpreted via inner products due to (\ref{eq:rrr.BAC}).
+The different normalization means that the interpretation of $\nu_1$
+and $\nu_2$ has changed, e.g., increasing
+\textsl{\texttt{sx1}},
+\textsl{\texttt{sx2}} and
+\textsl{\texttt{sx3}} results in increasing $\widehat{\nu}_1$ and
+patients improve more.
+Many of the latent variable points $\widehat{\bnu}_i$ are coincident
+due to the discrete nature of the $\bix_i$. The rows of $\widehat{\bA}$
+are centered on the blue labels (rather cluttered unfortunately) and
+do not seem to vary much as a function of $\nu_2$.
+In fact this is confirmed by \cite{ande:1984} who showed a rank-1
+model is to be preferred.
+
+
+
+This example demonstrates the ability to obtain a low dimensional view
+of higher dimensional data. The package's website has additional
+documentation including more detailed Goodman's RC and stereotype
+examples.
+
+
+
+
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+\begin{figure}[tt]
+\begin{center}
+<<fig=TRUE,width=8,height=5.3,echo=FALSE>>=
+# Plot output
+ par(mfrow=c(1,1))
+ par(mar=c(4.5,4.0,0.2,2.2)+0.1)
+
+biplot(bp.rrmlm2, Acol = "blue", Ccol = "darkgreen", scores = TRUE,
+#      xlim = c(-1,6), ylim = c(-1.2,4),  # Use this if not scaled
+       xlim = c(-4.5,2.2), ylim = c(-2.2, 2.2),  # Use this if scaled
+       chull = TRUE, clty = 2, ccol = "blue")
+@
+\caption{
+Biplot of a rank-2 reduced-rank multinomial logit (stereotype) model
+fitted to the back pain data.
+A convex hull surrounds the latent variable scores
+$\widehat{\bnu}_i$
+(whose observation numbers are obscured because of their discrete nature).
+The position of the $j$th row of $\widehat{\bA}$
+is the center of the label ``\texttt{log(mu[,j]/mu[,6])}''.
+\label{fig:jsscat.eg.rrmlm2.backPain}
+}
+\end{center}
+\end{figure}
+
+\setkeys{Gin}{width=0.8\textwidth} % 0.8 is the current default
+
+
+
+
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{Some implementation details}
+\label{sec:jsscat.implementDetails}
+
+This section describes some implementation details of \VGAM{}
+which will be more of interest to the developer than to the casual user.
+
+
+
+\subsection{Common code}
+\label{sec:jsscat.implementDetails.code}
+
+It is good programming practice to write reusable code where possible.
+All the \VGAM{} family functions in Table \ref{tab:cat.quantities}
+process the response in the same way because the same segment of code
+is executed. This offers a degree of uniformity in how input is
+handled, and also aids software maintenance
+(\cite{altm:jack:2010} enumerates good programming techniques and references).
+As well, the default initial values are computed in the same manner
+based on sample proportions of each level of $Y$.
+
+
+
+
+
+\subsection[Matrix-band format of wz]{Matrix-band format of \texttt{wz}}
+\label{sec:jsscat.implementDetails.mbformat}
+
+The working weight matrices $\bW_i$ may become large for categorical
+regression models. In general, we have to evaluate the $\bW_i$
+for $i=1,\ldots,n$, and naively, this could be held in an \texttt{array} of
+dimension \texttt{c(M, M, n)}. However, since the $\bW_i$ are symmetric
+positive-definite it suffices to only store the upper or lower half of
+the matrix.
+
+
+
+The variable \texttt{wz} in \texttt{vglm.fit()}
+stores the working weight matrices $\bW_i$ in 
+a special format called the \textit{matrix-band} format. This
+format comprises an $n \times M^*$ matrix where
+\[
+M^*  =  \sum_{i=1}^{\footnotesize \textit{hbw}} \;
+\left(M-i+1\right)  =  
+\frac12 \, \textit{hbw}\, \left(2\,M - \textit{hbw} +1\right)
+\]
+is the number of columns. Here, \textit{hbw} refers to the
+\textit{half-bandwidth} of the matrix, which is an integer
+between 1 and $M$ inclusive. A diagonal matrix has
+unit half-bandwidth, a tridiagonal matrix has half-bandwidth 2, etc.
+
+
+Suppose $M=4$. Then \texttt{wz} will have up to $M^*=10$ columns
+enumerating the unique elements of $\bW_i$ as follows:
+\begin{eqnarray}
+\bW_i  =  
+\left( \begin{array}{rrrr}
+1 & 5 & 8 & 10 \\
+  & 2 & 6 & 9 \\
+  &   & 3 & 7 \\
+  &   &   & 4 
+\end{array} \right).
+\label{eqn:hbw.eg}
+\end{eqnarray}
+That is, the order is firstly the diagonal, then the band above that,
+followed by the second band above the diagonal etc.
+Why is such a format adopted? 
+For this example, if $\bW_i$ is diagonal then only the first 4 columns
+of \texttt{wz} are needed. If $\bW_i$ is tridiagonal then only the
+first 7 columns of \texttt{wz} are needed. 
+If $\bW_i$ \textit{is} banded then \texttt{wz} need not have
+all $\frac12 M(M+1)$ columns; only $M^*$ columns suffice, and the
+rest of the elements of $\bW_i$ are implicitly zero.
+As well as reducing the size of \texttt{wz} itself in most cases, the
+matrix-band format often makes the computation of \texttt{wz} very
+simple and efficient. Furthermore, a Cholesky decomposition of a
+banded matrix will be banded. A final reason is that sometimes we
+want to input $\bW_i$ into \VGAM: if \texttt{wz} is $M \times M \times
+n$ then \texttt{vglm(\ldots, weights = wz)} will result in an error
+whereas it will work if \texttt{wz} is an $n \times M^*$ matrix.
+
+
+
+To facilitate the use of the matrix-band format,
+a few auxiliary functions have been written.
+In particular, there is \texttt{iam()} which gives the indices
+for the array-to-matrix mapping.
+In the $4\times 4$ example above,
+<<>>=
+iam(NA, NA, M = 4, both = TRUE, diag = TRUE)
+@
+returns the indices for the respective array coordinates for
+successive columns of matrix-band format
+(see Equation \ref{eqn:hbw.eg}).
+If \texttt{diag = FALSE} then the first 4 elements in each vector
+are omitted. Note that the first two arguments of 
+\texttt{iam()} are not used here and have been assigned
+\texttt{NA}s for simplicity.
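+For instance, a single element can be mapped in the same way:
+under the enumeration in Equation \ref{eqn:hbw.eg} the $(1,2)$ element
+occupies column 5, so one would expect (a small sketch, not from the
+original text)
+\begin{Code}
+iam(1, 2, M = 4)   # the (1, 2) element: expected to return 5
+\end{Code}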
+For its use on the multinomial logit model, where
+$(\bW_i)_{jj} = w_i\,\mu_{ij} (1-\mu_{ij}),\ j=1,\ldots,M$, and 
+$(\bW_i)_{jk} = -w_i\,\mu_{ij} \mu_{ik},\ j\neq k$,
+this can be programmed succinctly like
+\begin{Code}
+wz <- mu[, 1:M] * (1 - mu[, 1:M])   # diagonal: mu_ij * (1 - mu_ij)
+if (M > 1) {
+  index <- iam(NA, NA, M = M, both = TRUE, diag = FALSE)  # off-diagonal indices
+  wz <- cbind(wz, -mu[, index$row] * mu[, index$col])     # -mu_ij * mu_ik
+}
+wz <- w * wz                        # scale by the prior weights w_i
+\end{Code}
+(the actual code is slightly more complicated).
+In general, \VGAM{} family functions can be remarkably compact,
+e.g.,
+\texttt{acat()},
+\texttt{cratio()}
+and
+\texttt{multinomial()} are all less than 120 lines of code each.
+
+
+
+
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{Extensions and utilities}
+\label{sec:jsscat.extnUtil}
+
+This section describes some useful utilities/extensions of the above.
+
+
+
+\subsection{Marginal effects}
+\label{sec:jsscat.extnUtil.margeff}
+
+
+Models such as the multinomial logit and cumulative link models
+specify the posterior probability $p_{j} = \pr(Y=j|\bix)$ directly.
+In some applications, knowing the derivative of $p_{j}$
+with respect to some of the $x_k$ is useful;
+in fact, often just knowing the sign is important.
+The function \texttt{margeff()} computes the derivatives and
+returns them as a $p \times (M+1) \times n$ array.
+For the multinomial logit model it is easy to show
+\begin{eqnarray}
+\frac{\partial \, p_{j}(\bix_i)}{\partial \,
+\bix_{i}}
+&=&
+p_{j}(\bix_i)
+\left\{
+ \bbeta_{j} -
+\sum_{s=1}^{M+1}
+p_{s}(\bix_i)
+\,
+ \bbeta_{s}
+\right\},
+\label{eqn:multinomial.marginalEffects}
+\end{eqnarray}
+while for
+\texttt{cumulative(reverse = FALSE)}
+we have
+$p_{j} = \gamma_{j} - \gamma_{j-1} = h(\eta_{j}) - h(\eta_{j-1})$
+where $h=g^{-1}$ is the inverse of the link function
+(cf. Table \ref{tab:cat.quantities})
+so that
+\begin{eqnarray}
+\frac{\partial \, p_{j}(\bix_{})}{\partial \,
+\bix}
+&=&
+h'(\eta_{j}) \, \bbeta_{j} -
+h'(\eta_{j-1}) \, \bbeta_{j-1} .
+\label{eqn:cumulative.marginalEffects}
+\end{eqnarray}
+
+
+
+
+The function \texttt{margeff()} returns an array with these
+derivatives and should handle any value of
+\texttt{reverse} and \texttt{parallel}.
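+For example, for a multinomial logit model fitted to the \texttt{pneumo}
+data (a minimal sketch; \texttt{fit.mn} and \texttt{marg} are
+illustrative names):
+\begin{Code}
+pneumo <- transform(pneumo, let = log(exposure.time))
+fit.mn <- vglm(cbind(normal, mild, severe) ~ let, multinomial, data = pneumo)
+marg <- margeff(fit.mn)   # a p x (M+1) x n array of derivatives
+dim(marg)                 # here p = 2, M + 1 = 3, n = 8
+\end{Code}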
+
+
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\subsection[The xij argument]{The \texttt{xij} argument}
+\label{sec:jsscat.extnUtil.xij}
+
+There are many models, including those for categorical data,
+where the value of an explanatory variable $x_k$ differs depending
+on which linear/additive predictor $\eta_{j}$ it appears in.
+Here is a well-known example from consumer choice modeling.
+Suppose an econometrician is interested in peoples'
+choice of transport for travelling to work
+and that there are four choices:
+$Y=1$ for ``bus'',
+$Y=2$ for ``train'',
+$Y=3$ for ``car'' and
+$Y=4$ for ``walking''.
+Assume that people only choose one means to go to work.
+Suppose there are three covariates:
+$X_2=$ cost,
+$X_3=$ journey time, and
+$X_4=$ distance.
+Of the covariates only $X_4$ (and the intercept $X_1$)
+is the same for all transport choices;
+the cost and journey time differ according to the means chosen.
+Suppose a random sample of $n$ people is collected
+from some population, and that each person has
+access to all these transport modes.
+For such data, a natural regression model would be a 
+multinomial logit model with $M=3$:
+for $j=1,\ldots,M$, we have
+$\eta_{j} =$
+\begin{eqnarray}
+\log \frac{\pr(Y=j)}{\pr(Y=M+1)}
+&=&
+\beta_{(j)1}^{*} +
+\beta_{(1)2}^{*} \, (x_{i2j}-x_{i24}) +
+\beta_{(1)3}^{*} \, (x_{i3j}-x_{i34}) +
+\beta_{(1)4}^{*} \, x_{i4},
+\label{eqn:xij.eg.gotowork}
+\end{eqnarray}
+where, for the $i$th person,
+$x_{i2j}$ is the cost for the $j$th transport means, and
+$x_{i3j}$ is the journey time of the $j$th transport means.
+The distance to get to work is $x_{i4}$; it has the same value
+regardless of the transport means.
+
+
+Equation \ref{eqn:xij.eg.gotowork}
+implies $\bH_1=\bI_3$ and $\bH_2=\bH_3=\bH_4=\bone_3$.
+Note
+also that if the last response category is used as the baseline or
+reference group (the default of \texttt{multinomial()}) then $x_{ik,M+1}$
+can be subtracted from $x_{ikj}$ for $j=1,\ldots,M$---this
+is the natural way $x_{ik,M+1}$ enters the model.
+
+
+
+
+Recall from (\ref{gammod2}) that we had
+\begin{equation}
+\eta_j(\bix_i)  =  \bbeta_j^{\top} \bix_i  = 
+\sum_{k=1}^{p} \, x_{ik} \, \beta_{(j)k} .
+\label{eqn:xij0}
+\end{equation}
+Importantly, this can be generalized to
+\begin{equation}
+\eta_j(\bix_{ij})  =  \bbeta_j^{\top} \bix_{ij}  = 
+\sum_{k=1}^{p} \, x_{ikj} \, \beta_{(j)k} ,
+\label{eqn:xij}
+\end{equation}
+or writing this another way (as a mixture or hybrid),
+\begin{equation}
+\eta_j(\bix_{i}^{*},\bix_{ij}^{*})  =  
+\bbeta_{j}^{*T} \bix_{i}^{*} + \bbeta_{j}^{**T} \bix_{ij}^{*} .
+\label{eqn:xij2}
+\end{equation}
+Often $\bbeta_{j}^{**} = \bbeta_{}^{**}$, say.
+In (\ref{eqn:xij2}) the variables in $\bix_{i}^{*}$ are common to
+all $\eta_{j}$, and the variables in $\bix_{ij}^{*}$ have
+different values for differing $\eta_{j}$.
+This allows for covariate values that are specific to each $\eta_j$,
+a facility which is very important in many applications.
+
+
+The use of the \texttt{xij} argument with the \VGAM{} family function
+\texttt{multinomial()} has very important applications in economics.
+In that field the term ``multinomial logit model'' includes a variety of
+models such as the ``generalized logit model'' where (\ref{eqn:xij0})
+holds, the ``conditional logit model'' where (\ref{eqn:xij}) holds,
+and the ``mixed logit model,'' which is a combination of the two,
+where (\ref{eqn:xij2}) holds.
+The generalized logit model focusses on the individual as the unit of
+analysis, and uses individual characteristics as explanatory variables,
+e.g., age of the person in the transport example.
+The conditional logit model assumes that the explanatory variables take
+different values for each alternative, and that the impact of a unit
+change in $x_k$ is constant
+across alternatives, e.g., journey time in the choice of transport mode.
+Unfortunately, there is confusion in the literature for the terminology
+of the models. Some authors call \texttt{multinomial()}
+with (\ref{eqn:xij0}) the ``generalized logit model''.
+Others call the mixed
+logit model the ``multinomial logit model'' and view the generalized
+logit and conditional logit models as special cases.
+In \VGAM{} terminology there is no need to give different names to
+all these slightly differing special cases. They are all still called
+multinomial logit models, although it may be added that there are
+some covariate-specific linear/additive predictors.
+The important thing is that the framework accommodates $\bix_{ij}$,
+so one tries to avoid making life unnecessarily complicated.
+And \texttt{xij} can apply in theory to any VGLM and not just to the
+multinomial logit model.
+\cite{imai:king:lau:2008} present another perspective on the
+$\bix_{ij}$ problem with illustrations from \pkg{Zelig}
+\citep{Zelig:2009}.
+
+
+
+
+
+\subsubsection[Using the xij argument]{Using the \texttt{xij} argument}
+\label{sec:xij.sub}
+
+\VGAM{} handles variables whose values depend on $\eta_{j}$,
+(\ref{eqn:xij2}), using the \texttt{xij} argument.
+It is assigned an \proglang{S} formula or a list of \proglang{S} formulas.
+Each formula, which must have $M$ \textit{different} terms,
+forms a matrix that premultiplies a constraint matrix.
+In detail, (\ref{eqn:xij0}) can be written in vector form as
+\begin{equation}
+\boldeta(\bix_i)  =  \bB^{\top} \bix_i  = 
+\sum_{k=1}^{p} \, \bH_{k} \, \bbeta_{k}^{*} \, x_{ik},
+\label{eqn:xij0.vector}
+\end{equation}
+where
+$\bbeta_{k}^{*} =
+\left( \beta_{(1)k}^{*},\ldots,\beta_{(r_k)k}^{*} \right)^{\top}$
+is to be estimated.
+This may be written
+\begin{eqnarray}
+\boldeta(\bix_{i})
+&=&
+\sum_{k=1}^{p} \, \diag(x_{ik},\ldots,x_{ik}) \,
+\bH_k \, \bbeta_{k}^{*}.
+\label{eqn:xij.d.vector}
+\end{eqnarray}
+To handle (\ref{eqn:xij})--(\ref{eqn:xij2})
+we can generalize (\ref{eqn:xij.d.vector}) to
+\begin{eqnarray}
+\boldeta_i
+&=&
+\sum_{k=1}^{p} \, \diag(x_{ik1},\ldots,x_{ikM}) \;
+\bH_k \, \bbeta_{k}^{*}
+\ \ \ \ \left(=
+\sum_{k=1}^{p} \, \bX_{(ik)}^{*} \,
+\bH_k \, \bbeta_{k}^{*} ,
+\mathrm{\ say} \right).
+\label{eqn:xij.vector}
+\end{eqnarray}
+Each component of the list \texttt{xij} is a formula having $M$ terms
+(ignoring the intercept) which
+specifies the successive diagonal elements of the matrix $\bX_{(ik)}^{*}$.
+Thus each row of the constraint matrix may be multiplied by a different
+vector of values.
+The constraint matrices themselves are not affected by the
+\texttt{xij} argument.
+
+
+
+
+
+How can one fit such models in \VGAM{}?
+Let us fit (\ref{eqn:xij.eg.gotowork}).
+Suppose the journey cost and time variables have had the
+cost and time of walking subtracted from them.
+Then,
+using ``\texttt{.trn}'' to denote train,
+\begin{Code}
+fit2 <- vglm(cbind(bus, train, car, walk) ~ Cost + Time + Distance,
+             fam = multinomial(parallel = TRUE ~ Cost + Time + Distance - 1),
+             xij = list(Cost ~ Cost.bus + Cost.trn + Cost.car,
+                        Time ~ Time.bus + Time.trn + Time.car),
+             form2 = ~  Cost.bus + Cost.trn + Cost.car +
+                        Time.bus + Time.trn + Time.car +
+                        Cost + Time + Distance,
+             data = gotowork)
+\end{Code}
+should do the job.
+Here, the argument \texttt{form2} is assigned a second \proglang{S} formula which
+is used in some special circumstances or by certain types
+of \VGAM{} family functions.
+The model has $\bH_{1} = \bI_{3}$ and $\bH_{2} = \bH_{3} = \bH_{4} = \bone_{3}$
+because the lack of parallelism only applies to the intercept.
+However, unless \texttt{Cost} is the same as \texttt{Cost.bus} and
+\texttt{Time} is the same as \texttt{Time.bus},
+this model should not be plotted with \texttt{plotvgam()};
+see the author's homepage for further documentation.
+
+
+By the way,
+suppose 
+$\beta_{(1)4}^{*}$
+in (\ref{eqn:xij.eg.gotowork})
+is replaced by $\beta_{(j)4}^{*}$.
+Then the above code but with
+\begin{Code}
+  fam = multinomial(parallel = FALSE ~ 1 + Distance),
+\end{Code}
+should fit this model.
+Equivalently, the same model can be specified by
+\begin{Code}
+  fam = multinomial(parallel = TRUE ~ Cost + Time - 1),
+\end{Code}
+
+
+
+
+
+
+\subsubsection{A more complicated example}
+\label{sec:xij.complicated}
+
+The above example is straightforward because the
+variables were entered linearly. However, things
+become trickier if data-dependent functions are used in
+any \texttt{xij} terms, e.g., \texttt{bs()}, \texttt{ns()} or \texttt{poly()}.
+In particular, regression splines such as \texttt{bs()} and \texttt{ns()}
+can be used to estimate a general smooth function $f(x_{ij})$, which is
+very useful for exploratory data analysis.
+
+
+
+Suppose we wish to fit the variable \texttt{Cost} with a smoother.
+This is possible using regression splines and a trick.
+Firstly note that
+\begin{Code}
+fit3 <- vglm(cbind(bus, train, car, walk) ~ ns(Cost) + Time + Distance,
+             multinomial(parallel = TRUE ~ ns(Cost) + Time + Distance - 1),
+             xij = list(ns(Cost) ~ ns(Cost.bus) + ns(Cost.trn) + ns(Cost.car),
+                        Time ~ Time.bus + Time.trn + Time.car),
+             form2 = ~  ns(Cost.bus) + ns(Cost.trn) + ns(Cost.car) +
+                        Time.bus + Time.trn + Time.car +
+                        ns(Cost) + Cost + Time + Distance,
+             data = gotowork)
+\end{Code}
+will \textit{not} work because the basis functions for
+\texttt{ns(Cost.bus)}, \texttt{ns(Cost.trn)} and \texttt{ns(Cost.car)}
+are not identical since the knots differ.
+Consequently, they represent different functions despite
+having common regression coefficients.
+
+
+Fortunately, it is possible to force the \texttt{ns()} terms
+to have identical basis functions by using a trick:
+combine the vectors temporarily.
+To do this, one can let
+\begin{Code}
+NS <- function(x, ..., df = 3)   # knots come from the combined vector
+      sm.ns(c(x, ...), df = df)[1:length(x), , drop = FALSE]  # rows for x only
+\end{Code}
+This computes a natural cubic B-spline evaluated at \texttt{x} but it uses the
+other arguments as well to form an overall vector from which to obtain
+the (common) knots.
+Then the usage of \texttt{NS()} can be something like
+\begin{Code}
+fit4 <- vglm(cbind(bus, train, car, walk) ~ NS(Cost.bus, Cost.trn, Cost.car)
+                                          + Time + Distance,
+             multinomial(parallel = TRUE ~  NS(Cost.bus, Cost.trn, Cost.car)
+                                          + Time + Distance - 1),
+             xij = list(NS(Cost.bus, Cost.trn, Cost.car) ~
+                        NS(Cost.bus, Cost.trn, Cost.car) +
+                        NS(Cost.trn, Cost.car, Cost.bus) +
+                        NS(Cost.car, Cost.bus, Cost.trn),
+                        Time ~ Time.bus + Time.trn + Time.car),
+             form2 = ~  NS(Cost.bus, Cost.trn, Cost.car) +
+                        NS(Cost.trn, Cost.car, Cost.bus) +
+                        NS(Cost.car, Cost.bus, Cost.trn) +
+                        Time.bus + Time.trn + Time.car +
+                        Cost.bus + Cost.trn + Cost.car +
+                        Time + Distance,
+             data = gotowork)
+\end{Code}
+So \texttt{NS(Cost.bus, Cost.trn, Cost.car)}
+is the smooth term for
+\texttt{Cost.bus}, etc.
+Furthermore, \texttt{plotvgam()} may be applied to
+\texttt{fit4}, in which case the fitted regression spline is plotted
+against its first inner argument, viz. \texttt{Cost.bus}.
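+For instance (a sketch, assuming \texttt{fit4} above has been fitted),
+\begin{Code}
+plotvgam(fit4, se = TRUE)   # the spline is plotted against Cost.bus
+\end{Code}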
+
+
+One reason why it will also predict correctly
+is ``smart prediction''
+\citep{Rnews:Yee:2008}.
+
+
+
+\subsubsection{Implementation details} 
+\label{sec:jss.xij.implementationDetails} 
+
+The \texttt{xij} argument operates \textit{after} the
+ordinary $\bX_{\sVLM}$ matrix is created. Then selected columns
+of $\bX_{\sVLM}$ are modified using the constraint matrices and the
+\texttt{xij} and \texttt{form2} arguments; that is, using
+\texttt{form2}'s model matrix $\bX_{\sformtwo}$ and the $\bH_k$.
+This whole operation
+is possible because $\bX_{\sVLM}$ remains structurally the same.
+The crucial equation is (\ref{eqn:xij.vector}).
+
+
+Other \texttt{xij} examples are given in the online help of
+\texttt{fill()} and \texttt{vglm.control()},
+as well as at the package's webpage.
+
+
+
+
+
+
+
+
+
+
+
+% ----------------------------------------------------------------------
+\section{Discussion}
+\label{sec:jsscat.discussion}
+
+
+This article has sought to convey how VGLMs/VGAMs are well suited for
+fitting regression models for categorical data. Their primary strength
+is a simple and unified framework which, when reflected in software,
+makes practical CDA more understandable and efficient. Furthermore,
+there are natural extensions such as a reduced-rank variant and
+covariate-specific $\eta_{j}$. The \VGAM{} package potentially offers
+a wide selection of models and utilities.
+
+
+There is much future work to do.
+Some useful additions to the package include:
+\begin{enumerate}
+
+\item
+Bias-reduction \citep{firt:1993} is a method for removing the $O(n^{-1})$
+bias from a maximum likelihood estimate. For a substantial class of
+models including GLMs it can be formulated in terms of a minor adjustment
+of the score vector within an IRLS algorithm \citep{kosm:firt:2009}.
+One by-product, for logistic regression, is that while the maximum
+likelihood estimate (MLE) can be infinite, the adjustment leads to
+estimates that are always finite. At present the \R{} package \pkg{brglm}
+\citep{Kosmidis:2008} implements bias-reduction for a number of models.
+Bias-reduction might be implemented by adding an argument
+\texttt{bred = FALSE}, say, to some existing \VGAM{} family functions.
+
+
+\item
+Nested logit models were developed to overcome a fundamental shortcoming
+related to the multinomial logit model, viz. the independence of
+irrelevant alternatives (IIA) assumption. Roughly, the multinomial logit
+model assumes the ratio of the choice probabilities of two alternatives
+is not dependent on the presence or absence of other alternatives in
+the model. This presents problems that are often illustrated by the
+famed red bus-blue bus problem.
+
+
+
+
+\item
+The generalized estimating equations (GEE) methodology is largely
+amenable to IRLS and this should be added to the package in the future
+\citep{wild:yee:1996}.
+
+
+\item
+For logistic regression \proglang{SAS}'s \code{proc logistic} gives
+a warning if the data are completely separated or quasi-completely
+separated. The effect of such data is that some regression coefficients
+tend to $\pm\infty$. With such data, all (to my knowledge) \R{}
+implementations
+give warnings that are vague, if any at all, and this is rather
+unacceptable \citep{alli:2004}. The \pkg{safeBinaryRegression} package
+\citep{Konis:2009} overloads \code{glm()} so that a check for the
+existence of the MLE is made before fitting a binary response GLM;
+a small sketch is given after this list.
+
+
+\end{enumerate}
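+
+As an illustration of the last point, consider the following
+hypothetical sketch (the data are invented, and the exact behaviour
+of the overloaded \code{glm()} may differ):
+\begin{Code}
+library("safeBinaryRegression")   # masks stats::glm()
+x <- c(-2, -1, 1, 2)
+y <- c( 0,  0, 1, 1)              # completely separated at x = 0
+glm(y ~ x, family = binomial)     # should detect that the MLE
+                                  # does not exist and stop
+\end{Code}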
+
+
+In closing, the \pkg{VGAM} package is continually being developed;
+therefore some future changes in the implementation details and usage
+may occur. These may include non-backward-compatible changes (see the
+\code{NEWS} file). Further documentation and updates are available at
+the author's homepage whose URL is given in the \code{DESCRIPTION} file.
+
+
+
+% ----------------------------------------------------------------------
+\section*{Acknowledgments}
+
+The author thanks Micah Altman, David Firth and Bill Venables for helpful
+conversations, and Ioannis Kosmidis for a reprint.
+Thanks also to The Institute for Quantitative Social Science at Harvard
+University for their hospitality while this document was written during a
+sabbatical visit.
+
+
+
+
+
+\bibliography{categoricalVGAMbib}
+
+\end{document}
+
+
+
+
diff --git a/vignettes/categoricalVGAMbib.bib b/vignettes/categoricalVGAMbib.bib
new file mode 100644
index 0000000..7367aff
--- /dev/null
+++ b/vignettes/categoricalVGAMbib.bib
@@ -0,0 +1,653 @@
+ at article{yee:wild:1996,
+    Author = {Yee, T. W. and Wild, C. J.},
+    Title = {Vector Generalized Additive Models},
+    Year = 1996,
+   JOURNAL = {Journal of the Royal Statistical Society~B},
+    Volume = 58,
+    Pages = {481--493},
+    Keywords = {Nonparametric regression; Smoothing},
+    Number = 3,
+}
+
+ at article{gree:1984,
+    Author = {Green, P. J.},
+    Title = {Iteratively Reweighted Least Squares for Maximum Likelihood
+            Estimation, and Some Robust and Resistant Alternatives},
+    Year = 1984,
+   JOURNAL = {Journal of the Royal Statistical Society~B},
+    Volume = 46,
+    Pages = {149--192},
+    Keywords = {Scoring; Generalized linear model; Regression; Residual},
+    Number = 2,
+}
+
+ at book{hast:tibs:1990,
+    Author = {Hastie, T. J. and Tibshirani, R. J.},
+    Title = {Generalized Additive Models},
+    Year = 1990,
+    Publisher = {Chapman \& Hall},
+    Address = {London},
+    Pages = {335},
+    Keywords = {Regression; Nonparametric; Generalized linear model}
+}
+
+ at Manual{gam:pack:2009,
+    title = {\pkg{gam}: Generalized Additive Models},
+    author = {Trevor Hastie},
+    year = {2008},
+    note = {\proglang{R}~package version~1.01},
+    url = {http://CRAN.R-project.org/package=gam}
+}
+
+ at article{ande:1984,
+    Author = {Anderson, J. A.},
+    Title = {Regression and Ordered Categorical Variables},
+    Year = 1984,
+   JOURNAL = {Journal of the Royal Statistical Society~B},
+    Volume = 46,
+    Pages = {1--30},
+    Keywords = {Assessed variable; Logistic regression; Stereotype
+               regression; Maximum likelihood},
+    Number = 1,
+}
+
+ at article{firt:1993,
+author = {Firth, D.},
+title = {Bias Reduction of Maximum Likelihood Estimates},
+journal = {Biometrika},
+volume = {80},
+pages = {27--38},
+year = {1993},
+number = {1},
+abstract = {It is shown how, in regular parametric problems, the
+first-order term is removed from the asymptotic bias of maximum likelihood
+estimates by a suitable modification of the score function. In exponential
+families with canonical parameterization the effect is to penalize the
+likelihood by the Jeffreys invariant prior. In binomial logistic models,
+Poisson log linear models and certain other generalized linear models,
+the Jeffreys prior penalty function can be imposed in standard regression
+software using a scheme of iterative adjustments to the data.},
+}
+
+ at InProceedings{alli:2004,
+    Author = {Allison, P.},
+    Title = {Convergence Problems in Logistic Regression},
+    chapter = {10},
+    Year = 2004,
+    Crossref = {altm:gill:mcdo:2004},
+    Pages = {238--252},
+ BookTITLE = {Numerical Issues in Statistical Computing for the Social
+              Scientist},
+ PUBLISHER = {Wiley-Interscience},
+   ADDRESS = {Hoboken, NJ, USA},
+}
+
+ at book {altm:gill:mcdo:2004,
+    AUTHOR = {Altman, Micah and Gill, Jeff and McDonald, Michael P.},
+     TITLE = {Numerical Issues in Statistical Computing for the Social
+              Scientist},
+ PUBLISHER = {Wiley-Interscience},
+   ADDRESS = {Hoboken, NJ, USA},
+      YEAR = {2004},
+     PAGES = {xvi+323},
+   MRCLASS = {62-02 (62-04 62P25 65-02 91-02)},
+  MRNUMBER = {MR2020104},
+}
+
+ at article{yee:2010v,
+    Author = {Yee, T. W.},
+    Title = {{VGLM}s and {VGAM}s:
+             An Overview for Applications in Fisheries Research},
+    Year = 2010,
+    Journal = {Fisheries Research},
+   FJournal = {Fisheries Research},
+    Volume = {101},
+    Pages = {116--126},
+    Number = {1--2},
+}
+
+ at article{imai:king:lau:2008,
+    AUTHOR = {Imai, Kosuke and King, Gary and Lau, Olivia},
+     TITLE = {Toward A Common Framework for Statistical Analysis and
+              Development},
+  JOURNAL = {Journal of Computational and Graphical Statistics},
+  YEAR = 2008,
+  VOLUME = 17,
+  PAGES = {892--913},
+  NUMBER = 4,
+}
+
+ at book{stok:davi:koch:2000,
+    Author = {Stokes, W. and Davis, J. and Koch, W.},
+    Title = {Categorical Data Analysis Using The \proglang{SAS} System},
+    Year = 2000,
+    Edition = {2nd},
+    Publisher = {SAS Institute Inc.},
+    Address = {Cary, NC, USA},
+    PAGES = {648},
+}
+
+ at article{neld:wedd:1972,
+    Author = {Nelder, J. A. and Wedderburn, R. W. M.},
+    Title = {Generalized Linear Models},
+    Year = 1972,
+   JOURNAL = {Journal of the Royal Statistical Society~A},
+    Volume = 135,
+    Pages = {370--384},
+    Keywords = {Probit analysis; Analysis of variance; Contingency table;
+               Exponential family; Quantal response; Weighted least
+               squares},
+    Number = 3,
+}
+
+ at book{agre:2002,
+    Author = {Agresti, Alan},
+    Title = {Categorical Data Analysis},
+    Year = 2002,
+    Publisher = {John Wiley \& Sons},
+    Address = {New York, USA},
+    Edition = {2nd},
+}
+
+
+ at book{agre:2013,
+  Author = {Agresti, Alan},
+  Title = {Categorical Data Analysis},
+  Year = 2013,
+  Publisher = {Wiley},
+  Address = {Hoboken, NJ, USA},
+  Edition = {Third},
+}
+
+
+
+ at book{agre:2010,
+      Author = {Agresti, Alan},
+      Title = {Analysis of Ordinal Categorical Data},
+      Year = 2010,
+      Publisher = {Wiley},
+      Edition = {Second},
+      Address = {Hoboken, NJ, USA},
+      Pages = {396},
+}
+
+
+
+ at book{tutz:2012,
+    AUTHOR = {Tutz, G.},
+     TITLE = {Regression for Categorical Data},
+      YEAR = {2012},
+ PUBLISHER = {Cambridge University Press},
+   ADDRESS = {Cambridge},
+}
+
+
+ at book{fahr:tutz:2001,
+    Author = {Fahrmeir, L. and Tutz, G.},
+    Title = {Multivariate Statistical Modelling Based on Generalized Linear
+            Models},
+    Year = 2001,
+    Edition = {2nd},
+    Publisher = {Springer-Verlag},
+    ADDRESS = {New York, USA},
+}
+
+ at book{leon:2000,
+    Author = {Leonard, Thomas},
+    Title = {A Course in Categorical Data Analysis},
+    Year = 2000,
+    Publisher = {Chapman \& Hall/CRC},
+    Address = {Boca Raton, FL, USA},
+}
+
+ at book{lloy:1999,
+    Author = {Lloyd, C. J.},
+    Title = {Statistical Analysis of Categorical Data},
+    Year = 1999,
+    Publisher = {John Wiley \& Sons},
+    Address = {New York, USA}
+}
+
+ at book{long:1997,
+    Author = {Long, J. S.},
+    Title = {Regression Models for Categorical and Limited Dependent Variables},
+    Year = 1997,
+    Publisher = {Sage Publications},
+    ADDRESS = {Thousand Oaks, CA, USA},
+}
+
+ at book{mccu:neld:1989,
+    Author = {McCullagh, P. and Nelder, J. A.},
+    Title = {Generalized Linear Models},
+    Year = 1989,
+    Edition = {2nd},
+    Publisher = {Chapman \& Hall},
+    Address = {London},
+    Pages = {500}
+}
+
+ at book{simo:2003,
+    Author = {Simonoff, J. S.},
+    Title = {Analyzing Categorical Data},
+    Year = 2003,
+    Pages = {496},
+    Publisher = {Springer-Verlag},
+    Address = {New York, USA}
+}
+
+ at article{liu:agre:2005,
+    Author = {Liu, I. and Agresti, A.},
+    Title = {The Analysis of Ordered Categorical Data:
+             An Overview and a Survey of Recent Developments},
+    Year = 2005,
+    Journal = {Sociedad Estad{\'i}stica e Investigaci{\'o}n Operativa Test},
+    Volume = 14,
+    Pages = {1--73},
+    Number = 1,
+}
+
+ at MANUAL{thom:2009,
+    TITLE = {\proglang{R} (and \proglang{S-PLUS}) Manual to Accompany
+             Agresti's \textit{Categorical Data Analysis}~(2002),
+             2nd edition},
+    AUTHOR = {Thompson, L. A.},
+    YEAR = {2009},
+    URL = {https://home.comcast.net/~lthompson221/Splusdiscrete2.pdf},
+}
+
+ at article{yee:2008c,
+    Author = {Yee, T. W.},
+    Title = {The \pkg{VGAM} Package},
+    Year = 2008,
+    Journal = {\proglang{R} {N}ews},
+    Volume = 8,
+    Pages = {28--39},
+    Number = 2,
+}
+
+ at article{Rnews:Yee:2008,
+  author = {Thomas W. Yee},
+  title = {The \pkg{VGAM} Package},
+  journal = {\proglang{R}~News},
+  year = 2008,
+  volume = 8,
+  pages = {28--39},
+  month = {October},
+  url = {http://CRAN.R-project.org/doc/Rnews/},
+  number = 2,
+}
+
+ at article{yee:hast:2003,
+    AUTHOR = {Yee, T. W. and Hastie, T. J.},
+     TITLE = {Reduced-rank Vector Generalized Linear Models},
+   JOURNAL = {Statistical Modelling},
+    Volume = 3,
+     Pages = {15--41},
+      YEAR = {2003},
+     Number = 1,
+}
+
+ at article{good:1981,
+    Author = {Goodman, L. A.},
+    Title = {Association Models and Canonical Correlation in the Analysis
+             of Cross-classifications Having Ordered Categories},
+    Year = 1981,
+    Journal = {Journal of the American Statistical Association},
+    Volume = 76,
+    Pages = {320--334},
+    Number = 374,
+}
+
+ at article{buja:hast:tibs:1989,
+    Author = {Buja, Andreas and Hastie, Trevor and Tibshirani, Robert},
+    Title = {Linear Smoothers and Additive Models},
+    Year = 1989,
+   JOURNAL = {The Annals of Statistics},
+    Volume = 17,
+    Pages = {453--510},
+    Keywords = {Nonparametric; Regression; Kernel estimator},
+    Number = 2,
+}
+
+ at article{yee:step:2007,
+    AUTHOR = {Yee, Thomas W. and Stephenson, Alec G.},
+     TITLE = {Vector Generalized Linear and Additive Extreme Value Models},
+   JOURNAL = {Extremes},
+  FJOURNAL = {Extremes. Statistical Theory and Applications in Science,
+              Engineering and Economics},
+    VOLUME = {10},
+      YEAR = {2007},
+     PAGES = {1--19},
+   MRCLASS = {Database Expansion Item},
+  MRNUMBER = {MR2407639},
+    NUMBER = {1--2},
+}
+
+ at article{wand:orme:2008,
+    Author = {Wand, M. P. and Ormerod, J. T.},
+    Title = {On Semiparametric Regression with {O}'{S}ullivan Penalized Splines},
+    Year = 2008,
+    Journal = {The Australian and New Zealand Journal of Statistics},
+    Volume = 50,
+    Issue = 2,
+    Pages = {179--198},
+    Number = 2,
+}
+
+ at book{cham:hast:1993,
+    Editor = {Chambers, John M. and Hastie, Trevor J.},
+    Title = {Statistical Models in \proglang{S}},
+    Publisher = {Chapman \& Hall},
+    Year = 1993,
+    Pages = {608},
+    Address = {New York, USA},
+    Keywords = {Computing},
+}
+
+ at Article{pete:harr:1990,
+    Author = {Peterson, B. and Harrell, Frank E.},
+    Title = {Partial Proportional Odds Models for Ordinal Response Variables},
+    Year = 1990,
+    Journal = {Applied Statistics},
+    Volume = 39,
+    Pages = {205--217},
+    Number = 2,
+}
+
+ at article{pete:1990,
+    Author = {Peterson, B.},
+    Title = {Letter to the Editor: Ordinal Regression Models for
+             Epidemiologic Data},
+    Year = 1990,
+    Journal = {American Journal of Epidemiology},
+    Volume = 131,
+    Pages = {745--746}
+}
+
+ at article{hast:tibs:buja:1994,
+    AUTHOR = {Hastie, Trevor and Tibshirani, Robert and Buja, Andreas},
+     TITLE = {Flexible Discriminant Analysis by Optimal Scoring},
+   JOURNAL = {Journal of the American Statistical Association},
+    VOLUME = {89},
+      YEAR = {1994},
+     PAGES = {1255--1270},
+     CODEN = {JSTNAL},
+   MRCLASS = {62H30},
+  MRNUMBER = {95h:62099},
+    NUMBER = {428},
+}
+
+ at article{firth:2005,
+    Author = {Firth, David},
+    Title = {{B}radley-{T}erry Models in \proglang{R}},
+    Year = 2005,
+    Journal = {Journal of Statistical Software},
+    Volume = 12,
+    Number = 1,
+    Pages = {1--12},
+    URL = "http://www.jstatsoft.org/v12/i01/",
+}
+
+ at book{weir:1996,
+    Author = {Weir, Bruce S.},
+    Title = {Genetic Data Analysis II: Methods for Discrete Population
+             Genetic Data},
+    Year = 1996,
+    Publisher = {Sinauer Associates, Inc.},
+    Address = {Sunderland, MA, USA}
+}
+
+ at book{lang:2002,
+    Author = {Lange, Kenneth},
+    Title = {Mathematical and Statistical Methods for Genetic Analysis},
+    Year = 2002,
+    Edition = {2nd},
+    Publisher = {Springer-Verlag},
+    Address = {New York, USA},
+}
+
+ at article{macm:etal:1995,
+    Author = {MacMahon, S. and Norton, R. and Jackson, R. and Mackie, M. J. and
+              Cheng, A. and
+              Vander Hoorn, S. and Milne, A. and McCulloch, A.},
+    Title = {Fletcher {C}hallenge-{U}niversity of {A}uckland {H}eart \&
+             {H}ealth {S}tudy: Design and Baseline Findings},
+    Year = 1995,
+    Journal = {New Zealand Medical Journal},
+    Volume = 108,
+    Pages = {499--502},
+}
+
+ at article{altm:jack:2010,
+  author =      {Altman, M. and Jackman, S.},
+  title =       "Nineteen Ways of Looking at Statistical Software",
+  journal =     "Journal of Statistical Software",
+  year =        "2010",
+  note =        "Forthcoming"
+}
+
+ at article{fox:hong:2009,
+  author =    "John Fox and Jangman Hong",
+  title =    {Effect Displays in \proglang{R} for Multinomial and
+                 Proportional-Odds Logit Models:
+                 Extensions to the \pkg{effects} Package},
+  journal =    "Journal of Statistical Software",
+  volume =    "32",
+  number =    "1",
+  pages =    "1--24",
+  year =     "2009",
+  URL =      "http://www.jstatsoft.org/v32/i01/",
+}
+
+ at article{wild:yee:1996,
+    Author = {Wild, C. J. and Yee, T. W.},
+    Title = {Additive Extensions to Generalized Estimating Equation
+            Methods},
+    Year = 1996,
+   JOURNAL = {Journal of the Royal Statistical Society~B},
+    Volume = 58,
+    Pages = {711--725},
+    Keywords = {Longitudinal data; Nonparametric; Regression; Smoothing},
+    NUMBER = {4},
+}
+
+ at Article{Yee:2010,
+   author        = {Thomas W. Yee},
+   title         = {The \pkg{VGAM} Package for Categorical Data Analysis},
+   journal       = {Journal of Statistical Software},
+   year          = {2010},
+   volume        = {32},
+   number        = {10},
+   pages	 = {1--34},
+   url           = {http://www.jstatsoft.org/v32/i10/}
+}
+
+ at Manual{R,
+   title         = {\proglang{R}: {A} Language and Environment
+                    for Statistical Computing},
+   author        = {{\proglang{R} Development Core Team}},
+   organization  = {\proglang{R} Foundation for Statistical Computing},
+   address       = {Vienna, Austria},
+   year          = {2009},
+   note          = {{ISBN} 3-900051-07-0},
+   url           = {http://www.R-project.org/}
+}
+
+ at Book{Venables+Ripley:2002,
+   author        = {William N. Venables and Brian D. Ripley},
+   title         = {Modern Applied Statistics with \proglang{S}},
+   edition       = {4th},
+   year          = {2002},
+   pages         = {495},
+   publisher     = {Springer-Verlag},
+   address       = {New York},
+    url = {http://www.stats.ox.ac.uk/pub/MASS4/},
+}
+
+ at Manual{SAS,
+   author        = {{\proglang{SAS} Institute Inc.}},
+   title         = {The \proglang{SAS} System, Version 9.1},
+   year          = {2003},
+   address       = {Cary, NC},
+   url           = {http://www.sas.com/}
+}
+
+ at Manual{yee:VGAM:2010,
+   title        = {\pkg{VGAM}: Vector Generalized Linear and Additive Models},
+   author       = {Yee, T. W.},
+   year         = {2010},
+   note         = {\proglang{R}~package version~0.7-10},
+   url          = {http://CRAN.R-project.org/package=VGAM}
+}
+
+ at Manual{Harrell:2009,
+   title        = {\pkg{rms}: Regression Modeling Strategies},
+   author       = {Frank E. {Harrell, Jr.}},
+   year         = {2009},
+   note         = {\proglang{R}~package version~2.1-0},
+   url          = {http://CRAN.R-project.org/package=rms}
+}
+
+ at Manual{Meyer+Zeileis+Hornik:2009,
+   title        = {\pkg{vcd}: Visualizing Categorical Data},
+   author       = {David Meyer and Achim Zeileis and Kurt Hornik},
+   year         = {2009},
+   note         = {\proglang{R}~package version~1.2-7},
+   url          = {http://CRAN.R-project.org/package=vcd}
+}
+
+ at Article{Meyer+Zeileis+Hornik:2006,
+   author       = {David Meyer and Achim Zeileis and Kurt Hornik},
+   title        = {The Strucplot Framework: Visualizing Multi-Way
+                   Contingency Tables with \pkg{vcd}},
+   journal      = {Journal of Statistical Software},
+   year         = {2006},
+   volume       = {17},
+   number       = {3},
+   pages        = {1--48},
+   url          = {http://www.jstatsoft.org/v17/i03/}
+}
+
+ at Manual{Turner+Firth:2009,
+   title        = {Generalized Nonlinear Models in \proglang{R}:
+                   An Overview of the \pkg{gnm} Package},
+   author       = {Heather Turner and David Firth},
+   year         = {2009},
+   note         = {\proglang{R}~package version~0.10-0},
+   url          = {http://CRAN.R-project.org/package=gnm},
+}
+
+ at Article{Rnews:Turner+Firth:2007,
+  author       = {Heather Turner and David Firth},
+  title        = {\pkg{gnm}: A Package for Generalized Nonlinear Models},
+  journal      = {\proglang{R}~News},
+  year         = 2007,
+  volume       = 7,
+  number       = 2,
+  pages        = {8--12},
+  month        = {October},
+  url = {http://CRAN.R-project.org/doc/Rnews/},
+}
+
+
+ at Manual{ElemStatLearn:2009,
+    title = {\pkg{ElemStatLearn}: Data Sets, Functions and
+             Examples from the Book `The Elements
+             of Statistical Learning, Data Mining, Inference, and
+             Prediction' by Trevor Hastie, Robert Tibshirani and Jerome
+             Friedman},
+    author = {Kjetil Halvorsen},
+    year = {2009},
+    note = {\proglang{R}~package version~0.1-7},
+    url = {http://CRAN.R-project.org/package=ElemStatLearn},
+  }
+
+ at Manual{Zelig:2009,
+    title = {\pkg{Zelig}: Everyone's Statistical Software},
+    author = {Kosuke Imai and Gary King and Olivia Lau},
+    year = {2009},
+    note = {\proglang{R}~package version~3.4-5},
+    url = {http://CRAN.R-project.org/package=Zelig},
+}
+
+ at article{kosm:firt:2009,
+    author = {Kosmidis, I. and Firth, D.},
+     title = {Bias Reduction in Exponential Family Nonlinear Models},
+      year = {2009},
+   JOURNAL = {Biometrika},
+  FJOURNAL = {Biometrika},
+    volume = {96},
+     PAGES = {793--804},
+    NUMBER = {4},
+}
+
+ at techreport{kosm:firt:2008,
+    author = {Kosmidis, I. and Firth, D.},
+    title = {Bias Reduction in Exponential Family Nonlinear Models},
+    Note = {CRiSM Paper No.~08-05v2},
+    year = {2008},
+    URL =  "http://www.warwick.ac.uk/go/crism",
+    Institution = {Department of Statistics, Warwick University},
+}
+
+ at Manual{Kosmidis:2008,
+   title        = {\pkg{brglm}: Bias Reduction in Binary-Response {GLMs}},
+   author       = {Ioannis Kosmidis},
+   year         = {2008},
+   note         = {\proglang{R}~package version~0.5-4},
+   url          = {http://CRAN.R-project.org/package=brglm},
+}
+
+ at Manual{Hatzinger:2009,
+    title = {\pkg{prefmod}: Utilities to Fit Paired Comparison
+             Models for Preferences},
+    author = {Reinhold Hatzinger},
+    year = {2009},
+    note = {\proglang{R}~package version~0.8-16},
+    url = {http://CRAN.R-project.org/package=prefmod},
+}
+
+ at Manual{firth:2008,
+    title = {\pkg{BradleyTerry}: Bradley-Terry Models},
+    author = {David Firth},
+    year = {2008},
+    note = {\proglang{R}~package version~0.8-7},
+    url = {http://CRAN.R-project.org/package=BradleyTerry},
+  }
+
+ at Manual{gnlm:2007,
+    title = {\pkg{gnlm}: Generalized Nonlinear Regression Models},
+    author = {Jim Lindsey},
+    year = {2007},
+    note = {\proglang{R}~package version~1.0},
+    url = {http://popgen.unimaas.nl/~jlindsey/rcode.html},
+}
+
+ at Manual{Konis:2009,
+    title = {\pkg{safeBinaryRegression}: Safe Binary Regression},
+    author = {Kjell Konis},
+    year = {2009},
+    note = {\proglang{R}~package version~0.1-2},
+    url = {http://CRAN.R-project.org/package=safeBinaryRegression},
+}
+
+ at book{smit:merk:2013,
+     TITLE = {Generalized Linear Models for Categorical and
+              Continuous Limited Dependent Variables},
+    AUTHOR = {Smithson, M. and Merkle, E. C.},
+      YEAR = {2013},
+ Publisher = {Chapman \& Hall/CRC},
+   Address = {London},
+}
+
diff --git a/vignettes/crVGAM.Rnw b/vignettes/crVGAM.Rnw
new file mode 100644
index 0000000..6a46807
--- /dev/null
+++ b/vignettes/crVGAM.Rnw
@@ -0,0 +1,2247 @@
+\documentclass[article,shortnames,nojss]{jss}
+\usepackage{thumbpdf}
+%% need no \usepackage{Sweave.sty}
+
+
+%% Packages.
+
+\usepackage{amssymb}
+\usepackage{amsmath}
+\usepackage{bm}
+\usepackage{xspace}
+
+
+
+
+%\VignetteIndexEntry{The VGAM Package for Capture--Recapture Data Using the Conditional Likelihood}
+%\VignetteDepends{VGAM}
+%\VignetteKeywords{closed population size estimation, conditional likelihood,mark--capture--recapture, vector generalized additive model, VGAM}
+%\VignettePackage{VGAM}
+
+%% new commands
+%% Shortcut commands.
+\newcommand{\logit}{\mbox{\rm logit}}
+\newcommand{\bone}{{\bf 1}}
+\newcommand{\bzero}{{\bf 0}}
+\newcommand{\bid}{\mbox{$\bm{\mathcal{D}}$}}
+\newcommand{\bib}{\mbox{$\bm{b}$}}
+\newcommand{\bif}{\mbox{$\bm{f}$}}
+\newcommand{\bix}{\mbox{$\bm{x}$}}
+\newcommand{\biy}{\mbox{$\bm{y}$}}
+\newcommand{\biz}{\mbox{$\bm{z}$}}
+\newcommand{\bB}{\mbox{\rm \bf B}}
+\newcommand{\bX}{\mbox{\rm \bf X}}
+\newcommand{\bH}{\mbox{\rm \bf H}}
+\newcommand{\bI}{\mbox{\rm \bf I}}
+\newcommand{\bOO}{\mbox{\rm \bf O}}
+\newcommand{\bW}{\mbox{\rm \bf W}}
+\newcommand{\bY}{\mbox{\rm \bf Y}}
+\newcommand{\bbeta}{\mbox{$\bm{\beta}$}}
+\newcommand{\boldeta}{\mbox{$\bm{\eta}$}}
+\newcommand{\btheta}{\mbox{$\bm{\theta}$}}
+\newcommand{\calM}{\mbox{$\mathcal{M}$}}
+\newcommand{\mytilde}{\mbox{\lower.80ex\hbox{\char`\~}\xspace}}
+
+
+\author{Thomas W. Yee\\The University of Auckland \And
+Jakub Stoklosa\\The University of New South Wales \AND
+Richard M. Huggins\\The University of Melbourne}
+\title{The \pkg{VGAM} Package for Capture--Recapture Data Using the Conditional Likelihood}
+
+%% for pretty printing and a nice hypersummary also set:
+
+\Plainauthor{Thomas W. Yee, Jakub Stoklosa, Richard M. Huggins} %% comma-separated
+\Plaintitle{The VGAM Package for Capture--Recapture Data Using the Conditional Likelihood} %% without formatting
+\Shorttitle{The VGAM Package for Capture--Recapture Data} %% a short title (if necessary)
+
+%% an abstract and keywords
+\Abstract{
+It is well known that using individual covariate information 
+(such as body weight or gender) to model heterogeneity
+in capture--recapture (CR) experiments can greatly enhance
+inferences on the size of a closed population. Since individual 
+covariates are only observable for captured individuals, complex 
+conditional likelihood methods are usually required and these do 
+not constitute a standard generalized linear model (GLM) family. 
+Modern statistical techniques such as generalized additive models
+(GAMs), which allow the linearity assumptions on the
+covariates to be relaxed, are readily available for many standard GLM families.
+Fortunately, a natural statistical framework for maximizing
+conditional likelihoods is available in the Vector GLM and Vector
+GAM classes of models. We present several new \proglang{R} functions
+(implemented within the \pkg{VGAM} package) specifically developed to allow
+the incorporation of individual covariates in the analysis of
+closed population CR data using a GLM/GAM-like approach
+and the conditional likelihood. As a result, a wide variety of
+practical tools are now readily available in the \pkg{VGAM}
+object-oriented framework. We discuss and demonstrate their advantages,
+features and flexibility using the new \pkg{VGAM} CR functions on several 
+examples.
+}
+
+
+
+\Keywords{closed population size estimation, conditional likelihood, 
+mark--capture--recapture, vector generalized additive model, \pkg{VGAM}}
+\Plainkeywords{closed population, conditional likelihood, 
+mark--capture--recapture, vector generalized additive model, VGAM R package}
+
+
+
+\Address{
+  Thomas W. Yee \\
+  Department of Statistics \\
+  University of Auckland, Private Bag 92019 \\
+  Auckland Mail Centre \\
+  Auckland 1142, New Zealand \\
+  E-mail: \email{t.yee at auckland.ac.nz}\\
+  URL: \url{http://www.stat.auckland.ac.nz/~yee/}
+}
+
+
+\begin{document}
+
+
+<<echo=FALSE, results=hide>>=
+library("VGAM")
+library("VGAMdata")
+ps.options(pointsize = 12)
+options(width = 72, digits = 4)
+options(SweaveHooks = list(fig = function() par(las = 1)))
+options(prompt = "R> ", continue = "+")
+@
+
+
+
+
+
+
+
+
+
+
+%*********************************************************************
+\section[Introduction]{Introduction}
+%% Note: If there is markup in \(sub)section, then it has to be escape as above.
+\label{sec:intro}
+
+
+Capture--recapture (CR) surveys are widely used in ecology and 
+epidemiology to estimate population sizes. In essence they are 
+sampling schemes that allow the estimation of both $n$ and $p$ 
+in a Binomial($n$, $p$) experiment \citep{hugg:hwan:2011}.
+The simplest CR sampling design consists of units or individuals 
+in some population that are captured or tagged across several 
+sampling occasions, e.g., trapping a nocturnal mammal species
+on seven consecutive nights. In these experiments, when an individual
+is captured for the first time it is marked or tagged so that
+it can be identified upon subsequent recapture. On each occasion, recaptures
+of individuals which have been previously marked are also noted. Thus 
+each observed individual has a capture history: a vector of 1s and 0s
+denoting capture/recapture and noncapture respectively. The unknown
+population size is then estimated using the observed capture histories 
+and any other additional information collected on captured individuals, 
+such as weight or sex, along with environmental information such as 
+rainfall or temperature.
+
+
+We consider closed populations, where there are no births, deaths,
+emigration or immigration throughout the sampling period
+\citep{amst:mcdo:manl:2005}. Such an assumption is often reasonable
+when the overall time period is relatively short. 
+\citet{otis:etal:1978} provided eight specific closed population CR
+models (see also \citet{pollock:1991}), which permit the individual
+capture probabilities to depend on time and behavioural response,
+and be heterogeneous between individuals.
+The use of covariate information (or explanatory variables)
+to explain heterogeneous capture probabilities 
+in CR experiments has received considerable attention over the
+last 30 years \citep{pollock:2002}. Population size estimates that
+ignore this heterogeneity typically result in biased population
+estimates \citep{amst:mcdo:manl:2005}.
+A recent book on CR experiments as a whole is \cite{mccr:morg:2014}.
+
+
+Since individual covariate information (such as gender or body weight)
+can only be collected on observed individuals, conditional likelihood 
+models are employed \citep{pollock:1984,hugg:1989,alho:1990,lebreton:1992}. 
+That is, one conditions on the individuals seen at least once throughout
+the experiment; hence individual covariates can be
+considered in the analysis.
+modelled as logistic functions of the covariates, and parameters are 
+estimated using maximum likelihood. Importantly, these CR models are
+generalized linear models \citep[GLMs;][]{mccull:1989,hugg:hwan:2011}.
+
+
+Here, we maximize the conditional likelihood of the
+(more formally, positive-Bernoulli distribution) models
+of \citet{hugg:1989}. This approach has become standard practice for carrying
+out inferences when considering individual covariates, with several different 
+software packages currently using this methodology, including:
+\proglang{MARK} \citep{cooch:white:2012},
+\proglang{CARE-2} \citep{hwang:chao:2003},
+and the \proglang{R} packages \citep{R:2014}:
+\pkg{mra} \citep{mcdonald:2010}, \pkg{RMark} \citep{laake:2013}
+and \pkg{Rcapture} \citep{rcapturepackage:2012,Baillargeon:Rivest:2007};
+the latter package uses a log-linear approach, which can be shown to be
+equivalent to the conditional likelihood \citep{cormak:1989,hugg:hwan:2011}.
+These programs are quite user friendly and, specifically, allow
+capture probabilities to be modelled as linear functions of the covariates.
+So an obvious question is: why develop yet another implementation for
+closed population CR modelling?
+
+
+Firstly, nonlinearity arises quite naturally in many ecological applications
+\citep{schluter1988,yee:mitc:1991,craw:1993,gimenez:2006,bolk:2008}.
+In the CR context, capture probabilities may depend nonlinearly on 
+individual covariates, e.g., mountain pygmy possums with lighter or 
+heavier body weights may have lower capture probabilities compared 
+with those having mid-ranged body weights 
+\citep[e.g.,][]{hugg:hwan:2007,stok:hugg:2012}.
+However, in our experience, the vast majority of CR software does not handle
+nonlinearity well, with regard to both estimation and the plotting
+of the smooth functions. Since GAMs \citep{hastie:1990,wood:2006}
+were developed in the mid-1980s they have become a standard tool for 
+data analysis in regression. The nonlinear relationship between the 
+response and covariate is flexibly modelled, and few assumptions are 
+made on the functional relationship. The drawback in applying these
+models to CR data has been the difficult programming required to 
+implement the approach.
+
+
+Secondly, we have found several implementations of the conditional
+likelihood approach slow, and in some instances unreliable and difficult to use.
+We believe our implementation has superior capabilities, and has 
+good speed and reliability. The results of 
+Section \ref{sec:poz:posbernoulli.eg.timingtests} contrast our software 
+with some others. Moreover, the incorporation of these methods in a general, 
+maintained statistical package will result in them being updated as 
+the package is updated.
+
+
+Standard GLM and GAM methodologies are unable to cope with the CR 
+models considered in this article because they are largely restricted 
+to one linear/additive predictor $\eta$. Fortunately however, a 
+natural extension in the form of the vector generalized linear 
+and additive model (VGLM/VGAM) classes do allow for multiple $\eta$s. 
+VGAMs and VGLMs are described in \citet{yee:wild:1996} and \citet{yee:hast:2003}. 
+Their implementation in the \pkg{VGAM} package \citep{yee:2008,yee:2010,yee:VGAM:2013-093}
+has become increasingly popular and practical over the last few years, due to
+the large number of exponential families available for discrete/multinomial
+response data. In addition to the flexible modelling of both VGLMs and VGAMs,
+a wide range of useful features is also available:
+\begin{itemize}
+\item smoothing capabilities;
+
+\item model selection using, e.g., AIC or BIC \citep{burnham:anderson:1999}; 
+
+\item regression diagnostics and goodness--of--fit tools;
+
+\item reduced-rank regression \citep{yee:hast:2003} for dimension 
+reduction;
+
+\item computational speed and robustness;
+
+\item choice of link functions;
+
+\item offsets and prior weights; and
+
+\item (specifically) when using \proglang{R}: generic functions 
+based on object oriented programming, e.g., \code{fitted()}, 
+\code{coef()}, \code{vcov()}, \code{summary()}, \code{predict()}, 
+\code{AIC()}, etc.
+\end{itemize}
+
+
+Our goal is to provide users with an easy-to-use object-oriented \pkg{VGAM} 
+structure, where four \code{family}-type functions based on the conditional
+likelihood are available to fit the eight models of \citet{otis:etal:1978}. 
+We aim to give the user additional tools and features,
+such as those listed above, to carry out a more informative and
+broader analysis of CR data, particularly when considering more than
+one covariate. Finally, this article primarily focuses on the technical 
+aspects of the proposed package, and less so on the biological interpretation 
+for CR experiments. The latter will be presented elsewhere.
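+
+To give the flavour of what follows, a typical call looks something like
+this (a minimal sketch, assuming the \texttt{deermice} data shipped with
+\pkg{VGAM}; the covariates chosen are purely illustrative):
+\begin{Code}
+fit.Mbh <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex,
+                posbernoulli.b, data = deermice)
+coef(fit.Mbh, matrix = TRUE)
+fit.Mbh@extra$N.hat             # the resulting estimate of N
+\end{Code}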
+
+
+An outline of this article is as follows. In Section \ref{sec:cr} we
+present the conditional likelihood for CR models and a description of
+the eight \citet{otis:etal:1978} models. Section \ref{sec:vgam}
+summarizes pertinent details of VGLMs and VGAMs. Their connection to
+the CR models is made in Section \ref{sec:meth}. Software details 
+are given in Section \ref{sec:software}, and examples on real and 
+simulated data using the new software are demonstrated in 
+Section \ref{sec:body:exam}. Some final remarks are given in 
+Section \ref{sec:discussion}. The two appendices give some
+technical details relating to the first and second derivatives
+of the conditional log-likelihood, and the means.
+
+
+\begin{table}[tt]
+\centering
+\begin{tabular}{cl}
+\hline 
+\ \ \ Symbol \ \ \ & Explanation \\
+\hline
+% --------------------------------------
+$N$ & (Closed) population size to be estimated \\
+% --------------------------------------
+$n$ & Total number of distinct individuals caught in the trapping experiment \\
+% --------------------------------------
+$\tau$ & Number of sampling occasions, where $\tau \geq 2$ \\
+% --------------------------------------
+$\biy_i$ & Vector of capture histories for individual $i$ $(i=1,\ldots,n)$ with observed values\\
+& 1 (captured) and 0 (noncaptured). Each $\biy_i$ has at least one observed 1 \\
+% --------------------------------------
+``$h$'' & Model $\calM$ subscript, for heterogeneity \\
+% --------------------------------------
+``$b$'' & Model $\calM$ subscript, for behavioural effects \\
+% --------------------------------------
+``$t$'' & Model $\calM$ subscript, for temporal effects \\
+% --------------------------------------
+$p_{ij}$ & Probability that individual $i$ is captured at sampling occasion $j$ $(j=1,\ldots,\tau)$ \\
+% --------------------------------------
+$z_{ij}$ & $= 1$ if individual $i$ has been captured before occasion $j$,
+else $= 0$ \\
+% --------------------------------------
+$\btheta^{}$ & Vector of regression coefficients to be estimated related to $p_{ij}$
+\\
+% --------------------------------------
+$\boldeta$ & Vector of linear predictors (see Table \ref{tab2} 
+for further details)
+\\
+% --------------------------------------
+$g$ & Link function applied to, e.g., $p_{ij}$. Logit by default
+\\
+% --------------------------------------
+\hline
+\end{tabular}
+\caption{
+Short summary of the notation used for the positive-Bernoulli distribution
+for capture--recapture (CR) experiments. Additional details are in the text.
+\label{tab0}
+}
+\end{table}
+
+
+%*********************************************************************
+\section[Capture--recapture models]{Capture--recapture models}
+\label{sec:cr}
+
+
+In this section we give an outline for closed population CR models
+under the conditional likelihood/GLM approach. For further details 
+we recommend \citet{hugg:1991} and \citet{hugg:hwan:2011}. 
+The notation of Table \ref{tab0} is used throughout this article.
+
+
+% ---------------------------------------------------------------
+\subsection{Conditional likelihood}
+\label{sec:condlik}
+
+
+Suppose we have a closed population of $N$ individuals,
+labelled $i=1,\ldots,N$ and $\tau$ capture occasions 
+labelled $j=1,\ldots,\tau$. We make the usual assumptions that 
+individuals in the population behave independently of each other, 
+individuals do not lose their tags, and tags are recorded correctly. 
+Let $y_{ij}=1$ if the $i$th individual was caught on the $j$th 
+occasion and be zero otherwise, and let $n$ be the number of 
+distinct individuals captured.
+
+
+Let $p_{ij}$ denote the probability of capturing individual $i$ 
+on occasion $j$. As noted in Section \ref{sec:intro},
+\citet{otis:etal:1978} 
+describe eight models for the capture probabilities,
+see Section \ref{sec:8models} 
+for further details. Label the individuals captured in the experiment 
+by $i=1,\ldots,n$ and those never captured by $i=n+1,\ldots,N$. The full 
+likelihood is given by
+\begin{eqnarray}
+L_{f} & = & K \prod_{i=1}^{N}\prod_{j=1}^{\tau} p_{ij}^{y_{ij}}
+(1-p_{ij})^{1- y_{ij}} 
+\nonumber
+\\
+& = & K 
+\left\{\prod_{i=1}^{n}\prod_{j=1}^{\tau}p_{ij}^{y_{ij}} 
+(1-p_{ij})^{1 - y_{ij}}\right\}\cdot
+\left\{\prod_{i=n+1}^{N} \prod_{j=1}^{\tau} (1-p_{ij})\right\}
+\label{eq:posbern.uncondlikelihood}
+\end{eqnarray}
+where $K$ is independent of the $p_{ij}$ but may depend on $N$. The
+RHS of (\ref{eq:posbern.uncondlikelihood}) requires knowledge of the
+uncaptured individuals and in general cannot be computed. Consequently 
+no MLE of $N$ will be available unless some homogeneity assumption is 
+made about the noncaptured individuals. Instead, a conditional likelihood 
+function based only on the individuals observed at least once is
+\begin{eqnarray}
+\label{eq:posbern.condlikelihood}
+L_{c} & \propto & \prod_{i=1}^{n} 
+\frac{\prod\limits_{j=1}^{\tau} p_{ij}^{y_{ij}} (1-p_{ij})^{1 - y_{ij}}}
+{1-\prod_{s=1}^{\tau}(1-p_{is}^{\dagger})}.
+\end{eqnarray}
+Here, $p_{is}^{\dagger}$ are the $p_{ij}$ computed as if the
+individual had not been captured prior to $j$ so that the denominator 
+is the probability individual $i$ is captured at least once. This 
+conditional likelihood (\ref{eq:posbern.condlikelihood}) is a modified
+version of the likelihood corresponding to a positive-Bernoulli
+distribution \citep{patil:1962}.
+
+
+\renewcommand{\arraystretch}{1.2}
+\begin{table}[tt]
+\begin{center}
+\begin{tabular}{|c||c|c|c|c|}
+\hline
+Capture & \multicolumn{4}{c|}{Joint probability}\\
+\cline{2-5}
+history & \multicolumn{1}{c|}{$\calM_0$/$\calM_h$} 
+& \multicolumn{1}{c|}{$\calM_b$/$\calM_{bh}$}
+& \multicolumn{1}{c|}{$\calM_t$/$\calM_{th}$} 
+& \multicolumn{1}{c|}{$\calM_{tb}$/$\calM_{tbh}$} \\
+\hline
+01 & $(1-p) p$ & $(1-p_{c}) \, p_{c}$ & $(1-p_1) p_2$ 
+& $(1-p_{c1}) \, p_{c2}$ \\
+10 & $p(1-p)$ & $p_{c} (1-p_{r})$ & $p_1 (1-p_2)$ 
+& $p_{c1} (1-p_{r2})$ \\
+11 & $p^2$ & $p_{c} \, p_{r}$ & $p_1 \, p_2$ & $p_{c1} \, p_{r2}$ \\
+\hline
+00 & $(1-p)^2$ & $(1-p_{c})^2$ & $(1-p_1)(1-p_2)$ 
+& $(1-p_{c1})(1-p_{c2})$ \\
+\hline \hline
+$ M \equiv \dim(\boldeta)$ & 1 & 2 & 2 $(=\tau)$ 
+& 3 $(=2 \tau - 1)$ \\
+\hline
+\end{tabular}
+\end{center}
+\caption{Capture history sample space and corresponding probabilities 
+for the eight models of \citet{otis:etal:1978}, with $\tau=2$ capture occasions 
+in closed population CR experiment. Here, $p_{cj}=$ capture probability for 
+unmarked individuals at sampling period $j$, $p_{rj}=$ recapture 
+probability for marked individuals at sampling period $j$, and $p=$ 
+constant capture probability across $\tau=2$. Note that the ``00'' row 
+is never realized in sample data.}
+\label{tab1}
+\end{table}
+\renewcommand{\arraystretch}{1.0}
+
+
+% ---------------------------------------------------------------
+\subsection{The eight models}
+\label{sec:8models}
+
+
+Models which allow capture probabilities to depend on one or a 
+combination of time, heterogeneity or behavioural effects are defined 
+using appropriate subscripts, e.g., $\calM_{th}$ depends on time and 
+heterogeneity. These eight models have a nested structure 
+of which $\calM_{tbh}$ is the most general. The homogeneous 
+model $\calM_0$ is the simplest (but most unrealistic) and has equal 
+capture probabilities for each individual, $H_0\!: p_{ij}=p$, regardless 
+of the sampling occasion. All eight models are GLMs, since the 
+conditional likelihood (\ref{eq:posbern.condlikelihood}) 
+belongs to the exponential family \citep{hugg:hwan:2011}.
+
+
+To illustrate the approach, we use the following toy example throughout:
+consider a CR experiment with two occasions---morning and evening
+(i.e., $\tau=2$)---with capture 
+probabilities varying between the two occasions. Furthermore, suppose we
+have collected some individual covariates---weight and gender.
+The joint probabilities of all the eight models are listed in
+Table \ref{tab1}. It can be seen that all but the positive-binomial
+model ($\calM_{0}/\calM_{h}$) 
+require more than one probability and hence more than
+one linear predictor, so that the 
+original \cite{neld:wedd:1972} GLM framework is inadequate. Further, there 
+are two noteworthy points from Table \ref{tab1} which apply for 
+any $\tau\ge 2$:
+\begin{itemize}
+
+\item first, for $\calM_{t}$-type models, as $\tau$ increases,
+so does the number of linear predictors and hence the potential
+number of parameters;
+ 
+\item secondly, it is evident that there are four main categories 
+consisting of non-heterogeneity models ($\calM_{0}$, $\calM_{b}$, $\calM_{t}$ 
+and $\calM_{tb}$), which are paired with a heterogeneity sub-model 
+(respectively $\calM_{h}$, $\calM_{bh}$, $\calM_{th}$ and $\calM_{tbh}$).
+
+\end{itemize}
+
+
+The four heterogeneity models allow for each individual to have 
+their own probability of capture/recapture. In our toy example, 
+the capture probabilities are dependent on an individual's weight 
+and gender. We discuss these models further in Section \ref{sec:vgam.basics}. 
+It is natural to consider individual covariates such as weight 
+and gender as linear/additive predictors. Let $x_{i}$ denote a 
+covariate (either continuous or discrete) for the $i$th individual, 
+which is constant across the capture occasions $j=1,\ldots,\tau$, 
+e.g., for continuous covariates one could use the first 
+observed value or the mean across all $j$. If there are $d-1$ covariates, 
+we write $\bix_i=(x_{i1},\ldots,x_{id})^{\top}$ with $x_{i1}=1$ if 
+there is an intercept. Also, let $g^{-1}(\eta)={\exp(\eta)}/\{{1+\exp(\eta)}\}$ 
+be the inverse \logit{} function. For model $\calM_{tbh}$, the
+capture/recapture probabilities are given by [notation follows
+Section \ref{sec:VGAMs.basics}]
+\begin{eqnarray*}
+p_{ij}^{\dagger} & = & g^{-1} \!
+\left(\qquad \quad \, \beta^*_{(j+1)1} + \bix_{i[-1]}^{\top} \, 
+\bbeta_{1[-1]}^{} \right), \qquad j=1,\ldots,\tau, \\
+p_{ij} & = & g^{-1} \!\left(\beta^*_{(1)1} + \beta^*_{(j+1)1} + 
+\bix_{i[-1]}^{\top} \,\bbeta_{1[-1]}^{} \right),\qquad j=2,\ldots,\tau,
+\end{eqnarray*}
+where $\beta^*_{(1)1}$ is the behavioural effect of prior capture, 
+$\beta^*_{(j+1)1}$ for $j=1,\ldots,\tau$ are time effects, 
+and $\bbeta_{1[-1]}$ are the remaining regression parameters 
+associated with the covariates. Computationally, the conditional 
+likelihood (\ref{eq:posbern.condlikelihood}) is maximized with 
+respect to all the parameters (denoted by $\btheta$) by the 
+Fisher scoring algorithm using the derivatives given in 
+Appendix A.
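+
+For instance, with $\tau=2$ this yields $M=2\tau-1=3$ linear predictors,
+matching the $\calM_{tbh}$ column of Table \ref{tab1}:
+\begin{eqnarray*}
+\eta_1 \,=\, g(p_{i1}^{\dagger}) & = & \beta^*_{(2)1} +
+\bix_{i[-1]}^{\top} \, \bbeta_{1[-1]}^{}, \\
+\eta_2 \,=\, g(p_{i2}^{\dagger}) & = & \beta^*_{(3)1} +
+\bix_{i[-1]}^{\top} \, \bbeta_{1[-1]}^{}, \\
+\eta_3 \,=\, g(p_{i2}) & = & \beta^*_{(1)1} + \beta^*_{(3)1} +
+\bix_{i[-1]}^{\top} \, \bbeta_{1[-1]}^{}.
+\end{eqnarray*}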
+
+
+% ---------------------------------------------------------------
+\subsection[Estimation of N]{Estimation of $N$}
+\label{sec:Nhat}
+
+
+To estimate $N$ in the above models, let $\pi_{i}(\btheta)=1-\prod\limits_{s=1}^{\tau}(1-p_{is}^{\dagger})$ 
+be the probability that individual $i$ is captured at least once 
+in the course of the study. Then, if $\btheta$ is known, 
+the Horvitz--Thompson \citep[HT;][]{horv:thom:1952} estimator
+\begin{eqnarray}
+\label{eq:HT}
+\widehat{N}(\btheta) &=& \sum_{i=1}^{n} \; {\pi}_{i}(\btheta)^{-1}
+\end{eqnarray}
+is unbiased for the population size $N$ and an associated estimate of 
+the variance of $\widehat{N}(\btheta)$ is $s^2(\btheta) = \sum_{i=1}^{n} 
+\; {\pi}_{i}(\btheta)^{-2} \, \left[1-{\pi}_{i}(\btheta)\right]$. 
+If $\btheta$ is estimated by $\widehat{\btheta}$ then one can use
+\begin{eqnarray}
+\label{eq:est.varNhat2}
+\VAR\left(\widehat{N}(\widehat{\btheta}) \right) &  \approx  & 
+s^2(\widehat{\btheta}) + \widehat{\bid}^{\top} 
+\widehat{\VAR}(\widehat{\btheta}) \,\widehat{\bid}
+\end{eqnarray}
+where, following from a Taylor series expansion 
+of $\widehat{N}(\widehat{\btheta})$ 
+about $\btheta$,
+\begin{eqnarray*}
+\bid\,  =  \, \frac{d N(\btheta)}{d \btheta}
+&  =  & -\sum_{i=1}^n \; {\pi}_{i}(\btheta)^{-2} \; \,
+\frac{d {\pi}_{i}(\btheta)}{d \btheta} \\
+&  =  &\sum_{i=1}^n \; \frac{-1}{{\pi}_{i}(\btheta)^{2}} \; 
+\sum_{s=1}^{\tau} \; \left[\prod_{t=1,\ t \neq s}^{\tau}
+\left( 1 - p_{it}^{\dagger}\right)\right]
+\frac{\partial p_{is}^{\dagger}}{\partial \btheta}.
+\end{eqnarray*}
+
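+As a minimal sketch (not run) of how (\ref{eq:HT}) and $s^2$ may be
+computed by hand, suppose \code{fit} is a hypothetical fitted CR model
+without behavioural effects, so that its fitted values equal
+the $p_{is}^{\dagger}$:
+
+
+<<label = ht-by-hand, eval = FALSE, prompt = FALSE>>=
+p.hat  <- fitted(fit)                    # n x tau matrix of p-dagger values
+pi.hat <- 1 - apply(1 - p.hat, 1, prod)  # pi_i: P(captured at least once)
+N.hat  <- sum(1 / pi.hat)                # Horvitz--Thompson estimate
+s2     <- sum((1 - pi.hat) / pi.hat^2)   # first variance component
+c(N.hat, sqrt(s2))
+@
+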
+
+%*********************************************************************
+\section[Vector generalized linear and additive models]{Vector generalized 
+linear and additive models}
+\label{sec:vgam}
+
+
+To extend the above linear models, we use VGLMs and VGAMs which we briefly 
+describe in this section. These models fit within a large statistical 
+regression framework which will be described in \citet{yee:2015}. 
+The details here are purposely terse; readers are directed
+to \citet{yee:2008,yee:2010} 
+for accessible overviews and examples, and \citet{yee:wild:1996} 
+and \citet{yee:hast:2003} for technical details.
+
+
+% ---------------------------------------------------------------
+\subsection[Basics]{Basics}
+\label{sec:vgam.basics}
+
+
+Consider observations on independent pairs $(\bix_i,\biy_i)$,
+$i=1,\ldots,n$. We use ``$[-1]$'' to delete the first element, 
+e.g., $\bix_{i[-1]} =(x_{i2},\ldots,x_{id})^{\top}$. For 
+simplicity, we will occasionally drop the subscript $i$ and simply 
+write $\bix =(x_{1},\ldots,x_{d})^{\top}$. Consider a single observation 
+where \biy{} is a $Q$-dimensional vector. For the CR models of this 
+paper, $Q=\tau$ when the response is entered as a matrix of 0s and 1s.
+The only exception is model $\calM_0/\calM_h$, where the aggregated
+counts may be inputted; see Section \ref{sec:M0Mh}. VGLMs are defined 
+through the model for the conditional density
+\[
+f(\biy | \bix ; \bB)  =  f(\biy,\eta_1,\ldots,\eta_M)
+\]
+for some known function $f(\cdot)$, 
+where $\bB =(\bbeta_1 \,\bbeta_2 \,\cdots \,\bbeta_M)$ is 
+a $d\times M$ matrix of regression coefficients to be estimated.
+We may also write $\bB^{\top} = (\bbeta_{(1)} \,\bbeta_{(2)}\,\cdots\,
+\bbeta_{(d)})$ so that $\bbeta_j$ is the $j$th column of $\bB$ 
+and $\bbeta_{(k)}$ is the $k$th row.
+
+
+The $j$th linear predictor is then
+\begin{equation}
+\eta_j  =  \bbeta_j^{\top} \bix  =  \sum_{k=1}^d \beta_{(j)k} \, 
+x_{k}, \qquad j=1, \ldots, M,
+\label{gammod2}
+\end{equation}
+where $\beta_{(j)k}$ is the $k$th component of $\bbeta_j$.
+In the CR context, we remind the reader that,
+as in Table \ref{tab1}, we have $M=2$ for $\calM_{bh}$, 
+$M=\tau$ for $\calM_{th}$ and $M=2\tau-1$ for $\calM_{tbh}$.
+
+
+In GLMs the linear predictors are used to model the means.
+The $\eta_j$ of VGLMs model the parameters of a model. 
+In general, for a parameter $\theta_j$ we take
+\[
+\eta_j  =  g_j(\theta_j), \qquad j=1,\ldots,M
+\]
+and we say $g_j$ is a parameter link function. Write
+\begin{equation}
+\boldeta_i  =  \left(\begin{array}{c}\eta_1(\bix_{i})\\
+\vdots \\
+\eta_M(\bix_{i})\end{array}\right)  = \bB^{\top} \bix_{i}  = 
+\left(\begin{array}{c}\bbeta_1^{\top} \bix_{i} \\\vdots \\
+\bbeta_M^{\top} \bix_{i}\end{array} \right).
+\label{eq:lin.pred}
+\end{equation}
+
+
+In practice we may wish to constrain the effect of a covariate to
+be the same for some of the $\eta_j$ and to have no effect for others.
+In our toy example (model $\calM_{th}$ with $\tau=M=2$ and $d=3$) we have
+\begin{eqnarray*}
+\eta_1(\bix_i) & = & \beta_{(1)1} + \beta_{(1)2} \, x_{i2} + 
+\beta_{(1)3} \, x_{i3}, \\
+\eta_2(\bix_i) & = & \beta_{(2)1} + \beta_{(2)2} \, x_{i2} + 
+\beta_{(2)3} \, x_{i3},
+\end{eqnarray*}
+where $x_{i2}$ is the individual's weight 
+and $x_{i3}$ is an indicator of gender, say. Suppose we impose the 
+constraints $\beta_{(1)2}\equiv\beta_{(2)2}$
+and $\beta_{(1)3}\equiv\beta_{(2)3}$. Then, with ``${}^*$'' 
+denoting the parameters that are estimated,
+\begin{eqnarray*}
+\eta_1(\bix_i) & = & \beta^*_{(1)1} + \beta^*_{(1)2} \, x_{i2} + 
+\beta^*_{(1)3} \, x_{i3}, \\
+\eta_2(\bix_i) & = & \beta^*_{(2)1} + \beta^*_{(1)2} \, x_{i2} + 
+\beta^*_{(1)3} \, x_{i3}, \\
+\end{eqnarray*}
+and we may write
+\begin{eqnarray*}
+\boldeta(\bix_i) =
+\begin{pmatrix}\eta_1(\bix_i)\\
+\eta_2(\bix_i)\end{pmatrix}
+& = & \sum_{k=1}^3 \, \bbeta_{(k)} \, x_{ik}\\
+& = & \begin{pmatrix}\beta_{(1)1} & \beta_{(1)2} & \beta_{(1)3}\\
+\beta_{(2)1} & \beta_{(2)2} & \beta_{(2)3} \end{pmatrix}
+\begin{pmatrix}x_{i1}\\ x_{i2}\\ x_{i3} \end{pmatrix}\\
+& = &
+\begin{pmatrix}
+\beta^*_{(1)1} & \beta^*_{(1)2} & \beta^*_{(1)3}\\
+\beta^*_{(2)1} & \beta^*_{(1)2} & \beta^*_{(1)3}
+\end{pmatrix}
+\begin{pmatrix}x_{i1}\\ x_{i2}\\ x_{i3}\end{pmatrix}\\
+& = &
+\begin{pmatrix}1 & 0\\ 0 & 1\end{pmatrix} 
+\begin{pmatrix}
+\beta^*_{(1)1}\\ \beta^*_{(2)1}
+\end{pmatrix}
+x_{i1}+
+\begin{pmatrix}1\\1\end{pmatrix}
+\beta^*_{(1)2} \, x_{i2}+
+\begin{pmatrix}
+1\\
+1\end{pmatrix}
+\beta^*_{(1)3} \, x_{i3}\\
+& = & \sum_{k=1}^3 \, \bH_k \, \bbeta^*_{(k)} \, x_{ik}.
+\end{eqnarray*}
+We can also write this as (noting that $x_{i1}=1$)
+\begin{eqnarray*}
+\boldeta(\bix_i) & = & \begin{pmatrix}x_{i1} & 0 \\ 0 & x_{i1}
+\end{pmatrix} \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} 
+\begin{pmatrix} \beta^*_{(1)1}\\ \beta^*_{(2)1} \end{pmatrix} + 
+\begin{pmatrix} x_{i2} & 0 \\ 0 & x_{i2} \end{pmatrix}
+\begin{pmatrix} 1 \\ 1\end{pmatrix}
+\beta^*_{(1)2} + \begin{pmatrix} x_{i3} & 0 \\ 0 & x_{i3} 
+\end{pmatrix}
+\begin{pmatrix}
+1 \\
+1
+\end{pmatrix}
+\beta^*_{(1)3}\\ 
+& = & \sum_{k=1}^3 \, \mathrm{diag}(x_{ik},x_{ik}) \, \bH_k \,\bbeta_{(k)}^{*}. 
+\end{eqnarray*}
+In general, for VGLMs, we represent the models as
+\begin{eqnarray}
+\boldeta(\bix_i) 
+& = & \sum_{k=1}^d \, \bbeta_{(k)} \, x_{ik} \nonumber \\
+& = & \sum_{k=1}^d \, \bH_k \, \bbeta_{(k)}^{*} \, x_{ik}
+\label{eq:constraints.VGLM}\\
+& = & \sum_{k=1}^d \, \mathrm{diag}(x_{ik},\ldots,x_{ik}) \,
+\bH_k \, \bbeta_{(k)}^{*}
+\nonumber
+\end{eqnarray}
+where $\bH_1,\bH_2,\ldots,\bH_d$ are known constraint matrices
+of full column-rank (i.e., of rank \code{ncol}($\bH_k$)), and $\bbeta_{(k)}^*$ 
+is a vector containing a possibly reduced set of regression coefficients.
+Then we may write
+\begin{equation}
+\label{eq:lin.coefs4}
+{\bB}^{\top}  =  
+\left( 
+\bH_1 \bbeta_{(1)}^* \; \; \; 
+\bH_2 \bbeta_{(2)}^* \;\;\; \cdots \;\;\; 
+\bH_d \bbeta_{(d)}^* 
+\right)
+\end{equation}
+as an expression of (\ref{eq:lin.pred}) concentrating on columns rather
+than rows. Note that with no constraints at all, all $\bH_k = \bI_M$
+and $\bbeta_{(k)}^*=\bbeta_{(k)}$. We need both (\ref{eq:lin.pred}) 
+and (\ref{eq:lin.coefs4}) since at times we focus on the $\eta_j$
+and at other times on the variables $x_{k}$. The constraint matrices 
+for common models are pre-programmed in \pkg{VGAM}
+and can be set up by using arguments such as \code{parallel} and \code{zero}
+found in \pkg{VGAM} family functions. Alternatively, there
+is the argument \code{constraints} where they may be explicitly
+inputted. Using \code{constraints} is less convenient but provides
+full generality.
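+
+As a hedged sketch of both routes (not run), consider the
+toy $\calM_{th}$ model above, with the weight and sex effects
+constrained to be equal across the two occasions; here \code{pdata}
+is a hypothetical data frame of capture histories and covariates:
+
+
+<<label = toy-constraints, eval = FALSE, prompt = FALSE>>=
+# Pre-programmed shortcut: parallelism for weight and sex only
+# (the two intercepts remain unconstrained).
+fit1 <- vglm(cbind(y1, y2) ~ weight + sex,
+             posbernoulli.t(parallel.t = TRUE ~ weight + sex - 1),
+             data = pdata)
+# The same H_k inputted explicitly via 'constraints'.
+fit2 <- vglm(cbind(y1, y2) ~ weight + sex, posbernoulli.t,
+             constraints = list("(Intercept)" = diag(2),
+                                weight = cbind(c(1, 1)),
+                                sex    = cbind(c(1, 1))),
+             data = pdata)
+@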
+
+ 
+% ---------------------------------------------------------------
+\subsection[Handling time-varying covariates]{Handling time-varying covariates}
+\label{sec:xij}
+
+
+Often, the covariates may be time-varying, e.g., when temperature is
+used as a covariate, a different value is observed and 
+measured on each occasion $j$, $j=1,\dots,\tau$.
+Again, using our toy example with $M=2$, $d=3$,
+and $\tau=2$, suppose we have time-dependent covariates $\bix_{ij}$, $j=1,2$. 
+We may have the model
+\begin{eqnarray*}
+\eta_1(\bix_{i1}) & = & \beta^*_{(1)1} + \beta^*_{(1)2} \, x_{i21} +
+\beta^*_{(1)3}\, x_{i31},\\
+\eta_2(\bix_{i2}) & = & \beta^*_{(2)1} + \beta^*_{(1)2} \, x_{i22} +
+\beta^*_{(1)3}\, x_{i32},
+\end{eqnarray*}
+for the linear predictors on the two occasions. Here, $x_{ikt}$ denotes
+the value of the $k$th explanatory variable for the $i$th animal at time $t$. We write this model as
+\begin{eqnarray*}
+\boldeta(\bix_{ij}) & = & \begin{pmatrix} x_{i11} & 0\\ 0 & x_{i12} \end{pmatrix}
+\begin{pmatrix} 1 & 0\\ 0 & 1\end{pmatrix} \begin{pmatrix} \beta^*_{(1)1}\\
+\beta^*_{(2)1} \end{pmatrix} + \begin{pmatrix} x_{i21} & 0\\ 0 & x_{i22}\end{pmatrix}
+\begin{pmatrix} 1\\ 1\end{pmatrix} \beta^*_{(1)2} + 
+\begin{pmatrix} x_{i31} & 0\\ 0 & x_{i32}\end{pmatrix}
+\begin{pmatrix} 1 \\ 1\end{pmatrix}
+\beta^*_{(1)3}\\ & = & \sum_{k=1}^3 \, \mathrm{diag}(x_{ik1},x_{ik2}) \,
+\bH_k\,\bbeta_{(k)}^{*}. 
+\end{eqnarray*}
+Thus to handle time-varying covariates one needs the \code{xij} facility of \pkg{VGAM} 
+(e.g., see Section \ref{sec:poz:posbernoulli.eg.hugg:1991}), which allows a covariate 
+to have different values for different $\eta_{j}$ through the general formula
+\begin{eqnarray}
+\boldeta(\bix_{ij})
+&  =  & \sum_{k=1}^{d}\, \mathrm{diag}(x_{ik1},\ldots,x_{ikM})\,
+\bH_k \,\bbeta_{(k)}^{*}=\sum_{k=1}^d \, 
+\bX^{\#}_{(ik)}\bH_k \,\bbeta_{(k)}^{*}
+\label{eq:vglimo:xij.vector.diag}
+\end{eqnarray}
+where $x_{ikj}$ is the value of variable $x_{k}$ for unit $i$ for $\eta_{j}$.
+The derivation of (\ref{eq:vglimo:xij.vector.diag}),
+followed by some examples, is given in \cite{yee:2010}.
+Implementing this model requires specification 
+of the diagonal elements of the matrices $\bX^{\#}_{(ik)}$, and we see
+its use in Section \ref{sec:poz:posbernoulli.eg.hugg:1991}.
+Clearly, a model may include a mix of time-dependent and
+time-independent covariates. 
+The model is then specified through the constraint matrices $\bH_k$ 
+and the covariate matrices $\bX^{\#}_{(ik)}$. Typically in CR experiments, 
+the time-varying covariates will be environmental effects. Fitting 
+time-varying individual covariates requires some interpolation when 
+an individual is not captured and is beyond the scope of the present 
+work.
+
+
+% ---------------------------------------------------------------
+\subsection[VGAMs]{VGAMs}
+\label{sec:VGAMs.basics}
+
+
+VGAMs replace the linear functions in (\ref{eq:constraints.VGLM})
+by smoothers such as splines. Hence, the central formula is
+\begin{equation}
+\boldeta_i  =  \sum_{k=1}^d \; \bH_k \, \bif_k^*(x_{ik})
+\label{eq:vgam}
+\end{equation}
+where $\bif_k^*(x_k) = (f_{k(1)}^*(x_k),\ldots,f_{k(M_k)}^*(x_k))^{\top}$
+is a vector of $M_k$ smooth functions of $x_k$, where $M_k=\mathtt{ncol}(\bH_k)$ 
+is the rank of the constraint matrix for $x_k$. Note that standard error bands 
+are available upon plotting the estimated component functions
+(see \cite{yee:wild:1996} for details),
+e.g., see Figure \ref{fig:poz:deermice}.
+
+
+
+%*********************************************************************
+\section[VGLMs and VGAMs applied to CR data]{VGLMs and VGAMs applied to CR data}
+\label{sec:meth}
+
+
+In this section we merge the results of Sections \ref{sec:cr}
+and \ref{sec:vgam} to show how the eight models of \citet{otis:etal:1978} 
+can be fitted naturally within the VGLM/VGAM framework.
+
+
+% ---------------------------------------------------------------
+\subsection[Linear predictors and constraint matrices]{Linear predictors and constraint matrices}
+\label{sec:constraints}
+
+
+As in Section \ref{sec:vgam.basics}, we now write $\biy_i$ as the 
+capture history vector for individual $i$. Written technically, 
+$\biy_i \in (\{0,1\})^{\tau} \backslash\{\bzero_\tau\}$ so that 
+there is at least one 1 (capture). For simplicity let $p_c$ and $p_r$ 
+be the capture and recapture probabilities. Recall that the value 
+of $M$ will depend on the CR model type and the number of 
+capture occasions considered in the experiment.
+For example, for model $\calM_b$ as in 
+Table \ref{tab1}, $(\eta_1,\eta_2)=(g(p_c),g(p_r))$
+for some link function $g$, thus $M=2$. The upper half of Table \ref{tab2} 
+gives these for the eight \citet{otis:etal:1978} 
+models. The lower half of Table \ref{tab2} gives
+the names of the \pkg{VGAM} family functions that fit those 
+models. They work very similarly to the \code{family} argument 
+of \code{glm()}, e.g.,
+
+
+
+<<label = example-posber, eval = FALSE, prompt = FALSE>>=
+vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+           family = posbernoulli.t, data = pdata)
+@
+
+
+
+is a simple call to fit a $\calM_{th}$ model. The response is a 
+matrix containing 0 and 1 values only, and three individual covariates
+are used here. The argument name \code{family} was chosen so that
+\code{glm()} users need not learn a new argument
+name; moreover, the concept of error distributions, as for the GLM
+class, does not carry over to VGLMs. Indeed, \code{family} denotes some 
+full-likelihood specified statistical model worth fitting in its own right 
+regardless of an `error distribution' which may not make sense.
+Each family function has \code{logit()} as its default link; 
+however, alternatives such as \code{probit()} and \code{cloglog()} 
+are also permissible. Section \ref{sec:software} discusses the software side 
+of \pkg{VGAM} in detail, and Section \ref{sec:body:exam} gives more examples.
+
+
+
+As noted above, constraint matrices are used to simplify complex
+models, e.g., model $\calM_{tbh}$ into model $\calM_{th}$. The default 
+constraint matrices for the $\calM_{tbh}(\tau)$ model are given 
+in Table \ref{tab3}. These are easily constructed using the
+\code{drop.b}, \code{parallel.b} and \code{parallel.t}
+arguments in the family function. More generally, the $\bH_k$
+may be inputted using the \code{constraints} 
+argument---see \cite{yee:2008} and \cite{yee:2010} 
+for examples. It can be seen that the building blocks of 
+the $\bH_k$ are \bone, \bzero, \bI{} and \bOO. 
+This is because one wishes to constrain the effect of $x_k$ 
+to be the same for capture and recapture probabilities. In general, 
+we believe the $\bH_k$ in conjunction with (\ref{eq:vglimo:xij.vector.diag}) 
+can accommodate all linear constraints between the estimated regression 
+coefficients $\widehat{\beta}_{(j)k}$.
+
+
+
+For time-varying covariates models, the $M$ diagonal elements $x_{ikj}$
+in (\ref{eq:vglimo:xij.vector.diag}) correspond to the value of
+covariate $x_k$ at time $j$ for individual $i$. These are inputted
+successively in order using the \code{xij} argument, e.g., as in
+Section \ref{sec:poz:posbernoulli.eg.hugg:1991}.
+
+
+
+\clearpage
+% ---------------------------------------------------------------
+\subsection[Penalized likelihood and smoothing parameters]{Penalized 
+likelihood and smoothing parameters}
+\label{sec:gam}
+
+
+
+For each covariate $x_{k}$, the smoothness of each component 
+function $f^{*}_{(j)k}$ in (\ref{eq:vgam}) can be controlled 
+by the non-negative smoothing parameters $\lambda_{(j)k}$. 
+\cite{yee:wild:1994a} show that, when vector splines are used 
+as the smoother, the penalized conditional log-likelihood 
+\begin{eqnarray}
+\label{eq:posbern.pen.condlikelihood}
+\ell_p  \equiv \log\, L_p  =  \ell_c - \frac12 \sum_{k=1}^d 
+\sum_{j=1}^{\mathtt{ncol}(\mathbf{H}_k)}\,\lambda_{(j)k}\int_{a_k}^{b_k} 
+\left\{f^{*''}_{(j)k}(t) \right\}^2 {\rm d}t 
+\end{eqnarray}
+is maximized. Here, $\ell_c$ is the logarithm of the conditional likelihood 
+function (\ref{eq:posbern.condlikelihood}). 
+The penalized conditional likelihood (\ref{eq:posbern.pen.condlikelihood}) 
+is a natural extension of the penalty approach described 
+in \citet{gree:silv:1994} to models with multiple $\eta_j$.
+
+
+
+An important practical issue is to control overfitting and 
+smoothness in the model. The \code{s()} function used within \code{vgam()} 
+signifies the smooth functions $f^{*}_{(j)k}$ estimated by vector splines, 
+and it has an argument \code{spar} for the smoothing parameters; 
+a relatively small (positive) value will mean much flexibility and wiggliness. 
+As \code{spar} increases the solution converges to the least squares 
+estimate. More commonly, the argument \code{df} is used, and this is 
+known as the equivalent degrees of freedom (EDF). A value of unity 
+means a linear fit, and the default is the value 4 which affords
+a reasonable amount of flexibility.
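+
+As a hedged sketch (not run) of these two controls, with \code{pdata}
+a hypothetical data frame of capture histories:
+
+
+<<label = smooth-control-sketch, eval = FALSE, prompt = FALSE>>=
+# Specify smoothness via the EDF (df = 1 is linear; the default is 4):
+fit.df   <- vgam(cbind(y1, y2, y3) ~ s(weight, df = 2),
+                 posbernoulli.t, data = pdata)
+# Or directly via the smoothing parameter:
+fit.spar <- vgam(cbind(y1, y2, y3) ~ s(weight, spar = 1),
+                 posbernoulli.t, data = pdata)
+@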
+
+
+
+\renewcommand{\arraystretch}{1.3}
+\begin{table}[tt]
+\begin{center}
+\begin{tabular}{ll}
+\hline 
+\hline 
+% --------------------------------------
+Model       & $\bm{\eta}^{\top}$ \\
+\hline
+$\calM_{0}$/$\calM_{h}$ & $g(p)$ \\
+% --------------------------------------
+$\calM_{b}$/$\calM_{bh}$ & $(g(p_c), g(p_r))$ \\
+% --------------------------------------
+$\calM_{t}$/$\calM_{th}$ & $(g(p_{1}),\ldots,g(p_{\tau}))$ \\ 
+% --------------------------------------
+$\calM_{tb}$/$\calM_{tbh}$ \ \ \ &
+$(g(p_{c1}),\ldots,g(p_{c\tau}),g(p_{r2}),\ldots,g(p_{r\tau}))$ \\
+\hline
+% --------------------------------------
+% --------------------------------------
+\hline
+Model \ \ \ \ \ & \code{family =} \\
+\hline
+%--------------------------------------
+$\calM_{0}$/$\calM_{h}$ & \code{posbinomial(omit.constant = TRUE)} \\
+& \code{posbernoulli.b(drop.b = FALSE \mytilde{} 0)} \\
+& \code{posbernoulli.t(parallel.t = FALSE \mytilde{} 0)} \\
+& 
+\code{posbernoulli.tb(drop.b = FALSE \mytilde{} 0, parallel.t = FALSE \mytilde{} 0)} \\
+% --------------------------------------
+$\calM_{b}$/$\calM_{bh}$ &
+\code{posbernoulli.b()} \\
+&
+\code{posbernoulli.tb(drop.b = FALSE \mytilde{} 1, parallel.t = FALSE \mytilde{} 0)} \\
+% --------------------------------------
+$\calM_{t}$/$\calM_{th}$ &
+\code{posbernoulli.t()} \\
+ &
+\code{posbernoulli.tb(drop.b = FALSE \mytilde{} 0, parallel.t = FALSE \mytilde{} 1)} \\
+% --------------------------------------
+$\calM_{tb}$/$\calM_{tbh}$ \ \ \ &
+\code{posbernoulli.tb()} \\
+\hline
+% --------------------------------------
+\end{tabular}
+\end{center}
+\caption{Upper table gives the $\boldeta$ for the eight \citet{otis:etal:1978} 
+models. Lower table gives the relationships between the eight models 
+and function calls. See Table \ref{tab1} for definitions. 
+The $g=\logit$ link is default for all.\label{tab2}}
+\end{table}
+\renewcommand{\arraystretch}{1.0}
+
+
+%*********************************************************************
+\section[Software details for CR models in VGAM]{Software details for 
+CR models in \pkg{VGAM}}
+\label{sec:software}
+
+
+Having presented the conditional likelihood (\ref{eq:posbern.condlikelihood})
+and VGLMs/VGAMs for CR models, we further discuss the fitting in \pkg{VGAM}. 
+It is assumed that users are somewhat familiar with modelling in \proglang{R} 
+and using \code{glm()} class objects. \pkg{VGAM}, authored by TWY, uses S4 classes.
+In order to present the new \code{family} functions developed for \code{vglm()} 
+and \code{vgam()}, some additional preliminaries for \pkg{VGAM} are given below. 
+Version 0.9-4 or later is assumed, and the latest prerelease version is 
+available at \url{http://www.stat.auckland.ac.nz/~yee/VGAM/prerelease}.
+
+
+In \code{vglm()}/\code{vgam()}, both $\calM_0$ and $\calM_h$ 
+are serviced by \code{family = posbinomial()}, i.e., the 
+positive-binomial family. For models $\calM_{b}$, $\calM_{t}$ 
+and $\calM_{tb}$, each of these are serviced by their 
+corresponding \code{family = posbernoulli.}-type functions 
+as in Table \ref{tab2}. Formulas of the form \code{\mytilde{} 1} 
+correspond to $\calM_{0}$, $\calM_{b}$, $\calM_{t}$ and $\calM_{tb}$;
+otherwise they are $\calM_{h}$, $\calM_{bh}$, $\calM_{th}$ and $\calM_{tbh}$.
+
+
+Below we describe each of the eight models with their \pkg{VGAM}
+representation and their default values; we also give additional remarks.
+While all eight models can be fitted using \code{posbernoulli.tb()} alone, this is generally 
+not recommended as it is less efficient in terms of memory requirements and speed.
+
+
+\begin{table}[tt]
+\begin{center}
+\begin{tabular}{ccc}
+& \multicolumn{1}{c}{\code{ parallel.t}} 
+& \code{!parallel.t} \\[0.9em]
+\cline{2-3}
+% --------------------------------------
+\multicolumn{1}{r|}{\code{ parallel.b}} &
+\multicolumn{1}{r|}{
+%
+$\left(
+\begin{array}{ll}
+\bzero_\tau & \bone_\tau\\
+\bone _{\tau-1} \  & \bone_{\tau-1}
+\end{array} 
+\right)$,
+$\left(
+\begin{array}{l}
+\bone_\tau\\
+\bone_{\tau-1}
+\end{array} 
+\right)$ 
+}
+& % ---------------------------------------------
+\multicolumn{1}{r|}{ 
+$\left(
+\begin{array}{ll}
+\bzero_\tau & \bI_\tau\\
+\bone _{\tau-1} \  & \bI_{{\tau}[-1,]}
+\end{array} 
+\right)$,
+$\left(
+\begin{array}{l}
+\bI_\tau\\
+\bI_{{\tau}[-1,]}
+\end{array} 
+\right)$ 
+}
+\\[1.5em] % This gives problems
+% --------------------------------------
+\cline{2-3}
+% --------------------------------------
+\multicolumn{1}{r|}{\code{!parallel.b}} &
+\multicolumn{1}{r|}{
+$\left(
+\begin{array}{ll}
+\bOO_{\tau \times (\tau-1)} \  & \bone_{\tau}\\
+\bI_{\tau-1} & \bone_{\tau-1} 
+\end{array} 
+\right)$,
+%
+$\left(
+\begin{array}{l}
+\bone_\tau\\
+\bone_{\tau-1}
+\end{array} 
+\right)$ 
+}
+& % --------------------------------------
+\multicolumn{1}{r|}{ 
+$\left(
+\begin{array}{ll}
+\bOO_{\tau \times(\tau-1)} \  & \bI_{\tau}\\
+\bI_{\tau-1}& \bI_{{\tau}[-1,]}
+\end{array} 
+\right)$,
+%
+$\left(
+\begin{array}{l}
+\bI_\tau\\
+\bI_{{\tau}[-1,]}
+\end{array} 
+\right)$ 
+} 
+\\[1.5em]
+% --------------------------------------
+\cline{2-3}
+\end{tabular}
+\end{center}
+\caption{ For the general $\calM_{tbh}(\tau)$
+family \code{posbernoulli.tb()}, 
+the constraint matrices corresponding to the arguments \code{parallel.t},
+\code{parallel.b} and \code{drop.b}. In each cell the 
+LHS matrix is $\bH_k$ when \code{drop.b} is \code{FALSE} 
+for $x_k$. The RHS matrix is when \code{drop.b} is \code{TRUE} 
+for $x_k$; it simply deletes the left submatrix of $\bH_k$.
+These $\bH_k$ should be seen in light of Table \ref{tab2}.
+Notes:
+(i) the default for \code{posbernoulli.tb()}
+is $\bH_1 = $ the LHS matrix of the top-right cell
+and $\bH_k = $ the RHS matrix of the top-left cell; and
+(ii) $\bI_{{\tau}[-1,]} = (\bzero_{\tau-1} | \bI_{\tau-1})$.
+\label{tab3}}
+\end{table}
+
+
+% ---------------------------------------------------------------
+\subsection[Basic software details]{Basic software details}
+\label{sec:furthersoftware}
+
+
+All family functions except \code{posbinomial()} should have
+an $n\times\tau$ capture history matrix as the response, preferably 
+with column names. Indicators of the past capture of individual $i$, 
+defined as $z_{ij}$, are stored on \pkg{VGAM} objects as the \code{cap.hist1}
+component in the \code{extra} slot. Also, there is a component
+called \code{cap1} which indicates on which sampling occasion the
+first capture occurred.
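+
+For example (a minimal sketch, not run), with a fitted CR object
+\code{fit}, these components may be examined directly:
+
+
+<<label = extra-slot-sketch, eval = FALSE, prompt = FALSE>>=
+fit@extra$cap.hist1  # z_ij: indicators of past capture of individual i
+fit@extra$cap1       # sampling occasion of first capture
+@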
+
+
+As will be illustrated in Section \ref{sec:poz:posbernoulli.eg.hugg:1991}, 
+a fitted CR object stores the point estimate for the population
+size estimator (\ref{eq:HT}) in the \code{extra} slot with
+component name \code{N.hat}. Likewise, its standard error (\ref{eq:est.varNhat2}) 
+has component name \code{SE.N.hat}. By default all the family functions
+return fitted values corresponding to the probabilities in the conditional 
+likelihood function (\ref{eq:posbern.condlikelihood}). However,
+Appendix B describes an alternative type of
+fitted value; the choice is made by the argument \code{type.fitted}, 
+and the fitted values are returned by the \code{fitted()} method function.
+
+
+Notice that in Table \ref{tab2}, the \pkg{VGAM} family functions have 
+arguments such as \verb+parallel.b+ which may be assigned a logical or 
+else a formula with a logical as the response. If it is a single logical 
+then the function may or may not apply that constraint to the intercept. 
+The formula is the most general and some care must be taken with the 
+intercept term. Here are some examples of the syntax:
+\begin{itemize}
+\item \code{parallel.b = TRUE \mytilde{} x2} means a parallelism assumption is
+applied to variable $x_2$ and the intercept, since
+formulas include the intercept by default. 
+ 
+\item \code{parallel.b = TRUE \mytilde{} x2-1} means a parallelism assumption is
+applied to variable $x_2$ only.
+ 
+\item \code{parallel.b = FALSE \mytilde{} 0} means a parallelism assumption 
+is applied to every variable including the intercept.
+\end{itemize}
+
+
+% ---------------------------------------------------------------
+\subsection[Models M0/Mh]{Models $\calM_0$/$\calM_h$}
+\label{sec:M0Mh}
+
+
+For $\calM_0$/$\calM_h$, the defaults are given as
+
+
+<<label = poz-args-posbinomial>>=
+args(posbinomial)
+@ 
+
+
+
+Both models can alternatively be fitted using \code{posbernoulli.t()},
+\code{posbernoulli.b()} and\\ \code{posbernoulli.tb()} by setting the
+appropriate constraints/arguments (Table \ref{tab2}). For example, 
+setting \code{posbernoulli.t(parallel.t = FALSE \mytilde{} 0)}
+constrains all the $p_{j}$ to be equal.
+
+
+If comparing all eight models using \code{AIC()} or \code{BIC()} then 
+setting \code{omit.constant = TRUE} will allow for comparisons to be 
+made with the positive-Bernoulli functions given below. The reason is that 
+this omits the log-normalizing constant $\log{\tau \choose \tau{y}_i^{*}}$
+from its conditional log-likelihood so that it is comparable with the logarithm
+of (\ref{eq:posbern.condlikelihood}).
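+
+As a hedged sketch (not run), assuming \code{pdata} is a hypothetical data
+frame holding both the aggregated counts (\code{cap}, \code{noncap}) and
+the raw capture histories (\code{y1}, \code{y2}, \code{y3}) for $\tau=3$,
+the two fits below give comparable AIC values:
+
+
+<<label = omit-constant-sketch, eval = FALSE, prompt = FALSE>>=
+fit.M0.bin  <- vglm(cbind(cap, noncap) ~ 1,
+                    posbinomial(omit.constant = TRUE), data = pdata)
+fit.M0.bern <- vglm(cbind(y1, y2, y3) ~ 1,
+                    posbernoulli.t(parallel.t = FALSE ~ 0), data = pdata)
+c(AIC(fit.M0.bin), AIC(fit.M0.bern))
+@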
+
+
+An extreme case for $\calM_h$ is where $p_{ij} = p_i$ with $p_i$ being
+parameters in their own right \citep{otis:etal:1978}. While this could
+possibly be fitted by creating a covariate of the form \code{factor(1:n)}, 
+there would be far too many parameters for comfort. Such an extreme 
+case is not recommended because of the over-parameterization.
+
+
+% ---------------------------------------------------------------
+\subsection[Models Mt/Mth]{Models $\calM_t$/$\calM_{th}$}
+\label{sec:MtMth}
+
+
+<<label = poz-args-posbernoulli-t>>=
+args(posbernoulli.t)
+@ 
+
+
+
+Note that for $\calM_t$, capture probabilities are the same for each 
+individual but may vary with time, i.e., $H_0: p_{ij} = p_{j}$.
+One might wish to constrain the probabilities of a subset
+of sampling occasions to be equal by forming the appropriate
+constraint matrices.
+
+
+Argument \code{iprob} is for an optional initial value for the
+probability; however, all \pkg{VGAM} family functions are self-starting
+and usually do not need such input.
+
+
+% ---------------------------------------------------------------
+\subsection[Models Mb/Mbh]{Models $\calM_b$/$\calM_{bh}$}
+\label{sec:MbMbh}
+
+
+<<label = poz-args-posbernoulli-b>>=
+args(posbernoulli.b)
+@
+
+
+
+Setting \code{drop.b = FALSE \mytilde{} 0} assumes 
+there is no behavioural effect and this reduces to $\calM_0$/$\calM_h$.
+The default constraint matrices are
+\[\bH_1 =\left(\begin{array}{cc}
+0 & 1 \\ 1 & 1 \\
+\end{array} \right), \ \ \ \ \bH_2 = \cdots = \bH_d =
+\left(\begin{array}{c}
+1 \\ 1 \\
+\end{array}\right)\]
+so that the first coefficient $\beta_{(1)1}^{*}$ corresponds to the 
+behavioural effect. Section \ref{sec:poz:posbernoulli.eg.ephemeral}
+illustrates how the VGLM/VGAM framework can handle short-term and
+long-term behavioural effects.
+
+
+% ---------------------------------------------------------------
+\subsection[Models Mtb/Mtbh]{Models $\calM_{tb}$/$\calM_{tbh}$}
+\label{sec:MtbMtbh}
+
+
+There are three arguments which determine whether there 
+are behavioural effects and/or time effects: \code{parallel.b},
+\code{parallel.t} and \code{drop.b}. The last two are as above.
+The defaults are
+
+
+
+<<label = poz-args-posbernoulli-tb>>=
+args(posbernoulli.tb)
+@
+
+
+
+One would usually want the behavioural effect to be equal over
+different sampling occasions; therefore \code{parallel.b} should
+normally be left at its default. Allowing it to be \code{FALSE} for a
+covariate $x_k$ means an additional $\tau-1$ parameters, something
+that is not warranted unless the data set is very large and/or the
+behavioural effect varies greatly over time.
+
+
+
+Arguments \code{ridge.constant} and \code{ridge.power}
+concern the working weight matrices and are explained in 
+Appendix A. 
+
+
+
+Finally, we note that using
+
+
+
+<<label = poz-posbernoulli-tb-gen, prompt=FALSE, eval=FALSE>>=
+vglm(..., family = posbernoulli.tb(parallel.b = TRUE ~ 0, parallel.t = TRUE ~ 0,
+                                   drop.b = TRUE ~ 0))
+@
+
+
+
+fits the most general model. Its formula is effectively (\ref{gammod2})
+for $M=2\tau-1$, hence there are $(2\tau-1)d$ regression coefficients
+in total---far too many for most data sets.
+
+
+
+%*********************************************************************
+\newpage
+\section[Examples]{Examples}
+\label{sec:body:exam}
+
+
+We present several examples using \pkg{VGAM} on both real-life and
+simulated CR data.
+
+
+% ---------------------------------------------------------------
+\subsection[Deer mice]{Deer mice}
+\label{sec:deer.mice}
+
+
+Our first example uses a well-known data set analyzed in both \citet{hugg:1991} 
+and \citet{amst:mcdo:manl:2005}. The CR data were collected on the deer mouse 
+(\textit{Peromyscus maniculatus}), a small rodent native to North America, 
+and about 8 to 10 cm long, not counting the length of the tail. There 
+were $n=38$ individual mice caught over $\tau=6$ trapping occasions. 
+Individual body weight, sex and age (young or adult) were also recorded, 
+which we used as covariates to model heterogeneity. The data are given 
+in the following data frame \code{deermice}:
+
+
+<<label = eg-deermice-look>>=
+head(deermice, 4)
+@
+
+
+Each row represents the capture history followed by the corresponding 
+covariate values for each observed individual. We compared our 
+results with those given in \citet{hugg:1991}, who
+reported an analysis which involved fitting all eight model variations. 
+Prior to this we relabelled the age and sex covariates to match those
+given in \citet{hugg:1991}.
+
+
+<<label = example1-model>>=
+deermice <- within(deermice, {
+  age <- 2 - as.numeric(age) 
+  sex <- 1 - as.numeric(sex)
+})
+@
+
+
+Below we demonstrate model fitting for each model in \pkg{VGAM}:
+
+
+<<label = example2-model>>=
+M.0 <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ 1,
+            posbernoulli.t(parallel = TRUE ~ 1), data = deermice)
+M.b <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ 1, 
+            posbernoulli.b, data = deermice)
+M.t <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ 1, 
+            posbernoulli.t, data = deermice)
+M.h <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+            posbernoulli.t(parallel = TRUE ~ weight + sex + age), data = deermice)
+M.th <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+             posbernoulli.t, data = deermice)
+M.tb <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ 1, 
+             posbernoulli.tb, data = deermice)
+M.bh <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+             posbernoulli.b, data = deermice)
+M.tbh <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ weight + sex + age,
+              posbernoulli.tb, data = deermice)
+@
+
+
+Notice that \code{parallel = TRUE} was used for models $\calM_{0}/\calM_{h}$. 
+Population size estimates with standard errors (SE), log-likelihood and 
+AIC values can all be easily obtained;
+for example, for model $\calM_{bh}$:
+
+
+<<label = eg-deermice-Nhat>>=
+c(M.bh@extra$N.hat, M.bh@extra$SE.N.hat)
+c(logLik(M.bh), AIC(M.bh))
+@
+
+
+We did this for each model, and obtained the following:
+
+
+<<maketable, echo=FALSE, results=hide, message=FALSE, warning=FALSE>>=
+
+fits <- list(M.tbh = M.tbh, M.bh = M.bh, M.tb = M.tb, M.th = M.th,
+             M.h = M.h, M.b = M.b, M.t = M.t, M.0 = M.0)
+
+# N.hat, SE, -2*log-likelihood and AIC for each of the eight models.
+Table <- sapply(fits, function(fit)
+  c(round(fit@extra$N.hat, 2),
+    round(fit@extra$SE.N.hat, 2),
+    -2 * round(logLik(fit), 2),
+    round(AIC(fit), 2)))
+rownames(Table) <- c("N.hat", "SE", "-2ln(L)", "AIC")
+@
+
+
+
+<<label = example2-table>>=
+Table
+@
+
+
+Based on the AIC, it was concluded that $\calM_{bh}$ was 
+superior (although other criteria can also be considered), 
+yielding the following coefficients (as well as their SEs):
+
+
+<<label = poz-posbernoulli-eg-deermice-coefs>>=
+round(coef(M.bh), 2)
+round(sqrt(diag(vcov(M.bh))), 2)
+@
+
+
+which, along with the estimates for the population size, 
+agree with the results of \citet{hugg:1991}.
+The first coefficient, \Sexpr{round(coef(M.bh)[1],2)}, is positive 
+and hence implies a trap-happy effect.
+
+
+Now to illustrate the utility of fitting VGAMs, we performed some model
+checking on $\calM_{bh}$ by examining whether the component function
+of \code{weight} is indeed linear. To do this, we smoothed this covariate 
+but did not allow it to be too flexible due to the size of the data set.
+
+
+<<label = poz-posbernoulli-eg-deermice-smooth, fig.keep = 'none', message=FALSE, warning=FALSE>>=
+fit.bh <- vgam(cbind(y1, y2, y3, y4, y5, y6) ~ s(weight, df = 3) + sex + age,
+               posbernoulli.b, data = deermice)
+plot(fit.bh, se = TRUE, las = 1, lcol = "blue", scol = "orange",
+     rcol = "purple", scale = 5)
+@
+
+
+Notice that the \code{s()} function was used to smooth over 
+the weight covariate with the equivalent degrees of freedom set 
+to 3. Plots of the estimated component functions against each 
+covariate are given in Figure \ref{fig:poz:deermice}.
+In general, \code{weight} does seem to have a (positive) 
+linear effect on the logit scale. Young deer mice
+appear more easily caught compared to adults,
+and gender seems to have a smaller effect than weight.
+A more formal test of linearity is
+
+
+<<label = poz-posbernoulli-eg-deermice-summary>>=
+summary(fit.bh)
+@ 
+
+
+and not surprisingly, this suggests there is no significant nonlinearity.
+This is in agreement with Section 6.1 of \citet{hwan:hugg:2011} 
+who used kernel smoothing.
+
+
+Section \ref{sec:poz:posbernoulli.eg.ephemeral}
+reports a further analysis of the \code{deermice} data using a
+behavioural effect comprising of long-term and
+short-term memory.
+
+
+<<label = poz-posbernoulli-eg-deermice-smooth-shadow, eval=FALSE, echo = FALSE, message=FALSE, warning=FALSE>>=
+plot(fit.bh, se = TRUE, las = 1, lcol = "blue", scol = "orange",
+     rcol = "purple", scale = 5, mgp = c(2.0, 1, 0))
+@
+
+
+
+
+
+% ---------------------------------------------------------------------
+\begin{figure}[tt]
+\begin{center}
+<<plot-deermice, width=6.0, height=5, echo=FALSE, message=FALSE, warning=FALSE>>=
+par(mfrow = c(2, 2))
+par(las = 1, cex = 1.1, mar = c(3.8, 4, 0.5, 0.2) + 0.1)
+par(mgp = c(2.3, 1, 0))  # Default is c(3, 1, 0)
+
+
+
+plot(fit.bh, se = TRUE, las = 1, lcol = "blue", scol = "orange",
+     rcol = "purple", scale = 5, mgp = c(2.0, 1, 0))
+
+# < < poz-posbernoulli-eg-deermice-smooth-shadow> >
+
+
+
+
+@
+\caption{Estimated component functions with approximate $\pm 2$ 
+pointwise SE bands fitting a $\calM_{bh}$-VGAM, using 
+the \code{deermice} data. The rugplot gives jittered values of 
+each covariate value $x_{ik}$.\label{fig:poz:deermice}
+}
+\end{center}
+\end{figure}
+
+
+% ---------------------------------------------------------------------
+
+
+<<birds91read, echo = FALSE>>=
+data("prinia", package = "VGAM")
+@
+
+
+\subsection[Yellow-bellied Prinia]{Yellow-bellied Prinia}
+\label{sec:bird}
+
+
+Our second example also uses a well-known and well-studied data 
+set collected on the Yellow-bellied Prinia (\textit{Prinia flaviventris}),
+a common bird species located in Southeast Asia. A CR experiment was 
+conducted at the Mai Po Nature Reserve in Hong Kong during 1991, 
+where captured individuals had their wing lengths measured and 
+fat index recorded. A total of $\tau=19$ weekly capture occasions 
+were considered, during which $n=151$ distinct birds were captured. In previous
+studies, models $\calM_h$ and $\calM_{th}$ have both been fitted 
+to these data, where both wing length and fat index were used as
+covariates. We focus our attention on the former model and consider
+the \code{posbinomial()} function, with some further emphasis on
+demonstrating smoothing on covariates. The \code{prinia} data
+consists of four columns and rows corresponding to each observed
+individual:
+
+
+<<label = example2a, size = "small">>=
+head(prinia, 4)[, 1:4]
+@
+
+
+The first two columns give the observed covariate values for each
+individual, followed by the number of times each individual was captured/not
+captured respectively (columns 3--4). Notice that the wing
+length (\code{length}) was standardized here.
+We considered smoothing over wing length, 
+and plotted the fitted capture probabilities against wing length, with and 
+without fat content present; see Figure \ref{fig:bird}.
+
+
+<<label = example2b>>=
+M.h.GAM <- 
+  vgam(cbind(cap, noncap) ~ s(length, df = 3) + fat, 
+       posbinomial(omit.constant = TRUE, parallel = TRUE ~ s(length, df = 3) + fat),
+       data = prinia)
+M.h.GAM@extra$N.hat
+M.h.GAM@extra$SE.N.hat
+@
+
+
+<<label = eg-bird-smooth-shadow1, echo=FALSE, fig.keep = 'none', message = FALSE, warning = FALSE>>=
+plot.info <- plot(M.h.GAM,
+                  se = TRUE, las = 1, plot.arg = FALSE,
+                  lcol = "blue",
+                  scol = "orange",
+                  rcol = "purple",
+                  scale = 5)
+@
+
+
+
+<<label = eg-bird-smooth-shadow2, echo=FALSE, eval=FALSE>>=
+info.fit2 <- plot.info@preplot[[1]]
+fat.effect <- coef(M.h.GAM)["fat"] 
+intercept <- coef(M.h.GAM)["(Intercept)"]  
+
+ooo <- order(info.fit2$x)
+centering.const <- mean(prinia$length) - coef(M.h.GAM)["s(length, df = 3)"]
+
+plotframe <- data.frame(lin.pred.b = intercept + fat.effect * 1 +
+                                     centering.const + info.fit2$y[ooo],
+                        lin.pred.0 = intercept + fat.effect * 0 +
+                                     centering.const + info.fit2$y[ooo],
+                        x2 = info.fit2$x[ooo])
+
+plotframe <- transform(plotframe,
+                       up.lin.pred.b = lin.pred.b + 2*info.fit2$se.y[ooo],
+                       lo.lin.pred.b = lin.pred.b - 2*info.fit2$se.y[ooo],
+                       up.lin.pred.0 = lin.pred.0 + 2*info.fit2$se.y[ooo],
+                       lo.lin.pred.0 = lin.pred.0 - 2*info.fit2$se.y[ooo])
+
+plotframe <- transform(plotframe,
+                       fv.b    = logit(lin.pred.b,    inverse = TRUE),
+                       up.fv.b = logit(up.lin.pred.b, inverse = TRUE),
+                       lo.fv.b = logit(lo.lin.pred.b, inverse = TRUE),
+                       fv.0    = logit(lin.pred.0,    inverse = TRUE),
+                       up.fv.0 = logit(up.lin.pred.0, inverse = TRUE),
+                       lo.fv.0 = logit(lo.lin.pred.0, inverse = TRUE))
+
+with(plotframe,
+     matplot(x2, cbind(up.fv.b, fv.b, lo.fv.b), type = "l", col = "blue",
+             lty = c(2, 1, 2), las = 1, cex.lab = 1.5, lwd = 2,
+             main = "", ylab = "", xlab = "Wing length (standardized)"))
+mtext( ~ hat(p), side = 2, cex = 1.4, line = 4, adj = 0.5, las = 1)
+with(plotframe, matlines(x2, cbind(up.fv.0, fv.0, lo.fv.0),
+                         col = "darkorange", lty = c(2, 1, 2), lwd = 2))
+legend("topleft", legend = c("Fat present", "Fat not present"), bty = "n",
+       lwd = 2, col = c("blue", "darkorange"), merge = TRUE, cex = 1.5)
+@
+
+
+
+
+
+
+
+
+\begin{figure}[tt]
+\begin{center}
+<<plot-bird, width=6.0, height=5.5, echo=FALSE, message=FALSE, warning=FALSE>>=
+par(mfrow = c(1, 1))
+
+
+
+info.fit2 <- plot.info@preplot[[1]]
+fat.effect <- coef(M.h.GAM)["fat"] 
+intercept <- coef(M.h.GAM)["(Intercept)"]  
+
+ooo <- order(info.fit2$x)
+centering.const <- mean(prinia$length) - coef(M.h.GAM)["s(length, df = 3)"]
+
+plotframe <- data.frame(lin.pred.b = intercept + fat.effect * 1 +
+                                     centering.const + info.fit2$y[ooo],
+                        lin.pred.0 = intercept + fat.effect * 0 +
+                                     centering.const + info.fit2$y[ooo],
+                        x2 = info.fit2$x[ooo])
+
+plotframe <- transform(plotframe,
+                       up.lin.pred.b = lin.pred.b + 2*info.fit2$se.y[ooo],
+                       lo.lin.pred.b = lin.pred.b - 2*info.fit2$se.y[ooo],
+                       up.lin.pred.0 = lin.pred.0 + 2*info.fit2$se.y[ooo],
+                       lo.lin.pred.0 = lin.pred.0 - 2*info.fit2$se.y[ooo])
+
+plotframe <- transform(plotframe,
+                       fv.b    = logit(lin.pred.b,    inverse = TRUE),
+                       up.fv.b = logit(up.lin.pred.b, inverse = TRUE),
+                       lo.fv.b = logit(lo.lin.pred.b, inverse = TRUE),
+                       fv.0    = logit(lin.pred.0,    inverse = TRUE),
+                       up.fv.0 = logit(up.lin.pred.0, inverse = TRUE),
+                       lo.fv.0 = logit(lo.lin.pred.0, inverse = TRUE))
+
+with(plotframe,
+     matplot(x2, cbind(up.fv.b, fv.b, lo.fv.b), type = "l", col = "blue",
+             lty = c(2, 1, 2), las = 1, cex.lab = 1.5, lwd = 2,
+             main = "", ylab = "", xlab = "Wing length (standardized)"))
+mtext( ~ hat(p), side = 2, cex = 1.4, line = 4, adj = 0.5, las = 1)
+with(plotframe, matlines(x2, cbind(up.fv.0, fv.0, lo.fv.0),
+                         col = "darkorange", lty = c(2, 1, 2), lwd = 2))
+legend("topleft", legend = c("Fat present", "Fat not present"), bty = "n",
+       lwd = 2, col = c("blue", "darkorange"), merge = TRUE, cex = 1.5)
+
+
+
+# < < eg-bird-smooth-shadow2 > >
+    
+    
+    
+@
+\caption{
+Capture probability estimates with approximate $\pm 2$ pointwise SEs, 
+versus wing length with (blue) and without (orange) fat content present 
+fitting a $\calM_{h}$-VGAM, using the \code{prinia} data. 
+Notice that the standard errors are wider at the boundaries.
+\label{fig:bird}
+}
+\end{center}
+\end{figure}
+
+
+Both the estimates for the population size and shape of the fitted 
+capture probabilities with smoothing (Figure \ref{fig:bird}) matched 
+those in previous studies, e.g., see Figure 1 of \citet{hwan:hugg:2007}. 
+Notice that capture probabilities are larger for individuals with 
+fat content present; also, the approximate $\pm 2$ pointwise SEs 
+become wider at the boundaries---this feature is commonly seen
+in smooths.
+
+
+% ------------------------------------------------------------
+\subsection[A time-varying covariate example]{A time-varying covariate example}
+\label{sec:poz:posbernoulli.eg.hugg:1991}
+
+
+To illustrate time-varying covariates in the $\calM_{th}$ 
+and $\calM_{tbh}$ models via the \code{xij} argument, we 
+mimicked the results of \citet{hugg:1989} who fitted the $\calM_{tbh}$ 
+model to a small simulated data set of $n=18$ observed individuals 
+and $\tau=10$ trapping occasions. To help illustrate the procedure 
+we also fitted model $\calM_{th}$. The true population was $N=20$. 
+For the $i$th individual, model $\calM_{th}$ will be written 
+as ($i=1,\ldots,18$, $j=1,\ldots,10$)
+\begin{eqnarray}
+\label{eq:huggins89t0}
+\logit \, p_{ij} & = & \beta_{(1)1}^{*} + \beta_{(1)2}^{*} 
+\cdot \mathtt{x2}_{i} + \beta_{(1)3}^{*} \cdot \mathtt{x3}_{j}, \ \ %%
+\end{eqnarray}
+and model $\calM_{tbh}$ will be written as ($i=1,\ldots,18$, $j=1,\ldots,10$)
+\begin{eqnarray}
+\label{eq:huggins89t1}
+\logit \, p_{ij} &=&
+\beta_{(1)1}^{*} \,z_{ij} + \beta_{(2)1}^{*} + \beta_{(1)2}^{*} 
+\cdot \mathtt{x2}_{i} + \beta_{(1)3}^{*} \cdot \mathtt{x3}_{j}, \ \ \ %%
+\end{eqnarray}
+where $\beta_{(1)1}^{*}$ in (\ref{eq:huggins89t1}) is the behavioural 
+effect, and $z_{ij}$ is defined in Table \ref{tab0}. Variable \code{x2} 
+is an ordinary individual covariate such as weight, as in the previous 
+examples. The variable \code{x3} is a time-varying or occasion-specific 
+covariate such as temperature or daily rainfall that is handled using 
+the \code{xij} argument described in Section \ref{sec:xij}. Note that 
+the environmental covariates enter the $\eta_j$ both for individuals 
+that have not been previously captured and for those that have, so that if a behavioural
+response is included in the model (e.g., $\calM_{tbh}$) these covariates must be
+repeated to construct the overall model matrix. Also,
+note that there can be no recaptures on the first occasion, so that
+the environmental variable for this occasion 
+need not be repeated. We first examined the data
+
+
+<<label = poz-posbernoulli-tb-huggins89t1-data>>=
+head(Huggins89table1, 4)
+@
+
+
+The time-varying/occasion-specific covariate variable \code{x3} 
+is represented by variables \code{t01}--\code{t10}. As noted above, we need 
+to construct variables \code{T02}--\code{T10} to model the recapture probabilities 
+through $\eta_{j}$ for $j=11,\ldots,19$:
+
+
+<<label = poz-posbernoulli-tb-huggins89t1-look>>=
+Hdata <- transform(Huggins89table1, x3.tij = t01,
+                   T02 = t02, T03 = t03, T04 = t04, T05 = t05, T06 = t06,
+                   T07 = t07, T08 = t08, T09 = t09, T10 = t10)
+Hdata <- subset(Hdata,
+                y01 + y02 + y03 + y04 + y05 + y06 + y07 + y08 + y09 + y10 > 0)
+@
+  
+  
+The last step deletes the two observations which were never caught, so that $n=18$. 
+Thus model (\ref{eq:huggins89t0}) can be fitted by
+
+
+<<label = poz-posbernoulli-th-huggins89t0-fit>>=
+fit.th <-
+   vglm(cbind(y01, y02, y03, y04, y05, y06, y07, y08, y09, y10) ~  x2 + x3.tij,
+        xij = list(x3.tij ~ t01 + t02 + t03 + t04 + t05 + t06 + t07 + t08 +
+                            t09 + t10 - 1),
+        posbernoulli.t(parallel.t = TRUE ~ x2 + x3.tij), 
+        data = Hdata, trace = FALSE, 
+        form2 = ~ x2 + x3.tij + t01 + t02 + t03 + t04 + t05 + t06 + t07 + t08 +
+                                t09 + t10)
+@
+
+
+The \code{form2} argument is required if \code{xij} is used, 
+and it needs to include all the variables in the model. It is from
+this formula that a very large model matrix is constructed, from which
+the relevant columns are extracted to construct the diagonal matrix
+in (\ref{eq:vglimo:xij.vector.diag}), in the order of diagonal 
+elements specified by \code{xij}. The variable names must be unique.
+To check the constraint matrices we can use
+
+
+<<label = poz-posbernoulli-th-huggins89t0-constraints>>=
+constraints(fit.th, matrix = TRUE)
+@
+
+
+Model (\ref{eq:huggins89t1}) can be fitted by
+
+
+<<label = poz-posbernoulli-tbh-huggins89t1-fit>>=
+fit.tbh <-
+  vglm(cbind(y01, y02, y03, y04, y05, y06, y07, y08, y09, y10) ~  x2 + x3.tij,
+       xij = list(x3.tij ~ t01 + t02 + t03 + t04 + t05 + t06 +
+                                 t07 + t08 + t09 + t10 +
+                                 T02 + T03 + T04 + T05 + T06 +
+                                 T07 + T08 + T09 + T10 - 1),
+       posbernoulli.tb(parallel.t = TRUE ~ x2 + x3.tij),
+       data = Hdata, trace = FALSE,
+       form2 = ~  x2 + x3.tij +
+                  t01 + t02 + t03 + t04 + t05 + t06 + t07 + t08 + t09 + t10 +
+                        T02 + T03 + T04 + T05 + T06 + T07 + T08 + T09 + T10)
+@
+
+
+To compare with model (\ref{eq:huggins89t0}) we have
+
+
+<<label = poz-posbernoulli-tbh-huggins89t1-aic>>=
+c(logLik(fit.th), AIC(fit.th))
+c(logLik(fit.tbh), AIC(fit.tbh))
+@
+
+
+so that the behavioural response model does indeed give a better fit. 
+To check, the constraint matrices are (cf., Table \ref{tab3})
+
+
+<<label = poz-posbernoulli-tb-huggins89t1-constraints>>=
+head(constraints(fit.tbh, matrix = TRUE), 4)
+tail(constraints(fit.tbh, matrix = TRUE), 4)
+@
+
+
+The coefficients $\widehat{\beta}_{(j)k}^{*}$ and their standard errors are
+
+
+<<label = poz-posbernoulli-tb-huggins89t1-coefs>>=
+coef(fit.tbh)
+sqrt(diag(vcov(fit.tbh))) 
+@
+
+
+The first coefficient, \Sexpr{round(coef(fit.tbh)[1], 2)}, is positive and
+hence implies a trap-happy effect. The Wald statistic for the behavioural effect,
+being \Sexpr{round(c(coef(fit.tbh) / sqrt(diag(vcov(fit.tbh))))[1], 2)},
+suggests the effect is real.
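+
+As a hedged aside (not run), all the Wald statistics can be obtained at
+once from the fitted object:
+
+
+<<label = wald-sketch, eval = FALSE, prompt = FALSE>>=
+round(coef(fit.tbh) / sqrt(diag(vcov(fit.tbh))), 2)  # Wald z-statistics
+@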
+
+
+Estimates of the population size can be obtained from
+
+
+<<label = poz-posbernoulli-tb-huggins89t1-Nhat>>=
+fit.tbh@extra$N.hat
+fit.tbh@extra$SE.N.hat
+@
+
+
+This compares with $\widehat{N}=20.86$ with a standard error 
+of $4.51$ \citep{hugg:1989}.
+
+
+In closing, we refit model \code{fit.tbh} using
+\code{Select()} to illustrate how to avoid
+manually specifying cumbersome formulas and response matrices
+with many columns. For example, suppose \code{pdata} is a data frame
+with columns \code{y01}, \code{y02}, \ldots, \code{y30}.
+Then \code{Select(pdata, "y")} will return the matrix 
+\code{cbind(y01, y02, \ldots, y30)} if there are no other 
+variables beginning with \code{"y"}.
+
+
+Starting with \code{Huggins89table1}, the following
+code works quite generally provided the original variables
+are labelled as \code{y01}, \code{y02}, \ldots,
+and \code{t01}, \code{t02}, \ldots.
+The code makes a copy of \code{cbind(t01,\ldots,t10)}
+for the capture probabilities
+and calls the variables \code{cbind(T01,\ldots,T10)}
+for the recapture probabilities.
+Also, \code{Form2} contains more variables than are needed.
+
+
+<<label = poz-posbernoulli-tbh-huggins89t1-fit-Select, eval=T>>=
+Hdata <- subset(Huggins89table1, rowSums(Select(Huggins89table1, "y")) > 0)
+Hdata.T <- Select(Hdata, "t")
+colnames(Hdata.T) <- gsub("t", "T", colnames(Hdata.T))
+Hdata <- data.frame(Hdata, Hdata.T)
+Hdata <- transform(Hdata, x3.tij = y01)
+Form2 <- Select(Hdata, prefix = TRUE, as.formula = TRUE)
+Xij   <- Select(Hdata, c("t", "T"), as.formula = TRUE,
+                sort = FALSE, rhs = "0", lhs = "x3.tij", exclude = "T01")
+fit.tbh <- vglm(Select(Hdata, "y") ~ x2 + x3.tij,
+                form2 = Form2,  xij = list(Xij),
+                posbernoulli.tb(parallel.t = TRUE ~ x2 + x3.tij),
+                data = Hdata, trace = FALSE)
+coef(fit.tbh)
+@
+
+
+Note that this illustrates the ability to enter a matrix response without 
+an explicit \code{cbind()}, e.g., \code{Y <- Select(Hdata, "y")} and the 
+invocation \code{vglm(Y \mytilde{}} $\cdots$\code{)} would work as well.
+However, using \code{cbind()} encourages the use of column names, 
+which is good style and avoids potential coding errors.
+
+
+% ------------------------------------------------------------
+\subsection[Ephemeral and enduring memory]{Ephemeral and enduring memory}
+\label{sec:poz:posbernoulli.eg.ephemeral}
+
+
+\cite{yang:chao:2005} consider modelling the behavioural effect
+with both enduring (long-term) and ephemeral (short-term) memory components.
+For example, the short-term component depends on whether or not the animal
+was caught on the most recent sampling occasion. We call this a lag-1 effect.
+In the example of this section, which combines aspects of
+Sections \ref{sec:deer.mice} 
+and \ref{sec:poz:posbernoulli.eg.hugg:1991}, we illustrate how this
+may be easily achieved
+within the VGLM framework; it is another case of using the \code{xij}
+argument. We retain the enduring component as with the $\calM_{tbh}$:
+$\bH_1$ contains a column that applies to all the recapture probabilities. 
+For simplicity, we first consider a lag-1 effect only
+(as in \cite{yang:chao:2005}) 
+for the short-term component.
+
+
+In the following, we fit a $\calM_{tbh}$ model to \code{deermice}
+with both long-term and short-term effects:
+\begin{eqnarray*}
+\mathrm{logit}\, p_{cs} &=&
+\beta_{(2)1}^{*} + 
+\beta_{(1)2}^{*} \, \mathtt{sex} +
+\beta_{(1)3}^{*} \, \mathtt{weight},
+\\
+\mathrm{logit}\, p_{rt} &=&
+\beta_{(1)1}^{*} + \beta_{(2)1}^{*} + 
+\beta_{(1)2}^{*} \, \mathtt{sex} +
+\beta_{(1)3}^{*} \, \mathtt{weight} +
+\beta_{(1)4}^{*} \, y_{t-1},
+\end{eqnarray*}
+where $s=2,\ldots,\tau$, $t=1,\ldots,\tau$ and $\tau=6$.
+
+
+<<label = poz-posbernoulli-bh-ephemeral-method1>>=
+deermice <- transform(deermice, Lag1 = y1)
+M.tbh.lag1 <-
+  vglm(cbind(y1, y2, y3, y4, y5, y6) ~ sex + weight + Lag1,
+       posbernoulli.tb(parallel.t = FALSE ~ 0,
+                       parallel.b = FALSE ~ 0,
+                       drop.b = FALSE ~ 1),
+       xij = list(Lag1 ~ fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                         fill(y5) + fill(y6) +
+                         y1 + y2 + y3 + y4 + y5),
+       form2 = ~ sex + weight + Lag1 +
+                 fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                 fill(y5) + fill(y6) +
+                 y1 + y2 + y3 + y4 + y5 + y6,
+       data = deermice)
+coef(M.tbh.lag1)
+@ 
+
+
+The coefficient of \code{Lag1}, \Sexpr{round(coef(M.tbh.lag1)["Lag1"], dig = 4)},
+is the estimated ephemeral effect $\widehat{\beta}_{(1)4}^{*}$.
+The estimated enduring effect $\widehat{\beta}_{(1)1}^{*}$
+has value \Sexpr{round(coef(M.tbh.lag1)["(Intercept):1"], dig = 4)}.
+Note that the \code{fill()} function is used to create six variables
+that are identically 0, i.e., $\bzero_n$.
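+
+
+As a quick check (not run), these terms can be verified to be zero
+vectors of length $n$:
+
+
+<<label = poz-fill-check, eval = FALSE>>=
+head(with(deermice, fill(y1)), 3)  # All zeros, conformable with y1
+@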
+
+
+There is an alternative method to fit the above model: here we
+set $\bH_{\mathtt{Lag1}} = (\bzero_{\tau}^{\top}, \bone_{\tau-1}^{\top})^{\top}$,
+so that the variables \code{fill(y1)},\ldots,\code{fill(y6)}
+can be replaced by variables that need not be 0.
+Importantly, the two methods yield the same $\bX^{\#}_{(ik)}\bH_k$
+in (\ref{eq:vglimo:xij.vector.diag}). This second method requires
+the constraint matrices to be supplied via the \code{constraints} argument.
+For example,
+
+
+<<label = poz-posbernoulli-bh-ephemeral-method2>>=
+deermice <- transform(deermice, Lag1 = y1)
+deermice <- transform(deermice, f1 = y1, f2 = y1, f3 = y1, f4 = y1,
+                                f5 = y1, f6 = y1)
+tau <- 6
+H2 <- H3 <- cbind(rep(1, 2*tau-1))
+H4 <- cbind(c(rep(0, tau), rep(1, tau-1)))
+M.tbh.lag1.method2 <-
+  vglm(cbind(y1, y2, y3, y4, y5, y6) ~ sex + weight + Lag1,
+       posbernoulli.tb(parallel.b = TRUE ~ 0, parallel.t = TRUE ~ 0),
+       constraints = list("(Intercept)" = cbind(H4, 1), sex = H2, weight = H3, 
+                          Lag1 = H4),
+       xij = list(Lag1 ~ f1 + f2 + f3 + f4 + f5 + f6 +
+                         y1 + y2 + y3 + y4 + y5),
+       form2 = Select(deermice, prefix = TRUE, as.formula = TRUE),
+       data = deermice)
+coef(M.tbh.lag1.method2)
+@ 
+
+
+is identical. In closing, we note that
+more complicated models can be handled.
+For example, \code{pmax()} can be used to handle lag-2 effects,
+as follows.
+
+
+<<label = poz-posbernoulli-bh-ephemeral-lag2>>=
+deermice <- transform(deermice, Lag2 = y1)
+M.bh.lag2 <-
+  vglm(cbind(y1, y2, y3, y4, y5, y6) ~ sex + weight + Lag2,
+       posbernoulli.tb(parallel.t = FALSE ~ 0,
+                       parallel.b = FALSE ~ 0,
+                       drop.b = FALSE ~ 1),
+       xij = list(Lag2 ~ fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                         fill(y5) + fill(y6) +
+                         y1 + pmax(y1, y2) + pmax(y2, y3) + pmax(y3, y4) + 
+                         pmax(y4, y5)),
+       form2 = ~ sex + weight + Lag2 +
+                 fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                 fill(y5) + fill(y6) +
+                 y1 + pmax(y1, y2) + pmax(y2, y3) + pmax(y3, y4) + 
+                 pmax(y4, y5) + y6,
+       data = deermice)
+coef(M.bh.lag2)
+@ 
+
+
+Models with separate lag-1 and lag-2 effects may be estimated similarly.
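+
+
+For instance, the following sketch (not run) fits both effects at once.
+It assumes \pkg{VGAM}'s \code{fill1()}, a copy of \code{fill()} provided
+so that an extra zero term can appear within the same formula; the
+lag-2 value for recapture occasion 2 is taken to be 0.
+
+
+<<label = poz-posbernoulli-bh-ephemeral-lag1and2, eval = FALSE>>=
+deermice <- transform(deermice, Lag1 = y1, Lag2 = y1)
+M.tbh.lag1and2 <-
+  vglm(cbind(y1, y2, y3, y4, y5, y6) ~ sex + weight + Lag1 + Lag2,
+       posbernoulli.tb(parallel.t = FALSE ~ 0,
+                       parallel.b = FALSE ~ 0,
+                       drop.b = FALSE ~ 1),
+       xij = list(Lag1 ~ fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                         fill(y5) + fill(y6) +
+                         y1 + y2 + y3 + y4 + y5,
+                  Lag2 ~ fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                         fill(y5) + fill(y6) +
+                         fill1(y1) + y1 + y2 + y3 + y4),
+       form2 = ~ sex + weight + Lag1 + Lag2 +
+                 fill(y1) + fill(y2) + fill(y3) + fill(y4) +
+                 fill(y5) + fill(y6) + fill1(y1) +
+                 y1 + y2 + y3 + y4 + y5 + y6,
+       data = deermice)
+coef(M.tbh.lag1and2)
+@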
+
+
+
+
+
+
+
+%*********************************************************************
+\section[Discussion]{Discussion}
+\label{sec:discussion}
+
+
+We have presented how the VGLM/VGAM framework naturally handles the
+conditional-likelihood and closed population CR models in a GLM-like
+manner. Recently, \citet{stok:2011} proposed a partial likelihood approach for
+heterogeneous models with covariates. There, the recaptures of the
+observed individuals were modelled, which yielded a binomial distribution,
+and hence a GLM/GAM framework in \proglang{R} is also possible. However, some
+efficiency is lost, as any individuals captured only once on the last
+occasion are excluded. The advantage of partial likelihood is that the
+full range of GLM-based techniques, which extends beyond GAMs,
+is readily applicable. \citet{hugg:hwan:2007,hwan:hugg:2011}
+and \citet{stok:hugg:2012} implemented smoothing on covariates for more
+general models; however, these methods required sophisticated
+coding to estimate the model parameters. \citet{zwane:2004} also used
+the \pkg{VGAM} package for smoothing and CR data, but considered multinomial
+logit models as an alternative to the conditional likelihood. We
+believe the methods here, based on spline smoothing and classical GAM,
+are a significant improvement in terms of ease of use, capability and
+efficiency.
+
+
+When using any statistical software, the user must be careful
+when analyzing and interpreting the output.
+In our case, one must be careful when estimating the population size via
+the HT estimator. Notice that (\ref{eq:HT}) sums the reciprocals of
+the estimated capture probabilities of the individuals seen at least
+once, $\widehat{\pi}_{i}(\btheta)$. Hence, for very
+small $\widehat{\pi}_{i}(\btheta)$, the population size estimate may
+be unrealistically large (this is also apparent when
+using the \pkg{mra} and \pkg{Rcapture} packages, the latter of which
+gives the warning message: \code{The abundance estimation for this model
+can be unstable}). To avoid this, \citet{stok:hugg:2012}
+proposed a robust HT estimator which places a lower bound
+on $\widehat{\pi}_{i}(\btheta)$ to prevent it from giving
+unrealistically large values. In \pkg{VGAM}, a warning similar
+to \pkg{Rcapture}'s is also implemented, and there are
+arguments to control how close to 0 ``very small'' is and to
+suppress the warning entirely.
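+
+
+To make the structure of (\ref{eq:HT}) concrete, the following sketch
+(not run) recomputes $\widehat{N}$ by hand from \code{fit.tbh} of
+Section \ref{sec:poz:posbernoulli.eg.hugg:1991}, assuming the first
+$\tau$ linear predictors correspond to the $\mathrm{logit}\,p_{cj}$:
+
+
+<<label = poz-HT-by-hand, eval = FALSE>>=
+tau <- 10                              # Sampling occasions
+eta <- predict(fit.tbh)                # n x (2 * tau - 1) matrix of etas
+p.c <- plogis(eta[, 1:tau])            # Capture probabilities p_cj
+pi.hat <- 1 - apply(1 - p.c, 1, prod)  # Pr(captured at least once)
+sum(1 / pi.hat)                        # cf. fit.tbh@extra$N.hat
+@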
+
+
+There are limitations to $\calM_{h}$-type models, in that they rely
+on the very strong assumption that all the heterogeneity is explained
+by the unit-level covariates. This assumption is often not true;
+see, e.g., \cite{rive:bail:2014}. To this end, a proposal is to add
+random effects to the VGLM class. This would result in the VGLMM class
+(``M'' for mixed), which would be potentially very useful if developed
+successfully. Of course, VGLMMs would contain
+GLMMs \citep{mccu:sear:neuh:2008} as a special case.
+Further possible developments include
+automatic smoothing parameter selection (via, say, generalized
+cross-validation or AIC) and a bootstrap procedure as an alternative
+for computing standard errors.
+
+
+GAMs are now a standard statistical tool in the modern data analyst's
+toolbox. With the exception of the above references, CR analysis has
+largely been confined to a few regression coefficients (at most),
+and devoid of any data-driven exploratory analyses involving graphics.
+This work has sought to address this need by introducing GAM-like
+analyses using a unified statistical framework. Furthermore, the
+functions are easy to use and can often be invoked by a single
+line of code. Finally, we believe this work is a substantial
+improvement over other existing software for closed population
+estimation, and we have shown \pkg{VGAM}'s favourable speed and
+reliability over other closed population CR \proglang{R} packages.
+
+
+%*********************************************************************
+\section*{Acknowledgements}
+
+
+We thank the reviewers for their helpful feedback that led to substantial 
+improvements in the manuscript. TWY thanks Anne Chao for a helpful 
+conversation, and the Department of Mathematics and Statistics at 
+the University of Melbourne for hospitality, during a sabbatical visit 
+to Taiwan and a workshop, respectively. Part of his work was also 
+conducted while as a visitor to the Institute of Statistical Science, 
+Academia Sinica, Taipei, during October 2012. JS visited TWY on the 
+Tweedle-funded Melbourne Abroad Travelling Scholarship, the 
+University of Melbourne, during September 2011. All authors would 
+also like to thank Paul Yip for providing, and giving permission to
+use, the \code{prinia} data set, and Zachary Kurtz for some helpful
+comments.
+
+
+
+
+\bibliography{./crVGAM}
+
+
+
+
+%*********************************************************************
+\section*{Appendix A: Derivatives}
+\label{sec:posbernoulli.technical}
+
+
+We give the first and (expected) second derivatives of the models.
+Let $z_{ij}= 1$ if individual $i$ has been captured before occasion $j$,
+and $z_{ij}=0$ otherwise. Also, let $p_{cj}$ and $p_{rj}$ be the
+probabilities that an individual is captured and recaptured, respectively,
+at sampling occasion $j$,
+and let $Q_{s:t} = \prod\limits_{j=s}^{t} (1-p_{cj})$.
+Subscripts $i$ are occasionally omitted for clarity.
+\cite{hugg:1989} gives a general form for the derivatives of the
+conditional likelihood (\ref{eq:posbern.condlikelihood}).
+
+
+For the $\calM_{tbh}$, the score vector is 
+\begin{eqnarray*}
+\frac{\partial \ell_{i}}{\partial p_{cj}^{}} 
+&  =  & (1 - z_{ij})
+\left[\frac{y_{ij}}{p_{cj}^{}} - \frac{1-y_{ij}}{1-p_{cj}^{}} \right] -
+\frac{Q_{1:\tau} / (1-p_{cj}^{})}{1-Q_{1:\tau}},\ \ \ \ j=1,\ldots,\tau,\\
+\frac{\partial \ell_{i}}{\partial p_{rj}^{}}
+&  =  & z_{ij} \left[\frac{ y_{ij}}{ p_{rj}^{}} - \frac{1-y_{ij}}{1-p_{rj}^{}} \right],
+\ \ \ \ j=2,\ldots,\tau,
+\end{eqnarray*}
+and the non-zero elements of the expected information matrix (EIM)
+can be written
+\begin{eqnarray*}
+-\E \left(\frac{\partial^2 \ell}{\partial p_{cj}^{2}}\right)
+& = &
+\frac{Q_{1:(j-1)}}{1 - Q_{1:\tau}} \left\{\frac{1}{p_{cj}} + 
+\frac{1 - Q_{(j+1):\tau}}{1 - p_{cj}}\right\} -
+\left(\frac{\partial Q_{1:\tau} / \partial p_{cj}}{1-Q_{1:\tau}}\right)^2\\
+& = & \frac{1}{(1 - p_{cj})^2 (1 - Q_{1:\tau})} 
+\left\{\frac{Q_{1:j}}{p_{cj}} -\frac{Q_{1:\tau}}{1 - Q_{1:\tau}} \right\}, \\
+-\E \left(\frac{\partial^2 \ell}{\partial p_{rj}^{2}}\right)
+& = &\frac{1-Q_{1:j}/(1-p_{cj})}{p_{rj}(1-p_{rj})(1 - Q_{1:\tau})},\\
+-\E \left(\frac{\partial^2 \ell}{\partial p_{cj} \,\partial p_{ck} }\right)
+& = & 
+\displaystyle{\frac{\displaystyle{ -\frac{\partial Q_{1:\tau}}{\partial p_{cj}^{}} 
+\frac{\partial Q_{1:\tau}}{\partial p_{ck}^{}} }}{(1-Q_{1:\tau})^2}} - 
+\displaystyle{\frac{\displaystyle{\frac{\partial^2 Q_{1:\tau}}{\partial p_{cj}^{} \, 
+\partial p_{ck}^{}}}}{(1-Q_{1:\tau})}},\ \ \ j\neq k,
+\end{eqnarray*}
+where $\partial Q_{1:\tau} / \partial p_{cj}^{} = -Q_{1:\tau} / (1-p_{cj})$ 
+and $\partial^2 Q_{1:\tau} / (\partial p_{cj} \, \partial p_{ck}) = 
+Q_{1:\tau} / \{(1-p_{cj})(1-p_{ck})\}$.
+
+
+Arguments \code{ridge.constant} and \code{ridge.power}
+in \code{posbernoulli.tb()} add a ridge parameter to the first $\tau$
+EIM diagonal elements, i.e., those for the $p_{cj}$. This ensures that
+the working weight matrices are positive-definite, which is needed
+particularly in the first few iteratively reweighted least squares
+(IRLS) iterations. Specifically, at iteration ${a}$ a
+positive value ${\omega K \times a^p}$ is added, where $K$ and $p$
+correspond to the two arguments, and $\omega$ is the
+mean of the elements of the working weight matrices. The ridge factor
+decays to zero as iterations proceed and plays a negligible role upon
+convergence.
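+
+
+For example, a heavier initial ridge can be requested as follows
+(not run; the values are illustrative only, not the defaults):
+
+
+<<label = poz-ridge-args, eval = FALSE>>=
+fit.ridged <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ sex + weight,
+                   posbernoulli.tb(ridge.constant = 0.1, ridge.power = -4),
+                   data = deermice, trace = TRUE)
+@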
+
+
+For individual $i$, let $y_{0i}$ be the number of noncaptures before
+the first capture, $y_{r0i}$ be the number of noncaptures after the
+first capture, and $y_{r1i}$ be the number of recaptures after the
+first capture. For the $\calM_{bh}$, the score vector is
+\begin{eqnarray*}
+\frac{\partial\ell_{i}}{\partial p_{c}^{}} & = & \frac{1}{p_{c}^{}} -
+\frac{y_{0i}}{1 - p_{c}^{}} - \frac{\tau (1 - p_{c}^{})^{\tau-1}}{1-Q_{1:\tau}},\\
+\frac{\partial\ell_{i}}{\partial p_{r}^{}} & = & 
+\frac{y_{r1i}}{p_{r}^{}} - \frac{y_{r0i}}{1 - p_{r}^{}}.
+\end{eqnarray*}
+The non-zero elements of the EIM can be written
+\begin{eqnarray*}
+-\E \left(\frac{\partial^2 \ell}{\partial p_{c}^{2}}\right)
+& = &\frac{p_c}{1-Q_{1:\tau}} \;\sum_{j=1}^{\tau} \;(1-p_c)^{j-1}
+\left(\frac{j-1}{(1-p_c)^2} + \frac{1}{p_c^2}\right) - \frac{\partial}{\partial p_c} \!
+\left(\frac{\partial Q_{1:\tau} / \partial p_c}{1-Q_{1:\tau}} \right)\\
+& = &\frac{1 - Q_{1:\tau} - p_c [1 + (\tau-1)Q_{1:\tau}]}{p_c \, (1-p_c)^2 \, (1-Q_{1:\tau})} 
++ \frac{1}{p_c^2} -\mbox{}\\
+& & \tau (\tau-1) \, \frac{(1-p_c)^{\tau-2}}{1-Q_{1:\tau}} +\tau^2 \, 
+\frac{(1-p_c)^{\tau-2}}{(1-Q_{1:\tau})^2}, \\
+-\E \left(\frac{\partial^2 \ell}{\partial p_{r}^{2}}\right)
+& = & \frac{1}{p_r \, (1-p_r) \, (1-Q_{1:\tau})} \; \sum_{j=1}^{\tau}
+\left\{1 - (1-p_c)^{j-1}\right\}\\
+& = & \frac{\tau - (1-Q_{1:\tau}) / p_c}{p_r (1-p_r) (1-Q_{1:\tau})}.
+\end{eqnarray*}
+
+
+For the $\calM_{th}$, the score vector is
+\begin{eqnarray*}
+\frac{\partial \ell_{i}}{\partial p_{j}^{}}
+& = & \frac{y_{ij}}{p_{ij}^{}} -\frac{1-y_{ij}}{1-p_{ij}^{}} - 
+\frac{Q_{1:\tau} /(1-p_{ij}^{})}{1-Q_{1:\tau}},\ \ \ \ j=1,\ldots,\tau,
+\end{eqnarray*}
+and the EIM elements are
+\begin{eqnarray*}
+-\E \left(\frac{\partial^2 \ell}{\partial p_{j}^{2}}\right)
+& = & \frac{1 - p_{j} - Q_{1:\tau}}{p_{j} \,(1-p_{j})^2 \, (1 - Q_{1:\tau})^2},\\
+-\E \left(\frac{\partial^2 \ell}{\partial p_{j} \,\partial p_{k}}\right)
+& = & \frac{p_{j} \, p_{k} \, Q_{1:\tau}(1-Q_{1:\tau}) + Q_{1:\tau}^2}{(1-Q_{1:\tau})^2 \,(1-p_{j})\, (1-p_{k})},
+\ \ \ \ \ j\neq k.
+\end{eqnarray*}
+
+
+%*********************************************************************
+\section*{Appendix B: Fitted values}
+\label{sec:fitted.values}
+
+
+By default all the family functions have fitted values corresponding
+to the probabilities in the conditional
+likelihood function (\ref{eq:posbern.condlikelihood}), viz.
+\[
+{\widehat{p}}_{ij}^{ y_{ij}} \left(1-\widehat{p}_{ij}\right)^{1-y_{ij}}
+\cdot\left[1 - \prod\limits_{s=1}^{\tau}
+\left( 1 - \widehat{p}_{i,cs}^{}\right)\right]^{-1}.
+\]
+Alternatively, the unconditional means of the $Y_j$ can be
+returned as the fitted values by specifying the
+\code{type.fitted = "mean"} argument.
+They are $\mu_1 = E(Y_1) =p_{c1} / (1 - Q_{1:\tau})$,
+ $\mu_2 = [(1 - p_{c1}) \,p_{c2} + p_{c1}\, p_{r2}]/(1 - Q_{1:\tau})$, and
+for $j=3,4,\ldots,\tau$,
+\[
+\mu_j = 
+\left( 1 - Q_{1:\tau} \right)^{-1}
+\left\{p_{cj}\, Q_{1:(j-1)} + p_{rj} \!
+\left[ p_{c1} + \sum_{s=2}^{j-1} \, p_{cs} \,Q_{1:(s-1)}
+\right]
+\right\}.
+\]
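+
+
+For example, a sketch (not run; \code{type.fitted} is an argument of
+the family functions) refitting a \code{deermice} model of
+Section \ref{sec:deer.mice} so that \code{fitted()} returns these
+unconditional means:
+
+
+<<label = poz-type-fitted-mean, eval = FALSE>>=
+fit.mean <- vglm(cbind(y1, y2, y3, y4, y5, y6) ~ sex + weight,
+                 posbernoulli.tb(type.fitted = "mean"),
+                 data = deermice)
+head(fitted(fit.mean), 3)  # Estimates of E(Y_j)
+@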
+
+
+
+
+
+\end{document}
+
+
+
+
diff --git a/vignettes/crVGAM.bib b/vignettes/crVGAM.bib
new file mode 100644
index 0000000..fb184d4
--- /dev/null
+++ b/vignettes/crVGAM.bib
@@ -0,0 +1,516 @@
+ at Article{alho:1990,
+  author =	 {J. M. Alho},
+  title =	 {Logistic Regression in Capture-Recapture Models},
+  journal =	 {Biometrics},
+  volume =	 46,
+  year =	 1990,
+  number =	 3,
+  pages =	 {623--635},
+}
+
+ at Book{amst:mcdo:manl:2005,
+  author =	 {S. C. Amstrup and T. L. McDonald and B. F. J. Manly},
+  title =	 {Handbook of Capture-Recapture Analysis},
+  publisher =	 {Princeton University Press},
+  address =	 {Princeton},
+  year =	 2005,
+}
+
+ at Article{baillargeon:rivest:2007,
+  author =	 {S. Baillargeon and L.-P. Rivest},
+  title =	 {\pkg{Rcapture}: Loglinear Models for
+                  Capture-Recapture in {\proglang{R}}},
+  journal =	 {Journal of Statistical Software},
+  volume =	 19,
+  number =	 5,
+  pages =	 {1--31},
+  year =	 2007,
+  url =		 {http://www.jstatsoft.org/v19/i05/},
+}
+
+ at Book{bolk:2008,
+  author =	 {Benjamin M. Bolker},
+  title =	 {Ecological Models and Data in \proglang{R}},
+  publisher =	 {Princeton University Press},
+  address =	 {Princeton},
+  year =	 2008,
+}
+
+ at Book{burnham:anderson:1999,
+  author =	 {K. P. Burnham and D. R. Anderson},
+  title =	 {Model Selection and Inference: A Practical
+                  Information-Theoretic Approach},
+  year =	 1999,
+  publisher =	 {Springer-Verlag},
+  address =	 {New York},
+}
+
+ at Manual{cooch:white:2012,
+  author =	 {E. G. Cooch and G. C. White},
+  title =	 {Program {\pkg{MARK}}: A Gentle Introduction,
+                  11th Edition},
+  year =	 2012,
+  url =		 {http://www.phidot.org/software/mark/docs/book/},
+}
+
+ at Article{cormak:1989,
+  author =	 {R. M. Cormack},
+  title =	 {Log-Linear Models for Capture-Recapture},
+  journal =	 {Biometrics},
+  volume =	 45,
+  number =	 2,
+  pages =	 {395--413},
+  year =	 1989,
+}
+
+ at Book{craw:1993,
+  author =	 {M. J. Crawley},
+  title =	 {\proglang{GLIM} for Ecologists},
+  year =	 1993,
+  publisher =	 {Blackwell Scientific Publications},
+  address =	 {Boston},
+}
+
+ at Article{gimenez:2006,
+  author =	 {O. Gimenez and R. Covas and C. R. Brown and
+                  M. D. Anderson and M. B. Brown and T. Lenormand},
+  title =	 {Nonparametric Estimation of Natural Selection on a
+                  Quantitative Trait Using Mark-Recapture Data},
+  journal =	 {Evolution},
+  volume =	 60,
+  number =	 3,
+  pages =	 {460--466},
+  year =	 2006,
+}
+
+ at Book{gree:silv:1994,
+  author =	 {P. J. Green and B. W. Silverman},
+  title =	 {Nonparametric Regression and Generalized Linear
+                  Models: A Roughness Penalty Approach},
+  year =	 1994,
+  publisher =	 {Chapman \& Hall/CRC},
+  address =	 {London},
+  pages =	 182,
+}
+
+ at Book{hastie:1990,
+  author =	 {T. Hastie and R. Tibshirani},
+  title =	 {Generalized Additive Models},
+  publisher =	 {Chapman \& Hall/CRC},
+  address =	 {London},
+  year =	 1990,
+}
+
+ at Article{horv:thom:1952,
+  author =	 {D. G. Horvitz and D. J. Thompson},
+  title =	 {A Generalization of Sampling Without Replacement
+                  from a Finite Universe},
+  journal =	 {Journal of the American Statistical Association},
+  volume =	 47,
+  number =	 260,
+  year =	 1952,
+  pages =	 {663--685},
+}
+
+ at Article{hugg:1989,
+  author =	 {R. M. Huggins},
+  title =	 {On the Statistical Analysis of Capture Experiments},
+  journal =	 {Biometrika},
+  volume =	 76,
+  year =	 1989,
+  pages =	 {133--140},
+  number =	 1,
+}
+
+ at Article{hugg:1991,
+  author =	 {R. M. Huggins},
+  title =	 {Some Practical Aspects of a Conditional Likelihood
+                  Approach to Capture Experiments},
+  journal =	 {Biometrics},
+  volume =	 47,
+  year =	 1991,
+  number =	 2,
+  pages =	 {725--732},
+}
+
+ at Article{hugg:hwan:2007,
+  author =	 {R. M. Huggins and W.-H. Hwang},
+  title =	 {Non-Parametric Estimation of Population Size from
+                  Capture-Recapture Data when the Capture Probability
+                  Depends on a Covariate},
+  journal =	 {Journal of the Royal Statistical Society C},
+  volume =	 56,
+  number =	 4,
+  pages =	 {429--443},
+  year =	 2007,
+}
+
+ at Article{hugg:hwan:2011,
+  author =	 {R. M. Huggins and W.-H. Hwang},
+  title =	 {A Review of the Use of Conditional Likelihood in
+                  Capture-Recapture Experiments},
+  journal =	 {International Statistical Review},
+  volume =	 79,
+  number =	 3,
+  pages =	 {385--400},
+  year =	 2011,
+}
+
+ at Article{hwan:hugg:2007,
+  author =	 {W.-H. Hwang and R. M. Huggins},
+  title =	 {Application of Semiparametric Regression Models in
+                  the Analysis of Capture-Recapture Experiments},
+  journal =	 {Australian \& New Zealand Journal of Statistics},
+  volume =	 49,
+  number =	 2,
+  pages =	 {191--202},
+  year =	 2007,
+}
+
+ at Article{hwan:hugg:2011,
+  author =	 {W.-H. Hwang and R. M. Huggins},
+  title =	 {A Semiparametric Model for a Functional Behavioural
+                  Response to Capture in Capture-Recapture
+                  Experiments},
+  journal =	 {Australian \& New Zealand Journal of Statistics},
+  volume =	 53,
+  number =	 4,
+  pages =	 {403--421},
+  year =	 2011,
+}
+
+ at Manual{hwang:chao:2003,
+  title =	 {Brief User Guide for Program \pkg{CARE-3}:
+                  Analyzing Continuous-Time Capture-Recapture Data},
+  author =	 {W.-H. Hwang and A. Chao},
+  year =	 2003,
+  url =
+                  {http://chao.stat.nthu.edu.tw/blog/software-download/care/},
+}
+
+ at TechReport{laake:2013,
+  address =	 {Seattle},
+  type =	 {{AFSC} Processed Report},
+  title =	 {\pkg{RMark}: An \proglang{R} Interface for Analysis
+                  of Capture-Recapture Data with \pkg{MARK}},
+  url =
+                  {http://www.afsc.noaa.gov/Publications/ProcRpt/PR2013-01.pdf},
+  number =	 {2013-01},
+  author =	 {J. L. Laake},
+  year =	 2013,
+  pages =	 25,
+  institution =	 {Alaska Fisheries Science Center, {NOAA}, National
+                  Marine Fisheries Service},
+}
+
+ at Proceedings{lawl:2014,
+  editor =	 {J. F. Lawless},
+  title =	 {Statistics in Action: A {C}anadian Outlook},
+  year =	 2014,
+  publisher =	 {Chapman \& Hall/CRC Press},
+  address =	 {Boca Raton},
+}
+
+ at Article{lebreton:1992,
+  author =	 {J. D. Lebreton and K. P. Burnham and J. Clobert and
+                  D. R. Anderson},
+  title =	 {Modeling Survival and Testing Biological Hypotheses
+                  Using Marked Animals: A Unified Approach with Case
+                  Studies},
+  journal =	 {Ecological Monographs},
+  volume =	 62,
+  year =	 1992,
+  number =	 1,
+  pages =	 {67--118},
+}
+
+ at Book{mccr:morg:2014,
+  title =	 {Analysis of Capture-Recapture Data},
+  author =	 {R. S. McCrea and B. J. T. Morgan},
+  year =	 2014,
+  publisher =	 {Chapman \& Hall/CRC},
+  address =	 {London},
+}
+
+ at Book{mccu:sear:neuh:2008,
+  author =	 {Charles E. McCulloch and Shayle R. Searle and John
+                  M. Neuhaus},
+  title =	 {Generalized, Linear, and Mixed Models},
+  edition =	 {2nd},
+  publisher =	 {John Wiley \& Sons},
+  address =	 {Hoboken},
+  year =	 2008,
+}
+
+ at Book{mccull:1989,
+  author =	 {P. McCullagh and J. A. Nelder},
+  title =	 {Generalized Linear Models},
+  edition =	 {2nd},
+  publisher =	 {Chapman \& Hall/CRC},
+  address =	 {London},
+  year =	 1989,
+}
+
+ at Manual{mcdonald:2010,
+  title =	 {{\pkg{mra}}: Analysis of Mark-Recapture Data},
+  author =	 {T. L. McDonald},
+  year =	 2012,
+  note =	 {{\proglang{R}} package version 2.13},
+  url =		 {http://CRAN.R-project.org/package=mra},
+}
+
+ at Article{neld:wedd:1972,
+  author =	 {J. A. Nelder and R. W. M. Wedderburn},
+  title =	 {Generalized Linear Models},
+  year =	 1972,
+  journal =	 {Journal of the Royal Statistical Society A},
+  volume =	 135,
+  number =	 3,
+  pages =	 {370--384},
+}
+
+ at Article{otis:etal:1978,
+  author =	 {D. L. Otis and K. P. Burnham and G. C. White and
+                  D. R. Anderson},
+  title =	 {Statistical Inference From Capture Data on Closed
+                  Animal Populations},
+  journal =	 {Wildlife Monographs},
+  year =	 1978,
+  pages =	 {3--135},
+  volume =	 62,
+}
+
+ at Article{patil:1962,
+  author =	 {G. P. Patil},
+  title =	 {Maximum Likelihood Estimation for Generalized Power
+                  Series Distributions and its Application to a
+                  Truncated Binomial Distribution},
+  journal =	 {Biometrika},
+  volume =	 49,
+  year =	 1962,
+  number =	 1,
+  pages =	 {227--237},
+}
+
+ at Article{pollock:1984,
+  author =	 {K. H. Pollock and J. Hines and J. Nichols},
+  title =	 {The Use of Auxiliary Variables in Capture-Recapture
+                  and Removal Experiments.},
+  journal =	 {Biometrics},
+  volume =	 40,
+  year =	 1984,
+  pages =	 {329--340},
+  number =	 2,
+}
+
+ at Article{pollock:1991,
+  author =	 {K. H. Pollock},
+  title =	 {Modeling Capture, Recapture, and Removal Statistics
+                  for Estimation of Demographic Parameters for Fish
+                  and Wildlife Populations: {P}ast, Present and
+                  Future},
+  journal =	 {Journal of the American Statistical Association},
+  volume =	 86,
+  year =	 1991,
+  pages =	 {225--238},
+  number =	 413,
+}
+
+ at Article{pollock:2002,
+  author =	 {K. H. Pollock},
+  title =	 {The Use of Auxiliary Variables in Capture-Recapture
+                  Modelling: An Overview},
+  journal =	 {Journal of Applied Statistics},
+  volume =	 29,
+  year =	 2002,
+  pages =	 {85--102},
+  number =	 {1--4},
+}
+
+ at Manual{r:2014,
+  title =	 {\proglang{R}: A Language and Environment for Statistical
+                  Computing},
+  author =	 {{\proglang{R} Core Team}},
+  organization = {\proglang{R} Foundation for Statistical Computing},
+  address =	 {Vienna, Austria},
+  year =	 2015,
+  url =		 {http://www.R-project.org},
+}
+
+ at Manual{rcapturepackage:2012,
+  title =	 {\pkg{Rcapture}: Loglinear Models for
+                  Capture-Recapture Experiments},
+  author =	 {S. Baillargeon and L.-P. Rivest},
+  year =	 2014,
+  url =		 {http://CRAN.R-project.org/package=Rcapture},
+  note =	 {{\proglang{R}} package version 1.4-2},
+}
+
+ at InProceedings{rive:bail:2014,
+  author =	 {Louis-Paul Rivest and S. Baillargeon},
+  year =	 2014,
+  chapter =	 18,
+  title =	 {Capture-Recapture Methods for Estimating the Size of
+                  a Population: Dealing with Variable Capture
+                  Probabilities},
+  booktitle =	 {Statistics in Action: A {C}anadian Outlook},
+  pages =	 {289--304},
+}
+
+ at Article{schluter1988,
+  author =	 {D. Schluter},
+  title =	 {Estimating the Form of Natural Selection on a
+                  Quantitative Trait},
+  journal =	 {Evolution},
+  volume =	 42,
+  number =	 5,
+  pages =	 {849--861},
+  year =	 1988,
+}
+
+ at Article{stok:2011,
+  author =	 {J. Stoklosa and W.-H. Hwang and S. H. Wu and
+                  R. M. Huggins},
+  title =	 {Heterogeneous Capture-Recapture Models with
+                  Covariates: A Partial Likelihood Approach for Closed
+                  Populations},
+  journal =	 {Biometrics},
+  volume =	 67,
+  year =	 2011,
+  number =	 4,
+  pages =	 {1659--1665},
+}
+
+ at Article{stok:hugg:2012,
+  author =	 {J. Stoklosa and R. M. Huggins},
+  title =	 {A Robust P-Spline Approach to Closed Population
+                  Capture-Recapture Models with Time Dependence and
+                  Heterogeneity},
+  journal =	 {Computational Statistics \& Data Analysis},
+  volume =	 56,
+  year =	 2012,
+  pages =	 {408--417},
+  number =	 2,
+}
+
+ at Book{wood:2006,
+  author =	 {Simon N. Wood},
+  title =	 {Generalized Additive Models: An Introduction with
+                  \proglang{R}},
+  year =	 2006,
+  publisher =	 {Chapman \& Hall/CRC},
+  address =	 {London},
+}
+
+ at Article{yang:chao:2005,
+  author =	 {Hsin-Chou Yang and Anne Chao},
+  title =	 {Modeling Animals' Behavioral Response by {M}arkov
+                  Chain Models for Capture-Recapture Experiments},
+  journal =	 {Biometrics},
+  volume =	 61,
+  year =	 2005,
+  number =	 4,
+  pages =	 {1010--1017},
+}
+
+ at Article{yee:2008,
+  author =	 {T. W. Yee},
+  title =	 {The \pkg{VGAM} Package},
+  journal =	 {\proglang{R} News},
+  volume =	 8,
+  year =	 2008,
+  number =	 2,
+  pages =	 {28--39},
+}
+
+ at Article{yee:2010,
+  author =	 {T. W. Yee},
+  title =	 {The \pkg{VGAM} Package for Categorical Data
+                  Analysis},
+  journal =	 {Journal of Statistical Software},
+  volume =	 32,
+  year =	 2010,
+  number =	 10,
+  pages =	 {1--34},
+  url =		 {http://www.jstatsoft.org/v32/i10/},
+}
+
+ at Book{yee:2015,
+  author =	 {T. W. Yee},
+  title =	 {Vector Generalized Linear and Additive Models: With
+                  an Implementation in \proglang{R}},
+  year =	 2015,
+  publisher =	 {Springer-Verlag},
+  address =	 {New York},
+}
+
+ at Article{yee:hast:2003,
+  author =	 {T. W. Yee and T. J. Hastie},
+  title =	 {Reduced-Rank Vector Generalized Linear Models},
+  journal =	 {Statistical Modelling},
+  volume =	 3,
+  number =	 1,
+  pages =	 {15--41},
+  year =	 2003,
+}
+
+ at Article{yee:mitc:1991,
+  author =	 {T. W. Yee and N. D. Mitchell},
+  title =	 {Generalized Additive Models in Plant Ecology},
+  year =	 1991,
+  journal =	 {Journal of Vegetation Science},
+  volume =	 2,
+  number =	 5,
+  pages =	 {587--602},
+}
+
+ at Manual{yee:vgam:2013-093,
+  title =	 {\pkg{VGAM}: Vector Generalized Linear and Additive
+                  Models},
+  author =	 {T. W. Yee},
+  year =	 2014,
+  note =	 {\proglang{R} package version 0.9-6},
+  url =		 {http://CRAN.R-project.org/package=VGAM},
+}
+
+ at TechReport{yee:wild:1994a,
+  author =	 {T. W. Yee and C. J. Wild},
+  title =	 {Vector Generalized Additive Models},
+  year =	 1994,
+  institution =	 {Department of Statistics, University of Auckland},
+  address =	 {Auckland, New Zealand},
+  number =	 {STAT04},
+}
+
+ at Article{yee:wild:1996,
+  author =	 {T. W. Yee and C. J. Wild},
+  title =	 {Vector Generalized Additive Models},
+  journal =	 {Journal of the Royal Statistical Society B},
+  volume =	 58,
+  year =	 1996,
+  number =	 3,
+  pages =	 {481--493},
+}
+
+ at Article{zwane:2004,
+  author =	 {E. N. Zwane and P. G. M. van der Heijden},
+  title =	 {Semiparametric Models for Capture-Recapture Studies
+                  with Covariates},
+  journal =	 {Computational Statistics \& Data Analysis},
+  volume =	 47,
+  year =	 2004,
+  pages =	 {729--743},
+  number =	 4,
+}
+
+ at Article{bunge:2013,
+  author =	 {J. A. Bunge},
+  title =	 {A Survey of Software for Fitting Capture-Recapture Models},
+  journal =	 {Wiley Interdisciplinary Reviews: Computational Statistics},
+  volume =	 5,
+  year =	 2013,
+  pages =	 {114--120},
+  number =	 2,
+}
